""" Example application demonstrating job submission via bigjob 
    advert_job implementation of BigJob is not used
"""

import bigjob_condor
import saga
import os
import time

""" Directory with condor_submit wrapper and soft links to original condor_rm, condor_q"""
CONDOR_BIN = "/home/luckow/saga/condor_bin"

# Proxy certificate handed to every pilot job; alternatively read it from the
# environment via os.environ.get("X509_USER_PROXY").
X509_USER_PROXY = "/home/luckow/.globus/userproxy.pem"

# One entry per remote resource on which a pilot job (Condor glide-in) is
# started. Each entry supplies the GRAM contact string, the queue, the
# allocation/project, the number of nodes and the walltime to request.
resources_list = (
	{
		"gram_url": "poseidon1.loni.org/jobmanager-pbs",
		"queue": "checkpt",
		"project": "loni_loniadmin1",
		"number_nodes": 2,
		"walltime": 20,
	},
	{
		"gram_url": "oliver1.loni.org/jobmanager-pbs",
		"queue": "checkpt",
		"project": "loni_loniadmin1",
		"number_nodes": 2,
		"walltime": 20,
	},
	# Optional third resource, currently disabled:
	# {
	# 	"gram_url": "louie1.loni.org/jobmanager-pbs",
	# 	"queue": "checkpt",
	# 	"project": "loni_loniadmin1",
	# 	"number_nodes": 1,
	# 	"walltime": 10,
	# },
)

# Number of sub-jobs submitted to the local Condor pool.
NUMBER_JOBS = 2

""" Test Job Submission of NAMD via Condor BigJob """
# Sub-job states after which polling of that sub-job stops.
TERMINAL_STATES = ("Failed", "Done", "Canceled", "Suspended")


def wait_for_subjobs(jobs, poll_interval=10):
	"""Block until every sub-job in *jobs* has reached a terminal state.

	Sub-jobs are watched one at a time, last submitted first. The state of
	the sub-job currently being watched is printed on every poll. A
	KeyboardInterrupt (Ctrl-C) ends the wait early without raising, so the
	caller can still run its clean-up.

	jobs          -- sequence of objects offering a get_state() method;
	                 the sequence itself is not modified. May be empty.
	poll_interval -- seconds to sleep between two polls of a sub-job that
	                 has not finished yet.
	"""
	pending = list(jobs)
	try:
		while pending:
			state = str(pending[-1].get_state())
			print("state: " + state)
			if state in TERMINAL_STATES:
				# Move straight on to the next sub-job without sleeping.
				pending.pop()
			else:
				time.sleep(poll_interval)
	except KeyboardInterrupt:
		# Deliberate: the user asked to stop waiting, not to crash.
		pass


if __name__ == "__main__":

	##########################################################################################
	# Start BigJob
	# Parameter for BigJob

	# Create a local Condor pool (glidein master_condor on remote resources via Condor-G/GRAM2)
	print("Create a local Condor pool")
	print(time.ctime())

	bj = bigjob_condor.bigjob_condor()
	try:
		for resource in resources_list:
			pj = bj.start_pilot_job(
				lrms_url=resource["gram_url"],
				queue=resource["queue"],
				project=resource["project"],
				number_nodes=resource["number_nodes"],
				walltime=resource["walltime"],
				working_directory="/tmp/luckow",
				userproxy=X509_USER_PROXY)
			print("Glidein Condor-G Job URL: " + bj.pilot_url + " State: " + str(bj.get_state(pj)))

		##########################################################################################
		# Submit SubJob through BigJob (to the local Condor pool)
		# NAMD command: mpirun -np $v -machinefile machines `which namd2` NPT.conf
		# working directory: $TG_CLUSTER_SCRATCH/saga/bigjob/data

		jd = saga.job.description()
		jd.executable = "/home/luckow/src/bigjob/condor/condor_namd.sh"
		jd.arguments = ["NPT.conf"]
		jd.working_directory = "/work/lukas/saga/bigjob/data"
		jd.output = "namd.$(CLUSTER).$(PROCESS).$(NODE).out"
		jd.error = "namd.$(CLUSTER).$(PROCESS).$(NODE).err"

		# Extra Condor submit attributes, written next to the condor_submit
		# wrapper. The context manager closes the file even if a write fails.
		with open(os.path.join(CONDOR_BIN, "condor_attr"), "w") as attr:
			attr.write("universe = parallel\n")
			attr.write("machine_count = 2\n")			# number of nodes (not cores)
			attr.write("+WantParallelSchedulingGroups = True\n")	# to avoid running on nodes from different clusters

		jobs = []
		for job_no in range(NUMBER_JOBS):
			print("Start job no.: " + str(job_no))
			print(time.ctime())
			sj = bigjob_condor.subjob(bigjob=bj)
			sj.submit_job(jd)
			jobs.append(sj)

		# busy wait for completion
		wait_for_subjobs(jobs)
	finally:
		#######################################################################
		# Cleaning - stop BigJob - release nodes in the local Condor pool.
		# Runs on errors as well, so pilot jobs that were already started
		# do not keep holding remote nodes.
		bj.cancel()

	print(time.ctime())