#
# $Id: sched_body.tcl,v 1.1 1999/04/16 00:05:55 hender Exp $
#
# TCL Based Scheduler for PBS
# This is the "body" part of the scheduler.  See sched_init.tcl for
# procedure definitions and variable initializations.  NOTE: Some 
# variables must be initialized with each invocation of the 
# scheduler body.
#
# $Log: sched_body.tcl,v $
# Revision 1.1  1999/04/16 00:05:55  hender
# Restore old sample Tcl scheduler just for the fun of it
#
# Revision 1.1.2.1  1997/03/07 00:28:56  hender
# Add sample tcl scripts from Other sites.
#
# Revision 1.10  1996/07/08  17:44:56  tmillima
# added vesta queue
#
# Revision 1.9  1996/06/03  16:20:35  tmillima
# added code to unset vars ... trying to track
# down a memory leak
#
# Revision 1.8  1995/05/16  21:05:59  tmillima
# modified host assignment for queues
#
# Revision 1.7  1995/05/12  19:14:21  tmillima
# fixed bug introduced in change from queue to ehost described
# below
#
# Revision 1.6  1995/05/12  18:51:55  tmillima
# change burst processing so that jobs with ehost = juno
# rather than queue = juno are killed
#
# Revision 1.5  1995/05/12  13:27:36  tmillima
# call procedure to sort jobs by queue priority
# fixed bugs in processing burst queue
#
# Revision 1.3  1995/04/24  20:23:57  tmillima
# changed rjob,qjob to running_job,queued_job
# changed jobr,jobq to nrunnning,nqueued
# modified so that queue name = client name does
# right thing
#
# Revision 1.3  1995/04/24  20:23:57  tmillima
# changed rjob,qjob to running_job,queued_job
# changed jobr,jobq to nrunnning,nqueued
# modified so that queue name = client name does
# right thing
#
# Revision 1.2  1995/04/24  14:04:18  tmillima
# Moved some stuff to sched_init.tcl.  Check for queue names
# which match client names so that jobs can be queued for a
# particular client.  Rearranged some stuff.
#
# Revision 1.1  1995/04/19  15:21:19  tmillima
# Initial revision
#
#
# Discard everything a previous invocation of the body may have left
# behind, so stale values neither accumulate in memory nor confuse this
# pass.  Each name is removed only if it currently exists (scalar or
# array).
#
# varlist doubles as the loop variable so that no additional global name
# is introduced; it is removed again right after the loop.
#
foreach varlist {
    nrunning nqueued max_running priority queued_job running_job
    clientlist idlelist servstat qinfo qname load idle nidle
    queueinfo ajinfo prime nj ehost client nq nr hour qstruct dummy
    nqueues jid queue_name i exechost la qcnt specialjobs queue jstat
    owner queue_started rmcon jstruct s_name s_nqueued s_nrunning
    s_maxr s_state s_cnt datim clist now queuelist day hostlist k
} {
    if {[info exists $varlist]} {
        unset $varlist
    }
}
unset varlist
#
# Snapshot of the variables that survive the cleanup (debugging aid).
#
set varlist [info vars]
#puts "variable list: $varlist"
#
# Per-invocation initialisation.
#
# Write a date/time stamp to the scheduler output file.
#
set datim [clock format [clock seconds] -format {%D %T}]
puts $datim
#
# Prime-time flag.  Intended to let the scheduling policy depend on the
# time of day or day of week.  It is currently fixed at 0; the
# detection code is kept below, disabled.
#
set prime 0
#set now [exec date]
#set day [lindex $now 0]
#if { [lsearch {Mon Tue Wed Thu Fri} $day] != -1 } {
#    set hour [lindex $now 3]
#    if { "07:00:00" <= $hour && $hour <= "18:00:00" } {
#	set prime 1
#    }
#}
if {$debug} {
    puts "Prime time: $prime"
}
#
# Client list.  List order is scheduling preference: the first client
# is filled up first.  The order may differ between prime and non-prime
# time (both orders are the same at present).
#
if {!$prime} {
    set clientlist [list comptel juno pallas nova vesta]
} else {
    set clientlist [list comptel juno pallas nova vesta]
}
if {$debug} {
    puts "Client list: $clientlist"
}
#
# Start every client with zero running jobs.
#
foreach client $clientlist {
    set nrunning(host.$client) 0
}
#
# Query the server first: it is assumed to be more likely that nothing
# is pending than that no 'idle' client is available, so this is the
# cheapest early exit.
#
set servstat [pbsstatserv]
set s_cnt [getattrlval state_count $servstat]
set s_nqueued [getcntval Queued $s_cnt]
set s_nrunning [getcntval Running $s_cnt]
set s_state [getattrlval server_state $servstat]
set s_maxr [getattrlval max_running $servstat]
set s_name [getattrlval name $servstat]
#
# Nothing queued means nothing to schedule.
#
if {$s_nqueued == 0 && $debug} {
    puts "No jobs queued."
}
if {$s_nqueued == 0} {
    return
}
#
# Ok, we've got jobs queued so we need to get the job info.
# Query every queue on this server and record, per queue:
#   qname(i)              queue name, indexed by position in the reply
#   max_running(name)     the queue's max_running attribute
#   priority              keyed list mapping queue name -> Priority
#   nrunning(queue.name)  "Running" count taken from state_count
#   nqueued(name)         "Queued" count taken from state_count
# The counts of the special "burst" queue are examined below so that we
# do not return before its jobs have been handled.
#
set qinfo [pbsstatque]
set nqueues [llength $qinfo]
for {set i 0} {$i < $nqueues} {incr i 1} {
    set qstruct [lindex $qinfo $i]
    set qname($i) [getattrlval "name" $qstruct]
    set qcnt [getattrlval "state_count" $qstruct]
    set max_running($qname($i)) [getattrlval "max_running" $qstruct]
    keylset priority $qname($i) [getattrlval "Priority" $qstruct]
    set nrunning(queue.$qname($i)) [getcntval "Running" $qcnt]
    set nqueued($qname($i)) [getcntval "Queued" $qcnt]
}
#
# Set flag for special jobs.  This flag is used to make sure
# that the special jobs are processed even if we've reached some
# max (eg. the max running jobs per server).  It is raised only when
# the burst queue has queued jobs and none running.
#
# The array elements are tested for existence first: on a server that
# has no queue named "burst" they are never created, and reading them
# unguarded would abort the whole scheduling pass.
#
set specialjobs 0
if {[info exists nqueued(burst)] && [info exists nrunning(queue.burst)]} {
    if { "$nqueued(burst)" > "0" && "$nrunning(queue.burst)" == "0" } {
        set specialjobs 1
    }
}
if {$debug} {
    if {$specialjobs} {
        puts "$nqueued(burst) special jobs are queued"
    } else {
        puts "No special jobs are queued"
    }
}
#
# If we're already at the maximum number of jobs running for this
# server and there are no special jobs then we are all done.
#
if { "$s_nrunning" >= "$s_maxr" && "$specialjobs"  == "0" } {
    if {$debug} {
        puts "Maximum number of jobs reached for the server"
        puts "And no special jobs queued"
    }
    return
}
#
# Next look for 'idle' clients to run jobs.
#
# Loop over the batch clients once, asking each resource monitor for
# its load average, and build idlelist: the clients that answered and
# whose load is below max_load(client).  For every client that answered
# this also sets load(client) and idle(client) ("yes"/"no").  Clients
# whose resource monitor cannot be opened are skipped and get neither.
#
# idlelist is created empty up front so that it always exists
# afterwards; the pass may continue with no idle client when special
# jobs are queued, and later code reads idlelist unconditionally.
#
set idlelist {}
foreach client $clientlist {
    #
    # connect to resource monitor and get load average
    #
    set rmcon [openrm $client]
    if {$rmcon < 0} {
        puts "bad return from openrm for $client"
        puts "skipping this client."
        continue
    }
    addreq $rmcon "loadave"
    set la [getreq $rmcon]
    set load($client) $la
    #
    # make list of clients which respond and are not too loaded
    #
    if {$load($client) < $max_load($client)} {
        lappend idlelist "$client"
        set idle($client) "yes"
    } else {
        set idle($client) "no"
    }
    #
    # close connections
    #
    closerm $rmcon
}
#
# Check that there is at least one idle client available.  If not
# we're all done, unless special jobs still have to be handled.
#
set nidle [llength $idlelist]
if {$nidle > 0} {
    if {$debug} {
        puts "$nidle idle clients: $idlelist"
    }
} else {
    if {$debug} {
        puts "no idle clients exist"
    }
    if {! $specialjobs } {
        return
    }
}
#
# Record, for every queue on this server, the value of its "started"
# attribute in queue_started(name).
#
set queueinfo [pbsstatque]
foreach qstruct $queueinfo {
    set queue_name [getattrlval name $qstruct]
    set queue_started($queue_name) [getattrlval started $qstruct]
}
#
# Fetch the status of ALL JOBS on this server ...
#
set ajinfo [pbsstatjob]
#
# ... and have sortjobinfo order the list (passed by name) using the
# queue priorities.
#
sortjobinfo $priority ajinfo
#
# Walk the (sorted) job list and split it into queued and running jobs.
#   queued_job(n,jid|owner|queue|state|ehost)   for jobs in state Q
#   running_job(n,jid|owner|queue|state|ehost)  for jobs in state R
# nq and nr end up holding the number of entries in each table.  Jobs
# in any other state are ignored.  Running jobs are also tallied per
# user in nrunning(user.<owner>) and per host in nrunning(host.<ehost>).
#
set nj [llength $ajinfo]
set nq 0
set nr 0
for {set i 0} {$i < $nj} {incr i 1} {
    set jstruct [lindex $ajinfo $i]
    set jid [getattrlval "name" $jstruct]
    set owner [getattrlval "Job_Owner" $jstruct]
    #
    # Keep only the user part of "user@host".  The pattern is anchored
    # and accepts any character but '@', so that user names containing
    # digits, '_' or '-' are kept whole rather than being cut down to
    # their trailing run of lowercase letters.
    #
    regexp {^([^@]+)@} "$owner" dummy owner
    set jstat [getattrlval "job_state" $jstruct]
    set queue [getattrlval "queue" $jstruct]
    switch -- $jstat {
        Q {
            #
            # Store info on queued jobs.  A queue listed in
            # queue_hostlist is restricted to those hosts; any other
            # queue may run on any host.
            #
            set queued_job($nq,jid) $jid
            set queued_job($nq,owner) $owner
            set queued_job($nq,queue) $queue
            set queued_job($nq,state) $jstat
            if {[info exists queue_hostlist($queue)]} {
                set queued_job($nq,ehost) $queue_hostlist($queue)
            } else {
                set queued_job($nq,ehost) "ANYHOST"
            }
            incr nq 1
        }
        R {
            set ehost [getattrlval "exec_host" $jstruct]
            #
            # Store info on running jobs
            # Strip domain from hostname if its there: keep everything
            # before the first '.'.  Anchoring the pattern keeps host
            # names with digits or '-' intact instead of picking up a
            # later all-lowercase label of the domain.
            #
            regexp {^([^.]+)\.} "$ehost" dummy ehost
            set running_job($nr,jid) $jid
            set running_job($nr,owner) $owner
            set running_job($nr,queue) $queue
            set running_job($nr,state) $jstat
            set running_job($nr,ehost) $ehost
            incr nr 1
            #
            # Store info on jobs running per user
            #
            if {[info exists nrunning(user.$owner)]} {
                incr nrunning(user.$owner) 1
            } else {
                set nrunning(user.$owner) 1
            }
            #
            # Store info on jobs running per client
            #
            if {[info exists nrunning(host.$ehost)]} {
                incr nrunning(host.$ehost) 1
            } else {
                set nrunning(host.$ehost) 1
            }
        }
    }
}
#
# for debugging check that everything is still there: dump the server
# summary, every queue, the queued and running job tables and the idle
# clients.  Only active when debug is set.
#
if {$debug} {
    puts "Server name: $s_name"
    puts "\tState:         $s_state"
    puts "\tMax. running:  $s_maxr"
    puts "\tJobs running:  $s_nrunning"
    puts "\tJobs queued:   $s_nqueued"
    puts "\tNo of queues:  $nqueues"
    puts "\tQueues:"
    for {set i 0} {$i < $nqueues} {incr i 1} {
        puts "\t\tqueue: $qname($i)"
        puts "\t\t\tStarted: $queue_started($qname($i))"
        puts "\t\t\tPriority: [keylget priority $qname($i)]"
        puts "\t\t\tMax run:  $max_running($qname($i))"
        puts "\t\t\tRunning:  $nrunning(queue.$qname($i))"
        puts "\t\t\tQueued:   $nqueued($qname($i))"
    }
    puts "\t\tQueued Jobs:"
    for {set i 0} {$i < $nq} {incr i 1} {
        puts "\t\t\t$queued_job($i,jid) \
                $queued_job($i,queue)\t \
                $queued_job($i,owner)"
    }
    puts "\t\tRunning Jobs:"
    for {set i 0} {$i < $nr} {incr i 1} {
        puts "\t\t\t$running_job($i,jid) \
                $running_job($i,queue)\t \
                $running_job($i,owner)\t \
                $running_job($i,ehost)"
    }
    puts "\tClients:"
    #
    # idlelist does not exist when no client was idle and the pass only
    # continued because special jobs are queued; there is then nothing
    # to list, and reading the variable would abort the pass.
    #
    if {[info exists idlelist]} {
        foreach client $idlelist {
            puts "\t\tclient: $client"
            puts "\t\t\tLoad Average: $load($client)"
            puts "\t\t\tMax Load Avg: $max_load($client)"
            puts "\t\t\tIdle:         $idle($client)"
            puts "\t\t\tRunning jobs: $nrunning(host.$client)"
        }
    }
}
#
# Ok, go through list of jobs (in order) and schedule them.
#
# For each queued job, in turn:
#   1. skip it if its queue is not started or is at max_running;
#   2. if it sits in the "burst" queue, delete every job running on
#      juno and start it there, regardless of the other limits;
#   3. otherwise skip it if the server or the owner is at its limit;
#   4. otherwise start it on the first candidate client that is below
#      both max_run(client) and max_load(client).
# Every start bumps the server, queue, user and host counters and adds
# 2 to the client's recorded load, so later jobs in the same pass see
# the effect of earlier placements.
#
if {$debug} {
    puts "Begin scheduling jobs"
}
for {set i 0} {$i < $nq} {incr i 1} {
    set queue $queued_job($i,queue)
    set owner $queued_job($i,owner)
    set ehost $queued_job($i,ehost)
    set jid $queued_job($i,jid)
    if {$debug} {
        puts "checking $jid $queue $owner $ehost"
    }
    #
    # Check that the queue is started
    #
    if { $queue_started($queue) == "False" } {
        if {$debug} {
            puts "\tQueue $queue not started."
        }
        continue
    }
    #
    # Check that this queue hasn't reached the max number of running jobs.
    # Do this before checking for special jobs because if there is already
    # a special job running we want to do the continue.
    #
    if {$nrunning(queue.$queue) >= $max_running($queue)} {
        if {$debug} {
            puts "\tmaximum number of jobs for queue $queue reached"
        }
        continue
    }
    #
    # if queue is burst queue then do something special
    #
    if {"$queue" == "burst"} {
        #
        # if there are any running jobs on juno delete them
        #
        set exechost juno
        if {$nrunning(host.juno) >= 1} {
            for {set k 0} {$k < $nr} {incr k 1} {
                if {"$running_job($k,ehost)" == "juno"} {
                    puts "Deleting job $running_job($k,jid)"
                    pbsdeljob $running_job($k,jid)
                }
            }
        }
        #
        # then run the queued job
        #
        puts "\tRunning special job $queued_job($i,jid)"
        pbsrunjob $queued_job($i,jid) juno
        incr s_nrunning
        incr nrunning(queue.$queue) 1
        if { ! [info exists nrunning(user.$owner)] } {
            set nrunning(user.$owner) 1
        } else {
            incr nrunning(user.$owner) 1
        }
        incr nrunning(host.$exechost) 1
        #
        # No load figure exists for juno when its resource monitor was
        # not reachable during this pass; only adjust it if we have one.
        #
        if {[info exists load($exechost)]} {
            set load($exechost) [expr {$load($exechost) + 2}]
        }
        continue
    }
    #
    # Check that the server hasn't reached its limit of running jobs.
    # This has to be re-checked for every job: jobs started earlier in
    # this pass count too, and the pass is entered with the server
    # already full whenever special jobs are queued.  An empty limit is
    # not enforced here.
    #
    if {[string length $s_maxr] > 0 && "$s_nrunning" >= "$s_maxr"} {
        if {$debug} {
            puts "\tmaximum number of jobs for the server reached"
        }
        continue
    }
    #
    # Check that this users hasn't reached the limit of running jobs.
    #
    if { ! [info exists nrunning(user.$owner)]} {
        set nrunning(user.$owner) 0
    }
    if {$nrunning(user.$owner) >= $usermaxr} {
        if {$debug} {
            puts "\tmaximum number of jobs for user $owner reached"
        }
        continue
    }
    #
    # next find an appropriate client
    #
    # If ehost is already set then just see if that host is
    # available.  If ehost is set to 'ANYHOST' search the
    # idlehost list (which does not exist when no client was idle).
    #
    if { $ehost == "ANYHOST" } {
        if {[info exists idlelist]} {
            set hostlist $idlelist
        } else {
            set hostlist {}
        }
    } else {
        set hostlist $ehost
    }
    if {[info exists exechost]} {
        unset exechost
    }
    foreach client $hostlist {
        #
        # A client named by the queue's host list may have no status in
        # this pass (resource monitor unreachable, or not in the client
        # list).  Without a job count and load it cannot be judged, so
        # pass over it.
        #
        if {![info exists nrunning(host.$client)] || ![info exists load($client)]} {
            if {$debug} {
                puts "\tno status available for $client"
            }
            continue
        }
        #
        # Check that this client hasn't reached the limit of running jobs.
        #
        if {$nrunning(host.$client) >= $max_run($client)} {
            if {$debug} {
                puts "\tmaximum number of jobs for $client reached"
            }
            continue
        }
        #
        # Check that this client hasn't reached the maximum load avg.
        #
        if {$load($client) >= $max_load($client)} {
            if {$debug} {
                puts "\tmaximum load for $client reached"
            }
            continue
        }
        set exechost $client
        break
    }
    if {[info exists exechost]} {
        puts "\tRunning job: $queued_job($i,jid) on $exechost"
        pbsrunjob $queued_job($i,jid) $exechost
        incr s_nrunning
        incr nrunning(queue.$queue) 1
        incr nrunning(user.$owner) 1
        incr nrunning(host.$exechost) 1
        set load($exechost) [expr {$load($exechost) + 2}]
    } else {
        if {$debug} {
            puts "No client found for this job"
        }
    }
}

#
# End-of-pass cleanup: drop every variable this pass created, so nothing
# is held in memory or carried over into the next invocation.  Each name
# is removed only if it currently exists (scalar or array).
#
# varlist doubles as the loop variable so that no additional global name
# is introduced; it is removed again right after the loop.
#
foreach varlist {
    nrunning nqueued max_running priority queued_job running_job
    clientlist idlelist servstat qinfo qname load idle nidle
    queueinfo ajinfo prime nj ehost client nq nr hour qstruct dummy
    nqueues jid queue_name i exechost la qcnt specialjobs queue jstat
    owner queue_started rmcon jstruct s_name s_nqueued s_nrunning
    s_maxr s_state s_cnt datim clist now queuelist day hostlist k
} {
    if {[info exists $varlist]} {
        unset $varlist
    }
}
unset varlist

#
# Snapshot of what is left (debugging aid), then drop the snapshot too.
#
set varlist [info vars]
#puts "variable list: $varlist"
unset varlist

