/*
 *  Version identification string embedded in the object file.  The
 *  %...% sequences look like SCCS keyword placeholders that are filled
 *  in when the file is checked out -- TODO confirm against the build.
 */
static char USMID[] = "%Z%%M%	%I%	%G% %U%";

/*

    Program: pbs_sched.a
       File: assist.c

        Author : Nicholas P. Cardo
                 Sterling Software
                 NAS Facility
                 NASA Ames Research Center

  Description:
   Perform the necessary steps to prevent job starvation.  Job assisting
   is provided for memory, srfs big, and srfs fast.  If a job is being
   assisted, it is being assisted for all three areas.  This way the
   job can startup as soon as the resources are available.

*/

#include <pbs_config.h>   /* the master config generated by configure */

#include <stdio.h>
#include <sys/types.h>
#include <string.h>
#include <time.h>
#include <sys/param.h>
#if SRFS == 1
#include <sys/srfs.h>
#endif
#include <sys/quota.h>

#include "gblxvars.h"
#include "toolkit.h"

#include "pbs_error.h"
#include "pbs_ifl.h"
#include "log.h"

/*
 *  Number of slots in joblist[]; ckassist() records at most one
 *  starving candidate per slot.
 */
#define	MAXQUEUES 20

/*
 *  Handle passed as the first argument to every pbs_*() call in this
 *  file.  Defined elsewhere.
 */
extern	int	connector;
/*
 *  Large memory job accounting, defined elsewhere.  ckassist() refuses
 *  to assist a large memory job once
 *  Large_Running >= (LARGE_CNT_LIM - 1).
 */
extern	int	LARGE_CNT_LIM;
extern	int	Large_Running;

/*
 *  Forward declarations for routines defined later in this file.
 */
static	void assist_job();
void	clear_assist();

/*
 *  One starving job that is a candidate for assistance.  ckassist()
 *  fills in joblist[], sorts it with jobsrt() and hands element 0 to
 *  assist_job().
 */
struct	jobent {
	/* job identifier, strdup'ed from the job's batch_status name */
	char	*jobid;
	/* name of the queue the job was found in (strdup'ed) */
	char	*queue;
	/* 1-based position of the queue's entry in Starvlist */
	int	qpriority;
	/* value of the job's ATTR_qtime attribute, converted with atoi() */
	int	qtime;
	/* "mem" resource request as returned by val2byte() */
	long long	memory;
	/* "srfs_fast" resource request as returned by val2byte() */
	int	srfsfast;
	/* "srfs_big" resource request as returned by val2byte() */
	int	srfsbig;
	/* starvation threshold copied from the queue's qthresh entry */
	int	starve;
} joblist[MAXQUEUES];

/*
 *  Start of joblist[], passed to qsort() by ckassist().
 */
struct	jobent *base = &joblist[0];
/*
 *  qsort() comparison routine for joblist[], defined at the end of
 *  this file.
 */
int	jobsrt();

/*
 *  See if there is a job that should be assisted.
 */
int ckassist()
{
char	*id = "ckassist";
static	struct batch_status *bs;
struct	assist ast[3];
int	x,y;
time_t	now;
struct	strvent *sptr;
struct	qthresh *qptr;
char	*ptr;
int	idx,qpri;
int	lm;

	/*
	 *  Are we already assisting a job
	 */
	if(Assist.jobname != NULL) {
		/*
		 *  Does the job still exist, if not clear it and
		 *  release the srfs reserve
		 */
		if((bs=pbs_statjob(connector,Assist.jobname,NULL,NULL)) ==
			(struct batch_status *)0L) {
			sprintf(log_buffer,"Job %s is gone, clearing assist",
				Assist.jobname);
			log_record(PBSEVENT_SYSTEM,PBS_EVENTCLASS_SERVER,
				id,log_buffer);
			free(Assist.jobname);
			Assist.jobname = NULL;
			clear_assist();
		} else {
			/*
			 *  Is the job still in the "Q" state
			 */
			ptr = getat(ATTR_state,bs,NULL);
			if(ptr[0] == 'Q') {
				sprintf(log_buffer,"Assisting %s,%d/%d/%d",
					Assist.jobname,Assist.memory,
					Assist.srfsfast,Assist.srfsbig);
				log_record(PBSEVENT_SYSTEM,
					PBS_EVENTCLASS_SERVER,id,log_buffer);
				pbs_statfree(bs);
				return(1);
			}

			/*
			 *  Job is nolonger in the "Q" state, so clear the
			 *  the assist
			 */
			pbs_statfree(bs);
			clear_assist();
		}
	}

	sptr = Starvlist;

	/*
	 *  Loop by order
	 */
	qpri = 1;
	idx  = 0;
	while(sptr != (struct strvent *)0L) {
		ptr = strtok(sptr->qlist,",");
		/*
		 *  Loop by queue for this order
		 */
		while(ptr != NULL) {
			qptr = Thresholds;
			while(qptr != (struct qthresh *)0L) {
				if(!strcmp(ptr,qptr->qname))
					break;
				qptr = qptr->next;
			}

			/*
			 *  Check for configuration error, does queue
			 *  have a timeout value.
			 */
			if(qptr == (struct qthresh *)0L) {
				ptr = strtok(NULL,",");
				continue;
			}

			/*
			 *  Are there any jobs queues?
			 */
			if((bs=getjobs(ptr,"Q")) == (struct batch_status *)0L) {
				ptr = strtok(NULL,",");
				continue;
			}

			/*
			 *  Is this job starving
			 */
			joblist[idx].qtime = atoi(getat(ATTR_qtime,bs,NULL));
			time(&now);

			if((now - qptr->starve) < joblist[idx].qtime) {
				/*
				 *  Job is not starving
				 */
				ptr = strtok(NULL,",");
				pbs_statfree(bs);
				continue;
			}
			
			/*
			 *  This job is starving
			 */
			joblist[idx].jobid = strdup(bs->name);
			joblist[idx].qpriority = qpri;
			joblist[idx].srfsfast = 
				val2byte(getat(ATTR_l,bs,"srfs_fast"));
			joblist[idx].srfsbig = 
				val2byte(getat(ATTR_l,bs,"srfs_big"));
			joblist[idx].memory =
				val2byte(getat(ATTR_l,bs,"mem"));
			joblist[idx].starve = qptr->starve;
			joblist[idx].queue = strdup(ptr);

			pbs_statfree(bs);

			/*
			 *  If this is a large memory job, 
			 *  and if we are are already
			 *  running enough large memory jobs, 
			 *  don't assist it. NO OVERRIDES or else
			 *  the system could keep on assisting big
			 *  jobs and seriously degrade it's performance.
			 */
			if(pnp())
				lm = (LARGE_MEM_NONPRIME)?
					LARGE_MEM_NONPRIME:LARGE_MEM;
			else
				lm = (LARGE_MEM_PRIME)?
					LARGE_MEM_PRIME:LARGE_MEM;
		
			if(lm && (joblist[idx].memory >= lm)) {
				if(Large_Running >= (LARGE_CNT_LIM -1)) {
					ptr = strtok(NULL,",");
					continue;
				}
			}

			/*
			 *  Check the queues limits
			 */
			if(queue_limits(ptr)) {
/*
				pbs_statfree(bs);
*/
				ptr = strtok(NULL,",");
				continue;
			}
		
			bs = pbs_statjob(connector,Assist.jobname,NULL,NULL);
		
			/*
			 *  Check the user limits
			 */
			if(user_limits(bs,ptr)) {
				pbs_statfree(bs);
				ptr = strtok(NULL,",");
				continue;
			}

			pbs_statfree(bs);

			/* Clear to Assist
			 */
			idx++;
			ptr = strtok(NULL,",");
		}
		sptr = sptr->next;
		qpri++;
	}

	/*
	 * Are there any jobs eligible for assistance
	 */
	if(!idx)
		return(0);

	/*
	 *  The result of the qsort is that element 0 of
	 *  the array is the highest priority oldest starving
	 *  job.  This is the one to assist.
	 */
	qsort(base,idx,sizeof(*base),jobsrt);

	assist_job();

	/*
	 *  Clean up
	 */
	for(x=0;x<idx;x++) {
		free(joblist[x].jobid);
		free(joblist[x].queue);
		joblist[x].jobid = joblist[x].queue = NULL;
	}

	return(1);
}

/*
 *  Reset the job assist structure to "no job assisted".  When SRFS is
 *  configured srfs_clear() is called first.  If a job name is still
 *  recorded, its comment attribute is altered to an empty string (so
 *  that AAA is not displayed) and the name is freed.
 */
void clear_assist()
{
struct  attrl blank_comment[] = {
	{NULL,ATTR_comment,"",""}
};

#if SRFS == 1
	srfs_clear();
#endif

	if(Assist.jobname) {
		pbs_alterjob(connector,Assist.jobname,blank_comment,NULL);
		free(Assist.jobname);
		Assist.jobname = NULL;
	}

	Assist.qtime    = 0;
	Assist.srfsbig  = 0;
	Assist.srfsfast = 0;
	Assist.memory   = 0;
}

/*
 *  Fill in the job assist structure and make the
 *  call to make the srfs reserve
 */
static void assist_job()
{
char	*id = "assist_job";
struct  attrl alist[] = {
        {NULL,ATTR_comment,"","Job Assistance Active"}
};

	Assist.jobname  = strdup(joblist[0].jobid);
	Assist.memory   = joblist[0].memory;
	Assist.srfsfast = joblist[0].srfsfast;
	Assist.srfsbig  = joblist[0].srfsbig;
	Assist.qtime    = joblist[0].qtime;

#if SRFS == 1
	srfs_assist();
#endif

	pbs_alterjob(connector,Assist.jobname,alist,NULL);
}

/*
 *  Job starting routine for the job that is being assisted.  The job
 *  is started as soon as enough batch memory (and, when SRFS is
 *  configured, enough /fast and /big space) is available; managing the
 *  resources afterwards is left to the regular scheduler.
 *
 *  rsrcs - current resource usage; only rsrcs->batchmem is read here
 *
 *  Returns 1 when the job was started and the assist cleared, 0 when
 *  resources are still short or pbs_runjob() failed.
 */
int run_assist(rsrcs)
struct 	resutl *rsrcs;
{
char	*id = "run_assist";
#if SRFS == 1
struct	fsres_s sr;
#endif

	/*
	 *  Not enough free batch memory yet
	 */
	if(Assist.memory > (BATCH_MEM-rsrcs->batchmem))
		return(0);

#if SRFS == 1
	/*
	 *  NOTE(review): the quotactl() results below are not checked,
	 *  so "sr" is used as-is even if a call fails -- confirm that
	 *  is acceptable.
	 */

	/*
	 *  Not enough /fast space yet
	 */
	if(Assist.srfsfast) {
		quotactl("/fast",SRFS_INFO,(char *)&sr);
		if(Assist.srfsfast > (sr.ares_avail*BSIZE))
			return(0);
	}

	/*
	 *  Not enough /big space yet
	 */
	if(Assist.srfsbig) {
		quotactl("/big" ,SRFS_INFO,(char *)&sr);
		if(Assist.srfsbig > (sr.ares_avail*BSIZE))
			return(0);
	}
#endif

	/*
	 *  Everything the job asked for is available, so try to start
	 *  it.  On success log it and clear the assist.
	 */
	if(pbs_runjob(connector,Assist.jobname,NULL,NULL) == 0) {
		sprintf(log_buffer, "job %s started",Assist.jobname);
		log_record(PBSEVENT_SYSTEM,PBS_EVENTCLASS_SERVER,id,log_buffer);
		clear_assist();
		return(1);
	}

	sprintf(log_buffer, "failed to start job %s, %d",
		Assist.jobname,pbs_errno);
	log_record(PBSEVENT_SYSTEM,PBS_EVENTCLASS_SERVER,id,log_buffer);

	return(0);
}

/*
 *  Append a new entry holding a private copy of "qlist" to the end of
 *  Starvlist.
 *
 *  qlist - the queue list text to store; ckassist() later splits the
 *          stored text on commas
 *
 *  The entry is built completely before it is linked in, so a failed
 *  allocation (or a NULL qlist) leaves Starvlist unchanged instead of
 *  crashing or leaving an entry without a queue list on the chain.
 */
void create_starvlist(qlist)
char	*qlist;
{
struct	strvent *sptr,*nptr;

	if(qlist == NULL)
		return;

	nptr = (struct strvent *)malloc(sizeof(struct strvent));
	if(nptr == (struct strvent *)0L)
		return;
	memset(nptr,0,sizeof(struct strvent));

	nptr->qlist = strdup(qlist);
	if(nptr->qlist == NULL) {
		free(nptr);
		return;
	}

	/*
	 *  First entry becomes the head, otherwise hook it on the tail
	 */
	if(Starvlist == (struct strvent *)0L) {
		Starvlist = nptr;
		return;
	}

	sptr = Starvlist;
	while(sptr->next != (struct strvent *)0L)
		sptr = sptr->next;
	sptr->next = nptr;
}

/*
 *  Read the queue threshold file and append one qthresh entry to
 *  Thresholds for every "<queue name> <timeout>" line.  Lines that
 *  start with '#' and empty lines are skipped, as are lines that do not
 *  hold both fields.  The timeout text is converted with val2sec().
 *
 *  Returns 0 on success, 1 if the file could not be opened or memory
 *  ran out (entries appended before the failure stay in the list).
 */
int read_thresh()
{
FILE	*fd;
char	buf[255];
char	qname[128];
char	timeout[128];
struct	qthresh *qptr,*nptr;
char	*nl;
int	c;

	if((fd=fopen("/usr/spool/pbs_home/etc/pbsqueues","r")) == NULL) {
		return(1);
	}

	/*
	 *  Start at the current tail so that new entries are appended
	 *  correctly even when Thresholds already holds entries.
	 */
	qptr = Thresholds;
	while(qptr != (struct qthresh *)0L &&
	      qptr->next != (struct qthresh *)0L)
		qptr = qptr->next;

	while(fgets(buf,sizeof(buf),fd)){
		if(buf[0] == '#')
			continue;

		if(buf[0] == '\n')
			continue;

		/*
		 *  Strip the newline if there is one.  If there is not,
		 *  the line was longer than the buffer (or is the last
		 *  line of the file); throw away the remainder so it is
		 *  not mistaken for a line of its own.
		 */
		if((nl=strchr(buf,'\n')) != NULL)
			*nl = '\0';
		else
			while((c=getc(fd)) != EOF && c != '\n')
				;

		/*
		 *  Field widths keep long tokens inside qname/timeout;
		 *  both fields must be present for the line to count.
		 */
		if(sscanf(buf,"%127s %127s", qname, timeout) != 2)
			continue;

		nptr = (struct qthresh *)malloc(sizeof(struct qthresh));
		if(nptr == (struct qthresh *)0L) {
			fclose(fd);
			return(1);
		}
		memset(nptr,0,sizeof(struct qthresh));

		nptr->qname = strdup(qname);
		if(nptr->qname == NULL) {
			free(nptr);
			fclose(fd);
			return(1);
		}
		nptr->starve = val2sec(timeout);

		if(qptr == (struct qthresh *)0L)
			Thresholds = nptr;
		else
			qptr->next = nptr;
		qptr = nptr;
	}

	fclose(fd);
	return(0);
}

int jobsrt(const void *a, const void *b)
{
	if(((struct jobent *)a)->qpriority == ((struct jobent *)b)->qpriority)
		return(((struct jobent *)b)->qtime-((struct jobent *)a)->qtime);

	return(((struct jobent *)a)->qpriority-((struct jobent *)b)->qpriority);
}

