/*
*         Portable Batch System (PBS) Software License
* 
* Copyright (c) 1999, MRJ Technology Solutions.
* All rights reserved.
* 
* Acknowledgment: The Portable Batch System Software was originally developed
* as a joint project between the Numerical Aerospace Simulation (NAS) Systems
* Division of NASA Ames Research Center and the National Energy Research
* Supercomputer Center (NERSC) of Lawrence Livermore National Laboratory.
* 
* Redistribution of the Portable Batch System Software and use in source
* and binary forms, with or without modification, are permitted provided
* that the following conditions are met:
* 
* - Redistributions of source code must retain the above copyright and
*   acknowledgment notices, this list of conditions and the following
*   disclaimer.
* 
* - Redistributions in binary form must reproduce the above copyright and 
*   acknowledgment notices, this list of conditions and the following
*   disclaimer in the documentation and/or other materials provided with the
*   distribution.
* 
* - All advertising materials mentioning features or use of this software must
*   display the following acknowledgment:
* 
*   This product includes software developed by NASA Ames Research Center,
*   Lawrence Livermore National Laboratory, and MRJ Technology Solutions.
* 
*         DISCLAIMER OF WARRANTY
* 
* THIS SOFTWARE IS PROVIDED BY MRJ TECHNOLOGY SOLUTIONS ("MRJ") "AS IS" WITHOUT 
* WARRANTY OF ANY KIND, AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT ARE EXPRESSLY DISCLAIMED.
* 
* IN NO EVENT, UNLESS REQUIRED BY APPLICABLE LAW, SHALL MRJ, NASA, NOR
* THE U.S. GOVERNMENT BE LIABLE FOR ANY DIRECT DAMAGES WHATSOEVER,
* NOR ANY INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* 
* This license will be governed by the laws of the Commonwealth of Virginia,
* without reference to its choice of law rules.
*/
/*
 * req_jobobit.c - functions dealing with a Job Obituary Request (Notice)
 *	and the associated post execution job clean up.
 */

#include <pbs_config.h>   /* the master config generated by configure */

#include <sys/types.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libpbs.h"
#include "server_limits.h"
#include "list_link.h"
#include "attribute.h"
#include "resource.h"
#include "server.h"
#include "job.h"
#include "credential.h"
#include "batch_request.h"
#include "work_task.h"
#include "pbs_error.h"
#include "log.h"
#include "acct.h"
#include "net_connect.h"
#include "svrfunc.h"
#include "sched_cmds.h"

/* RCS identification string for this source file */
static char ident[] = "@(#) $RCSfile: req_jobobit.c,v $ $Revision: 2.1 $";

/* size, in bytes, of the accounting and mail buffers built in req_jobobit() */
#define RESC_USED_BUF 2048

/* External Global Data Items */

extern unsigned int   pbs_mom_port;	/* port given to svr_connect() in mom_comm() */
extern char *path_spool;
extern int   server_init_type;		/* compared against RECOV_COLD/RECOV_CREATE in req_jobobit() */
extern pbs_net_t pbs_server_addr;	/* compared with the job's MOM address in on_job_rerun() */

/* message texts defined elsewhere; several are used as sprintf() formats below */
extern char *msg_init_abt;
extern char *msg_job_end;
extern char *msg_job_end_sig;
extern char *msg_job_end_stat;
extern char *msg_momnoexec1;
extern char *msg_momnoexec2;
extern char *msg_obitnojob;
extern char *msg_obitnocpy;
extern char *msg_obitnodel;
extern struct connection svr_conn[];
extern struct connect_handle connection[];
extern char  server_host[];
extern int   svr_do_schedule;		/* set to SCH_SCHEDULE_TERM by req_jobobit() */
extern time_t time_now;			/* base time for the WORK_Timed tasks set below */

/* External Functions called */

extern void set_resc_assigned A_((job *, enum batch_op));

/* Local public functions  */

void req_jobobit A_((struct batch_request *));

/* Local private functions */

/* forward declaration: defined below, used by cpy_stdfile() and cpy_stage() */
static struct batch_request *setup_cpyfiles A_((struct batch_request *, job *, char *,  char *, int, int));


/*
 * setup_from - build the "from" (MOM side) name of one of a job's
 *	standard files: output, error, or chkpt.
 *
 *	The name is the job's file prefix (ji_fileprefix) followed by
 *	the supplied suffix.
 *
 *	Returns a malloc-ed string which the caller owns, or a null
 *	pointer if the memory could not be obtained.
 */

static char *setup_from(pjob, suffix)
	job  *pjob;
	char *suffix;
{
	char   *prefix = pjob->ji_qs.ji_fileprefix;
	size_t  plen   = strlen(prefix);
	size_t  slen   = strlen(suffix);
	char   *name;

	name = malloc(plen + slen + 1);
	if (name == (char *)0)
		return ((char *)0);

	/* prefix first, then the suffix including its terminating null */

	(void)memcpy(name, prefix, plen);
	(void)memcpy(name + plen, suffix, slen + 1);
	return (name);
}

/* 
 * setup_cpyfiles - if need be, allocate and initialize a Copy Files
 *	batch request, then append the file pair (from, to) to it.
 *
 *	preq      - existing Copy Files request, or null to have one created
 *	pjob      - job supplying the job id, owner, user and group names
 *	from, to  - malloc-ed names; ownership passes to this function
 *
 *	Returns the request with the new pair linked in.  On any allocation
 *	failure "from" and "to" are freed and a null pointer is returned;
 *	if the failure was in allocating the pair, the request (including
 *	one passed in by the caller) has also been released with free_br().
 */

static struct batch_request *setup_cpyfiles(preq, pjob, from, to, direction, tflag)
	struct batch_request *preq;
	job  *pjob;
	char *from;	 /* local (to mom) name */
	char *to;	 /* remote (destination) name */
	int   direction; /* copy direction */
	int   tflag;	 /* 1 if stdout or stderr , 2 if stage out or in*/
{
	struct rq_cpyfile *pcf;
	struct rqfpair    *pair;

	if (preq == (struct batch_request *)0) {

	    /* allocate and initialize the batch request struct */

	    preq = alloc_br(PBS_BATCH_CopyFiles);
	    if (preq == (struct batch_request *)0) {
		(void)free(from);
		if (to)
			(void)free(to);
		return (preq);
	    }
	    pcf = &preq->rq_ind.rq_cpyfile;
	    CLEAR_HEAD(pcf->rq_pair);

	    /* copy jobid, owner, exec-user, group names, upto the @host part */

	    (void)strcpy(pcf->rq_jobid, pjob->ji_qs.ji_jobid);
	    get_jobowner(pjob->ji_wattr[(int)JOB_ATR_job_owner].at_val.at_str,
			 pcf->rq_owner);
	    get_jobowner(pjob->ji_wattr[(int)JOB_ATR_euser].at_val.at_str,
			pcf->rq_user);
	    if ( ((pjob->ji_wattr[(int)JOB_ATR_egroup].at_flags & ATR_VFLAG_DEFLT) ==0) && (pjob->ji_wattr[(int)JOB_ATR_egroup].at_val.at_str != 0) )
		(void)strcpy(pcf->rq_group, pjob->ji_wattr[(int)JOB_ATR_egroup].at_val.at_str);
	    else
		pcf->rq_group[0] = '\0';	/* default: use login group */

	    pcf->rq_dir = direction;

	} else {

	    /* use the existing request structure */

	    pcf = &preq->rq_ind.rq_cpyfile;
	}

	pair = (struct rqfpair *)malloc(sizeof (struct rqfpair));
	if (pair == (struct rqfpair *)0) {

		/*
		 * The names have not been linked into the request yet, so
		 * free_br() cannot reach them; release them here just as
		 * the alloc_br() failure path above does.
		 */

		(void)free(from);
		if (to)
			(void)free(to);
		free_br(preq);
		return ((struct batch_request *)0);
	}
		
	CLEAR_LINK(pair->fp_link);
	pair->fp_local  = from;
	pair->fp_rmt    = to;
	pair->fp_flag   = tflag;
	append_link(&pcf->rq_pair, &pair->fp_link, pair);
	return (preq);
}

/*
 * is_joined - is the given standard file (output or error) being joined
 *	into the other one according to the job's join attribute?
 *
 *	Returns 1 if the file's key letter ('o' or 'e') appears in the join
 *	list somewhere after the first letter; returns 0 if the attribute
 *	is not one of the two paths, the join attribute is unset, empty or
 *	starts with 'n', or the key is the first letter or not in the list.
 */

static int is_joined(pjob, ati)
	job		*pjob;
	enum job_atr	 ati;
{
	attribute *join;
	char	  *list;
	char	   letter;

	switch (ati) {
	    case JOB_ATR_outpath:
		letter = 'o';
		break;
	    case JOB_ATR_errpath:
		letter = 'e';
		break;
	    default:
		return (0);
	}

	join = &pjob->ji_wattr[(int)JOB_ATR_join];
	if ((join->at_flags & ATR_VFLAG_SET) == 0)
		return (0);

	list = join->at_val.at_str;
	if ((list == (char *)0) || (*list == '\0') || (*list == 'n'))
		return (0);

	/* the first letter names the file that receives the joined data */

	if (*list == letter)
		return (0);

	if (strchr(list + 1, (int)letter) != (char *)0)
		return (1);	/* in the remainder of the list - joined */

	return (0);
}

/*
 * cpy_stdfile - determine if one of the job's standard files (output or error)
 *	is to be copied, if so set up the Copy Files request.
 *
 *	preq - Copy Files request built so far, or null if none yet
 *	pjob - the job
 *	ati  - JOB_ATR_outpath or JOB_ATR_errpath
 *
 *	Returns the request to use from here on.  Whenever this file is
 *	skipped (interactive job, path attribute not set, file joined or
 *	kept, no memory for the names) the request passed in is returned
 *	unchanged, so pairs added by an earlier call are neither lost nor
 *	leaked.  Otherwise the result of setup_cpyfiles() is returned.
 */

static struct batch_request *cpy_stdfile(preq, pjob, ati)
	struct batch_request *preq;
	job		     *pjob;
	enum job_atr	      ati;	/* JOB_ATR_ output or error path */
{
	char *from;
	char  key;
	attribute *jkpattr;
	attribute *pathattr = &pjob->ji_wattr[(int)ati];
	char *suffix;
	char *to = (char *)0;

	/* if the job is interactive, don't bother to return output file */

	if (pjob->ji_wattr[(int)JOB_ATR_interactive].at_flags &&
	    pjob->ji_wattr[(int)JOB_ATR_interactive].at_val.at_long)
		return (preq);

	/* set up depending on which file */

	if (ati == JOB_ATR_errpath) {
		key    = 'e';
		suffix = JOB_STDERR_SUFFIX;
	} else {
		key    = 'o';
		suffix = JOB_STDOUT_SUFFIX;
	}

	if ((pathattr->at_flags & ATR_VFLAG_SET) == 0) { /* This shouldn't be */

		(void)sprintf(log_buffer, "%c file missing", key);
		log_event(PBSEVENT_ERROR|PBSEVENT_JOB,
			      PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid,
			      log_buffer);

		/*
		 * Skip only this file.  Returning a null pointer here would
		 * drop (and leak) a request already holding the other
		 * standard file.
		 */

		return (preq);
	}

	/* Is the file joined to another, if so don't copy it */

	if (is_joined(pjob, ati))
		return (preq);

	/*
	 * If the job has a keep file attribute, and the specified file is in
	 * the keep list, MOM has already placed the file in the user's HOME
	 * directory.  It does not need to be copied.
	 */

	jkpattr = &pjob->ji_wattr[(int)JOB_ATR_keep];
	if ( (jkpattr->at_flags & ATR_VFLAG_SET) &&
	      (strchr(jkpattr->at_val.at_str, (int)key)) )

		return (preq);

	/* else go with the supplied name */

	to = malloc(  strlen(pathattr->at_val.at_str) + 1);
	if (to) {
	    (void)strcpy(to, pathattr->at_val.at_str);

	} else
		return (preq);	/* cannot continue with this one */

	/* build up the name used by MOM as the from name */

	from = setup_from(pjob, suffix);
	if (from == (char *)0) {
		(void)free(to);
		return (preq);
	}

	/* now set names into the batch request; it takes over from and to */

	return (setup_cpyfiles(preq, pjob, from, to,STAGE_DIR_OUT, STDJOBFILE));
}

/*
 * cpy_stage - set up a Copy Files request to include files specified by the
 *	user to be staged out (also used for stage-in).
 *	"stage_out" is a resource that may or may not exist on a host.
 *	If such exists, the files are listed one per string as
 *	"local_name@remote_host:remote_name".  
 *
 *	Each string is split at its first '@': the part before it becomes
 *	the local name and the part after it the remote name.  Strings
 *	without an '@' are skipped.
 *
 *	Returns the request with the pairs added.  If memory for a name
 *	cannot be obtained, processing stops and the request as built so
 *	far is returned.  The attribute's strings are left unmodified.
 */

struct batch_request *cpy_stage(preq, pjob, ati, direction)
	struct batch_request *preq;
	job		     *pjob;
	enum job_atr	      ati;	 /* JOB_ATR_stageout */
	int		      direction; /* 1 = , 2 = */
{
	int		      i;
	char		     *from;
	attribute 	     *pattr;
	struct array_strings *parst;
	char 		     *plocal;
	char		     *prmt;
	char		     *to;

	pattr = &pjob->ji_wattr[(int)ati];
	if ((pattr->at_flags & ATR_VFLAG_SET) == 0)
		return (preq);		/* nothing to stage */

	/* at last, we know we have files to stage out/in */

	parst = pattr->at_val.at_arst;
	for (i = 0; i<parst->as_usedptr; ++i) {
		plocal = parst->as_string[i];
		prmt   = strchr(plocal, (int)'@');
		if (prmt == (char *)0)
			continue;	/* not in local@remote form */

		/* temporarily end the string at the '@' to copy the local name */

		*prmt = '\0';
		from = malloc(strlen(plocal)+1);
		if (from)
			(void)strcpy(from, plocal);

		/*
		 * restore the @ whether or not the copy was made; otherwise
		 * a failed malloc would leave the job's attribute truncated
		 */

		*prmt = '@';

		if (from == (char *)0)
			return (preq);

		to = malloc(strlen(prmt+1) + 1);
		if (to == (char *)0) {
			(void)free(from);
			return (preq);
		}
		(void)strcpy(to, prmt+1);

		/* setup_cpyfiles() takes over ownership of from and to */

		preq = setup_cpyfiles(preq, pjob, from, to, direction, STAGEFILE);
	}

	return (preq);
}


/* 
 * mom_comm - make sure there is a connection to the MOM under which the
 *	job was running, and return its handle.
 *
 *	If the job already has a handle (ji_momhandle >= 0) it is returned
 *	as is.  Otherwise, as on server recovery, a connection is opened
 *	with svr_connect(); if the MOM address is not recorded in the job
 *	it is first looked up from the exec_host attribute.
 *
 *	If the connection cannot be made, a timed work task is set up to
 *	call "func" with the job PBS_NET_RETRY_TIME later and -1 is returned.
 */

int mom_comm(pjob, func)
	job *pjob;
	void (*func) A_((struct work_task *));
{
	unsigned int  port_unused;	/* port parsed from exec_host, ignored */
	char	     *exec_host;

	if (pjob->ji_momhandle >= 0)
		return (pjob->ji_momhandle);	/* already connected */

	/* need to make connection, called from pbsd_init() */

	if (pjob->ji_qs.ji_un.ji_exect.ji_momaddr == 0) {
		exec_host = pjob->ji_wattr[(int)JOB_ATR_exec_host].at_val.at_str;
		pjob->ji_qs.ji_un.ji_exect.ji_momaddr =
			get_hostaddr(parse_servername(exec_host, &port_unused));
	}

	pjob->ji_momhandle = svr_connect(pjob->ji_qs.ji_un.ji_exect.ji_momaddr,
					 pbs_mom_port,
					 process_Dreply,
					 ToServerDIS);
	if (pjob->ji_momhandle >= 0)
		return (pjob->ji_momhandle);

	/* could not connect, have the caller's function retried later */

	set_task(WORK_Timed, (long)(time_now+PBS_NET_RETRY_TIME),
		 func, (void *)pjob);
	return (-1);
}


/*
 * on_job_exit - continue post-execution processing of a job that terminated.
 *
 *	This function is called by pbsd_init() on recovery, by req_jobobit()
 *	on job termination and by itself (via a work task).  The clue to where
 *	we are is the job substate and the type of the work task entry it is
 *	called with.  If the work task entry type is not WORK_Deferred_Reply,
 *	then this is the first time in for the job substate.  Otherwise it is
 *	with the reply given by MOM.
 *
 *	The substates are worked through in order, each case falling into
 *	the next:
 *	    EXITING/ABORT - run dependency processing, move to STAGEOUT
 *	    STAGEOUT      - have MOM copy the standard and stage-out files
 *	    STAGEDEL      - have MOM delete the staged-in files
 *	    EXITED        - tell MOM to delete the job, then purge it here
 *
 *	NOTE: 
 *	On the initial work task (WORK_Immed), the wt_parm1 is a job pointer.
 *	On a call-back work task (WORK_Deferred_Reply) generated by
 *	send_request(), the wt_parm1 is pointing to the request; and the
 *	rq_extra field in the request points to the job.
 */

void on_job_exit(ptask)
	struct work_task *ptask;
{
	int    handle;
	job   *pjob;
	struct batch_request *preq;

	/* recover the job (and the request, if this is MOM's reply) */

	if (ptask->wt_type != WORK_Deferred_Reply) {
		preq = (struct batch_request *)0;
		pjob = (job *)ptask->wt_parm1;
	} else {
		preq = (struct batch_request *)ptask->wt_parm1;
		pjob = (job *)preq->rq_extra;
	}

	/* no connection to MOM: mom_comm() has set a task to call us again */

	if ((handle = mom_comm(pjob, on_job_exit)) < 0)
		return;
		

	switch (pjob->ji_qs.ji_substate) {

	    case JOB_SUBSTATE_EXITING:
	    case JOB_SUBSTATE_ABORT:

		/* see if have any dependencys */

		if (pjob->ji_wattr[(int)JOB_ATR_depend].at_flags & ATR_VFLAG_SET) {
		    (void)depend_on_term(pjob);
		}
		(void)svr_setjobstate(pjob, JOB_STATE_EXITING,
					    JOB_SUBSTATE_STAGEOUT);

		/* mark the task so the next case sees a "first time in" */

		ptask->wt_type = WORK_Immed;

		/* NO BREAK, fall into stage out processing */

	    case JOB_SUBSTATE_STAGEOUT:

		if (ptask->wt_type != WORK_Deferred_Reply) {

		    /* this is the very first call, have mom copy files */
		    /* first check the standard files: output & error   */

		    preq = cpy_stdfile(preq, pjob, JOB_ATR_outpath);
		    preq = cpy_stdfile(preq, pjob, JOB_ATR_errpath);

		    /* are there any stage-out files ?		 	*/

		    preq = cpy_stage(preq,pjob,JOB_ATR_stageout,STAGE_DIR_OUT);

		    if (preq) {		/* have files to copy 		*/
			preq->rq_extra = (void *)pjob;

			/*
			 * NOTE: the else below has no braces, so only the
			 * first assignment belongs to it.  The two that
			 * follow are still reached only when the request
			 * could not be issued, because the success branch
			 * returns.
			 */

			if (issue_Drequest(handle, preq, on_job_exit, 0) == 0) {
				return;	/* come back when mom replies */
			} else 
				/* set up as if mom returned error */

				preq->rq_reply.brp_code = PBSE_MOMREJECT;
				preq->rq_reply.brp_choice = BATCH_REPLY_CHOICE_NULL;
				preq->rq_reply.brp_un.brp_txt.brp_txtlen = 0;
				/* we will "fall" into the post reply side */

		    } else {		/* no files to copy, any to delete? */

			/* skip to the delete stage by way of a new task */

			(void)svr_setjobstate(pjob, JOB_STATE_EXITING,
						    JOB_SUBSTATE_STAGEDEL);
			ptask = set_task(WORK_Immed, 0, on_job_exit, pjob);
			return;
		    }
		}

		/* here we have a reply (maybe faked) from MOM about the copy */

		if (preq->rq_reply.brp_code != 0) {	/* error from MOM */

		    (void)sprintf(log_buffer, msg_obitnocpy, pjob->ji_qs.ji_jobid, pjob->ji_wattr[(int)JOB_ATR_exec_host].at_val.at_str);
		    log_event(PBSEVENT_ERROR|PBSEVENT_ADMIN|PBSEVENT_JOB,
			      PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid,
			      log_buffer);

		    /*
		     * MOM's text is appended after the event was logged,
		     * so it goes only into the mail sent to the owner.
		     */

		    if (preq->rq_reply.brp_choice == BATCH_REPLY_CHOICE_Text) {
		        (void)strncat(log_buffer,
				      preq->rq_reply.brp_un.brp_txt.brp_str,
				      LOG_BUF_SIZE - strlen(log_buffer) - 1);
		    }
		    svr_mailowner(pjob, MAIL_OTHER, MAIL_FORCE, log_buffer);
		}

		/*
		 * files (generally) copied ok, move on to the next phase by
		 * "faking" the immediate work task.
		 */

		free_br(preq);
		preq = 0;
		(void)svr_setjobstate(pjob, JOB_STATE_EXITING, 
					    JOB_SUBSTATE_STAGEDEL);
		ptask->wt_type = WORK_Immed;

		/* NO BREAK - FALL INTO THE NEXT CASE */

	    case JOB_SUBSTATE_STAGEDEL:

		if (ptask->wt_type != WORK_Deferred_Reply) { /* first time in */

		    /* Build list of files which were staged-in so they can
		     * be deleted.
		     */
		
		    preq = cpy_stage(preq, pjob, JOB_ATR_stagein, 0);

		    if (preq) {		/* have files to delete		*/

			/* change the request type from copy to delete  */

			preq->rq_type = PBS_BATCH_DelFiles;
			preq->rq_extra = (void *)pjob;

			/*
			 * NOTE: brace-less else again; both assignments
			 * below are reached only if the issue failed,
			 * because the success branch returns.
			 */

			if (issue_Drequest(handle, preq, on_job_exit, 0) == 0) {
				return;	/* come back when mom replies */
			} else 
				/* set up as if mom returned error */

				preq->rq_reply.brp_code = PBSE_MOMREJECT;
				preq->rq_reply.brp_choice = BATCH_REPLY_CHOICE_NULL;

				/* we will "fall" into the post reply side */

		    } else {		/* preq == 0, no files to delete   */

			(void)svr_setjobstate(pjob, JOB_STATE_EXITING,
						    JOB_SUBSTATE_EXITED);
			ptask = set_task(WORK_Immed, 0, on_job_exit, pjob);
			return;
		    }
		}

		/* After MOM replied (maybe faked) to Delete Files request */

		if (preq->rq_reply.brp_code != 0) {	/* an error occurred */
		    
		    (void)sprintf(log_buffer, msg_obitnodel, pjob->ji_qs.ji_jobid, pjob->ji_wattr[(int)JOB_ATR_exec_host].at_val.at_str);
		    log_event(PBSEVENT_JOB,
			      PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid,
			      log_buffer);

		    /* as above, MOM's text is added for the mail only */

		    if (preq->rq_reply.brp_choice == BATCH_REPLY_CHOICE_Text) {
		        (void)strncat(log_buffer,
				      preq->rq_reply.brp_un.brp_txt.brp_str,
				      LOG_BUF_SIZE - strlen(log_buffer) - 1);
		    }
		    svr_mailowner(pjob, MAIL_OTHER, MAIL_FORCE, log_buffer);
		}
		free_br(preq);
		preq = 0;
		(void)svr_setjobstate(pjob, JOB_STATE_EXITING, 
				      JOB_SUBSTATE_EXITED);

		ptask->wt_type = WORK_Immed;

		/* NO BREAK, FALL INTO NEXT CASE */


	    case JOB_SUBSTATE_EXITED:

	 	/* tell mom to delete the job, send final track and purge it */

		preq = alloc_br(PBS_BATCH_DeleteJob);
		if (preq) {
		    (void)strcpy(preq->rq_ind.rq_delete.rq_objname,
				 pjob->ji_qs.ji_jobid);
		    (void)issue_Drequest(handle, preq, release_req, 0);
		    /* release_req will free preq and close connection */
		}

		/* the track record is issued only if JOB_SVFLG_HERE is not set */

		if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE) == 0)
			issue_track(pjob);
		job_purge(pjob);
		break;

	}
}

/*
 * on_job_rerun - Handle the clean up of jobs being rerun.  This gets
 *	messy if the job is being executed on another host.  Then the
 *	"standard" files must be copied to the server for safe keeping.
 *
 *	The basic flow is very much like that of on_job_exit().
 *	The substate will already be set to JOB_SUBSTATE_RERUN and the
 *	JOB_SVFLG_HASRUN bit set in ji_svrflags.
 *
 *	The substates are worked through in order, each case falling into
 *	the next:
 *	    RERUN  - ask MOM to return the standard files to the server
 *		     (skipped when MOM's address is the server's own)
 *	    RERUN1 - have MOM copy any stage-out files
 *	    RERUN2 - have MOM delete any staged-in files
 *	    RERUN3 - have MOM delete her copy of the job, then requeue it
 *
 *	As in on_job_exit(), a work task type other than
 *	WORK_Deferred_Reply means wt_parm1 is the job and this is the first
 *	time in for the substate; WORK_Deferred_Reply means wt_parm1 is the
 *	request carrying MOM's reply, with rq_extra pointing to the job.
 */
void on_job_rerun(ptask)
	struct work_task *ptask;
{
	int		      handle;
	int		      newstate;
	int		      newsubst;
	job		     *pjob;
	struct batch_request *preq;

	if (ptask->wt_type != WORK_Deferred_Reply) {
		preq = (struct batch_request *)0;
		pjob = (job *)ptask->wt_parm1;
	} else {
		preq = (struct batch_request *)ptask->wt_parm1;
		pjob = (job *)preq->rq_extra;
	}

	/* no connection to MOM: mom_comm() has set a task to call us again */

	if ((handle = mom_comm(pjob, on_job_rerun)) < 0)
		return;

	switch (pjob->ji_qs.ji_substate) {


	    case JOB_SUBSTATE_RERUN:

		if (ptask->wt_type != WORK_Deferred_Reply) {
		    if (pjob->ji_qs.ji_un.ji_exect.ji_momaddr==pbs_server_addr){

			/* files don`t need to be moved, go to next step */

			(void)svr_setjobstate(pjob, JOB_STATE_EXITING,
					      JOB_SUBSTATE_RERUN1);
			(void)set_task(WORK_Immed, 0, on_job_rerun, pjob);
			return;
		    }
			
		    /* here is where we have to save the files	*/
		    /* ask mom to send them back to the server	*/
		    /* mom deletes her copy if returned ok	*/

		    preq = alloc_br(PBS_BATCH_Rerun);
		    if (preq == (struct batch_request *)0) {
			return;
		    }
		    (void)strcpy(preq->rq_ind.rq_rerun, pjob->ji_qs.ji_jobid);
		    preq->rq_extra = (void *)pjob;

		    if (issue_Drequest(handle, preq, on_job_rerun, 0) == 0) {
				/* request ok, will come back when its done */
				return;
		    } else {
				/* set up as if mom returned error */

				preq->rq_reply.brp_code = 1;
				/* we will "fall" into the post reply side */
		    }


		}

		/* We get here if MOM replied (may be faked above)  */
		/* to the rerun (return files) request issued above */

		if (preq->rq_reply.brp_code != 0) {	/* error */
		    /* for now, just log it */
		    (void)sprintf(log_buffer, msg_obitnocpy, pjob->ji_qs.ji_jobid, pjob->ji_wattr[(int)JOB_ATR_exec_host].at_val.at_str);
		    
		    log_event(PBSEVENT_ERROR|PBSEVENT_ADMIN|PBSEVENT_JOB,
			      PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid,
			      log_buffer);
		}
		(void)svr_setjobstate(pjob, JOB_STATE_EXITING,
				      JOB_SUBSTATE_RERUN1);

		/* mark the task so the next case sees a "first time in" */

		ptask->wt_type = WORK_Immed;
		free_br(preq);
		preq = 0;

		/* NO BREAK, FALL THROUGH TO NEXT CASE */


	    case JOB_SUBSTATE_RERUN1:

		if (ptask->wt_type != WORK_Deferred_Reply) {

		    /* this is the very first call, have mom copy files */
		    /* are there any stage-out files to process? 	*/

		    preq = cpy_stage(preq,pjob,JOB_ATR_stageout,STAGE_DIR_OUT);

		    if (preq) {		/* have files to copy 		*/
			preq->rq_extra = (void *)pjob;
			if (issue_Drequest(handle, preq, on_job_rerun,0) == 0) {
				return;	/* come back when mom replies */
			} else {
				/* set up as if mom returned error */

				preq->rq_reply.brp_code = PBSE_MOMREJECT;
				preq->rq_reply.brp_choice = BATCH_REPLY_CHOICE_NULL;
				preq->rq_reply.brp_un.brp_txt.brp_txtlen = 0;
				/* we will "fall" into the post reply side */
			}

		    } else {		/* no files to copy, any to delete? */

			(void)svr_setjobstate(pjob, JOB_STATE_EXITING,
						    JOB_SUBSTATE_RERUN2);
			(void)set_task(WORK_Immed, 0, on_job_rerun, pjob);
			return;
		    }
		}

		/* here we have a reply (maybe faked) from MOM about the copy */

		if (preq->rq_reply.brp_code != 0) {	/* error from MOM */

		    (void)sprintf(log_buffer, msg_obitnocpy, pjob->ji_qs.ji_jobid, pjob->ji_wattr[(int)JOB_ATR_exec_host].at_val.at_str);
		    log_event(PBSEVENT_ERROR|PBSEVENT_ADMIN|PBSEVENT_JOB,
			      PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid,
			      log_buffer);

		    /* MOM's text is added after logging, for the mail only */

		    if (preq->rq_reply.brp_choice == BATCH_REPLY_CHOICE_Text) {
		        (void)strncat(log_buffer,
				      preq->rq_reply.brp_un.brp_txt.brp_str,
				      LOG_BUF_SIZE - strlen(log_buffer) - 1);
		    }
		    svr_mailowner(pjob, MAIL_OTHER, MAIL_FORCE, log_buffer);
		}

		/*
		 * files (generally) copied ok, move on to the next phase by
		 * "faking" the immediate work task.
		 */

		free_br(preq);
		preq = 0;
		(void)svr_setjobstate(pjob, JOB_STATE_EXITING, 
					    JOB_SUBSTATE_RERUN2);
		ptask->wt_type = WORK_Immed;

		/* NO BREAK - FALL INTO THE NEXT CASE */

	    case JOB_SUBSTATE_RERUN2:

		if (ptask->wt_type != WORK_Deferred_Reply) {

		    /* here is where we delete  any stage-in files	   */
 
		    preq = cpy_stage(preq, pjob, JOB_ATR_stagein,  0);
		    if (preq) {

			/* change the request type from copy to delete */

			preq->rq_type = PBS_BATCH_DelFiles;
			preq->rq_extra = (void *)pjob;
			if (issue_Drequest(handle, preq, on_job_rerun,0) == 0) {
				return;
			} else {	/* error on sending request */
				preq->rq_reply.brp_code = 1;
				/* we will "fall" into the post reply side */
			}
		    } else {
			(void)svr_setjobstate(pjob, JOB_STATE_EXITING,
						    JOB_SUBSTATE_RERUN3);
			(void)set_task(WORK_Immed, 0, on_job_rerun, pjob);
			return;
		    }
		}

		/* post reply side for delete file request to MOM */
		if (preq->rq_reply.brp_code != 0) {	/* error */
		    /* for now, just log it */
		    (void)sprintf(log_buffer, msg_obitnocpy, pjob->ji_qs.ji_jobid, pjob->ji_wattr[(int)JOB_ATR_exec_host].at_val.at_str);
		    
		    log_event(PBSEVENT_ERROR|PBSEVENT_ADMIN|PBSEVENT_JOB,
			      PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid,
			      log_buffer);
		}
		free_br(preq);
		preq = 0;
		(void)svr_setjobstate(pjob, JOB_STATE_EXITING,
					    JOB_SUBSTATE_RERUN3);
		ptask->wt_type = WORK_Immed;

		/* NO BREAK, FALL THROUGH TO NEXT CASE */


	    case JOB_SUBSTATE_RERUN3:

		/* need to have MOM delete her copy of the job */
		preq = alloc_br(PBS_BATCH_DeleteJob);
		if (preq) {
		    (void)strcpy(preq->rq_ind.rq_delete.rq_objname,
				 pjob->ji_qs.ji_jobid);
		    preq->rq_extra = (void *)pjob;
		    (void)issue_Drequest(handle, preq, release_req, 0);
		    /* release_req will free preq and close connection */
		}

		/*
		 * Now re-queue the job.
		 *
		 * The hot-start flag must be tested with "&": OR-ing the
		 * flag into the word can only give zero if the flag itself
		 * is zero, so the exec_host attribute would never be cleared.
		 */

		if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HOTSTART) == 0) {
			/* in case of server shutdown, don't clear exec_host */
			/* will use it on hotstart when next comes up	     */
			job_attr_def[(int)JOB_ATR_exec_host].at_free(
				      &pjob->ji_wattr[(int)JOB_ATR_exec_host]);
		}
		pjob->ji_modified = 1;	/* force full job save */
		pjob->ji_momhandle = -1;
		pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_StagedIn;
		svr_evaljobstate(pjob, &newstate, &newsubst, 0);
		(void)svr_setjobstate(pjob, newstate, newsubst);
	}
}

/*
 * wait_for_send - work task function which calls req_jobobit() again
 *	after a delay.  It is set up by req_jobobit() when the race
 *	(condition) goes to an Obit from MOM rather than to the SIGCHLD
 *	of the send_job() child that sent the job to MOM.
 *
 *	The work task's wt_parm1 is the saved obit request.
 */

static void wait_for_send(ptask)
	struct work_task *ptask;
{
	struct batch_request *obit;

	obit = (struct batch_request *)ptask->wt_parm1;
	req_jobobit(obit);
}

/*
 * setrerun - set a job up to be rerun if its rerunable attribute allows.
 *
 *	Returns 0 with the substate set to JOB_SUBSTATE_RERUN when the
 *	attribute value is non-zero.  Otherwise the owner is mailed an
 *	abort notice (msg_init_abt) and 1 is returned, which callers use
 *	as their "already mailed" indication.
 */

static int
setrerun(pjob)
	job	*pjob;
{
	if (pjob->ji_wattr[(int)JOB_ATR_rerunable].at_val.at_long == 0) {

		/* not rerunnable, tell the owner the job was aborted */

		svr_mailowner(pjob, MAIL_ABORT, MAIL_FORCE, msg_init_abt);
		return (1);
	}

	/* job is rerunnable */

	pjob->ji_qs.ji_substate = JOB_SUBSTATE_RERUN;
	return (0);
}

/*
 * req_jobobit - process the Job Obituary Notice (request) from MOM.
 *	This notice is sent from MOM when a job terminates.
 */

void req_jobobit(preq)
	struct batch_request *preq;
{
	int		  alreadymailed = 0;
	int		  amt;
	int		  bad;
	char 		  acctbuf[RESC_USED_BUF];
	int		  accttail;
	int		  exitstatus;
	char		  mailbuf[RESC_USED_BUF];
	int		  need;
	int		  newstate;
	int		  newsubst;
	char		 *pc;
	job		 *pjob;
	struct work_task *ptask;
	svrattrl	 *patlist;

	pjob = find_job(preq->rq_ind.rq_jobobit.rq_jid);
	if (pjob == (job *)0) {		/* not found */
		if ((server_init_type == RECOV_COLD) ||
		    (server_init_type == RECOV_CREATE)) {
			/* tell MOM the job was blown away */
			(void)sprintf(log_buffer, msg_obitnojob, preq->rq_host,
				      PBSE_CLEANEDOUT);
			req_reject(PBSE_CLEANEDOUT, 0, preq);
		} else {
			(void)sprintf(log_buffer, msg_obitnojob, preq->rq_host,
				      PBSE_UNKJOBID);
			req_reject(PBSE_UNKJOBID, 0, preq);
		}
		log_event(PBSEVENT_ERROR|PBSEVENT_JOB,
			  PBS_EVENTCLASS_JOB,
			  preq->rq_ind.rq_jobobit.rq_jid,
			  log_buffer);
		return;
	}
	if (pjob->ji_qs.ji_state != JOB_STATE_RUNNING)  {
		if (pjob->ji_qs.ji_state == JOB_STATE_EXITING) {

			/* already in exit processing, ignore this request */
			bad = PBSE_ALRDYEXIT;

		} else {

			/* not running and not exiting - bad news */

			(void)sprintf(log_buffer, msg_obitnojob, preq->rq_host, 
				      PBSE_BADSTATE);
			log_event(PBSEVENT_ERROR|PBSEVENT_JOB,
				  PBS_EVENTCLASS_JOB,
				  preq->rq_ind.rq_jobobit.rq_jid,
				  log_buffer);
			bad = PBSE_BADSTATE;
		}
		req_reject(bad, 0, preq);
		return;
	}
	if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN) {

		/* have hit a race condition, the send_job child's SIGCHLD */
		/* has not yet been reaped.  Must wait for it.		   */

		ptask = set_task(WORK_Timed, time_now+1, wait_for_send,
				 (void *)preq);
		if (ptask == 0) {
			req_reject(PBSE_SYSTEM, 0, preq);
		}
		return;
	}
		

	/*
	 * save exit state, update the resources used, and reply to MOM.
	 *
	 * Note, must make the log/mail message about resources used BEFORE
	 * replying to MOM.  The reply will free the attribute list so it
	 * cannot be used after the call to reply_ack();
	 */

	exitstatus = preq->rq_ind.rq_jobobit.rq_status;
	pjob->ji_qs.ji_un.ji_exect.ji_exitstat = exitstatus;
	patlist = (svrattrl *)GET_NEXT(preq->rq_ind.rq_jobobit.rq_attr);

	(void)sprintf(acctbuf, msg_job_end_stat, 
		      pjob->ji_qs.ji_un.ji_exect.ji_exitstat);
	if (exitstatus < 10000) {
		(void)strcpy(mailbuf, acctbuf);
	} else {
		(void)sprintf(mailbuf, msg_job_end_sig, exitstatus - 10000);
	}
	accttail = strlen(acctbuf);
	amt = RESC_USED_BUF - accttail;
	while(patlist) {
	    need = strlen(patlist->al_resc) + strlen(patlist->al_value) + 3;
	    if (need < amt) {
		(void)strcat(acctbuf, "\n");
		(void)strcat(acctbuf, patlist->al_name);
		if (patlist->al_resc) {
			(void)strcat(acctbuf, ".");
			(void)strcat(acctbuf, patlist->al_resc);
		}
		(void)strcat(acctbuf,"=");
		(void)strcat(acctbuf, patlist->al_value);
		amt -= need;
	    }
	    patlist = (svrattrl *)GET_NEXT(patlist->al_link);
	}
	(void)strncat(mailbuf, (acctbuf + accttail),
		      RESC_USED_BUF-strlen(mailbuf)-1);
	mailbuf[RESC_USED_BUF-1] = '\0';

	/* free any nodes assigned to the job */

	free_nodes(pjob);

	/* removed the resources used by the job from the used svr/que attr */

	set_resc_assigned(pjob, DECR);

	/* Mark that scheduler should be called */

	svr_do_schedule = SCH_SCHEDULE_TERM;

	/* make sure ji_momhandle is -1 to force new connection to mom */

	pjob->ji_momhandle = -1;

	reply_ack(preq);

	/* clear suspended flag if it was set */

	pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_Suspend;

	/* Was there a special exit status from MOM ? */

	if (exitstatus < 0) {		/* negative exit status is special */

	    switch (exitstatus) {
	        case JOB_EXEC_FAIL1:
		default:

		    /* MOM rejected job with fatal error, abort job */

		    svr_mailowner(pjob, MAIL_ABORT, MAIL_FORCE, msg_momnoexec1);
		    alreadymailed = 1;
		    break;

		case JOB_EXEC_FAIL2:

		    /* MOM reject job after files setup, abort job */

		    svr_mailowner(pjob, MAIL_ABORT, MAIL_FORCE, msg_momnoexec2);
		    alreadymailed = 1;
		    break;

	        case JOB_EXEC_INITABT:

		    /* MOM aborted job on her initialization */

		    alreadymailed = setrerun(pjob);
		    pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HASRUN;
		    break;

	        case JOB_EXEC_RETRY:

		    /* MOM rejected job, but said retry it */

		    if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN) {
			/* has run before, treat this as another rerun */
			alreadymailed = setrerun(pjob);
		    } else {
			/* have mom remove job files, not saving them,	*/
		        /* and requeue job				*/
		        pjob->ji_qs.ji_substate = JOB_SUBSTATE_RERUN1;
		    }
		    break;

	        case JOB_EXEC_BADRESRT:

		    /* MOM could not restart job, setup for rerun */

		    alreadymailed = setrerun(pjob);
		    pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_CHKPT;
		    break;

	        case JOB_EXEC_INITRST:

		    /* MOM abort job on init, job has checkpoint file	*/
		    /* Requeue it, and thats all folks.			*/

		    pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HASRUN|JOB_SVFLG_CHKPT;
		    svr_evaljobstate(pjob, &newstate, &newsubst, 1);
		    (void)svr_setjobstate(pjob, newstate, newsubst);
		    svr_disconnect(pjob->ji_momhandle);
		    return;

	        case JOB_EXEC_INITRMG:

		    /* MOM abort job on init, job has migratable checkpoint */
		    /* Must recover output and checkpoint file, do eoj	    */

		    alreadymailed = setrerun(pjob);
		    pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HASRUN |
					       JOB_SVFLG_ChkptMig;
	    }
	}

	/* What do we now do with the job... */

	if ( (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RERUN) &&
	     (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RERUN1) ) {

		/* If job is terminating (not rerun), */
		/*  update state and send mail        */

		(void)svr_setjobstate(pjob, JOB_STATE_EXITING, 
					    JOB_SUBSTATE_EXITING);
		if (alreadymailed == 0)
			svr_mailowner(pjob, MAIL_END, MAIL_NORMAL, mailbuf);

		/* replace new-lines with blanks for log message */

		for (pc = acctbuf; *pc; ++pc)
			if (*pc == '\n')
				*pc = ' ';

		/* record accounting and maybe in log */

		account_jobend(pjob, acctbuf);

		if (server.sv_attr[(int)SRV_ATR_log_events].at_val.at_long &
		    PBSEVENT_JOB_USAGE) {
			/* log events set to record usage */
			log_event(PBSEVENT_JOB_USAGE | PBSEVENT_JOB_USAGE,
				  PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid,
				  acctbuf);
		} else {
			/* no usage in log, truncate messge */
			*(acctbuf + accttail) = '\0';
			log_event(PBSEVENT_JOB,
				  PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid,
				  acctbuf);
		}
			

		ptask = set_task(WORK_Immed, 0, on_job_exit, (void *)pjob);

		/* "on_job_exit()" will be dispatched out of the main loop */

	} else {

		/*
		 * Rerunning job ...
		 * If not checkpointed, clear "resources_used"
		 * Requeue job
		 */

		if ((pjob->ji_qs.ji_svrflags & 
				(JOB_SVFLG_CHKPT | JOB_SVFLG_ChkptMig)) == 0) {
			job_attr_def[(int)JOB_ATR_resc_used].at_free(
				      &pjob->ji_wattr[(int)JOB_ATR_resc_used]);

		} else if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHKPT) {

		    /* non-migratable checkpoint (cray), leave there */
		    /* and just requeue the job			     */

		    pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HASRUN;
		    svr_evaljobstate(pjob, &newstate, &newsubst, 1);
		    (void)svr_setjobstate(pjob, newstate, newsubst);
		    svr_disconnect(pjob->ji_momhandle);
		    return;
		}
		(void)svr_setjobstate(pjob, JOB_STATE_EXITING, 
				      pjob->ji_qs.ji_substate);

		ptask = set_task(WORK_Immed, 0, on_job_rerun, (void *)pjob);

		/* "on_job_rerun()" will be dispatched out of the main loop */
	}
	return;
}
