/*
*         Portable Batch System (PBS) Software License
* 
* Copyright (c) 1999, MRJ Technology Solutions.
* All rights reserved.
* 
* Acknowledgment: The Portable Batch System Software was originally developed
* as a joint project between the Numerical Aerospace Simulation (NAS) Systems
* Division of NASA Ames Research Center and the National Energy Research
* Supercomputer Center (NERSC) of Lawrence Livermore National Laboratory.
* 
* Redistribution of the Portable Batch System Software and use in source
* and binary forms, with or without modification, are permitted provided
* that the following conditions are met:
* 
* - Redistributions of source code must retain the above copyright and
*   acknowledgment notices, this list of conditions and the following
*   disclaimer.
* 
* - Redistributions in binary form must reproduce the above copyright and 
*   acknowledgment notices, this list of conditions and the following
*   disclaimer in the documentation and/or other materials provided with the
*   distribution.
* 
* - All advertising materials mentioning features or use of this software must
*   display the following acknowledgment:
* 
*   This product includes software developed by NASA Ames Research Center,
*   Lawrence Livermore National Laboratory, and MRJ Technology Solutions.
* 
*         DISCLAIMER OF WARRANTY
* 
* THIS SOFTWARE IS PROVIDED BY MRJ TECHNOLOGY SOLUTIONS ("MRJ") "AS IS" WITHOUT 
* WARRANTY OF ANY KIND, AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT ARE EXPRESSLY DISCLAIMED.
* 
* IN NO EVENT, UNLESS REQUIRED BY APPLICABLE LAW, SHALL MRJ, NASA, NOR
* THE U.S. GOVERNMENT BE LIABLE FOR ANY DIRECT DAMAGES WHATSOEVER,
* NOR ANY INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* 
* This license will be governed by the laws of the Commonwealth of Virginia,
* without reference to its choice of law rules.
*/
/*
 * req_rerun.c - functions dealing with a Rerun Job Request
 */

#include <pbs_config.h>   /* the master config generated by configure */

#include <sys/types.h>
#include "libpbs.h"
#include <signal.h>
#include "server_limits.h"
#include "list_link.h"
#include "work_task.h"
#include "attribute.h"
#include "server.h"
#include "credential.h"
#include "batch_request.h"
#include "job.h"
#include "pbs_error.h"
#include "log.h"
#include "acct.h"
#include "svrfunc.h"

/*
 * Source-control identification string: an "@(#)" marker followed by RCS
 * keyword expansions.  Nothing in the visible part of this file reads it.
 */
static char ident[] = "@(#) $RCSfile: req_rerun.c,v $ $Revision: 2.1 $";

/*
 * Private Function local to this file: post_rerun() - it is defined ahead
 * of its only use, so no forward declaration appears here.
 */

/* Global Data Items: */

/*
 * Message strings defined elsewhere.  In req_rerunjob() msg_manager is used
 * as the sprintf format and receives three string arguments: msg_jobrerun,
 * the requesting user and the requesting host.
 */
extern char *msg_manager;
extern char *msg_jobrerun;


/*
 * post_rerun - handler for reply from mom on signal_job sent in req_rerunjob
 *	If mom acknowledged the signal, then all is ok.
 *	If mom rejected the signal for unknown jobid, then force local requeue.
 */

static void post_rerun(pwt)
	struct work_task *pwt;
{
	int	 newstate;
	int	 newsub;
	job	*pjob;
	struct batch_request *preq;

	preq = (struct batch_request *)pwt->wt_parm1;
	
	if (preq->rq_reply.brp_code != 0) {
		(void)sprintf(log_buffer, "rerun signal reject by mom: %d",
			      preq->rq_reply.brp_code);
		log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, 
			  preq->rq_ind.rq_signal.rq_jid, log_buffer);
			
		if (pjob = find_job(preq->rq_ind.rq_signal.rq_jid)) {
			svr_evaljobstate(pjob, &newstate, &newsub, 1);
			(void)svr_setjobstate(pjob, newstate, newsub);
		}
	}
	release_req(pwt);
	return;
}

/*
 * req_rerunjob - service the Rerun Job Request
 *
 *	This request Reruns a job by:
 *		sending to MOM a signal job request with SIGKILL
 *		marking the job as being rerun by setting the substate.
 */

void req_rerunjob(preq)
	struct batch_request *preq;
{
	job		 *pjob;

	if ((pjob = chk_job_request(preq->rq_ind.rq_rerun, preq)) == 0)
		return;

	if ((preq->rq_perm & ( ATR_DFLAG_MGWR | ATR_DFLAG_OPWR )) == 0) {
		req_reject(PBSE_PERM, 0, preq);
		return;
	}

	/* the job must be running */

	if (pjob->ji_qs.ji_state != JOB_STATE_RUNNING) {
		req_reject(PBSE_BADSTATE, 0, preq);
		return;
	}

	/* the job must be rerunnable */

	if (pjob->ji_wattr[(int)JOB_ATR_rerunable].at_val.at_long == 0) {
		req_reject(PBSE_NORERUN, 0, preq);
		return;
	}

	/* ask MOM to kill off the job */

	if (issue_signal(pjob, "SIGKILL", post_rerun, 0) != 0) {
		req_reject(PBSE_MOMREJECT, 0, preq);
		return;
	}
	
	/* So job has run and is to be rerun (not restarted) */

	pjob->ji_qs.ji_substate  = JOB_SUBSTATE_RERUN;
	pjob->ji_qs.ji_svrflags = (pjob->ji_qs.ji_svrflags & 
				  ~(JOB_SVFLG_CHKPT | JOB_SVFLG_ChkptMig)) |
				  JOB_SVFLG_HASRUN;
		
	(void)sprintf(log_buffer, msg_manager, msg_jobrerun, 
		      preq->rq_user, preq->rq_host);
	log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid,
		  log_buffer);
	reply_ack(preq);

	/* note in accounting file */

	account_record(PBS_ACCT_RERUN, pjob, (char *)0);
}
