/*
*         Portable Batch System (PBS) Software License
* 
* Copyright (c) 1999, MRJ Technology Solutions.
* All rights reserved.
* 
* Acknowledgment: The Portable Batch System Software was originally developed
* as a joint project between the Numerical Aerospace Simulation (NAS) Systems
* Division of NASA Ames Research Center and the National Energy Research
* Supercomputer Center (NERSC) of Lawrence Livermore National Laboratory.
* 
* Redistribution of the Portable Batch System Software and use in source
* and binary forms, with or without modification, are permitted provided
* that the following conditions are met:
* 
* - Redistributions of source code must retain the above copyright and
*   acknowledgment notices, this list of conditions and the following
*   disclaimer.
* 
* - Redistributions in binary form must reproduce the above copyright and 
*   acknowledgment notices, this list of conditions and the following
*   disclaimer in the documentation and/or other materials provided with the
*   distribution.
* 
* - All advertising materials mentioning features or use of this software must
*   display the following acknowledgment:
* 
*   This product includes software developed by NASA Ames Research Center,
*   Lawrence Livermore National Laboratory, and MRJ Technology Solutions.
* 
*         DISCLAIMER OF WARRANTY
* 
* THIS SOFTWARE IS PROVIDED BY MRJ TECHNOLOGY SOLUTIONS ("MRJ") "AS IS" WITHOUT 
* WARRANTY OF ANY KIND, AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT ARE EXPRESSLY DISCLAIMED.
* 
* IN NO EVENT, UNLESS REQUIRED BY APPLICABLE LAW, SHALL MRJ, NASA, NOR
* THE U.S. GOVERNMENT BE LIABLE FOR ANY DIRECT DAMAGES WHATSOEVER,
* NOR ANY INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* 
* This license will be governed by the laws of the Commonwealth of Virginia,
* without reference to its choice of law rules.
*/
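/*
 * req_delete.c 
 *
 * Functions relating to the Delete Job Batch Request.
 *
 * Included functions are:
 *	remove_stagein()
 *	req_deletejob()
 *	post_delete_route()	(private)
 *	post_delete_mom1()	(private)
 *	post_delete_mom2()	(private)
 */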
#include <pbs_config.h>   /* the master config generated by configure */

#include <sys/types.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include "libpbs.h"
#include "server_limits.h"
#include "list_link.h"
#include "work_task.h"
#include "attribute.h"
#include "server.h"
#include "credential.h"
#include "batch_request.h"
#include "job.h"
#include "queue.h"
#include "pbs_error.h"
#include "acct.h"
#include "log.h"
#include "svrfunc.h"

/* RCS identification string for this file; not referenced by the code below */
static char ident[] = "@(#) $RCSfile: req_delete.c,v $ $Revision: 2.1 $";

/* Global Data Items (defined elsewhere): */

/*
 * Message text used in this file:
 *	msg_manager	 - sprintf format taking msg_deletejob plus the
 *			   requesting user and host
 *	msg_deletejob	 - text inserted into msg_manager for a delete
 *	msg_delrunjobsig - sprintf format taking the name of the signal
 *			   sent to a running job
 */
extern char *msg_deletejob;
extern char *msg_delrunjobsig;
extern char *msg_manager;
extern struct server server;
/* base time to which delays are added when scheduling WORK_Timed tasks */
extern time_t time_now;

/* Private Functions in this file (work task callbacks) */

static void post_delete_route A_((struct work_task *));
static void post_delete_mom1 A_((struct work_task *));
static void post_delete_mom2 A_((struct work_task *));

/* Private Data Items */

/*
 * Prefix which marks a request extension as a kill-delay specification
 * rather than free text; the number following the prefix is the delay.
 */
static char *deldelaystr = DELDELAY;

/*
 * remove_stagein() - ask MOM to delete the files staged in for a job
 *
 *	Used when a job is being purged after its files have been staged in.
 *	cpy_stage() builds a file request from the job's stagein attribute;
 *	that request is retyped as a delete-files request and relayed to the
 *	job's MOM.  When the relay succeeds the job's StagedIn server flag
 *	is cleared; when it fails the failure is logged and the request is
 *	freed here.
 */

void remove_stagein(pjob)
	job *pjob;
{
	struct batch_request *delreq;

	delreq = cpy_stage((struct batch_request *)0, pjob, JOB_ATR_stagein, 0);
	if (delreq == (struct batch_request *)0)
		return;		/* no files to delete */

	/* what came back is a copy request, turn it into a delete request */

	delreq->rq_type  = PBS_BATCH_DelFiles;
	delreq->rq_extra = (void *)0;

	if (relay_to_mom(pjob->ji_qs.ji_un.ji_exect.ji_momaddr,
			 delreq, release_req) != 0) {

		/* could not hand the request to MOM: note it and clean up */

		log_event(PBSEVENT_JOB, PBS_EVENTCLASS_FILE,
			  pjob->ji_qs.ji_jobid,
			  "unable to remove staged in files for job");
		free_br(delreq);
		return;
	}

	pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_StagedIn;
}

/*
 * req_deletejob - service the Delete Job Request
 *
 *	This request deletes a job.
 */

void req_deletejob(preq)
	struct batch_request *preq;
{
	job		 *pjob;
	struct work_task *pwtold;
	struct work_task *pwtnew;
	int		  rc;
	char		 *sigt = "SIGTERM";

	pjob = chk_job_request(preq->rq_ind.rq_delete.rq_objname, preq);
	if (pjob == (job *)0)
		return;

	if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT) {

		/*
		 * Find pid of router from existing work task entry,
 		 * then establish another work task on same child.
		 * Next, signal the router and wait for its completion;
		 */

		pwtold = (struct work_task *)GET_NEXT(pjob->ji_svrtask);
		while (pwtold) {
			if ((pwtold->wt_type == WORK_Deferred_Child) ||
			    (pwtold->wt_type == WORK_Deferred_Cmp)) {
				pwtnew = set_task(pwtold->wt_type,
					 pwtold->wt_event, post_delete_route,
					 preq);
				if (pwtnew) {

					/*
					 * reset type in case the SIGCHLD came
					 * in during the set_task;  it makes
					 * sure that next_task() will find the
					 * new entry.
					 */
					pwtnew->wt_type = pwtold->wt_type;
					pwtnew->wt_aux = pwtold->wt_aux;

					kill((pid_t)pwtold->wt_event, SIGTERM);
					pjob->ji_qs.ji_substate = JOB_SUBSTATE_ABORT;
					return;	/* all done for now */

				} else {

					req_reject(PBSE_SYSTEM, 0, preq);
					return;
				}
			}
			pwtold = (struct work_task *)GET_NEXT(pwtold->wt_linkobj);
		}
		/* should never get here ...  */
		log_err(-1, "req_delete", "Did not find work task for router");
		req_reject(PBSE_INTERNAL, 0, preq);
		return;

	} else if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN) {

		/* being sent to MOM, wait till she gets it going */
		/* retry in one second				  */

		pwtnew = set_task(WORK_Timed, time_now+1, post_delete_route,
				  preq);
		if (pwtnew == 0) 
			req_reject(PBSE_SYSTEM, 0, preq);

		return;
	}


	/*
	 * Log delete and if if requesting client is not job owner, send mail.
	 */

	(void)sprintf(log_buffer,"requestor=%s@%s",preq->rq_user,preq->rq_host);
	account_record(PBS_ACCT_DEL, pjob, log_buffer);
	(void)sprintf(log_buffer, msg_manager, msg_deletejob,
		      preq->rq_user, preq->rq_host);
	log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,
		  log_buffer);
	
	if (preq->rq_extend != (char *)0) {
		if (strncmp(preq->rq_extend,deldelaystr,strlen(deldelaystr))) {
			/* have text message in request extention, add it */
			(void)strcat(log_buffer, "\n");
			(void)strcat(log_buffer, preq->rq_extend);
		}
	}
	if (svr_chk_owner(preq, pjob) != 0) {
		svr_mailowner(pjob, MAIL_OTHER, MAIL_FORCE, log_buffer);
	}
	

	if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) {
		/*
		 * Send signal request to MOM.  The server will automagically
		 * pick up and "finish" off the client request when MOM replies.
		 */
		if (rc = issue_signal(pjob, sigt, post_delete_mom1, preq))
			req_reject(rc, 0, preq);   /* cant send to MOM */

		/* normally will ack reply when mom responds */

		(void)sprintf(log_buffer, msg_delrunjobsig, sigt);
		LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
			  pjob->ji_qs.ji_jobid, log_buffer);
		return; 
	} else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHKPT) != 0) {

		/* job has restart file at mom, do end job processing */

		(void)svr_setjobstate(pjob, JOB_STATE_EXITING,
					    JOB_SUBSTATE_EXITING);
		pjob->ji_momhandle = -1;	/* force new connection */
		(void)set_task(WORK_Immed, 0, on_job_exit, (void *)pjob);

	} else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) != 0) {

		/* job has staged-in file, should remove them */
		remove_stagein(pjob);
		(void)job_abt(pjob, (char *)0);

	} else {

		/*
		 * the job is not transitting (though it may have been) and
		 * is not running, so abort it.
 		 */

		(void)job_abt(pjob, (char *)0);
	}
	reply_ack(preq);
}


/*
 * post_delete_route - work task callback which finishes deleting a job
 *	that was not yet ready to be deleted when the request came in.
 *
 *	pwt	the work task that fired; its wt_parm1 is the original
 *		client delete request.
 *
 *	The delete request is simply run through req_deletejob() again;
 *	the job will either be here or not.
 */

static void post_delete_route(pwt)
	struct work_task *pwt;
{
	struct batch_request *delreq;

	delreq = (struct batch_request *)pwt->wt_parm1;
	req_deletejob(delreq);
}

/*
 * post_delete_mom1 - first of 2 work task trigger functions to finish the
 *	deleting of a running job.  This first part is invoked when MOM
 *	responds to the SIGTERM signal request.  
 */

static void post_delete_mom1(pwt)
	struct work_task *pwt;
{
	int 		      delay = 0;
	int		      dellen = strlen(deldelaystr);
	job		     *pjob;
	struct work_task     *pwtnew;
	pbs_queue	     *pque;
	struct batch_request *preq_sig;		/* signal request to MOM */
	struct batch_request *preq_clt;		/* original client request */
	int		      rc;

	preq_sig = pwt->wt_parm1;
	rc       = preq_sig->rq_reply.brp_code;
	preq_clt = preq_sig->rq_extra;
	release_req(pwt);

	pjob     = find_job(preq_clt->rq_ind.rq_delete.rq_objname);
	if (pjob == (job *)0) {
		/* job has gone away */
		req_reject(PBSE_UNKJOBID, 0, preq_clt);
		return;
	}

	if (rc) {
		/* mom rejected request */
		if (rc == PBSE_UNKJOBID) {
			/* MOM claims no knowledge, so just purge it */

			log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
				  pjob->ji_qs.ji_jobid,
				  "MOM rejected signal during delete");
			/* removed the resources assigned to job */
			free_nodes(pjob);
			set_resc_assigned(pjob, DECR);
			job_purge(pjob);
			reply_ack(preq_clt);
		} else {
			req_reject(rc, 0, preq_clt);
		}
		return;
	}

	if (preq_clt->rq_extend) {
		if (strncmp(preq_clt->rq_extend, deldelaystr, dellen) == 0) {
			delay = atoi(preq_clt->rq_extend + dellen);
		}
	}
	reply_ack(preq_clt);		/* dont need it, reply now */

	/*
	 * if no delay specified in original request, see if kill_delay
	 * queue attribute is set.
	 */
	
	if (delay == 0) {
		pque = pjob->ji_qhdr;
		if (pque->qu_attr[(int)QE_ATR_KillDelay].at_flags &
							ATR_VFLAG_SET)
			delay = pque->qu_attr[(int)QE_ATR_KillDelay].at_val.at_long;
		else
			delay = 2;
	}
	pwtnew = set_task(WORK_Timed, delay+time_now, post_delete_mom2, pjob);
	if (pwtnew) 
		/* insure that work task will be removed if job goes away */
		append_link(&pjob->ji_svrtask, &pwtnew->wt_linkobj, pwtnew);
}

/*
 * post_delete_mom2 - second of 2 work task trigger functions to finish the
 *	deleting of a running job.  It is scheduled by post_delete_mom1()
 *	to fire once the kill delay has passed.
 *
 *	pwt	the work task that fired; its wt_parm1 is the job.
 *
 *	If the job is still in the running state a SIGKILL signal request
 *	is issued for it and the outcome is logged; MOM's reply to that
 *	request is only released.  A job in any other state is left alone.
 */

static void post_delete_mom2(pwt)
	struct work_task *pwt;
{
	job  *pjob;
	int   rc;
	char *sigk = "SIGKILL";

	pjob = (job *)pwt->wt_parm1;
	if (pjob->ji_qs.ji_state != JOB_STATE_RUNNING)
		return;		/* SIGTERM was enough, nothing left to kill */

	rc = issue_signal(pjob, sigk, release_req, (void *)0);
	if (rc != 0) {
		/* nothing went out, so do not log the signal as sent */
		log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
			  pjob->ji_qs.ji_jobid,
			  "unable to send SIGKILL to job on delete");
		return;
	}

	(void)sprintf(log_buffer, msg_delrunjobsig, sigk);
	LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
		  pjob->ji_qs.ji_jobid, log_buffer);
}
