/*
*         Portable Batch System (PBS) Software License
* 
* Copyright (c) 1999, MRJ Technology Solutions.
* All rights reserved.
* 
* Acknowledgment: The Portable Batch System Software was originally developed
* as a joint project between the Numerical Aerospace Simulation (NAS) Systems
* Division of NASA Ames Research Center and the National Energy Research
* Supercomputer Center (NERSC) of Lawrence Livermore National Laboratory.
* 
* Redistribution of the Portable Batch System Software and use in source
* and binary forms, with or without modification, are permitted provided
* that the following conditions are met:
* 
* - Redistributions of source code must retain the above copyright and
*   acknowledgment notices, this list of conditions and the following
*   disclaimer.
* 
* - Redistributions in binary form must reproduce the above copyright and 
*   acknowledgment notices, this list of conditions and the following
*   disclaimer in the documentation and/or other materials provided with the
*   distribution.
* 
* - All advertising materials mentioning features or use of this software must
*   display the following acknowledgment:
* 
*   This product includes software developed by NASA Ames Research Center,
*   Lawrence Livermore National Laboratory, and MRJ Technology Solutions.
* 
*         DISCLAIMER OF WARRANTY
* 
* THIS SOFTWARE IS PROVIDED BY MRJ TECHNOLOGY SOLUTIONS ("MRJ") "AS IS" WITHOUT 
* WARRANTY OF ANY KIND, AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT ARE EXPRESSLY DISCLAIMED.
* 
* IN NO EVENT, UNLESS REQUIRED BY APPLICABLE LAW, SHALL MRJ, NASA, NOR
* THE U.S. GOVERNMENT BE LIABLE FOR ANY DIRECT DAMAGES WHATSOEVER,
* NOR ANY INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* 
* This license will be governed by the laws of the Commonwealth of Virginia,
* without reference to its choice of law rules.
*/
/*
 * job_recov.c - This file contains the functions to record a job
 *	data structure to disk and to recover it from disk.
 *
 *	The data is recorded in a file whose name is the job_id.
 *
 *	The following public functions are provided:
 *		job_save()   - save the disk image 
 *		job_recov()  - recover (read) job from disk
 */

#include <pbs_config.h>   /* the master config generated by configure */

#include <sys/types.h>
#include <sys/param.h>
#include "pbs_ifl.h"
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include "server_limits.h"
#include "list_link.h"
#include "attribute.h"
#include "job.h"
#include "log.h"
#include "svrfunc.h"
#if __STDC__ != 1
#include <memory.h>
#endif

/*
 * Revision identification string for this source file.  It is not
 * referenced by any code visible in this file.
 */
static char ident[] = "@(#) $RCSfile: job_recov.c,v $ $Revision: 2.1.8.1 $";

/* JOBBUFSIZE is not referenced by the code visible in this file. */
#define JOBBUFSIZE 2048
/* Number of attempts job_save() makes at writing a full job image. */
#define MAX_SAVE_TRIES 3

/*
 * O_SYNC is not currently POSIX, so select whatever synchronous-write
 * open flag is available: O_SYNC when it is defined, otherwise _FSYNC
 * when that macro evaluates non-zero, otherwise 0 (in which case no
 * extra flag is passed to open).
 */
#ifdef O_SYNC
#define O_Sync O_SYNC
#elif _FSYNC
#define O_Sync _FSYNC
#else
#define O_Sync 0
#endif

/* global data items */

/*
 * path_jobs - string prepended to a job's file name by job_save() and
 *	job_recov().  Nothing is inserted between the two, so it
 *	presumably ends with a '/' -- confirm where it is set.
 * time_now  - value job_save() stores as the mtime attribute of a
 *	modified job; presumably the current time, maintained elsewhere.
 */
extern char  *path_jobs;
extern time_t time_now;

/* data global only to this file */

/*
 * Size in bytes of struct jobfix.  job_save() writes this many bytes
 * starting at a job's ji_qs member and job_recov() reads the same
 * number back into it.
 */
const static unsigned int quicksize = sizeof (struct jobfix);

/*
 * job_save() - Saves (or updates) a job structure image on disk
 *
 *	The image is kept in the file
 *		<path_jobs><ji_fileprefix><JOB_FILE_SUFFIX>
 *
 *	pjob       - job whose image is to be written
 *	updatetype - SAVEJOB_QUICK, SAVEJOB_FULL or SAVEJOB_NEW
 *
 *	SAVEJOB_QUICK - only the fixed size ji_qs sub-structure is
 *	rewritten, in place, at the start of the existing file.  The file
 *	is opened with O_Sync and without O_CREAT, so it must already
 *	exist.  ji_modified is left untouched.
 *
 *	SAVEJOB_FULL (usually following a modify job request) - to insure
 *	that a complete image is always present under the real name:
 *	1. write (with O_Sync) the new image to a temporary file,
 *	   <path_jobs><ji_fileprefix><JOB_FILE_COPY>
 *	2. rename() the temporary file onto the real name, which
 *	   replaces the old image in a single step
 *	If either step fails the temporary file is removed and the old
 *	image is left as it was.
 *
 *	SAVEJOB_NEW - first write of a new job; the image is written
 *	directly to the real file name.
 *
 *	A full or new image consists of the ji_qs sub-structure followed
 *	by the job attributes as written by save_attr().  Up to
 *	MAX_SAVE_TRIES attempts are made to write it.  On success
 *	ji_modified is cleared.
 *
 *	Returns: 0 on success, -1 on any error (which is also logged,
 *		 except for a failure of the save_*() helpers themselves).
 */

int job_save (pjob, updatetype)
	job *pjob;		/* pointer to job structure */
	int  updatetype;	/* SAVEJOB_QUICK, SAVEJOB_FULL or SAVEJOB_NEW */
{
	int	fds;
	int	tries;
	int	failed;
	ssize_t	nwritten;
	size_t	baselen;
	size_t	suffixlen;
	char	namebuf1[MAXPATHLEN];	/* real job file name */
	char	namebuf2[MAXPATHLEN];	/* temporary copy file name */
	int	openflags;

	/*
	 * Both names are <path_jobs><ji_fileprefix> plus a suffix.  Make
	 * sure the longer of the two, terminator included, fits before
	 * anything is copied into the fixed size buffers.
	 */

	baselen   = strlen(path_jobs) + strlen(pjob->ji_qs.ji_fileprefix);
	suffixlen = strlen(JOB_FILE_SUFFIX);
	if (strlen(JOB_FILE_COPY) > suffixlen)
		suffixlen = strlen(JOB_FILE_COPY);
	if (baselen + suffixlen >= sizeof(namebuf1)) {
		log_err(-1, "job_save", "job file name too long");
		return (-1);
	}

	(void)strcpy(namebuf1, path_jobs);	/* job directory path */
	(void)strcat(namebuf1, pjob->ji_qs.ji_fileprefix);
	(void)strcpy(namebuf2, namebuf1);	/* setup for later */
	(void)strcat(namebuf1, JOB_FILE_SUFFIX);

	/* if ji_modified is set, ie an attribute changed, then update mtime */

	if (pjob->ji_modified) {
		pjob->ji_wattr[JOB_ATR_mtime].at_val.at_long = time_now;
	}

	if (updatetype == SAVEJOB_QUICK) {

		openflags = O_WRONLY | O_Sync;
		fds = open(namebuf1, openflags, 0600);
		if (fds < 0) {
			log_err(errno, "job_save", "error on open");
			return (-1);
		}

		/* just write the "critical" base structure to the file */

		for (;;) {
			nwritten = write(fds, (char *)&pjob->ji_qs, quicksize);
			if ((nwritten >= 0) &&
			    ((size_t)nwritten == (size_t)quicksize))
				break;		/* all of it was written */

			if ((nwritten < 0) && (errno == EINTR)) {
				/* interrupted, rewind and retry the write */
				if (lseek(fds, (off_t)0, SEEK_SET) < 0) {
					log_err(errno, "job_save", "lseek");
					(void)close(fds);
					return (-1);
				}
				continue;
			}

			/*
			 * Hard error or short write.  A short write does
			 * not set errno, so do not report a stale value.
			 */
			log_err((nwritten < 0) ? errno : -1,
				"job_save", "quickwrite");
			(void)close(fds);
			return (-1);
		}
		(void)close(fds);
		return (0);
	}

	/*
	 * Write the whole image to the file.
	 * For an update, this is done to a new file to protect the
	 * old against crashes.
	 * The image is written in two parts:
	 * (1) the ji_qs job sub-structure, and
	 * (2) the job attributes, via save_attr().
	 */

	(void)strcat(namebuf2, JOB_FILE_COPY);
	openflags = O_CREAT | O_WRONLY | O_Sync;

	if (updatetype == SAVEJOB_NEW)
		fds = open(namebuf1, openflags, 0600);
	else
		fds = open(namebuf2, openflags, 0600);

	if (fds < 0) {
		log_err(errno, "job_save", "open for full save");
		return (-1);
	}

	failed = 1;
	for (tries = 0; tries < MAX_SAVE_TRIES; ++tries) {
		save_setup(fds);
		if ((save_struct((char *)&pjob->ji_qs, (size_t)quicksize) == 0) &&
		    (save_attr(job_attr_def, pjob->ji_wattr,
			       (int)JOB_ATR_LAST) == 0) &&
		    (save_flush() == 0)) {
			failed = 0;
			break;
		}

		/*
		 * The attempt failed; the next one must start again at
		 * the beginning of the file.  If the file cannot be
		 * rewound another attempt would write the image at the
		 * wrong offset, so give up instead.
		 */
		if (lseek(fds, (off_t)0, SEEK_SET) < 0) {
			log_err(errno, "job_save", "full lseek");
			break;
		}
	}
	(void)close(fds);

	if (failed) {
		if (updatetype == SAVEJOB_FULL)
			(void)unlink(namebuf2);
		return (-1);
	}

	if (updatetype == SAVEJOB_FULL) {
		/*
		 * Install the new image.  rename() replaces the old file
		 * in one step, so there is no moment at which the real
		 * name is missing.  On failure the old image is still in
		 * place; drop the copy and report the error so that the
		 * job stays marked as modified.
		 */
		if (rename(namebuf2, namebuf1) != 0) {
			LOG_EVENT(PBSEVENT_ERROR|PBSEVENT_SECURITY,
				  PBS_EVENTCLASS_JOB,
				  pjob->ji_qs.ji_jobid,
				  "rename in job_save failed");
			(void)unlink(namebuf2);
			return (-1);
		}
	}
	pjob->ji_modified = 0;
	return (0);
}

/*
 * job_recov() - recover (read in) a job from its save file
 *
 *	This function is only needed upon server start up.
 *
 *	filename - name of the job save file; it is appended to path_jobs
 *		   to form the path that is opened.
 *
 *	A job structure is obtained from job_alloc(), its ji_qs
 *	sub-structure is read from the start of the file, and the working
 *	attributes are then read by recov_attr().
 *
 *	The file is rejected as a "ghost" when the ji_fileprefix stored in
 *	it is empty or is not a leading part of the file's own name.
 *
 *	Returns: job pointer to new job structure or a
 *		 null pointer on an error.
 */

job *job_recov(filename)
	char *filename;		/* job save file name, relative to path_jobs */
{
	int		 fds;
	job		*pj;
	char		*pn;
	ssize_t		 nread;
	char		 namebuf[MAXPATHLEN];


	/* make sure the full path, terminator included, fits in namebuf */

	if (strlen(path_jobs) + strlen(filename) >= sizeof(namebuf)) {
		log_err(-1, "job_recov", "job file name too long");
		return ((job *)0);
	}

	pj = job_alloc();	/* allocate & initialize job structure space */
	if (pj == (job *)0) {
		return ((job *)0);
	}

	(void)strcpy(namebuf, path_jobs);	/* job directory path */
	(void)strcat(namebuf, filename);
	fds = open(namebuf, O_RDONLY, 0);
	if (fds < 0) {
		log_err(errno, "job_recov", "open of job file");
		/*
		 * NOTE(review): plain free() is used on the paths taken
		 * before recov_attr() runs, job_free() afterwards --
		 * confirm job_alloc() attaches nothing that needs more.
		 */
		free((char *)pj);
		return ((job *)0);
	}

	/* read in job quick save sub-structure */

	nread = read(fds, (char *)&pj->ji_qs, quicksize);
	if ((nread < 0) || ((size_t)nread != (size_t)quicksize)) {
		/* a short read does not set errno; do not log a stale one */
		log_err((nread < 0) ? errno : -1, "job_recov", "read");
		free((char *)pj);
		(void)close(fds);
		return ((job *)0);
	}

	/* Does file name match the internal name? */
	/* This detects ghost files */

	pn = strrchr(namebuf, (int)'/');
	if (pn != (char *)0)
		pn++;			/* step past the last '/' */
	else
		pn = namebuf;		/* no directory part at all */

	/*
	 * An empty stored prefix would compare equal to any name (zero
	 * characters are compared), so treat it as a mismatch as well.
	 */
	if ((pj->ji_qs.ji_fileprefix[0] == '\0') ||
	    (strncmp(pn, pj->ji_qs.ji_fileprefix,
		     strlen(pj->ji_qs.ji_fileprefix)) != 0)) {
		/* mismatch, discard job */

		(void)sprintf(log_buffer,
			      "Job Id %s does not match file name for %s",
			      pj->ji_qs.ji_jobid,
			      namebuf);
		log_err(-1, "job_recov", log_buffer);
		free((char *)pj);
		(void)close(fds);
		return ((job *)0);
	}

	/* read in working attributes */

	if (recov_attr(fds, pj, job_attr_def, pj->ji_wattr, (int)JOB_ATR_LAST,
	    (int)JOB_ATR_UNKN) != 0) {
		log_err(errno, "job_recov", "err from recov_attr");
		job_free(pj);
		(void)close(fds);
		return ((job *)0);
	}
	(void)close(fds);

	/* all done recovering the job */

	return (pj);
}
