/*
*         Portable Batch System (PBS) Software License
* 
* Copyright (c) 1999, MRJ Technology Solutions.
* All rights reserved.
* 
* Acknowledgment: The Portable Batch System Software was originally developed
* as a joint project between the Numerical Aerospace Simulation (NAS) Systems
* Division of NASA Ames Research Center and the National Energy Research
* Supercomputer Center (NERSC) of Lawrence Livermore National Laboratory.
* 
* Redistribution of the Portable Batch System Software and use in source
* and binary forms, with or without modification, are permitted provided
* that the following conditions are met:
* 
* - Redistributions of source code must retain the above copyright and
*   acknowledgment notices, this list of conditions and the following
*   disclaimer.
* 
* - Redistributions in binary form must reproduce the above copyright and 
*   acknowledgment notices, this list of conditions and the following
*   disclaimer in the documentation and/or other materials provided with the
*   distribution.
* 
* - All advertising materials mentioning features or use of this software must
*   display the following acknowledgment:
* 
*   This product includes software developed by NASA Ames Research Center,
*   Lawrence Livermore National Laboratory, and MRJ Technology Solutions.
* 
*         DISCLAIMER OF WARRANTY
* 
* THIS SOFTWARE IS PROVIDED BY MRJ TECHNOLOGY SOLUTIONS ("MRJ") "AS IS" WITHOUT 
* WARRANTY OF ANY KIND, AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT ARE EXPRESSLY DISCLAIMED.
* 
* IN NO EVENT, UNLESS REQUIRED BY APPLICABLE LAW, SHALL MRJ, NASA, NOR
* THE U.S. GOVERNMENT BE LIABLE FOR ANY DIRECT DAMAGES WHATSOEVER,
* NOR ANY INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* 
* This license will be governed by the laws of the Commonwealth of Virginia,
* without reference to its choice of law rules.
*/
/*
 * The entry point function for MOM.
 */

#include <pbs_config.h>   /* the master config generated by configure */

#include	<assert.h>
#include	<stdio.h>
#include	<stdlib.h>
#include	<unistd.h>
#ifdef _CRAY
#include	<termios.h>
#endif
#include	<pwd.h>
#include	<signal.h>
#include	<string.h>
#include	<ctype.h>
#include	<errno.h>
#include	<fcntl.h>
#include	<time.h>
#include	<limits.h>
#include	<netdb.h>
#include	<sys/types.h>
#include	<sys/param.h>
#include	<sys/times.h>
#include	<sys/stat.h>
#if (PLOCK_DAEMONS & 4)
#include	<sys/lock.h>
#endif	/* PLOCK_DAEMONS */
#include	<netinet/in.h>
#include	<sys/socket.h>
#ifdef _CRAY
#include	<sys/category.h>
#include	<sys/usrv.h>
#include	<sys/sysv.h>
#endif	/* _CRAY */
#include	<sys/time.h>
#include	<sys/resource.h>
#include	<sys/utsname.h>

#include 	"libpbs.h"
#include 	"pbs_ifl.h"
#include	"server_limits.h"
#include	"list_link.h"
#include	"attribute.h"
#include	"resource.h"
#include	"job.h"
#include	"mom_mach.h"
#include	"mom_func.h"
#include	"svrfunc.h"
#include	"pbs_error.h"
#include	"log.h"
#include	"net_connect.h"
#include	"rpp.h"
#include	"dis.h"
#include	"dis_init.h"
#include	"resmon.h"

static char ident[] = "@(#) $RCSfile: mom_main.c,v $ $Revision: 2.16 $";

#define CHECK_POLL_TIME 120

/* Global Data Items */

/* CPU-time multiplier; assigned by cputmult() from the "$cputmult" config command */
double		cputfactor = 1.00;
unsigned int	default_server_port;
/* when non-zero, finish_loop() calls scan_for_exiting() */
int		exiting_tasks = 0;
/* set by setidealload(); stays negative until a load value has been configured */
float		ideal_load_val = -1.0;
int		internal_state = 0;
int		lockfds;
time_t		loopcnt;		/* used for MD5 calc */
/* set by setmaxload(); stays negative until a load value has been configured */
float		max_load_val   = -1.0;
char		mom_host[PBS_MAXHOSTNAME+1];
char		mom_short_name[PBS_MAXHOSTNAME+1];
int		num_var_env;
char	       *path_epilog;
char	       *path_jobs;
char	       *path_prolog;
char	       *path_spool;
char	       *path_undeliv;
char           *path_home = PBS_SERVER_HOME;
char		pbs_current_user[PBS_MAXUSER] = "pbs_mom";  /* for libpbs.a */
char	       *msg_daemonname = pbs_current_user;	/* for logs     */
int		pbs_errno;
gid_t		pbsgroup;
unsigned int	pbs_mom_port;
unsigned int	pbs_rm_port;
list_head	mom_polljobs;	/* jobs that must have resource limits polled */
list_head	svr_newjobs;	/* jobs being sent to MOM */
list_head	svr_alljobs;	/* all jobs under MOM's control */
/* when non-zero, finish_loop() calls scan_for_terminated() */
int		termin_child = 0;
time_t		time_now = 0;
time_t		last_scan = 0;
extern list_head svr_requests;
extern struct var_table vtable;	/* see start_exec.c */
#if MOM_CHECKPOINT == 1
char	       *path_checkpoint = (char *)0;
static resource_def *rdcput;
#endif	/* MOM_CHECKPOINT */
/* wall-time multiplier; assigned by wallmult() from the "$wallmult" config command */
double		wallfactor = 1.00;


/* Local Data Items */

static char	*log_file = (char *)0;
static int	mom_run_state;
/* set to 1 by catch_hup(); cleared again by process_hup() */
static int	call_hup = 0;
static char	*path_log;

/*
**	One node of the temporary list built by add_static() while a
**	config file is read.  read_config() copies the "c" members into
**	config_array and frees the nodes.
*/
struct	config_list {
	struct	config		c;
	struct	config_list	*c_link;
};

/* shared reply buffer; checkret() grows it with realloc() when needed */
char			*ret_string;
/* number of bytes currently allocated for ret_string */
int			ret_size;
/* static resources from the config file, ended by a NULL c_name */
struct	config		*config_array = NULL;
/* head of the list filled by add_static() */
struct	config_list	*config_list = NULL;
/* signal set blocked/unblocked around wait_request() in finish_loop() */
sigset_t		allsigs;
/* RM_ERR_* code left by the resource handlers for rm_request() to report */
int			rm_errno;
unsigned	int	reqnum = 0;		/* the packet number */
int			port_care = 1;		/* secure connecting ports */
uid_t			uid = 0;		/* uid we are running with */
int			alarm_time = 10;	/* time before alarm */

extern	void		*okclients;		/* accept connections from */
char			**maskclient = NULL;	/* wildcard connections */
/* entries used / entries allocated in maskclient; see restricted() */
int			mask_num = 0;
int			mask_max = 0;
u_long			localaddr = 0;

/* log messages shared by the resource handlers */
char			extra_parm[] = "extra parameter(s)";
char			no_parm[] = "required parameter not found";

/* entries used in pcphosts and the table itself; filled by usecp() */
int			cphosts_num = 0;
struct cphosts *pcphosts = 0;

/* when non-zero, read_config() treats a missing config file as an error */
static	int		config_file_specified = 0;
/* file read_config() opens when it is called with a NULL name */
static	char		config_file[_POSIX_PATH_MAX] = "config";

/*
**	These routines are in the "dependent" code.
*/
extern	void	dep_initialize A_((void));
extern	void	dep_cleanup A_((void));

/* External Functions */

extern void  catch_child A_((int));
extern void  init_abort_jobs A_((int));
extern void  scan_for_exiting();
extern void  scan_for_terminated();

/* Local public functions */

void stop_me A_((int));

/* Local private functions */

static char *mk_dirs A_((char *));

/*
**	Handler attached to the NULL-name entry that ends a resource
**	table.  It should never be reached; if it is, complain in the
**	log and return no value.
*/
char	*
nullproc(attrib)
    struct	rm_attribute	*attrib;
{
	static	char	whoami[] = "nullproc";

	log_err(-1, whoami, "should not be called");
	return (char *)0;
}

/*
**	Resource handler for "arch": returns the PBS_MACH string.
**	The resource takes no parameters; if any are supplied the
**	request is rejected with RM_ERR_BADPARAM.
*/
static char	*
arch(attrib)
    struct	rm_attribute	*attrib;
{
	if (attrib == NULL)
		return PBS_MACH;

	log_err(-1, "arch", extra_parm);
	rm_errno = RM_ERR_BADPARAM;
	return NULL;
}

/*
**	Return a string of the form
**		"sysname nodename release version machine"
**	built from uname(2).  The string is created on the first
**	successful call and the same pointer is returned afterwards; the
**	caller must not free or modify it.  Returns NULL if uname() or
**	the allocation fails (a later call will then try again).
**
**	The text is formatted into a buffer sized exactly for the five
**	fields, so it neither depends on nor overwrites the shared
**	ret_string buffer.
*/
char	*
getuname()
{
	static	char	*name = NULL;
	struct	utsname	n;
	size_t		need;
	char		*buf;

	if (name != NULL)
		return name;

	if (uname(&n) == -1)
		return NULL;

	/* five fields, four separating blanks and the terminating NUL */
	need = strlen(n.sysname) + strlen(n.nodename) + strlen(n.release) +
		strlen(n.version) + strlen(n.machine) + 5;
	buf = (char *)malloc(need);
	if (buf == NULL)
		return NULL;

	sprintf(buf, "%s %s %s %s %s", n.sysname,
		n.nodename, n.release, n.version, n.machine);
	name = buf;
	return name;
}

/*
**	Resource handler for "uname": hands back whatever getuname()
**	returns.  No parameters are accepted; supplying any yields
**	RM_ERR_BADPARAM.
*/
static char	*
requname(attrib)
    struct	rm_attribute	*attrib;
{
	if (attrib != NULL) {
		log_err(-1, "uname", extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	return getuname();
}

/*
**	Resource handler for "validuser": answers "yes" when the name in
**	the parameter's value is found by getpwnam(), otherwise "no".
**	A missing parameter or value is rejected with RM_ERR_NOPARAM.
*/
static char *
validuser(attrib)
struct	rm_attribute	*attrib;
{
  if (attrib != NULL && attrib->a_value != NULL)
    return (getpwnam(attrib->a_value) != NULL) ? "yes" : "no";

  log_err(-1, "valid_user", no_parm);
  rm_errno = RM_ERR_NOPARAM;
  return NULL;
}


/*
**	Resource handler reporting the load average obtained from
**	get_la(), formatted with two decimals.
**
**	Takes no parameters (RM_ERR_BADPARAM otherwise); sets
**	RM_ERR_SYSTEM and returns NULL if get_la() fails.  The result
**	lives in a static buffer local to this function, so it is
**	overwritten by the next call.
*/
char *loadave(attrib)
	struct rm_attribute *attrib;
{
	char	    *id = "loadave";
	static char  ret_string[20];
	double	     la;

	if (attrib) {
		log_err(-1, id, extra_parm);
		rm_errno = RM_ERR_BADPARAM;
		return NULL;
	}

	if (get_la(&la) != 0) {
		rm_errno = RM_ERR_SYSTEM;
		return NULL;
	}

	/* bounded: "%.2f" of an unexpectedly huge value must not overrun */
	(void)snprintf(ret_string, sizeof(ret_string), "%.2f", la);
	return ret_string;
}

/*
**	Resource names handled directly in this file, each paired with
**	its handler function.  dependent() searches this table before
**	standard_config and dependent_config.  The entry with a NULL name
**	marks the end of the table for rm_search().
*/
struct	config	common_config[] = {
	{ "arch", arch },
	{ "uname", requname },
	{ "validuser", validuser },
	{ NULL, nullproc }
};

/*
**	Look up a resource by name in a table of struct config entries.
**	The table must be terminated by an entry whose c_name is NULL.
**	Returns the first entry whose name equals "what", or NULL when
**	there is no such entry or either argument is NULL.
*/
struct	config	*
rm_search(where, what)
    struct	config	*where;
    char	*what;
{
	struct	config	*entry;

	if (where == NULL || what == NULL)
		return NULL;

	entry = where;
	while (entry->c_name != NULL) {
		if (strcmp(entry->c_name, what) == 0)
			return entry;
		entry++;
	}
	return NULL;
}


/*
**	Resolve a resource through the built-in handler tables.
**	common_config is tried first, then standard_config, then
**	dependent_config; the handler of the first table that knows the
**	name is called with "attr" and its result returned.  If no table
**	knows the name, rm_errno is set to RM_ERR_UNKNOWN and NULL is
**	returned.
*/
char	*
dependent(res, attr)
    char	*res;
    struct	rm_attribute	*attr;
{
	extern	struct	config	standard_config[];
	extern	struct	config	dependent_config[];
	struct	config		*tables[3];
	struct	config		*hit;
	int			t;

	tables[0] = common_config;
	tables[1] = standard_config;
	tables[2] = dependent_config;

	for (t = 0; t < 3; t++) {
		hit = rm_search(tables[t], res);
		if (hit != NULL)
			return (*hit->c_u.c_func)(attr);
	}

	rm_errno = RM_ERR_UNKNOWN;
	return NULL;
}

/*
**	Point the DIS i/o hooks at the RPP routines.  Nothing is done
**	when dis_getc already refers to rpp_getc.
*/
void
DIS_rpp_reset()
{
	if (dis_getc == rpp_getc)
		return;			/* already set up for RPP */

	dis_getc    = rpp_getc;
	dis_puts    = (int (*) A_((int, const char *, size_t)) )rpp_write;
	dis_gets    = (int (*) A_((int, char *, size_t)) )rpp_read;
	disr_skip   = (int (*) A_((int, size_t)) )rpp_skip;
	disr_commit = rpp_rcommit;
	disw_commit = rpp_wcommit;
}

/*
**	Note the (re)initialization in the log, then let the
**	machine-dependent code set itself up via dep_initialize().
*/
void
initialize()
{
	log_record(PBSEVENT_SYSTEM, 0, "initialize", "independent");
	dep_initialize();
}

/*
**	Counterpart of initialize(): let the machine-dependent code
**	release whatever it holds via dep_cleanup().
*/
void
cleanup()
{
	dep_cleanup();
}

/*
**	Terminate the daemon.  A positive "sig" is logged as the signal
**	that was caught; anything else is logged as an abnormal
**	termination.  The dependent code is cleaned up, the log is
**	closed and the process exits with status 1.
*/
void
die(sig)
    int		sig;
{
	static	char	whoami[] = "die";

	if (sig <= 0) {
		log_record(PBSEVENT_SYSTEM, 0, whoami, "abnormal termination");
	}
	else {
		sprintf(log_buffer, "caught signal %d", sig);
		log_record(PBSEVENT_SYSTEM, 0, whoami, log_buffer);
	}

	cleanup();
	log_close(1);
	exit(1);
}

/*
**	Verify the result of a memory allocation.  A NULL pointer is
**	fatal: the failure is logged and die(0) ends the process.
*/
void
memcheck(buf)
    char	*buf;
{
	if (buf == NULL) {
		log_err(-1, "memcheck", "memory allocation failed");
		die(0);
	}
}

/*
**	Check the ret_string buffer to make sure that there is
**	enough room starting at *spot to hold len characters more.
**	If not, grow the buffer by 2*len bytes with realloc and make
**	*spot point to the corresponding place in the new buffer.
**	The growth is recorded in the log.  A failed allocation is
**	fatal (see memcheck).
**
**	The offset of *spot is taken before realloc is called, so no
**	arithmetic is done on the old buffer address after it may have
**	been released.
*/
void
checkret(spot, len)
    char	**spot;
    int		len;
{
	char	*id = "checkret";
	char	*hold;
	long	offset;

	offset = (long)(*spot - ret_string);	/* position inside buffer */
	if (offset < (long)(ret_size - len))
		return;

	ret_size += len*2;				/* new buf size */
	sprintf(log_buffer, "size increased to %d", ret_size);
	log_record(PBSEVENT_SYSTEM, 0, id, log_buffer);
	hold = realloc(ret_string, ret_size);		/* new buf */
	memcheck(hold);
	ret_string = hold;
	*spot = hold + offset;				/* new spot in buf */
}

/*
**	Return a pointer to the first character of str that is not
**	white space (as defined by isspace), or to the terminating NUL
**	if there is none.  The string is not modified.
*/
char	*
skipwhite(str)
    char	*str;
{
	/* isspace() is only defined for unsigned char values and EOF */
	while (*str != '\0' && isspace((int)(unsigned char)*str))
		str++;
	return str;
}

/*
**	Copy a token from str into tok.  A token is the longest leading
**	run of alphanumeric characters, ':' and '_'.  tok is always
**	NUL terminated.  Returns a pointer to the first character of str
**	that was not copied.
**
**	No length is passed in: the caller must supply a tok buffer
**	large enough for the longest token that str can start with.
*/
char	*
tokcpy(str, tok)
    char	*str, *tok;
{
	for (; *str; str++, tok++) {
		/* isalnum() is only defined for unsigned char values */
		if (!isalnum((int)(unsigned char)*str) &&
		    *str != ':' && *str != '_')
			break;
		*tok = *str;
	}
	*tok = '\0';
	return str;
}

/*
**	Remove trailing newlines and other white space from str, in
**	place.  An empty string is left untouched: the scan never looks
**	at anything in front of str.
*/
void
rmnl(str)
    char	*str;
{
	size_t	len;

	len = strlen(str);
	while (len > 0 && isspace((int)(unsigned char)str[len-1]))
		str[--len] = '\0';	/* '\n' counts as white space too */
}

/*
**	Handler for the "$clienthost" config command.  Resolve the host
**	name and insert its address, in host byte order, into the
**	okclients tree.  Returns that address, or 0 if the name cannot
**	be resolved or does not resolve to an address that fits a
**	struct in_addr.
*/
static	u_long
addclient(name)
    char	*name;
{
	static	char	id[] = "addclient";
	struct	hostent		*host, *gethostbyname();
	struct	in_addr		saddr;
	u_long			ipaddr;

	if ((host = gethostbyname(name)) == NULL) {
		sprintf(log_buffer, "host %s not found", name);
		log_err(-1, id, log_buffer);
		return 0;
	}

	/* only copy what is known to fit into saddr */
	if (host->h_addrtype != AF_INET ||
	    host->h_length != (int)sizeof(saddr)) {
		sprintf(log_buffer, "host %s has no IPv4 address", name);
		log_err(-1, id, log_buffer);
		return 0;
	}

	memcpy((char *)&saddr, host->h_addr, sizeof(saddr));
	ipaddr = ntohl(saddr.s_addr);
	tinsert(ipaddr, &okclients);
	return ipaddr;
}

/*
**	Handler for the "$logevent" config command.  The value is
**	converted with strtol (base chosen from its prefix) and stored
**	through log_event_mask.  Returns 1 when the number is followed
**	by the end of the string or by white space, 0 otherwise; the
**	mask is stored in either case.
*/
static u_long
setlogevent(value)
	char *value;
{
	char *rest;

	* log_event_mask = strtol(value, &rest, 0);
	return ((*rest == '\0') || isspace((int)*rest)) ? 1 : 0;
}

/*
**	Handler for the "$restricted" config command.  Append a copy of
**	the host name pattern to the maskclient table, which starts with
**	room for four entries and doubles whenever it becomes full.
**	Always returns 1; running out of memory is fatal (see memcheck).
*/
static	u_long
restricted(name)
	char	*name;
{
	static	char	id[] = "restricted";
	char		*copy;
	char		**grown;

	log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, name);
	if (mask_max == 0) {
		maskclient = (char **)calloc(4, sizeof(char *));
		memcheck((char *)maskclient);
		mask_max = 4;
	}

	copy = strdup(name);
	memcheck(copy);
	maskclient[mask_num++] = copy;

	if (mask_num == mask_max) {
		/* keep the old table reachable until the new one exists */
		grown = (char **)realloc(maskclient,
				2*mask_max*sizeof(char *));
		memcheck((char *)grown);
		maskclient = grown;
		mask_max *= 2;
	}
	return 1;
}

/*
**	Handler for the "$cputmult" config command.  Logs the value and
**	stores its atof() conversion in cputfactor.  A result of zero
**	is reported as an error (return 0); otherwise 1 is returned.
*/
static u_long
cputmult(value)
	char 	*value;
{
	static char	id[] = "cputmult";

	log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, value);
	cputfactor = atof(value);
	return (cputfactor == 0.0) ? 0 : 1;
}

/*
**	Handler for the "$wallmult" config command.  Logs the value and
**	stores its atof() conversion in wallfactor.  A result of zero
**	is reported as an error (return 0); otherwise 1 is returned.
*/
static u_long
wallmult(value)
	char 	*value;
{
	static char	id[] = "wallmult";

	log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, value);
	wallfactor = atof(value);
	return (wallfactor == 0.0) ? 0 : 1;
}

static u_long
usecp(value)
	char 	*value;
{
	char *pnxt;
	static int   cphosts_max = 0;
	static char *id = "usecp";

	if (cphosts_max == 0) {
		pcphosts = malloc(2 * sizeof(struct cphosts));
		cphosts_max = 2;
	} else if (cphosts_max == cphosts_num) {
		pcphosts = realloc(pcphosts,
				(cphosts_max + 2) * sizeof(struct cphosts));
		cphosts_max += 2;
	}
	pnxt = strchr(value, (int)':');
	if (pnxt == NULL) {
		sprintf(log_buffer, "invalid host specification: %s", value);
		log_err(-1, id, log_buffer);
		return 0;
	}
	*pnxt++ = '\0';
	(pcphosts+cphosts_num)->cph_hosts = strdup(value);
	value = pnxt;	/* now ptr to path */
	while (!isspace(*pnxt))
		pnxt++;
	*pnxt++ = '\0';
	(pcphosts+cphosts_num)->cph_from = strdup(value);

	(pcphosts+cphosts_num)->cph_to   = strdup(skipwhite(pnxt));
	
	cphosts_num++;
	
	return 1;
}

/*
**	Handler for the "$prologalarm" config command.  Logs the value
**	and, when it converts to a positive number, stores it in
**	pe_alarm_time and returns 1.  Zero or negative values are
**	rejected with a return of 0.
*/
static unsigned long prologalarm(value)
	char    *value;
{
	extern unsigned int pe_alarm_time;
	int secs;

	log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,"prolog alarm",value);
	secs = (unsigned int)atoi(value);
	if (secs > 0) {
		pe_alarm_time = (unsigned int)secs;
		return 1;
	}
	return 0;	/* error */
}

/*
**	Add static resource or shell escape line from config file.
**	This is a support routine for read_config().
**
**	"str" starts with the resource name, followed by white space and
**	the value.  A value starting with '!' is a shell escape and has
**	its trailing newline/white space removed with rmnl(); any other
**	value has trailing white space stripped.  str is modified in
**	place.  The name and a copy of the value are pushed on the front
**	of config_list, and the addition is logged with "file" and
**	"linenum" as the origin.
**
**	The name is copied into storage sized from the line itself, so a
**	long token cannot overrun a fixed buffer, and an empty value is
**	handled without looking in front of the string.
*/

static void add_static(str, file, linenum)
	char	*str;
	char	*file;
	int	 linenum;
{
	int	 i;
	char	*name;
	struct	config_list	*cp;

	/* the token can never be longer than the whole line */
	name = (char *)malloc(strlen(str) + 1);
	memcheck(name);

	str = tokcpy(str, name);/* resource name */
	str = skipwhite(str);	/* resource value */
	if (*str == '!')	/* shell escape command */
		rmnl(str);
	else {			/* get the value */
		i = strlen(str);
		/* strip trailing blanks */
		while (i > 0 && isspace((int)(unsigned char)str[i-1]))
			str[--i] = '\0';
	}

	cp = (struct config_list *)malloc(sizeof(struct config_list));
	memcheck((char *)cp);

	cp->c_link = config_list;
	cp->c.c_name = name;		/* the list entry owns the name */
	cp->c.c_u.c_value = strdup(str);
	memcheck(cp->c.c_u.c_value);

	sprintf(log_buffer, "%s[%d] add name %s value %s",
		file, linenum, name, str);
	log_record(PBSEVENT_DEBUG, 0, "add_static", log_buffer);

	config_list = cp;
}

/*
**	Handler for the "$ideal_load" config command.  Logs the value,
**	stores it in ideal_load_val and, if max_load_val has not been
**	set yet (still negative), uses it as the default for that too.
**	The setting is also registered as a static resource named
**	"ideal_load" through add_static().
**
**	Returns 1 on success, 0 if the value is negative or too long to
**	fit the local "ideal_load <value>" buffer.
*/
static unsigned long setidealload(value)
	char	*value;
{
	char	 newstr[50] = "ideal_load ";
	float	 val;

	val = atof(value);
	log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,"ideal_load",value);
	if (val < 0.0)
		return 0;	/* error */
	if (strlen(newstr) + strlen(value) >= sizeof(newstr))
		return 0;	/* would not fit in newstr */
	ideal_load_val = val;
	if (max_load_val < 0.0)
		max_load_val = val;	/* set a default */
	(void)strcat(newstr, value);
	add_static(newstr, "config", 0);
	return 1;
}

/*
**	Handler for the "$max_load" config command.  Logs the value,
**	stores it in max_load_val and, if ideal_load_val has not been
**	set yet (still negative), uses it as the default for that too.
**	The setting is also registered as a static resource named
**	"max_load" through add_static().
**
**	Returns 1 on success, 0 if the value is negative or too long to
**	fit the local "max_load <value>" buffer.
*/
static unsigned long setmaxload(value)
	char	*value;
{
	char	 newstr[50] = "max_load ";
	float	 val;

	val = atof(value);
	log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,"max_load",value);
	if (val < 0.0)
		return 0;	/* error */
	if (strlen(newstr) + strlen(value) >= sizeof(newstr))
		return 0;	/* would not fit in newstr */
	max_load_val = val;
	if (ideal_load_val < 0.0)
		ideal_load_val = val;
	(void)strcat(newstr, value);
	add_static(newstr, "config", 0);
	return 1;
}

/*
**	Open and read the config file.  Save information in a linked
**	list.  After reading the file, create an array, copy the list
**	elements to the array and free the list.
*/
int
read_config(file)
    char	*file;
{
	static	char		id[] = "read_config";
	static	struct		specials {
		char		*name;
		u_long		(*handler)();
	}			special[] = {
		{ "clienthost", addclient },
		{ "cputmult",	cputmult },
		{ "ideal_load",	setidealload },
		{ "logevent",   setlogevent },
		{ "max_load",	setmaxload },
		{ "prologalarm",prologalarm },
		{ "restricted",	restricted },
		{ "usecp",	usecp },
		{ "wallmult",	wallmult },
		{ NULL, NULL }
	};

	FILE			*conf;
	struct	stat		sb;
	struct	config_list	*cp;
	struct	config		*ap;
	char			line[120];
	char			name[50], *str;
	int			linenum, nconfig, i;

	for (i=0; i<mask_num; i++)
		free(maskclient[i]);
	mask_num = 0;

	if (file == NULL)
		file = config_file;
	if (file[0] == '\0')
		return 0;		/* no config file */

	if (stat(file, &sb) == -1) {
		sprintf(log_buffer, "fstat: %s", file);
		log_err(errno, id, log_buffer);
		if (config_file_specified)
			return 1;	/* file given and not there = error */
		else
			return 0;	/* ok for "config" not to be there  */
	}
#if !defined(DEBUG) && !defined(NO_SECURITY_CHECK)
	if (chk_file_sec(file, 0, 0, S_IWGRP|S_IWOTH, 1))
		return 1;
#endif	/* NO_SECURITY_CHECK */
	if ((conf = fopen(file, "r")) == NULL) {
		sprintf(log_buffer, "fopen: %s", file);
		log_err(errno, id, log_buffer);
		return 1;
	}

	nconfig = 0;
	linenum = 0;
	while (fgets(line, sizeof(line), conf)) {
		linenum++;
		if (line[0] == '#')	/* comment */
			continue;
		str = skipwhite(line);	/* pass over initial whitespace */
		if (*str == '\0')
			continue;

		if (*str == '$') {	/* special command */
			str = tokcpy(++str, name);/* resource name */
			for (i=0; special[i].name; i++) {
				if (strcmp(name, special[i].name) == 0)
					break;
			}
			if (special[i].name == NULL) {	/* didn't find it */
				sprintf(log_buffer,
					"special command name %s not found",
					name);
				log_err(-1, id, log_buffer);
				continue;
			}
			str = skipwhite(str);		/* command param */
			rmnl(str);
			if (special[i].handler(str) == 0) {
				sprintf(log_buffer,
				"%s[%d] special command %s failed with %s",
					file, linenum, name, str);
				log_err(-1, id, log_buffer);
			}
			continue;
		}

		add_static(str, file, linenum);
		nconfig++;

	}		
/*
**	Create a new array.
*/
	if (config_array) {
		for (ap=config_array; ap->c_name; ap++) {
			free(ap->c_name);
			free(ap->c_u.c_value);
		}
		free(config_array);
	}
	config_array = (struct config *)calloc(nconfig+1,
					       sizeof(struct config));
	memcheck((char *)config_array);
/*
**	Copy in the new information saved from the file.
*/
	for (i=0, ap=config_array; i<nconfig; i++, ap++) {
		*ap = config_list->c;
		cp = config_list->c_link;
		free(config_list);	/* don't free name and value strings */
		config_list = cp;	/* they carry over from the list */
	}
	ap->c_name = NULL;		/* one extra */
	fclose(conf);

	return 0;
}

/*
**	Get an rm_attribute structure from a string.  If a NULL is passed
**	for the string, use the previously remembered string.
**
**	An attribute has the form "[qualifier=value]"; the value may
**	itself contain balanced brackets.  Attributes whose qualifier
**	starts with "tag:" are skipped.  On success the position after
**	the attribute is remembered for the next NULL call and a pointer
**	to a static structure is returned; it is overwritten by the next
**	call.
**
**	Returns NULL if no well-formed attribute is found, if NULL is
**	passed before any string has been remembered, or if a qualifier
**	or value does not fit the static buffers.
*/
struct	rm_attribute	*
momgetattr(str)
    char	*str;
{
	static	char	cookie[] = "tag:";	/* rm_attribute to ignore */
	static	char	*hold = NULL;
	static	char	qual[80] = "";
	static	char	valu[4096] = "";
	static	struct	rm_attribute	attr = {qual, valu};
	int		level, i;

	if (str == NULL)	/* if NULL is passed, used prev value */
		str = hold;
	if (str == NULL)	/* nothing remembered yet */
		return NULL;
	
	do {
		str = skipwhite(str);
		if (*str++ != '[')
			return NULL;
		
		str = skipwhite(str);		/* copy qualifier */
		/* same token rules as tokcpy(), but bounded by qual */
		for (i=0; isalnum((int)(unsigned char)*str) ||
				*str == ':' || *str == '_'; str++, i++) {
			if (i >= (int)sizeof(qual) - 1)
				return NULL;
			qual[i] = *str;
		}
		qual[i] = '\0';
		str = skipwhite(str);
		
		if (*str++ != '=')
			return NULL;
		
		level = 0;
		for (i=0; *str; str++, i++) {
			if (*str == '[')
				level++;
			else if (*str == ']') {
				if (level == 0)
					break;
				level--;
			}
			if (i >= (int)sizeof(valu) - 1)
				return NULL;	/* value too long */
			valu[i] = *str;
		}
		if (*str++ != ']')
			return NULL;
		
		valu[i] = '\0';
		DBPRT(("momgetattr: found %s = %s\n", qual, valu))
	} while (strncmp(qual, cookie, sizeof(cookie)-1) == 0);
	hold = str;
	DBPRT(("momgetattr: passing back %s = %s\n", qual, valu))
	return &attr;
}

/*
**	Check the request against the format of the line read from
**	the config file.  If it is a static value, there should be
**	no params.  If it is a shell escape, the parameters (if any)
**	should match the command line for the system call.
**
**	s	value from the config file; a leading '!' marks a shell
**		escape.
**	attr	first parameter of the request (further ones are
**		fetched with momgetattr(NULL)), or NULL.
**
**	For a static value, s itself is returned.  For a shell escape,
**	each "%name" in the command is replaced by the value of the
**	parameter with that name, the command is run with popen() and
**	the first line of its output is returned in ret_string.  Errors
**	are returned in ret_string as "? <RM_ERR_ code>".
**
**	NOTE(review): parameter values are pasted into the shell command
**	without quoting -- confirm that every client allowed to reach
**	this code is trusted.
*/
char	*
conf_res(s, attr)
    char			*s;
    struct	rm_attribute	*attr;
{
	char	*id = "conf_res";
	char	*name[RM_NPARM+1];	/* one extra for the NULL marker */
	char	*value[RM_NPARM+1];
	int	used[RM_NPARM+1];
	char	*param = NULL;
	char	*d;
	int	i, fd, len;
	int	gotnl = 0;
	FILE	*child;
	char	*child_spot;
	int	child_len;

	if (*s != '!') {	/* static value */
		if (attr) {
			sprintf(ret_string, "? %d", RM_ERR_BADPARAM);
			return ret_string;
		}
		else
			return s;
	}
/*
**	From here on we are going to put together a shell command
**	to do the requestor's bidding.  Parameter substitution
**	is the first step.
*/
	for (i=0; i<RM_NPARM; i++) {	/* remember params */
		if (attr == NULL)
			break;
		name[i] = strdup(attr->a_qualifier);
		memcheck(name[i]);
		value[i] = strdup(attr->a_value);
		memcheck(value[i]);
		used[i] = 0;
		attr = momgetattr(NULL);
	}
	name[i] = NULL;			/* must be set before any "goto done" */
	if (attr) {			/* too many params */
		log_err(-1, id, "too many parms");
		sprintf(ret_string, "? %d", RM_ERR_BADPARAM);
		goto done;
	}

	/* a token taken from s can never be longer than s itself */
	param = (char *)malloc(strlen(s) + 1);
	memcheck(param);

	for (d=ret_string, s++; *s;) {	/* scan command */
		if (*s == '%') {	/* possible token */
			char	*hold;

			hold = tokcpy(s+1, param);
			for (i=0; name[i]; i++) {
				if (strcmp(param, name[i]) == 0)
					break;
			}
			if (name[i]) {	/* found a match */
				int	vlen = strlen(value[i]);

				/* room for the value and a final NUL */
				checkret(&d, vlen + 1);
				memcpy(d, value[i], vlen);
				d += vlen;
				s = hold;
				used[i] = 1;
				continue;
			}
		}
		checkret(&d, 2);	/* this character and a final NUL */
		*d++ = *s++;
	}
	for (i=0; name[i]; i++) {
		if (!used[i]) {		/* parameter sent but not used */
			log_err(-1, id, "unused parameters");
			sprintf(ret_string, "? %d", RM_ERR_BADPARAM);
			goto done;
		}
	}
			
	*d = '\0';
	DBPRT(("command: %s\n", ret_string))

	if ((child=popen(ret_string, "r")) == NULL) {
		log_err(errno, id, "popen");
		sprintf(ret_string, "? %d", RM_ERR_SYSTEM);
		goto done;
	}

	fd = fileno(child);
	child_spot = ret_string;
	child_len = 0;
	child_spot[0] = '\0';
	while ( (len = read(fd, child_spot, ret_size-child_len)) > 0 ) {
		for (i=0; i<len; i++) {
			if (child_spot[i] == '\n')
				break;
		}
		if (i < len) {		/* found newline */
			child_len += i+1;
			gotnl = 1;
			break;
		}
		child_len += len;
		child_spot += len;
		checkret(&child_spot, len);
	}
	if (len == -1) {
		log_err(errno, id, "pipe read");
		sprintf(ret_string, "? %d", RM_ERR_SYSTEM);
		pclose(child);		/* stream came from popen() */
		goto done;
	}

	pclose(child);
	if (gotnl)
		ret_string[child_len-1] = '\0';	/* hack off newline */
	else
		ret_string[child_len] = '\0';	/* keep all of the output */
 done:
	for (i=0; name[i]; i++) {		/* free up params */
		free(name[i]);
		free(value[i]);
	}
	free(param);
	return ret_string;
}

/*
**	Signal catcher: format the signal number into log_buffer, log a
**	"reset" record, flag the main code through call_hup and flip the
**	RPP debug print switch.
*/
static void
catch_hup(sig)
int	sig;
{
	static	char	whoami[] = "catch_hup";

	sprintf(log_buffer, "caught signal %d", sig);
	log_record(PBSEVENT_SYSTEM, 0, whoami, "reset");

	call_hup = 1;
	rpp_dbprt = 1 - rpp_dbprt;	/* toggle debug prints for RPP */
}

/*
**	Carry out the restart requested through call_hup: clear the
**	flag, log the reset, close and reopen the log, read the default
**	config file again, then clean up and reinitialize the dependent
**	code.
*/
static void
process_hup()
{
	call_hup = 0;

	log_record(PBSEVENT_SYSTEM, 0, "process_hup", "reset");
	log_close(1);
	log_open(log_file, path_log);

	read_config(NULL);

	cleanup();
	initialize();
}

/*
**	Got an alarm call.
**	Record it in the log (and on the debug output when DBPRT is
**	active).  Nothing else is done here.
*/
void
toolong(sig)
int	sig;
{
	char	*id = "toolong";

	log_record(PBSEVENT_SYSTEM, 0, id, "alarm call");
	DBPRT(("alarm call\n"))
}

#ifdef	DEBUG
/*
**	Debug aid: write up to the first 50 bytes of buf to the log as
**	a debug record tagged with id.  Printable characters are copied
**	as they are; every other byte is shown as its decimal value in
**	parentheses.  Without DEBUG the call expands to nothing.
*/
void
log_verbose(id, buf, len)
char	*id;
char	*buf;
int	len;
{
	int	i;
	char	*cp;

	len = MIN(len, 50);
	cp = log_buffer;
	for (i=0; i<len; i++) {
		int	c = buf[i];

		/* isprint() is only defined for unsigned char values */
		if (isprint((int)(unsigned char)buf[i]))
			*cp++ = c;
		else {
			sprintf(cp, "(%d)", c);
			cp += strlen(cp);
		}
	}
	*cp = '\0';
	log_record(PBSEVENT_DEBUG, 0, id, log_buffer);
}
#else
#define	log_verbose(a, b, c)
#endif

/*
**	Decide whether an IP address (host byte order) belongs to one of
**	the "restricted" access hosts.  The address is turned into a
**	host name, which is compared from its last character backwards
**	against every pattern in maskclient.  A pattern matches when all
**	of it equals the tail of the host name, or when everything but a
**	leading '*' does.
**
**	Return 0 if a pattern matches, 1 if none does or the address has
**	no name.
*/
int
bad_restrict(ipadd)
    u_long	ipadd;
{
	struct	hostent		*host, *gethostbyaddr();
	struct	in_addr	in;
	int	which;
	int	hlast, mlast;	/* index of last char of host / pattern */
	int	hi, mi;
	char	*hname, *mask;

	in.s_addr = htonl(ipadd);
	host = gethostbyaddr((void *)&in, sizeof(struct in_addr), AF_INET);
	if (host == NULL)
		return 1;

	hname = host->h_name;
	hlast = strlen(hname) - 1;
	for (which = 0; which < mask_num; which++) {
		mask = maskclient[which];
		mlast = strlen(mask) - 1;
		if (hlast < mlast)
			continue;	/* pattern longer than the name */

		hi = hlast;
		mi = mlast;
		while (mi >= 0 && hname[hi] == mask[mi]) {
			hi--;
			mi--;
		}
		if (mi == -1 || (mi == 0 && mask[0] == '*'))
			return 0;
	}
	return 1;
}

/*
**	Process a request for the resource monitor.  The i/o
**	will take place using DIS over a tcp fd or an rpp stream.
**
**	iochan	 tcp descriptor or rpp stream to talk on
**	version	 protocol version sent by the peer
**	tcp	 non-zero for a tcp descriptor, zero for an rpp stream
**
**	The peer must use protocol version RM_PROTOCOL_VER.  A peer
**	that is not in okclients (or, while port_care is set, does not
**	connect from a reserved port) is only served if it matches a
**	"restricted" pattern, and then only through the built-in
**	handlers: static config resources, reconfiguration and shutdown
**	are refused.
**
**	Returns 0 when the request was answered, 1 after a close
**	command, and -1 (with the channel closed) on any error.  A
**	shutdown command does not return.
**
**	Replies are built with bounded formatting, and the resource name
**	is length-checked before it is copied, because the request text
**	comes from the peer.  An over-long reply is truncated.
*/
int
rm_request(iochan, version, tcp)
     int	iochan;
     int	version;
     int	tcp;
{
	static	char		id[] = "rm_request";
	char			name[100];
	char			output[BUFSIZ];
	int			len;
	int			toklen;
	int			command, ret;
	int			restrictrm = 0;
	char			*curr, *value, *cp, *body;
	struct	config		*ap;
	struct	rm_attribute	*attr;
	struct	sockaddr_in	*addr;
	unsigned long		ipadd;
	u_short			port;
	void			(*close_io)	A_((int));
	int			(*flush_io)	A_((int));
	extern	struct	connection	svr_conn[];

	errno = 0;
	if (tcp) {
		ipadd = svr_conn[iochan].cn_addr;
		port = svr_conn[iochan].cn_port;
		close_io = close_conn;
		flush_io = DIS_tcp_wflush;
	}
	else {
		addr = rpp_getaddr(iochan);
		ipadd = ntohl(addr->sin_addr.s_addr);
		port = ntohs((unsigned short)addr->sin_port);
		close_io = (void(*) A_((int)) )rpp_close;
		flush_io = rpp_flush;
	}
	if (version != RM_PROTOCOL_VER) {
		sprintf(log_buffer, "protocol version %d unknown", version);
		goto bad;
	}
	if ((port_care && (port >= IPPORT_RESERVED)) ||
			!tfind(ipadd, &okclients) ) {
		if (bad_restrict(ipadd)) {
			sprintf(log_buffer, "bad attempt to connect");
			goto bad;
		}
		restrictrm = 1;
	}

	/* looks okay, find out what command it is */
	command = disrsi(iochan, &ret);
	if (ret != DIS_SUCCESS) {
		sprintf(log_buffer, "no command %s", dis_emsg[ret]);
		goto bad;
	}

	switch (command) {

	case RM_CMD_CLOSE:		/* no response to this */
		close_io(iochan);
		return 1;

	case RM_CMD_REQUEST:
		reqnum++;
		ret = diswsi(iochan, RM_RSP_OK);
		if (ret != DIS_SUCCESS) {
			sprintf(log_buffer,
				"write request responce failed: %s",
				dis_emsg[ret]);
			goto bad;
		}

		for (;;) {
			cp = disrst(iochan, &ret);
			if (ret == DIS_EOD)
				break;
			else if (ret != DIS_SUCCESS) {
				sprintf(log_buffer,
					"problem with request line: %s",
					dis_emsg[ret]);
				goto bad;
			}
			curr = skipwhite(cp);

			/* measure the name first: tokcpy() is unbounded */
			toklen = 0;
			while (isalnum((int)(unsigned char)curr[toklen]) ||
			       curr[toklen] == ':' || curr[toklen] == '_')
				toklen++;

			if (toklen == 0 || toklen >= (int)sizeof(name)) {
				/* no name, or one too long to be known */
				(void)snprintf(output, sizeof(output),
					"%.*s=? %d",
					(int)sizeof(output) - 32,
					cp, RM_ERR_UNKNOWN);
			}
			else {
				curr = tokcpy(curr, name);
				ap = rm_search(config_array, name);
				attr = momgetattr(curr);

				alarm(alarm_time);
				if (ap && !restrictrm) {	/* static */
					(void)snprintf(output, sizeof(output),
						"%s=%s",
						cp,
						conf_res(ap->c_u.c_value,
						attr));
				}
				else {		/* check dependent code */
					log_buffer[0] = '\0';
					value = dependent(name, attr);
					if (value) {
						(void)snprintf(output,
							sizeof(output),
							"%s=%s",
							cp, value);
					}
					else {	/* not found anywhere */
						(void)snprintf(output,
							sizeof(output),
							"%.*s=? %d",
							(int)sizeof(output) - 32,
							cp, rm_errno);
					}
				}
				alarm(0);
			}
			free(cp);
			ret = diswst(iochan, output);
			if (ret != DIS_SUCCESS) {
				sprintf(log_buffer,
					"write string failed %s",
					dis_emsg[ret]);
				goto bad;
			}
		}
		break;

	case RM_CMD_CONFIG:
		if (restrictrm) {
			/* reported once, with the address, at "bad" */
			sprintf(log_buffer, "restricted configure attempt");
			goto bad;
		}

		log_record(PBSEVENT_SYSTEM, 0, id, "configure");
		body = disrst(iochan, &ret);
		if (ret == DIS_EOD)
			body = NULL;
		else if (ret != DIS_SUCCESS) {
			sprintf(log_buffer,
				"problem with config body %s",
				dis_emsg[ret]);
			goto bad;
		}
		len = read_config(body);
		if (body != NULL)
			free(body);	/* only needed while reading */

		ret = diswsi(iochan, len ? RM_RSP_ERROR : RM_RSP_OK);
		if (ret != DIS_SUCCESS) {
			sprintf(log_buffer,
				"write config responce failed %s",
				dis_emsg[ret]);
			goto bad;
		}
		break;

	case RM_CMD_SHUTDOWN:
		if (restrictrm) {
			/* reported once, with the address, at "bad" */
			sprintf(log_buffer, "restricted shutdown attempt");
			goto bad;
		}

		log_record(PBSEVENT_SYSTEM, 0, id, "shutdown");
		ret = diswsi(iochan, RM_RSP_OK);
		if (ret != DIS_SUCCESS) {
			sprintf(log_buffer,
				"write shutdown responce failed %s",
				dis_emsg[ret]);
			log_err(-1, id, log_buffer);
		}
		flush_io(iochan);
		close_io(iochan);
		cleanup();
		log_close(1);
		rpp_shutdown();
		exit(0);

	default:
		sprintf(log_buffer, "unknown command %d", command);
		log_err(-1, id, log_buffer);
		ret = diswsi(iochan, RM_RSP_ERROR);
		if (ret != DIS_SUCCESS) {
			sprintf(log_buffer,
				"write default responce failed %s",
				dis_emsg[ret]);
			goto bad;
		}
		ret = diswst(iochan, log_buffer);
		if (ret != DIS_SUCCESS) {
			sprintf(log_buffer,
				"write string failed %s",
				dis_emsg[ret]);
			goto bad;
		}
		break;
	}
	if (flush_io(iochan) == -1) {
		log_err(errno, id, "flush");
		log_buffer[0] = '\0';	/* nothing stale in front of the address */
		goto bad;
	}
	return 0;

bad:
	/* append who was refused to the reason already in log_buffer */
	sprintf(output,
		"\n\tmessage refused from port %d addr %lu.%lu.%lu.%lu", port,
		(ipadd & 0xff000000) >> 24,
		(ipadd & 0x00ff0000) >> 16,
		(ipadd & 0x0000ff00) >> 8,
		(ipadd & 0x000000ff));
        strcat(log_buffer, output);
	log_err(errno, id, log_buffer);

	close_io(iochan);
	return -1;
}

/*
** Handle one message on an RPP stream.  The protocol number and the
** protocol version are read first; a failure to read either one is
** passed to im_eof().  The message is then given to the resource
** monitor, inter-MOM or inter-server handler according to the
** protocol number.  A stream speaking an unknown protocol is closed.
*/
void
do_rpp(stream)
    int		stream;
{
	static	char		id[] = "do_rpp";
	int			ret, proto, version;
	void	im_request	A_((int stream, int version));
	void	is_request	A_((int stream, int version));
	void	im_eof		A_((int stream, int ret));

	DIS_rpp_reset();

	proto = disrsi(stream, &ret);
	if (ret != DIS_SUCCESS) {
		im_eof(stream, ret);
		return;
	}

	version = disrsi(stream, &ret);
	if (ret != DIS_SUCCESS) {
		DBPRT(("%s: no protocol version number %s\n",
				id, dis_emsg[ret]))
		im_eof(stream, ret);
		return;
	}

	if (proto == RM_PROTOCOL) {
		DBPRT(("%s: got a resource monitor request\n", id))
		/* end the message only if the request was answered */
		if (rm_request(stream, version, 0) == 0)
			rpp_eom(stream);
	}
	else if (proto == IM_PROTOCOL) {
		DBPRT(("%s: got an internal task manager request\n", id))
		im_request(stream, version);
	}
	else if (proto == IS_PROTOCOL) {
		DBPRT(("%s: got an inter-server request\n", id))
		is_request(stream, version);
	}
	else {
		DBPRT(("%s: unknown request %d\n", id, proto))
		rpp_close(stream);
	}
}

/*
** Service every RPP stream that rpp_poll() reports as ready.  The
** loop ends when rpp_poll() returns -2, or -1 (which is logged as an
** error).  The fd argument is ignored.
*/
void
rpp_request(fd)
    int		fd;	/* not used */
{
	static	char	id[] = "rpp_request";
	int	stream;

	while ((stream = rpp_poll()) != -2) {
		if (stream == -1) {
			log_err(errno, id, "rpp_poll");
			break;
		}
		do_rpp(stream);
	}
}

int
do_tcp(fd)
    int		fd;
{
	static	char	id[] = "do_tcp";
	int	ret, proto, version;
	int	tm_request A_((int stream, int version));
	extern	int		pbs_tcp_timeout;

	pbs_tcp_timeout = 0;
	proto = disrsi(fd, &ret);
	pbs_tcp_timeout = 30;

	switch (ret) {
	case DIS_SUCCESS:		/* worked */
		break;
	case DIS_EOF:			/* closed */
		close_conn(fd);
	case DIS_EOD:			/* still open */
		return 1;
	default:
		sprintf(log_buffer, "no protocol number: %s",
				dis_emsg[ret]);
		goto bad;
	}

	version = disrsi(fd, &ret);
	if (ret != DIS_SUCCESS) {
		DBPRT(("%s: no protocol version number %s\n",
				id, dis_emsg[ret]))
		goto bad;
	}

	switch (proto) {
	case	RM_PROTOCOL:
		DBPRT(("%s: got a resource monitor request\n", id))
		pbs_tcp_timeout = 0;
		ret = rm_request(fd, version, 1);
		pbs_tcp_timeout = 30;
		break;

	case	TM_PROTOCOL:
		DBPRT(("%s: got an internal task manager request\n", id))
		ret = tm_request(fd, version);
		break;

	default:
		DBPRT(("%s: unknown request %d\n", id, proto))
		goto bad;
	}
	return ret;

  bad:
	close_conn(fd);
	return -1;
}

/*
** Handle activity on a tcp connection.  The peer must be listed in
** okclients; a connection from any other address is logged and
** closed.  Otherwise do_tcp() is called until it returns non-zero.
**
** The port is printed as stored in svr_conn[], without a byte order
** conversion, the same way rm_request() reads cn_port.
** NOTE(review): this assumes cn_port is kept in host byte order --
** confirm against the code that fills svr_conn[].
*/
void
tcp_request(fd)
     int	fd;
{
	static	char	id[] = "tcp_request";
	int			c;
	unsigned long		ipadd;
	char			address[80];
	extern	struct	connection	svr_conn[];

	ipadd = svr_conn[fd].cn_addr;
	sprintf(address, "%lu.%lu.%lu.%lu:%d",
			(ipadd & 0xff000000) >> 24,
			(ipadd & 0x00ff0000) >> 16,
			(ipadd & 0x0000ff00) >> 8,
			(ipadd & 0x000000ff),
			(int)svr_conn[fd].cn_port);
	DBPRT(("%s: fd %d addr %s\n", id, fd, address))
	DIS_tcp_setup(fd);

	if (!tfind(ipadd, &okclients)) {
		sprintf(log_buffer, "bad connect from %s", address);
		log_err(errno, id, log_buffer);
		close_conn(fd);
		return;
	}

	log_buffer[0] = '\0';
	for (c=0;; c++) {
		DIS_tcp_funcs();

		if (do_tcp(fd))
			break;
	}
	DBPRT(("%s: processed %d\n", id, c))
	return;
}

/*
 *	Kill a job.
 *	Call with the job pointer and a signal number.
 */
int kill_job(pjob, sig)
    job		*pjob;
    int		sig;
{
	task	*ptask;
	int	ct = 0;

	log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
		pjob->ji_qs.ji_jobid, "kill_job");
 
	ptask = (task *)GET_NEXT(pjob->ji_tasks);
	while (ptask) {
		if (ptask->ti_qs.ti_status == TI_STATE_RUNNING) {
			ct += kill_task(ptask, sig);
		}
		ptask = (task *)GET_NEXT(ptask->ti_jobtask);
	}
	return ct;
}

/*
 * finish_loop - the tail of every pass through MOM's main loop.
 *
 *	Handles left-over rpp traffic and any flagged child/task exits,
 *	then opens the signal window, waits in wait_request() for up to
 *	"waittime", and blocks the signals in allsigs again before
 *	returning to the caller.
 */

static void finish_loop(waittime)
    time_t	 waittime;
{
	static	char	id[] = "finish_loop";
	int	rc;

	/* check for any extra rpp messages */
	rpp_request(42);

	if (termin_child) {
		scan_for_terminated();
	}
	if (exiting_tasks) {
		scan_for_exiting();
	}

	/* signals are deliverable only from here ... */
	rc = sigprocmask(SIG_UNBLOCK, &allsigs, NULL);
	if (rc == -1) {
		log_err(errno, id, "sigprocmask(UNBLOCK)");
	}

	/* ... while we wait for a request to process ... */
	if (wait_request(waittime) != 0) {
		log_err(-1, msg_daemonname, "wait_request failed");
	}

	/* ... to here; they stay blocked while we do things */
	rc = sigprocmask(SIG_BLOCK, &allsigs, NULL);
	if (rc == -1) {
		log_err(errno, id, "sigprocmask(BLOCK)");
	}
}

/*
 * mom_lock - lock out other MOMs from this directory.
 *
 *	fds	descriptor of the lock file
 *	op	F_WRLCK to take the lock, F_UNLCK to release it
 *
 *	Applies "op" to the whole file with a non-blocking fcntl(F_SETLK).
 *	If fcntl() fails the error is logged, the message is also written
 *	to stderr and the process exits with status 1.
 */

static void mom_lock(fds, op)
	int fds;
	int op;		/* F_WRLCK  or  F_UNLCK */
{
	struct flock lk;	/* not named "flock": that is also a function */

	lk.l_type   = op;
	lk.l_whence = SEEK_SET;
	lk.l_start  = 0;
	lk.l_len    = 0;	/* whole file */
	if (fcntl(fds, F_SETLK, &lk) < 0) {
	    int	err = errno;	/* save before anything can change it */

	    (void)strcpy(log_buffer, "pbs_mom: another mom running");
	    log_err(err, msg_daemonname, log_buffer);
	    /* never hand a variable buffer to fprintf as the format */
	    (void)fputs(log_buffer, stderr);
	    exit (1);
	}
}

/*
 * size decoding routine.
 *
 *	Accepts a resource pointer and returns the decoded size in kb.
 *	The stored number is scaled by 2^atsv_shift; values given in
 *	words are first converted to bytes using
 *
 *		sizeof(word) = sizeof(int)
 *
 *	Returns 0 if the resource is not of type ATR_TYPE_SIZE or if the
 *	result cannot be represented in an unsigned long.
 */

unsigned long
getsize(pres)
    resource		*pres;
{
	unsigned long	value;
	unsigned long	shift;

	if (pres->rs_value.at_type != ATR_TYPE_SIZE)
		return (0);
	value = pres->rs_value.at_val.at_size.atsv_num;
	shift = pres->rs_value.at_val.at_size.atsv_shift;

	if (pres->rs_value.at_val.at_size.atsv_units ==
	    ATR_SV_WORDSZ) {
		if (value > ULONG_MAX / sizeof(int))
			return (0);
		value *= sizeof(int);
	}
	if (shift > 10) {
		shift -= 10;
		/*
		 * A shift count of the full width or more is undefined,
		 * and a smaller one can still push bits off the top.
		 * Report both the same way as the word overflow above.
		 */
		if (shift >= sizeof(value) * CHAR_BIT ||
		    value > (ULONG_MAX >> shift))
			return (0);
		return (value << shift);
	}

	/* stored unit is smaller than a kb: scale down */
	shift = 10 - shift;
	return (value >> shift);
}

/*
 * time decoding routine.
 *
 *	Accepts a resource pointer and returns its value, a time in
 *	seconds.  Returns 0 if the resource is not of type ATR_TYPE_LONG
 *	or if the stored value is negative.
 */

unsigned long
gettime(pres)
    resource		*pres;
{
	long	secs;

	if (pres->rs_value.at_type != ATR_TYPE_LONG)
		return (0);

	secs = pres->rs_value.at_val.at_long;
	return ((secs < 0) ? 0 : (unsigned long)secs);
}

int
job_over_limit(pjob)
	job	*pjob;
{
	attribute	*attr;
	attribute	*used;
	resource	*limresc;
	resource	*useresc;
	struct resource_def	*rd;
	long		total;
	int		index, i;
	unsigned long	limit;
	char		*units;

	if (mom_over_limit(pjob)) {		/* check my own limits */
		pjob->ji_nodekill = pjob->ji_nodeid;	/* no more POLL's */
		return 1;
	}
	if ( pjob->ji_numnodes == 1 ||		/* no other nodes */
	    (pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE) == 0)	/* not MS */
		return 0;

	if (pjob->ji_nodekill != TM_ERROR_NODE) {
		hnodent	*pnode = &pjob->ji_hosts[pjob->ji_nodekill];

		sprintf(log_buffer, "node %d (%s) requested job die, code %d",
			pjob->ji_nodekill, pnode->hn_host, pnode->hn_sister);
		return 1;
	}
	attr = &pjob->ji_wattr[JOB_ATR_resource];
	used = &pjob->ji_wattr[JOB_ATR_resc_used];
	for (limresc = (resource *)GET_NEXT(attr->at_val.at_list);
			limresc != (resource *)0;
			limresc = (resource *)GET_NEXT(limresc->rs_link)) {

		if ((limresc->rs_value.at_flags & ATR_VFLAG_SET) == 0)
			continue;

		rd = limresc->rs_defin;
		if (strcmp(rd->rs_name, "cput") == 0)
			index = 0;
		else if (strcmp(rd->rs_name, "mem") == 0)
			index = 1;
		else
			continue;

		useresc = find_resc_entry(used, rd);
		if (useresc == (resource *)0)
			continue;
		if ((useresc->rs_value.at_flags & ATR_VFLAG_SET) == 0)
			continue;

		total = (index == 0) ? gettime(useresc) : getsize(useresc);
		for (i=0; i<pjob->ji_numnodes-1; i++) {
			noderes	*nr = &pjob->ji_resources[i];

			total += ((index == 0) ? nr->nr_cput : nr->nr_mem);
		}
		limit = (index == 0) ? gettime(limresc) : getsize(limresc);

		if (limit <= total)
			break;
	}
	if (limresc == (resource *)0)
		return 0;

	units = index == 0 ? "secs" : "kb";
	sprintf(log_buffer, "%s job total %lu %s exceeded limit %lu %s",
		rd->rs_name, total, units, limit, units);
	pjob->ji_nodekill = pjob->ji_nodeid;
	return 1;
}
/*
 * usage - print the command line synopsis on stderr and exit
 *	with status 1.
 *
 *	prog	the program name to show, normally argv[0]
 *
 *	The option list mirrors the getopt() string used in main().
 */

void
usage(prog)
    char	*prog;
{
	fprintf(stderr,
		"Usage: %s [-C chkdirectory][-d dir][-c configfile][-r|-p][-R port][-M port][-S port][-L log][-a alarm][-x]\n", prog);
	exit(1);
}

/*
 * main - the main program of MOM
 */

main(argc, argv)
	int	 argc;
	char	*argv[];
{
	static	char	id[] = "mom_main";
	int	 	errflg, c;
	FILE		*dummyfile;
	char		*mom_home;
	task		*ptask;
	char		*ptr;
	int	 	recover = 0;
	int		tryport;
	int		rppfd;			/* fd for rm and im comm */
	int		privfd;			/* fd for sending job info */
	double		myla;
	struct sigaction act;
	job		*pjob;
	static time_t	polltime = 0;
	extern time_t	wait_time;
	resource	*prscput;
	extern char	*optarg;
	extern int	optind;

	/* If we are not run with real and effective uid of 0, forget it */

	if ((getuid() != 0) || (geteuid() != 0)) {
		fprintf(stderr, "%s: Must be run as root\n", argv[0]);
		return (1);
	}
	pbsgroup = getgid();
	loopcnt = time(NULL);

	/* Get our default service port */

	pbs_mom_port = get_svrport(PBS_MOM_SERVICE_NAME, "tcp",
				   PBS_MOM_SERVICE_PORT);
	default_server_port = get_svrport(PBS_BATCH_SERVICE_NAME, "tcp",
					  PBS_BATCH_SERVICE_PORT_DIS);

	pbs_rm_port = get_svrport(PBS_MANAGER_SERVICE_NAME, "tcp", 
				   PBS_MANAGER_SERVICE_PORT);

	errflg = 0;
	while ((c = getopt(argc, argv, "d:c:M:S:R:L:a:xC:pr")) != -1) {
		switch(c) {
		case 'd':	/* directory */
			path_home = optarg;
			break;
		case 'c':	/* config file */
			config_file_specified = 1;
			strcpy(config_file, optarg);	/* remember name */
			break;
		case 'M':
			pbs_mom_port = (unsigned int)atoi(optarg);
			if (pbs_mom_port == 0) {
				fprintf(stderr, "Bad MOM port value %s\n",
					optarg);
				exit(1);
			}
			break;
		case 'S':
			default_server_port = (unsigned int)atoi(optarg);
			if (default_server_port == 0) {
				fprintf(stderr, "Bad Server port value %s\n",
					optarg);
				exit(1);
			}
			break;
		case 'R':
			pbs_rm_port = (unsigned int)atoi(optarg);
			if (pbs_rm_port == 0) {
				fprintf(stderr, "Bad RM port value %s\n",
					optarg);
				exit(1);
			}
			break;
		case 'L':
			log_file = optarg;
			break;
		case 'a':
			alarm_time = (int)strtol(optarg, &ptr, 10);
			if (alarm_time <= 0 || *ptr != '\0') {
				fprintf(stderr,
					"%s: bad alarm time\n", optarg);
				errflg = 1;
			}
			break;
		case 'x':
			port_care = 0;
			break;
		case 'C':
#if MOM_CHECKPOINT == 1
			if (*(optarg+strlen(optarg)) == '/') {
				path_checkpoint = optarg;
			}
			else {
				path_checkpoint = malloc(strlen(optarg)+2);
				(void)strcpy(path_checkpoint, optarg);
				(void)strcat(path_checkpoint, "/");
			}
#else
			fprintf(stderr, "Not compiled with CHECKPOINT\n");
#endif	/* MOM_CHECKPOINT */
			break;
		case 'p':
			if (recover == 0)
				recover = 2;
			else
				errflg = 1;
			break;
		case 'r':
			if (recover == 0)
				recover = 1;
			else
				errflg = 1;
			break;

		case '?':
		default:
			errflg = 1;
		}
	}
	if (errflg || optind != argc)
		usage(argv[0]);		/* exits */

	/* The following is code to reduce security risks                */
	/* start out with standard umask, system resource limit infinite */

	umask(022);
	if ((num_var_env = setup_env(PBS_ENVIRON)) == -1)
		exit(1);
	c = getgid();
	(void)setgroups(1, (gid_t *)&c);	/* secure suppl. groups */
	c = sysconf(_SC_OPEN_MAX);
	/* Close any inherited extra files, leaving stdin-err open */
	while (--c > 2)
		(void)close(c);	/* close any file desc left open by parent */
#ifndef DEBUG
#ifdef _CRAY
	(void)limit(C_JOB,      0, L_CPROC, 0);
	(void)limit(C_JOB,      0, L_CPU,   0);
	(void)limit(C_JOBPROCS, 0, L_CPU,   0);
	(void)limit(C_PROC,     0, L_FD,  255);
	(void)limit(C_JOB,      0, L_FSBLK, 0);
	(void)limit(C_JOBPROCS, 0, L_FSBLK, 0);
	(void)limit(C_JOB,      0, L_MEM  , 0);
	(void)limit(C_JOBPROCS, 0, L_MEM  , 0);
#else	/* not  _CRAY */
	{
	struct rlimit rlimit;

	rlimit.rlim_cur = RLIM_INFINITY;
	rlimit.rlim_max = RLIM_INFINITY;
	(void)setrlimit(RLIMIT_CPU,   &rlimit);
	(void)setrlimit(RLIMIT_FSIZE, &rlimit);
	(void)setrlimit(RLIMIT_DATA,  &rlimit);
#ifdef	RLIMIT_RSS
	(void)setrlimit(RLIMIT_RSS  , &rlimit);
#endif	/* RLIMIT_RSS */
#ifdef	RLIMIT_VMEM
	(void)setrlimit(RLIMIT_VMEM  , &rlimit);
#endif	/* RLIMIT_VMEM */
	}
#endif	/* not _CRAY */
#endif	/* DEBUG */

	/* set up and validate home paths    */

	c = 0;
	mom_home   = mk_dirs("mom_priv");
	path_jobs  = mk_dirs("mom_priv/jobs/");
	path_epilog =mk_dirs("mom_priv/epilogue");
	path_prolog =mk_dirs("mom_priv/prologue");
	path_log   = mk_dirs("mom_logs");
	path_spool = mk_dirs("spool/");
	path_undeliv = mk_dirs("undelivered/");
#if MOM_CHECKPOINT == 1
	if (path_checkpoint == (char *)0)	/* if not -C option */
		path_checkpoint = mk_dirs("checkpoint/");
	/* locate cput resource definition, needed for checking chkpt time */
	rdcput = find_resc_def(svr_resc_def, "cput", svr_resc_size);
#if !defined(DEBUG) && !defined(NO_SECURITY_CHECK)
	c = chk_file_sec(path_checkpoint, 1, 0, S_IWGRP|S_IWOTH, 1);
#endif  /* not DEBUG and not NO_SECURITY_CHECK */
#endif	/* MOM_CHECKPOINT */

	/* change working directory to mom_priv */

	if (chdir(mom_home) == -1) {
		perror("pbs_mom unable to go home");
		return (1);
	}

#if !defined(DEBUG) && !defined(NO_SECURITY_CHECK)
	c |= chk_file_sec(path_jobs,   1, 0, S_IWGRP|S_IWOTH, 1);
	c |= chk_file_sec(path_spool,  1, 1, S_IWOTH, 0);
	c |= chk_file_sec(PBS_ENVIRON, 0, 0, S_IWGRP|S_IWOTH, 0);
	if (c)
		return (3);
#endif  /* not DEBUG and not NO_SECURITY_CHECK */

	/* open log file while std in,out,err still open, forces to fd 4 */

	if ((c = log_open(log_file, path_log)) != 0) {	/* use given name */
		fprintf(stderr, "pbs_mom: Unable to open logfile\n");
		return 1;
	}

	lockfds = open("mom.lock", O_CREAT | O_TRUNC | O_WRONLY, 0644);
	if (lockfds < 0) {
		(void)strcpy(log_buffer, "pbs_mom: Unable to open lock file\n");
		return (1);
	} 
	mom_lock(lockfds, F_WRLCK);	/* See if other MOMs are running */

	/* initialize the network interface */

	if (init_network(pbs_mom_port, process_request) != 0) {
		c = errno;
		(void)sprintf(log_buffer,
			"server port = %u, errno = %d",
			pbs_mom_port, c);
		if (c == EADDRINUSE)
			(void)strcat(log_buffer, ", already in use");
		(void)strcat(log_buffer, "\n");
		log_err(-1, msg_daemonname, log_buffer);
		(void)fprintf(stderr, log_buffer);
		return (3);
	}

	if (init_network(pbs_rm_port, tcp_request) != 0) {
		c = errno;
		(void)sprintf(log_buffer,
			"resource (tcp) port = %u, errno = %d",
			pbs_rm_port, c);
		if (c == EADDRINUSE)
			(void)strcat(log_buffer, ", already in use");
		(void)strcat(log_buffer, "\n");
		log_err(-1, msg_daemonname, log_buffer);
		(void)fprintf(stderr, log_buffer);
		return (3);
	}
	net_set_type(Secondary, TaskManagerDIS);

	/* go into the background and become own session/process group */


#ifndef DEBUG
	mom_lock(lockfds, F_UNLCK);	/* unlock so child can relock */

	 if (fork() > 0)
		return (0);	/* parent goes away */

	if (setsid() == -1) {
		log_err(errno, msg_daemonname, "setsid failed");
		return (2);
	}
	mom_lock(lockfds, F_WRLCK);	/* lock out other MOMs */
	
	(void)fclose(stdin);
	(void)fclose(stdout);
	(void)fclose(stderr);
	dummyfile = fopen("/dev/null", "r");
	assert( (dummyfile != 0) && (fileno(dummyfile) == 0) );
	dummyfile = fopen("/dev/null", "w");
	assert( (dummyfile != 0) && (fileno(dummyfile) == 1) );
	dummyfile = fopen("/dev/null", "w");
	assert( (dummyfile != 0) && (fileno(dummyfile) == 2) );

#else	/* DEBUG */
#if	defined(_CRAY)
	
	/* CRAY cannot restart checkpointed job if MOM has controlling tty */

	(void)sprintf(log_buffer, "/tmp/pbs_mom.%d", getpid());
	printf("Debug output will be in %s\n", log_buffer);

	freopen(log_buffer, "w", stdout);
	freopen(log_buffer, "w", stderr);
	(void)ioctl(0, TCCLRCTTY, 0);
	(void)close(0);
#endif	/* _CRAY */
	(void)setvbuf(stdout, NULL, _IOLBF, 0);
	(void)setvbuf(stderr, NULL, _IOLBF, 0);
#endif /* DEBUG */

	/* write MOM's pid into lockfile */
	(void)ftruncate(lockfds, (off_t)0);
	(void)sprintf(log_buffer, "%d\n", getpid());
	(void)write(lockfds, log_buffer, strlen(log_buffer)+1);

#if (PLOCK_DAEMONS & 4)
	(void)plock(PROCLOCK);	/* lock daemon into memory */
#endif
	
	sigemptyset(&allsigs);
	act.sa_mask = allsigs;
	act.sa_flags = 0;

	/*
	**	Signals to be ignored.
	*/
	act.sa_handler = SIG_IGN;
	sigaction( SIGPIPE, &act, NULL);
	sigaction( SIGUSR1, &act, NULL);
	sigaction( SIGUSR2, &act, NULL);
#ifdef	SIGINFO
	sigaction( SIGINFO, &act, NULL);
#endif

	sigaddset(&allsigs, SIGHUP);	/* remember to block these */
	sigaddset(&allsigs, SIGINT);	/* during critical sections */
	sigaddset(&allsigs, SIGTERM);	/* so we don't get confused */
	sigaddset(&allsigs, SIGCHLD);
#ifdef _CRAY
	sigaddset(&allsigs, WJSIGNAL);
#endif
	act.sa_mask = allsigs;

	/*
	**	We want to abort system calls
	**	and call a function.
	*/
#ifdef	SA_INTERRUPT
	act.sa_flags |= SA_INTERRUPT;	/* don't restart system calls */
#endif
	act.sa_handler = catch_child;	/* set up to catch Death of Child */
	sigaction(SIGCHLD, &act, NULL);
#ifdef _CRAY
	sigaction(WJSIGNAL, &act, NULL);
#endif
	act.sa_handler = catch_hup;	/* do a restart on SIGHUP */
	sigaction(SIGHUP, &act, NULL);

	act.sa_handler = toolong;	/* handle an alarm call */
	sigaction(SIGALRM, &act, NULL);

	act.sa_handler = stop_me;	/* shutdown for these */
	sigaction( SIGINT, &act, NULL);
	sigaction( SIGTERM, &act, NULL);
#ifdef	SIGXCPU
	sigaction(SIGXCPU, &act, NULL);
#endif
#ifdef	SIGXFSZ
	sigaction(SIGXFSZ, &act, NULL);
#endif
#ifdef	SIGCPULIM
	sigaction(SIGCPULIM, &act, NULL);
#endif
#ifdef	SIGSHUTDN
	sigaction(SIGSHUTDN, &act, NULL);
#endif

#ifdef _CRAY
	/* Special code for CRAY MLS Systems */
	if (sysconf(_SC_CRAY_SECURE_SYS)) {
		struct usrv usrv;

		if (getusrv(&usrv) < 0) {
			(void)fprintf(stderr, "cannot get security info\n");
			return (1);
		}
		usrv.sv_permit = 0;
		usrv.sv_intcat = 0;
		usrv.sv_valcat = 0;
		if (setusrv(&usrv) < 0) {
			(void)fprintf(stderr, "cannot put security info\n");
			return (1);
		}
		if (setucat(0) < 0) {
			(void)fprintf(stderr, "cannot put security cat\n");
			return (2);
		}
	}
#endif	/* _CRAY */

	/* initialize variables */

	CLEAR_HEAD(svr_newjobs);
	CLEAR_HEAD(svr_alljobs);
	CLEAR_HEAD(mom_polljobs);
	CLEAR_HEAD(svr_requests);

	if ((c = gethostname(mom_host, PBS_MAXHOSTNAME)) == 0) {
		(void)strcpy(mom_short_name, mom_host);
		c = get_fullhostname(mom_host, mom_host, PBS_MAXHOSTNAME);
	}
	if (c == -1) {
		log_err(-1, msg_daemonname, "Unable to get my host name");
		return (-1);
	}

	time_now = time((time_t *)0);
	ret_size = 4096;
	if ((ret_string = malloc(ret_size)) == NULL) {
		perror("malloc");
		exit(1);
	}

	if ((rppfd = rpp_bind(pbs_rm_port)) == -1) {
		log_err(errno, id, "rpp_bind");
		exit(1);
	}

	rpp_fd = -1;		/* force rpp_bind() to get another socket */
	tryport = IPPORT_RESERVED;
	while (--tryport > 0) {
		if ((privfd = rpp_bind(tryport)) != -1)
			break;
		if ((errno != EADDRINUSE) && (errno != EADDRNOTAVAIL))
			break;
	}
	if (privfd == -1) {
		log_err(errno, id, "no privileged ports");
		exit(1);
	}

	localaddr = addclient("localhost");
	(void)addclient(mom_host);
	if (gethostname(ret_string, ret_size) == 0)
		(void)addclient(ret_string);

	if (read_config(NULL)) {
		fprintf(stderr, "%s: config file '%s' failed\n",
			argv[0], config_file);
		exit(1);
	}

	initialize();		/* init RM code */
	add_conn(rppfd, Primary, (pbs_net_t)0, 0, rpp_request);
	add_conn(privfd, Primary, (pbs_net_t)0, 0, rpp_request);

	/* initialize machine dependent polling routines */
	if ((c = mom_open_poll()) != PBSE_NONE) {
		log_err(c, msg_daemonname, "pre_poll failed");
		return (3);
	}
		
	/* recover & abort Jobs which were under MOM's control */

	init_abort_jobs(recover);

	/* record the fact that we are up and running */

	log_record(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER,
		  msg_daemonname, "Is up");

	/*
	 * Now at last, we are ready to do some work, the following
	 * section constitutes the "main" loop of MOM
	 */

	for (mom_run_state=1; mom_run_state; finish_loop(wait_time)) {

		if (call_hup)
			process_hup();
		end_proc();
		time_now = time((time_t *)0);
		if (time_now < (polltime + CHECK_POLL_TIME))
			continue;

		last_scan = polltime;
		polltime = time_now;

#if IBM_SP2==2
		(void)query_adp();
#endif	/*IBM_SP2 */

		/* check if loadave means we should be "busy" */

		if (max_load_val > 0.0) {
			(void)get_la(&myla);
			/* check if need to update busy state */
			check_busy(myla);
		}

		/* if needed, update server with my state change */
		/* can be changed in check_busy() or query_adp() */

		if (internal_state & UPDATE_MOM_STATE)
			state_to_server(0);

		/* are there any jobs? */

		if ((pjob = (job *)GET_NEXT(svr_alljobs)) == (job *)0)
			continue;

		/* there are jobs so update status */
		if (mom_get_sample() != PBSE_NONE)
			continue;

		for (; pjob; pjob = (job *)GET_NEXT(pjob->ji_alljobs)) {
			if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING)
				continue;

			/* update information for my tasks */
			(void)mom_set_use(pjob);
			(void)rpp_io();

			/* has all job processes vanished undetected ?       */
			/* double check by sig0 to session pid for each task */

			if (pjob->ji_flags & MOM_NO_PROC) {
			    pjob->ji_flags &= ~MOM_NO_PROC;
				ptask = (task *)GET_NEXT(pjob->ji_tasks);
				while (ptask) {
				
				    if ((kill(ptask->ti_qs.ti_sid, 0)==-1) &&
					(errno == ESRCH)) {

					LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, id, "no active process found");
				        ptask->ti_qs.ti_exitstat = 0;
				        ptask->ti_qs.ti_status =TI_STATE_EXITED;
				        pjob->ji_qs.ji_un.ji_momt.ji_exitstat=0;
				        task_save(ptask);
				        exiting_tasks = 1;
				    }
			    	    ptask = (task *)GET_NEXT(ptask->ti_jobtask);
				}
			}
			
#if MOM_CHECKPOINT == 1
			/* see if need to check point any job */

			if (pjob->ji_chkpttime == 0)
				continue;

			prscput = find_resc_entry(
				       &pjob->ji_wattr[(int)JOB_ATR_resc_used],
					rdcput);
			if (pjob->ji_chkptnext>prscput->rs_value.at_val.at_long)
				continue;

			pjob->ji_chkptnext = prscput->rs_value.at_val.at_long +
					     pjob->ji_chkpttime;
			if ((c = start_checkpoint(pjob, 0, 0)) == PBSE_NONE)
				continue;
			if (c == PBSE_NOSUP)
				continue;

			/* getting here means something bad happened */
			(void)sprintf(log_buffer, 
				      "Checkpoint failed, error %d",c);
			(void)message_job(pjob, StdErr, log_buffer);
			log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
                               	   pjob->ji_qs.ji_jobid, log_buffer);
#endif	/* MOM_CHECKPOINT */
		}

		/* check on over limit condition for polled jobs */
		for (pjob = (job *)GET_NEXT(mom_polljobs); pjob;
				pjob = (job *)GET_NEXT(pjob->ji_jobque)) {
			if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING)
				continue;
			/*
			** Send message to get info from other MOM's
			** if I am Mother Superior for the job and
			** it is not being killed.
			*/
			if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE) &&
			    (pjob->ji_nodekill == TM_ERROR_NODE)) {
				/*
				** If can't send poll to everybody, the
				** time has come to die.
				*/
				if (send_sisters(pjob, IM_POLL_JOB) !=
						pjob->ji_numnodes-1)
					pjob->ji_nodekill = pjob->ji_nodeid;
			}

			c = pjob->ji_qs.ji_svrflags;
			if (c & JOB_SVFLG_OVERLMT2) {
				(void)kill_job(pjob, SIGKILL);
				continue;
			}
			else if (c & JOB_SVFLG_OVERLMT1) {
				(void)kill_job(pjob, SIGTERM);
				pjob->ji_qs.ji_svrflags |= JOB_SVFLG_OVERLMT2;
				continue;
			}

			log_buffer[0] = '\0';
			if (job_over_limit(pjob)) {
				log_record(PBSEVENT_JOB | PBSEVENT_FORCE,
					PBS_EVENTCLASS_JOB,
		  			pjob->ji_qs.ji_jobid, log_buffer);

				if (c & JOB_SVFLG_HERE) {
					char	*kill_msg;

					kill_msg = malloc(80 +
						strlen(log_buffer));
					sprintf(kill_msg,
						"=>> PBS: job killed: %s\n",
						log_buffer);
					message_job(pjob, StdErr, kill_msg);
					free(kill_msg);
				}

				(void)kill_job(pjob, SIGTERM);
				pjob->ji_qs.ji_svrflags |= JOB_SVFLG_OVERLMT1;
			}
		}
	}

	/* kill any running jobs */

	pjob = (job *)GET_NEXT(svr_alljobs);
	while (pjob) {
		if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_RUNNING) {
			(void)kill_job(pjob, SIGKILL);
			pjob->ji_qs.ji_substate = JOB_SUBSTATE_EXITING;
			job_save(pjob, SAVEJOB_QUICK);
		}
		else
			term_job(pjob);

		pjob = (job *)GET_NEXT(pjob->ji_alljobs);
	}

	if (termin_child)
		scan_for_terminated();
	if (exiting_tasks)
		scan_for_exiting();	
	(void)mom_close_poll();

	rpp_shutdown();
	net_close(-1);		/* close all network connections */
	log_record(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER,
		  msg_daemonname, "Is down");
	log_close(1);
	return (0);
}

/*
 * mk_dirs - make the directory names used by MOM
 *
 *	base	path to append to path_home
 *
 *	Returns a newly allocated string holding path_home, a '/' if
 *	path_home does not already end in one, and then base.  The
 *	caller owns the string.  Exits with status 2 if the allocation
 *	fails.
 */

static char *mk_dirs(base)
	char *base;
{
	char	*pn;
	size_t	 ltop = strlen(path_home);

	/* room for both parts, an optional '/' and the terminator */
	pn = malloc(ltop + strlen(base) + 2);
	if (pn == (char *)0)
		exit(2);
	(void)strcpy(pn, path_home);
	/* an empty path_home has no last character to look at */
	if (ltop == 0 || path_home[ltop - 1] != '/')
		(void)strcat(pn, "/");
	(void)strcat(pn, base);
	return (pn);
}

/*
 * stop_me - shutdown signal handler.
 *
 *	main() installs it for SIGINT and SIGTERM and, where they are
 *	defined, SIGXCPU, SIGXFSZ, SIGCPULIM and SIGSHUTDN.  It logs the
 *	signal number and clears mom_run_state, the condition tested by
 *	MOM's main loop.
 */

void stop_me(sig)
    int	sig;
{
	(void)sprintf(log_buffer, "caught signal %d", sig);
	log_record(PBSEVENT_SYSTEM | PBSEVENT_FORCE,
		   PBS_EVENTCLASS_SERVER,
		   msg_daemonname,
		   log_buffer);

	/* ask the main loop to finish */
	mom_run_state = 0;
}
