/*
*         Portable Batch System (PBS) Software License
* 
* Copyright (c) 1999, MRJ Technology Solutions.
* All rights reserved.
* 
* Acknowledgment: The Portable Batch System Software was originally developed
* as a joint project between the Numerical Aerospace Simulation (NAS) Systems
* Division of NASA Ames Research Center and the National Energy Research
* Supercomputer Center (NERSC) of Lawrence Livermore National Laboratory.
* 
* Redistribution of the Portable Batch System Software and use in source
* and binary forms, with or without modification, are permitted provided
* that the following conditions are met:
* 
* - Redistributions of source code must retain the above copyright and
*   acknowledgment notices, this list of conditions and the following
*   disclaimer.
* 
* - Redistributions in binary form must reproduce the above copyright and 
*   acknowledgment notices, this list of conditions and the following
*   disclaimer in the documentation and/or other materials provided with the
*   distribution.
* 
* - All advertising materials mentioning features or use of this software must
*   display the following acknowledgment:
* 
*   This product includes software developed by NASA Ames Research Center,
*   Lawrence Livermore National Laboratory, and MRJ Technology Solutions.
* 
*         DISCLAIMER OF WARRANTY
* 
* THIS SOFTWARE IS PROVIDED BY MRJ TECHNOLOGY SOLUTIONS ("MRJ") "AS IS" WITHOUT 
* WARRANTY OF ANY KIND, AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT ARE EXPRESSLY DISCLAIMED.
* 
* IN NO EVENT, UNLESS REQUIRED BY APPLICABLE LAW, SHALL MRJ, NASA, NOR
* THE U.S. GOVERNMENT BE LIABLE FOR ANY DIRECT DAMAGES WHATSOEVER,
* NOR ANY INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* 
* This license will be governed by the laws of the Commonwealth of Virginia,
* without reference to its choice of law rules.
*/

#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <ctype.h>
#include <sys/types.h>
#include <pbs_ifl.h>
#include <log.h>
#include <rm.h>
#include "node_info.h"
#include "misc.h"
#include "globals.h"

/* revision-control identification string for this file */
static char *ident = "$Id: node_info.c,v 2.6.2.2 2000/01/22 00:17:12 hender Exp $";

/* We track the last node returned by find_best_node() so that we can start
 * after it when we are load balancing by round robin on nodes.  We keep the
 * node_info pointer instead of an index just in case a new node was added
 * since our last scheduling cycle.
 */
static node_info *last_node;

/*
 *      query_nodes - query all the nodes associated with a server
 *
 *        pbs_sd - communication descriptor with the pbs server
 *        sinfo -  server information (stored in every node; its num_nodes
 *                 member is updated on success)
 *
 *      returns a NULL terminated array of nodes associated with the server,
 *      or NULL on error.  The array and its nodes can be released with
 *      free_nodes().
 *
 */
node_info **query_nodes( int pbs_sd, server_info *sinfo )
{
  struct batch_status *nodes;		/* nodes returned from the server */
  struct batch_status *cur_node;	/* used to cycle through nodes */
  node_info **ninfo_arr;		/* array of nodes for scheduler's use */
  node_info *ninfo;			/* used to set up a node */
  char errbuf[256];
  char *err;				/* used with pbs_geterrmsg() */
  int num_nodes = 0;			/* the number of nodes */
  int i;

  if( ( nodes = pbs_statnode(pbs_sd, NULL, NULL, NULL) ) == NULL )
  {
    /* guard against a NULL message and bound the write into errbuf */
    err = pbs_geterrmsg(pbs_sd);
    snprintf(errbuf, sizeof(errbuf), "Error getting nodes: %s",
					      err != NULL ? err : "");
    log(PBSEVENT_SCHED, PBS_EVENTCLASS_NODE, "", errbuf);
    return NULL;
  }

  for( cur_node = nodes; cur_node != NULL; cur_node = cur_node -> next )
    num_nodes++;

  /* one extra slot for the terminating NULL */
  if( ( ninfo_arr = (node_info **) malloc( (num_nodes + 1) * sizeof(node_info *) ) ) == NULL )
  {
    perror("Error Allocating Memory");
    pbs_statfree(nodes);
    return NULL;
  }

  cur_node = nodes;
  for( i = 0; cur_node != NULL; i++ )
  {
    if( ( ninfo = query_node_info( cur_node, sinfo ) ) == NULL )
    {
      /* free_nodes() walks the array until it finds NULL, so the part
       * filled in so far has to be terminated before it is handed over
       */
      ninfo_arr[i] = NULL;
      pbs_statfree(nodes);
      free_nodes( ninfo_arr );
      return NULL;
    }

    /* query mom on the node for resources.  The result is deliberately
     * ignored: a node whose mom can not be reached keeps its defaults.
     */
    talk_with_mom( ninfo );
    ninfo_arr[i] = ninfo;

    cur_node = cur_node -> next;
  }
  ninfo_arr[i] = NULL;

  if( sinfo != NULL )
    sinfo -> num_nodes = num_nodes;

  pbs_statfree(nodes);
  return ninfo_arr;
}

/*
 *
 *      query_node_info - build a node_info out of the batch_status that
 *                        describes a single node
 *
 *        node - a node returned from a pbs_statnode() call
 *        sinfo - the server information stored in the new node
 *
 *      returns a new node_info filled with information from node, or NULL
 *      if the node_info could not be allocated
 *
 */
node_info *query_node_info( struct batch_status *node, server_info *sinfo )
{
  node_info *result = new_node_info();	/* the node being filled in */
  struct attrl *cur;			/* walks the attribute list */

  if( result == NULL )
    return NULL;

  result -> name = string_dup(node -> name);
  result -> server = sinfo;

  for( cur = node -> attribs; cur != NULL; cur = cur -> next )
  {
    if( strcmp(cur -> name, ATTR_NODE_state) == 0 )
    {
      /* node state... i.e. offline, down, free, etc */
      set_node_state( result, cur -> value );
    }
    else if( strcmp(cur -> name, ATTR_NODE_properties) == 0 )
    {
      /* properties from the server's nodes file */
      result -> properties = break_comma_list( cur -> value );
    }
    else if( strcmp(cur -> name, ATTR_NODE_jobs) == 0 )
    {
      /* the jobs running on the node */
      result -> jobs = break_comma_list( cur -> value );
    }
    else if( strcmp(cur -> name, ATTR_NODE_ntype) == 0 )
    {
      /* the node type... i.e. timesharing or cluster */
      set_node_type( result, cur -> value );
    }
  }

  return result;
}

/*
 *
 *      new_node_info - allocates a new node_info
 *
 *	returns the new node_info
 *
 */
node_info *new_node_info()
{
  node_info *new;

  if( ( new = (node_info *) malloc( sizeof(node_info) ) ) == NULL )
  {
    perror("Memory Allocation Error");
    return NULL;
  }

  new -> is_down = 0;
  new -> is_free = 0;
  new -> is_offline = 0;
  new -> is_unknown = 0;
  new -> is_reserved = 0;
  new -> is_exclusive = 0;
  new -> is_sharing = 0;
  new -> is_timeshare = 0;
  new -> is_cluster = 0;

  new -> name = NULL;
  new -> properties = NULL;
  new -> jobs = NULL;

  new -> max_load = 0.0;
  new -> ideal_load = 0.0;
  new -> arch = NULL;
  new -> ncpus = 0;
  new -> physmem = 0;
  new -> loadave = 0.0;

  return new;
}

/*
 *
 *      free_nodes - release a NULL terminated node_info array together
 *                   with every node stored in it
 *
 *        ninfo_arr - the node info array (may be NULL)
 *
 *      returns nothing
 *
 */
void free_nodes( node_info **ninfo_arr )
{
  node_info **cur;		/* walks the array up to the NULL terminator */

  if( ninfo_arr == NULL )
    return;

  for( cur = ninfo_arr; *cur != NULL; cur++ )
    free_node_info( *cur );

  free(ninfo_arr);
}

/*
 *
 *      free_node_info - release a node_info and the memory it owns: its
 *                       name, its properties and jobs lists and its arch
 *
 *        ninfo - the node to free (may be NULL)
 *
 *      returns nothing
 *
 */
void free_node_info( node_info *ninfo )
{
  if( ninfo == NULL )
    return;

  /* free() accepts NULL, so the plain strings need no test of their own */
  free( ninfo -> name );
  free( ninfo -> arch );

  if( ninfo -> properties != NULL )
    free_string_array( ninfo -> properties );

  if( ninfo -> jobs != NULL )
    free_string_array( ninfo -> jobs );

  free(ninfo);
}

/*
 *
 *      set_node_type - set the node type bits
 *
 *        ninfo - the node to set the type
 *        ntype - the type string from the server
 *
 *      returns 0 if the type was recognized, non-zero if either argument
 *      is NULL or the type is unknown (an unknown type is also logged)
 *
 */
int set_node_type( node_info *ninfo, char *ntype )
{
  char errbuf[256];

  if( ntype == NULL || ninfo == NULL )
    return 1;

  if( !strcmp(ntype, ND_timeshared) )
    ninfo -> is_timeshare = 1;
  else if( !strcmp(ntype, ND_cluster) )
    ninfo -> is_cluster = 1;
  else
  {
    /* ntype comes from the server and has no length limit of its own,
     * so the message is truncated to fit errbuf
     */
    snprintf(errbuf, sizeof(errbuf), "Unknown node type: %s", ntype);
    log( PBSEVENT_SCHED, PBS_EVENTCLASS_NODE, ninfo -> name, errbuf);
    return 1;
  }

  return 0;
}

/*
 *
 *      set_node_state - set the node state info bits
 *
 *        ninfo - the node to set the state
 *        state - the comma separated state string from the server.  It is
 *                split in place with strtok(), so it is modified.
 *
 *      returns 0 once the string has been processed (unknown states are
 *      logged and skipped), non-zero if either argument is NULL
 *
 */
int set_node_state( node_info *ninfo, char *state )
{
  char errbuf[256];
  char *tok;				/* used with strtok() */

  if( ninfo == NULL || state == NULL )
    return 1;

  for( tok = strtok(state, ","); tok != NULL; tok = strtok(NULL, ",") )
  {
    /* skip leading white space.  The <ctype.h> functions are only defined
     * for unsigned char values (and EOF), so a plain char must not be
     * passed directly.
     */
    while( isspace( (unsigned char) *tok ) )
      tok++;

    if( !strcmp(tok, ND_down) )
      ninfo -> is_down = 1;
    else if( !strcmp(tok, ND_free) )
      ninfo -> is_free = 1;
    else if( !strcmp(tok, ND_offline) )
      ninfo -> is_offline = 1;
    else if( !strcmp(tok, ND_state_unknown) )
      ninfo -> is_unknown = 1;
    else if( !strcmp(tok, ND_job_exclusive) )
      ninfo -> is_exclusive = 1;
    else if( !strcmp(tok, ND_job_sharing) )
      ninfo -> is_sharing = 1;
    else if( !strcmp(tok, ND_reserve) )
      ninfo -> is_reserved = 1;
    else if( !strcmp(tok, ND_busy) )
      ninfo -> is_busy = 1;
    else
    {
      /* the token has no length limit, so bound the write into errbuf */
      snprintf(errbuf, sizeof(errbuf), "Unknown Node State: %s", tok);
      log(PBSEVENT_SCHED, PBS_EVENTCLASS_NODE, ninfo -> name, errbuf);
    }
  }

  return 0;
}
      
/*
 *
 *      talk_with_mom - talk to mom and get resources
 *
 *        ninfo - the node whose mom is asked for the resources named in
 *                res_to_get[]
 *
 *      returns non-zero if the connection to mom can not be opened,
 *      0 otherwise (including when ninfo is NULL or the node is down, in
 *      which case nothing is queried)
 *
 */
int talk_with_mom( node_info *ninfo )
{
  int mom_sd;			/* connection descriptor to mom */
  char *mom_ans;		/* the answer from mom - getreq() */
  char *endp;			/* used with strtol() / strtod() */
  double testd;			/* used to convert string -> double */
  int testi;			/* used to convert string -> int */
  char errbuf[256];
  int i;

  if( ninfo == NULL || ninfo -> is_down )
    return 0;

  if( ( mom_sd = openrm(ninfo -> name, pbs_rm_port) ) < 0 )
  {
    log(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST, ninfo -> name, "Can not open connection to mom");
    return 1;
  }

  for( i = 0; i < num_resget; i++ )
    addreq(mom_sd, (char *) res_to_get[i]);

  /* the answers are matched to the requests by position */
  for( i = 0; i < num_resget && (mom_ans = getreq(mom_sd)) != NULL; i++ )
  {
    if( !strcmp(res_to_get[i], "arch") )
    {
      /* the node keeps this answer; free_node_info() releases it */
      ninfo -> arch = mom_ans;
      continue;
    }

    if( !strcmp(res_to_get[i], "max_load") )
    {
      /* an unparsable answer falls back to ncpus as known at this point */
      testd = strtod(mom_ans, &endp);
      if( *endp == '\0' )
	ninfo -> max_load = testd;
      else
	ninfo -> max_load = ninfo -> ncpus;
    }
    else if( !strcmp(res_to_get[i], "ideal_load") )
    {
      testd = strtod(mom_ans, &endp);
      if( *endp == '\0' )
	ninfo -> ideal_load = testd;
      else
	ninfo -> ideal_load = ninfo -> ncpus;
    }
    else if( !strcmp(res_to_get[i], "ncpus") )
    {
      testi = strtol(mom_ans, &endp, 10);
      if( *endp == '\0' )
	ninfo -> ncpus = testi;
      else
	ninfo -> ncpus = 1;
    }
    else if( !strcmp(res_to_get[i], "physmem") )
      ninfo -> physmem = res_to_num( mom_ans );
    else if( !strcmp(res_to_get[i], "loadave") )
    {
      testd = strtod(mom_ans, &endp);
      if( *endp == '\0' )
	ninfo -> loadave = testd;
      else
	ninfo -> loadave = -1.0;
    }
    else
    {
      /* the answer has no length limit, so bound the write into errbuf */
      snprintf(errbuf, sizeof(errbuf), "Unknown resource value[%d]: %s", i, mom_ans);
      log(PBSEVENT_SCHED, PBS_EVENTCLASS_NODE, ninfo -> name, errbuf);
    }

    /* every answer that was not handed over to the node is released here,
     * including the answer to an unknown resource
     */
    free(mom_ans);
  }

  closerm(mom_sd);
  return 0;
}

/*
 *
 *      node_filter - filter a node array and return a new filtered array
 *
 *        nodes - the array to filter
 *        size  - number of entries of nodes to look at
 *        filter_func - pointer to a function that will filter the nodes
 *              - returns non-zero: node will be added to filtered array
 *              - returns 0: node will NOT be added to filtered array
 *        arg - an optional arg passed to filter_func
 *
 *      returns pointer to a newly allocated, NULL terminated array holding
 *      the accepted node pointers (the nodes themselves are not copied),
 *      or NULL if no node was accepted, an argument is invalid, or memory
 *      could not be allocated
 *
 *      filter_func prototype: int func( node_info *, void * )
 *
 */
node_info **node_filter( node_info **nodes, int size, 
			 int (*filter_func) (node_info*, void*), void *arg )
{
  node_info **new_nodes;			/* the new node array */
  node_info **shrunk;				/* result of trimming new_nodes */
  int i, j;

  if( nodes == NULL || filter_func == NULL || size < 0 )
    return NULL;

  /* worst case every node passes, plus one slot for the terminating NULL */
  if( ( new_nodes = (node_info **) malloc( ( (size_t) size + 1 ) * sizeof( node_info *) ) ) == NULL )
  {
    perror("Memory Allocation Error");
    return NULL;
  }

  for( i = 0, j = 0; i < size; i++ )
  {
    if( filter_func( nodes[i], arg ) )
    {
      new_nodes[j] = nodes[i];
      j++;
    }
  }
  new_nodes[j] = NULL;

  if( j == 0 )
  {
    free(new_nodes);
    return NULL;
  }

  /* Trim the array to what is actually used.  realloc() leaves the original
   * block untouched when it fails, so its result must not overwrite the only
   * pointer to it; in that case the untrimmed array is returned as it is
   * still a valid, NULL terminated result.
   */
  shrunk = (node_info **) realloc( new_nodes, ( (size_t) j + 1 ) * sizeof( node_info * ) );
  if( shrunk != NULL )
    new_nodes = shrunk;

  return new_nodes;
}

/*
 *
 *      is_node_timeshared - node_filter() callback which accepts the
 *                           timeshared nodes
 *
 *        node - node to check
 *        arg  - unused argument
 *
 *      returns 
 *        1: is a timeshared node
 *        0: is not a timeshared node, or node is NULL
 *
 */
int is_node_timeshared( node_info *node, void *arg )
{
  (void) arg;		/* only present to match the filter prototype */

  return ( node == NULL ) ? 0 : node -> is_timeshare;
}

/*
 *
 *      find_best_node - find the best node to run a job
 *
 *      A node is a candidate if it is free, matches the arch and host the
 *      job asked for (when it asked for any) and has at least as much
 *      physical memory as the job requested.  The first candidate that
 *      would stay within its ideal load is returned; failing that, the
 *      first candidate that would stay within its max load.
 *
 *      Every node is looked at once at most.  When load balancing by
 *      round robin the scan starts with the node after the one returned
 *      last time, wraps around the end of the array and looks at that
 *      last node at the very end; otherwise it starts at the first node.
 *
 *        jinfo - the job to run
 *        ninfo_arr - NULL terminated array of nodes to find the best of
 *
 *      returns the node to run the job on, or NULL if there is none, the
 *      job requests "nodes", or an argument is NULL
 *
 */
node_info *find_best_node( job_info *jinfo, node_info **ninfo_arr )
{
  node_info *possible_node = NULL;	/* node which under max node not ideal*/
  node_info *good_node = NULL;		/* node which is under ideal load */
  node_info *cand;			/* the node being looked at */
  node_info *best;			/* the node which gets returned */
  resource_req *req;			/* used to find requested resources */
  sch_resource_t ncpus;			/* number of CPUs the job wants */
  sch_resource_t mem;			/* amount of memory the job wants */
  char *arch;				/* architecture the job wants */
  char *host;				/* node name the job wants */
  int num_nodes;			/* number of nodes in ninfo_arr */
  int start = 0;			/* index the scan starts at */
  int ln_i;				/* index of last_node in ninfo_arr */
  int n;				/* number of nodes looked at so far */
  float good_node_la = 1.0e10;		/* big value */

  /* either one missing makes the search impossible */
  if( ninfo_arr == NULL || jinfo == NULL )
    return NULL;

  /* if the job is requesting nodes, then don't try and load balance it */
  if( find_resource_req( jinfo -> resreq, "nodes" ) != NULL )
    return NULL;

  if( ( req = find_resource_req( jinfo -> resreq, "ncpus" ) ) == NULL )
    ncpus = 1;
  else
    ncpus = req -> amount;

  if( ( req = find_resource_req( jinfo -> resreq, "mem" ) ) == NULL )
    mem = 0;
  else
    mem = req -> amount;

  if( ( req = find_resource_req( jinfo -> resreq, "arch" ) ) == NULL )
    arch = NULL;
  else
    arch = req -> res_str;

  if( ( req = find_resource_req( jinfo -> resreq, "host" ) ) == NULL )
    host = NULL;
  else
    host = req -> res_str;

  for( num_nodes = 0; ninfo_arr[num_nodes] != NULL; num_nodes++ )
    ;

  if( num_nodes == 0 )
    return NULL;

  if( cstat.load_balancing_rr )
  {
    /* find the node we last ran a job on */
    for( ln_i = 0; ln_i < num_nodes && ninfo_arr[ln_i] != last_node; ln_i++ )
      ;

    /* no last node yet, or it was deleted since the last scheduling
     * cycle: behave as if the first node had been used last
     */
    if( ln_i == num_nodes )
      ln_i = 0;

    /* start with the node after the last good node */
    start = (ln_i + 1) % num_nodes;
  }

  /* The number of nodes looked at is counted instead of waiting for the
   * scan to come back to last_node, so the loop ends even if last_node is
   * not part of the array or sits at the wrap around point.
   */
  for( n = 0; n < num_nodes && good_node == NULL; n++ )
  {
    cand = ninfo_arr[(start + n) % num_nodes];

    /* if the job didn't specify memory, it will default to 0, and if
     * the mom didn't return physmem, it defaults to 0.  A node whose arch
     * or name is unknown can not match a job which asks for one.
     */
    if( cand -> is_free &&
	( arch == NULL ||
		( cand -> arch != NULL && !strcmp(arch, cand -> arch) ) ) &&
	( host == NULL ||
		( cand -> name != NULL && !strcmp(host, cand -> name) ) ) &&
	mem <= cand -> physmem )
    {
      if( cand -> loadave + ncpus <= cand -> ideal_load )
      {
	/* once a fall back node has been picked, good_node_la holds its
	 * load average, so a node under its ideal load is only taken if
	 * it is less loaded than that fall back
	 */
	if( cand -> loadave < good_node_la )
	{
	  good_node = cand;
	  good_node_la = cand -> loadave;
	}
      }
      else if( possible_node == NULL &&
		  cand -> loadave + ncpus <= cand -> max_load &&
		  cand -> loadave < good_node_la )
      {
	possible_node = cand;
	good_node_la = cand -> loadave;
      }
    }
  }

  best = ( good_node != NULL ) ? good_node : possible_node;

  /* remember where to continue from in the next round robin search */
  if( cstat.load_balancing_rr )
    last_node = best;

  return best;
}

/*
 *
 *      find_node_info - find a node in a node array
 *
 *        nodename - the name of the node to find
 *        ninfo_arr - the NULL terminated array of nodes to look in
 *
 *      returns the first node whose name equals nodename, or NULL if it
 *      is not found or an argument is NULL
 *
 */
node_info *find_node_info( char *nodename, node_info **ninfo_arr )
{
  int i;

  if( nodename == NULL || ninfo_arr == NULL )
    return NULL;

  for( i = 0; ninfo_arr[i] != NULL; i++ )
  {
    /* a node may have no name (new_node_info() starts it as NULL); such a
     * node can not match and must not be handed to strcmp()
     */
    if( ninfo_arr[i] -> name != NULL &&
				!strcmp(ninfo_arr[i] -> name, nodename) )
      return ninfo_arr[i];
  }

  return NULL;
}

/*
 *
 *      print_node - print the information in a node on stdout.  Mainly
 *                   used for debugging purposes
 *
 *        ninfo - the node to print (nothing is printed if NULL)
 *        brief - boolean: only print the name ?
 *
 *      returns nothing
 *
 */
void print_node( node_info *ninfo, int brief )
{
  int i;

  if( ninfo == NULL )
    return;

  if( ninfo -> name != NULL )
    printf("Node: %s\n", ninfo -> name);

  if( brief )
    return;

  printf("is_down: %s\n", ninfo -> is_down ? "TRUE" : "FALSE");
  printf("is_free: %s\n", ninfo -> is_free ? "TRUE" : "FALSE");
  printf("is_offline: %s\n", ninfo -> is_offline ? "TRUE" : "FALSE");
  printf("is_unknown: %s\n", ninfo -> is_unknown ? "TRUE" : "FALSE");
  printf("is_reserved: %s\n", ninfo -> is_reserved ? "TRUE" : "FALSE");
  printf("is_exclusive: %s\n", ninfo -> is_exclusive ? "TRUE" : "FALSE");
  printf("is_sharing: %s\n", ninfo -> is_sharing ? "TRUE" : "FALSE");
  /* is_busy is one of the states set_node_state() records, so it belongs
   * in the dump next to the other state flags
   */
  printf("is_busy: %s\n", ninfo -> is_busy ? "TRUE" : "FALSE");
  printf("is_timeshare: %s\n", ninfo -> is_timeshare ? "TRUE" : "FALSE");
  printf("is_cluster: %s\n", ninfo -> is_cluster ? "TRUE" : "FALSE");

  if( ninfo -> properties != NULL )
  {
    printf("Properties: ");
    for( i = 0; ninfo -> properties[i] != NULL; i++ )
    {
      if( i )
	printf(", ");
      printf("%s", ninfo -> properties[i]);
    }
    printf("\n");
  }

  if( ninfo -> jobs != NULL )
  {
    printf("Running Jobs: ");
    for( i = 0; ninfo -> jobs[i] != NULL; i++ )
    {
      if( i )
	printf(", ");
      printf("%s", ninfo -> jobs[i]);
    }
    /* end the line like the properties list does, so that whatever is
     * printed next starts on a line of its own
     */
    printf("\n");
  }
}

	  


