#! /usr/local/bin/perl5
#
$debug = 0;
$username = getlogin || (getpwuid($<))[0] ;
# restart_serv = 1 DOES NOT WORK because of bugs in perl.
# (I've worked around this bug below, but it is pretty hideous)
$bindir="/usr/local/mpich/bin";
$datadir="/usr/local/mpich/share";
$restart_serv = 0;
$serv_p4 = "$bindir/serv_p4";
#$serv_p4 = "$bindir/serv_p4 -w $bindir";
#$server_name = "serv_p4";
$run_dir = "$HOME";
$kill_serv = 0;
$rsh = "rsh";
#
use Socket;
#
# This is an experimental perl program to check on your servers
#
# Get the port to check
$portnum=1234;
# Get the list of machines
$machinelist="$datadir/machines.freebsd";
#
# Read command line for overrides
while ($_ = $ARGV[0]) {
    shift;
    print "Processing argument $_\n" if $debug;
    if ($_ eq "-debug") {
	$debug = 1;
	}
    elsif ($_ eq "-port") {
	$portnum = $ARGV[0];
	shift;
        }
    elsif ($_ eq "-startdir") {
	$run_dir = $ARGV[0];
        shift;
        }
    elsif ($_ eq "-machinelist") {
	$machinelist = $ARGV[0];
	shift;
	}
    elsif ($_ eq "-server") {
	$serv_p4 = $ARGV[0];
	shift;
	}
    elsif ($_ eq "-pgm") {
	$serv_p4 = "$bindir/$ARGV[0]";
	shift;
	}
    elsif ($_ eq "-restart") {
        $restart_serv = 1;
        }
    elsif ($_ eq "-user") {
	$username = $ARGV[0];
	shift;
        }
    elsif ($_ eq "-kill") {
	$kill_serv = 1;
	}
    else {
	print "chkserv [ -port num ] [ -restart ] [ -machinelist file ]";
        print "        [ -user name ] [ -server pgm ]";
        if ($_ eq "-help") { exit 1; }
	die "Unrecognized argument $_\n";
	}
    }

if ($username eq "") {
    die "Could not get username!\n";
    }
#
# If the program name was just the name without the location, try a
# default location.
if ($restart_serv) { 
    if (! -x $serv_p4 ) {
        $serv_p4 = "$bindir/$serv_p4";
    }
    if (! -x $serv_p4 ) {
        die "$serv_p4 is not executable!\n";
    }
}
#
#
open( FLIST, $machinelist ) || die "Could not open $machinelist" ;
# Force output to be flushed
$| = 1;
#
# See perlipc for an example of a tcp client
# Because of bugs in perl, we should really read the file into a perl
# variable (array?) and then operate on that array.
#
# We start in the indicated directory ($HOME by default)
chdir $run_dir ;
#
while (<FLIST>) {
    # Skip comment lines
    if (/^#/) { next ; }
    # Setup socket
    $sockaddr = 'S n a4 x8';
    chop;
    ($hostname,$clustersize) = split(/:/);
    print "Read $hostname from process $$\n" if $debug; 
    ($name,$aliases,$proto) = getprotobyname('tcp');
#    ($name,$aliases,$port)  = getservbyname($portnum,'tcp') 
#	   unless $portnum =~/^\d+$/;
    $port = $portnum;
    ($name,$aliases,$type,$len,$thataddr) = 
	gethostbyname($hostname);
    ($name,$aliases,$type,$len,$thisaddr) = 
	gethostbyname('hostname');
    
    $this = pack($sockaddr,AF_INET,0,$thisaddr);
    $that = pack($sockaddr,AF_INET,$port,$thataddr);
    
    $hostok = 1;
    print "Connecting to $hostname:$port with protocol $proto\n" if $debug;
    socket(S,PF_INET,SOCK_STREAM,$proto) || ($hostok = 0);
    if ($hostok == 0) { print "socket: $!\n"; next; }
    bind(S,$this) || ($hostok = 0); 
    if ($hostok != 0) {
        print "bind: $!\n" if $debug;
        connect(S,$that) || ($hostok = 0); 
        if ($hostok == 0) { 
	    print "connect: $!\n" if $debug; }
	}

    if ($hostok == 0) { 
        close( S );
	if ($debug) { print "bind: $!\n" } 
	else {
	    # This has been hideously painful.  perl seems to get confused
            # when reading from FLIST when I do a fork; for some reason the 
            # EOF is lost, along with the first few characters of the input.
	    # This would have been easier in C, where fork and fgets work
            # correctly.  I've found documentation on this at 
	    # http://www.perl.com/perl/bugs/NETaa14449-5.html.
            # Note that the workaround is to (a) not close files and
            # (b) not call exit.  Thus, even when doing "norun", we
            # have to exec /usr/bin/echo, not use print.
	    if ($restart_serv) {
	        print "Restarting server on $hostname:$portnum\n" ;
  	        $cmd = "$rsh $hostname -n $serv_p4 -o -p $portnum";
	        $pid = fork();
	        if ($pid > 0) {
		     waitpid( $pid, 0 );
		     }
		else {
		    # I am the child
		    #close( STDIN );
		    #close( FLIST );
		    #    exec "/usr/bin/echo $cmd\n";
		    exec $cmd;
		    exit 1;      # just in case exec fails
		    }
		}
	    else {
                 print "$hostname:$port needs new server\n"; }
	         }
        next; }

    # We need to establish a timeout in case we're not talking to
    # who we expect.  To make this work, we'd need yet another fork,
    # WHICH WON'T WORK FOR THE REASONS ABOVE!
    #
    $SIG{'ALRM'} = sub { 
	print "Timeout in connection to $hostname\n" ; 
        close( S );
        };
    alarm 20;
    # Send request for id or kill
    if ($kill_serv) {
	$msg = "$username\n$username\n%exit\n"; 
	}
    else {
        $msg = "$username\n$username\n%id\n";
	}
    $datalen = length $msg;
    syswrite S,$msg,$datalen;
    if ($debug) {
        while (<S>) { print $_; }
        }
    else {
	$msg = <S>;
	# Proceed-2 indicates server version 2
        if ($msg ne "Proceed\n" && $msg ne "Proceed-2\n") { 
	    print "Bad message from $hostname: :$msg:\n";
	}
	if ($kill_serv != 1) {
            $msg = <S>;
            if ($msg eq 
	    "Port $port for client $username and server user $username\n") {
	         print "$hostname:$port has valid server\n";
                 }
            }
        }
    close(S);
    alarm 0;
}
close( FLIST );
0;

