#!/usr/bin/perl
#
# Checkservice by Paul van Tilburg <paul@linvision.com>
#
# Syntax: 
# checkservice [-t targethost] [-c cfgdir] [-l logdir|-] [-m] [-v [short]]
#
# Checks ports (services) on servers for their status according to configfiles
# in <cfgdir>/hosts/. Global config file is <cfgdir>/checkservice.conf
# (the default for <cfgdir> is /etc/checkservice).
#
# Reports for each file (filename represents host that should be scanned) to
# logfiles for every host in <cfgdir>/hosts/ or just one host if <host>
# is given. Logfiles can be found in <logdir>. If logdir-option is omitted or
# the value is '-', Checkservice will print to STDOUT. Special output per
# host can be generated with the -m option: Checkservice will then print a 0
# (host <host> and all its services are up) or a 1.

# <<INITIALIZATION>>

use POSIX;
use strict;
use IO::Socket;
use File::Find;
use Mail::Send;
use Getopt::Long;

# Program identity, used in the help text and in the --version output.
my $program = "Checkservice";
my $version = "1.1.0";

# Short name used as the prefix of error messages: the leading word
# characters of the last path component of $0.  The previous pattern required
# a '/' in $0 and left $p undefined when the script was started without a
# path (e.g. "perl checkservice"); fall back to the lower-cased program name
# when nothing usable can be extracted.
my ($p) = $0 =~ m{(?:\A|/)(\w+)[^/]*\z};
$p = lc $program unless defined $p;

# Usage text, shown for --help and when option parsing fails.
my $help_info = <<EOT;
$program $version checks certain services on certain hosts. It's equipped with
various warning methods and several ways for reporting and logging.

Usage: checkservice [OPTIONS]...

 -t, --targethost=STR	Specify host to scan, a configfile for that host must
                        exist in <configdir>/hosts/. If STR is '*' all
			hosts will be scanned for which configfiles are
			available.
 -c, --configdir=PATH   Specify configdir, (default: /etc/checkservice).
 -l, --logdir=PATH	Logdirectory where log should be kept. If PATH is
			'-', $program will print to stdout.
 -m, --mrtg		Generate only a 0 (host is up & all services are
  			ok) or 1 to stdout. Using this option, logdir
			specification will be ignored.
			NB. the targethost option is compulsory!
 -h, --help		Show this help and exit.
 -v, --version=MODE	Show version (if MODE is 'short', only version
			number will be printed) and exit.

EOT

# >> Read command line parameters.
#
# Defaults: scan every host, configuration in /etc/checkservice, report to
# stdout.  Getopt::Long accepts unique abbreviations of the option names.
# 'configdir' is accepted as an alias of 'cfgdir' because the help text
# documents the option as --configdir; the value is still stored under the
# primary name 'cfgdir'.
my %params = (targethost => '*', cfgdir => '/etc/checkservice', logdir => '-');
GetOptions(\%params,
           qw(cfgdir|configdir=s targethost=s logdir=s help! mrtg! version:s))
  or die $help_info;

# If --version|-v is provided, show version and exit!
# With 'short' as value, only the version is shown (for the php-statuspage)
if (exists $params{version}) {
  if ($params{version} eq "short") {
    print "$version\n";
  }
  else {
    print "Linvision $program version $version by Paul van Tilburg"
          . "<paul\@linvision.com>.\n";
  }
  exit;
}

# Show help and exit!  Help that was asked for is not an error, so it goes
# to stdout with a zero exit status (a failed option parse above still dies
# with the same text).
if ($params{help}) {
  print $help_info;
  exit 0;
}

# >> Read and parse global configfile.
#
# The file holds "key = value" lines; everything from a '#' to the end of
# the line is ignored.  Lines that do not have that shape are skipped.  A
# missing or unreadable file is not an error here: %config simply stays
# empty.
my %config = ();
(! -d "$params{cfgdir}") and
  die "$p: Can't find config dir ($params{cfgdir})\n";

if (-r "$params{cfgdir}/checkservice.conf") {
  open my $conf_fh, '<', "$params{cfgdir}/checkservice.conf" or
    die "$p: Can't open global configfile "
        . "'$params{cfgdir}/checkservice.conf': $!\n";

  while (my $line = <$conf_fh>) {
    chomp $line;
    $line =~ s/#.*//;

    # The value is trimmed on the right as well, so that "webcache = yes  "
    # or "webcache = yes   # comment" still compares equal to "yes".
    if (my ($key, $value) = $line =~ /^\s*(\S+)\s*=\s*(\S.*?)\s*$/) {
      $config{$key} = $value;
    }
  }

  close $conf_fh;
}

# Do some path checking.
#
# Every required directory gets its own test and its own message.  The
# hostfile directory and the log directory used to share one condition, so
# a missing hostfile directory was only reported when the log directory was
# missing as well (and never with the default logdir '-').
(! -d "$config{lockpath}") &&
  die "$p: Can't find or open directory for lockfiles: $config{lockpath}!\n";
(! -d "$config{cachepath}") &&
  die "$p: Can't find or open directory for caching: $config{cachepath}!\n";
(! -d "$params{cfgdir}/hosts") &&
  die "$p: Can't find dir for hostfiles ($params{cfgdir}/hosts)!\n";

# The log directory only matters when logfiles are written: not for '-'
# (stdout) and not in MRTG mode.
if ($params{logdir} ne "-" && !$params{mrtg} && ! -d $params{logdir}) {
  die "$p: Can't find dir for logfiles ($params{logdir})!\n";
}

# >> Read and parse host-specific configfiles.
#
# @conffiles receives the path of every host configfile to process: the
# single file of the requested host, or every plain file below
# <cfgdir>/hosts whose name starts with a word character when the target is
# '*' (the default).

my @conffiles = ();

if ($params{targethost} ne "*") {
  # One specific host: its configfile has to be readable.
  my $hostfile = "$params{cfgdir}/hosts/$params{targethost}";

  unless (-r $hostfile) {
    die "$p Couldn't read/find configfile for host "
         . "'$params{targethost}': $!\n";
  }
  push @conffiles, $hostfile;
}
else {
  # All hosts.  A NUL byte is put between directory and filename; it is
  # stripped again after the list has been sorted.
  my $collect = sub {
    return unless -f $_ && /^\w/;
    push @conffiles, "$File::Find::dir/\x00$_";
  };

  finddepth($collect, "$params{cfgdir}/hosts");
}

# << MAIN >>
#
# Global variables:

my $cfile;	# Path of the host configfile that is being processed.
my $wcache;	# Flag to determine if web.cache should be written.
my %mailto;	# Returns mailto-string for each host.
my %fports;	# Hash keeping indices for the following arrays
		# that have all the information. %fports will have the index
		# for every first port of a portset.
my @actions;	# Service action.
my @ports;	# Service portset.
my @chlvls;	# Service checkmethod.
my @snames;	# Service name short.
my @snamesl;	# Service name long.


# Determine if webcache is enabled...
# if so... the target for writing in the stdout-output-sub will be diverted
# to the webcache if in logging mode!
#
# MRTG mode is excluded: no host block is written in that mode, so opening
# the cache would only truncate it to an empty file.

$wcache = 1
  if ($config{webcache} eq "yes" && $params{logdir} ne "-"
      && !$params{mrtg});

my $target = $wcache ? "$config{cachepath}/web.cache" : "/dev/stdout";

if ($wcache) {
  open T, ">", $target or
    die "$p: Can't open $target for writing: $!\n";
}
else {
  # Duplicate the existing STDOUT handle instead of opening /dev/stdout by
  # name, so this does not depend on that device node being available.
  open T, ">&STDOUT" or
    die "$p: Can't open $target for writing: $!\n";
}

# Start processing every host-specific configfile!
#
# For each host the configfile is parsed into the service tables (%fports,
# @ports, @chlvls, @snames, @snamesl, @actions).  The host is then reported
# MRTG style (-m), or to a logfile with warnings (logdir given), and/or as
# a "host:" block on T.

# MRTG output describes exactly one host, so refuse the '*' wildcard before
# anything is probed.
if ($params{mrtg} && $params{targethost} eq "*") {
  die "$p: Specific host must be provided, use '$0 -t <host> -m'\n";
}

my $localhost;	# Output of `hostname` for the logfile lines, looked up once.

foreach $cfile (map { tr/\x00//d; $_ } sort @conffiles) {
  chomp $cfile;
  my ($host) = $cfile =~ /([^\/]+)$/;
  my $relpath = substr $cfile, (length "$params{cfgdir}/hosts") + 1;
  my %fports = ();
  @actions = @ports = @chlvls = @snames = @snamesl = ();
  my $i = 0;

  # Read config file for $host.  A file that cannot be opened is reported
  # and the host is skipped; the remaining hosts are still processed.
  my $checks_fh;
  unless (open $checks_fh, '<', $cfile) {
    warn "$p: Can't open configfile $cfile: $!\n";
    next;
  }

  while (my $line = <$checks_fh>) {
    chomp $line;
    $line =~ s/#.*//;

    # Split on the first '=' only, so the data part may contain '='.
    my ($option, $data) = split /=/, $line, 2;
    next unless defined $option && defined $data;

    if ($option eq "service") {
      # <portset>:<short name>:<checklevel>:<long name>:<action>; the limit
      # keeps any ':' inside the action (the last field) intact.
      my ($portdef, $sshort, $checklvl, $slong, $action)
        = split /:/, $data, 5;
      my @newports = split /,/, (defined $portdef ? $portdef : "");
      next unless @newports;

      # Checklevel "n" disables the service.
      if (!defined $checklvl || $checklvl ne "n") {
        $fports{$newports[0]} = $i;
        push @ports, [ @newports ];
        push @chlvls, $checklvl;
        push @snames, $sshort;
        push @snamesl, $slong;
        push @actions, $action;
        $i++;
      }
    }
    elsif ($option eq "mailto") { $mailto{$host} = $data; }
  }

  close $checks_fh;

  # The host and every service are probed at most once per run; the log
  # and the stdout/webcache block then report the same result.
  my ($down_known, $down);
  my $host_down = sub {
    unless ($down_known) {
      # host_up() hands back the exit status of ping: true means the ping
      # failed.
      $down = ($config{downcritical} eq "yes" && host_up($host)) ? 1 : 0;
      $down_known = 1;
    }
    return $down;
  };

  my %chk_of;
  my $probe = sub {
    my ($idx) = @_;
    $chk_of{$idx} = checkservice($host, $ports[$idx], $snames[$idx],
                                 $chlvls[$idx])
      unless exists $chk_of{$idx};
    return $chk_of{$idx};
  };

  # Determine if output should go to logfile named $host in $params{logdir},
  # to stdout in special format, ready for mrtg (host must be specified in
  # commandline parameters).
  #
  # Print to stdout in simple format (shell-style return values):
  # 0    <-- hosts is up, all services up
  # 1    <-- host down or one or more services are down
  if ($params{mrtg}) {
    my $check = 0;

    if ($host_down->()) { $check = 1; }
    else {
      foreach my $Port (sort comp_num keys %fports) {
        $check = 1 if $probe->($fports{$Port});
      }
    }

    # scalar(): a readable timestamp instead of the concatenated fields
    # that localtime returns in list context.
    print $check, " (", scalar(localtime), ")\n";
  }

  # Logging to logfile is selected, this enables warnings!
  # There are three warning systems: beep, sms and mail.
  elsif ($params{logdir} ne "-") {
    unless (defined $localhost) {
      $localhost = `hostname`;
      $localhost = "" unless defined $localhost;
      chomp $localhost;
    }

    open my $log_fh, '>>', "$params{logdir}/$host" or
      die "$p: Can\'t open $params{logdir}/$host: $!\n";

    if ($host_down->()) {
      print {$log_fh} strftime("%b %e %H:%M:%S", localtime),
                      " $localhost checkservice: error:",
                      " host $host is unreachable.\n";
      warning("$params{cfgdir}/unreachable.mail", 2, $host, "-", "-");
    }
    else {
      # The host is reachable again: drop its "unreachable" lockfile.
      (-f "$config{lockpath}/$host.-") &&
        unlink "$config{lockpath}/$host.-";

      foreach my $Port (sort comp_num keys %fports) {
        my $idx = $fports{$Port};
        my $portset = $ports[$idx];
        my $chk = $probe->($idx);

        print {$log_fh} strftime("%b %e %H:%M:%S", localtime),
                        " $localhost checkservice: service ",
                        $snames[$idx], ": ";

        if ($chk == 1) {
          print {$log_fh} "failed.\n";
          warning("$params{cfgdir}/failed.mail", $chk, $host, $snames[$idx],
                  join(",", @$portset), $actions[$idx]);
        }
        elsif ($chk == 2) {
          print {$log_fh} "timeout.\n";
          warning("$params{cfgdir}/timeout.mail", $chk, $host, $snames[$idx],
                  join(",", @$portset), $actions[$idx]);
        }
        elsif ($chk == 0) {
          print {$log_fh} "ok.\n";
          # Service is fine again: remove its lockfile.
          (-f "$config{lockpath}/$host.$snames[$idx]") &&
            unlink "$config{lockpath}/$host.$snames[$idx]";
        }
        else {
          print {$log_fh} "unknown ($chk).\n";
        }
      }
    }

    close $log_fh or
      warn "$p: Error while closing $params{logdir}/$host: $!\n";
  }

  # Print to stdout! Delivers for every host a block, blockdefinition:
  # host:<host>
  # ...
  # error:<message>  ||
  # service:<portset>:<short servicename>:<checklevel>:<longservicename>
  # ...
  # endhost:
  if (($params{logdir} eq "-" || $wcache) && ! $params{mrtg}) {

    print T "host:$relpath\n";
    if ($host_down->()) {
      print T "error:Host $host is unreachable!\n";
    }
    else {
      my @resultn = ("ok", "failed", "timeout");

      foreach my $Port (sort comp_num keys %fports) {
        my $idx = $fports{$Port};
        my $portset = $ports[$idx];
        my $chk = $probe->($idx);
        my $perc = percentage($host, $snames[$idx]);

        # Only a whole number inside the table is looked up; anything else
        # (negative, fractional, too large) is reported as unknown.
        my $state = ($chk =~ /^\d+$/ && $chk <= $#resultn)
                    ? $resultn[$chk] : "unknown ($chk)";

        print T "service:", join(",", @$portset),
                ":$snames[$idx]:$chlvls[$idx]:$snamesl[$idx]:",
                $state, ":";
        if ($perc ne "n/a") { printf T "%3.6f\n", $perc; }
        else { print T "n/a\n"; }
      }
    }
    print T "endhost:\n";
  }
}

# Buffered write errors surface at close time.
close T or warn "$p: Error while closing report output: $!\n";

tail_failcache();
exit 0;

# <<HELPER SUBROUTINES>>

# Comparison routine for sort(): orders the package variables $a and $b
# numerically, ascending (-1, 0 or 1).
sub comp_num {
  return $a <=> $b;
}

# Tell whether a string occurs in a list.
# First argument: the string to look for; remaining arguments: the list.
# Returns 1 when at least one element is string-equal to it, 0 otherwise.
sub array_contains_str {
  my ($needle, @haystack) = @_;

  return (grep { $_ eq $needle } @haystack) ? 1 : 0;
}

# Check if a host is up by pinging it.  The number of pings is taken from
# the hostping option (default 2; also used when the configured value is
# not a positive integer).
#
# Argument: the host to ping.
# Returns the status of the ping command as delivered by system(): 0 when
# the ping succeeded and a non-zero (true) value when it did not.  Despite
# the name, callers therefore treat a TRUE result as "host is unreachable".
sub host_up {
  my ($target) = @_;
  my $quiet = "> /dev/null 2>&1";

  unless (defined $config{hostping} && $config{hostping} =~ /^[1-9]\d*$/) {
    $config{"hostping"} = 2;
  }

  # Attempt to increase portability! If I missed one, please let me know.
  # $^O names the platform perl was built for, which saves running
  # `uname -s` for every single check.
  if ($^O eq 'solaris' || $^O eq 'sunos') {
    return system("ping -s $target 56 $config{hostping} $quiet");
  }
  elsif ($^O eq 'hpux') {
    return system("ping $target -n $config{hostping} $quiet");
  }
  else {
    return system("ping -c $config{hostping} $target $quiet");
  }
}

# Creates a tail of the failure-cache with a length defined in the global
# configfile (the failureslen-option, default 5): the cache file is read,
# and rewritten with only its last <failureslen> lines.
#
# Does nothing when $params{logdir} is not a directory.  A cache file that
# cannot be read counts as empty; it is (re)created by the rewrite.  Dies
# when the cache cannot be written.
sub tail_failcache {
  return unless -d $params{logdir};

  my $cache = "$config{cachepath}/failures.cache";
  $config{failureslen} = 5 unless (defined $config{"failureslen"});

  my @Lines = ();
  if (open my $in_fh, '<', $cache) {
    @Lines = <$in_fh>;
    # Close the read handle before the same file is opened for writing.
    close $in_fh;
  }

  # Index of the first line to keep.
  my $start = ($config{failureslen} > $#Lines) ?
               0 : ($#Lines - $config{"failureslen"} + 1);
  my @NewCache = @Lines[$start..$#Lines];

  open my $out_fh, '>', $cache or
    die "$p: Couldn't open $config{cachepath}/failures.cache"
        . " for writing: $!\n";
  print {$out_fh} @NewCache;
  close $out_fh or
    die "$p: Couldn't write $config{cachepath}/failures.cache: $!\n";
}

# Return the numerically smaller one of two values; when neither is
# smaller, the second one is returned.
sub min {
  my ($first, $second) = @_;

  return $first if $first < $second;
  return $second;
}

# Generate mail warning. This is the builtin mailwarning plugin.
# If mail.plugin warningplugin is installed it will override this builtin.
# Uses lockfile system to prevent a mail every check.
# Who should be mailed to is a combination of the value of the mailto-var
# in the globalconffile and the host-specific conffile.
#
# Arguments: mail template file, host, short service name, portset string,
# action command, reference to the list of output lines of that action.
# Dies when the lockfile can be neither created nor read.
sub mail_warning {
  my @wmethods = split(":", $config{wmethod});
  my ($template, $host, $service, $ports, $action, $aoutput) = @_;
  my $lockf = "$config{lockpath}/$host.$service";
  my $crflag = 0;
  my ($ptime, $mtime);

  # Recipients are ':'-separated in the configfiles; unset parts are left
  # out so the To-header does not start with a separator.
  my $to = join ":", grep { defined $_ && length $_ }
                          ($config{mailto}, $mailto{$host});
  $to =~ s/:/,\ /g;

  if (! -r $lockf) {
    # First failure: create the lockfile, it records when the problem
    # started.  Closing it right away makes sure the time is on disk.
    open my $lock_fh, '>', $lockf
      or die "$p: Could not create lockfile $lockf: $!\n";
    $ptime = time;
    print {$lock_fh} $ptime, "\n";
    close $lock_fh
      or die "$p: Could not create lockfile $lockf: $!\n";

    $crflag = 1;
  }
  else {
    open my $lock_fh, '<', $lockf
      or die "$p: Could not read lockfile $lockf: $!\n";
    $ptime = <$lock_fh>;
    close $lock_fh;

    $mtime = (stat $lockf)[9];
    chomp $ptime if defined $ptime;
    # An empty or damaged lockfile: fall back to its modification time.
    $ptime = $mtime unless defined $ptime && $ptime =~ /^\d+$/;
  }

  # Send mail if lockfile hasn't been touched longer than given
  # repeatmailw-interval or didn't exist yet.
  # A template is used for the mail that will be sent, substitutions in
  # template:
  # %t -> time server/service went down.
  # %h -> host that went down/host from which a service went down.
  # %s -> service that went down (none if whole server went down).
  # %p -> portset defined for service.
  #
  # If action warning was enabled, the output of the action will be appended
  # to the mail.
  if ($crflag || $mtime < (time - $config{repeatmailw})) {
    my $Subject = "Checkservice warning";
    my @Contents;

    my $ctime = ctime $ptime;
    chomp $ctime;
    utime time, time, $lockf;

    my %substs = ('%t' => $ctime, '%h' => $host,
                  '%s' => $service, '%p' => $ports);

    if (open my $tmpl_fh, '<', $template) {
      while (my $line = <$tmpl_fh>) {
        # One pass over the line, so text that was just inserted is not
        # expanded a second time.
        $line =~ s/(%[thsp])/$substs{$1}/g;
        if ($line =~ /^Subject:\s(.*)$/) { $Subject = $1; }
        else { push @Contents, $line; }
      }
      close $tmpl_fh;

      if (array_contains_str("action", @wmethods)
          && $aoutput && @$aoutput) {
        push @Contents, "\nPS. Output of action '$action':\n";
        foreach my $oline (@$aoutput) { push @Contents, "] $oline"; }
      }
    }
    else {
      push @Contents, "Could not open mail template '$template': $!\n\n"
                      . ">> Service $service on $host:$ports failed "
                      . "on $ctime!\n";
    }

    my $Mail = Mail::Send->new(Subject => $Subject, To => $to);
    my $M = $Mail->open();
    foreach my $line (@Contents) { print $M $line; }
    $M->close();
  }
}

# Try to run a defined action, capture the output and return that.
# Kill the command if it takes longer than what is set in the
# atimeout-option (default 20 sec.)
#
# Argument: the command line of the action (undefined or false: no action).
# Returns a reference to the list of output lines (stdout and stderr of the
# command).  A "--- ... ---" line is appended when the command could not be
# started, timed out, was killed or ended with a non-zero status.
sub do_action {
  my ($command) = @_;
  my @Outp = ();

  return \@Outp if (!defined($command) || !$command);

  unless (defined($config{atimeout})) { $config{"atimeout"} = 20; }

  # Fork with the child's stdout connected to $pipe.  The result is undef
  # when the fork failed, 0 in the child and the child's pid in the parent.
  my $pid = open(my $pipe, "-|");

  unless (defined $pid) {
    push @Outp, "--- Could not start action: $! ---\n";
    return \@Outp;
  }

  if ($pid == 0) {
    # Child: the alarm stays armed across the exec and kills the command
    # when it runs too long.
    alarm $config{atimeout};
    exec "$command 2>&1" or do {
      # Never fall through into the parent's code; _exit also leaves the
      # buffers inherited from the parent unflushed.
      syswrite STDOUT, "--- Could not execute action: $! ---\n";
      POSIX::_exit(127);
    };
  }

  @Outp = <$pipe>;

  unless (close $pipe) {
    my $signal = $? & 127;

    if ($signal == POSIX::SIGALRM()) {
      push @Outp,
           "--- Action timed out after $config{atimeout}s and died ---\n";
    }
    elsif ($signal) {
      push @Outp, "--- Action was killed by signal $signal ---\n";
    }
    else {
      push @Outp, "--- Action ended with exit status " . ($? >> 8) . " ---\n";
    }
  }

  return \@Outp;
}

# Launch warning subroutines for each warning method enabled in global
# configfile (the ':'-separated wmethod option).
#
# Arguments: mail template, check result code, host, short service name,
# portset string, action command.
#
# The failure is recorded in the failure cache when no lockfile exists yet
# for this host/service, then the action is run.  For every method an
# executable <pluginpath>/warning/<method>.plugin is preferred; for 'mail'
# the builtin mail_warning() is used when there is no such plugin.
sub warning {
  my @wmethods = split(":", $config{wmethod});
  my ($template, $result, $host, $service, $ports, $action) = @_;

  reportfail($host, $service, $ports)
    unless -f "$config{lockpath}/$host.$service";
  my $Output = do_action($action);

  foreach my $w (@wmethods) {
    my $plugin = "$config{pluginpath}/warning/$w.plugin";

    if (-x $plugin) {
      # List form: the arguments reach the plugin as they are, without a
      # shell interpreting spaces or metacharacters in them.
      system($plugin, "-c", $params{cfgdir}, "-h", $host,
             "-p", $ports, "-s", $service, "-r", $result);
    }
    elsif ($w eq 'mail') {
      mail_warning($template, $host, $service, $ports, $action, $Output);
    }
  }
}

# Calculates uptime percentage for a service by parsing a logfile.
# Calculation is done by taking all entries from the current month
# and dividing the 'ok'-entries by the total number of entries.
# Thus at the end of a month the uptime calculation is the most accurate.
#
# Arguments: host (name of the logfile in the defaultlogdir of the global
# configfile) and short service name.
# Returns the percentage as a number, or the string "n/a" when the logfile
# cannot be read or has no entries for the service in the current month.
sub percentage {
  my ($host, $service) = @_;
  my ($succ, $cnt) = (0, 0);
  my $month = strftime('%b', localtime);

  # Built once per call; \Q..\E makes the service name match literally
  # even when it contains regex metacharacters.
  my $entry_re =
    qr/$month\s*\d+\s.*\s.*\scheckservice:\sservice\s\Q$service\E:\s(.*)\./;

  open my $log_fh, '<', "$config{defaultlogdir}/$host" or return "n/a";

  while (my $line = <$log_fh>) {
    chomp $line;
    if (my ($res) = $line =~ $entry_re) {
      $succ++ if $res eq "ok";
      $cnt++;
    }
  }

  close $log_fh;

  if ($cnt) { return (100. * $succ) / $cnt; }
  else { return "n/a"; }
}

# Append one entry to the failure cache (<cachepath>/failures.cache):
#   <host>:<service>:<ports>:<unreachable|unavailable>:<epoch time>
# The service "-" marks an unreachable host, anything else an unavailable
# service.  Dies when the cache cannot be opened or written.
sub reportfail {
  my ($host, $service, $ports) = @_;
  my $kind = $service eq "-" ? "unreachable" : "unavailable";

  open my $fails_fh, '>>', "$config{cachepath}/failures.cache" or
    die "$p: Couldn't open $config{cachepath}/failures.cache for writing:"
        . " $!\n";
  print {$fails_fh} "$host:$service:$ports:$kind:", time, "\n";
  # Buffered write errors only show up when the handle is closed.
  close $fails_fh or
    die "$p: Couldn't write $config{cachepath}/failures.cache: $!\n";
}


# Does the actual checking! It tries to find a plugin for the service first.
#
# Arguments: host, reference to the list of ports of the service, short
# service name, checklevel ("s" simple, "x" extended; "s" when empty).
# Returns 0 (ok), 1 (failed), 2 (timeout) or, from a plugin, its exit
# status; 3 when the plugin could not be run or was killed by a signal.
# Any other checklevel is not checked at all and yields 0.
sub checkservice {
  my ($host, $P, $service, $chklvl) = @_;
  $chklvl = $chklvl || "s";

  # The timeout that is actually used is ctimeout.  The default used to be
  # stored under 'timeout', which left the alarm disabled when ctimeout was
  # not configured; a configured 'timeout' is still honoured as fallback.
  unless (defined($config{ctimeout})) {
    $config{"ctimeout"} = defined($config{timeout}) ? $config{timeout} : 10;
  }

  my $plugin = "$config{pluginpath}/check/$service.plugin";

  # If checklevel is eXtended and plugin exist for that service,
  # use plugin to scan.
  if ($chklvl eq "x" && -x $plugin) {
    my $rval = system("$plugin -h $host"
                      . " -t $config{ctimeout} > /dev/null 2>&1");

    # -1: could not be started; low 7 bits set: died from a signal.
    return 3 if ($rval == -1 || ($rval & 127));
    return ($rval >> 8);
  }
  # If checklevel is Simple or is eXtended, but plugin didn't exist,
  # do a simple portscan.
  elsif ($chklvl eq "s" || $chklvl eq "x") {
    my $Check = 0;

    foreach my $port (@$P) {
      eval {
        local $SIG{ALRM} = sub { $Check = 2; die "timeout\n"; };

        alarm $config{ctimeout};
        my $sock = IO::Socket::INET->new(PeerAddr => "$host:$port",
                                         Proto => "tcp");
        # Disarm while the handler is still installed.
        alarm 0;

        if (defined $sock) { close $sock; }
        else { $Check = 1; }
      };
      alarm 0;
    }
    return $Check;
  }
  else { return 0; }
}

# << END >>
