#!/usr/bin/perl -w

# complain-httpd.pl - Scan httpd logs for 
# CodeRed and other probes, find site contacts from IP addresses,
# and mail admin
#
# Version 1.1.8
#
# Copyright 2001, Dan Pelleg, daniel+hunch@pelleg.org
#
# Released under the BSD license
#
# Use: $0 [log-file]
#
#
# log file is in Apache's "combined" format ("common" might also work)
#
# We're basically parsing the log entry for the IP
# and mailing to the address found by the "contact" helper
# script. But we try to:
# - prevent the same IP from being reported too often
# - prevent the same admin from being spammed too many times
# - incorporate information that's not in WHOIS (special abuse
#    addresses that are not in the database).
#
#
# First-time use:
#  - set $sender to your own mailing address
#     (you want the reports to be credible, and it's also nice
#      to hear what's going on with the attacking hosts)
#  - set $no_mailing to 0 (otherwise mails won't go out)
#  - make sure "contact" is in searchable path when you run this
#  - if your whois command expects to see "handle@server" and not
#    "-h server handle", change the $contact_finder variable appropriately
#

use strict;
use Net::SMTP;
use Time::Local;
use POSIX;
use Socket;

# Accept at most one command-line argument: the log file to scan.
if(@ARGV > 1) {
  die("Usage: $0 [log-file]");
}

# ----------------------------------------------------------------------
# Configuration.  Everything below is read by the rest of the script.
# ----------------------------------------------------------------------

my $no_mailing = 0;             # when 1, only prints out message, does not email it

print "Dry-run mode: not mailing anything out\n" if($no_mailing);

# Address used both as the SMTP envelope sender and in the From: header
# of every report.  It ships empty on purpose: the script refuses to
# send mail until it has been filled in.
my $sender = '';

die("Please set sender to your own mailing address in the 'my \$sender =' line") unless ($no_mailing || $sender);

my $attack_threshold = 4*60*60;   # minimum time between consecutive attacks to be reported, in seconds

my $max_mails = 3;              # maximum number of times to send to any email address

# File holding the time of the previous run, and file holding the
# site-specific special-handling rules.
my $last_done_timestamp_file = "/var/db/hunch-timestamp";
my $special_handling = "/usr/local/etc/hunch-special";

# Use for whois programs that only understand "handle@server" syntax
#my $contact_finder = '/usr/local/bin/contact -w -d0';
# Use for whois programs that understand "-h server handle" syntax
my $contact_finder = '/usr/local/bin/contact -d0';

# SMTP server the reports are handed to.
my $mailhost = 'localhost';

# Log file to read; '-' (the default) means standard input.
my $logfile = shift(@ARGV) || '-';

my %last_seen;                  # IPs already seen (and complained about), and time of last
                                # reported attack
my %contact_emails_seen;        # email addresses complaints have been sent to

# Local timezone abbreviation, appended to the attack time in reports.
# Taken from strftime() instead of spawning date(1) through the shell
# and chopping its output.
my $timezone = POSIX::strftime('%Z', localtime());

my @ips_to_ignore = (
                     # a whole bunch of IP ranges it doesn't make sense looking up
                     '0/8',
                     '127/8',
                     '10/8',
                     '192.168/16',
                     '172.16/12',
                     '169.254/16',
                     '192.0.2/24',
                     );

# list for special handling
# format: each element is:
# src target_email
#
# (a space separates target_email from the src)
#
# target_email is the address to send the complaint to
#
# src can be either of:
# 1. IPaddr/numbits
#    meaning that a match occurs if a given IP has the most-significant numbits in
#    common with IPaddr
#    if numbits is omitted, 32 is used
# 2. a string containing the character @.
#    a match occurs if the WHOIS contact is the same as src.
#    This is used to list various "abuse" and "security" addresses
#    for organizations that just put their NOC address in WHOIS
#
# if target_email is the string IGNORE, this attack will be ignored.
#
# list is processed in order, first match wins.
#

# Build @specials, the ordered rule list consulted for every attacker.
# Each element is the string "src target", where src is either
# "netaddr/netbits" or an email address, and target is an email address
# or the literal IGNORE.  Rules from the special-handling file come
# first; the built-in ignore ranges are appended after them.
#
# The file is optional: if it cannot be opened, only the built-in
# ranges are used.
my @specials;
if(open(my $special_fh, '<', $special_handling)) {
  while(my $rule = <$special_fh>) {
    chomp($rule);                  # chomp, not chop: the last line may lack a newline
    $rule =~ s/\#.*$//;            # remove comments
    next if($rule =~ /^\s*$/);     # ignore empty lines
    my ($src, $email) = split(' ', $rule);
    die "Missing target email for $src in $special_handling line $.\n" unless defined($email);
    if ($src =~ /@/) {          # email-to-email translation
      # nothing to do
    } else {                    # IP-range to email translation
      my ($netaddr, $netbits) = ($src =~ /^([.\d]+)(?:\/(\d+))?$/);
      $netbits = 32 unless(defined($netbits));
      die "Bad net address $src" unless defined($netaddr) && ($netbits >= 0 && $netbits <= 32);
      # normalise so that every IP rule carries an explicit bit count
      $src = "$netaddr/$netbits";
    }
    die "Bad email $email" unless ($email =~ /@/ || $email =~ /^IGNORE$/);
    push(@specials, "$src $email");
  }
  close($special_fh);
}

# add list of IP-ranges to ignore to special-list
for my $ignoreme (@ips_to_ignore) {
  push(@specials, "$ignoreme IGNORE");
}

my $lastdone;                   # time of last run (epoch seconds), to prevent double-reporting

# Month abbreviation => zero-based month number, as timelocal() expects.
# Also used when parsing the dates in the log entries.
my %months = (
              'Jan' => 0,
              'Feb' => 1,
              'Mar' => 2,
              'Apr' => 3,
              'May' => 4,
              'Jun' => 5,
              'Jul' => 6,
              'Aug' => 7,
              'Sep' => 8,
              'Oct' => 9,
              'Nov' => 10,
              'Dec' => 11);

# get the timestamp of the last run
# The file holds one line of the form "Mon DD HH:MM:SS" (no year).
if(open(my $tstamp_fh, '<', $last_done_timestamp_file)) {
  my $stamp = <$tstamp_fh>;
  close($tstamp_fh);            # close even when the file turned out to be empty

  if(defined($stamp) && $stamp =~ /^([a-zA-Z]+) +([0-9]+) +(\d+):+(\d+):+(\d+)\s*/) {
    my ($monname, $mday, $hour, $min, $sec) = ($1, $2, $3, $4, $5);

    if(exists($months{$monname})) {
      my $mon = $months{$monname};

      # the stamp carries no year, so borrow the current one
      my $year = (localtime())[5];

      # timelocal() dies on impossible dates; treat that as "can't parse"
      $lastdone = eval { timelocal($sec, $min, $hour, $mday, $mon, $year) };

      # A stamp written late in December and read in January would land
      # in the future with the current year, which would cause every log
      # line to be skipped.  It must belong to the previous year.
      if(defined($lastdone) && $lastdone > time()) {
        $lastdone = eval { timelocal($sec, $min, $hour, $mday, $mon, $year - 1) };
      }
    }
  }
}

# if couldn't parse the timestamp, or didn't find it
# or it's older than 4 days ago, set it to 4 days ago
if(!defined($lastdone) || $lastdone < time() - 4*24*60*60) {
  $lastdone = time() - 4*24*60*60;
}

# ----------------------------------------------------------------------
# Main pass over the log.
#
# For every entry newer than the previous run:
#   1. recognise the request as a known probe (otherwise skip it),
#   2. turn the client into a numeric IP,
#   3. skip it if the same IP was reported less than $attack_threshold
#      seconds earlier (by log time),
#   4. find the address to complain to: special IP rules first, then the
#      contact helper, then special email-to-email rules,
#   5. skip it if that address already got $max_mails reports,
#   6. print the report, and mail it unless $no_mailing is set.
# ----------------------------------------------------------------------
print "Looking at attacks since " . localtime($lastdone) . "\n";

# '-' means standard input.  Anything else is opened with the
# three-argument form so that the name can never be taken as a mode or
# a command.
my $log_fh;
if($logfile eq '-') {
  $log_fh = \*STDIN;
} else {
  open($log_fh, '<', $logfile) or die("Can't open log file $logfile: $!");
}

my $nskipped = 0;
while (my $line = <$log_fh>) {

  my ($attacktime, $diagnostic, $attackerip, $method, $rest);
  my $truncated_line;
  my $log_entry_time;

  chomp($line);

  # client, [day/Mon/year:hour:min:sec ...], "METHOD request ..."
  if($line =~ /^\s*(\S+).*\[(\d+)\/([a-zA-Z]+)\/+(\d+):(\d+):(\d+):(\d+).*\"(GET|POST|HEAD|CONNECT|OPTIONS|SEARCH) ([^ ]+)/) {
    my ($host, $mday, $monname, $year, $hour, $min, $sec, $verb, $uri) =
      ($1, $2, $3, $4, $5, $6, $7, $8, $9);

    # unknown month name: the date cannot be trusted, drop the line
    next unless(exists($months{$monname}));

    # timelocal() dies on impossible dates; drop such lines instead of
    # aborting the whole run
    $log_entry_time = eval { timelocal($sec, $min, $hour, $mday, $months{$monname}, $year) };
    next unless(defined($log_entry_time));

    ($attackerip, $method, $rest) = ($host, $verb, $uri);

    # only the first 70 characters of the entry are quoted in the report
    $truncated_line = substr($line, 0, 70);

    $attacktime = localtime($log_entry_time) . " $timezone";

    if($log_entry_time < $lastdone) {
      $nskipped++;
      next;
    } else {
      # we're assuming the log lines are ordered chronologically,
      # so the first non-ignored lines will be after the last ignored line
      if($nskipped > 0) {
        print "$nskipped lines assumed already seen and skipped\n";
        $nskipped = 0;
      }
    }

  }

  # line did not look like a log entry
  next unless(defined($rest));

  # Signature table: the first matching branch names the probe.
  if($rest =~ m!^/default.ida\?XXXXXXX!) {
    $diagnostic = 'Code Red 2';
  } elsif($rest =~ m!^/default.ida\?NNNNNNN!) {
    $diagnostic = 'Code Red 1';
  } elsif($rest =~ m!winnt/system32/cmd\.exe!) {
    $diagnostic = "IIS scan";
  } elsif($rest =~ m!^/cgi-bin/formmail!i) {
    $diagnostic = "formmail scan";
  } elsif($rest =~ m!^/cgi-bin/(contact\.(cgi|pl)|mailform\.(cgi|pl)|webmailer\.exe|sendmail\.pl|fmail\.pl|request\.(cgi|pl)|reqinfo\.pl|tellafriend\.cgi|refer\.cgi|feedback\.(cgi|pl)|ezformml\.cgi|mailer\.(cgi|pl))$!i) {
    $diagnostic = "mail relay scan";
  } elsif($rest =~ m!^/cgi-bin/(mail\.(cgi|pl)|fm\.pl|form\.(cgi|pl)|sendform\.cgi|sender\.pl|swmail\.pl|npl_mailer\.cgi|email\.pl|contactus\.cgi|friends\.cgi|send_to_a_friend\.pl|emailer\.cgi|send2friend\.cgi|referralhub\.cgi|form2mail\.cgi)$!i) {
    $diagnostic = "mail relay scan";
  } elsif($rest =~ m!^/cgi-bin/cgiemail/(mailtemp|contact)\.txt$!i) {
    $diagnostic = "mail relay scan";
  } elsif($rest =~ m!^/cgi-bin/mailer/mailer.cgi$!i) {
    $diagnostic = "mail relay scan";
  } elsif($rest =~ m!^/(mail\.cgi|contact\.cgi|formmail\.pl|emailform\.pl|BFormMail\.pl)!i) {
    $diagnostic = "mail relay scan";
  } elsif($rest =~ m!^/cgi/(contact\.pl|formmail)!i) {
    $diagnostic = "mail relay scan";
  } elsif($rest =~ m!^/form-bin/deliver!i) {
    $diagnostic = "mail relay scan";
  } elsif($rest =~ m!^/cgibin/contact\.cgi$!i) {
    $diagnostic = "mail relay scan";
  } elsif($rest =~ m!^/elink/elink_results!i) {
    $diagnostic = "mail relay scan";
  } elsif($rest =~ m!^/(contact|formmail|mailform)\.cgi!i) {
    $diagnostic = "mail relay scan";
  } elsif($rest =~ m!^/mtb/cgi-bin/mailform\.pl!i) {
    $diagnostic = "mail relay scan";
  } elsif($rest =~ m!^/msadc/!) {
    $diagnostic = "IIS scan";
  } elsif($rest =~ m!^/scripts.*(cmd|root)\.exe!) {
    $diagnostic = "IIS scan";
  } elsif($rest =~ m!^/cgi-bin/test-cgi!) {
    $diagnostic = "test-cgi";
  } elsif($rest =~ m!^/cgi-bin/phf\?Qalias=x!) {
    $diagnostic = "phf escalation attempt";
  } elsif($rest =~ m!^http:/!) {
    $diagnostic = "HTTP proxy scan";
  } elsif($rest =~ m!^/scripts/nsiislog\.dll!) {
    $diagnostic = "Microsoft Windows Media Services scan";
  } elsif($rest =~ m!^/_vti_bin/_vti_aut/fp30reg\.dll!) {
    $diagnostic = "FPSE fp30reg.dll overflow scan (MS03-051)";
  } elsif($method =~ m!^CONNECT!) {
    $diagnostic = "mail proxy scan";
  } elsif($rest =~ m!^/(|blog/|blogs?/xmlsrv/|drupal/|phpgroupware/|xmlrpc/|xmlsrv/)xmlrpc\.php!) {
    $diagnostic = "PHP blog scan";
  } elsif($rest =~ m!^/cvs/mambo/index2.php!) {
    $diagnostic = "mambo scan";
  } elsif($rest =~ m!^/awstats/awstats.pl\?configdir=\|echo;!) {
    $diagnostic = "AWstats scan";
  } elsif($method =~ m!^SEARCH! && $rest =~ m!^/\\x90\\x02\\xb1\\x02\\xb1\\x02\\xb1\\x02\\xb1\\x02\\xb1\\x02\\xb1\\x02!) {
    $diagnostic = "Microsoft IIS webDAV scan (MS03-007)";
  }

  next unless($diagnostic);     # give up now if signature unknown

  if($attackerip !~ /^[.\d]+$/) {   # not a numeric IP - convert to one
    my ($name,$aliases,$addrtype,$length,@addrs) = gethostbyname($attackerip);
    if(@addrs) {
      $attackerip = inet_ntoa($addrs[0]);
    } else {
      warn("Cannot find IP for host $attackerip - ignored");
      next;
    }
  }

  if(!($attackerip && $attacktime && $diagnostic)) {
    # warn("can't parse log line, ignored: $line\n");
    next;
  }

  # Rate-limit per IP, measured in log time rather than wall-clock time.
  if(defined($last_seen{$attackerip}) && ($last_seen{$attackerip} > ($log_entry_time - $attack_threshold))) {
    LOG_MSG("IP seen recently, skipping: $attackerip\n");
    next;
  } else {
    $last_seen{$attackerip} = $log_entry_time; # record time last seen
  }

  my $ignore = 0;
  my $to;

  # find contact email from attacker's ip
  # first look in list of special handling
  for my $special (@specials) {
    my ($iprange, $mailaddr) = split(' ', $special);
    next if($iprange =~ /@/);   # an email-to-email entry
    my ($netaddr, $netbits) = ($iprange =~ /^([.\d]+)\/(\d+)$/);
    next unless(defined($netaddr));   # not a well-formed range
    if(ip_in_net($attackerip, $netaddr, $netbits)) {
      if($mailaddr eq 'IGNORE') {
        print "$attackerip matches ignore rule $iprange - ignored\n";
        $ignore = 1;
      } else {
        $to = $mailaddr;
        print "attacker $attackerip matches special case - using $to\n";
      }
      last;
    }
  }
  next if($ignore);

  # no special case for the IP - contact WHOIS
  if(!defined($to)) {
    # $attackerip consists only of digits and dots at this point (checked
    # or produced by inet_ntoa above), so it is safe on a command line
    $to = `$contact_finder $attackerip`;
    $to = '' unless(defined($to));   # helper could not be started
    chomp($to);

    if($to !~ /@/) {
      warn("Cannot find contact email for $attackerip");
      next;
    }

  }

  # look in list of special handling for email translations
  for my $special (@specials) {
    my ($srcaddr, $tgtaddr) = split(' ', $special);
    next if($srcaddr !~ /@/);   # an IP range entry
    # literal, case-insensitive comparison: the rule is an address, not
    # a pattern, so '.' and '+' in it must not act as regex operators
    if($to =~ /^\Q$srcaddr\E$/i) {
      if($tgtaddr eq 'IGNORE') {
        print "address $to matches ignore rule $srcaddr - ignored\n";
        $ignore = 1;
      } else {
        print "attacker $to matches special case - using $tgtaddr\n";
        $to = $tgtaddr;
      }
      last;
    }
  }
  next if($ignore);             # honour IGNORE for email rules as well

  # Rate-limit per recipient.
  if(defined($contact_emails_seen{$to})) {
    if(++$contact_emails_seen{$to} > $max_mails) {
      print "contact already emailed enough times, skipping: $to\n";
      next;
    }
  } else {
    $contact_emails_seen{$to} = 1;
  }

  if($no_mailing) {
    print "To: $to Host: $attackerip Time: $attacktime Diagnostic: $diagnostic\n";
  } else {
    print "Mailing To: $to Host: $attackerip Time: $attacktime Diagnostic: $diagnostic\n";
    my $smtp = Net::SMTP->new($mailhost);

    # Aborting here also leaves the last-run timestamp untouched.
    die("Cannot connect to host $mailhost") unless $smtp;

    my $data = "To: $to\n".
      "From: $sender\n".
      "Subject: network abuse: $attackerip\n".
      "\n".
      "Please have a look at the following host. It seems\n".
      "to be infected with scanning/break-in code.\n\n".
      "Host: $attackerip\n".
      "Time: $attacktime\n".
      "Diagnostic: $diagnostic\n".
      "Log line sample:\n$truncated_line\n";

    # Each step returns false when the server refuses it; stop at the
    # first refusal and say so instead of silently losing the report.
    my $sent = $smtp->mail($sender)
      && $smtp->to($to)
      && $smtp->data()
      && $smtp->datasend($data)
      && $smtp->dataend();
    warn("Mail to $to about $attackerip was not accepted by $mailhost\n") unless($sent);

    $smtp->quit;
  }

}
# standard input is left open; only a file we opened ourselves is closed
close($log_fh) unless($logfile eq '-');

LOG_MSG("");                    # flush log

# place timestamp in file for next time
# The line written is "Mon DD HH:MM:SS" for the current local time.
if(!$no_mailing) {              # if no side-effects, don't bother updating timestamp
  if(open(my $tstamp_fh, '>', $last_done_timestamp_file)) {
    my ($sec, $min, $hour, $mday, $mon) = localtime();

    # Month names are spelled out here rather than taken from
    # strftime("%b"): that output may follow the process locale, while
    # the code that reads this file back only knows the English
    # abbreviations.
    my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
    print {$tstamp_fh} sprintf("%s %02d %02d:%02d:%02d\n",
                               $month_names[$mon], $mday, $hour, $min, $sec);

    # buffered write errors only show up at close time
    close($tstamp_fh)
      or warn("Can't write last-seen time in $last_done_timestamp_file: $!");
  } else {
    warn("Can't write last-seen time in $last_done_timestamp_file");
  }
}

# ip_in_net($ip, $netaddr, $netbits)
#
# Tell whether the dotted-quad address $ip lies inside the network whose
# address is $netaddr and whose prefix is $netbits bits long.
#
#   $ip       dotted IPv4 address, e.g. "192.168.5.7"
#   $netaddr  dotted network address; trailing octets may be left out
#             ("10" is read as 10.0.0.0, "192.168" as 192.168.0.0)
#   $netbits  number of leading bits that must agree; clamped to 0..32.
#             When missing or not an integer, 32 is used (exact match),
#             rather than a mask of zero bits that would match everything.
#
# Returns a true value when the address is in the network, a false value
# otherwise.
sub ip_in_net {
  my ($ip_, $netaddr_, $netbits) = @_;

  # convert numbits to canonical form
  $netbits = 32 unless(defined($netbits) && $netbits =~ /^-?\d+$/);
  $netbits = 32 if($netbits > 32);
  $netbits = 0 if($netbits < 0);
  my $netmask = pack('B32', ('1' x $netbits) . ('0' x (32 - $netbits)));

  # convert both addresses to canonical (4-byte) form
  my ($ip, $netaddr) = map { _pack_ipv4($_) } ($ip_, $netaddr_);

  # both operands are packed strings, so & works byte by byte
  return(($netaddr & $netmask) eq ($ip & $netmask));
}

# Pack a dotted address into 4 bytes.  Missing, empty or non-numeric
# octets count as 0; values above 255 wrap modulo 256.
sub _pack_ipv4 {
  my ($dotted) = @_;

  my @octets = split(/\./, defined($dotted) ? $dotted : '');
  push(@octets, 0) while(@octets < 4);

  return pack('C4', map { /^\d+$/ ? $_ % 256 : 0 } @octets[0 .. 3]);
}

# LOG_MSG($msg)
#
# Print progress messages while collapsing runs of identical ones.
#
# The message is not printed straight away: it is held back until a
# different message arrives, at which point the held one is printed,
# followed by " (repeats N times)" if it was seen more than once.
# Calling LOG_MSG("") therefore flushes whatever is pending.
#
#   $msg  text to log; one trailing newline, if present, is removed
#         (a newline is always added on output)
#
# State is kept in the package variables $last_msg and $last_msg_count.
sub LOG_MSG {
  our ($last_msg, $last_msg_count);

  my $msg = shift;
  $msg = '' unless(defined($msg));
  chomp($msg);                  # chomp, not chop: keep the text of messages without a newline

  # very first message: nothing to print yet, just remember it
  if(!defined($last_msg)) {
    $last_msg = $msg;
    $last_msg_count = 1;
    return;
  }

  if($msg eq $last_msg) {       # repeating message
    $last_msg_count++;
    return;
  }

  # a new message - show message in buffer
  if($last_msg_count > 1) {
    print $last_msg . " (repeats $last_msg_count times)\n";
  } else {
    print $last_msg . "\n";
  }
  $last_msg = $msg;
  $last_msg_count = 1;
  return;
}
