#!/usr/bin/perl -w

# snort-rep - snort-reporting tool
# by David Schweikert <dws@ee.ethz.ch>
# Copyright (c) 2001, 2002 by ETH Zurich. All rights reserved.
# See 'LICENSE' at bottom for copying conditions
#
# ---> http://people.ee.ethz.ch/~dws/software/snort-rep/ <---
#
# based on snort-stat.pl 1.6 by Yen-Ming Chen, <chenym+@CMU.EDU>

# Version of snort-rep itself; interpolated into the banner printed by version().
my $VERSION = "1.10";

######## Parse::Syslog 1.02 (automatically embedded) ########
package Parse::Syslog;
use Carp;
use Symbol;
use Time::Local;
use strict;
use vars qw($VERSION);
# Month abbreviation => zero-based month index (the index that next() passes
# on to str2time()).  Capitalized and lower-case English abbreviations are
# built in; _use_locale() adds the abbreviations of other locales at run time.
my %months_map = (
    'Jan' => 0, 'Feb' => 1, 'Mar' => 2,
    'Apr' => 3, 'May' => 4, 'Jun' => 5,
    'Jul' => 6, 'Aug' => 7, 'Sep' => 8,
    'Oct' => 9, 'Nov' =>10, 'Dec' =>11,
    'jan' => 0, 'feb' => 1, 'mar' => 2,
    'apr' => 3, 'may' => 4, 'jun' => 5,
    'jul' => 6, 'aug' => 7, 'sep' => 8,
    'oct' => 9, 'nov' =>10, 'dec' =>11,
);
# year-increment algorithm: if in january, if december is seen, decrement year
# (flag maintained by next(): set whenever a January line is seen, cleared as
# soon as a month other than January or December is seen)
my $enable_year_decrement = 1;
# fast timelocal, cache minute's timestamp
# don't cache more than minute because of daylight saving time switch
# cached fields: 0: min, 1: h, 2: day, 3: month, 4: year, 5: GMT flag (0/1)
my @str2time_last_minute;
# timestamp of second 0 of the cached minute
my $str2time_last_minute_timestamp;
# Convert a broken-down time into a Unix timestamp.
# Arguments: 0: sec, 1: min, 2: h, 3: day, 4: month, 5: year,
#            6: GMT flag (true: interpret as UTC, false: as local time)
# Returns the timestamp; consecutive calls within the same minute (and the
# same GMT setting) are answered from the cache without calling Time::Local.
sub str2time($$$$$$$)
{
    # normalize the flag so that it can be compared numerically
    my $GMT = pop(@_) ? 1 : 0;
    # the GMT flag is part of the cache key: a timestamp computed for UTC
    # must never be reused for a local-time request (or vice versa)
    if(defined $str2time_last_minute[5] and
        $str2time_last_minute[5] == $GMT and
        $str2time_last_minute[0] == $_[1] and
        $str2time_last_minute[1] == $_[2] and
        $str2time_last_minute[2] == $_[3] and
        $str2time_last_minute[3] == $_[4] and
        $str2time_last_minute[4] == $_[5])
    {
        return $str2time_last_minute_timestamp + $_[0];
    }
    my $time;
    if($GMT) {
        $time = timegm(@_);
    }
    else {
        $time = timelocal(@_);
    }
    @str2time_last_minute = (@_[1..5], $GMT);
    $str2time_last_minute_timestamp = $time-$_[0];
    return $time;
}
# Register the abbreviated month names of the given locale in %months_map.
# Croaks if the locale cannot be activated; the LC_TIME locale that was in
# effect on entry is restored before returning normally.
sub _use_locale($)
{
    use POSIX qw(locale_h strftime);
    my $saved_locale = setlocale(LC_TIME);
    foreach my $name (@_) {
        setlocale(LC_TIME, $name)
            or croak "new(): wrong 'locale' value: '$name'";
        # only the month field matters to "%b"; day 1 / year 96 are fillers
        $months_map{strftime("%b", 0, 0, 0, 1, $_, 96)} = $_ for 0..11;
    }
    setlocale(LC_TIME, $saved_locale);
}
# Constructor.
#   $file  - path of the log file, "-" for standard input, or a File::Tail
#            object to follow a growing file
#   %data  - options stored in the object: 'year' (defaults to the current
#            year), 'locale' (a locale name or an array ref of names whose
#            month abbreviations should be recognized); other keys such as
#            'GMT', 'repeat' and 'arrayref' are read by next()
# Croaks if the file cannot be opened or a locale is invalid.
sub new($$;%)
{
    my ($class, $file, %data) = @_;
    croak "new() requires one argument: file" unless defined $file;
    if(not defined $data{year}) {
        $data{year} = (localtime(time))[5]+1900;
    }
    $data{_repeat}=0;
    if(ref $file eq 'File::Tail') {
        $data{filetail} = 1;
        $data{file} = $file;
    }
    elsif($file eq '-') {
        # "-" keeps meaning standard input, as it did with the two-argument
        # open() that was used here before
        $data{file} = \*STDIN;
    }
    else {
        $data{file}=gensym;
        # three-argument open: the file name is never parsed for mode
        # characters, pipes or descriptor duplication
        open($data{file}, '<', $file) or croak "can't open $file: $!";
    }
    if(defined $data{locale}) {
        if(ref $data{locale} eq 'ARRAY') {
            # _use_locale has a ($) prototype: passing the array directly
            # would hand over its element count, so call it once per locale
            for my $locale (@{$data{locale}}) {
                _use_locale($locale);
            }
        }
        elsif(ref $data{locale} eq '') {
            _use_locale($data{locale});
        }
        else {
            croak "'locale' parameter must be scalar or array of scalars";
        }
    }
    return bless \%data, $class;
}
# Fetch the next raw line from the underlying source: File::Tail objects are
# asked via their read() method, plain file handles via readline.
sub _next_line($)
{
    my ($self) = @_;
    my $source = $self->{file};
    return defined $self->{filetail} ? $source->read : <$source>;
}
# Return the next parsed log entry, or undef when the source is exhausted.
#
# The entry is a hash ref (timestamp, host, program, pid, msgid, facility,
# level, text) or, if the 'arrayref' option is set, an array ref
# [timestamp, host, program, pid, text].  "last message repeated N times"
# lines are expanded into N copies of the previous entry of the same host,
# unless the 'repeat' option is defined and false.  Lines that are not in
# syslog format are reported with carp and skipped.
sub next($)
{
    my ($self) = @_;
    # still copies of a repeated message pending?
    if($self->{_repeat}>0) {
        $self->{_repeat}--;
        return $self->{_repeat_data};
    }
    # NOTE: the loop deliberately tests truth, not definedness: a source
    # that returns an empty string (no data available) ends the iteration
    line: while(my $str = $self->_next_line()) {
        # date, time and host 
        $str =~ /^
            (\S{3})\s+(\d+)   # date  -- 1, 2
            \s
            (\d+):(\d+):(\d+) # time  -- 3, 4, 5
            \s
            ([-\w\.]+)        # host  -- 6
            \s+
            (.*)              # text  -- 7
            $/x or do
        {
            carp "line not in syslog format: $str";
            next line;
        };
        my $mon = $months_map{$1};
        defined $mon or croak "unknown month $1\n";
        # year change
        if($mon==0) {
            # December -> January: a new year has begun
            $self->{year}++ if defined $self->{_last_mon} and $self->{_last_mon} == 11;
            $enable_year_decrement = 1;
        }
        elsif($mon == 11) {
            # December seen after January (delayed entry of the old year)
            if($enable_year_decrement) {
                $self->{year}-- if defined $self->{_last_mon} and $self->{_last_mon} != 11;
            }
        }
        else {
            $enable_year_decrement = 0;
        }
        $self->{_last_mon} = $mon;
        # convert to unix time
        my $time = str2time($5,$4,$3,$2,$mon,$self->{year}-1900,$self->{GMT});
        my ($host, $text) = ($6, $7);
        # last message repeated ... times
        if($text =~ /^(?:last message repeated|above message repeats) (\d+) time/) {
            next line if defined $self->{repeat} and not $self->{repeat};
            next line if not defined $self->{_last_data}{$host};
            $1 > 0 or do {
                carp "last message repeated 0 or less times??";
                next line;
            };
            # one copy is returned right now, the rest on the following calls
            $self->{_repeat}=$1-1;
            $self->{_repeat_data}=$self->{_last_data}{$host};
            return $self->{_last_data}{$host};
        }
        # marks
        next if $text eq '-- MARK --';
        # some systems send over the network their
        # hostname prefixed to the text. strip that.
        # (\Q..\E: the dots of a host name must match literally, not act as
        # regex wildcards)
        $text =~ s/^\Q$host\E\s+//;
        # discard ':' in HP-UX 'su' entries like this:
        # Apr 24 19:09:40 remedy : su : + tty?? root-oracle
        $text =~ s/^:\s+//;
        $text =~ /^
            ([^:]+?)        # program   -- 1
            (?:\[(\d+)\])?  # PID       -- 2
            :\s+
            (?:\[ID\ (\d+)\ ([a-z0-9]+)\.([a-z]+)\]\ )?   # Solaris 8 "message id" -- 3, 4, 5
            (.*)            # text      -- 6
            $/x or do
        {
            carp "line not in syslog format: $str";
            next line;
        };
        # remember the entry per host so that a following "repeated" line
        # can be expanded
        if($self->{arrayref}) {
            $self->{_last_data}{$host} = [
                $time,  # 0: timestamp 
                $host,  # 1: host      
                $1,     # 2: program   
                $2,     # 3: pid       
                $6,     # 4: text      
                ];
        }
        else {
            $self->{_last_data}{$host} = {
                timestamp => $time,
                host      => $host,
                program   => $1,
                pid       => $2,
                msgid     => $3,
                facility  => $4,
                level     => $5,
                text      => $6,
            };
        }
        return $self->{_last_data}{$host};
    }
    return undef;
}

######## Text::FormatTable 0.01 (automatically embedded) ########
package Text::FormatTable;
use Carp;
use strict;
use vars qw($VERSION);
# minimal width of $1 if word-wrapped
# Minimal width of $1 if word-wrapped: the length of its longest word.
# Returns 0 for an empty or all-whitespace string, so that callers always
# get a number to calculate with.
sub _min_width($)
{
    my $str = shift;
    my $min = 0;
    for my $s (split(/\s+/,$str)) {
        my $l = length $s;
        $min = $l if $l > $min;
    }
    return $min;
}
# width of $1 if not word-wrapped
# Width of the text when it is not word-wrapped at all, i.e. its length.
sub _max_width($)
{
    my ($text) = @_;
    return length($text);
}
# Larger of two numbers; an undefined argument loses against a defined one
# (undef is returned only if both are undefined).
sub _max($$)
{
    my ($x, $y) = @_;
    return $y unless defined $x;
    return $x unless defined $y;
    return $x >= $y ? $x : $y;
}
# word-wrap multi-line $2 with width $1
# Word-wrap the (possibly multi-line) text $2 to width $1.
# Every input line is wrapped on its own; returns a ref to the list of
# resulting lines.
sub _wrap($$)
{
    my ($width, $text) = @_;
    my @wrapped = map { @{_wrap_line($width, $_)} } split(/\n/, $text);
    return \@wrapped;
}
# Word-wrap the single line $2 to width $1; returns a ref to the list of
# pieces.  Breaks happen at whitespace only: a word longer than the width is
# kept whole and simply sticks out.
sub _wrap_line($$)
{
    my ($width, $text) = @_;
    my $width_m1 = $width-1;
    my @pieces;
    my $rest = $text;
    while(length($rest) > $width) {
        # preferred: break at the last whitespace that keeps the piece
        # within the width
        my @cut = $rest =~ /^(.{0,$width_m1}\S)\s+(\S.*?)$/;
        # otherwise: break at the first whitespace beyond the width
        @cut = $rest =~ /(.{$width,}?\S)\s+(\S.*?)$/ unless @cut;
        # no whitespace to break at => the remainder stays as it is
        last unless @cut;
        push @pieces, $cut[0];
        $rest = $cut[1];
    }
    push @pieces, $rest;
    return \@pieces;
}
# render left-box $2 with width $1
# Render text $2 as a left-aligned box of width $1: the text is word-wrapped
# and every line is padded with spaces on the right.  Returns a ref to the
# list of lines; lines longer than the width are left unpadded.
sub _l_box($$)
{
    my ($width, $text) = @_;
    my $box = _wrap($width, $text);
    for my $line (@$box) {
        $line .= ' ' x ($width - length($line));
    }
    return $box;
}
# render right-box $2 with width $1
# Render text $2 as a right-aligned box of width $1: the text is
# word-wrapped and every line is padded with spaces on the left.  Returns a
# ref to the list of lines; lines longer than the width are left unpadded.
sub _r_box($$)
{
    my ($width, $text) = @_;
    my $box = _wrap($width, $text);
    for my $line (@$box) {
        $line = ' ' x ($width - length($line)) . $line;
    }
    return $box;
}
# Algorithm of:
# http://ei5nazha.yz.yamagata-u.ac.jp/~aito/w3m/eng/STORY.html
# Weight of a column when the available width is distributed: the natural
# logarithm of its maximal (unwrapped) width.
# Columns narrower than one character get weight 0; log() is undefined
# there and perl would die with "Can't take log of 0".
sub _distribution_f($)
{
    my $max_width = shift;
    return 0 unless defined $max_width and $max_width >= 1;
    return log($max_width);
}
# Calculate the width of every data column for a table that should be
# $width characters wide and store the result in $self->{widths} (one entry
# per data column) and $self->{total_width}.
#
# If all columns fit unwrapped, every column gets its maximal width.
# Otherwise the available width is distributed proportionally to
# _distribution_f(max width); columns for which that share is smaller than
# their longest word or larger than their maximal width are fixed at that
# limit and the distribution is repeated for the remaining columns.
sub _calculate_widths($$)
{
    my ($self, $width) = @_;
    # per column: [0] min width (longest word), [1] max width (unwrapped),
    #             [2] fixed width, once one has been decided
    my @widths = ();
    # calculate min and max widths for each column
    for my $r (@{$self->{data}})
    {
        $r->[0] eq 'data' or $r->[0] eq 'head' or next;
        my $cn=0;
        for my $c (@{$r->[1]}) {
            $widths[$cn][0] = _max($widths[$cn][0], _min_width($c));
            $widths[$cn][1] = _max($widths[$cn][1], _max_width($c));
            $cn++;
        }
    }
    # columns without any measurable content count as zero-width
    for my $c (@widths) {
        $c->[0] = 0 unless defined $c->[0];
        $c->[1] = 0 unless defined $c->[1];
    }
    # calculate total min and max width
    my ($total_min, $total_max) = (0,0);
    for my $c (@widths) {
        $total_min += $c->[0];
        $total_max += $c->[1];
    }
    # extra space: every '|' and ' ' of the format takes one character
    my $extra_width = scalar grep {$_->[0] eq '|' or $_->[0] eq ' '}
        (@{$self->{format}});
    $total_min += $extra_width;
    $total_max += $extra_width;
    # if total_max <= screen width => use max as width
    if($total_max <= $width) {
        my $cn = 0;
        for my $c (@widths) {
            $self->{widths}[$cn]=$c->[1];
            $cn++;
        }
        $self->{total_width} = $total_max;
    }
    else {
        my @dist_width;
        ITERATION: while(1) {
            my $total_f = 0.0;
            my $fixed_width = 0;
            my $remaining=0;
            for my $c (@widths) {
                if(defined $c->[2]) {
                    $fixed_width += $c->[2];
                }
                else {
                    # log() of a zero width is undefined: weight 0 instead
                    $total_f += $c->[1] > 0 ? _distribution_f($c->[1]) : 0;
                    $remaining++;
                }
            }
            my $available_width = $width-$extra_width-$fixed_width;
            # enlarge width if it isn't enough
            if($available_width < $remaining*5) {
                $available_width = $remaining*5;
                $width = $extra_width+$fixed_width+$available_width;
            }
            my $cn=-1;
            COLUMN: for my $c (@widths) {
                $cn++;
                next COLUMN if defined $c->[2]; # skip fixed-widths
                if($total_f <= 0) {
                    # every remaining column is at most one character wide
                    # (all weights are 0): nothing to distribute, and
                    # dividing by $total_f would be a division by zero
                    $c->[2] = $c->[1];
                    next COLUMN;
                }
                my $f = $c->[1] > 0 ? _distribution_f($c->[1]) : 0;
                my $w = $f * $available_width / $total_f;
                if($c->[0] > $w) {
                    # share is smaller than the longest word
                    $c->[2] = $c->[0];
                    next ITERATION;
                }
                if($c->[1] < $w) {
                    # share is larger than the column ever needs
                    $c->[2] = $c->[1];
                    next ITERATION;
                }
                $dist_width[$cn] = int($w);
            }
            last;
        }
        my $cn = 0;
        my $total = $extra_width;
        for my $c (@widths) {
            my $w = defined $c->[2] ? $c->[2] : $dist_width[$cn];
            $self->{widths}[$cn] = $w;
            $total += $w;
            $cn++;
        }
        # keep total_width in step with the widths in this branch as well
        $self->{total_width} = $total;
    }
}
# Render a horizontal rule made of character $char across the whole table.
# Data columns are filled to their calculated width; a '|' separator
# becomes '+' when the rule character is '-', otherwise the rule character.
sub _render_rule($$)
{
    my ($self, $char) = @_;
    my $crossing = $char eq '-' ? '+' : $char;
    my $data_col = 0;
    my @parts;
    for my $spec (@{$self->{format}}) {
        my $type = $spec->[0];
        if($type eq '|') {
            push @parts, $crossing;
        }
        elsif($type eq ' ') {
            push @parts, $char;
        }
        elsif($type eq 'l' or $type eq 'r') {
            push @parts, $char x $self->{widths}[$data_col];
            $data_col++;
        }
    }
    return join('', @parts)."\n";
}
# Render one head/data row.  $data is a ref to the list of cell texts, one
# per data column.  Every cell is boxed to its column width (left- or
# right-aligned according to the format); the row is as high as its tallest
# cell and shorter cells are filled up with blank lines.
sub _render_data($$)
{
    my ($self, $data) = @_;

    # box every cell and find out how many text lines the row needs
    my @boxes;
    my $height = 0;
    my $cell = 0;
    for my $spec (@{$self->{format}}) {
        my $type = $spec->[0];
        next unless $type eq 'l' or $type eq 'r';
        my $box = $type eq 'l'
            ? _l_box($self->{widths}[$cell], $data->[$cell])
            : _r_box($self->{widths}[$cell], $data->[$cell]);
        $boxes[$cell] = $box;
        $height = scalar @$box if $height < scalar @$box;
        $cell++;
    }

    # assemble the row line by line
    my $out = '';
    for my $line_no (0..($height-1)) {
        my $data_col = 0;
        for my $spec (@{$self->{format}}) {
            my $type = $spec->[0];
            if($type eq '|' or $type eq ' ') {
                # separators are copied verbatim
                $out .= $type;
            }
            elsif($type eq 'l' or $type eq 'r') {
                my $piece = $boxes[$data_col][$line_no];
                $out .= defined $piece ? $piece : ' 'x($self->{widths}[$data_col]);
                $data_col++;
            }
        }
        $out .= "\n";
    }
    return $out;
}
# Parse the column format string: 'l' and 'r' are left-/right-aligned data
# columns, '|' and ' ' are separators; anything else makes it croak.
# Stores the parsed list in $self->{format} and the number of format
# characters and of data columns in $self->{col} and $self->{data_col}.
sub _parse_format($$)
{
    my ($self, $format) = @_;
    my @parsed;
    my $data_cols = 0;
    for my $char (split(//, $format)) {
        if($char eq '|' or $char eq ' ') {
            push @parsed, [$char];
        }
        elsif($char eq 'l' or $char eq 'r') {
            # data columns remember their index among the data columns
            push @parsed, [$char, $data_cols++];
        }
        else {
            croak "unknown column format: $char";
        }
    }
    $self->{format}=\@parsed;
    $self->{col}=scalar @parsed;
    $self->{data_col}=$data_cols;
}
# Constructor: create an empty table for the given column format string
# (see _parse_format for the accepted characters).
sub new($$)
{
    my ($class, $format) = @_;
    defined $format or croak "new() requires one argument: format";
    my $self = bless { col => '0', row => '0', data => [] }, $class;
    $self->_parse_format($format);
    return $self;
}
# remove head and trail space
# Strip leading and trailing whitespace from the cells of a row, in place.
# Both substitutions use /m and are applied once per cell.
sub _preprocess_row_data($$)
{
    my ($self, $cells) = @_;
    for my $cell (@$cells) {
        $cell =~ s/^\s+//m;
        $cell =~ s/\s+$//m;
    }
}
# Append a header row; takes exactly one text per data column and croaks
# otherwise.
sub head($@)
{
    my $self = shift;
    my @cells = @_;
    croak "number of columns must be $self->{data_col}"
        unless scalar @cells == $self->{data_col};
    $self->_preprocess_row_data(\@cells);
    my $slot = $self->{row}++;
    $self->{data}[$slot] = ['head', \@cells];
}
# Append a data row; takes exactly one text per data column and croaks
# otherwise.
sub row($@)
{
    my $self = shift;
    my @cells = @_;
    croak "number of columns must be $self->{data_col}"
        unless scalar @cells == $self->{data_col};
    $self->_preprocess_row_data(\@cells);
    my $slot = $self->{row}++;
    $self->{data}[$slot] = ['data', \@cells];
}
# Append a horizontal rule drawn with character $char ('-' if undefined).
sub rule($$)
{
    my ($self, $char) = @_;
    my $slot = $self->{row}++;
    $self->{data}[$slot] = ['rule', defined $char ? $char : '-'];
}
# Render the whole table for a text width of $width characters (79 if
# undefined) and return it as one string.
sub render($$)
{
    my ($self, $width) = @_;
    $width = 79 unless defined $width;
    $self->_calculate_widths($width);
    my @chunks;
    for my $entry (@{$self->{data}}) {
        my ($kind, $payload) = @$entry;
        if($kind eq 'rule') {
            push @chunks, $self->_render_rule($payload);
        }
        elsif($kind eq 'head' or $kind eq 'data') {
            # header and data rows are rendered the same way
            push @chunks, $self->_render_data($payload);
        }
    }
    return join('', @chunks);
}

#####################################################################
#####################################################################
#####################################################################

use strict;
use Time::Local;
use Socket;    # for resolv()
use Getopt::Long;

use vars qw($a $b);

# config
# fallbacks for --priority-med / --priority-high / --text-width (see main())
my $default_priority_med  = 7;
my $default_priority_high = 16;
my $default_text_width    = 79;
# maximal length of a host name in --narrow mode; add_alert() keeps
# ($narrow_host_size - 3) / 2 characters on either side of a "..."
my $narrow_host_size      = 31;    # must be odd

# prototypes
sub read_local_nets_file($);
sub resolv($);
sub ip_mask($$);
sub is_local($);
sub source_syslog($);
sub parse_syslog_alert($$);
sub parse_portscan($$);
sub source_fastlog($);
sub parse_fastlog($);
sub prepare_rules();
sub report_text();
sub report_html();

# global variables
# minimal number of occurrences for a row of the per-port text report
my $ports_threshold = 1;
# time range covered by the input; maintained by source_syslog()/parse_fastlog()
my ($start_time, $end_time);
# command line options as filled in by GetOptions() in main()
my %opt;
# local networks in "IP-addr/mask" notation (--local, --local-file)
my @local_nets;
# input sources in "type:file" notation (--source)
my @sources = ();
# priority number => { order, text, html }; set up by main()
my %PRIO    = ();
my $text_hr = ('=' x 70) . "\n";

# statistics hashes
# the %alerts* hashes map a key to { data => first alert seen, count => N }
# (see add_count()); add_alert() uses the keys: rule id / "rule id, local
# IP" / "rule id, remote IP" / "rule id, destination port"
my %alerts        = ();
my %alerts_local  = ();
my %alerts_remote = ();
my %alerts_port   = ();
my $alerts_total  = 0;
# rule id => { sid, name, prio, ... }
my %rules         = ();
# scanning host => { hosts, ports, stealth }; filled by parse_portscan()
my %portscans     = ();

# Print the command line help to standard output and exit with status 0.
sub usage() {
    my $help_text = <<USAGE;
Usage: snort-rep [OPTIONS] [syslog-file]
Snort reporting tool.

  -r, --resolve            resolve host names
  -s, --source=SOURCE[,SOURCE...]   read input from SOURCE
  -t, --text               text report (default)
      --text-width=N       width of text reports (default: 79)
  -H, --html               HTML report
  -l, --local=NET[,NET...] NET is a local network
  -L, --local-file=FILE    FILE contains list of local networks
  -R, --remove-name=REGEX  remove REGEX from host names
      --priority-med=N     prio>=N is considered 'medium' (default:$default_priority_med)
      --priority-high=N    prio>=N is considered 'high' (default:$default_priority_high)
  -N, --narrow             make the report narrower (cut too long words, etc.)
  -h, --help               print this help text
  -V, --version            print version information

Specify networks as IP-addr/mask, for example 192.168.1.0/24
USAGE
    print $help_text;

    exit 0;
}

# Print version and copyright information to standard output and exit with
# status 0.
sub version() {
    my $banner = <<VERSION;
snort-rep $VERSION
Written by David Schweikert <dws\@ee.ethz.ch>

Copyright (c) 2001, 2002 by ETH Zurich. All rights reserved.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
VERSION
    print $banner;

    exit 0;
}

# Program entry point: parse the command line, read all input sources,
# then print the text and/or HTML report.
sub main() {

    # options
    Getopt::Long::Configure('no_auto_abbrev', 'no_ignore_case', 'bundling');
    GetOptions(
        \%opt, 'help|h', 'version|V', 'resolve|r', 'text|t', 'html|H',
        'local|l=s' => \@local_nets,
        'local-file|L=s', 'remove-name|R=s', 'source|s=s' => \@sources,
        'narrow|N',
        'text-width=i',
        # documented in usage() and read below, so they have to be declared
        # (otherwise GetOptions rejects them as unknown options)
        'priority-med=i', 'priority-high=i'
    ) or exit 1;
    usage   if $opt{help};
    version if $opt{version};
    usage   if $#ARGV > 0;

    $opt{'text-width'} = $default_text_width unless defined $opt{'text-width'};

    # initialize sources
    if (scalar @sources == 0) {
        # no --source: read the syslog file named on the command line,
        # or standard input
        if ($#ARGV >= 0) {
            my $logfilename = shift @ARGV;
            push @sources, "syslog:$logfilename";
        }
        else {
            push @sources, 'syslog:-';
        }
    }
    else {
        # --source may be repeated and may hold comma-separated lists
        @sources = split (/,/, join (',', @sources));
    }

    # initialize local networks
    @local_nets = split (/,/, join (',', @local_nets));
    read_local_nets_file($opt{'local-file'}) if $opt{'local-file'};

    # initialize priorities
    # every priority 0..20 starts as 'low'; the following two loops
    # overwrite the entries from the medium and from the high limit upwards
    my $prio_med  = $opt{'priority-med'}  || $default_priority_med;
    my $prio_high = $opt{'priority-high'} || $default_priority_high;
    for (my $i = 0 ; $i <= 20 ; $i++) {
        $PRIO{$i} = {
            'order' => 1,
            'text'  => 'low',
            'html'  => 'low'
        };
    }

    for (my $i = $prio_med ; $i <= 20 ; $i++) {
        $PRIO{$i} = {
            'order' => 1,
            'text'  => 'med',
            'html'  => '<FONT COLOR="#FF8800">med</FONT>'
        };
    }

    for (my $i = $prio_high ; $i <= 20 ; $i++) {
        $PRIO{$i} = {
            'order' => 0,
            'text'  => 'HIGH',
            'html'  => '<FONT COLOR="#BB0000">high</FONT>'
        };
    }

    foreach my $source (@sources) {
        # the type prefix is anchored so that a file name containing
        # "syslog:" or "fast:" cannot select the wrong reader
        if ($source =~ /^syslog:(.*)/) {
            source_syslog($1);
        }
        elsif ($source =~ /^fast:(.*)/) {
            source_fastlog($1);
        }
        else {
            die "ERROR: I don't know how to handle source '$source'\n";
        }
    }

    prepare_rules;

    # the text report is the default if no format was requested
    if ($opt{text} or (!$opt{text} and !$opt{html})) {
        report_text;
    }
    if ($opt{text} and $opt{html}) {
        # separator between the two reports
        print '<' x 79;
    }

    if ($opt{html}) {
        report_html;
    }
}

######################## UTILITIES ######################### 

# Read the list of local networks from $file and append it to @local_nets.
# The file holds one network in IP-addr/mask notation per line; '#' starts
# a comment and empty lines are ignored.  Dies if the file cannot be opened
# or a line is malformed.
sub read_local_nets_file($) {
    my $file = shift;
    # lexical handle and three-argument open: the name is always taken as a
    # plain file name, and the handle cannot clash with other code
    open(my $nets_fh, '<', $file) or die "ERROR: can't open $file: $!\n";
    while (my $line = <$nets_fh>) {
        chomp $line;

        # strip comments
        $line =~ s/#.*//;

        # trim
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;

        # empty lines
        next if $line eq '';

        $line =~ /^\d+\.\d+\.\d+\.\d+\/\d+$/
          or die "ERROR: $file, line $.: wrong syntax for network\n";

        push @local_nets, $line;
    }
    close($nets_fh);
}

# Count an occurrence of key $k in the statistics hash referenced by $h.
# The first occurrence stores $d as the entry's data with a count of 1;
# later occurrences only increase the count.
sub add_count($$$) {
    my ($stats, $key, $data) = @_;

    unless (defined $stats->{$key}) {
        $stats->{$key}{data} = $data;
        return $stats->{$key}{count} = 1;
    }
    return $stats->{$key}{count}++;
}

# IP address => resolved (and shortened) name, so that every address is
# looked up only once
my %resolv_cache = ();

# Return the host name for IP address $host, or $host itself if it cannot
# be resolved.  The regex given with --remove-name is removed from the end
# of the name.
sub resolv($) {
    my $host = shift;
    if (not defined $resolv_cache{$host}) {
        # convert the address only on a cache miss, and never hand an
        # unparsable (undefined) address to gethostbyaddr
        my $iaddr = inet_aton($host);
        my $name;
        $name = gethostbyaddr($iaddr, AF_INET) if defined $iaddr;
        $name = $host unless $name;
        $name =~ s/$opt{'remove-name'}$//o if defined $opt{'remove-name'};
        $resolv_cache{$host} = $name;
    }
    return $resolv_cache{$host};
}

# Apply a network mask of $mask bits to the dotted-quad address $ip and
# return the resulting network address in dotted-quad notation.
# Dies if $ip is not a dotted quad.
sub ip_mask($$) {
    my ($ip, $mask) = @_;

    my @octets = $ip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/
      or die "ERROR: wrong syntax for IP: $ip\n";

    # the leading $mask bits of the low 32 bits are set
    my $bits = ~((1 << (32 - $mask)) - 1);

    # octet 3 is covered by the lowest 8 mask bits, octet 0 by bits 24..31
    my @network = map { $octets[$_] & ($bits >> (8 * (3 - $_))) } 0 .. 3;

    return join('.', @network);
}

# True if address $host lies in the network given as "IP-addr/mask".
# Dies if the network specification is malformed.
sub host_in_net($$) {
    my ($host, $net_mask) = @_;
    my ($net, $mask) = $net_mask =~ /^(\d+\.\d+\.\d+\.\d+)\/(\d+)$/
      or die "ERROR: wrong syntax for net: $net_mask\n";

    # host and network must agree once both are reduced to the masked bits
    return ip_mask($host, $mask) eq ip_mask($net, $mask);
}

# IP address => 1/0, remembers the answers of is_local()
my %local_cache = ();

# Return 1 if address $h belongs to one of the networks in @local_nets,
# 0 otherwise.  Answers are cached per address.
sub is_local($) {
    my ($addr) = @_;

    return $local_cache{$addr} if exists $local_cache{$addr};

    $local_cache{$addr} = 0;
    NET: for my $net (@local_nets) {
        next NET unless host_in_net($addr, $net);
        $local_cache{$addr} = 1;
        last NET;
    }

    return $local_cache{$addr};
}

# Return the TCP service name registered for the given port number, or the
# port number itself if no service is known.
sub port_to_service($) {
    my ($port) = @_;
    my ($service) = getservbyport($port, 'tcp');
    return $service ? $service : $port;
}

######################## DATABASE ########################## 

# Register one alert in the statistics.
#
# $a is a hash ref with the keys set by the parsers: name, sid, prio,
# src_ip, src_port, dst_ip, dst_port (plus timestamp).  The hash is
# extended in place with local_ip/local_port/local_name,
# remote_ip/remote_port/remote_name, ruleid and rule, and is then counted
# in %alerts, %alerts_local, %alerts_remote and (for alerts with a known
# port whose destination is local) %alerts_port.
sub add_alert($) {
    my $a = shift;

    # Incomplete Packet Fragments discarded reports port=0...
    $a->{src_port} = undef if defined $a->{src_port} and $a->{src_port} == 0;
    $a->{dst_port} = undef if defined $a->{dst_port} and $a->{dst_port} == 0;

    # the destination is taken as the local side unless only the source
    # is in a local network
    if (is_local($a->{dst_ip}) or !is_local($a->{src_ip})) {
        $a->{local_ip}    = $a->{dst_ip};
        $a->{local_port}  = $a->{dst_port};
        $a->{remote_ip}   = $a->{src_ip};
        $a->{remote_port} = $a->{src_port};
    }
    else {
        $a->{local_ip}    = $a->{src_ip};
        $a->{local_port}  = $a->{src_port};
        $a->{remote_ip}   = $a->{dst_ip};
        $a->{remote_port} = $a->{dst_port};
    }

    # names shown in the reports: resolved host names or plain addresses
    if ($opt{resolve}) {
        $a->{local_name}  = resolv($a->{local_ip});
        $a->{remote_name} = resolv($a->{remote_ip});
    }
    else {
        $a->{local_name}  = $a->{local_ip};
        $a->{remote_name} = $a->{remote_ip};
    }

    if ($opt{narrow}) {

        # shorten host names
        # (keep $l characters from either end, joined by "...")
        my $l = ($narrow_host_size - 3) / 2;
        if (length $a->{local_name} > $narrow_host_size) {
            $a->{local_name} =~ s/^(.{$l}).*(.{$l})$/$1...$2/o;
        }

        if (length $a->{remote_name} > $narrow_host_size) {
            $a->{remote_name} =~ s/^(.{$l}).*(.{$l})$/$1...$2/o;
        }

        # improve word wrapping of rule description
        # (words longer than $max_word are split at a '-' or '_' that
        # follows at least $min_word characters; the scan restarts after
        # every split)
        my @n = split (/\s+/, $a->{name});
        my $max_word = $opt{'text-width'} - $narrow_host_size - 10;
        $max_word = 20 unless $max_word > 20;
        my $min_word = int($max_word / 2);
        ITERATION: while (1) {

            for my $i (0 .. $#n) {
                length $n[$i] > $max_word or next;
                if ($n[$i] =~ /^(.{$min_word,}?)[-_](.*)$/o) {
                    splice @n, $i, 1, $1, $2;
                    next ITERATION;
                }
            }
            last ITERATION;
        }
        $a->{name} = join (' ', @n);
    }

    # the rule is identified by its sid if there is one, otherwise by name
    if (not defined $a->{ruleid}) {
        if (defined $a->{sid}) {
            $a->{ruleid} = $a->{sid};
        }
        else {
            $a->{ruleid} = $a->{name};
        }
    }
    my $r = $a->{ruleid};

    if (not defined $rules{$r}) {
        $rules{$r} = {
            sid  => $a->{sid},
            name => $a->{name},
            prio => $a->{prio},
        };
    }

    # a rule first seen without priority takes name and priority from the
    # first alert that carries one
    if (defined $a->{prio} and not defined $rules{$r}{prio}) {
        $rules{$r}{name} = $a->{name};
        $rules{$r}{prio} = $a->{prio};
    }
    $a->{rule} = $rules{$r};

    add_count(\%alerts, $r, $a);
    add_count(\%alerts_local,  "$r, $a->{local_ip}",  $a);
    add_count(\%alerts_remote, "$r, $a->{remote_ip}", $a);
    if (defined $a->{local_port} and is_local($a->{dst_ip})) {
        add_count(\%alerts_port, "$r, $a->{dst_port}", $a);
    }
    $alerts_total++;
}

# Add the texts used by the reports to every entry of %rules: prio_text and
# prio_html (empty without a known priority) and name_html (the rule name,
# linked to the snort.org signature database if the rule has a non-zero sid).
sub prepare_rules() {
    for my $rule (values %rules) {
        my $prio = $rule->{prio};
        my ($prio_text, $prio_html) = ('', '');
        if (defined $prio) {
            $prio_text = $PRIO{$prio}{text} || '';
            $prio_html = $PRIO{$prio}{html} || '';
        }
        $rule->{prio_text} = $prio_text;
        $rule->{prio_html} = $prio_html;

        my $sid = $rule->{sid};
        $rule->{name_html} = (defined $sid and $sid != 0)
          ? "<A href=\"http://www.snort.org/snort-db/sid.html?sid=$rule->{sid}\">$rule->{name}</A>"
          : $rule->{name};
    }
}

######################### SYSLOG ########################### 

# Read snort alerts from the syslog file $file ("-" for standard input)
# and add them to the statistics.  Entries of other programs are ignored;
# snort entries that cannot be parsed are reported with a warning.
# Also extends the global time range ($start_time, $end_time).
sub source_syslog($) {
    my $file   = shift;
    my $parser = Parse::Syslog->new($file);
    my $time;
    while (my $sl = $parser->next) {
        $time = $sl->{timestamp};
        my $text = $sl->{text};

        # process only snort logs
        $sl->{program} eq 'snort' or next;

        # set start_time: keep the earliest time over all sources, as
        # parse_fastlog() does (several sources may be given in any order)
        $start_time = $time
          if not defined $start_time or $time < $start_time;

        # we don't handle (yet) spp_stream4
        $text =~ /^(?:\[\d+:\d+:\d+\]\s+)?spp_stream4/ and next;
        # we don't handle (yet) spp_http_decode
        $text =~ /^(?:\[\d+:\d+:\d+\]\s+)?spp_http_decode/ and next;
        # we don't handle (yet) spp_bo
        $text =~ /^(?:\[\d+:\d+:\d+\]\s+)?spp_bo/ and next;

        # spp_portscan
        next if parse_portscan($time, $text);

        # normal alerts
        next if parse_syslog_alert($time, $text);
        warn "WARNING: couldn't parse:\n$text\n";
    }
    # set end_time: the time of the last entry read (of any program),
    # unless a later end time is already known
    if (defined $time) {
        $end_time = $time unless defined $end_time and $time < $end_time;
    }
}

# Parse the text of a snort alert logged via syslog and register it with
# add_alert().  Returns 1 if the text was an alert, 0 otherwise.
sub parse_syslog_alert($$) {
    my ($time, $text) = @_;

# [1:1104:1] WEB-MISC whisker splice attack [Classification: Attempted Information Leak] [Priority: 3]: {TCP} 192.168.1.5:44553 -> 192.168.2.75:80

    # the braces of the protocol tag are escaped, as in parse_fastlog():
    # an unescaped '{' in a regex is flagged by modern perls
    my ($sid, $name, $prio, $src_ip, $src_port, $dst_ip, $dst_port) =
      $text =~ /^
                (?:\[\d+:(\d+):\d+\]\s+)?           # sid              - 1
                ([^\[]*?)                           # name             - 2
                (?:\s+\[Classification:\s+[^\]]+\])?
                (?:\s+\[Priority:\s+(\d+)\])?       # priority         - 3
                :\s+
                (?:\{(?:TCP|UDP|ICMP)\}\s+)?
                (\d+\.\d+\.\d+\.\d+)                # source IP        - 4
                (?::(\d+))?                         # source port      - 5
                \s+->\s+                            # ->
                (\d+\.\d+\.\d+\.\d+)                # destination IP   - 6
                (?::(\d+))?                         # destination port - 7
                $
                /x
      or return 0;

    add_alert(
        {
            timestamp   => $time,
            sid      => $sid,
            name     => $name,
            prio     => $prio,
            src_ip   => $src_ip,
            src_port => $src_port,
            dst_ip   => $dst_ip,
            dst_port => $dst_port
        }
    );

    return 1;
}

# Handle the messages of the portscan preprocessors.  Scan summaries are
# added to %portscans (scans coming from local hosts are dropped); the
# "PORTSCAN DETECTED" and "portscan status" messages are recognized but
# ignored.  Returns 1 if the text was a portscan message, 0 otherwise.
sub parse_portscan($$) {
    my ($time, $text) = @_;

    # spp_portscan: summary at the end of a scan
    if (my @summary = $text =~ /^
                spp_portscan:\ End\ of\ portscan\ from[ ]
                (\d+\.\d+\.\d+\.\d+)    # 1
                :\s+
                TOTAL\s+
                time\((\d+s)\)          # 2
                \s+
                hosts\((\d+)\)          # 3
                \s+
                TCP\((\d+)\)            # 4
                \s+
                UDP\((\d+)\)            # 5
                (\s+STEALTH)?
                /x)
    {
        my ($scanner, undef, $hosts, $tcp, $udp, $stealth) = @summary;
        return 1 if is_local($scanner);
        $scanner = resolv($scanner) if $opt{resolve};
        $portscans{$scanner}{hosts} += $hosts;
        $portscans{$scanner}{ports} += $tcp + $udp;
        $portscans{$scanner}{stealth} = 1 if $stealth;
        return 1;
    }

    # progress messages carry nothing that is reported
    return 1 if $text =~ /^spp_portscan: PORTSCAN DETECTED/;
    return 1 if $text =~ /^spp_portscan: portscan status/;

    # spp_portscan2
    if (my ($scanner, $targets, $ports) = $text =~ /^
        (?:\[\d+:\d+:\d+\]\s+)?
        \(spp_portscan2\)\s+
        Portscan\ detected\ from[ ]
        (\d+\.\d+\.\d+\.\d+)        # 1
        :\s+
        (\d+)\ targets\s+           # 2
        (\d+)\ ports\s+             # 3
        /x)
    {
        return 1 if is_local($scanner);
        $scanner = resolv($scanner) if $opt{resolve};
        $portscans{$scanner}{hosts} += $targets;
        $portscans{$scanner}{ports} += $ports;
        return 1;
    }

    return 0;
}

######################## FASTLOG ########################### 

# Read snort alerts from the fast-alert log $file ("-" for standard input)
# and hand every line to parse_fastlog().  Dies if the file cannot be
# opened.
sub source_fastlog($) {
    my $file = shift;

    my $log_fh;
    if ($file eq '-') {
        # "-" keeps meaning standard input, as it did with the
        # two-argument open() that was used here before
        $log_fh = \*STDIN;
    }
    else {
        # lexical handle and three-argument open: the name is never parsed
        # for mode characters or pipes
        open($log_fh, '<', $file) or die "ERROR: can't open $file: $!\n";
    }
    while (my $l = <$log_fh>) {
        parse_fastlog($l);
    }
    close($log_fh) unless $file eq '-';
}

# Parse one line of a snort fast-alert log and register the alert with
# add_alert() (portscan messages go to parse_portscan()).  Also extends
# the global time range ($start_time, $end_time).
# Returns 1 if the line was understood, 0 (after a warning) otherwise.
sub parse_fastlog($) {
    my $line = shift;
    chomp $line;
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    # fields are separated by "[**]": time, "[gen:sid:rev] name", details
    my @l = split (/\s*\[\*\*\]\s*/, $line);
    my %a = ();

    # time
    defined $l[0]
      and $l[0] =~ /^(\d{2})\/(\d{2})(?:\/(\d{2}))?-(\d{2}):(\d{2}):(\d{2}\.\d+)$/
      or do {
        warn "WARNING: can't parse fast-alert line(1):\n$line\n";
        return 0;
      };

    # the year is optional in the log; assume the current one if missing
    my $year = defined $3 ? $3 : (localtime(time))[5]+1900;

    # convert to unix time
    $a{timestamp} = timelocal($6, $5, $4, $2, $1 - 1, $year);
    $start_time = $a{timestamp}
      if not defined $start_time or $a{timestamp} < $start_time;
    $end_time = $a{timestamp} if not defined $end_time or $a{timestamp} > $end_time;

    # name
    # (a line without "[**]" separators has no such field)
    defined $l[1] and $l[1] =~ /^\[\d+:(\d+):\d+\]\s+(.*)$/ or do {
        warn "WARNING: can't parse fast-alert line(2):\n$line\n";
        return 0;
    };
    $a{sid}  = $1 unless $1 == 0;
    $a{name} = $2;

    # portscans
    return 1 if parse_portscan($a{timestamp}, $a{name});

    # priority, source and dest
    # The pattern is not anchored at the start, so the protocol tag has to
    # list every protocol (ICMP included, as in parse_syslog_alert()): with
    # an unknown tag the match would start at the source IP and silently
    # drop the priority.
    my ($prio, $src_ip, $src_port, $dst_ip, $dst_port) =
      defined $l[2]
      ? $l[2] =~ /
                (?:\[Priority:\ (\d+)\]\s+)?    # priority              - 1
                (?:\{(?:UDP|TCP|ICMP)\}\s+)?    # {TCP}, {UDP} or {ICMP}
                (\d+\.\d+\.\d+\.\d+)            # source IP             - 2
                (?::(\d+))?                     # source port           - 3
                \s+->\s+                        # ->
                (\d+\.\d+\.\d+\.\d+)            # destination IP        - 4
                (?::(\d+))?                     # destination port      - 5
                $/x
      : ();
    unless (defined $src_ip) {
        warn "WARNING: can't parse fast-alert line(3):\n<$line>\n";
        return 0;
    }
    $a{prio}     = $prio;
    $a{src_ip}   = $src_ip;
    $a{src_port} = $src_port;
    $a{dst_ip}   = $dst_ip;
    $a{dst_port} = $dst_port;

    add_alert(\%a);

    return 1;
}

####################### SORT ORDER ######################### 

# Sort rank of a statistics entry: the 'order' value that %PRIO holds for
# the priority of the entry's rule, or 1 if the priority is missing or
# unknown.
sub alert_prio_order($) {
    my ($entry) = @_;
    my $prio = $entry->{data}{rule}{prio};
    return 1 unless defined $prio and defined $PRIO{$prio};
    return $PRIO{$prio}{order};
}

# sort comparator: priority rank, then descending count, then rule name
sub cmp_alerts {
    my $by_rank = alert_prio_order($a) <=> alert_prio_order($b);
    return $by_rank if $by_rank;
    my $by_count = $b->{count} <=> $a->{count};
    return $by_count if $by_count;
    return $a->{data}{rule}{name} cmp $b->{data}{rule}{name};
}

# sort comparator: priority rank, then descending count, then rule name,
# then name of the local host
sub cmp_alerts_local {
    my $by_rank = alert_prio_order($a) <=> alert_prio_order($b);
    return $by_rank if $by_rank;
    my $by_count = $b->{count} <=> $a->{count};
    return $by_count if $by_count;
    my $by_rule = $a->{data}{rule}{name} cmp $b->{data}{rule}{name};
    return $by_rule if $by_rule;
    return $a->{data}{local_name} cmp $b->{data}{local_name};
}

# sort comparator: priority rank, then descending count, then rule name,
# then name of the remote host
sub cmp_alerts_remote {
    my $by_rank = alert_prio_order($a) <=> alert_prio_order($b);
    return $by_rank if $by_rank;
    my $by_count = $b->{count} <=> $a->{count};
    return $by_count if $by_count;
    my $by_rule = $a->{data}{rule}{name} cmp $b->{data}{rule}{name};
    return $by_rule if $by_rule;
    return $a->{data}{remote_name} cmp $b->{data}{remote_name};
}

# sort comparator: ascending local port, then descending count, then rule
# name
sub cmp_alerts_port {
    my $by_port = $a->{data}{local_port} <=> $b->{data}{local_port};
    return $by_port if $by_port;
    my $by_count = $b->{count} <=> $a->{count};
    return $by_count if $by_count;
    return $a->{data}{rule}{name} cmp $b->{data}{rule}{name};
}

# sort comparator for the keys of %portscans: descending number of scanned
# ports, then host
sub cmp_portscans {
    my $by_ports = $portscans{$b}{ports} <=> $portscans{$a}{ports};
    return $by_ports || $a cmp $b;
}

####################### REPORT TEXT ######################## 

# Print the text table of alerts from %alerts, ordered by cmp_alerts.
#
# Columns: share of $alerts_total as a percentage (one decimal), count,
# priority text and rule name. The table is rendered with the width given by
# $opt{'text-width'}.
sub report_text_alerts() {
    my $table = Text::FormatTable->new('r r l l');
    $table->rule('=');
    $table->head('%', '#', 'prio', 'description');
    $table->rule('=');

    foreach my $alert (sort cmp_alerts values %alerts) {
        my $count   = $alert->{count};
        my $percent = sprintf("%.01f", $count / $alerts_total * 100);

        $table->row(
            $percent,
            $count,
            $alert->{data}{rule}{prio_text},
            $alert->{data}{rule}{name},
        );
    }

    print $table->render($opt{'text-width'});
}

# Print the text table of alerts from %alerts_remote, ordered by
# cmp_alerts_remote.
#
# Columns: count, remote host name, priority text and rule name. The table is
# rendered with the width given by $opt{'text-width'}.
sub report_text_alerts_remote() {
    my $table = Text::FormatTable->new('r l l l');
    $table->rule('=');
    $table->head('#', 'remote host', 'prio', 'description');
    $table->rule('=');

    foreach my $alert (sort cmp_alerts_remote values %alerts_remote) {
        $table->row(
            $alert->{count},
            $alert->{data}{remote_name},
            $alert->{data}{rule}{prio_text},
            $alert->{data}{rule}{name},
        );
    }

    print $table->render($opt{'text-width'});
}

# Print the text table of alerts from %alerts_local, ordered by
# cmp_alerts_local.
#
# Columns: count, local host name, priority text and rule name. The table is
# rendered with the width given by $opt{'text-width'}.
sub report_text_alerts_local() {
    my $table = Text::FormatTable->new('r l l l');
    $table->rule('=');
    $table->head('#', 'local host', 'prio', 'description');
    $table->rule('=');

    foreach my $alert (sort cmp_alerts_local values %alerts_local) {
        $table->row(
            $alert->{count},
            $alert->{data}{local_name},
            $alert->{data}{rule}{prio_text},
            $alert->{data}{rule}{name},
        );
    }

    print $table->render($opt{'text-width'});
}

# Print the text table of alerts from %alerts_port, ordered by
# cmp_alerts_port, followed by an explanatory note.
#
# Records whose count is below $ports_threshold are left out. Columns: count,
# the result of port_to_service() for the record's dst_port, priority text
# and rule name. The table is rendered with the width given by
# $opt{'text-width'}.
sub report_text_alerts_port() {
    my $table = Text::FormatTable->new('r l l l');
    $table->rule('=');
    $table->head('#', 'port', 'prio', 'description');
    $table->rule('=');

    foreach my $alert (sort cmp_alerts_port values %alerts_port) {
        # Only ports that reached the threshold are reported.
        next unless $alert->{count} >= $ports_threshold;

        $table->row(
            $alert->{count},
            port_to_service($alert->{data}{dst_port}),
            $alert->{data}{rule}{prio_text},
            $alert->{data}{rule}{name}
        );
    }

    print $table->render($opt{'text-width'});

    print "\n";
    print "Note: 'port' is destination port, only traffic with local\n";
    print "      destination is considered.\n";
}

# Print the text table of portscans from %portscans, ordered by cmp_portscans.
#
# Each key of %portscans is shown as the remote host, next to its entry's
# {ports} and {hosts} values. The table is rendered with the width given by
# $opt{'text-width'}.
sub report_text_portscans() {
    my $table = Text::FormatTable->new('r l l');
    $table->rule('=');
    $table->head('ports', 'hosts', 'remote host');
    $table->rule('=');

    foreach my $remote (sort cmp_portscans keys %portscans) {
        $table->row(
            $portscans{$remote}{ports},
            $portscans{$remote}{hosts},
            $remote,
        );
    }

    print $table->render($opt{'text-width'});
}

# Print the complete plain-text report.
#
# Layout:
#   - header: begin and end time (each only when defined) and the list of
#     sources (only when at least one source is known);
#   - the portscan table, only when %portscans is not empty;
#   - the four alert tables (by id, by remote host, by local host, by port);
#   - a footer line with the snort-rep version.
sub report_text() {
    print "begins: " . localtime($start_time) . "\n" if defined $start_time;
    print "ends  : " . localtime($end_time) . "\n"   if defined $end_time;

    # Skip the line entirely when there is nothing to list, instead of
    # printing a bare "source: " label.
    print "source: " . join(', ', @sources) . "\n" if @sources;
    print "\n";

    if (keys %portscans) {
        print "* Portscans\n\n";
        report_text_portscans();
        print "\n";
    }

    print "* Alerts\n\n";
    report_text_alerts();
    print "\n";
    report_text_alerts_remote();
    print "\n";
    report_text_alerts_local();
    print "\n";
    report_text_alerts_port();

    print "\n--\nsnort-rep $VERSION (dws\@ee.ethz.ch)\n";
}

####################### REPORT HTML ######################## 

# Print the HTML table of alerts from %alerts, ordered by cmp_alerts.
#
# Columns: share of $alerts_total as a percentage, count, the rule's
# prio_html value and its name_html value.
sub report_html_alerts() {
    print '<TABLE BORDER="1" SUMMARY="alerts by id"><TR>' . "\n"
      . '<TH ALIGN="center">%</TH>'
      . '<TH ALIGN="center">#</TH>'
      . '<TH>prio</TH>'
      . '<TH>description</TH>'
      . "</TR>\n";

    # One table row per alert; the format is the same for every row.
    my $row_format = '<TR><TD ALIGN="right">%4.1f</TD>'
      . '<TD ALIGN="right">%d</TD>'
      . '<TD>%s</TD>'
      . '<TD><TT>%s</TT></TD></TR>' . "\n";

    foreach my $alert (sort cmp_alerts values %alerts) {
        printf($row_format,
            $alert->{count} / $alerts_total * 100,
            $alert->{count},
            $alert->{data}{rule}{prio_html},
            $alert->{data}{rule}{name_html});
    }

    print "</TABLE>\n";
}

# Print the HTML table of alerts from %alerts_remote, ordered by
# cmp_alerts_remote.
#
# Columns: count, remote host name, the rule's prio_html value and its
# name_html value.
sub report_html_alerts_remote() {
    print '<TABLE BORDER="1" SUMMARY="alerts by remote host, id"><TR>' . "\n"
      . '<TH ALIGN="center">#</TH>'
      . '<TH>remote host</TH>'
      . '<TH>prio</TH>'
      . '<TH>description</TH>'
      . "</TR>\n";

    # One table row per alert; the format is the same for every row.
    my $row_format = '<TR><TD ALIGN="right">%d</TD>'
      . '<TD><TT>%s</TT></TD>'
      . '<TD>%s</TD>'
      . '<TD><TT>%s</TT></TD></TR>' . "\n";

    foreach my $alert (sort cmp_alerts_remote values %alerts_remote) {
        printf($row_format,
            $alert->{count},
            $alert->{data}{remote_name},
            $alert->{data}{rule}{prio_html},
            $alert->{data}{rule}{name_html});
    }

    print "</TABLE>\n";
}

# Print the HTML table of alerts from %alerts_local, ordered by
# cmp_alerts_local.
#
# Columns: count, local host name, the rule's prio_html value and its
# name_html value.
sub report_html_alerts_local() {
    print '<TABLE BORDER="1" SUMMARY="alerts by local host, id"><TR>' . "\n"
      . '<TH ALIGN="center">#</TH>'
      . '<TH>local host</TH>'
      . '<TH>prio</TH>'
      . '<TH>description</TH>'
      . "</TR>\n";

    # One table row per alert; the format is the same for every row.
    my $row_format = '<TR><TD ALIGN="right">%d</TD>'
      . '<TD><TT>%s</TT></TD>'
      . '<TD>%s</TD>'
      . '<TD><TT>%s</TT></TD></TR>' . "\n";

    foreach my $alert (sort cmp_alerts_local values %alerts_local) {
        printf($row_format,
            $alert->{count},
            $alert->{data}{local_name},
            $alert->{data}{rule}{prio_html},
            $alert->{data}{rule}{name_html});
    }

    print "</TABLE>\n";
}

# Print the HTML table of alerts from %alerts_port, ordered by
# cmp_alerts_port, followed by an explanatory note paragraph.
#
# Records whose count is below $ports_threshold are left out. Columns: count,
# the result of port_to_service() for the record's dst_port, the rule's
# prio_html value and its name_html value.
sub report_html_alerts_port() {
    print '<TABLE BORDER="1" SUMMARY="alerts by local port, id"><TR>' . "\n"
      . '<TH ALIGN="center">#</TH>'
      . '<TH>port</TH>'
      . '<TH>prio</TH>'
      . '<TH>description</TH>'
      . "</TR>\n";

    # One table row per alert; the format is the same for every row.
    my $row_format = '<TR><TD ALIGN="right">%d</TD>'
      . '<TD>%s</TD>'
      . '<TD>%s</TD>'
      . '<TD><TT>%s</TT></TD></TR>' . "\n";

    foreach my $alert (sort cmp_alerts_port values %alerts_port) {
        # Only ports that reached the threshold are reported.
        next unless $alert->{count} >= $ports_threshold;

        printf($row_format,
            $alert->{count},
            port_to_service($alert->{data}{dst_port}),
            $alert->{data}{rule}{prio_html},
            $alert->{data}{rule}{name_html});
    }

    print "</TABLE>\n";
    print '<P><SMALL>'
      . "Note: 'port' is destination port, only traffic with local destination considered."
      . '</SMALL></P>';
}

# Print the HTML table of portscans from %portscans.
#
# Rows are ordered with cmp_portscans, the same comparator the text report
# uses: larger {ports} value first, ties broken by the key. Each key of
# %portscans is shown as the remote host, next to its entry's {ports} and
# {hosts} values, both right-aligned.
sub report_html_portscans() {
    print "<TABLE BORDER=\"1\" SUMMARY=\"portscans\"><TR>";
    print "<TH ALIGN=\"center\">ports</TH>";
    print "<TH ALIGN=\"center\">hosts</TH>";
    print "<TH>remote host</TH>";
    print "</TR>\n";

    for my $h (sort cmp_portscans keys %portscans) {
        # Both numeric cells use ALIGN="right"; the hosts cell previously
        # carried a misspelled attribute name, so browsers ignored it.
        printf "<TR>" . "<TD ALIGN=\"right\">%d</TD>"
          . "<TD ALIGN=\"right\">%d</TD>"
          . "<TD>%s</TD>" . "</TR>\n", $portscans{$h}{ports},
          $portscans{$h}{hosts}, $h;
    }
    print "</TABLE>\n";
}

# Print the complete HTML report as a standalone HTML 4.01 document.
#
# Layout:
#   - document header and title;
#   - a "report data" table with the begin and end time (each row only when
#     the time is defined) and the list of sources (only when at least one
#     source is known);
#   - the portscan table, only when %portscans is not empty;
#   - the four alert tables (by id, by remote host, by local host, by port);
#   - a footer with a link to the snort-rep home page and the version.
sub report_html() {
    print <<HEADER;
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<HTML>
<HEAD><TITLE>Snort Report</TITLE></HEAD>
<BODY>
<H1>Snort Report</H1>
HEADER

    print "<TABLE SUMMARY=\"report data\">\n";

    # Same guards as the text report: an undefined time would otherwise be
    # passed to localtime() and produce a warning.
    print "<TR><TD>Begins:</TD><TD><TT>" . localtime($start_time)
      . "</TT></TD></TR>\n"
      if defined $start_time;
    print "<TR><TD>Ends:</TD><TD><TT>" . localtime($end_time)
      . "</TT></TD></TR>\n"
      if defined $end_time;

    # Skip the row entirely when there is nothing to list.
    print "<TR><TD>Source:</TD><TD><TT>" . join(', ', @sources)
      . "</TT></TD></TR>\n"
      if @sources;
    print "</TABLE>\n";

    if (keys %portscans) {
        print "<H2>Portscans</H2>\n";
        report_html_portscans();
    }
    print "<H2>Alerts</H2>\n";
    print "<H3>grouped by <EM>id</EM></H3>\n";
    report_html_alerts();
    print "<H3>grouped by <EM>remote host</EM>, <EM>id</EM></H3>\n";
    report_html_alerts_remote();
    print "<H3>grouped by <EM>local host</EM>, <EM>id</EM></H3>\n";
    report_html_alerts_local();
    print "<H3>grouped by <EM>local port</EM>, <EM>id</EM></H3>\n";
    report_html_alerts_port();

    print
"<HR><P><SMALL><A href=\"http://people.ee.ethz.ch/~dws/software/snort-rep/\">";
    print "snort-rep</A> $VERSION</SMALL></P>\n";

    print <<FOOTER;
</BODY></HTML>
FOOTER
}

# Script entry point: run main (its definition is not visible in this part of
# the file; presumably it parses the options and prints the reports).
main;

__END__

=head1 NAME

snort-rep - snort-reporting tool

=head1 SYNOPSIS

B<snort-rep> [I<OPTIONS>] [I<syslog-file>]

=head1 DESCRIPTION

I<snort-rep> is a Snort reporting tool that can produce text or HTML output
from a syslog file. If I<syslog-file> is not specified, it will use
standard-input. The reports contain:

=over 4

=item *

Portscan summary

=item *

Alert summary by ID

=item *

Alert summary by remote host and ID

=item *

Alert summary by local host and ID

=item *

Alert summary by local port and ID

=back

It is designed to be used for daily e-mail reports to the system administrators
(see I<snort-rep-mail> for an example script that generates daily e-mails). All
reports contain priority information (if used with Snort 1.8+) and the HTML
output contains direct links to the IDS descriptions of whitehats.com.

=head1 OPTIONS

=over 4

=item B<-h>, B<--help>

Print usage.

=item B<-r>, B<--resolve>

Resolve host names.

=item B<-s>, B<--source>=I<SOURCE>[,I<SOURCE>...]

Read information from I<SOURCE> (in addition to I<syslog-file>). This option
can be specified multiple times. If I<syslog-file> is not specified and
no B<--source> option is used, standard input will be read in syslog format.

I<SOURCE> is a comma separated list of sources which may be:

=over 4

=item B<syslog>:I<FILE>

Syslog file I<FILE>

=item B<fast>:I<FILE>

Snort "fast-alert" file I<FILE>

=back

=item B<-t>, B<--text>

Print text report (default). If both B<--text> and B<--html> are specified,
both will be printed, separated by a line like '<<<<<' (79 times '<').

=item B<--text-width>=I<n>

Try to fit the text report to I<n> columns. Default: 79.

=item B<-H>, B<--html>

Print HTML report.

=item B<-l>, B<--local>=I<NET>[,I<NET>...]

I<NET> is a local network. This option can be specified more than once and
can contain more than one network (comma-separated). I<NET> must be specified
as C<network/mask>, for example C<192.168.1.0/24>.

=item B<-F>, B<--local-file>=I<FILE>

I<FILE> contains a list of local networks, as given in B<-l> (one network per
line). I<FILE> can contain hash comments and empty lines.

=item B<-R>, B<--remove-name>=I<REGEX>

Remove I<REGEX> from host names. This option is useful to make nicer host names
for local hosts.

=item B<--priority-med>=I<N>

Priorities greater than or equal to I<N> will be considered "medium priority"
(default: 7).

=item B<--priority-high>=I<N>

Priorities greater than or equal to I<N> will be considered "high priority"
(default: 16). High-priority alerts will be pushed to the top of the reports.

=item B<-N>, B<--narrow>

Try to make the reports better fit on the screen by trimming too long
host-names and placing spaces in the alert descriptions so that they can
be word-wrapped.

=back

=head1 SEE ALSO

S<http://people.ee.ethz.ch/~dws/software/snort-rep/>

=head1 COPYRIGHT

Copyright (c) 2001, 2002 by ETH Zurich. All rights reserved.

=head1 LICENSE

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

=head1 AUTHOR

S<David Schweikert E<lt>dws@ee.ethz.chE<gt>>

=cut

# vi: et sw=4
