#!/usr/bin/perl -w

# SnortSnarf, a utility to convert snort log files to HTML pages
# Authors: Stuart Staniford, Silicon Defense (stuart@SiliconDefense.com)
#          James Hoagland, Silicon Defense (hoagland@SiliconDefense.com)
# copyright (c) 2000,2001 by Silicon Defense (http://www.silicondefense.com/)

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.  

# SnortSnarf description:
#
# Code to parse files of snort alerts and portscan logs, and produce
# HTML output intended to allow an intrusion detection analyst to
# engage in diagnostic inspection and tracking down problems.  
# The model is that one is using a cron job or similar to
# produce a daily/hourly/whatever file of snort alerts.  This script
# can be run on each such file to produce a convenient HTML breakout
# of all the alerts.
#
# The idea is that the analyst can click around the alerts instead
# of wearily grepping and awking through them.

# Please send complaints, kudos, and especially improvements and bugfixes to
# hoagland@SiliconDefense.com. This is a quick hack with features added and
# may only be worth what you paid for it.  It is still under active
# development and may change at any time.

# this file (snortsnarf.pl) is part of SnortSnarf v010821.1

##############################################################################

# Usage:

# SnortSnarf.pl <options> <file1 file2 ...>

# The script will produce a directory snfout.file1 (by default) full of
# a large number of html files.  These are placed under the current
# directory.  It also produces a file index.html in that directory.  
# This is a good place to start with a browser.

# Options include:

# -d directory
#     directory is the path to the directory the HTML pages will be generated
# in, overriding the default.

# -dns
#     This will cause the script to lookup the DNS name of each IP it writes a
# page for.  On a large alert file, this will take a very long time and will
# hammer your DNS server.

# -ldir URL
#     URL is the URL of a base directory in which the log files usually found
# in /var/log/snort are living.  With this option, SnortSnarf will generate lots
# of links into those files based at that URL.  Eg: with -ldir
# "http://host.name.here/logs" we get links like
# http://host.name.here/logs/10.0.0.1/UDP-137-137. Note that the logs
# themselves aren't parsed.

# -homenet network
#     network is the network IP address or CIDR network notation for your home
# network, as told to snort.  CIDR takes the standard form of
# address/masksize (sizes 0-32 supported).  In the other form, 1-3 zeros at
# the end of the address are assumed to be what varies within your network.   
# If this argument is not provided, the home network is assumed to be 0.0.0.0
# (no home).  This is currently used only with the -ldir option.

# -color=<option>
#     option is 'yes' or 'no' or 'rotate'.  The style of coloring is given by
#  the option if any.  'rotate' is the only style currently available.  This
# changes the background color between an alert/portscan report and the next
# one iff the next one has a different message (signature), source host, or
# destination host.  If 'yes' is given, or no style is given on the command
# line, the default ('rotate' in this version) is used.

# -split=<threshold>
#     To speed up display of pages containing lots of alerts, <threshold> is a
#  maximum number of alerts to display on a page unless explicitly requested. 
# Instead the alerts are broken into several pages.  Default is 100.  Can be
# set to 0 to never split the list of alerts being displayed.

# -onewindow
#     If this option is given, certain URLs will not be targeted to other
# browser windows as is the default.

# -rs
#     If this option is given, the signatures on the signature index page will
# be sorted with the signatures with more alerts appearing first.

# -db path
#     path is the full path to the annotation database (an XML file), from the
# point of view of the server hosting the CGI scripts that access it, or the
# empty string to not use an annotation database.  The default is to not use
# it.

# -cgidir URL
#     URL is the location of the cgi-bin directory in which the CGI scripts that
# go along with the program are stored.  This may be relative to the pages that
# are generated (so that when a link appears on a page with URL as a prefix,
# it will be valid).  "/cgi-bin" is the default.

# -sisr configfile 
#     Generate links with SnortSnarf Incident Storage and Reporting (SISR).
# The argument is the full path to the SISR configuration file to use.  This
# file is not parsed by SnortSnarf.pl.

# -nmapurl URL
#     URL is the URL of a base URL directory in which the html output of
#  running nmap2html on nmap output is living. With this option, SnortSnarf
# will generate links to that output for IP addresses.  If the path to that
# directory is given with the -nmapdir option, then only those pages that
# actually exist (at the time SnortSnarf is run) are linked to.

# -nmapdir dir
#     dir is the directory on the file system in which nmap2html output is
# stored.  When the -nmapurl option is given, this is used to verify that a
# nmaplog.pl page actually exists before linking to it.

# -rulesfile file
#     file is the base rule file (e.g., snort.conf) that snort was run with.
# If this option is given, the rules in that file (and included files)
# generating a signature are included in the page for that signature's alerts.
# In addition, the rules are scanned for references to arachNIDS, Bugtraq id's
# etc. to produce URLs.  Note that the processing of the rule files is fairly
# rough. E.g., variables are ignored.

# -rulesdir dir
#     dir is a directory to use as the base path for rules files rather than
# the one given or the paths listed in include directives.  (Useful if the
# files have been relocated.)

# -rulesscanonce
#     When used with -rulesfile, this flag requests that the rules files be
# read only once.  This will speed up run time, but will increase memory usage
# since the rules in those files will need to be retained in memory for future
# use.

# -refresh=<secs>
#       add a refresh tag to every HTML page generated, causing browsers to
# refresh the page every <secs> seconds.

# -year=<opt>
#     define how to infer the year of an alert when its format does not
# provide this information; possible values of <opt> are:
#  + 'cur': assume the current year
#  + 'rec': assume the alert was from within the last 12 months (the default)
#  + a year: use the specified year (e.g., 2000)

# -windows
#     run in windows mode.  You can also change $os in snortsnarf.pl to be
# 'windows' for the same effect.



# SnortSnarf is believed to work with Perl 5 on the following OS's
# OpenBSD 2.6
# RedHat Linux 6.1 and 6.2
# Windows NT 4.0 (NTFS only) (tweak the $os parameter below.  Wildcards won't work).

# It probably works with most other Unixen/Linuxen, and is certain not to 
# work on the FAT filesystem.

# Believed to work with alert files generated from common snort command lines.

# This script is pretty memory intensive while it is running.  A previous
# version of the script was known to handle 5MB of alerts on a machine with
# 64MB of memory (no virtual memory).  It has also been known to eventually
# finish off huge alert files with Gigabytes of virtual memory available.  The
# script should be studied to figure out why it uses more memory than expected.

# .tar.gz of the dir is about half the size of the alert file.
# YMMV!

##############################################################################

# Credits, etc

# Initial alert parsing code borrowed from Joe McAlerney, Silicon 
# Defense.
# A couple of ideas were stolen from Snort2html by Dan Swan.
# Thanks to SANS GIAC and Donald McLachlan for a paper with ideas 
# on how to look up IP addresses.

# Huge thanks to DARPA for supporting us for part of the time developing
# this code.  Major thanks to Paul Arabelo for being our test customer
# while we learned to do operational intrusion detection instead of being
# only a researcher.

# Shouts to Marty Roesch and Patrick Mullen and the rest of the snort
# community for developing snort and the snort portscan preprocessor.  

# Kudos to Steve Northcutt who has taught us all what an intrusion
# detection analyst should be.

# Version control info: $Id: snortsnarf.pl,v 1.16 2000/06/14 18:40:45 jim Exp $

use lib qw(/usr/local/libdata/snortsnarf/);
use Cwd;

# Avoid needing to refer to SnortSnarf packages as SnortSnarf::*, even if
# that is where they really are: for every directory currently in @INC that
# contains a "SnortSnarf" subdirectory, that subdirectory is appended to @INC.
# This is done in a BEGIN block so that @INC is extended at compile time,
# before the "use" statements that follow are processed.
sub BEGIN { push(@INC,map("$_/SnortSnarf",grep(-d "$_/SnortSnarf",@INC))); }

use SnortFileInput;
use HTMLMemStorage;
use HTMLAnomMemStorage;
use HTMLOutput;
use Filter;
use Input;
use SnortRules;


# File name extension given to the generated pages (usually html or htm).
$html = 'html';
# Operating system mode; either 'windows' or 'unix'.
$os = 'unix';



# Identification strings; these are combined into the footer text and the
# program line that are handed to the output module through %out_params.
$script = '<a href="http://www.silicondefense.com/snortsnarf/">SnortSnarf</a>';
$version = 'v010821.1';
#$author_email = "hoagland\@SiliconDefense.com";
$author = join(' and ',
    '<a href="mailto:hoagland@SiliconDefense.com">Jim Hoagland</a>',
    '<a href="mailto:stuart@SiliconDefense.com">Stuart Staniford</a>');

# footer text
$foot = qq{<CENTER>$script brought to you courtesy of }
      . qq{<A HREF="http://www.silicondefense.com/">Silicon Defense</A><BR>\n}
      . qq{Authors: $author<BR>\n}
      . qq{See also the <a href="http://www.snort.org/">Snort Page</a>}
      . qq{ by Marty Roesch\n};
# program name and version, as one fixed line of text
$prog_line = join(' ', $script, $version);

#if ($::sap_version ne $version) {die "SnortSnarf.pl is version \"$version\" but snort_alert_parse.pl is version \"$::sap_version\"; did you remember to install the $version include directory someplace perl would find it?\n";}

##############################################################################

# Parameters handed to the SnortFileInput module.  'year' defaults to 'rec'
# and may be overridden with the -year option; alert and packet id sources
# can also be specified here.
%in_params= ();
$in_params{'year'}= 'rec';

# Parameters handed to the HTMLOutput module.  The separator, root and log
# file settings below are the Unix values; they are assigned again once the
# OS mode is known.
%out_params= (
    'html'             => $html,
    'dirsep'           => '/',
    'root'             => '/',
    'logfileext'       => '',
    'logfileprototerm' => ':',
    # should the signature with the most alerts appear first (versus last)
    'mostsigfirst'     => 0,
    # footer text
    'foot'             => $foot,
    # fixed line of text for the bottom of the header
    'prog_line'        => $prog_line
);



# Main program

# Rules settings; process_options() overrides these when -rulesfile and
# -rulesscanonce are given on the command line.
$rules_file= undef;
$rules_cacheall= 0;
&process_options();

# portability stuff - toggle for Unix/Windows.  This sets the directory
# separator, the file system root, the log file naming conventions and the
# default input file for the selected OS mode.
if ($os eq 'windows') {
    $dirsep= $out_params{'dirsep'}= "\\";       
    $root= $out_params{'root'}= "e:\\"; # Do not make this your system drive;
                                        # don't want it to fill up
    $out_params{'logfileext'}= '.ids';
    $out_params{'logfileprototerm'}= '_';
    $def_source= $root."util".$dirsep."snort".$dirsep."log".$dirsep."alert.ids"; # default input file
} elsif ($os eq 'unix') {
    $dirsep= $out_params{'dirsep'}= "\/";
    $root= $out_params{'root'}= "\/";
    $out_params{'logfileext'}= '';
    $out_params{'logfileprototerm'}= ':';
    # default input file
    $def_source= $root."var".$dirsep."log".$dirsep."snort.alert"; 
} else {
    # Without this, $dirsep, $root and $def_source would silently stay
    # undefined and the input file names built from them would be garbage.
    die "Unsupported OS mode \"$os\"; \$os must be either 'windows' or 'unix'\n";
}

# Resolve the input file names (and set up the rules source if one was
# requested); this fills in @in_sources, which is used below.
&initialize();
@ins= (); # input module instances
%ins_sources= (); # input sources, keyed by the name of the input module

# for testing, use last input source for a different input module
#@in2_sources= @in_sources > 1 ? (pop(@in_sources)) : ();

# create SnortFileInput module; it is given the input parameters, a copy of
# the list of tasks, the filter and the fully qualified input files
@in_tasks= qw(snort spp_portscan spade);
$in_filter= $Filter::true; # NOTE(review): presumably a filter that accepts every alert -- confirm in Filter
$in= SnortFileInput->new(\%in_params,[@in_tasks],$in_filter,@in_sources);
$ins_sources{'SnortFileInput'}= [@in_sources];
push(@ins,$in);

# if (@in2_sources) {
#     # create SnortFileInput2 module
#     require SnortFileInput2;
#     $in2= SnortFileInput2->new(\%in_params,[@in_tasks],$in_filter,@in2_sources);
#     $ins_sources{'SnortFileInput2'}= [@in2_sources];
#     push(@ins,$in2);
# }

# string to recreate the input modules
$in_recreate= Input::stringify_input_mods(@ins);

# create HTMLMemStorage and HTMLAnomMemStorage storage modules
%store_params= ();
$gstore= HTMLMemStorage->new(%store_params);
$astore= HTMLAnomMemStorage->new(%store_params);
%stores= (  'snort' => $gstore, # where different types are to be stored
            'spp_portscan' => $gstore,
            'spade' => $astore);
$out= HTMLOutput->new(%out_params);

# go through each input module grabbing all alerts and adding them to the
# appropriate storage module; the storage module is selected by the alert's
# type, and reading from a module stops when its get() returns a false value
# NOTE(review): assumes every alert type is one of the keys of %stores
foreach $in (@ins) {
    while ($alert= $in->get()) {
        $stores{$alert->type()}->store($alert);
    }
}

%output_per_params= ( # output parameters for a call to "output"
    'insources_str' => $in_recreate,
    'insources' => \%ins_sources
);
# generate the output from the stored alerts
$out->output(\%output_per_params,%stores);


##############################################################################

# Process the command line options and leave @ARGV with just the input files
# at the end.
#
# Words are consumed from the front of @ARGV until the first one that does not
# start with "-".  The results are recorded in the globals %out_params,
# %in_params, $rules_file, $rules_dir, $rules_cacheall and $os.
#
# Problems (an unknown option, a malformed value, or a missing value for an
# option that requires one) are reported with warn() and the offending option
# is skipped.  Takes no arguments; the return value is not meaningful.
sub process_options
{
    my $arg;
    # options that consume the following command line word as their value
    my %takes_value= map(($_ => 1), qw(-ldir -homenet -d -cgidir -db -nmapdir
                                       -nmapurl -sisr -rulesfile -rulesdir));

    # go through arguments
    while(@ARGV && $ARGV[0] =~ /^\-/) {
        $arg = shift @ARGV;

        # fetch the value up front so a missing one is caught in one place
        # (an empty string is a legitimate value, e.g., for -db)
        my $value;
        if ($takes_value{$arg}) {
            unless (@ARGV) {
                warn "option $arg requires a value but none was given; skipping\n";
                next;
            }
            $value= shift @ARGV;
        }

        if ($arg eq '-dns') {
            $out_params{'dns_option'}= 1;
        } elsif ($arg eq '-ldir') {
            # the base URL is always stored with a trailing slash
            $value.= '/' unless $value =~ /\/$/;
            $out_params{'log_base'}= $value;
        } elsif ($arg eq '-homenet') {
            $out_params{'homenet'}= $value;
        } elsif ($arg =~ s/^-color//) {
            # 'yes' and a bare -color both select the default style
            if ($arg =~ /=(.*)/) {
                $out_params{'color_opt'}= ($1 eq 'yes')?'rotate':$1;
            } else {
                $out_params{'color_opt'}= 'rotate';
            }
        } elsif ($arg =~ s/^-split=//) {
            $out_params{'split_threshold'}= $arg;
        } elsif ($arg eq '-d') {
            $out_params{'output_dir'}= $value;
        } elsif ($arg eq '-cgidir') {
            # the CGI directory is always stored without a trailing slash
            $value=~ s/\/$//;
            $out_params{'cgi_dir'}= $value;
        } elsif ($arg eq '-db') {
            $out_params{'db_file'}= $value;
        } elsif ($arg eq '-nmapdir') {
            $out_params{'nmap_dir'}= $value;
        } elsif ($arg eq '-nmapurl') {
            # the base URL is always stored with a trailing slash
            $value.= '/' unless $value =~ /\/$/;
            $out_params{'nmap_url'}= $value;
        } elsif ($arg eq '-sisr') {
            $out_params{'sisr_config'}= $value;
        } elsif ($arg eq '-rulesfile') {
            $rules_file= $value;
        } elsif ($arg eq '-rulesdir') {
            $rules_dir= $value;
        } elsif ($arg eq '-rulesscanonce') {
            $rules_cacheall= 1;
        } elsif ($arg eq '-onewindow') {
            $out_params{'notarget_option'} = 1;
        } elsif ($arg eq '-win' || $arg eq '-windows') {
            # -windows is the documented spelling; -win is kept for
            # compatibility with existing command lines
            $os = 'windows';
        } elsif ($arg eq '-rs') {
            $out_params{'mostsigfirst'}= 1;
        } elsif ($arg =~ s/^-refresh=//) {
            unless ($arg =~ /^\d+\s*$/) {
                warn "\"$arg\" does not look like a number of seconds for use with -refresh; skipping\n";
            } else {
                $out_params{'refreshsecs'}= $arg;
            }
        } elsif ($arg =~ s/^-year=//) {
            # allow longer spellings such as "recent" and "current"
            $arg= 'rec' if $arg =~ /^rec\w+/;
            $arg= 'cur' if $arg =~ /^cur\w+/;
            # anchored so that only 'rec', 'cur' or an all-digit year pass
            unless ($arg =~ /^(?:rec|cur|\d+)$/) {
                warn "year option \"$arg\" not recognized, skipping\n";
            } else {
                $in_params{'year'}= $arg;
            }
        } else {
            warn "Unknown option $arg\n";
        }
    }
}

##############################################################################

# Resolve the input sources and, if requested, set up the rules source.
#
# Reads the globals @ARGV (the input files left over by process_options),
# $def_source, $os, $dirsep, $rules_file, $rules_dir and $rules_cacheall.
# Sets the globals:
#   @in_sources_text - the input file names as given, or the default input
#                      file if none were given
#   $cwd             - the current working directory (with backslashes when
#                      not in unix mode)
#   @in_sources      - the same files, fully qualified
# and, when $rules_file is defined, stores a SnortRules instance under the
# 'rulessource' key of both %in_params and %out_params.
sub initialize
{
    # Setup to use default file if no args
    @in_sources_text= @ARGV;
    @in_sources_text= ($def_source) unless @in_sources_text;
    $cwd= getcwd();
    # fully qualify file names
    if ($os eq 'unix') {
        @in_sources= map((/^\// ? $_ : "$cwd/$_"),@in_sources_text);
    } else {
        $cwd =~ s:/:\\:g; # convert forward slashes in getcwd output to backslashes
        # names starting with a drive (or similar "xxx:" prefix) are left
        # alone, and so are UNC (\\server\share\...) and rooted (\dir\file)
        # names, since prefixing those with the cwd would make them invalid
        @in_sources= map((/^(?:\w+\:|[\\\/])/ ? $_ : "$cwd$dirsep$_"),@in_sources_text);
    }

    if (defined($rules_file)) {
        # the same rules source is shared by the input and output sides
        my $rulesource= SnortRules->new($rules_file,$rules_dir,$dirsep,$rules_cacheall);
        $in_params{'rulessource'}= $rulesource;
        $out_params{'rulessource'}= $rulesource;
    }
}
