#!/usr/local/bin/perl -w
# vi: ts=4 autoindent

# log_analysis, by Mordechai T. Abzug

# RCS $Id: log_analysis.in,v 1.94 2001/07/16 06:42:38 morty Exp morty $

# definitely requires at least 5.005.  Written under 5.00503, not tested with
# 5.005, 5.00501, or 5.00502.
require 5.005;

use strict;

use POSIX;
use English;
use Getopt::Std;
use re 'eval';
use File::stat;
use FileHandle;

# Names of scalar config variables that a config file MUST define.
my @required_import_scalars=qw(
	host_pat zone_pat ip_pat user_pat mail_user_pat file_pat word_pat
	other_host_message
	date_format
	output_message_one_day
	output_message_all_days
	output_message_all_days_in_range
	real_mode_output_format
	);

# Names of scalar config variables that a config file MAY define.
my @optional_import_scalars=qw(
	mail_address
	mail_command
	PATH
	nodename osname osrelease
	show_all real_mode days_ago output_file output_file_and_stdout
	real_mode_sleep_interval real_mode_check_interval
	default_sort
	default_filter
	process_all_nodenames
	umask
	priority
	domain
	leave_FQDNs_alone
	);

# Lookup set of importable scalar names; values are irrelevant, only
# key existence is tested (see read_config).
my %import_scalars;
@import_scalars{@optional_import_scalars, @required_import_scalars}=undef;

my @required_import_arrays=qw(
	log_type_list
	);

my @optional_import_arrays=qw(
	optional_log_files
	commands_to_run
	ignore_categories
	priority_categories
	allow_nodenames
	decompression_rules
	pgp_rules
	filename_ignore_patterns
	);

# Lookup set of importable array names (existence-only, as above).
my %import_arrays;
@import_arrays{@required_import_arrays, @optional_import_arrays}=undef;

# Per-log-type config variable name extensions (suffixes appended to a
# log type's name to form its config variable names).
my @per_log_required_scalar_exts=qw(
	date_pattern
	date_format
	);

my @per_log_optional_scalar_exts=qw(
	nodename_pattern
	open_command
	pipe_decompress_to_open
	);

my @per_log_required_array_exts=qw(
	filenames
	);

my @per_log_optional_array_exts=qw(
	pre_date_hook
	pre_skip_list_hook
	skip_list
	raw_rules
	);

# the next bunch of things are defined in the config, and can be overridden
# in a user-defined config
my (@required_log_files, @optional_log_files);
my (@decompression_rules, @pgp_rules, @filename_ignore_patterns);
my ($host_pat, $zone_pat, $ip_pat, $user_pat, $mail_user_pat, $file_pat);
my ($word_pat);
my ($date_format, $other_host_message);
my ($output_message_one_day, 
	$output_message_all_days,
	$output_message_all_days_in_range,
	);
my ($real_mode_output_format);
my (@ignore_categories, @priority_categories, @unknown_categories);

my (%count, %command_output); # the variables storing data for output
my (%unknowns, %unknowns_raw);
my %nodename_allowed;
my $data_start=tell DATA; # save the position of the DATA start
my $time_start=time;
my @F;
my $config_scalar={};
my $config_array={};
my (%patterns, @dests);
my (%categories);
my $num_dests=0;
my $minimum_version="0.21"; # the first version that this version can understand
my $current_version="0.38";
my $version_string=basename($0)." $current_version";
my $include_depth_limit=16; # recursion limit for includes
my %tags=( '%' => '%' ); # tag expansions for &process_tags; '%%' -> '%'
my ($domain, $leave_FQDNs_alone);
my $PATH='/usr/local/bin:/bin:/usr/bin:/usr/ucb';
my $umask="077";
$ENV{PATH}=$PATH; # sanitize PATH before running any external commands

# Defaults for the command-line-overridable settings.
my %opt;
my $days_ago=1;
my $internal_info="";
my $suppress_commands=0;
my $suppress_footer=0;
my $show_all=0;
my $unknowns_only=0;
my $unknowns_dir=0;
my $mail_command;
my $mail_address="";
my $pgp_type="";
my $priority=0;
my $real_mode=0;
my $real_mode_sleep_interval=1;
my $real_mode_check_interval=300;
my $default_sort="funky";
my $default_filter;
my @commands_to_run;
my $output_file;
my $output_file_and_stdout;
my $process_all_nodenames;
my @allow_nodenames;

my $nodename;
my $osname;
my $osrelease;

# information we track specially in case someone uses -I
my @categories;
my @config_versions;

# preprocessor variable namespace
my %VAR;

# some state variables for real mode
my %real_file_state;
my $real_mode_bypass; # this variable lets us not output in real mode when we
	# initially scan through the files

# general state variables
my %last; # last message processed for file; needed for "last message repeated"
my %multiplier; # multiplier associated with last message for "last message 
	# repeated"
my %unique; # used for tracking the special "unique" categories

# state variable for user-defined use
my %state;

# Identify the local host and OS via uname(1).
chomp($nodename=qx(uname -n));
chomp($osname=qx(uname -s));
chomp($osrelease=qx(uname -r));

$tags{n}=$nodename;
$tags{s}=$osname;
$tags{r}=$osrelease;

my %do_type; # log type => handler sub; presumably populated by the
	# per-type evals compiled later -- not visible here, confirm

my @original_ARGV=@ARGV; # saved for the report footer
# Parse command-line options; bail out with the usage text on any bad
# option.
if (!getopts("ad:f:hI:m:M:n:No:Op:rsSu:UvzZ:", \%opt)) {
	die &usage;
}

# NOTE(review): 'exit print' exits with print's return value (1), so -h
# exits non-zero; confirm wrappers don't rely on this before changing it.
exit print &usage if $opt{h};

if ($opt{v}) {
	print "$version_string\n";
	exit(0);
}

$internal_info=$opt{I}           if defined $opt{I};

# -I internal-config: dump the built-in config (everything in DATA up to
# the first POD '=' marker) and exit.
if ($internal_info =~ m{^internal[\-\_]config$}) {
	seek(DATA, $data_start, 0);
	print while (defined($_=<DATA>) && !m{^\=});
	seek(DATA, $data_start, 0);
	exit;
}

$VAR{__USE_MINIMAL_CONFIG}=1 if $opt{z};

if ($opt{Z}) {
	# Define each comma-separated -Z name in the preprocessor namespace.
	# Bug fix: the old '1 x (@vars)' was *string* repetition ("111..."),
	# a single scalar, so only the first listed variable received a true
	# value; '(1) x @vars' is list repetition and marks every requested
	# variable as defined and true (matching what -z does above).
	my @vars=split(/\,/, $opt{Z});
	@VAR{@vars}=(1) x @vars;
}

&read_config(\*DATA, "internal-config", $config_scalar, $config_array, 0);

# Build (not print) the usage text shown for -h and on option errors.
sub usage {
	my $base=&basename($0);
	return <<"USAGE_END";
Usage: $base [-f config_file] [-n nodename] [-U] [-d days] [-a]
      [-m mail-address] [-M mail-prog] [-p pgp-type] [-s] [-S]
      [-N] [-v]

      -a                 show all logs, even old stuff
      -d days-ago        show only logs from this long ago (def.=1)
      -f config_file     read config_file for additional config
      -I categories      output all categories and exit
      -I config_versions output all config versions and exit
      -I help            show the other -I options
      -m mail-address    mail output to this mail-address
      -M mail-prog       set the mail program (default='Mail')
      -n nodename        use 'nodename' for scanning syslogs
      -N                 process all nodenames
      -o output_file     output to output_file
      -O                 with -o, also output to stdout
      -p pgp-type        encrypt mail output in 'pgp-type' style
      -r                 'real mode' for continuous output
      -s                 suppress running extra commands
      -S                 suppress output "footer"
      -U                 write unknowns and exit
      -v                 version
USAGE_END
}

# Layer an optional user config (-f) over the built-in one.
if ($opt{f}) {
	my $configfh=new FileHandle("<$opt{f}") || die "$0: open $opt{f}: $!";
	&read_config($configfh, $opt{f}, $config_scalar, $config_array, 0);
	$configfh->close;
}

# done reading in the config.  Let's process it.
my (@log_type_list, %log_scalar, %log_array);
&import_config_vars($config_scalar, $config_array);
umask oct $umask;

# Command-line options override anything the config files set.
$show_all=1                      if defined $opt{a};
$days_ago=0 if $show_all;
$days_ago=$opt{d}                if defined $opt{d};
$mail_address=$opt{m}            if defined $opt{m};
$mail_command=$opt{M}            if defined $opt{M};
$nodename=$opt{n}                if defined $opt{n};
$process_all_nodenames=1         if defined $opt{N};
$output_file=$opt{o}             if defined $opt{o};
$output_file_and_stdout=1        if defined $opt{O};
$real_mode=1                     if defined $opt{r};
$suppress_commands=1             if defined $opt{s};
$suppress_footer=1               if defined $opt{S};
$unknowns_dir=$opt{u}            if defined $opt{u};
$unknowns_only=1                 if defined $opt{U};
$pgp_type=$opt{p}                if defined $opt{p};


# Any leftover arguments are explicit log files to scan, replacing the
# configured file lists.
if (@ARGV) {
	@required_log_files=@ARGV;
	@optional_log_files=();
}

my ($day_start, $day_end, $is_multiday);

# -d accepts a single value, a YYYY_MM_DD-YYYY_MM_DD date range, or an
# N-M days-ago range.
if ($days_ago=~m{^(\d{4,}_\d{1,2}_\d{1,2})\-(\d{4,}_\d{1,2}_\d{1,2})$} ||
	$days_ago=~m{^(\d+)-(\d+)$} ) {
		$day_start=$1;
		$day_end=$2;
		$is_multiday=1;
} else {
		$day_start = $day_end = $days_ago;
		$is_multiday=0;
}

# "relday" = days before today, so the older start day has the larger
# relday value.
my $relday_start = &normalize2relday($day_start);
my $relday_end   = &normalize2relday($day_end);

my @when_start   = &relday2time($relday_start);
my @when_end     = &relday2time($relday_end);

die "Start date must be before or on end date\n"
	if $relday_start < $relday_end;

# if we don't know what domain we're in, let's figure it out.
if (!defined $domain) {
	my $resolvfh=new FileHandle;
	if ($resolvfh->open("</etc/resolv.conf")) {
		while(<$resolvfh>) {
			if (m{^\s*domain\s+([\w\.\-]+)\s*$}i) {
				$domain=$1;
				last;
			}
		}
		$resolvfh->close;
	}
}

# localize nodename
if (defined $domain && !$leave_FQDNs_alone) {
	$nodename=~s{\.${domain}$}{};
}

# Refresh the tags in case the nodename was overridden or trimmed above.
$tags{n}=$nodename;
$tags{s}=$osname;
$tags{r}=$osrelease;

my (%when);
my ($evals, $filename_pats, $decompression_rules, $pgp_rules)=
	&build_log_stuff($config_scalar, $config_array);

# Handle the remaining -I introspection/debugging requests.
if ($internal_info) {
	my $do_exit=1; # assume we should exit unless we're sure
	if ($internal_info eq "evals") {
		foreach my $type (@log_type_list) {
			print "eval for $type is:\n";
			my $i=1;
			print map(sprintf("%5d\t%s\n", $i++, $_), 
				split("\n", $evals->{$type}));
			print "\n";
		}
	} elsif ($internal_info eq "evals-only") {
		foreach my $type (@log_type_list) {
			print $evals->{$type}, "\n";
		}
	} elsif ($internal_info eq "categories") {
		print map("$_\n", sort &unique(@categories));
	} elsif ($internal_info eq "nothing" || $internal_info eq "null") {
		# do nothing at all.  Used to check that the code is OK because 
		# perl -c is a whole new command line. . .
	} elsif ($internal_info =~ m{^(config|file)[\-\_]versions$}) {
		print @config_versions;
	} elsif ($internal_info =~ m{^log[\-\_]files$}) {
		# do nothing now; we'll handle this later
		$do_exit=0;
	} elsif ($internal_info eq "patterns") {
		foreach my $type (@log_type_list) {
			print "patterns for $type are:\n";
			print map("\t".$_->{pattern}."\n", @{$patterns{$type}});
		}
	} elsif ($internal_info =~ m{^log[\-\_]types$}) {
		print map("$_\n", @log_type_list);
	} elsif ($internal_info eq "help") {
		print map("$_\n", sort qw(
			internal_config evals categories config_versions 
			log_files log_types patterns nothing
			help));
	} else {
		die "$0: internal info type $internal_info is not known.\n";
	}
	exit 0 if $do_exit;
}

# Compile the generated per-type parser code (inspect with -I evals).
for my $type (@log_type_list) {
		my $eval = $evals->{$type};
		eval $eval;
		die "$0: error in eval for type $type (use -I evals to list): $@\n" 
			if $@;
}

setpriority (0, $$, $priority) || die "$0: setpriority $priority: $!\n"
	if $priority;


# Sanity-check the default sort before doing any real work.
eval { &sort_keys ($default_sort); };
die "default sort $default_sort gives an error: $@\n" if $@;

if ($unknowns_dir) {
	die "$0: -u isn't compatible with other output options\n"
		if $opt{o} || $opt{m} || $opt{p};
	die "unknowns_dir must be a readable, executable directory\n"
		if -e $unknowns_dir && (!-d _ || !-r _ || !-x _);
	@required_log_files=glob("$unknowns_dir/*") if -e $unknowns_dir;
}

if ($real_mode) {
	die "$0: -r/real_mode isn't compatible with -a/show_all\n"
		if $show_all;
}

# done configging.  Let's rock.


if ($real_mode) {
	&do_real_mode;
	die; # we should never reach this
} else { #normal mode
	&do_normal_mode;
	# done looking at the log files.  Let's run the usual commands. . .
	if (! $suppress_commands) {
		foreach my $command (@commands_to_run) {
			$command_output{$command}=qx($command);
		}
	}
	&do_output;
}

exit 0;
# Fini.


# One-shot mode: scan each selected log file once for the requested day
# range, dispatching every file to its log-type handler.
sub do_normal_mode {
	# expand out any globs

	my @globbed_files;
	foreach (@optional_log_files) {
		push @globbed_files, glob($_);
	}
	@optional_log_files=grep(-r, @globbed_files);

	die "none of the log files is readable!\n" 
		unless @optional_log_files+@required_log_files>0;

	foreach my $file (@required_log_files) {
		die "can't open log file $file\n" unless -r $file;
	}

	# Explicitly required files win over the config's optional list.
	my @log_files=@required_log_files? @required_log_files 
		: @optional_log_files;

	# If it's too old, skip it right away.  No test for too young, because a 
	# recently modified file may contain old logs.
	@log_files=grep(-M $_ <= $relday_start+2, @log_files) 
		unless $show_all && !$is_multiday;

	if ($internal_info =~ m{^log[\-\_]files$}) {
		print map("$_\n", @log_files);
		exit;
	}

	# if someone hits interrupt, print out what we have
	$SIG{'INT'}=sub { 
		print "\nInterrupt received, dumping output.\n\n";
		&do_output;
		warn "$0: interrupt received, dumped output.\n";
		exit 1;
	};

	# OK, let's actually look at the log files
	foreach my $file (@log_files) {
		my $type=&type($file);

		my ($open_command, $tmpfile)=&open_command($file, $type);

		my $fh=&open($file, $open_command, $tmpfile) || die;
		$do_type{$type}->($file, $fh);

		close $fh;
	}
}


# Continuous mode: tail the watched files forever, re-resolving globs and
# reopening rotated files every check interval.  Never returns.
sub do_real_mode {
	$real_mode_bypass=1; # while initially scanning through files
	die "$0: no files to watch!" 
		unless @optional_log_files + @required_log_files;

	my $last_check_time=0; # ie. never

	# currently, required_log_files and optional_log_files are mutually
	# exclusive, but that could change.  Let's avoid assumptions.
	foreach my $file (@optional_log_files) {
		$real_file_state{$file}{required}=0;
	}
	foreach my $file (@required_log_files) {
		$real_file_state{$file}{required}=1;
	}

	while (1) {
		# Periodic housekeeping: re-glob patterns and detect rotation.
		if (time-$last_check_time>=$real_mode_check_interval) {
			$last_check_time=time;

			foreach my $file (keys %real_file_state) {
				my $filename;
				if ($real_file_state{$file}{required}) {
					die "Unable to read required file $file\n" if !-r $file;
					$filename=$file;
				} else { # then we also need to glob the filename
					my @globbed=glob($file);
					@globbed=grep(-r, @globbed); # make sure files are readable

					if (!@globbed) { # nothing to read!
						delete $real_file_state{$file}{handle};
						delete $real_file_state{$file}{name};
						next;
					} elsif (@globbed==1) {
						$filename=$globbed[0];
					} else {
						# several matches: watch the most recently modified
						@globbed=sort modification_sort_helper @globbed;
						$filename=$globbed[0];
					}
				}

				my $type=&type($filename);
				my ($open_command, $tmpfile)=&open_command($filename, $type);
				my $oldhandle=$real_file_state{$file}{handle};

				if ($oldhandle) { # we have an open file.  Is it current?
					# Compare inodes to detect rotation under the same name.
					my $oldstat=stat $oldhandle || 
						die "$0: stat oldhandle for file/filegroup $file\n";
					my $newstat=stat $filename || die "$0: stat $filename\n";
					if ($newstat->ino == $oldstat->ino) { # same file
						next; # we don't need to play with it.
					} else {
						# different file.  Let's process it one last time and
						# then close it
						my $oldfilename=$real_file_state{$file}{name};
						my $oldtype=$real_file_state{$file}{type};
						$do_type{$oldtype}->($oldfilename, $oldhandle);
						$oldhandle->close;
						undef $real_file_state{$file}{handle};
					}
				}

				# NOTE(review): files that need an open_command (e.g.
				# compressed logs) are skipped in real mode -- looks
				# intentional, but confirm.
				next if $open_command;

				# OK, we need to open this sucker.
				my $handle=&open($filename, undef, undef);
				$real_file_state{$file}{handle}=$handle;
				$real_file_state{$file}{name}=$filename;
				$real_file_state{$file}{type}=$type;
			}
		}

		# Drain any new lines from every open handle, then nap briefly.
		foreach my $file (keys %real_file_state) {
			my $fh=$real_file_state{$file}{handle};
			next if ! defined $fh;
			my $type=$real_file_state{$file}{type};
			my $name=$real_file_state{$file}{name};
			seek ($fh, 0, 1); # straight out of perldoc -f seek
			$do_type{$type}->($name, $fh);
		}
		sleep($real_mode_sleep_interval);
		$real_mode_bypass=0; # initial scan done; output from now on
	}
	die; # we should never reach here
}


# sort() helper: order files most-recently-modified first.
# -M gives the file's age in days, so the smaller age sorts earlier.
sub modification_sort_helper {
	my $age_a = -M $a;
	my $age_b = -M $b;
	return $age_a <=> $age_b;
}


# Determine the log type of a file by matching its basename against each
# configured type's filename pattern.  Dies unless exactly one matches.
sub type {
	my $file = shift;
	die "Internal error" unless defined $file;

	my $base = &basename($file);
	my @matches = grep { $base =~ m{^$filename_pats->{$_}} } @log_type_list;

	die "no known log type for file $file\n" unless @matches;
	die "more than one type matches file $file: @matches\n" if @matches > 1;
	return $matches[0];
}


# Work out how to open $file for log type $type.  Returns the command to
# pipe the file through (undef means open it directly) and an optional
# temp file that the caller must unlink once the file is open.
sub open_command {
	defined(my $file=shift) || die "Internal error";
	defined(my $type=shift) || die "Internal error";

	my $open_command;
	$open_command=$log_scalar{$type}{open_command}
			if $log_scalar{$type}{open_command};

	my $pipe_decompress_to_open;
	$pipe_decompress_to_open=$log_scalar{$type}{pipe_decompress_to_open}
		if $log_scalar{$type}{pipe_decompress_to_open};

	my $tmpfile;
	# if we have both a decompression rule and we already have an open_command, 
	# we need to worry about two commands.  This shouldn't be a big deal -- 
	# either we pipe the decompression output directly to the open command, or
	# if that won't work, we use a temp file.  Default to the temp file, because
	# it's more likely to work; override by setting pipe_decompress_to_open to
	# true.
	if ($file =~ m{\.([^\.]+)$} && exists $$decompression_rules{$1}) {
		if (!$open_command) {
			$open_command=$$decompression_rules{$1};
		} elsif ($pipe_decompress_to_open) {
			my $command=&process_tags($$decompression_rules{$1}, 
				{%tags, f=>$file});
			$command=~s{\%}{\%\%}g; # protect '%' from the later tag pass
			$open_command="$command|$open_command";
			$file="-";
		} else {
			# it would be nice if we could just assume that the open_command can
			# correctly handle input from a pipe on stdin.
			# Unfortunately, this is not the case for "last -f file" under 
			# Solaris 2.6, OpenBSD 2.4, and various Linuxen.
			# So, we do temp file fun.
			my $command=&process_tags($$decompression_rules{$1}, 
				{%tags, f=>$file});
			$tmpfile=&tmpnam;
			$command .= " >$tmpfile";
			# system() returns 0 on success; /256 extracts the exit status
			die "unable to run '$command'\n" unless !(system($command)/256);
			$file=$tmpfile;
		}
	}
	return ($open_command, $tmpfile);
}


# Open a log file -- either directly or via the given open command --
# and return the filehandle.  Unlinks $tmpfile (if any) once open, and
# initializes the "last message repeated" multiplier for the file.
sub open {
	defined(my $file = shift) || die "Internal error";
	my ($open_command, $tmpfile) = (shift, shift);

	my $fh = FileHandle->new;

	if ($open_command) {
		# Fill in the %f tag and read the command's output.
		my $command = &process_tags($open_command, { %tags, f => $file });
		$fh->open("$command|") || die "unable to run '$command'\n";
	} else {
		$fh->open("<$file") || die "unable to open $file\n";
	}

	# once we're open, we can delete the tmpfile.
	if ($tmpfile) {
		unlink($tmpfile) or die "$0: Couldn't unlink $tmpfile : $!";
	}

	$multiplier{$file} = 1; # init

	return $fh;
}


# Return the arguments with duplicates dropped, preserving the order of
# first appearance.
sub unique {
	my %seen;
	return grep { !$seen{$_}++ } @_;
}


# Set difference: return the elements of @$A that do not appear in @$B,
# preserving @$A's order.
sub difference {
	my ($A, $B) = @_;

	my %in_b;
	$in_b{$_} = 1 for @$B;

	return grep { !$in_b{$_} } @$A;
}


# Emit all accumulated results: unknowns-only mode, one all-days report,
# or one report per day in the requested range.
sub do_output {

	$SIG{'INT'}='DEFAULT';

	if ($unknowns_only) {
		if ($unknowns_dir) {
			# Rebuild the unknowns directory from scratch.
			# NOTE(review): $unknowns_dir is interpolated into a shell
			# command; a path with spaces or shell metacharacters would
			# break (or worse).  Confirm only safe paths are passed.
			if (-d $unknowns_dir) {
				system("rm -r $unknowns_dir");
				die "$0: rm -r $unknowns_dir: command failed\n" if $? >> 8;
			}
			die "No more unknowns!\n" if ! %unknowns;
			mkdir ($unknowns_dir, 0755) || die "$0: mkdir $unknowns_dir: $!";
			foreach my $type (sort keys %unknowns) {
				my $outfh=new FileHandle(">$unknowns_dir/$type") 
					|| die "$0: open $unknowns_dir/$type: $!";
				print $outfh sort keys %{$unknowns_raw{$type}};
				close $outfh;
			}
		}
	
		foreach my $type (sort keys %unknowns) {
			print "\nType: $type\n\n";
			print map($_."\n", sort keys %{$unknowns{$type}});
		}
	
		return;
	} elsif ($show_all) {
		&do_output_day($relday_end);
	} else {
		# Walk from the oldest requested day down to the most recent.
		for (my $day=$relday_start; $day>=$relday_end; $day--) {
			&do_output_day($day);
		}
	}
}

# Produce the report for one relative day: build the output pipeline
# (tee/PGP/mail as configured), then print per-node per-category counts.
sub do_output_day {
	defined(my $relday=shift) || die "$0: missing arg";
	
	my $date       = strftime($date_format, &relday2time($relday));
	my $date_start = strftime($date_format, @when_start);
	my $date_end   = strftime($date_format, @when_end);


	my $output_message;
	my @output_commands;

	# Pick the header message that matches the mode we're running in.
	if (!$show_all) {
		$output_message=&process_tags($output_message_one_day,
			{ %tags, d=>$date } );
	} elsif (!$is_multiday) {
		$output_message=&process_tags($output_message_all_days,
			{ %tags, d => $date } );
	} else {
		$output_message=&process_tags($output_message_all_days_in_range,
			{ %tags, s => $date_start, e => $date_end } );
	}

	if ($output_file) {
		push @output_commands, "tee ".
			&process_tags($output_file, { %tags, d => $date } );
	}

	if ($pgp_type) {
		die "Unknown PGP type: $pgp_type\n" 
			unless defined $$pgp_rules{$pgp_type};
		my $pgp_command=&process_tags($$pgp_rules{$pgp_type}, 
			{%tags, m => $mail_address});
		push @output_commands, $pgp_command;
	}

	if ($mail_address) {
		push @output_commands, &process_tags($mail_command, 
			{%tags, m => $mail_address, o => $output_message });
	}

	# Chain all the output commands into one pipeline and select() it so
	# the plain print statements below write through it.
	my $outfh;
	if (@output_commands) {
		my $output_command=join("|", @output_commands);
		if ($output_file && !$output_file_and_stdout) {
			$output_command.=" >/dev/null";
		}
		$outfh=new FileHandle("|$output_command")
			|| die "$0: run $output_command: $!";
		select $outfh;
	}

	print "\n$output_message\n\n";
	
	if (! $suppress_commands) {
		foreach my $command (keys %command_output) {
			print "$command output:\n$command_output{$command}\n"
				if exists $command_output{$command};
		}
	}
	
	foreach my $node (sort keys %{$count{$relday}}) {
		# Label the node only when reporting on hosts other than our own.
		print "\n\nLogs found for other hosts.  For node $node:\n" 
			if (keys %{$count{$relday}}) > 1 || 
				( (keys %{$count{$relday}}) == 1 && 
				(keys(%{$count{$relday}}))[0] ne $nodename);
	
		# Fold the "unique" category tallies into the main counts.
		my $unique_ref=$unique{$relday}{$node};
		foreach my $category (keys %$unique_ref) {
			foreach my $item (keys %{$unique_ref->{$category}}) {
				my $count=scalar keys %{$unique_ref->{$category}{$item}};
				$count{$relday}{$node}{$category}{$item}+=$count;
			}
		}

		# Compute derived categories now that the real ones are final.
		foreach my $category (keys %categories) {
			my $derive=$categories{$category}{derive};
			next if !defined $derive;
			die "category $category should be derived, but was written to!\n"
				if defined $count{$relday}{$node}{$category};
			%{$count{$relday}{$node}{$category}} = 
				&derive($derive, $relday, $node);
		}

		foreach my $category (@ignore_categories) {
			delete $count{$relday}{$node}{$category};
		}
	
		# Print priority categories first, the rest sorted, and unknown
		# categories last.  (The slice below assigns a single scalar, so
		# only the first value is 1 -- but the grep tests 'exists', so
		# every key still counts.)
		my %unknown_categories;
		@unknown_categories{@unknown_categories}=1;
		foreach my $category (@priority_categories, 
				grep (!exists $unknown_categories{$_}, 
					sort keys %{$count{$relday}{$node}}),
				@unknown_categories) {
			next unless defined $count{$relday}{$node}{$category};
			my %values=%{$count{$relday}{$node}{$category}};
			my @keys=keys %values;

			my $filter=$categories{$category}{filter};
			$filter=$default_filter if !defined $filter && defined $default_filter;

			my $sort=$categories{$category}{sort};
			$sort=$default_sort if !defined $sort;

			@keys=&filter($filter, %values) if (defined $filter);

			@keys=&sort_keys($sort, \%values, @keys);

			next if !@keys;

			print "\n$category:";
			print " ($filter)" if defined $filter && $filter ne "none";
			print "\n";
			foreach (@keys) {
				printf "%-10d %s\n", $count{$relday}{$node}{$category}{$_}, $_;
			}
		} continue {
			# drop the category once handled so it can't print twice
			delete $count{$relday}{$node}{$category};
		}
	
		print "\n";
	}
	
	if (! $suppress_footer) {
		print "Program was called as: $0 @original_ARGV\n";
		print "version: $version_string\n";
		print "Elapsed time (seconds): ", time-$time_start, "\n";
		print "\n";
	}

	close $outfh if @output_commands;
	return;
}


# Real-mode line output: expand backslash escapes in the configured
# format string, then fill in the per-message tags and print the result.
# Prints nothing during the initial scan (while $real_mode_bypass is set).
sub real_out {
	my ($relday, $node, $category, $data, $count) = @_;
	foreach ($relday, $node, $category, $data, $count) {
		die "Internal error" unless defined $_;
	}

	return if $real_mode_bypass;

	# First pass: turn \\, \n and \t escapes into literal characters.
	my $expanded = &process_tags($real_mode_output_format,
		{ "\\" => "\\", 'n' => "\n", 't' => "\t" }, "\\");
	# Second pass: substitute host, category, data and count tags.
	print &process_tags($expanded, { %tags, '%' => '%', h => $node,
		c => $category, d => $data, '#' => $count });
}


# Read one paragraph (lines up to the next blank line) from the config
# line array, starting after $$idx_ref.  Leading whitespace is stripped,
# comment lines are dropped, and $$idx_ref is left on the blank line.
sub read_paragraph {
	my $lines_ref = shift || die "Internal error";
	my $idx_ref   = shift || die "Internal error";

	my @para;
	while (defined($_ = $lines_ref->[++$$idx_ref])) {
		last if m{^\s*$};
		chomp;
		s{^\s+}{};
		next if m{^\#};
		push @para, $_;
	}
	return @para;
}

# Expand tags of the form <tag_char><optional width><char> in $string
# using the supplied tag table; the width (e.g. "%5x") is handed to
# sprintf for padding.  Dies on a tag not present in the table.
sub process_tags {
	my ($string, $tags_ref, $tag_char) = @_;
	die "Internal error: missing arg" unless $tags_ref;

	$tag_char = '%' unless defined $tag_char; # a nice default

	my $pre_string = $string;
	$string =~ s{\Q$tag_char\E(\-?\d*)([^\d\-])}{
		defined $tags_ref->{$2}
			? sprintf("%${1}s", $tags_ref->{$2})
			: die "undefined tag in '$pre_string': \%$1$2\n"
	}eg;
	return $string;
}

# Strip any leading directory components, returning the final path
# element (greedy: everything up to the last '/' in a run of non-space
# characters is removed).
sub basename {
	my $path = shift;
	$path =~ s{\S*/}{}g;
	return $path;
}

# Parse one config stream (the DATA section, a -f file, or an include).
# set/add/remove statements land in $config_scalar/$config_array;
# pattern, category, and include directives update file-level state
# directly.  $depth guards against runaway include recursion.
sub read_config{
	my $fh=shift || die "$0: Internal error: undefined arg";
	my $filename=shift || die "$0: Internal error: undefined arg";
	my $config_scalar=shift || die "$0: Internal error: undefined arg";
	my $config_array=shift || die "$0: Internal error: undefined arg";
	my $depth=shift;

	die "$0: Internal error: undefined arg" unless defined $depth;

	die "you've got $include_depth_limit levels of includes; i'm bailing." 
		if $depth>=$include_depth_limit;

	my ($keyword, $vartype, $varname, @arrvalue, $varvalue);
	my $config_version=undef;
	my $file_version=undef;
	my @lines;

	# variables designed to deal with fancy pattern configs
	my ($logtype, $pattern, $format, $count, $dest, $expecting_dest);
	my (%local_patterns, $fancy_ref);

	# variables designed to deal with fancy category configs
	my ($category);

	# Slurp the config up to its @@end marker, then preprocess in place.
	while(<$fh>) {
		chomp;
		push @lines, $_;
		last if m{^\s*\@\@end\s*$};
	}

	&preprocessor(\@lines, $filename);

	for (my $line=0; $line<@lines; $line++) {
		$_=$lines[$line];

		next if m{^\s*\#};
		next if m{^\s*$};
		s{^\s*}{};
		if (s{^(\S+)\s*}{}) {
			my $keyword=lc $1;
			if ($keyword =~ m{^(set|add|remove)$}) {
				# Form: set|add|remove var|arr NAME = VALUE
				if (! s{^(\S+)\s*}{} || 
					($vartype=$1, $vartype!~m{^(arr|var)$})) {
					die "keyword $keyword should be followed by arr or var";
				}
				if (! s{^\s*([^\s\=]+)\s*}{}) {
					die "$vartype should be followed by variable name";
				}
				$varname=$1;
				die "'$varname' is not a legal variable name"
					if $varname !~ m{([A-Za-z]\w+)};
				die "keyword 'remove' not allowed with vartype 'var'"
					if $vartype eq 'var' && $keyword eq 'remove';
				die "can't find '=' in the right place"
					unless s{^\s*\=\s*}{};
				if ($vartype eq 'var') {
					$varvalue=$_;
					if ($keyword eq 'set') {
						$config_scalar->{$varname}=$varvalue;
					} elsif ($keyword eq 'add') {
						$config_scalar->{$varname}.=$varvalue;
					} else {
						die "Internal error: unknown keyword $keyword ".
							"for vartype $vartype";
					}
					&import_scalar($varname, $varvalue) 
						if exists $import_scalars{$varname};
				} elsif ($vartype eq 'arr') {
					# Array values are the paragraph that follows the line.
					@arrvalue=&read_paragraph(\@lines, \$line);
					if ($keyword eq 'set') {
						$config_array->{$varname} = [ @arrvalue ];
					} elsif ($keyword eq 'add') {
						push @{$config_array->{$varname}}, @arrvalue;
					} elsif ($keyword eq 'remove') {
						# Build one anchored alternation of the (escaped)
						# items and drop every matching element.
						my @items_to_remove=&unique(@arrvalue);
						s{([^\w])}{\\$1}g foreach @items_to_remove;
						my $remove_pattern=join('|', @items_to_remove);
						$remove_pattern=qr{^(?:$remove_pattern)$};
						my $size_before=@{$config_array->{$varname}};
						@{$config_array->{$varname}}=
							grep(!m{$remove_pattern}, 
								@{$config_array->{$varname}});
						warn "wasn't able to remove all requested elements ".
								"from $varname" 
							if $size_before-@{$config_array->{$varname}}<
								@items_to_remove;
					} else {
						die "Internal error: unknown keyword $keyword ".
							"for vartype $vartype";
					}
					&import_array($varname, @{$config_array->{$varname}})
						if exists $import_arrays{$varname};
				} else {
					die "Internal error: unknown vartype $vartype";
				}
			} elsif ($keyword eq 'logtype:') {
				die "was expecting a destination for pattern\n:\t$pattern"
					if $expecting_dest;
				die "keyword $keyword should be followed by something"
					unless s{^(\S.*)$}{};
				$logtype=$1;
			} elsif ($keyword eq 'pattern:') {
				# Start a new pattern record under the current logtype;
				# a dest: line must follow before the next pattern.
				die "was expecting a destination for pattern:\n\t$pattern"
					if $expecting_dest;
				die "keyword $keyword should be followed by something"
					unless s{^(\S.*)$}{};
				$pattern=$1;
				die "don't have a logtype for pattern:\n\t$pattern"
					if !defined $logtype;
				$format=$count=$dest=undef;
				$expecting_dest=1;
				$fancy_ref={};
				push @{$local_patterns{$logtype}}, $fancy_ref;
				$fancy_ref->{'pattern'}=$pattern;
				$fancy_ref->{'first_dest'}=$num_dests;
			} elsif ($keyword =~ m{^use_sprintf:?$}) {
				die "keyword $keyword takes no arguments"
					unless s{^\s*$}{};
				die "don't have a pattern for $keyword"
					if !defined $pattern;
				$dests[$num_dests]{'use_sprintf'}=1;
				$expecting_dest=1;
			} elsif ($keyword eq 'format:') {
				die "keyword $keyword should be followed by something"
					unless s{^(\S.*)$}{};
				$format=$1;
				die "don't have a pattern for $keyword\n\t$format"
					if !defined $pattern;
				$dests[$num_dests]{'format'}=$format;
				$expecting_dest=1;
			} elsif ($keyword eq 'count:') {
				die "keyword $keyword should be followed by something"
					unless s{^(\S.*)$}{};
				$count=$1;
				die "don't have a pattern for $keyword\n\t$count"
					if !defined $pattern;
				$dests[$num_dests]{'count'}=$count;
				$expecting_dest=1;
			} elsif ($keyword eq 'dest:') {
				# Completes the current destination record for the pattern.
				die "keyword $keyword should be followed by something"
					unless s{^(\S.*)$}{};
				$dest=$1;
				# special destinations: SKIP, LAST, UNIQUE whatever
				push @categories, $dest if $dest ne 'LAST' && $dest ne 'SKIP';
				die "don't have a format for dest:\n\t$dest"
					if !defined $format && $dest ne 'LAST' && $dest ne 'SKIP';
				$dests[$num_dests]{'dest'}=$dest;
				$fancy_ref->{'last_dest'}=$num_dests;
				$num_dests++;
				$expecting_dest=undef;
			} elsif ($keyword eq 'category:') {
				die "was expecting a destination for pattern:\n\t$pattern"
					if $expecting_dest;
				undef $pattern; # state change
				die "keyword $keyword should be followed by something"
					unless s{^(\S.*)$}{};
				$category=$1;
			} elsif ($keyword =~ m{^(filter|sort|derive):$}) {
				$keyword=$1;
				die "keyword $keyword should be followed by something"
					unless s{^(\S.*)$}{};
				my $arg=$1;
				die "don't have a category for $keyword\n"
					if !defined $category;
				die "category $category already has a $keyword ".
                    "($categories{$category}{$keyword})\n".
                    "so we don't need new $keyword $arg" 
                    if exists $categories{$category}{$keyword};
				my %func=(
					filter => \&filter,
					sort   => \&sort_keys,
					derive => \&derive,
					);
				# Dry-run the handler so bad syntax dies at config time.
                eval { &{$func{$keyword}}($arg); };
                die "Unknown $keyword syntax in $arg: $@\n" if $@;
				$categories{$category}{$keyword}=$arg;
			} elsif ($keyword eq 'block_comment') {
				# Consume and discard the comment paragraph.
				&read_paragraph(\@lines, \$line);
			} elsif ($keyword eq 'include_if_exists' || $keyword eq 'include'){
				die "you need to give a filename to include"
					unless s{^(\S+)}{};
				my $filename=$1;
				die "extra stuff on include line after filename"
					unless m{^\s*$};
				&flush_config(\%local_patterns); # see comment before the sub
				&include_file($filename, $keyword eq 'include'?1:0, $depth);
			} elsif ($keyword eq 'include_dir_if_exists' || 
					$keyword eq 'include_dir' ) {
				die "you need to give a filename to include_dir"
					unless s{^(\S+)}{};
				my $dirname=$1;
				die "extra stuff on include line after dirname"
					unless m{^\s*$};
				&flush_config(\%local_patterns); # see comment before the sub
				&include_dir($dirname, $keyword eq 'include_dir'?1:0, $depth);
			} elsif ($keyword eq 'config_version') {
				die "config_version takes a version string as an argument"
					unless s{^([\d+\.]+)\s*$}{};
				warn "already saw config_version before"
					if defined $config_version;
				$config_version=$1;
				die "config version '$config_version' is too old"
					if &funky_cmp($config_version, $minimum_version)<0;
				die "config version '$config_version' is newer than my version"
					if &funky_cmp($config_version, $current_version)>0;
			} elsif ($keyword eq 'file_version') {
				die "file_version takes an argument" unless s{(\S+.*)}{};
				warn "already saw file_version before" if defined $file_version;
				$file_version=$1;
			} elsif ($keyword eq 'end') {
				&config_warn($filename, $line, 
					'"end" should be replaced with "@@end"');
				last;
			} else {
				die "unknown keyword in config: $keyword";
			}
		}
	}

	&config_error($filename, "EOF", 
		"was expecting dest for pattern:\n\t$pattern")
		if $expecting_dest;

	&config_error($filename, "EOF", 
		"'config_version' should be set (ie. to $current_version)")
		if (! defined $config_version);

	# Record this file for the -I config_versions report.
	push @config_versions, sprintf("%-50s %-8s %s\n", $filename, 
		(defined $config_version? $config_version: ""),
		(defined $file_version? $file_version: "")
		);

	&flush_config(\%local_patterns); # see comment before the sub
}


# The upcoming code is obscene.  The reason it's necessary is
# that most of the config has a natural tendency to be overridden
# by later configs, while the pattern stuff has a natural
# tendency to be overridden by earlier configs.  I like
# consistency, so I chose overriding with later configs.  This
# code implements that by prepending the local patterns to the
# global pattern list.
# THIS MUST BE CALLED BEFORE DOING ANY INCLUDES.

sub flush_config {
	my $local_ref=shift;

	# Prepend each logtype's locally-collected patterns to the global
	# %patterns list, so earlier-read rules end up ahead of later ones.
	while (my ($logtype, $pats)=each %$local_ref) {
		unshift @{$patterns{$logtype}}, @$pats;
	}

	# empty the local store so the caller can start accumulating afresh
	%$local_ref=();
}


# this preprocessor should have identical features to the aide preprocessor
#
# Walks the config lines in place:
#   - @@define/@@undef maintain the %VAR macro table
#   - @@ifdef/@@ifndef/@@ifhost/@@ifnhost with @@else/@@endif do
#     conditional inclusion, tracked by two parallel stacks
#   - every directive line is replaced with '#' so the config reader
#     proper never sees it
#   - on lines that survive, @@{NAME} is substituted from %VAR
sub preprocessor {
	my $config_ref=shift || die "Internal error"; # ref to array of config lines
	my $filename=shift || die "Internal error";   # used for error messages only

	my @if_else_stack; # did we last see an if or an else?
	my @active_stack=(1); # should we use lines we see, or are we in a false
		# if?  I couldn't think of a better name for this. . .

	for (my $line=0; $line<@$config_ref; $line++) {
		$_=$config_ref->[$line];
		my $raw_line=$_;
		# strip a leading @@directive; its arguments remain in $_
		if (s{^\s*(\@\@\S+)\s*}{}) {
			my $directive=$1;

			# if (0) up front so every real case can be an elsif
			if (0) {
			} elsif ($directive =~ m{^\@\@define$}) {
				if (!m{^(\S+)\s+(\S+)$}) {
					&config_error($filename, $line, 
						"directive $directive takes two arguments");
				}
				# only define when the enclosing @@if branch is active
				if ($active_stack[$#active_stack]) {
					$VAR{$1}=$2;
				}
			} elsif ($directive =~ m{^\@\@undef$}) {
				if (!m{^(\S+)\s*$}) {
					&config_error($filename, $line, 
						"directive $directive takes one argument");
				}
				if ($active_stack[$#active_stack]) {
					delete $VAR{$1};
				}
			} elsif ($directive =~ m{^\@\@ifn?def$}) {
				my $invert=($directive eq '@@ifdef'? 0 : 1);
				if (!m{^(\S+)\s*$}) {
					&config_error($filename, $line, 
						"directive $directive takes one argument");
				}
				# new scope is active only if the enclosing scope is active
				# AND the (possibly inverted) condition holds
				push @active_stack, ($active_stack[$#active_stack] && 
					($invert xor exists($VAR{$1})));
				push @if_else_stack, "if";
			} elsif ($directive =~ m{^\@\@ifn?host$}) {
				my $invert=($directive eq '@@ifhost'? 0 : 1);
				if (!m{^(\S+)\s*$}) {
					&config_error($filename, $line, 
						"directive $directive takes one argument");
				}
				push @active_stack, ($active_stack[$#active_stack] && 
					($invert xor ($nodename eq $1)));
				push @if_else_stack, "if";
			} elsif ($directive =~ m{^\@\@else$}) {
				if (!m{^\s*$}) {
					&config_error($filename, $line, 
						"directive $directive takes no arguments");
				}
				&config_error($filename, $line, '@@else without @@if')
					if !@if_else_stack;
				&config_error($filename, $line, '@@else when already in else')
					if $if_else_stack[$#if_else_stack] eq 'else';
				$if_else_stack[$#if_else_stack]='else';
				# active iff the parent scope is active and the "if" half
				# of this scope was not taken
				$active_stack[$#active_stack]=$active_stack[$#active_stack-1]
					&& !$active_stack[$#active_stack];
			} elsif ($directive eq '@@endif') {
				if (!m{^\s*$}) {
					&config_error($filename, $line, 
						"directive $directive takes no arguments");
				}
				&config_error($filename, $line, "endif without if")
					if !@if_else_stack;
				pop @active_stack;
				pop @if_else_stack;
			} elsif ($directive eq '@@end') {
				if (!m{^\s*$}) {
					&config_error($filename, $line, 
						"directive $directive takes no arguments");
				}
				# don't need to do anything, handled by the config file
				# reader
			} elsif ($directive eq '@@warn') {
				# NOTE(review): @@warn/@@error fire even inside a false
				# @@if branch (no active_stack check) -- confirm intended
				$raw_line.="\n" if $raw_line !~ m{\n$};
				warn $raw_line;
			} elsif ($directive eq '@@error') {
				$raw_line.="\n" if $raw_line !~ m{\n$};
				die $raw_line;
			} else {
				&config_error($filename, $line, 
					"no such preprocessor directive: $directive");
			}

			# Null the line to avoid confusing the config processor
			$config_ref->[$line]='#';
		} else { # not in a preprocessor directive
			if (!$active_stack[$#active_stack]) { # then we don't want the line
				$config_ref->[$line]='#';
			} else { # perform variable substitutions
				# unknown @@{NAME} references are left as-is
				$config_ref->[$line]=~
					s(\@\@\{(\w+)\})(exists $VAR{$1}? $VAR{$1}: "\@\@\{$1\}")eg;
			}
		}
	}
	&config_error($filename, "EOF", "unterminated if") if @if_else_stack;
	# only the initial sentinel element should remain
	die "Internal error" if @active_stack != 1;
}


# Die with a uniform "config FILE line N: message" error.  $line is the
# 0-based line index (or the literal string "EOF").
sub config_error {
	my ($filename, $line, $error)=(shift, shift, shift);
	die "Internal error"
		unless defined $filename && defined $line && defined $error;

	# internal line numbers are 0-based; report them 1-based
	$line++ unless $line eq "EOF";

	die "$0: config $filename line $line: $error\n";
}

# Warn with a uniform "config FILE line N: message" prefix.  $line is the
# 0-based line index.
# Fix: the old "shift || die" argument checks rejected a legitimate line
# number of 0 (the first line of a file); use defined() like config_error.
sub config_warn {
	defined(my $filename=shift) || die "Internal error";
	defined(my $line=shift) || die "Internal error";
	defined(my $error=shift) || die "Internal error";

	# internal line numbers are 0-based; report them 1-based
	$line++;

	warn "$0: config $filename line $line: $error\n";
}

# Read one included config file.  $must_exist controls whether a missing
# or unreadable file is fatal ("include") or silently skipped
# ("include_if_exists").  $depth is the current include-nesting depth.
sub include_file {
	defined(my $filename=shift) || die "$0: internal err: missing arg";
	defined(my $must_exist=shift) || die "$0: internal err: missing arg";
	defined(my $depth=shift) || die "$0: internal err: missing arg";

	# expand %-tags (ie. %n -> nodename) in the configured filename
	$filename=&process_tags($filename, \%tags);

	if (! -r $filename) {
		die "included filename $filename is not readable"
			if $must_exist;
	} else {
		# explicit mode argument instead of "<$filename": a filename
		# beginning with ">" or "|" can no longer change the open mode,
		# and we avoid the indirect-object "new FileHandle" syntax
		my $configfh=FileHandle->new($filename, "<") 
			|| die "$0: open $filename: $!\n";
		read_config($configfh, $filename, $config_scalar, $config_array, 
			$depth+1);
	}
}


# Include every plain file in a directory, skipping subdirectories and
# filenames matching the configured ignore patterns (editor backups etc).
# $must_exist controls whether an unreadable directory is fatal.
sub include_dir {
	defined(my $dirname=shift) || die "$0: internal err: missing arg";
	defined(my $must_exist=shift) || die "$0: internal err: missing arg";
	defined(my $depth=shift) || die "$0: internal err: missing arg";

	if (!-r $dirname || !-x $dirname) {
		die "included dir $dirname is not readable" if $must_exist;
		return;
	}

	local *DIR;
	opendir (DIR, $dirname) || die "$0: opendir $dirname: $!";
	foreach my $entry (readdir DIR) {
		my $path="$dirname/$entry";
		next if -d $path;                   # skips "." and ".." too
		next if &should_ignore_file($path); # backup files and the like
		&include_file($path, 1, $depth);
	}
	closedir DIR;
}


# Join the given alternatives into one anchored alternation pattern.
# With no alternatives, return a pattern that can never match a text
# line ("\777" is not a character that occurs in log data).
sub make_pattern {
	my @alts=@_;
	return "^\777" unless @alts;
	# the embedded (?#...) comment forces a visual line break when the
	# generated pattern is dumped for debugging
	return "^(?:".join("|(?#\n\t\t)", @alts).")";
}


# sort helper: plain lexicographic comparison.  Reads sort's package
# variables $a/$b, so it must be invoked via sort (or funky_cmp-style
# localization), never with ordinary arguments.
sub string_sort_helper {
	return $a cmp $b;
}


# sort helper: numeric comparison.  Reads sort's package variables
# $a/$b, so it must be invoked via sort, never with ordinary arguments.
sub numeric_sort_helper {
	return $a <=> $b;
}


# sort helper: "natural" comparison -- runs of digits compare
# numerically, everything else compares stringwise, so "host2" sorts
# before "host10" and "Jan  2" before "Jan 12".  Reads sort's $a/$b.
sub funky_sort_helper {
	my $a1=$a; my $b1=$b;
	# collapse whitespace before a digit to a single space, so padded
	# columns (ie. strftime "%e") compare equal to unpadded ones
	$a1=~s{\s+(\d)}{ $1}g;
	$b1=~s{\s+(\d)}{ $1}g;
	while (length $a1 && length $b1) {
		# the "(my $a2=$1, ...)" comma expression saves $a1's capture
		# before the match against $b1 clobbers $1
		if ($a1=~m{^(\d+)} && (my $a2=$1, $b1=~m{^(\d+)})) {
			my $ret = $a2<=>$1;
			return $ret if $ret;
			# both numeric runs equal: strip them and keep comparing
			$a1=~s{^\d+}{};
			$b1=~s{^\d+}{};
		} elsif ($a1=~m{^([^\d]+)} && (my $a3=$1, $b1=~m{^([^\d]+)})) {
			my $ret = $a3 cmp $1;
			return $ret if $ret;
			$a1=~s{^[^\d]+}{};
			$b1=~s{^[^\d]+}{};
		} else {
			# one side starts with a digit, the other doesn't: fall back
			# to a plain string comparison of the remainders
			return $a1 cmp $b1;
		}
	}
	# at least one string exhausted; the shorter (empty) one sorts first
	return $a1 cmp $b1;
}


# this sort function takes a hash ref and returns the given keys sorted
# by ascending numeric value in the hash.  The keys are passed explicitly
# (rather than taken from the hash) so callers can pre-filter them.
sub sort_by_value {
	my ($hash_ref, @keys)=@_;
	return sort {$hash_ref->{$a} <=> $hash_ref->{$b}} @keys;
}

# see comment for sort_by_value.  Same deal, but sorted by descending
# numeric value (largest count first).
sub reverse_sort_by_value {
	my ($hash_ref, @keys)=@_;
	return sort {$hash_ref->{$b} <=> $hash_ref->{$a}} @keys;
}

# funky_cmp sets up a call to funky_sort_helper.  Usually you'd expect things
# to be the other way around, but funky_sort_helper is more performance 
# critical, so I dropped some function overhead.
sub funky_cmp {
	# localize sort's package globals around the helper call; the bare
	# "&sub;" form is fine here since the helper only reads $a/$b
	local($a, $b)=(shift, shift);
	return &funky_sort_helper;
}


# Assign $value to the file-level lexical scalar named $name.  A string
# eval is required because symbolic references cannot reach lexicals.
# NOTE(review): assumes $name has already been validated against the
# import lists by the caller -- confirm; eval'ing an arbitrary name
# would execute attacker-chosen code.
sub import_scalar {
	defined(my $name=shift) || die "Internal error";
	defined(my $value=shift) || die "Internal error";

	eval "\$$name".'=$value;';
	die "$@" if $@;

	# these variables should take immediate effect
	# NOTE(review): this block runs on every call, so before PATH, umask
	# etc. have been imported it copies undef values around -- presumably
	# harmless under -w, but confirm
	$ENV{PATH}=$PATH;
	$tags{n}=$nodename;
	$tags{s}=$osname;
	$tags{r}=$osrelease;
	umask oct $umask;
}


# Assign @values to the file-level lexical array named $name.  As with
# import_scalar, a string eval is the only way to reach a lexical by
# name; the caller is trusted to pass only validated names.
sub import_array {
	defined(my $name=shift) || die "Internal error";
	my @values=@_;

	eval "\@$name".'=@values';
	die "$@" if $@;
}


# Move everything the config reader collected in the generic
# %$config_scalar / %$config_array stores into the file-level variables
# declared up top.  Per-logtype extension variables go into %log_scalar /
# %log_array.  Anything left unclaimed afterwards is a config typo and
# is fatal.
# Fix: the leftover-detection loops iterated "(%$hash)" -- keys AND
# values interleaved -- instead of "keys %$hash".
sub import_config_vars {
	my $config_scalar=shift || die "$0: Internal error: expecting arg";
	#mta my $config_array=shift || die "$0: Internal error: expecting arg";
	# NOTE(review): per the commented-out line above, $config_array is
	# not a parameter; this sub uses the file-level $config_array.

	# This lexical deliberately shadows the global $PATH: the string
	# evals below assign into *this* variable, and we only publish it to
	# $ENV{PATH} at the very end (see "special rule" below).
	my $PATH;

	# verify that all required arrays are defined
	foreach (@required_import_arrays) {
		die "config missing required array '$_'\n"
			unless exists $config_array->{$_};
	}

	# verify that all required scalars are defined
	foreach (@required_import_scalars) {
		die "config missing scalar $_\n"
			unless exists $config_scalar->{$_};
	}

	# import all defined arrays (required and optional) into namespace;
	# string eval is needed to assign into lexicals by name
	foreach (@required_import_arrays, @optional_import_arrays) {
		next unless exists $config_array->{$_};
		eval "\@$_".'=@{$config_array->{$_}};';
		die "$@" if $@;
		delete $config_array->{$_};
	}

	# import all defined scalars (required and optional) into namespace
	foreach (@required_import_scalars, @optional_import_scalars) {
		next unless exists $config_scalar->{$_};
		eval "\$$_".'=$config_scalar->{$_};';
		die "$@" if $@;
		delete $config_scalar->{$_};
	}

	# make sure certain arrays contain only unique elements
	@log_type_list      = &unique(@log_type_list);
	@optional_log_files = &unique(@optional_log_files);

	# claim the per-logtype extension variables (ie. sulog_date_pattern)
	foreach my $log_type (@log_type_list) {
		foreach my $ext (@per_log_required_scalar_exts) {
			my $i="${log_type}_$ext";
			die "config missing scalar $i required by logtype $log_type\n"
				unless exists $config_scalar->{$i};
		}
		foreach my $ext (@per_log_required_scalar_exts,
					@per_log_optional_scalar_exts) {
			my $i="${log_type}_$ext";
			next unless exists $config_scalar->{$i};
			$log_scalar{$log_type}{$ext}=$config_scalar->{$i};
			delete $config_scalar->{$i};
		}
		foreach my $ext (@per_log_required_array_exts) {
			my $i="${log_type}_$ext";
			die "config missing array $i required by logtype $log_type\n"
				unless exists $config_array->{$i};
		}
		foreach my $ext (@per_log_required_array_exts,
					@per_log_optional_array_exts) {
			my $i="${log_type}_$ext";
			next unless $config_array->{$i};
			$log_array{$log_type}{$ext}=$config_array->{$i};
			delete $config_array->{$i};
		}
	}

	# anything still present was never claimed: a typo in the config
	foreach my $key (keys %$config_array) {
		die "unknown array defined in config: $key\n";
	}

	foreach my $key (keys %$config_scalar) {
		die "unknown scalar defined in config: $key\n";
	}

	#special rule for $PATH
	$ENV{PATH}=$PATH if $PATH;
}


# For each configured log type, generate (as a string of Perl source,
# to be eval'd by the caller into $do_type{$type}) the sub that parses
# one log file line-by-line and tallies counts; also build the per-type
# filename patterns and the decompression/pgp rule tables.
# Returns refs to: %big_eval (generated source, keyed by type),
# %filename_pats, %decompression_rules, %pgp_rules.
# Fixes: the generated real-mode code called "fseek" (POSIX.pm's
# croaking C-specific stub, not a Perl builtin) instead of "seek"; the
# domain-stripping substitution emitted a bare "." instead of "\.";
# @nodename_allowed was filled via string repetition instead of list
# repetition.
sub build_log_stuff {

	my ($i, %big_eval, %filename_pats, %decompression_rules, %pgp_rules);

	# brass tacks time
	foreach my $type (@log_type_list) {
		# map each formatted date in the report range back to its relative
		# day number, for O(1) lookup in the generated code
		for (my $relday=$relday_start; $relday >= $relday_end; $relday--) {
			$when{$type}{strftime($log_scalar{$type}{date_format}, 
					&relday2time($relday))}=$relday;
		}

		# backwards compatibility for skip_list
		if (exists $log_array{$type}{skip_list}) {
			my $skip_pattern=&make_pattern(
				map( m{\$$} ? $_ : "$_.*", @{$log_array{$type}{skip_list}}));
			push @{$patterns{$type}}, { 
				pattern=>$skip_pattern, 
				first_dest=>$num_dests,
				last_dest=>$num_dests};
			$dests[$num_dests]{dest}="SKIP";
			$num_dests++;
		}

		# backwards compatibility for raw_rules
		foreach my $raw_rule (@{$log_array{$type}{raw_rules}}) {
			my ($category, $pattern, $format, $code_hook)
				=split(m{, }, $raw_rule);
			eval "'foo' =~ m{$pattern}"; # check pattern for validity
			die "problem with pattern '$pattern':\n\t$@\n" if $@;
			die "1st field missing in rule '$raw_rule'" if ! defined $category;
			die "2nd field missing in rule '$raw_rule'" if ! defined $pattern;
			die "3rd field missing in rule '$raw_rule'" if ! defined $format;
			die "4th field removed in version 0.35, sorry" 
				if defined $code_hook;
			$pattern.=".*" unless $pattern=~m{\$$};
			push @{$patterns{$type}}, { 
				pattern=>$pattern, 
				first_dest=>$num_dests,
				last_dest=>$num_dests, };
			$dests[$num_dests]={ dest=>$category, format=>$format };
			$num_dests++;
			push @categories, $category;
		}

		# lines matched by no rule land in a per-type catch-all category
		my $unknowns="Unknowns for type $type";
		push @unknown_categories, $unknowns;
		push @categories, $unknowns;

		# now let's build the big eval
		my $big_eval="\$do_type{$type} = sub {\n";
		$big_eval.="\tdefined(my \$file=shift) || die qq($0: missing arg);\n";
		$big_eval.="\tdefined(my \$fh=shift) || die qq($0: missing arg);\n";
		$big_eval.="\n";
		$big_eval.="\tmy \$relday;\n";
		if (!$real_mode) {
			# only for real-mode do we need to maintain global per-file last
			# and multiplier state, so when not in real mode, let's have 
			# local variables to make this a bit faster
			$big_eval.="\tmy (\%last, \%multiplier);\n";
			$big_eval.="\t\$multiplier{\$file}=1;\n";
		}
		$big_eval.="\tmy \$node=\$nodename;\n";
		$big_eval.="\tmy \$raw_line;\n";
		$big_eval.="\tmy \$pos=tell \$fh;\n"
			if $real_mode;
		$big_eval.="\twhile(<\$fh>) {\n";

		$big_eval.="\t\t\$raw_line=\$_;\n";
		# a partial line (no trailing newline) means the logger is
		# mid-write: seek back and retry on the next pass.  (This used
		# to emit "fseek", which POSIX.pm exports as a croaking stub.)
		$big_eval.="\t\tif (!m{\\n\$}) { seek \$fh, \$pos, 0; return; }\n"
			if $real_mode;
		$big_eval.="\t\tchomp;\n";
		$big_eval.=join("", map("\t\t$_\n", @{$log_array{$type}{pre_date_hook}}))
			if $log_array{$type}{pre_date_hook};
		$big_eval.="\n";

		$big_eval.="\t\t# deal with the date (if applicable)\n";
		$big_eval.="\t\tif(s{$log_scalar{$type}{date_pattern}}{}) {\n";
		if (!$show_all && !$real_mode) { # ie. simple report mode
			$big_eval.="\t\t\t\$relday=\$when{\$type}{\$1};\n";
			$big_eval.="\t\t\tif (!defined \$relday) {\n";
			$big_eval.="\t\t\t\tnext; \n"."\t\t\t}\n";
		} elsif ($show_all && $is_multiday) {
			$big_eval.="\t\t\tif (!defined \$when{\$type}{\$1}) {\n";
			$big_eval.="\t\t\t\tnext; \n"."\t\t\t}\n";
			$big_eval.="\t\t\t\$relday=\$relday_end;\n";
		} else {
			$big_eval.="\t\t\t\$relday=\$relday_end;\n";
		}
		$big_eval.="\t\t} else {\n";
		$big_eval.="\t\t\twarn qq(can't find $type date_pattern in '\$_');\n";
		$big_eval.="\t\t\tnext;\n";
		$big_eval.="\t\t}\n\n";

		$big_eval.="\t\t# deal with the nodename (if applicable)\n";
		if ($log_scalar{$type}{nodename_pattern}) {
			$big_eval.="\t\tif(s{$log_scalar{$type}{nodename_pattern}}{}) {\n";
			$big_eval.="\t\t\t\$node=\$1;\n";
			if (defined $domain && !$leave_FQDNs_alone) {
				# "\\." so the generated regex contains a literal "\.";
				# a bare "\." in a qq string collapses to just "."
				# NOTE(review): $domain is interpolated unquoted into the
				# regex, so regex metacharacters in it stay live
				$big_eval.="\t\t\t\$node=~s{\\.${domain}\$}{};\n";
			}
			if ($process_all_nodenames) {
			} elsif (@allow_nodenames) {
				# lines from non-allowed hosts get tallied under
				# $other_host_message instead of their own categories
				$big_eval.="\t\t\tif (! exists \$nodename_allowed{\$node}) {\n";
				$big_eval.=
					"\t\t\t\t\$count{\$relday}{\$nodename}{\$other_host_message}{\$node}+=\n".
					"\t\t\t\t\t\$multiplier{\$file};\n"
					if !$real_mode;
				$big_eval.="\t\t\t\t".'&real_out($relday, $nodename, '.
					'$other_host_message, $node, $multiplier{$file});'."\n" 
					if $real_mode;
				$big_eval.= "\t\t\t\tnext;\n\t\t\t}\n";
			} else {
				$big_eval.="\t\t\tif (\$node ne '$nodename') { \n";
				$big_eval.=
					"\t\t\t\t\$count{\$relday}{\$nodename}{\$other_host_message}{\$node}+=\n".
					"\t\t\t\t\t\$multiplier{\$file};\n"
					if !$real_mode;
				$big_eval.="\t\t\t\t".'&real_out($relday, $nodename, '.
					'$other_host_message, $node, $multiplier{$file});'."\n" 
					if $real_mode;
				$big_eval.="\t\t\t\tnext;\n\t\t\t}\n";
			}
			$big_eval.="\t\t} else {\n";
			$big_eval.="\t\t\tdie qq(Can't find $type nodename_pattern in '\$_');\n";
			$big_eval.="\t\t}\n";
		}

		if ($log_array{$type}{pre_skip_list_hook}) {
			$big_eval.=join("", map("\t\t$_\n", @{$log_array{$type}{pre_skip_list_hook}}));
		}

		$big_eval.="\t\t# if (0) up front, so we can use elsif everywhere\n";
		$big_eval.="\t\tif (0) {\n";
		$big_eval.="\t\t} ";

		# one generated elsif per configured pattern, each feeding its
		# dest range (LAST/SKIP/UNIQUE are special pseudo-dests)
		$i=0;
		foreach my $pattern_ref (@{$patterns{$type}}) {
			my $pattern=$pattern_ref->{'pattern'};
			$pattern.='\s*$' unless $pattern=~m{\$$};
			$pattern="^$pattern" unless $pattern=~m{^\^};
			$big_eval.=" elsif (m{$pattern}o) {\n";
			for (my $i=$pattern_ref->{'first_dest'}; 
					$i<=$pattern_ref->{'last_dest'}; $i++) {

				my $format=$dests[$i]{'format'};
				my $dest=$dests[$i]{'dest'};
				my $count=1;
				$count=$dests[$i]{'count'} if exists $dests[$i]{'count'};

				if ($dest eq 'LAST') { # special dest
					# re-process the previous line with a new multiplier
					die "LAST dest can only be used as the only dest"
						unless $pattern_ref->{'first_dest'} ==
							$pattern_ref->{'last_dest'};
					$big_eval.=qq(\t\t\t\$multiplier{\$file}=$count;\n);
					$big_eval.=qq(\t\t\t\$_=\$last{\$file};\n);
					$big_eval.=qq(\t\t\tredo if defined \$_;\n);
					$big_eval.=qq(\t\t\tnext;\n);
				} elsif ($dest eq 'SKIP') { # another special dest
					die "SKIP dest can only be used as the only dest"
						unless $pattern_ref->{'first_dest'} ==
							$pattern_ref->{'last_dest'};
					$big_eval.=qq(\t\t\tundef \$last{\$file};\n);
				} elsif ($dest =~ m{^UNIQUE\s+(\S.*)}) {
					my $cat=$1;
					my ($part1, $part2);
					if ($format=~m{^(.+)\,([^,]+)$}) {
						$part1=$1; $part2=$2;
					} else {
						die "format $format needs 2 comma delimited values ".
							"UNIQUE dests\n";
					}
					$big_eval.=qq{\t\t\t\$unique{\$relday}{\$node}{"$cat"}}.
						qq{{$part1}{$part2}++;};
				} else {               # normal dest

					$dest=~s{^CATEGORY\s+}{};

					if (exists $dests[$i]{'use_sprintf'}) {
						$format="sprintf($format)";
					} else {
						$format="\"$format\"";
					}

					$big_eval.=qq(\t\t\t\$count{\$relday}{\$node}{"$dest"}{$format}+=).
						qq($count*\$multiplier{\$file};\n)
						if !$real_mode;
					$big_eval.="\t\t\t&real_out(\$relday, \$node, ".
						qq{"$dest", $format, $count*\$multiplier{\$file});\n}
						if $real_mode;
				}
			}
			$big_eval.=qq(\t\t\tnext;\n\t\t});
		}

		# final else: nothing matched, count under the per-type unknowns
		$big_eval .= qq( else {\n);
		$big_eval .= qq(\t\t\t\$count{\$relday}{\$node}{"$unknowns"}{\$_}+=).
				qq(\$multiplier{\$file};\n)
				if !$real_mode;
		$big_eval .= qq{\t\t\t&real_out(\$relday, \$node, "$unknowns", \$_,}.
				qq{\$multiplier{\$file});\n}
				if $real_mode;


		if ($unknowns_only) {
			$big_eval .= qq(\t\t\$unknowns{$type}{\$_}++;\n);
			$big_eval .= qq(\t\t\$unknowns_raw{$type}{\$raw_line}++;\n);
		}
		$big_eval .= qq(\t\t\tnext; \n). qq(\t\t}\n).qq(\t} continue {\n);
		$big_eval .= qq(\t\t\$last{\$file}=\$raw_line;\n);
		$big_eval .= qq(\t\t\$multiplier{\$file}=1;\n);
		$big_eval .= qq(\t\t\$pos=tell \$fh;\n)
			if $real_mode;
		$big_eval .= qq(\t}\n);
		$big_eval .= qq(}\n);
		$big_eval{$type}=$big_eval;
		$filename_pats{$type}=&make_pattern(@{$log_array{$type}{filenames}});
	}
	# "(1) x COUNT" is list repetition; the old unparenthesized "1 x"
	# was string repetition ("111..."), which only worked because all
	# lookups use exists() rather than the stored values
	@nodename_allowed{@allow_nodenames}=(1) x @allow_nodenames;
	$nodename_allowed{$nodename}=1;
	foreach (@decompression_rules) {
		my ($ext, $command)=split(", ", $_);
		$decompression_rules{$ext}=$command;
	}
	foreach (@pgp_rules) {
		my ($pgp_type, $command)=split(", ", $_);
		$pgp_rules{$pgp_type}=$command;
	}
	return \%big_eval, \%filename_pats, \%decompression_rules, \%pgp_rules;
}


# convert a date into the number of days before today.  Accepts "today",
# "yesterday", a bare day count, or an absolute YYYY_MM_DD date.
sub normalize2relday {
	my $date=shift;
	die "$0: normalize2relday: need at least one arg" unless defined $date;

	return 0 if $date eq "today";
	return 1 if $date eq "yesterday";

	# a bare number is already a relative day count
	return $date if $date =~ m{^\d+$};

	# absolute dates are converted via Gregorian day-number arithmetic,
	# then round-tripped through strftime as a sanity check
	if ($date =~ m{^(\d{4,})_(\d{1,2})_(\d{1,2})$}) {
		my $abs_day=&absdate2absday($1, $2, $3);
		my $abs_today=&absdate2absday(split(/\s+/, 
			strftime("%Y %m %d", localtime($time_start))));
		my $relday=$abs_today-$abs_day;
		my $check=strftime("%Y_%m_%d", relday2time($relday));
		die "$0: BUG: normalize2relday check returned $check for $date\n"
			unless $check eq $date;
		return $relday;
	}

	die "Unknown date format: $date\n";
}


# convert a relative date (ie. the number of days before today) into
# a localtime list, anchored at the run's fixed $time_start
sub relday2time {
	my $days_ago=shift;
	die "$0: relday2time: need at least one arg" unless defined $days_ago;

	# 86400 seconds per day; DST boundary shifts wash out in localtime
	my $then=$time_start-$days_ago*86400;
	return localtime($then);
}


# convert an absolute year, month, day into a count of days since
# proleptic-Gregorian day 0, honoring the 4/100/400-year leap rules.
# (Only differences between two results are ever used.)
sub absdate2absday {
	defined(my $year=shift) || die;
	defined(my $month=shift) || die;
	defined(my $day=shift) || die;

	# cumulative days before the start of each month, non-leap year
	my @days_before_month=(0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365);

	my $prior=$year-1;
	my $total=$prior*365
		+int($prior/4)
		-int($prior/100)
		+int($prior/400)
		+$days_before_month[$month-1]
		+$day;

	# past February of a leap year, add the leap day
	my $is_leap=(!($year%400) || (!($year%4) && ($year%100)))? 1 : 0;
	$total++ if $month>2 && $is_leap;

	return $total;
}

# Apply a filter expression to an item=>count hash, returning the keys
# that survive.  Grammar:
#     FILTER := FILTER ("and"|"or") FILTER
#             | OP NUMBER                          (>= <= < > = == != <> ><)
#             | ("top"|"bottom")["_strict"] NUMBER ["%"]
#             | "none"
# "and" intersects the two key sets, "or" unions them; "top N" keeps
# value-ties past the cutoff unless the _strict variant is used.
# Fix: removed the dead "my $is_percent=$3" in the comparison branch
# (that regex only has two capture groups, so $3 was always undef).
sub filter {
	my $filter=shift;
	my %values=@_;

	if (0) {
	} elsif ($filter =~ m{^\s*(\S.*?) (and|or) (\S.*)\s*$}) {
		# recurse on both halves, then combine the key sets
		my $keyword = $2;
		my $filter1 = $1;
		my $filter2 = $3;
		my @return1 = &filter($filter1, %values);
		my @return2 = &filter($filter2, %values);
		if ($keyword eq "and") {
			my %in_return2;
			@in_return2{@return2}=undef;
			return grep {exists $in_return2{$_}} @return1;
		} elsif ($keyword eq "or") {
			return &unique(@return1, @return2);
		} else {
			die "Internal error";
		}
	} elsif ($filter =~ m{^(>=|<=|<|>|=|==|!=|<>|><)\s+(\d+)\s*$}) {

		my $keyword=$1;
		my $value=$2;

		if (0) { # I like ifs to line up. . .

		} elsif ($keyword eq '>=') {
			return grep {$values{$_}>=$value} keys %values;
		} elsif ($keyword eq '<=') {
			return grep {$values{$_}<=$value} keys %values;
		} elsif ($keyword eq '<') {
			return grep {$values{$_}< $value} keys %values;
		} elsif ($keyword eq '>') {
			return grep {$values{$_}> $value} keys %values;
		} elsif ($keyword =~ m{^(=|==)$}) {
			return grep {$values{$_}==$value} keys %values;
		} elsif ($keyword =~ m{^(!=|<>|><)$}) {
			return grep {$values{$_}!=$value} keys %values;
		}

	} elsif ($filter =~ m{^((?:top|bottom)(?:_strict)?)\s+(\d+)(\%?)\s*$}) {

		my $keyword=$1;
		my $value=$2;
		my $is_percent=$3;

		if (0) {
		} elsif ($keyword =~ m{^(top|bottom)(_strict)?$}) {
			return if $value == 0;   # "top 0" selects nothing
			my $how_many=$value;
			my @keys=&sort_by_value(\%values, keys %values);
			@keys=reverse @keys if $1 eq "top";
			my $is_strict=$2;

			if ($is_percent) { # switch to percentage
				die "percentage must be between 0 and 100" 
					if $value<0 || $value>100;
				$how_many=&ceiling(@keys*$value/100);
			}

			return @keys if @keys <= $how_many; # no need to do more work

			my @return=splice(@keys, 0, $how_many);

			# what if we we have a bunch of items with equal value and the 
			# top-whatever cuts off in the middle?  Unless we strictly want
			# just the top-whatever, we should include those, too.
			my $last_val=$values{$return[$#return]};
			while (!$is_strict && @keys && $values{$keys[0]}==$last_val) {
				push @return, shift @keys;
			}

			return @return; # done!
		}
	} elsif ($filter =~ m{^\s*none\s*$}) {
		return keys %values;
	} else {
		die "unknown filter format in $filter\n";
	}

	# defensive: every matched branch above returns before reaching here
	return 1;
}


# Sort @keys according to a space-separated sort spec (ie. "reverse
# value").  The words are applied right-to-left, so "reverse value"
# means: sort by value, then reverse.  $hash_ref supplies the values
# for the "value" sort; "none" is an explicit no-op.
sub sort_keys {
	my $sort=lc shift;
	my $hash_ref=shift;
	my @keys=@_;

	# normalize whitespace before splitting into individual sort words
	$sort=~s{\s+}{ }g;
	die "No sort specified" if $sort =~ m{^\s*$};
	my @sorts=reverse split(/\s/, $sort);
	foreach my $sort (@sorts) {
		if ($sort eq 'reverse') {
			@keys=reverse @keys;
		} elsif ($sort =~ m{^(funky|numeric|string)$}) {
			# "sort SUBNAME LIST" with the name in a scalar: resolves to
			# the like-named package sub (ie. funky_sort_helper) at runtime
			my $sort_helper="${sort}_sort_helper";
			@keys=sort $sort_helper @keys;
		} elsif ($sort =~ m{^value$}) {
			@keys=&sort_by_value($hash_ref, @keys);
		} elsif ($sort eq "none") {
			# deliberate no-op: keep the incoming order
		} else {
			die "unknown sort: $sort\n";
		}
	}
	return @keys;
}


# Compute a derived category for one relday/node from the global %count:
#     "cat1" add|subtract|remove "cat2"    or    = "cat1"
# Returns the resulting item=>count hash (as a list).
# Fixes: 'my VAR = EXPR if COND' is documented undefined behavior in
# Perl -- declare first, assign conditionally; and the parsed "=" form
# previously fell through to the "Unknown keyword" die.
sub derive {
	defined(my $derivation=lc shift) || die "Internal error";
	my $relday=shift;
	my $node=shift;

	my ($keyword, $cat1, $cat2);

	my $quote_pat='\"([^\"]+)\"';

	if ($derivation =~ 
		m{^\s*$quote_pat\s+(add|subtract|remove)\s+$quote_pat\s*$}) {
		$keyword=$2;
		$cat1=$1;
		$cat2=$3;
	} elsif ($derivation =~ m{^\s*(=)\s+$quote_pat\s*$}) {
		$keyword=$1;
		$cat1=$2;
	} else {
		die "Derivation $derivation in illegal format\n";
	}

	# start from a copy of cat1's counts (empty if absent)
	my %return;
	%return=%{$count{$relday}{$node}{$cat1}} 
		if defined $cat1 && defined $relday && defined $node && 
			exists $count{$relday}{$node}{$cat1};

	my %category2;
	%category2=%{$count{$relday}{$node}{$cat2}} 
		if defined $cat2 && defined $relday && defined $node &&
			exists $count{$relday}{$node}{$cat2};

	if (0) {
	} elsif ($keyword eq "=") {
		# nothing further: %return already holds a copy of cat1's counts
	} elsif ($keyword eq "add") {
		foreach my $key (keys %category2) {
			$return{$key}+=$category2{$key};
		}
	} elsif ($keyword eq "subtract") {
		foreach my $key (keys %category2) {
			$return{$key}-=$category2{$key};
		}
	} elsif ($keyword eq "remove") {
		foreach my $key (keys %category2) {
			delete $return{$key};
		}
	} else {
		die "Unknown keyword: $keyword\n";
	}

	return %return;
}


# Round up to the next integer (toward positive infinity).
# Fix: int() truncates toward zero, so the old unconditional "int($val+1)"
# was wrong for negative non-integers (ceiling(-1.5) returned 0, not -1).
sub ceiling {
	my $val=shift;
	return $val if int $val == $val;
	# negatives already truncate upward; only positives need the bump
	return $val > 0 ? int($val) + 1 : int($val);
}

# Return 1 if the file's basename matches one of the configured
# @filename_ignore_patterns (editor backups and the like), else 0.
sub should_ignore_file {
	my $filename=basename shift;

	# nothing configured means nothing gets ignored
	return 0 unless @filename_ignore_patterns;

	my $ignore_pat=&make_pattern(@filename_ignore_patterns);

	# make_pattern anchors the front; anchor the tail here
	return ($filename =~ m{${ignore_pat}$}) ? 1 : 0;
}


__END__

# internal config file for log_analysis

# what version config are we compatible with?  Every config file should have 
# one of these.
config_version 0.38

# what version is this file?  If you like doing configuration management, set
# this.
file_version $Revision: 1.94 $

# sulog type

# add our name to the log_type_list
add arr log_type_list=
sulog

# set the basename(s) of the file(s) we'll be looking at.  For sulog, that's 
# just "sulog", but for others, there are more than one (ie. syslog has 
# syslog, maillog, authlog, etc.)  This is mandatory.
set arr sulog_filenames=
sulog

# Some files (ie. wtmp, wtmpx) are in a binary format, so they need a 
# command to be run as an interpreter to be analyzed.  This is optional.
# It doesn't apply to the sulog format; see wtmp (later) for an example.
#set var sulog_open_command=

# If open_command and decompression_rules apply to the same file, then 
# two commands need to be run.  How do we get output from one to the other?
# A pipe won't always work, so we default to using temp files.  This variable
# lets you use a pipe instead.  This is optional.  It is ignored unless 
# open_command is set.  None of the default log types use this, but I know
# someone who wants it for his private ruleset.
#set var sulog_pipe_decompress_to_open=

# Arbitrary perl code to be run for each line, before doing anything else.
# This is optional.  sulog doesn't need it; see wtmp (later) for an
# example.
#set arr sulog_pre_date_hook=

# pattern that describes the date in each log line.  The pattern will be 
# stripped off before proceeding.  $1 should contain the date after the
# pattern is run.  This is mandatory.
set var sulog_date_pattern=^SU\s+(\d+\/\d+)\s+\S+\s+

# date_format follow the rules for strftime(3).  It should describe
# the date as extracted to $1 in the last step.
set var sulog_date_format=%m/%d

# pattern that describes the nodename in each log line, after the date has
# been stripped.  It will be stripped off before proceeding.  $1 should 
# contain the nodename.  This is optional, and doesn't apply to sulog; 
# see syslog for an example.
#set var sulog_nodename_pattern=

# some lines of arbitrary perl code that get called after the nodename
# has been stripped, before any further processing is done.  sulog doesn't
# use this; see syslog for a real example of this.  This is optional.
#set arr sulog_pre_skip_list_hook=

# raw_rules and skip_list have been obsoleted by the new config format, so 
# they are deprecated, and can be ignored
# set arr sulog_skip_list=

# set arr sulog_raw_rules=

@@ifndef __USE_MINIMAL_CONFIG

logtype: sulog

	pattern:	\-\s+\S+\s+($user_pat)\-($user_pat)

					format:	$1 => $2
					dest:	su: failed for

	pattern:	\+\s+\S+\s+($user_pat)\-($user_pat)

					format:	$1 => $2
					dest:	su: succeeded for

@@endif

# and that's it for sulog.

# wtmp type

add arr log_type_list=
wtmp

# file basenames that this log type applies to
set arr wtmp_filenames=
wtmp
wtmpx

# wtmp files are in a binary format, and are intended to be interpreted
# by the last command.  Rather than try to read them ourselves, we call
# last.  Subject to usual tags, plus the %f tag stands for the filename.
set var wtmp_open_command=last -f %f

# don't pipe decompress to the open command, or last will whine about seeking
#set var wtmp_pipe_decompress_to_open=

# This is a hook to run arbitrary perl code for each log line before
# doing anything else.
set arr wtmp_pre_date_hook=
	# the second-to-last line of output is always empty.  This would cause
	# it to fail the date_pattern check, so let's skip it in advance.
	next if m{^$};

set var wtmp_date_pattern= (?:Sun|Mon|Tue|Wed|Thu|Fri|Sat) ((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d+).*

set var wtmp_date_format=%b %e

# set arr wtmp_skip_list=

# set arr wtmp_raw_rules=

@@ifndef __USE_MINIMAL_CONFIG

logtype: wtmp


	pattern:	($file_pat) begins

					dest: 	SKIP

	pattern:	(?:reboot \s+system boot|reboot\s+~)

					format:	reboot
					dest:	major events

	pattern:	ftp\s+ftp\s+($host_pat)

					format:	$1
					dest:	FTP: successful FTP from (partial nodename)

	pattern:	($user_pat)\s+(pts/\d+|tty\w+)\s+($host_pat)

					format:	$1 from $3
					dest:	login: successful login for user from (partial nodename)

	pattern:	($user_pat)\s+(pts/\d+|tty\w+)

					format:	$1
					dest:	login: successful local login

	#mta this next guy should probably take advantage of the X11 info
     pattern:  ($user_pat)\s+(pts/\d+|tty\w+)\s+(\:\d+(?:\.\d+)?)

					format:   $1
					dest:     login: successful local login

@@endif

# syslog
# This one is kinda scary.

add arr log_type_list=
syslog


# file basenames that this log type applies to
set arr syslog_filenames=
authlog
daemon
local1
messages
maillog
secure
syslog

set var syslog_date_pattern=^((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)+\s+\d{1,2})\s+\d+\:\d+\:\d+\s+

set var syslog_date_format=%b %e

set var syslog_nodename_pattern=^(\S+)\s*

set arr syslog_pre_skip_list_hook=
	# get rid of PID field, if present
	s{^([^\s\[]+)\[\d+\]}{$1};
	#
	# for sendmail, get rid of queue ID
	if (m{^sendmail: }) {
		s{^sendmail: (?:NOQUEUE|[A-Za-z]{3}\d{5}|[0-9A-Za-z]{12}): }
			{sendmail: };
		s{^sendmail: SYSERR\($user_pat\): }{sendmail: SYSERR: };
	}

# skip_list and raw_rules are obsolete and deprecated, but will continue 
# to work.

# set arr syslog_skip_list=

# set arr syslog_raw_rules=


# time for the new config format.  Hopefully, this is both more clear and 
# more extensible.

@@ifndef __USE_MINIMAL_CONFIG

logtype: syslog

# first, a bunch of patterns that we want to skip, AKA discard.

	pattern:	PAM_pwdb: \($word_pat\) session closed for user $user_pat

					dest:	SKIP

# let's try to recognize kernel device info messages and throw them out:

	pattern:	/bsd: \w+\d at\ 

					dest:	SKIP

	pattern:	/bsd: \w+\d:\ 

					dest:	SKIP

	pattern:	crosspost: seconds \d+ links \d+ \d+ symlinks \d+ \d+ mkdirs \d+ \d+ missing \d+ toolong \d+ other \d+

					dest:	SKIP

	pattern:	ftpd: FTP session closed

					dest:	SKIP

	pattern:	ftpd: (?:LIST|CWD|NLST) .*

					dest:	SKIP

	pattern:	ftpd: (?:NOOP|NLST)

					dest:	SKIP

	pattern:	ftpd: PASS password

					dest:	SKIP

	pattern:	ftpd: PORT

					dest:	SKIP

	pattern:	ftpd: PWD

					dest:	SKIP

	pattern:	ftpd: QUIT

					dest:	SKIP

	pattern:	ftpd: REST

					dest:	SKIP

	pattern:	ftpd: SYST

					dest:	SKIP

	pattern:	ftpd: TYPE ASCII

					dest:	SKIP

	pattern:	ftpd: TYPE Image

					dest:	SKIP

	pattern:	ftpd: USER \(none\)

					dest:	SKIP

	pattern:	ftpd: USER ($user_pat)

					dest:	SKIP

	pattern:	ftpd: User ($user_pat) timed out after (\d+) seconds at .*

					dest:	SKIP

	pattern:	ftpd: cmd failure

					dest:	SKIP

	pattern:	identd: from: ($ip_pat) \( ($host_pat) \) for: (\d+), (\d+)

					dest:	SKIP

	pattern:	identd: from: ($ip_pat) \(($host_pat)\) EMPTY REQUEST
	#mta this probably should be flagged, but isn't for now

					dest:	SKIP

	pattern:	identd: from: ($ip_pat) \(($host_pat)\) for invalid-port\(s\): (\d+) , (\d+)
	#mta this probably should be flagged, but isn't for now

					dest:	SKIP

	pattern:	identd: Successful lookup: (\d+) , (\d+) : ($user_pat)\.($user_pat)

					dest:	SKIP

	pattern:	identd: Returned: (\d+) , (\d+) : NO-USER

					dest:	SKIP

	pattern:	innd: E

					dest:	SKIP

	pattern:	innd: L:$file_pat

					dest:	SKIP

	pattern:	innd: ME HISstats \d+ hitpos \d+ hitneg \d+ missed \d+ dne

					dest:	SKIP

	pattern:	kernel: (\s+\w{8}){8}

					dest:	SKIP

	pattern:	kernel: \w+: CDROM not ready\.  Make sure there is a disc in the drive\.

					dest:	SKIP

	pattern:	kernel: \w+: Setting promiscuous mode\.

					dest:	SKIP

	pattern:	kernel: Adding Swap: (\d+)k swap-space \(priority (\-?\d+)\)

					dest:	SKIP

	pattern:	kernel: ATAPI device \w+:

					dest:	SKIP

	pattern:	kernel: cdrom: open failed.

					dest:	SKIP

	pattern:	kernel: Detected (\d+(?:\.\d+)?) (?:M|k)?Hz processor\.

					dest:	SKIP

	pattern:	kernel: Detected PS\/2 Mouse Port\.

					dest:	SKIP

	pattern:	kernel: \s+\"(?:\w\w\s){12}\"

					dest:	SKIP

	pattern:	kernel: Device not ready\.  Make sure there is a disc in the drive\.

					dest:	SKIP

	pattern:	kernel: Disc change detected

					dest:	SKIP

	pattern:	kernel: EFLAGS: .*

					dest:	SKIP

	pattern:	kernel: EIP: .*

					dest:	SKIP

	pattern:	kernel: Linux version .*

					dest:	SKIP

	pattern:	kernel: Memory: .*

					dest:	SKIP

	pattern:	kernel: Process .*

					dest:	SKIP

	pattern:	kernel: sr0: disc change detected

					dest:	SKIP

	pattern:	kernel: UDF-fs DEBUG .*

					dest:	SKIP

	pattern:	kernel: UDF-fs INFO .*

					dest:	SKIP

	pattern:	named-xfer: send AXFR query 0 to ($ip_pat)

					dest:	SKIP

	pattern:	named: .*(?:Lame server|XSTATS|NSTATS|USAGE|ns_forw|ns_resp).*

					dest:	SKIP

	pattern:	named: .*(?:Cleaned cache|bad referral|points to a CNAME).*

					dest:	SKIP

	pattern:	named: .*(?:all possible.*lame|NS points to CNAME|wrong ans\. name).*

					dest:	SKIP

	pattern:	named: .*(?:send AXFR query| zone .* loaded|sysquery|invalid RR type).*

					dest:	SKIP

	pattern:	named: .*(?:name .* is invalid .* proceeding anyway)

					dest:	SKIP

	pattern:	named: Forwarding source address is .*

					dest:	SKIP

	pattern:	named: invalid RR type .* in authority section

					dest:	SKIP

	pattern:	named: listening on .*

					dest:	SKIP

	pattern:	named: Received NOTIFY answer from .*

					dest:	SKIP

	pattern:	named: Sent NOTIFY for .*

					dest:	SKIP

	pattern:	named: unrelated additional info \'($host_pat)\' type A from \[($ip_pat)\]\.(\d+)

					dest:	SKIP

	pattern:	named: zone transfer .* of .*

					dest:	SKIP

	pattern:	newsyslog: logfile turned over

					dest:	SKIP

	pattern:	ntpdate: step time server

					dest:	SKIP

	pattern:	ofpap: \d+ done

					dest:	SKIP

	pattern:	ofpap: PostScript

					dest:	SKIP

	pattern:	ofpap: done

					dest:	SKIP

	pattern:	ofpap: sending to pap\[\d+\]

					dest:	SKIP

	pattern:	ofpap: starting for \?

					dest:	SKIP

	pattern:	ofpap: straight text

					dest:	SKIP

	pattern:	q?popper: \(v[\d\.]+\) Unable to get canonical name of client,\ err = \d+

					dest:	SKIP

	pattern:	q?popper: Unable to obtain socket and address of client,\ err = \d+

					dest:	SKIP

	pattern:	q?popper: warning: can't verify hostname: gethostbyname\($host_pat\) failed

					dest:	SKIP

	pattern:	q?popper: (?:$mail_user_pat)?\@\[?$host_pat\]?: -ERR POP EOF received

					dest:	SKIP

	pattern:	q?popper: (?:$mail_user_pat)?\@\[?$host_pat\]?: -ERR POP hangup

					dest:	SKIP

	pattern:	q?popper: (?:$mail_user_pat)?\@\[?$host_pat\]?: -ERR POP timeout

					dest:	SKIP

	pattern:	q?popper: (?:$mail_user_pat)?\@\[?$host_pat\]?: -ERR SIGHUP or SIGPIPE flagged

					dest:	SKIP

	pattern:	savecore: no core dump

					dest:	SKIP

	pattern:	sendmail: $file_pat: \d+ aliases, longest \d+ bytes, \d+ bytes total

					dest:	SKIP

	pattern:	sendmail: Authentication-Warning: $host_pat: $mail_user_pat set sender to

					dest:	SKIP

	pattern:	sendmail: Authentication-Warning: $host_pat: $mail_user_pat\@$host_pat didn't use HELO protocol

					dest:	SKIP

	pattern:	sendmail: alias database $file_pat (?:auto|)rebuilt by $mail_user_pat

					dest:	SKIP

	pattern:	sendmail: clone \w+, owner\=($mail_user_pat(?:\@$host_pat)?)

					dest:	SKIP

	pattern:	sendmail: collect: premature EOM: Error \d+

					dest:	SKIP

	pattern:	sendmail: gethostbyaddr\($ip_pat\) failed: .*

					dest:	SKIP

	pattern:	sendmail: gethostbyaddr: $host_pat != $ip_pat

					dest:	SKIP

	pattern:	sendmail: to=.*stat=(?:Sent|queued).*

					dest:	SKIP

	pattern:	sendmail: from=.*

					dest:	SKIP

	pattern:	sendmail: \w+: DSN: .*

					dest:	SKIP

	pattern:	sendmail: \w+: return to sender: .*

					dest:	SKIP

	pattern:	sshd: Connection closed by $ip_pat

					dest:	SKIP

	pattern:	sshd: Generating 768 bit RSA key.

					dest:	SKIP

	pattern:	sshd: Generating new 768 bit RSA key.

					dest:	SKIP

	pattern:	sshd: RSA key generation complete.

					dest:	SKIP

	pattern:	sshd: fatal: Connection closed by remote host\.

					dest:	SKIP

	pattern:	sshd: fatal: Could not write ident string\.

					dest:	SKIP

	pattern:	sshd: fatal: Did not receive ident string\.

					dest:	SKIP

	pattern:	sshd: fatal: Local: Command terminated on signal \d+\.

					dest:	SKIP

	pattern:	sshd: fatal: Read error from remote host: Connection timed out

					dest:	SKIP

	pattern:	sshd: fatal: Read error from remote host: Connection reset (?:by peer)?

					dest:	SKIP

	pattern:	sshd: fatal: Read error from remote host: No route to host

					dest:	SKIP

	pattern:	sshd: fatal: Read from socket failed: Connection reset by peer

					dest:	SKIP

	pattern:	sshd: fatal: Read from socket failed: No route to host

					dest:	SKIP

	pattern:	sshd: fatal: Session canceled by user

					dest:	SKIP

	pattern:	sshd: fatal: Write failed: Broken pipe

					dest:	SKIP

	pattern:	sshd: fatal: Timeout before authentication\.

					dest:	SKIP

	pattern:	sshd: fatal: Timeout before authentication for ($ip_pat)\.

					dest:	SKIP

	pattern:	sshd: log: Closing connection to ($ip_pat)

					dest:	SKIP

	pattern:	sshd: log: fwd X11 connect from

					dest:	SKIP

	pattern:	sshd: log: Generating \d+ bit RSA key.

					dest:	SKIP

	pattern:	sshd: log: Generating new (\d+) bit RSA key\.

					dest:	SKIP

	pattern:	sshd: log: RhostsRsa authentication not available for connections from unprivileged port\.

					dest:	SKIP

	pattern:	sshd: log: Rsa authentication refused for $user_pat: no $file_pat/\.ssh\s

					dest:	SKIP

	pattern:	sshd: log: RSA key generation complete\.

					dest:	SKIP

	pattern:	sshd: log: Server listening on port 22.

					dest:	SKIP

	pattern:	sshd: log: Setting tty modes failed

					dest:	SKIP

	pattern:	sshd: log: Wrong response to RSA authentication challenge.

					dest:	SKIP

	pattern:	sshd: log: executing remote command as user ($user_pat)

					dest:	SKIP

	pattern:	snmpd\w.*: local pdu process error

					dest:	SKIP

	pattern:	snmpd\w.*: session_send_loopback_request\(\) failed

					dest:	SKIP

	pattern:	snmpd\w*: session_open\(\) failed for a pdu received from

					dest:	SKIP

	pattern:	su: Authentication failed for $user_pat

					dest:	SKIP

	pattern:	sudo:\s+ $user_pat : \(command continued\) .*

					dest:	SKIP

	pattern:	traceroute: gethostbyaddr: .*

					dest:	SKIP

	pattern:	unix:

					dest:	SKIP

	pattern:	unix: :

					dest:	SKIP

	pattern:	unix: \t\<SUN[\d\.]+[G]? cyl \d+ alt \d+ hd \d+ sec \d+\>

					dest:	SKIP

	pattern:	unix: Copyright \(c\) 1983-1997\, Sun Microsystems\, Inc\.

					dest:	SKIP

	pattern:	unix: Ethernet address \= ((?:\w+\:){5}\w+)

					dest:	SKIP

	pattern:	unix: \w+ is .*

					dest:	SKIP

	pattern:	unix: SUNW\,\w+ is .*

					dest:	SKIP

	pattern:	unix: \w+ at .*

					dest:	SKIP

	pattern:	unix: SUNW\,\w+ at .*

					dest:	SKIP

	pattern:	unix: \w+: screen \w+x\w+, (?:single|double) buffered, \w+ mappable, rev \w+

					dest:	SKIP

	pattern:	unix: MMCODEC: Manufacturer id \w+, Revision \w+

					dest:	SKIP

	pattern:	unix: No contiguous memory requested for SX

					dest:	SKIP

	pattern:	unix: SBus level \d+ 

					dest:	SKIP

	pattern:	unix: SBus slot \w+ 0x\w+

					dest:	SKIP

	pattern:	unix: SunOS Release ([\d\.]+) Version .*

					dest:	SKIP

	pattern:	unix: avail mem = \w+

					dest:	SKIP

	pattern:	unix: cpu \d+ initialization complete - online

					dest:	SKIP

	pattern:	unix: cpu\w+: \w+,\w+ \(mid \w+ impl 0x\w+ ver 0x\w+ clock \w+ MHz\)

					dest:	SKIP

	pattern:	unix: dump on /dev/dsk/\w+ size \w+

					dest:	SKIP

	pattern:	unix: esp\w+:\s+esp-options=0x\w+

					dest:	SKIP

	pattern:	unix: mem = \w+ \(0x\w+\)

					dest:	SKIP

	pattern:	unix: pac: enabled - SuperSPARC/SuperCache

					dest:	SKIP

	pattern:	unix: pseudo-device: pm\w+

					dest:	SKIP

	pattern:	unix: pseudo-device: vol\w+

					dest:	SKIP

	pattern:	unix: root nexus = SUNW,SPARCstation-\d+

					dest:	SKIP

	pattern:	unix: root on /iommu@\w+,\w+/sbus@\w+,\w+/espdma@\w+,\w+/esp@\w+,\w+/sd@\w+,\w+:a fstype ufs

					dest:	SKIP

	pattern:	unix: sparc ipl \d+

					dest:	SKIP

	pattern:	unix: syncing file systems... done

					dest:	SKIP

	pattern:	unix: syncing file systems...SunOS Release \d+\.\d+

					dest:	SKIP

	pattern:	unix: vac: enabled

					dest:	SKIP

	pattern:	xntpd: tickadj \= (\d+), tick = (\d+), tvu_maxslew = (\d+)

					dest:	SKIP

	pattern:	xntpd: time reset \(step\) .*

					dest:	SKIP

	pattern:	xntpd: xntpd [\d\-\.]+

					dest:	SKIP

	pattern:	xntpd: precision = \d+ usec

					dest:	SKIP

	pattern:	xntpd: synchronisation lost

					dest:	SKIP

	pattern:	xntpd: synchronized to $ip_pat, stratum=\d+

					dest:	SKIP

	pattern:	xntpd: synchronized to LOCAL\(0\), stratum=\d+

					dest:	SKIP

	pattern:	/usr/dt/bin/ttsession: child \(\d+\) exited due to signal \d+

					dest:	SKIP

	pattern:	/usr/dt/bin/ttsession: exiting

					dest:	SKIP


# OK, now let's have a bunch of useful rules.

	pattern:	(kernel: device \w+ (?:entered|left) promiscuous mode|lpd: lpd shutdown succeeded|named: Ready to answer queries|named: deleting interface \[($ip_pat)\]\.\d+|named: named shutting down|named: reloading nameserver|named: starting|reboot: rebooted by \w+|rpcbind: rpcbind terminating on signal|sendmail: (?:sendmail )?(?:startup|shutdown|restarting|rejecting).*|sendmail: starting daemon|sshd: error: Bind to port \d+ on $ip_pat failed: Address already in use.|sshd: Received signal 15; terminating.|sshd: fatal: Cannot bind any address.|sshd: log: Received signal \d+; terminating|shutdown: reboot by .*|sshd: Received SIGHUP; restarting\.|sshd: Server listening on $ip_pat port \d+\.|syslogd(?: [\d\.\-\#]+)?: restart|syslogd: configuration restart|syslogd: (?:going down|exiting) on signal \d+|unix: BAD TRAP|xntpd: xntpd exiting on signal \d+).*

					format:	$1
					dest:	major events

	pattern:	(inetd: /usr/openwin/bin/Xaserver: Hangup|named: $file_pat:\d+:.*|sendmail: alias database $file_pat out of date|kernel: EXT2\-fs error \(device [\w\:]+\):|sendmail: SYSERR: Cannot create database for alias file .*|sendmail: SYSERR: dbm map .*|sendmail: SYSERR: MX list for .* points back to .*|sendmail: unable to write ${file_pat}|sendmail: forward $file_pat\+?: Group writable directory|sshd: error: bind: Address already in use|sshd: fatal: Bind to port \d+ failed: Address already in use.|xntpd: can't open $file_pat:.*)

					format:	$1
					dest:	stuff that might need fixing

	pattern:  (kernel: end_request: I/O error,\ dev [^\,]+),\ sector \d+

					format:	$1
					dest:	stuff that might need fixing

	pattern:	kernel: Out of Memory: Killed process (\d+) \(($file_pat)\)\.

					format: kernel: out of memory, killed process $2
					dest:	stuff that might need fixing

	pattern:	sendmail: .*: (cannot open $file_pat: Group writable directory)

					format:	$1
					dest:	stuff that might need fixing

	pattern:	ofpap: \d+ died with (\d+)

					format:	ofpap: died with $1
					dest:	stuff that might need fixing

	pattern:	(?:in\.)?ftpd: refused connect from ($host_pat)

					format:	$1
					dest:	FTP: refused connection from

	pattern:	(?:in\.)?ftpd: connect(?:ion)? from ($host_pat)(?: \[$ip_pat\])?

					format:	$1
					dest:	FTP: connection from

	pattern:	ftpd: ANONYMOUS FTP LOGIN FROM ($host_pat) \[($ip_pat)\],\ ($user_pat(?:\@(?:$host_pat|)|))

					format:	$3 from $1 ($2)
					dest:	FTP: anonymous login

	pattern:	ftpd: ANONYMOUS FTP LOGIN FROM ($host_pat) \[($ip_pat)\],?

					format:	unknown from $1 ($2)
					dest:	FTP: anonymous login

	pattern:	mountd: refused mount request from ($host_pat)

					format:	$1
					dest:	NFS: refused with

	pattern:	mountd: authenticated mount request from ($host_pat):\d+

					format:	$1
					dest:	NFS: authorized NFS with

	pattern:	mountd: authenticated unmount request from ($host_pat):\d+

					format:	$1
					dest:	NFS: authorized NFS with

	pattern:	PAM_pwdb: \(login\) session opened for user ($user_pat)\s+

					format:	$1
					dest:	login: successful login

	pattern:	kernel: IP fw-in deny \w+ (\w+) ($ip_pat):(\d+) ($ip_pat):(\d+)

					format:	$1 $2 => $4:$5
					dest:	CATEGORY kernel: firewall deny

	pattern:	kernel: Packet log: inp(?:ut)? DENY \w+ PROTO=17 ($ip_pat):(\d+) ($ip_pat):(\d+) .*

					use_sprintf
					# format:	UDP $1:$2 => $3:$4
					format:	"%-15s => %-15s UDP   %-5s", $1, $3, $4
					dest:	CATEGORY kernel: firewall deny

	pattern:	kernel: Packet log: inp(?:ut)? DENY \w+ PROTO=1 ($ip_pat):(\d+) ($ip_pat):(\d+) .*

					use_sprintf
					#mta
					# format:	ICMP $1:$2 => $3
					format:	"%-15s => %-15s ICMP  %-5s", $1, $3, $2
					dest:	CATEGORY kernel: firewall deny

	pattern:	kernel: Packet log: inp(?:ut)? DENY \w+ PROTO=6 ($ip_pat):(\d+) ($ip_pat):(\d+) .*

					use_sprintf
					#mta
					# format: TCP $1:$2 => $3:$4
					format:	"%-15s => %-15s TCP   %-5s", $1, $3, $4
					dest:	CATEGORY kernel: firewall deny

	pattern:	kernel: Packet log: inp(?:ut)? DENY \w+ PROTO=2 ($ip_pat):(\d+) ($ip_pat):(\d+) .*

					use_sprintf
					#mta
					# format:	IGMP $1:$2 => $3:$4
					format:	"%-15s => %-15s IGMP  %-5s", $1, $3, $4
					dest:	CATEGORY kernel: firewall deny

	pattern:	kernel: Packet log: inp(?:ut)? REJECT \w+ PROTO=17 ($ip_pat):(\d+) ($ip_pat):(\d+) .*

					use_sprintf
					#mta
					# format:	UDP $1:$2 => $3:$4
					format:	"%-15s => %-15s UDP   %-5s", $1, $3, $4
					dest:	CATEGORY kernel: firewall reject

	pattern:	kernel: Packet log: inp(?:ut)? REJECT \w+ PROTO=1 ($ip_pat):(\d+) ($ip_pat):(\d+) .*

					use_sprintf
					#mta
					# format:	ICMP $1:$2 => $3
					format:	"%-15s => %-15s ICMP  %-5s", $1, $3, $2
					dest:	CATEGORY kernel: firewall reject

	pattern:	kernel: Packet log: inp(?:ut)? REJECT \w+ PROTO=6 ($ip_pat):(\d+) ($ip_pat):(\d+) .*

					use_sprintf
					#mta
					# format:	TCP $1:$2 => $3:$4
					format:	"%-15s => %-15s TCP   %-5s", $1, $3, $4
					dest:	CATEGORY kernel: firewall reject

	pattern:	kernel: Packet log: inp(?:ut)? REJECT \w+ PROTO=2 ($ip_pat):(\d+) ($ip_pat):(\d+) .*

					use_sprintf
					#mta
					# format:	IGMP $1:$2 => $3:$4
					format:	"%-15s => %-15s IGMP  %-5s", $1, $3, $4
					dest:	CATEGORY kernel: firewall reject

	# this next one contributed by Tim Meushaw
	pattern:	kernel: Denied Packet:.* SRC=($ip_pat) DST=($ip_pat).* PROTO=([A-Z]+) SPT=(\d+) DPT=(\d+) .*

					use_sprintf
					format: "%-15s => %-5s %-15s %-5s", $1,$3,$2,$5
					dest:   CATEGORY kernel: firewall deny

	pattern:	login: invalid password for \`($user_pat)\' on \`($file_pat)\'

					format:	$1
					dest:	login: authentication failure for

	pattern:	login: invalid password for \`($user_pat)\' on \`($file_pat)\' from \`($host_pat)\'

					format:	$1
					dest:	login: authentication failure for

	pattern:	login: LOGIN ON ($file_pat) BY ($user_pat)

					format:	$2
					dest:	login: successful local login

	pattern:	login: FAILED LOGIN \d+ FROM \($host_pat\) FOR ($user_pat),\ Authentication failure

					format:	$1
					dest:	login: authentication failure for

	pattern:	named.*Zone "($zone_pat)".* No default TTL set using SOA minimum instead

					format:	$1
					dest:	named: no default TTL (bind 8.2)

	pattern:	named.*Err/TO getting serial# for "($zone_pat)"

					format:	$1
					dest:	named: serial number errors for zone

	pattern:	named.*Zone "($zone_pat)" \(IN\) SOA serial\# \(\d+\) rcvd from \[($ip_pat)\] is \< ours

					format:	$2 for $1
					dest:	named: master has serial number too low for zone

	pattern:	named-xfer: serial from \[($ip_pat)\],\ zone ($zone_pat)\: \d+ lower than current\: \d+

					format:	$1 for $2
					dest:	named: master has serial number too low for zone

	pattern:	named-xfer: \[($ip_pat)\] not authoritative for ($zone_pat)\,

					format:	$1 for $2
					dest:	named: master server is not authoritative for zone

	pattern:	named-xfer: connect\(($ip_pat)\) for zone ($zone_pat) failed: (.*)

					format:	$1 for $2
					dest:	named: connect to master server for zone failed

	pattern:	named-xfer\: \[\[($ip_pat)\].\d+\] transfer refused from \[($ip_pat)\]\,\ zone ($zone_pat)

					format:	$3 from $2
					dest:	named: we were refused transfer

     #mta take advantage of zone info
	pattern:	(?:$file_pat)?named: unapproved AXFR from \[($ip_pat)\]\.(\d+) for \"($zone_pat)\" \(acl\)

					format:	$1
					dest:	named: unapproved zone transfer requested by

	pattern:	(?:$file_pat)?named: denied AXFR from \[($ip_pat)\]\.(\d+) for \"($zone_pat)\" \(acl\)

					format:	$1 for $3
					dest:	named: unapproved zone transfer requested by

	pattern:    (?:$file_pat)?named: client ($ip_pat)\#(\d+): zone transfer denied

					format:	$1
					dest:	named: unapproved zone transfer requested by

	pattern:	(?:$file_pat)?named: unapproved update from \[($ip_pat)\]

					format:	$1
					dest:	named: unapproved update from

     #mta: this next guy should probably display the zone
	pattern:	(?:$file_pat)?named: approved AXFR from \[($ip_pat)\]\.(\d+) for \"($zone_pat)\"

					format:	$1
					dest:	named: approved zone transfer requested by

	pattern:	(?:$file_pat)?named: Response from unexpected source \(\[($ip_pat)\]\.(\d+)\)

					format:	$1
					dest:	named: responses from unexpected sources

	pattern:	(?:$file_pat)?named: Malformed response from \[($ip_pat)\]\.(\d+) \((.*)\)

					format:	$1
					dest:	named: malformed response

	pattern:	(?:$file_pat)?named: unapproved query from \[($ip_pat)\]\.\d+ for \"($host_pat)\"

					format:	$1 for $2
					dest:	named: unapproved query

	pattern:	nscd: gethostbyaddr: ($host_pat) \!\= ($ip_pat)

					format:	$1 != $2
					dest:	nscd: host/IP mismatch

	pattern:	pam_tally: user ($user_pat) \((\d+)\) tally (\d+),\ deny (\d+)

					format:	$1 ($2) limit $4
					dest:	pam_tally: user attempting login after exceeding login failure limit

	pattern:	q?popper: ($mail_user_pat)\@\[?($host_pat)\]?: -ERR (?:authentication failure|Password supplied for "$mail_user_pat" is incorrect\.?|not authorized)

					format:	$1\@$2
					dest:	popper: authentication failure

	pattern:	q?popper: Failed attempted login to ($user_pat) from host ($host_pat)

					format:	$1\@$2
					dest:	popper: authentication failure

	pattern:	q?popper: ($user_pat)\@$host_pat: -ERR $file_pat lock busy\!  Is another session active\? \(11\)

					format:	$1
					dest:	popper: POP lock for

	pattern:	q?popper: Stats: ($mail_user_pat) (\d+) (\d+) (\d+) (\d+)

					format:	$1
					dest:	popper: users checked mail

	pattern:	q?popper: Stats: ($mail_user_pat) (\d+) (\d+) (\d+) (\d+) ($host_pat) ($ip_pat)

					format:	$1
					dest:	popper: users checked mail

	pattern:	q?popper: apop \"($mail_user_pat)\"\s*

					format:	$1
					dest:	popper: user is using apop

	pattern:	q?popper: ($mail_user_pat)\@\[?($host_pat)\]?: -ERR You must use APOP to connect to this server

					format:	$1\@$2
					dest:	popper: should have used APOP

	pattern:	q?popper: \@($host_pat): -ERR Too few arguments for the auth command.

					format:	$1
					dest:	popper: too few arguments for the auth command

	pattern:	q?popper: ((?:$user_pat)?\@$host_pat): -ERR Unknown command: "(\w+)".

					format:	$2 from $1
					dest:	popper: unknown command from

	pattern:	unix: NOTICE: quota_ufs: (?:Warning: over disk|over disk and time|over hard disk) limit \(pid \d+,\ uid (\d+), inum (\d+), fs ($file_pat)\)

					format:	$1
					dest:	quota exceeded (user's UID)

	pattern:	sendmail: to=([^\,]+)\,.*stat=(unknown mailer error)

					format:	'$1' got '$2'
					dest:	sendmail: delivery failed

	pattern:	sendmail: to=([^\,]+)\,.*stat=Deferred[^\,]*

					format:	'$1'
					dest:	sendmail: delivery deferred

	pattern:	sendmail: to=([^\,]+)\,.*stat=(.*)

					format:	'$1' got '$2'
					dest:	sendmail: delivery failed

	pattern:	sendmail: \<([^\>]+)\>\.\.\.\s*(.*\S)

					format:	'$1' got '$2'
					dest:	sendmail: delivery failed

	pattern:	sendmail: ruleset=check_mail,\ arg1=\<?$mail_user_pat\@($host_pat)\>?,\ relay=([^\,]+),\ reject\=.*(Sender domain.*|DENY)

					format:	from user '$1' from server '$2' because '$3'
					dest:	sendmail: we rejected incoming mail

	pattern:	sendmail: ruleset=check_mail,\ arg1=([^\,]+),\ relay=([^\,]+),\ reject\=.*(Domain name required).*

					format:	from user '$1' from server '$2' because '$3'
					dest:	sendmail: we rejected incoming mail

	pattern:	sendmail: ruleset=check_rcpt,\ arg1=([^\,]+),\ relay=([^\,]+),\ reject=.*\.\.\.\s*(.*\S)

					format:	from user '$1' from server '$2' because '$3'
					dest:	sendmail: we rejected incoming mail

	pattern:	sendmail: timeout waiting for input from ($host_pat) .*

					format:	$1
					dest:	sendmail: communications problems with

	pattern:	sendmail: lost input channel from (.*)

					format:	'$1'
					dest:	sendmail: communications problems with

	pattern:	sendmail: (?:SYSERR: )?collect: (?:I\/O error on connection from|premature EOM: Connection reset by|unexpected close on connection from) ($host_pat|\[$ip_pat\]).*

					format:	'$1'
					dest:	sendmail: communications problems with

	pattern:	sendmail: Null connection from (.*)

					format:	'$1'
					dest:	sendmail: Null connect from

	pattern:	sendmail: ((?:IDENT:)?[^\:\,]+): expn ($mail_user_pat) \[rejected\]

					format:	'$1' expnd '$2'
					dest:	sendmail: expn rejected

	pattern:	sendmail: ((?:IDENT:)?[^\:\,]+): expn ($mail_user_pat)

					format:	'$1' expnd '$2'
					dest:	sendmail: expn allowed

	pattern:	sendmail: ((?:IDENT:)?[^\:\,]+): vrfy ($mail_user_pat) \[rejected\]

					format:	'$1' vrfyd '$2'
					dest:	sendmail: vrfy rejected

	pattern:	sendmail: ((?:IDENT:)?[^\:\,]+): vrfy ($mail_user_pat)

					format:	'$1' vrfyd '$2'
					dest:	sendmail: vrfy allowed

	pattern:	sshd: log: Unknown group id (\d+)

					format:	$1
					dest:	sshd: login to unknown group (check /etc/passwd)

	pattern:	sshd: Could not reverse map address ($ip_pat)\.

					format:	$1
					dest:	sshd: could not reverse map address

	pattern:	sshd: log: Connection for ($user_pat) not allowed from ($host_pat)

					format:	$1 from $2
					dest:	sshd: denied access

	pattern:	sshd: Did not receive ident string from ($ip_pat)\.

					format:	$1
					dest:	sshd: did not receive ident string from host

	pattern:	sshd: log: Connection from ($ip_pat) port (\d+)

					format:	$1
					dest:	sshd: connection from

	pattern:	sshd: log: (?:RSA|Password) authentication for ($user_pat) accepted\.?

					format:	$1
					dest:	sshd: authentications for

	pattern:	sshd: log: Rhosts with RSA host authentication accepted for ($user_pat),\ ($user_pat) on ($host_pat)\.

					format:	$1 from $2\@$3
					dest:	sshd: authentications for

	pattern:	sshd: log: Could not reverse map address ($ip_pat)\.

					format:	$1
					dest:	sshd: could not \"reverse map\"

	pattern:	sshd: Failed password for ($user_pat) from ($ip_pat) port \d+

					format:	$1 from $2
					dest:	sshd: failed password

	pattern:	sshd: Accepted password for ($user_pat) from ($ip_pat) port \d+(?:\s+ssh2)?

					format:	$1 from $2
					dest:	sshd: accepted password

	pattern:	sshd: Accepted publickey for ($user_pat) from ($ip_pat) port \d+(?:\s+ssh2)?

					format:	$1 from $2
					dest:	sshd: accepted publickey

	pattern:	sshd: Accepted rsa for ($user_pat) from ($ip_pat) port \d+

					format:	$1 from $2
					dest:	sshd: accepted rsa

	pattern:	sshd: Faking authloop for illegal user ($user_pat) from ($ip_pat) port (\d+)

					format:	$1 from $2
					dest:	sshd: illegal user

	pattern:	su\: \'su ($user_pat)\' failed for ($user_pat) on ($file_pat)

					format:	$2 => $1
					dest:	su: failed for

	pattern:	su\: \'su ($user_pat)\' succeeded for ($user_pat) on ($file_pat)

					format:	$2 => $1
					dest:	su: succeeded for

	pattern:	su: \- $file_pat ($user_pat)-($user_pat)

					format:	$1 => $2
					dest:	su: failed for

	pattern:	su: \+ $file_pat ($user_pat)-($user_pat)

					format:	$1 => $2
					dest:	su: succeeded for

	pattern:	PAM_pwdb: \(su\) session opened for user ($user_pat) by ($user_pat)

					format:	$2 => $1
					dest:	su: succeeded for

	pattern:	PAM_pwdb: \d+ authentication failure\;\ ($user_pat)\(uid=\d+\) \-\> ($user_pat) for su service

					format:	$1 => $2
					dest:	su: failed for

	pattern:	su: ($user_pat) to ($user_pat) on $file_pat

					format:	$1 => $2
					dest:	su: succeeded for

	pattern:	sudo:\s*($user_pat) : user NOT in sudoers ; TTY=$file_pat ; PWD=$file_pat ; USER=$user_pat ; COMMAND=($file_pat).*

					format:	$1 ran $2
					dest:	sudo: unauthorized user ran command

	pattern:	sudo:\s+($user_pat) : TTY=$file_pat ; PWD=$file_pat ; USER=$user_pat ; COMMAND=($file_pat).*

					format:	$1 ran $2
					dest:	sudo: authorized user ran command

	pattern:	sudo:\s+($user_pat) : (?:3 incorrect passwords|password incorrect) ; TTY=$file_pat ; PWD=$file_pat ; USER=$user_pat ; COMMAND=($file_pat).*

					format:	$1 ran $2
					dest:	sudo: incorrect password

	pattern:	snmpdx?: agent_process\(\) : bad community from ($ip_pat)

					format:	$1
					dest:	snmpd: bad community from

	pattern:	identd: Connection from ($host_pat)

					format:	$1
					dest:	identd: connection from

	pattern:	([^\:\s]+): refused connect from (.*\S)

					format:	$1 from $2
					dest:	service refused connection

	pattern:	([^\:\s]+): connect from (.*\S)

					format:	$1 from $2
					dest:	service allowed connection

	pattern:	tftpd: tftpd: trying to get file: ($file_pat)

					format:	$1
					dest:	tftpd: trying to get file

	# bind says a "user@host:/somedir/named" when in starts
	pattern:	$user_pat\@$host_pat:$file_pat/named

					format:	named started
					dest:	major events

	# OpenBSD says a "user@host:/somedir/GENERIC" when it starts
	pattern:	/bsd:\s*$user_pat\@$host_pat:$file_pat/GENERIC

					format:	booted
					dest:	major events

	# last message repeated.  This one is pretty much unique.
	pattern: last message repeated (\d+) times?

					count:  $1
					dest:   LAST

@@endif


# null: special type for "processing" /dev/null or for throwing out files

add arr log_type_list=
null

set arr null_filenames=
null

add arr null_pre_date_hook=
	next;

set var null_date_pattern=

set var null_date_format=

logtype: null

	pattern: .*
				dest: SKIP

# done describing log types.

# global variables

set var other_host_message=Other hosts syslogging to us

# pretty format for dates, in strftime(3) format
set var date_format=%Y_%m_%d

# output message for one-day mode (default), subject to usual tags, plus %d
# stands for date
set var output_message_one_day =  Logs for %n on %d

# same concept as above except for -a mode with no date range
set var output_message_all_days=  All logs for %n as of %d

# same concept as the last two, except for -a mode with a date range.  %s
# for start date, %e for end date
set var output_message_all_days_in_range=  All logs for %n for %s through %e


# command used to send mail.  Subject to usual tag substitutions, plus
# %m stands for mail_address and %o stands for the relevant output message.
set var mail_command = Mail -s '%o' %m

# Set PATH environment variable.
set var PATH=/usr/local/bin:/bin:/usr/bin:/usr/ucb

# these variables are usually set by running uname with various options,
# but you can override them if you really want to.
#set var nodename=ook
#set var osname=Linux
#set var osrelease=2.2

# assorted variables that default to not being defined, but you can set
# them if you want to.  This group corresponds to command-line options.
#set var show_all=
#set var real_mode=
#set var days_ago=
#set var output_file=
#set var mail_address=
#set var process_all_nodenames=

# patterns that can be used in other patterns.
set var file_pat=[\w\_\-\/\.]+
set var host_pat=[\w\.\-]+
set var ip_pat=[\d\.]+
set var user_pat=[\w\_\-]+
set var mail_user_pat=[\w\_\-\.\*\+\=]+
set var word_pat=[\w\_\-]+
set var zone_pat=[\w\.\-]+

set arr commands_to_run=
@@ifndef __USE_MINIMAL_CONFIG
w
df -k
cat /etc/dumpdates
@@endif

# log files that, if not present or not openable, will cause an error
# 2000-08-27: morty: required_log_files plays havoc with some internal
# stuff.  I'm discontinuing support for it in the config file.

# log files that we process if present.  Note that these are globbed.
add arr optional_log_files=
/var/log/authlog*
/var/log/daemon*
/var/log/maillog*
/var/adm/messages*
/var/log/messages*
/var/log/secure*
/var/log/syslog*
/var/log/wtmp*
/var/adm/wtmpx*
/var/adm/sulog*

# rules for decompressing compressed files.
set arr decompression_rules=
gz, gzip -dc %f
Z, compress -c %f

# filename patterns to ignore when including directories
set arr filename_ignore_patterns=
.*\~
\..*\.swp
\#.*\#

# rules for PGPing stuff
set arr pgp_rules=
	2, pgp -afe %m 2>/dev/null
	5, pgpe -afr %m 2>&1
	g, gpg -aer %m 2>&1

# umask, the usual meaning.
set var umask=077

# priority, ie. "niceness".  man nice for more info.
#   If you set it to 0, the program will not attempt to set priority.
#   Please don't set this to a negative number unless you *really* know 
# what you're doing.
set var priority=0

# any categories listed in here won't appear in the output.  Defaults to none,
# but feel free to add to it in the local config file if you get too much
# stuff you're not interested in.
set arr ignore_categories=

# the format string used for "real mode".  %n is the nodename of the message,
# %c is the category of the message, %# is the count, %d is the data, \\ is
# backslash, \n is newline, \t is tab
set var real_mode_output_format=%c: (loghost %n, from host %h)\n%-10# %d\n\n

# in "real mode", how many seconds should we sleep after we're done looking
# at the log files, before we look for more input
set var real_mode_sleep_interval=1

# in "real mode", every now and then we want to check if the log files have
# rolled over or new log files have appeared.  This is how often we do that.
set var real_mode_check_interval=300

# how should we sort?  You can set this to "string" for a simple string
# sort, "funky" for a sort that understands IP address and other strings
# with embedded integer values, or "numeric" for a simple numeric sort 
# (don't use unless you are really dealing with only numeric data)
set var default_sort=funky

# categories that will appear before anything else in the output.
set arr priority_categories=
@@ifndef __USE_MINIMAL_CONFIG
major events
stuff that might need fixing
@@endif


# includes

# assorted standard includes.  include_if_exists continues if the file
# doesn't exist; include dies nastily if the file doesn't exist.
# include_dir includes all the files in the directory, dying if the
# directory doesn't exist or if a file can't be opened; include_dir_if_exists
# includes all the files in the directory, not dying if the directory
# doesn't exist, but dying if a file in the directory can't be opened.

# This clump is put here last so that local configs will override the default.

@@ifndef __USE_MINIMAL_CONFIG

include_dir_if_exists /usr/local/etc/log_analysis.d
include_if_exists /usr/local/etc/log_analysis.conf
include_if_exists /usr/local/etc/log_analysis.conf-%s
include_if_exists /usr/local/etc/log_analysis.conf-%s-%r
include_if_exists /usr/local/etc/log_analysis.conf-%n
include_dir_if_exists /etc/log_analysis.d
include_if_exists /etc/log_analysis.conf
include_if_exists /etc/log_analysis.conf-%s
include_if_exists /etc/log_analysis.conf-%s-%r
include_if_exists /etc/log_analysis.conf-%n

@@endif

# the @@end preprocessor directive can be used to stop processing the 
# config file.  But you don't need it.
@@end

=head1 NAME

log_analysis - Analyze various system logs

=head1 SYNOPSIS

B<log_analysis> [B<-h>] [B<-f> config_file] [B<-o> file] [B<-O>] [B<-n> nodename] [B<-U>] [B<-u> unknownsdir] [B<-d> days_ago] [B<-a>] [B<-m> mail_address] [B<-M> mail_prog] [B<-r>] [B<-s>] [B<-S>] [required_files. . .]

=head1 DESCRIPTION

I<log_analysis> analyzes and summarizes system logs files.  
It also runs some other commands (ie. I<w>, I<df -k>) to show the system
state.  It's intended to be run on a daily basis out of cron.

=head1 OPTIONS

=over 4

=item B<-a> all

Show all logs, not just the ones from yesterday.

=item B<-d days_ago>

Show logs from I<days_ago> days ago.  Defaults to 1 (ie. show
yesterday's logs.)  In I<-a> mode, this option only affects the
heading, and it defaults to 0.

You can also provide an absolute date in the form YYYY_MM_DD,
ie. 2001_03_02.  And you can provide the symbolic names I<today>
(equivalent to 0) and I<yesterday> (equivalent to 1).

And you can even provide a date range in the form YYYY_MM_DD-YYYY_MM_DD
or ago1-ago2
to get output for a range of days.  Each day is output individually, so
if you use the I<-o> option, you get a separate file for each day, and 
if you use the I<-m> option, you get a separate mail for each day.

=item B<-f config_file>

Read I<config_file> in addition to the internal config and the internal
config files.  See L<"CONFIG FILE"> for details.

=item B<-h> help

Show command summary and exit.

=item B<-I info>

This option is used for obtaining internal information about I<log_analysis>.
I<log_analysis> exits immediately after outputting the information.

If I<info> is I<internal_config>, the internal config is output.

If I<info> is I<evals>, the evals built from the config (internal and local)
are output.

If I<info> is I<categories>, all categories (those mentioned in the various
configs and implicit categories) will be listed.

If I<info> is I<config_versions>, all config files will be listed with their
config_version (if defined) and file_version (if defined).

If I<info> is I<log_files>, the log files that would have been read are 
output.

If I<info> is I<log_types>, the log types are output.

If I<info> is I<patterns>, the various patterns defined for the log types
are output.

If I<info> is I<nothing>, I<log_analysis> just exits.  Useful for testing
configs.

If I<info> is I<help>, I<log_analysis> outputs the list of things you can use
for I<info>.

=item B<-m mail_address>

Mail output to I<mail_address>.  This can also be specified in the config;
see B<mail_address> in L<"VARIABLES">.

=item B<-M mail_command>

Use I<mail_command> to send the mail.  This
can also be specified in the config; see B<mail_command> in L<"VARIABLES">
for more info, including the default.

=item B<-n nodename>

Use I<nodename> as the nodename (AKA hostname) instead of the output of 
I<uname -n>.  This is more than just cosmetic: entries in syslogged files
will be processed differently if they didn't come from this nodename.  This
can also be specified in the config file; see B<nodename> in L<"VARIABLES">.

=item B<-N> process all nodenames

If the logs contain entries for nodes other than I<nodename>, (ie. if the
host is a syslog server), analyze them anyway.

=item B<-o file>

Output to I<file> instead of to standard output.  Works with I<-m>,
so you can save to a file and send mail with the same command.

=item B<-O>

With I<-o file>, causes the output to go both to the file and to standard 
output.  NB: this does not currently work with I<-m>, so you can't output
to a file, standard output, and to email.

=item B<-p pgp_type>

Encrypts the mail output.  Uses pgp_type to determine the encryption command.
For use with B<-m> or B<mail_address>.  See B<pgp_type> in the list of global
variables for info on encryption types.

=item B<-r>

"Real mode", ie. monitor log files continuously.  Currently conflicts with
many other modes and options, and will not show you files that need 
decompression or an open_command.  Yes, it has built-in support for log file
rollover.
See variables I<real_mode>, I<real_mode_output_format>,
I<real_mode_sleep_interval>, and
I<real_mode_check_interval> in the list of global variables for more 
configurables.  Also see I<optional_log_files> for an issue specific to
real mode.

=item B<-s> suppress other commands

Usually, I<log_analysis> runs assorted commands that show system state
(ie. I<w>, I<df -k>).  This option doesn't run those commands.
See B<commands_to_run> in L<"VARIABLES"> for the list of extra commands.

=item B<-S> suppress output footer

Usually, I<log_analysis> will include its version number, the time it
spent running, and its arguments at the end of the output.  This option
suppresses that output.

=item B<-U> unknowns-only

Output logfile unknowns to stdout and exit.  If I<unknownsdir> is
specified, also wipe I<unknownsdir> and then write out raw unknown
lines to files in I<unknownsdir>.  This exists to make writing
custom rules easier.

=item B<-u unknownsdir>

Use I<unknownsdir> as the unknownsdir.  If I<unknownsdir> already exists,
and contains files, its files will be used as the input for I<log_analysis> 
regardless of any other command line options.  If I<-U> is also specified,
after all processing I<unknownsdir> will be wiped out and its files
rewritten with the current unknowns.  This is useful for writing your own
configs.

=item B<-v> version

Output version and exit.

=item B<required-files>

If files are specified on the command line, log_analysis ignores its
built-in list of optional and required log files, and processes the
files on the command line.  If one of the files doesn't exist, it's
a fatal error.

=back

=head1 CONFIG FILE

The script has an embedded config file.  It will also read various
external config files if they exist; see L<"FILES"> for a list.  Later
directives (from later in the file or from a file read later) override
earlier directives.

You can make comments with '#' at the beginning of a line.  If you want
a '#' or '=' at the beginning of a line, you usually need to quote it with
backslash.

Some directives take a "block" as argument.  A block is a collection of lines
that ends with a line that is empty or only contains whitespace.  '#' at the
beginning of a line still comments out the line.  Leading whitespace on a 
line is ignored.

Before the config is parsed, it is passed through a preprocessor inspired
by the L<aide(1)> preprocessor.

=head2 Pattern directives

These directives describe your logs, and are the main point of this
program.  The basic idea here is that you first declare what logtype
you are working with, and then you specify a bunch of perl patterns
that describe different kinds of log messages, and that save parts of
the message.  For each perl pattern, you specify one or more
destinations that describe what you want done with it.

=over 4

=item B<logtype:> I<type>

Future patterns should be applied to this logtype (ie. sulog, syslog,
wtmp.)  Example: 

I<logtype: syslog>

=item B<pattern:> I<pattern>

I<pattern> is a perl regex (see L<perlre(1)>) that implictly starts
with ^ (beginning of the line) and implicitly ends with \s*$ (optional
whitespace and the end of the line.)  This should only be issued after
a B<logtype:> has been issued in the same config file.  Wildcard parts
of the pattern should be surrounded with parentheses, to save these
parts for later use in the B<format:>.  Note that there are some tokens 
with special meanings that can be used here, currently
$host_pat, $file_pat, $ip_pat, $user_pat, $mail_user_pat, $word_pat,
and $zone_pat.  Examples:

I<pattern: popper: Stats: ($mail_user_pat) (\d+) (\d+) (\d+) (\d+)>

I<pattern: login: LOGIN ON ($file_pat) BY ($user_pat)>

The order of precedence for patterns is undefined, except that user-defined 
patterns always have precedence over the patterns of the internal config.  

=item B<format:> I<format>

I<format> is treated as a string that contains the useful information
from a pattern.  Note that it should not actually be quoted.  A format
is mandatory for category destinations, but should not be used with
SKIP or LAST destinations.

For example, if we had a pattern that was I<login: LOGIN ON
($file_pat) BY ($user_pat)>, we would probably just want $2, so we
might say:

I<format: $2>

Similarly, if we had a pattern that was I<kernel: deny (\d+) packets
from ($ip_pat) to ($ip_pat)>, we might want to say:

I<format: $2 =E<gt> $3>

=item B<use_sprintf>

I<use_sprintf> is optional.  If this directive is present for a given
format, than instead of the format being treated as a string, it is
treated as the arguments for L<sprintf(3)>.  For example, if you have
a source IP address in $2 and a destination IP address in $3, you
could just have dest as I<$2 =E<gt> $3>, but you would have things
lining up better if you did this:

I<format: "%-15s =E<gt> $3", $2>

I<use_sprintf>

=item B<count:> I<count>

I<count> is optional.  The default is that a log line that matches a
pattern causes the category to increment by 1.  But sometimes, a
single log line corresponds to multiple events, ie. if you have a log
message of the form "5 packets denied by firewall" or "last message
repeated 3 times", you can extract the event count to I<count>.  For
example, if you're using the pattern I<kernel: deny (\d+) packets from
($ip_pat) to ($ip_pat)>, you might say:

I<count: $1>

=item B<dest:> I<dest>

This describes what you want done with the data in a pattern.  If
I<dest> is the special token I<SKIP> the data is discarded.  If
I<dest> is the special token I<LAST>, the data is assumed to be of the
form "last message repeated N times", and we pretend as though the
last message we saw occurred, using I<count> as a multiplier.  If I<dest>
starts with the special token I<UNIQUE>, we do special "unique" handling, 
which is covered in L<"UNIQUE DESTINATION">.  If I<dest> starts with the 
special token I<CATEGORY> or is any other string, it is treated as a 
category that the
pattern data should be saved to.  Ie. if I<pattern> was I<login: LOGIN
ON ($file_pat) BY ($user_pat)>, and I<format> was I<$2>, then one
might set I<dest> to I<login: successful local login>.  You must have
a format defined before the I<dest>.

You can have multiple I<dest> directives for a single I<pattern>, if
all of the I<dest>s are category destinations.  Each one needs its own
I<format>.  Similarly, if you set I<count> or I<use_sprintf>, they are
tied to the particular I<dest> you set them with.

Note that I<dest> "closes" the description of a destination, so you
need to have any other related directives (ie. I<format>, I<count>,
I<use_sprintf>) before the I<dest> directive.  This ordering is
necessary to avoid ambiguity in the multiple-destination case.

=back

=head2 Category directives

Several patterns can lead to the same category, so category-specific
directives are associated with the category, not with a pattern.  Here
are the category directives:

=over 4

=item B<category:> I<category>

Specifies which category subsequent directives will define.

=item B<sort:> I<sorting keywords>

Specifies how this category should be sorted in the output.  Examples are
"funky", "string", "value", "reverse value", etc.  The default is "funky".
See L<"SORTING"> for more info.

=item B<filter:> I<filter commands>

By default, I<log_analysis> will output all the data it finds in a category.
Filters let you specify, say, that only the top 10 items should be output,
or that only the items that occurred fewer than 5 times should be output.
If a category has data, but none of the data meet the filter rules, then 
the category will be completely skipped.  See L<"FILTERS"> for more info.

=item B<derive:> I<derive commands>

The usual way to populate categories is via the pattern config.  But 
sometimes, you want to combine two or more elemental categories to make
a new category.  Any categories derived in this manner may not be a
destination for simple patterns.

There are currently three subcommands for this (the quotes are literal):

=over 8

=item I<"category1"> B<add> I<"category2">

=item I<"category1"> B<subtract> I<"category2">

These do what you expect: take the values for the items in category2 and
add or subtract them from the values for the items in category1.  Any
item defined in either category will be in the new category.  Subtract
can cause the values in the new category to be negative or 0.

=item I<"category1"> B<remove> I<"category2">

The new category will contain items in category2 that are not in category1.
This is very different from subtract.

Example: if category1 contains A with a value of 2 and B with a value of 2,
while category2 contains A with a value of 1 and C with a value of 1,
'"category1" subtract "category2"' will contain A with a value of 1, B with
a value of 2, and C with a value of -1, while '"category1" remove "category2"'
will only contain B with a value of 2.

=back

=back

=head2 Other directives

=over 4

=item B<config_version> I<version-number>

Declare that the config is compatible with version I<version-number>.
This is for version-control purposes.  Every config file should have one
of these.  You can scan your config files' config versions with 
I<-I config_versions>.

=item B<file_version> I<revision-information>

Your own version control information.  I<revision-information> can be 
arbitrary text.  You can scan your config files' config versions with 
I<-I config_versions>.

=item B<include> I<file>

Read in configuration from I<file>.  Dies if I<file> doesn't exist.  
I<file> is subject to usual tag substitutions; see L<"TAG SUBSTITUTION">.

=item B<include_if_exists> I<file>

Just like B<include>, but doesn't die if the file doesn't exist.

=item B<include_dir> I<dir>

Read in all files in I<dir>, and include them.  Die if the directory
doesn't exist, or if a file in the directory isn't readable.  I<dir>
is subject to the usual tag substitutions; see L<"TAG SUBSTITUTION">.
Any filenames that match a pattern in I<filename_ignore_patterns> will
be skipped.

=item B<include_dir_if_exists> I<dir>

Just like B<include_dir>, but doesn't die if the directory doesn't exist.
I<Does> still die if any of the files in I<dir> isn't readable.

=item B<block_comment>

Throws out the block immediately after it.

=item B<set var> I<varname> =I<value>

Set scalar variable I<varname> to value I<value>.  
If the variable already exists, this will overwrite it.

See L<"VARIABLES"> for the list of variables you can play with.

=item B<add var> I<varname> =I<value>

If scalar variable B<varname> already exists, append I<value> to the end
of its current value.  If it doesn't yet exist, create it and set it to
I<value>.

See L<"VARIABLES"> for the list of variables you can play with.

=item B<set arr> I<arrname> =

Read in the block that follows this declaration, make the lines into an
array, and set the array variable I<arrname> to that array.

See L<"VARIABLES"> for the list of variables you can play with.

=item B<add arr> I<arrname> =

Read in the block that follows this declaration, make the lines into an
array, and append that array to the array named I<arrname>.

See L<"VARIABLES"> for the list of variables you can play with.

=item B<remove arr> I<arrname> =

Read in the block that follows this declaration, and for each line, look for
and delete that line from array I<arrname>.  If one of these lines cannot
be found, the result is a warning, not death.

See L<"VARIABLES"> for the list of variables you can play with.

=back

=head1 VARIABLES

Some variables are scalar, which means they are strings or numbers.  Some 
variables are arrays, which are lists of scalars.

Some variables are mandatory, which means they must be defined somewhere in
one of the config files, while some variables are optional.

Some variables are global, while some are per-log-type extensions.  Some
example of per-log-type extensions are date_pattern and filenames.  
Extensions should actually appear in the format "TYPE_EXTENSION",
ie. date_pattern would actually appear as I<syslog_date_pattern> for the
syslog log-type and I<sulog_date_pattern> for sulog.

To see examples of many of the possibilities, as well as the default values,
run I<log_analysis -I internal_config>.

=head2 PER-LOG-TYPE VARIABLE EXTENSIONS

=over 4

=item B<filenames>

This mandatory extension is an array of file basenames that apply to the log
type.  For example, if you wanted I</var/adm/messages.1> to be processed by
the syslog rules, you might add I<messages> to I<syslog_filenames>.

=item B<open_command>

Some log files (ie. wtmp log types) are in a binary format that needs to be 
interpreted by external commands.  This optional scalar extension specifies a
command to be run to interpret the file.  The command is subject to the usual
tag substitutions (see L<"TAG SUBSTITUTIONS">), plus the %f tag maps to 
the file.  For example, the wtmp log type defines I<wtmp_open_command>
as "I<last -f %f>".  If both I<decompression_rules> and I<open_command> apply
to a given file, the intermediate data will be stored in a temp file unless
I<pipe_decompress_to_open> is used.  See L<"pipe_decompress_to_open"> for more
info.

In real mode, files that need an open_command will be ignored.

=item B<pipe_decompress_to_open>

If both I<decompression_rules> and I<open_command> apply to a given file,
the intermediate data will be stored in a temporary file by default to
avoid problems with some commands that can't handle input from a pipe.
If this optional scalar extension is set to I<1> (or any "true") value, then
instead, the output of the decompression rule will be piped to the 
open command, and the open command's %f tag will be mapped to "-".

=item B<pre_date_hook>

This optional extension is an array of arbitrary perl commands that are run
for each log line, before the date processing (or any other processing) is 
done.

=item B<date_pattern>

This mandatory extension is a scalar that contains a pattern with at least one
parenthesized subpattern.  Before any rules are 
applied to a log line, the engine strips off the date pattern.  If the engine is
only looking at one day (ie. the default), it takes the part of the string 
that matched the parenthesized subpattern, and if it isn't equal to the right
date, it skips the line.  The B<date_format> extension (next) describes what
the date should look like.

=item B<date_format>

This mandatory extension is a scalar that describes the date using the
same format as B<strftime(3)>.  For example, syslog_date_format is "%b %e".

=item B<nodename_pattern>

This optional extension is a pattern with at least one parenthesized
subpattern.  If it exists, then after the I<date_pattern> is stripped
from the line, this pattern is stripped, and the part that matched the
subpattern is compared to the nodename.  If they're not equal, then
the relevant counter for the category named by the
I<other_host_message> variable is incremented.  Note that all nodenames
are subject to having the local domain stripped from them; see I<domain>
and I<leave_FQDNs_alone> for details.

=item B<pre_skip_list_hook>

This optional extension is an array of perl commands to be run after the
nodename check, just before the skip_list check.

=item B<skip_list>

This optional extension is obsolete and deprecated, but still works for 
backwards compatibility.

=item B<raw_rules>

This optional extension is obsolete and deprecated, but still works for 
backwards compatibility.

=back

=head2 GLOBAL VARIABLES

These variables are all globals.

=over 4

=item B<log_type_list>

This variable is a mandatory global array that contains the list of all
known log-types, ie. I<syslog>, I<sulog>, I<wtmpx>, etc.

=item B<host_pat>

This variable is a mandatory global scalar that describes a host.

=item B<file_pat>

This variable is a mandatory global scalar that describes a file.

=item B<ip_pat>

This variable is a mandatory global scalar that describes an ip in
dotted-quad format.

=item B<mail_user_pat>

This variable is a mandatory global scalar that describes a mail 
username.

=item B<user_pat>

This variable is a mandatory global scalar that describes a Unix 
username.

=item B<word_pat>

This variable is a mandatory global scalar that describes a typical
computer identifier.

=item B<zone_pat>

This variable is a mandatory global scalar that describes a zone.

=item B<other_host_message>

=item B<output_message_one_day>

=item B<output_message_all_days>

=item B<output_message_all_days_in_range>

Assorted mandatory scalars that are used for human-readable output.
B<other_host_message> defaults to "Other hosts syslogging to us",
B<output_message_one_day> defaults to "Logs for %n on %d",
B<output_message_all_days> defaults to "All logs for %n as of %d".
B<output_message_all_days_in_range> defaults to "All logs for %n for %s
through %e".

=item B<date_format>

This variable is a mandatory global scalar that describes how you want
the date printed in the output.  Uses the format of B<strftime(3)>.
Note that you probably shouldn't use characters that you wouldn't want
in a filename (ie. whitespace or '/') if you want to use the %d tag for 
I<output_file>.

=item B<output_file>

Equivalent to I<-o file>.
This variable is an optional global scalar that lists a filename that will
be output to instead of to standard output.  Works with I<mail_address> (if 
specified.)  Note that this variable is subject to the usual tag
substitutions (see L<"TAG SUBSTITUTIONS">), plus you can use the %d tag
for the date, so you can set it to something like 
"/var/log_analysis/archive/%n-%d".  See I<output_file_and_stdout>.

=item B<output_file_and_stdout>

Equivalent to I<-O>.
This variable is an optional global scalar that changes the behavior of
I<-o> or I<output_file>.  By default, I<-o> or I<output_file>
causes output to go only to the named file.  With this variable,
output also goes to standard output.  Note: this does not currently
work with I<-m>.

=item B<nodename>

This variable is an optional global scalar that is used in a bunch of 
places: in checking to see whether a message from syslog (or other log 
type that defines I<nodename_pattern>) originated on this host; in 
reading in various default config files; etc.  
If left unset in the config, its value is set from the output
of I<uname -n>.  Its value is used to set the I<n> tag.  Note that 
unless I<leave_FQDNs_alone> is set, I<log_analysis> will try to strip 
the local domain name from I<nodename>.

=item B<osname>

=item B<osrelease>

These two optional global scalars default to the output of I<uname -s> and
I<uname -r>, respectively.  They are only used for reading in default config
files.  Their values set the I<s> and I<r> tags, respectively.

=item B<domain>

This variable is an optional global scalar.  If you don't set it, 
I<log_analysis> will try to set it by looking for a I<domain> line in 
I</etc/resolv.conf>.  If I<log_analysis> has I<domain> set, it will 
attempt to strip away the local domain name from all nodenames it 
encounters, unless I<leave_FQDNs_alone> is set.  See I<leave_FQDNs_alone> 
for details.

=item B<leave_FQDNs_alone>

This variable is an optional global scalar.  By default, if I<log_analysis>
has I<domain> set (either explicitly or implicitly), it will attempt to
strip away the domain name in I<domain> from all nodenames it encounters.  
If you set this to I<1>, or to some other true value, I<log_analysis> will 
not attempt to strip the domain name in I<domain>.

=item B<PATH>

This variable is an optional global scalar that sets the I<PATH>
environment variable.  This doesn't help the initial setting of 
I<nodename>, I<osname>, or I<osrelease>, which
are set by running uname.

=item B<umask>

This variable is an optional global scalar that sets the umask.
See L<umask(2)>.

=item B<priority>

This variable is an optional global scalar that sets the priority,
or 
"niceness."
See L<nice(1)>.
Setting this to zero means run unchanged from the current niceness.
Setting this negative is a bad idea unless you really know what you're
doing, and is forbidden to non-root users.

=item B<decompression_rules>

This variable is an optional global array of rules to decompress 
compressed files, in the format: compression-extension, comma, space,
command to decompress to stdout.  The command is subject to the usual
tag substitutions (see L<"TAG SUBSTITUTIONS">), plus %f stands for the
filename.  For example, the rule for gzipped files is:

C<gz, gzip -dc %f>

If both I<decompression_rules> and I<open_command> apply to a given file,
the default is to use a temp file for the intermediate results unless
I<pipe_decompress_to_open> is used.  See L<"pipe_decompress_to_open"> for
more info.

In real mode, compressed files will be ignored.

=item B<pgp_rules>

This variable is an optional global array of rules for PGP encrypting
messages, in the format: PGP type (user defined), comma, space, 
command to PGP encrypt stdin to stdout.
The command is subject to the usual tag substitutions, plus %m stands for
the email address.  For use with the "B<-p>" and "B<-m>" options.  For 
example, the rule for gnupg is:

C<g, gpg -aer %m 2E<gt>&1>

Internally defined rules are "g" for "gnupg", "2" for PGP 2.x, and "5" 
for PGP 5.x.

B<WARNING>: The user who runs log_analysis must have already imported
the mail destination's key for this to work.  Make sure to test this
before you put it in a cronjob.

=item B<filename_ignore_patterns>

This variable is an optional global array of patterns that describe filenames
to be skipped in an include_dir/include_dir_if_exists context, such as
emacs backup files (".*~") or vim backup files ("\..*\.swp").  Only the
file component of the path is examined, not the directory component.
Patterns implicitly begin with ^ and implicitly end with $.

=item B<mail_address>

This variable is an optional global scalar that can consist of an email
address.  If set, the output of the script will be mailed to the
address it is set to.  The B<-m> option does the same thing, and overrides
this.

=item B<mail_command>

This variable is an optional global scalar that is the command used to send
mail if B<-m> is used or B<mail_address> is set.  The B<-M> option does
the same thing, and overrides this.  This variable is subject to the 
usual tag substitutions, plus %m stands for mail_address and %o stands for
the relevant output message.  The default is:

C<Mail -s '%o' %m>

=item B<optional_log_files>

This variable is an optional array of file globs that are to be processed.
Note that, unlike I<required_log_files>, these are globs rather than literal
filenames, although literal filenames will also work.  [Globs are filenames
with wildcards, ie. I</var/adm/messages*>.]

WARNING: in real mode, only the most recent file per glob in 
I<optional_log_files> is monitored.  This means that you should set it to
something like I</var/log/messages*> and I</var/log/syslog*> rather than 
I</var/log/*>.

=item B<commands_to_run>

This variable is an optional array of commands that are also supposed to be
run to give a snapshot of the system state.  These are currently:
I<w>, I<df -k>, and I<cat /etc/dumpdates>.

=item B<ignore_categories>

This variable is an optional array of categories that you don't want to see.
Rather than try to remove all the rules for these categories, you can just
list them here.

=item B<priority_categories>

This variable is an optional array of categories that will be listed first 
in the output.

=item B<process_all_nodenames>

This variable is the config equivalent of the B<-N> option.

=item B<allow_nodenames>

This variable is an optional array of nodenames that can log to this
host.  Usually, logs labelled as being from another host will not be 
analyzed, and each such line will be listed in a special category; if
you chose to allow some nodenames (or if you choose to process all 
nodenames by setting B<-N> or setting I<process_all_nodenames>) then
these log messages will also be processed.

=item B<real_mode>

This variable is the config equivalent of the B<-r> option; see the 
B<-r> option for more details.

=item B<real_mode_output_format>

This variable is a required global scalar.  It describes the per-output
format for real mode.  "\\" is replaced with "\", "\n" is replaced with
newline, "\t" with tab, and then tag substitution occurs 
(see L<"TAG SUBSTITUTION">); in
addition to the normal tags, "%c" is replaced with the category, "%#" 
is replaced with the count, "%d" is replaced with the formatted data, 
and "%h" is replaced with the nodename of the message.  I<WARNING:> 
you usually want "%h" (nodename of the message), not "%n" (nodename
of the host you're running on, which is one of the default tags 
substitutions.)  Defaults to "%c: (loghost %n, from host %h)\n%-10# %d\n\n".

=item B<real_mode_sleep_interval>

This variable is an optional global scalar for use with real mode; see 
I<-r> for more on real mode.  In real mode, I<log_analysis> reads log files 
for more data, sleeps for a little while, and then reads again.
The sleep interval controls how long I<log_analysis> sleeps (in seconds).  
It defaults to 1.

=item B<real_mode_check_interval>

This variable is an optional global scalar for use with real mode; see 
I<-r> for more on real mode.  In real mode, I<log_analysis> sits in a 
loop reading from the logs files.  Periodically, it wants to check if
the log files have rolled over or if newer log files have appeared.  If 
at least this long (in seconds) goes by since the last time we've checked,
we check again.

=item B<default_sort>

This variable is an optional global scalar that describes how certain
things will be sorted.  See L<"SORTING"> for info on what this can be set
to.  Defaults to I<funky>.

=item B<default_filter>

This variable is an optional global scalar that describes the default
category filter.  See L<"FILTERS"> for info on what this can be set
to.

=back

=head1 PREPROCESSOR DIRECTIVES

NB: these get completely processed before all other directives, so they 
don't care about other syntax elements.
Except as noted, these should appear at the beginning of the
line after optional whitespace.

=over 4

=item B<@@end>

End of config file.

=item B<@@define> I<var> I<val>

Define I<var> as value I<val>.  I<var> should contain only alphanumerics and
underscores, and start with an alphanumeric.  I<val> may contain no 
whitespace.

=item B<@@undef> I<var>

Undo any previous definition of I<var>.

=item B<@@ifdef> I<var>

=item B<@@ifndef> I<var>

=item B<@@else>

=item B<@@endif>

If variable I<var> is defined, even defined as a false value,
the lines after the @@ifdef are used, otherwise the lines
are effectively commented out.  @@ifndef is the logical reverse.
@@ifdef and @@ifndef must be terminated by an @@endif.   They may contain an 
@@else section that works in the usual way.

=item B<@@ifhost> I<name>

=item B<@@ifnhost> I<name>

These are just like @@ifdef and @@ifndef above, except that they test if 
the variable I<nodename> is equal to the value supplied for I<name>.

=item B<@@{var}>

If this string appears anywhere on any line, then if I<var> is a defined
variable, its value is substituted.  If I<var> is not a defined variable,
the string is left literally.  Note that this behaviour is different from
that of L<aide(1)>.

=item B<@@warn> I<message>

Print out I<message> as soon as the config is read.

=item B<@@error> I<message>

Print out I<message> and exit as soon as the config is read.

=back

=head1 SORTING

You can sort category items using several different criteria.  You can 
set the I<default_sort>, and then on a per-category basis, you can use the 
I<sort:> keyword to control things even closer.  If you don't override it,
I<default_sort> defaults to I<funky>.  Sorts stack, so you can use 
"reverse string" or "reverse value".  In theory, you
can stack all of them, ie. "reverse value reverse funky", but there is no
guarantee that sorts are stable.

The available sorts are:

=over 4

=item B<string> 

Simple string "lexicographical" sort.  Does not handle numbers well.

=item B<numeric> 

Sorts numbers, including decimal numbers, correctly, but cannot handle 
non-numeric characters, and cannot handle IPs correctly.

=item B<funky> 

Tries to do the right thing with mixed integers and strings.
Handles IP addresses correctly.  It does not handle decimal numbers correctly.

=item B<reverse> 

Reverses the current order.  Can be used in conjunction with another sort, 
ie. "reverse string".

=item B<value>

Sorts by count (ascending) instead of by item.

=item B<none> 

Does no additional sorting.

=back

=head1 FILTERS

Sometimes, you don't want to see all the information in a category, just
the top few items, or whatever.  Filters let you do this.  You can set a 
default filter using I<default_filter> (defaults to "none") or you can set 
filters on a per-category basis using the I<filter:> keyword.

Some commands you can use:

=over 4

=item B<E<gt>= >N

Only show items whose count is greater than or equal to N.

=item B<E<lt>= >N

=item B<E<gt> >N

=item B<E<lt> >N

=item B<= >N, B<== >N

=item B<!= >N, B<E<lt>E<gt> >N, B<E<gt>E<lt> >N

These are analogous to E<gt>=.

=item B<top >N

=item B<top >NB<%>

=item B<top_strict >N

=item B<top_strict >NB<%>

Only show those items whose count is in the top N or top N%.  The difference 
between I<top> and I<top_strict> is what happens when there's a tie to
be in the top N.  I<top> will include all the items that tie, even if this
means there will be more than N.  I<top_strict> always cuts off after N.

=item B<bottom >N

=item B<bottom >NB<%>

=item B<bottom_strict >N

=item B<bottom_strict >NB<%>

Analogous to top.

=item subfilter B<and> subfilter

=item subfilter B<or> subfilter

Lets you "and" or "or" two or more subfilters together (ie. "top 10 and 
E<gt>= 4").

=back

=head1 UNIQUE DESTINATIONS

I<log_analysis> has a relatively simple counting mechanism that is usually
effective.  One exception is when you want to track how often one value
occurs in your log uniquely with another value.  For example, suppose you're
watching firewall logs, $1 is the source IP, $2 is the destination IP, and 
you want to know if you're being scanned.  Tracking counts of "$1 $2" 
requires you to manually count how many times $1 occurs.  Tracking just "$1"
doesn't really tell you what you want, because you don't know if the source
IP is really scanning a bunch of different hosts, or just has a renegade
process that's banging away at a single destination.  What you want to track
is how many times $1 occurs with a unique $2.

To do this sort of thing in a pattern config, set I<format:> to 
I<value1, value2> and set I<dest:> to "I<UNIQUE> category-name".  In our
example, we might say:

  format: $1, $2
  dest:   UNIQUE scans

The fields in format are not evaluated in a string context, and only the 
last comma acts as a separator.  So, if $3 contains the protocol information,
you might say this:

  format: sprintf("%-15s %s", $1, $3), $2
  dest:   UNIQUE scans

=head1 TAG SUBSTITUTIONS

A few items are subject to "tag substitutions".  These are kind of like
printf's "%" sequences: a sequence like "%n" gets replaced with the nodename.
You can optionally specify field widths, which default to right-justified 
(ie.  "%10n") or can be preceded with a "-" to make them left-justified 
(ie. "%-10n").  
Anything subject to tag substitutions will be listed as such.  The default
tag character is "%" unless otherwise listed.

Here are the standard sequences:

=over 4

=item B<%%> literal %

=item B<%n> nodename (ie. the output of I<uname -n>.)

=item B<%r> OS release (ie. the output of I<uname -r>.)

=item B<%s> OS name (ie. the output of I<uname -s>.)

=back

If you try to use a "%" followed by something else, you'll get an error.

There are also other sequences that apply in special situations; these are 
listed where they apply.

=head1 EXAMPLES

B<log_analysis -m root@whatever>

Analyze yesterday's logs and mail the results to root@whatever.  You might 
want to put this in a cronjob.

B<log_analysis -p5 -m root@whatever>

Same as the last one, but PGP encrypt the logs using PGP 5 before mailing.

B<log_analysis -a>

Look at all the logs, not just yesterday's.

B<log_analysis -sa /var/adm/sulog>

Analyze all the contents of sulog, don't bother with local state.

B<log_analysis -san otherhost syslog-file>

Analyze all the contents of syslog-file, which was created on "otherhost".
Don't run the local state commands.

B<log_analysis -sd1 -f foo.conf -U>

This style of command is useful while developing local configs to handle
log messages unknown to the internal config.  

Use I<foo.conf> as a config file in addition to the internal config.  
Output only the unknowns.

=head1 COMPATIBILITY

Written for Solaris and Linux.  May work for other OSs.

Written for perl 5.00503.  May work with some earlier perl versions.

=head1 NOTES

You often need to be root to read interesting log files.

It is customary to regularly "rollover" log files.  Many log file 
formats don't include year information; among other benefits, rollover
makes the dates in such logfiles unambiguous.  B<log_analysis> by
default looks for log lines that match a particular day of the year,
but does not even try to guess the year.  If the OS you're using
doesn't rollover some logfiles by default (ie. Solaris doesn't
rollover /var/adm/wtmpx, /var/adm/wtmp, or /var/adm/sulog), you will
need to rollover these files yourself to get valid output from this
program.

On some OSes, '%' (ie. the percent symbol) has a special meaning in
crontabs, and needs to be escaped.  See L<crontab(1)>.

When there are a lot of unknowns, B<log_analysis> can take a lot
longer to run.  This is particularly a problem when you're first
running it, before you customize for your site.  To get around this
problem, if you send B<log_analysis> a SIGINT (ie. if you hit
control-C), it will stop going through your logs and immediately
output the results.

=head1 FILES

=over 4

=item B</etc/log_analysis.conf>

=item B</etc/log_analysis.conf-%n>

=item B</etc/log_analysis.conf-%s-%r>

=item B</etc/log_analysis.conf-%s>

=item B</usr/local/etc/log_analysis.conf>

=item B</usr/local/etc/log_analysis.conf-%n>

=item B</usr/local/etc/log_analysis.conf-%s-%r>

=item B</usr/local/etc/log_analysis.conf-%s>

Config files, in order of precedence.  "%n", "%s", and "%r" have the
usual tag substitution meanings; see L<"TAG SUBSTITUTIONS">.

=item B</etc/log_analysis.d>

=item B</usr/local/etc/log_analysis.d>

Plug-in directories.  All files in these directories will be treated as 
config files and include'd.

=back

=head1 AUTHOR

Mordechai T. Abzug <morty@sanctuary.arbutus.md.us>

=head1 SEE ALSO

L<syslogd(8)>, L<last(1)>, L<perlre(1)>

=cut
