#!/usr/local/bin/perl5.00502
# ==========================================================================
# Copyright (c) 1996 Regents of the University of California.
#
# This software has been developed by Roy Fielding <fielding@ics.uci.edu> as
# part of the WebSoft project at the University of California, Irvine.
#         <http://www.ics.uci.edu/pub/websoft/wwwstat/>
# See the file LICENSE for licensing and redistribution information.
#
# This program is provided ONLY as an example.  It is not needed to run
# wwwstat and is not supported by the author.
#
sub usage {
    # Abort the program with the command-line help text.  The text ends
    # in a newline, so die() prints it verbatim on STDERR without adding
    # an "at FILE line N" suffix.  This routine never returns.
    my $help_text = <<"EndUsage";
USAGE:  wwwerrs [ error_log_name ]

  This program reads the httpd error_log logfile (or whatever file is
  given as the single argument) looking for failed requests (bad URLs)
  which include a valid Referer (and thus can be tracked to the origin
  of the reference).  It assumes that the error_log has been enhanced
  to include the referer information (as in NCSA 1.4 and later).

  The output is an HTML file that makes it easy to check references.
  The script should be edited to discard normal error messages, where
  normal is dependent on your particular server.

EndUsage

    die $help_text;
}
usage() if (@ARGV > 1);          # at most one argument is accepted
# ==========================================================================
# Get defaults

# Shown in the <ADDRESS> footer of the generated page (already HTML-escaped).
$Owner = 'Your Name Here &lt;somebody@no_where.com&gt;';

# Day and month abbreviations, indexed by the wday/mon fields that
# localtime() and gmtime() return; wtime() reads both arrays.
@DoW = qw(Sun Mon Tue Wed Thu Fri Sat);
@MoY = qw(Jan Feb Mar Apr May Jun
          Jul Aug Sep Oct Nov Dec);

$Updated  = wtime(time, '');     # Format it as local time

# The log to read: the single command-line argument if there is one,
# otherwise the server's default error log.  Anything that looks like an
# option (leading '-') gets the usage message; the default path never
# starts with '-', so it always passes this check.
$ErrorLog = @ARGV ? $ARGV[0]
                  : '/usr/local/etc/httpd/logs/error_log';  # Server's Error Log
usage() if ($ErrorLog =~ /^-/);

# Tables filled in by process_log(), each keyed by the file named in the
# log entry.
%pseen   = ();                   # permission failures:   file => host
%unseen  = ();                   # missing files:         file => referers joined by '>'
%cfgseen = ();                   # denied by config:      file => host

# Section titles, used for both the table of contents and the headings.
$UnusualHeader = "Unusual Error Messages";
$ConfigHeader  = "Client Denied by Server Configuration";
$PermHeader    = "File Permissions Deny Access";
$ExistHeader   = "File Does Not Exist";
# ==========================================================================
# Start output

$| = 1;  # flush STDOUT after every write

# Page head, title and timestamp.
print "<HTML><HEAD>\n",
      "<TITLE>Collated Error Log Output</TITLE>\n",
      "</HEAD><BODY>\n",
      "<H1>Collated Error Log Output</H1>\n",
      "<EM>Last updated: $Updated</EM>\n";

# Table of contents: one link per report section, in page order.
print "<UL>\n";
foreach my $toc_entry ( [ 'Unusual', $UnusualHeader ],
                        [ 'Config',  $ConfigHeader  ],
                        [ 'Perm',    $PermHeader    ],
                        [ 'Exist',   $ExistHeader   ] )
{
    my($anchor, $title) = @$toc_entry;
    print "<LI><A HREF=\"#$anchor\">$title</A>\n";
}
print "</UL>\n";

# ==========================================================================
# Unusual messages.  process_log() prints every log line it does not
# recognise straight into this <PRE> block while it fills %cfgseen,
# %pseen and %unseen for the sections that follow.

print "<HR>\n",
      "<H2><A NAME=\"Unusual\">$UnusualHeader</A></H2>\n",
      "<PRE>\n";

process_log();

print "</PRE>\n";

# ==========================================================================
# The two "file | host" listings share one layout, so emit both from a
# small table of (anchor, heading, hash) triples.

foreach my $listing ( [ 'Config', $ConfigHeader, \%cfgseen ],
                      [ 'Perm',   $PermHeader,   \%pseen   ] )
{
    my($anchor, $title, $host_for) = @$listing;

    print "<HR>\n",
          "<H2><A NAME=\"$anchor\">$title</A></H2>\n",
          "<PRE>\n";

    foreach my $path ( sort keys %$host_for )
    {
        printf("%-70s |%s\n", $path, $host_for->{$path});
    }

    print "</PRE>\n";
}

# ==========================================================================
# Missing files: each file is followed by a list of links to the pages
# that referred to it.  Referers are stored joined by '>'; a referer of
# '-' is not listed.

print "<HR>\n",
      "<H2><A NAME=\"Exist\">$ExistHeader</A></H2>\n";

foreach my $path ( sort keys %unseen )
{
    print "$path\n",
          "<UL>\n";
    foreach my $referer ( grep { $_ ne '-' } sort split(/>/, $unseen{$path}) )
    {
        print "<LI><A HREF=\"$referer\">$referer</A>\n";
    }
    print "</UL>\n";
}

# ==========================================================================
# Page footer.

print "<HR>\n",
      "<ADDRESS>\n",
      "$Owner\n",
      "</ADDRESS>\n",
      "</BODY></HTML>\n";

exit(0);
# ==========================================================================
# ==========================================================================
# Iterate through the logfile

# process_log -- read the error log named by the global $ErrorLog.
#
# Every line is HTML-escaped first, so everything stored or printed from
# here on is safe to place in the generated page, including inside a
# double-quoted attribute value.  Lines are then handled as follows:
#
#   * a handful of routine server messages are skipped;
#   * "httpd: access to FILE failed for HOST, reason: ..." entries are
#     recorded by reason:
#         permission failures   -> $pseen{FILE}   = HOST  (latest host wins)
#         client denied by cfg  -> $cfgseen{FILE} = HOST  (latest host wins)
#         file does not exist   -> $unseen{FILE}  = the distinct referers,
#                                  joined by '>' (which cannot occur in an
#                                  escaped referer)
#   * everything else is printed to STDOUT as an unusual message.
#
# Takes no arguments and returns nothing useful.  Dies if the log cannot
# be opened.
sub process_log
{
    local($_);
    my($already_listed, $ref, $reason, $date, $file, $host);

    # Give the read mode explicitly.  With a bare two-argument open, a
    # name such as "cmd |" or ">file" taken from the command line would
    # run a command or truncate a file.  The two-argument form is kept
    # because the perl named in this script's #! line predates the
    # three-argument open.
    open(LOG, "< $ErrorLog") || die "$!: Failed to open $ErrorLog, stopped";

    LINE: while (<LOG>)
    {
        s/^\0+//;         # This is due to a bug in perl 5.002
        s/\0//g;          # because this line should be sufficient
        s/\&/\&amp;/g;
        s/</\&lt;/g;
        s/>/\&gt;/g;
        s/"/\&quot;/g;    # referers end up inside HREF="..." in the report
        s/%7E/~/ig;       # normalise every escaped tilde, not just the first

        study;

        next LINE if (/ timed out/);         # Ignore these common messages
        next LINE if (/ send aborted for /);
        next LINE if (/ caught SIGHUP/);
        next LINE if (/ successful restart/);
        next LINE if (/ killing CGI/);

        # Anything that is not an access failure is an unusual message.
        if (!/ httpd: access to /) { print; next LINE; }

        ($date, $file, $host, $reason) =
    /^\[([^\]]*)\] httpd: access to (\S+) failed for ([^, ]+), reason: (.*)/;

        # Looked like an access failure but did not parse: report as-is.
        if (!defined($reason)) { print; next LINE; }

        # Split a trailing " from REFERER" off the reason, if present.
        if ($reason =~ s/ from (\S+)//) { $ref = $1; }
        else                            { $ref = ''; }

        if ($reason =~ /^(file|script) permissions/)
        {
            $pseen{$file} = $host;
        }
        elsif ($reason =~ /^(file|script) does not exist/)
        {
            if ($unseen{$file})
            {
                # Compare whole referers.  A substring search would drop
                # "http://x/page" once "http://x/page.html" is listed.
                $already_listed =
                    grep($_ eq $ref, split(/>/, $unseen{$file}));

                if ($ref && !$already_listed)
                {
                    $unseen{$file} = join('>', $unseen{$file}, $ref);
                }
            }
            else { $unseen{$file} = $ref; }
        }
        elsif ($reason =~ /^client denied by server config/)
        {
            $cfgseen{$file} = $host;
        }
        else { print; }   # access failure with an unrecognised reason
    }
    close(LOG);
}

# ---------------------------------------------------------------------------
# The following routine is from the wwwdates.pl package of the
# libwww-perl library <http://www.ics.uci.edu/pub/websoft/libwww-perl/>.
# ===========================================================================
# wtime() is a modified version of Perl 4.036's ctime.pl
# library by Waldemar Kebsch <kebsch.pad@nixpbe.UUCP> and
# Marion Hakanson <hakanson@cse.ogi.edu>.
# 
# wtime returns a time string in the format "Wkd, Dy Mon Year HH:MM:SS Zone"
#               with no newline appended.
#
# USAGE:
#
# &wtime(time,'');     -- returns the local time with no timezone appended
#                         As in "Wed, 15 Dec 1993 23:59:59 "
#
# &wtime(time,'GMT');  -- returns GMT time
#                         As in "Thu, 16 Dec 1993 07:59:59 GMT"
#
sub wtime
{
    # Arguments: an epoch time in seconds and a zone label.  The label
    # 'GMT' selects gmtime(); any other label selects localtime().  The
    # label is appended to the result after a space, so an empty label
    # leaves a trailing space.  Weekday and month names are looked up in
    # the global @DoW and @MoY arrays.
    my($when, $zone) = @_;
    local($[) = 0;

    my @fields = ($zone eq 'GMT') ? gmtime($when) : localtime($when);

    # Only the first seven of the nine fields are needed.
    my($sec, $min, $hour, $mday, $mon, $year, $wday) = @fields;

    # The year field counts from 1900.
    return sprintf("%s, %02d %s %04d %02d:%02d:%02d %s",
                   $DoW[$wday], $mday, $MoY[$mon], $year + 1900,
                   $hour, $min, $sec, $zone);
}

1;
