#! /usr/bin/perl -w

# vim:syntax=perl

use strict;
use lib '/usr/local/share/perl5';

package Lire::WWW::ExtendedLog;

use vars qw/ @ISA /;

use Lire::DlfSchema;
use Lire::W3CExtendedLog;
use Lire::Program qw( :msg );

# Set up inheritance at compile time: this package is a subclass of
# Lire::W3CExtendedLog and only overrides build_parser.
BEGIN {
    @ISA = ( 'Lire::W3CExtendedLog' );
}

# The "www" DLF schema, loaded once when the file is run. build_parser
# asks it for the function that serialises a DLF hash ref.
my $schema = Lire::DlfSchema::load_schema( 'www' );

# W3C extended log fields whose value is copied unchanged into a DLF
# field: W3C field name => DLF field name.
my %w3c_field2dlf = (
    'cs-method'      => 'http_action',
    'cs-username'    => 'who',
    'cs(Referer)'    => 'referer',
    'cs(User-Agent)' => 'useragent',
    'sc-bytes'       => 'requested_page_size',
    'sc-status'      => 'http_result',
);

# Build the line parser (delegated to the parent class) and, on top of
# it, the converter that turns one log line into a www DLF record.
#
# Arguments are handed unchanged to SUPER::build_parser.
#
# Reads  $self->{fields}      whitespace-separated W3C field names
#        $self->{w3c_parser}  code ref called with the log line; its
#                             result is used as a hash ref keyed by W3C
#                             field name, plus a 'lire_time' entry
# Sets   $self->{www_dlf_converter} to a code ref taking the log line and
#        returning whatever the schema's hashref-to-DLF function returns.
#        That assignment is also the last expression evaluated by the sub.
sub build_parser {
    my $self = shift;
    $self->SUPER::build_parser( @_ );

    my @w3c_names = split /\s+/, $self->{fields};
    my %is_logged;
    $is_logged{$_} = 1 foreach @w3c_names;

    # W3C fields that translate one-to-one to a DLF field.
    my @direct = grep { exists $w3c_field2dlf{$_} } @w3c_names;

    # Set of DLF fields this log can provide. A hash is used so that each
    # DLF field is listed only once.
    my %provided;
    $provided{ $w3c_field2dlf{$_} } = 1 foreach @direct;
    $provided{time}           = 1 if $is_logged{time};
    $provided{requested_page} = 1
      if $is_logged{'cs-uri'} || $is_logged{'cs-uri-stem'};
    $provided{client_host}    = 1
      if $is_logged{'c-ip'} || $is_logged{'c-dns'};
    my @dlf_names = sort keys %provided;

    lr_info( "mapped DLF fields: ", join( ", ", @dlf_names ) );

    # Create the DLF maker function
    my $to_dlf = $schema->make_hashref2asciidlf_func( @dlf_names );

    # Choose between two alternative W3C fields of a parsed record.
    # Returns a one-element list holding the value to use, or an empty
    # list when neither field is usable. The preferred field is used when
    # it is present and not the '-' placeholder; otherwise the fallback
    # is used whenever it is present.
    my $choose = sub {
        my ( $record, $preferred, $fallback ) = @_;

        return ( $record->{$preferred} )
          if exists $record->{$preferred} && $record->{$preferred} ne '-';
        return ( $record->{$fallback} )
          if exists $record->{$fallback};
        return;
    };

    $self->{www_dlf_converter} = sub {
        my $record = $self->{w3c_parser}->( $_[0] );

        my %dlf = ( time => $record->{lire_time} );

        # Those fields that are mapped directly
        $dlf{ $w3c_field2dlf{$_} } = $record->{$_} foreach @direct;

        # Client_host: the DNS name when logged, the IP address otherwise.
        # The list assignment is true when one value was returned, even
        # if that value is undefined.
        if ( my ( $host ) = $choose->( $record, 'c-dns', 'c-ip' ) ) {
            $dlf{client_host} = $host;
        }

        # URL: the full URI when logged, its stem otherwise.
        if ( my ( $page ) = $choose->( $record, 'cs-uri', 'cs-uri-stem' ) ) {
            $dlf{requested_page} = $page;
        }

        return $to_dlf->( \%dlf );
    };
}

package main;

use Lire::Program qw( :msg :dlf );

# Driver: read a W3C extended log from the files named on the command
# line (or standard input) and print one www DLF record per log line.
#
# Counters handed to end_dlf_converter when the input is exhausted:
#   $lines       lines seen after the leading header block
#   $dlflines    DLF records written
#   $errorlines  lines that could not be processed
my $lines      = 0;
my $dlflines   = 0;
my $errorlines = 0;

init_dlf_converter( "www" );

my $parser = Lire::WWW::ExtendedLog->new;

# Parse the header: every leading line starting with '#' is a directive.
# The loop stops with the first record in $line, or with $line undefined
# when the input holds nothing but directives.
my $line;
while (defined( $line = <> )) {
    last unless $line =~ /^#/;
    $parser->parse_directive( $line );
}

lr_err( "invalid W3C extended log file: must start by Version and Fields directives" )
  unless defined $parser->{fields} && defined $parser->{version};

my $todlf = $parser->{www_dlf_converter};

# Transform into DLF.
#
# This is a plain while loop on purpose: it does nothing when the input
# ended with the header ($line undefined), and "last" is only allowed
# inside a real loop block, which a do-BLOCK with a while modifier is not.
while ( defined $line ) {
    $lines++;

    if ( $line =~ /^#/ ) {
        # Directive in the middle of the log.
        eval {
            $parser->parse_directive( $line );
        };
        if ( $@ ) {
            # NOTE(review): lr_err presumably terminates the program; the
            # statements below only matter if it returns -- confirm.
            lr_err( $@ );
            $errorlines++;
            last;
        }
    } else {
        eval {
            my $dlf = $todlf->( $line );
            print join( " ", @$dlf ), "\n";
            $dlflines++;
        };
        if ( $@ ) {
            lr_warn( $@ );
            lr_notice( qq{cannot convert line $. "$line" to www dlf, skipping} );
            # A skipped line is an error line; without this the error
            # count passed to end_dlf_converter would stay at zero.
            $errorlines++;
        }
    }

    $line = <>;
}

end_dlf_converter( $lines, $dlflines, $errorlines );

exit 0;

__END__

=pod

=head1 NAME

w3c_extended2dlf - convert W3C Extended Log file to DLF

=head1 SYNOPSIS

B<w3c_extended2dlf> I<file>

=head1 DESCRIPTION

B<w3c_extended2dlf> converts web server log files in the W3C Extended
Log Format to the www DLF. This log format is defined at
http://www.w3.org/TR/WD-logfile.html

It is used by IIS 4.0 and IIS 5.0. This is a customizable format which
contains a header specifying which information is present in the
log.

To have the maximum information in your reports, we suggest that you log
the following fields:

date, time, c-ip, c-dns, cs-uri, cs-method, sc-bytes, sc-status,
cs(User-Agent), cs(Referer) and cs-username

We also support the cs-uri-stem field.

Other fields will be ignored.

=head1 LIMITATIONS

The converter doesn't handle aggregation (records with a count field) and
will refuse to process those logs. Also, it doesn't support changing
the fields in the middle of the log file. This means that all records
in the log file must have the same format.

=head1 AUTHORS

Francis J. Lacoste <flacoste@logreport.org>

=head1 VERSION

$Id: w3c_extended2dlf.in,v 1.8 2002/02/20 23:12:46 flacoste Exp $

=head1 COPYRIGHT

Copyright (C) 2001 Stichting LogReport Foundation LogReport@LogReport.org
 
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html or write to the Free Software 
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

=cut

# Local Variables:
# mode: cperl
# End:
