#! /usr/bin/perl -w

# vim:syntax=perl

use strict;

use vars qw/$report_start $report_end/;

use lib '/usr/local/share/perl5';

use Lire::DataTypes qw/:special/;
use Lire::ReportConfig;
use Lire::ReportParser::ReportBuilder;
use Lire::Program qw/ :msg $PROG $LR_ID /;
use Lire::Merge::MergeFactory;

use POSIX qw/strftime/;

# Validate the candidate XML reports and order them chronologically.
#
# Parameters:
#   $superservice - superservice name; must pass check_superservice()
#   @files        - paths of the XML reports to examine
#
# Each file is run through a parser built with only_head => 1.  Files
# that can't be opened or parsed are skipped with a warning.
#
# Side effect: widens the globals $report_start and $report_end so that
# they cover the timespan of every accepted report.
#
# Returns the accepted file names sorted by ascending timespan start
# (an empty list when no file could be used).
sub sort_and_check_reports {
    my ( $superservice, @files ) = @_;

    # ASSERT
    die( "sort_and_check_reports: invalid superservice parameter: $superservice" )
      unless check_superservice( $superservice );

    my %start   = ();
    my @reports = ();

    # Check all reports for well-formedness, valid superservice,
    # start and end period.
    my $parser = Lire::ReportParser::ReportBuilder->new( only_head => 1 );
    foreach my $r ( @files ) {
        # Three-argument open with a lexical handle: the file name is
        # never interpreted as a mode or a pipe, and the handle can't
        # collide with another package-level REPORT handle.
        my $fh;
        unless ( open $fh, '<', $r ) {
            lr_warn( "can't open XML report $r: $!. Skipping" );
            next;
        }

        lr_info( "checking XML report '$r'" );
        my $report = eval { $parser->parse( $fh ) };

        # Save the error and close the handle before deciding what to
        # do, so that a parse failure doesn't leave the file open.
        my $error = $@;
        close $fh;
        if ( $error ) {
            lr_warn( "error parsing XML report $r: $error. Skipping" );
            next;
        }

        # Start and end of merged report is the union of the range of
        # all the reports
        # timespan_start and timespan_end are in seconds since epoch
        unless ( defined $report_start ) {
            $report_start = $report->timespan_start;
            $report_end   = $report->timespan_end;
        }

        $report_start = $report->timespan_start
          if $report->timespan_start < $report_start;
        $report_end   = $report->timespan_end
          if $report->timespan_end > $report_end;

        push @reports, $r;
        $start{$r} = $report->timespan_start;

        $report->delete();
    }

    # Without any accepted report there is no period to announce;
    # formatting it would call localtime() on an undefined value.
    unless ( @reports ) {
        lr_warn( "none of the XML reports could be used for the merge" );
        return ();
    }

    lr_info( "Will merge ", scalar @reports, " files. Merged period starts ",
             strftime( "%Y-%m-%d %H:%M:%S", localtime $report_start ),
             "; ends on ",
             strftime( "%Y-%m-%d %H:%M:%S", localtime $report_end ));

    return sort { $start{$a} <=> $start{$b} } @reports;
}

# Prepare every report specification of the configuration for merging.
#
# Parameters:
#   $cfg           - report configuration; its sections and the report
#                    specifications they contain are walked in order
#   $subreport_idx - hash reference filled in here: maps each spec's
#                    key to its zero-based rank among the specs sharing
#                    the same id, in configuration order
#
# Calls init_merge() on each spec with the globals $report_start and
# $report_end.  A spec whose init_merge() dies is logged and marked
# missing instead of aborting the whole run.
sub init_merge {
    my ( $cfg, $subreport_idx ) = @_;

    my %type_count = ();
    foreach my $section ( $cfg->sections ) {
        foreach my $spec ( $section->reports ) {
            my $id  = $spec->id;
            my $key = $spec->key;

            $type_count{$id} = 0 unless exists $type_count{$id};
            $subreport_idx->{$key} = $type_count{$id}++;

            lr_info( "initializing '$key' report" );
            eval { $spec->init_merge( $report_start, $report_end ) };

            # Copy $@ right away: it is a global, and any eval run
            # inside lr_warn() would reset it before it is used for
            # the mark_missing() reason.
            if ( my $error = $@ ) {
                lr_warn( "$error\nreport '$key' will be skipped" );
                $spec->mark_missing( "init_merge() failed: $error" );
            }
        }
    }

    return;
}

# Finish the merge for every report specification of the configuration.
#
# Parameters:
#   $cfg         - report configuration; its sections and the report
#                  specifications they contain are walked in order
#   $non_missing - hash reference keyed by spec key; a true value means
#                  that at least one source subreport was merged
#
# Calls end_merge() on each spec.  A spec whose end_merge() dies is
# logged and marked missing; a spec without a true entry in
# $non_missing is marked missing as well.
sub end_merge {
    my ( $cfg, $non_missing ) = @_;

    foreach my $section ( $cfg->sections ) {
        foreach my $spec ( $section->reports ) {
            my $key = $spec->key;

            lr_info( "finishing merge for '$key' report" );
            eval { $spec->end_merge };

            # Copy $@ right away: it is a global, and any eval run
            # inside lr_warn() would reset it before it is used for
            # the mark_missing() reason.
            if ( my $error = $@ ) {
                lr_warn( "$error\nreport '$key' will be skipped" );
                $spec->mark_missing( "end_merge() failed: $error" );
            }

            # Mark missing, if it was also missing in all reports
            $spec->mark_missing( "missing in all merged reports" )
              unless $non_missing->{$key};
        }
    }

    return;
}

# Merge the subreports of one parsed XML report into the matching
# report specifications.
#
# Parameters:
#   $cfg           - report configuration whose specs receive the data
#   $report        - parsed source report; queried through
#                    subreports_by_type()
#   $subreport_idx - hash reference mapping a spec key to the position
#                    of its subreport among those of the same type
#   $non_missing   - hash reference; the key of every spec that merged
#                    a subreport successfully is set to 1
#
# Specs already marked missing, subreports that are absent or missing,
# and merges that die are skipped with a log message.
sub merge_report {
    my ( $cfg, $report, $subreport_idx, $non_missing ) = @_;

    for my $section ( $cfg->sections ) {
      SPEC:
        for my $spec ( $section->reports ) {
            my $type = $spec->id;

            # Nothing to do for a spec that already failed.
            next SPEC if $spec->is_missing;

            my $key = $spec->key;
            my $idx = $subreport_idx->{$key};
            unless ( defined $idx ) {
                lr_err( "assertion failed: can't find index of subreport ",
                        $key );
            }

            # Look up the subreport sitting at the same position among
            # the source subreports of this type.
            my @candidates = $report->subreports_by_type( $type );
            if ( $#candidates < $idx ) {
                lr_warn( "there's only ", scalar @candidates,
                         " subreports of type $type while trying to merge index $idx" );
                next SPEC;
            }

            my $source = $candidates[$idx];
            if ( $source->is_missing ) {
                lr_info( "skipping merge of missing $type index $idx" );
                next SPEC;
            }

            lr_info( "merging subreport $type index $idx" );
            eval { $spec->merge_subreport( $source ) };
            if ( $@ ) {
                lr_warn( $@ );
                lr_warn( "Merge of $type index $idx failed" );
            }
            else {
                # At least one source report contributed data.
                $non_missing->{$key} = 1;
            }
        }
    }
}

# ----------------------------------------------------------------------
# Main program.
#
# Command line: <superservice> <report_cfg_file> <report.xml>+
# The merged XML report is written by $report->write_report (on STDOUT,
# according to the manual page below).
# NOTE(review): the code below relies on lr_err() not returning (the
# arguments are used right after the checks) -- confirm in Lire::Program.
# ----------------------------------------------------------------------
lr_err( "Usage: $PROG <superservice> <report_cfg_file> <report.xml>+" )
  unless @ARGV >= 3;

my $superservice = shift;
lr_err( "$PROG: invalid superservice: $superservice\n" )
  unless check_superservice( $superservice );
my $report_cfg = shift;
lr_err( "$PROG: non-existent configuration file: $report_cfg\n" )
  unless -e $report_cfg;
lr_err( "$PROG: can't read report file: $report_cfg\n" )
  unless -r $report_cfg;
my @report_files = @ARGV;

# Load the report configuration through the merge factory.  Arrow
# method calls are used instead of the indirect object syntax, which
# perl can parse ambiguously.
my $factory = Lire::Merge::MergeFactory->new;
my $cfg = eval {
    Lire::ReportConfig->new_from_file( $superservice, $report_cfg, $factory );
};
lr_err( "error loading configuration file: $@" ) if $@;

# First pass: keep the usable reports, sorted by start time.  This also
# sets the globals $report_start and $report_end.
@report_files = sort_and_check_reports( $superservice, @report_files );

my $subreport_idx = {};
init_merge( $cfg, $subreport_idx );

# Second pass: parse each report completely and merge its subreports.
my $non_missing = {};
my $parser = Lire::ReportParser::ReportBuilder->new;
foreach my $r ( @report_files ) {
    lr_info( "parsing XML report '$r'" );
    my $report = eval { $parser->parsefile( $r ) };
    if ( $@ ) {
        lr_warn( "error parsing XML report $r: $@. Skipping" );
        next;
    }

    lr_info( "merging XML report '$r'" );
    merge_report( $cfg, $report, $subreport_idx, $non_missing );
    $report->delete();
}

end_merge( $cfg, $non_missing );


# Write the merged XML report on STDOUT
lr_info( "writing merged XML report" );
my $report = $cfg->create_report( $report_start, $report_end );
$report->generator( "lr_xml_merge(1)" );
$report->write_report;

# see also lr_dlf2xml
if ( $ENV{LR_ARCHIVE} ) {
    my $lr_time = strftime( "%Y%m%d%H%M%S", localtime $report_start )
      . "-" . strftime( "%Y%m%d%H%M%S", localtime $report_end );
    # Save timespan in the archive
    lr_info( "gonna run lr_db_store $LR_ID time_span $lr_time" );

    # List form of system(): the arguments never go through a shell.
    system( "lr_db_store", $LR_ID, "time_span", $lr_time );

    # $? is a numeric wait status, so compare it numerically.
    lr_err( "lr_db_store failed" )
      if $? != 0;
}

exit 0;

# Local Variables:
# mode: cperl
# End:

__END__

=pod

=head1 NAME

lr_xml_merge - merge XML reports into a new XML report

=head1 SYNOPSIS 

B<lr_xml_merge> I<superservice> I<report_cfg_file> I<xml_report>+

=head1 DESCRIPTION

B<lr_xml_merge> will generate a new XML report on standard out using
the I<report_cfg_file> report configuration file. The data in this
report will result from the merging of the XML reports specified on
the command line.

This tool is useful to generate weekly or monthly reports from daily
XML reports without having to reprocess the whole logs. It can also be
useful to generate aggregated reports from specific server reports.
For example, to generate a report for all the email servers of the
network from the individual server reports.

The XML reports you merge should be of the same superservice. For best
results, you should merge reports that were generated from the same
report configuration file. You can use different parameter values.
(For example, change the TOP 20 into TOP 10 or change the 1d period
into a 1w or 1M period.)

=head1 HOW REPORTS ARE MERGED

For each subreport configured in the report configuration file,
B<lr_xml_merge> will use the data in each source XML report that has
the same index and report specification ID. 

For example, if the www report configuration file you are using
contains

    =section Activity Reports
    requests-by-period		period=1d
    bytes-by-period		period=1d
    user_sessions-by-period	period=1d

    =section Download Reports
    |select-url			url_match="\\.(gz|tgz|zip|exe|pdf|doc)$"
    requests-by-period		period=1d

The first C<requests-by-period> data will come from the first
subreport in the source XML reports with a C<requests-by-period> ID.
For the other subreport C<requests-by-period> that is in the "Download
Reports" section, its data will come from the second subreport in the
source XML reports that has a C<requests-by-period> ID.

You don't have to worry about subreports which are marked missing
"shifting" your data. Those subreports are present in the source XML
report but don't contain any data, so they will be "merged" at the
right place.

When merging results from timegroup, timeslot and rangegroup
operations, B<lr_xml_merge> won't do data interpolation. This means
that if you changed some of the parameters between the merged XML
report configuration and the source XML report configurations, make
sure that the merged parameters are even multiples of the source ones.
For example, for a timegroup operation, changing a 1d parameter into a
1w will work fine; changing a 1w into a 1d won't. When the parameters
aren't compatible, you'll get a warning and the data from the failing
subreport won't be included in the merged report.

=head1 GOTCHAS

You really don't want to merge reports for which the subreports order
was reorganized unless you really know what you are doing.


Filters are ignored in the merging process. It is assumed that the
source data was filtered accordingly.

For some operations, it is possible to obtain different reports when
you process the entire log and when you merge smaller reports. The
difference will be greatest for subreports which depend on the
analysis window. In the case of the www superservice, such examples
are found in subreports that depend on information related to
client_host and sessions. A "Different Client Hosts" is related to the
time window over which different client hosts are found. For example,
when you add two daily reports to produce a weekly report, it's
probable you have some overlap in client_hosts between the two days.
This information should appear in the explanation message that
precedes each subreport. If it doesn't, file that as a bug.

=head1 VERSION

$Id: lr_xml_merge.in,v 1.6 2002/06/21 23:05:09 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2002 Stichting LogReport Foundation LogReport@LogReport.org
 
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html or write to the Free Software 
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

=head1 AUTHOR

Francis J. Lacoste <flacoste@logreport.org>

=cut


