package Lire::AsciiDlf::Group;

use strict;

use vars qw/ $VERSION @ISA /;

use Lire::Config;
use Lire::Group;
use Lire::AsciiDlf::NestableAggregator;
use Lire::DataTypes qw( :basic );
use Lire::Report::Entry;

use Carp;

BEGIN {
    ($VERSION)	= '$Revision: 1.27 $' =~ m!Revision: ([.\d]+)!;
    @ISA = qw( Lire::Group Lire::AsciiDlf::NestableAggregator );
}

=pod

=head1 NAME

Lire::AsciiDlf::Group - interface to lire:group XML entities

=head1 SYNOPSIS

 use Lire::AsciiDlf::Group;

=head1 DESCRIPTION

Lire::AsciiDlf::Group offers various functions to handle <lire:group> XML
entities.  This module is used in Lire::Merge::Group and in
Lire::AsciiDlf::AsciiDlfFactory.

=head1 METHODS AND FUNCTIONS

=cut

sub dlf_sort_fields {
    my ( $self ) = shift;

    if ( $self->{min_memory} ) {
	my @fields = map { $_->field->name } @{$self->group_fields };

	push @fields, $self->SUPER::dlf_sort_fields;

	return @fields;
    } else {
	return $self->SUPER::dlf_sort_fields;
    }
}

=pod

=head2 guess_extra_entries

guess_extra_entries takes a limit as set in a <lire:group ...> entity in a
Report specification file.  It returns a number of extra records to store in
the generated XML report file (this number should be added to the user-supplied
limit).  This is done in order to have enough information to be able to do
merging later.  This function is called in init_report.

Depending on the limit supplied by the user, we add an extra 5 till 100% of the
data.

=cut

sub guess_extra_entries {
    my ( $limit ) = @_;

    # percentage to add to user-supplied limit
    my $ratio;

    if ( $limit < 10 ) {
	$ratio = 100;
    } elsif ( $limit < 25 ) {
	$ratio = 50
    } elsif ( $limit < 50 ) {
	$ratio = 25
    } elsif ( $limit < 1000 ) {
	$ratio = 10
    } else {
	$ratio = 5;
    }

    return int( $limit * $ratio / 100 );
}

sub init_report {
    my ($self, $dlf_info) = @_;

    my @fields = map { $_->field->name} @{$self->group_fields};
    $self->{key_maker} =
      $self->{report_spec}->schema->make_key_access_func( @fields  );

    my $keys = 1;
    foreach my $field ( @{$self->group_fields} ) {
	$keys *= $dlf_info->field_keys( $field->name );
    }
    my $threshold = defined $self->{parent} ? 
      $Lire::Config::KEYS_THRESHOLD_NESTED : 
      $Lire::Config::KEYS_THRESHOLD_TOP;

    $self->{min_memory} = $keys > $threshold;
    
    if ( defined $self->limit ) {
	my $limit = $self->limit;
	if ( $limit =~ /^\$/ ) {
	    $limit = substr $limit, 1;
	    $limit = $self->{report_spec}->param( $limit )->value;
	}
	$self->{limit_view} = $limit;

	# For better report merging, we add some entries.
	$self->{limit_num} = $limit + guess_extra_entries( $limit );
    }

    if ( $self->sort_fields ) {
	# Build sort function
	my ( $a_dflt, $b_dflt, $schwartzian );
	if (  $self->{min_memory}) {
	    $a_dflt = '$_[0]';
	    $b_dflt = '$_[1]';
	    $schwartzian = ''; # Not using schwarzian transform
	} else {
	    $a_dflt = '$a';
	    $b_dflt = '$b';
	    $schwartzian = '[1]'; # Using schwartzian transform
	}
	my @sort_ops = ();
	foreach my $f ( @{$self->sort_fields} ) {
	    my ($a, $b) = ($a_dflt, $b_dflt );
	    if ( $f =~ /^-/ ) {
		$f = substr $f, 1;
		($a,$b) = ($b,$a);
	    }

	    my $index;	# This will contains the index of the field in the array
	    my $cmp = '<=>'; # Default to numeric comparison
	    my $i = 0;
	    foreach my $group_field ( @{$self->group_fields} ) {
		if ( $group_field->name eq $f ) {
		    $index = $i;
		    if ( is_numeric_type( $group_field->field->type ) ) {
			$cmp = "<=>";
		    } else {
			$cmp = "cmp";
		    }
		    last;
		}
		$i++;
	    }
	    $i = @{$self->group_fields};
	    unless (defined $index) {
		foreach my $op ( @{$self->ops} ) {
		    if ( $op->name eq $f ) {
			$index = $i;
			last;
		    }
		    $i++;
		}
	    }
	    push @sort_ops, $a ."->" . $schwartzian . "[$index] $cmp " .
	      $b ."->" . $schwartzian . "[$index]";
	}
	my $sort_code = "sub { " . join( " || ", @sort_ops ) . " }";
	$self->{sort_cmp} = eval $sort_code;
	croak "error compiling sort comparison ($sort_code): $@" if $@;
    }

    $self->SUPER::init_report( $dlf_info );
}

sub init_group_data {
    my ( $self ) = @_;

    # The group datastructure used to hold the operations' data of the
    # group element is an array. It contains the fields value and the
    # op data: 
    # [ group field, group field, ..., op data, op data, op data ]

    if ( $self->{min_memory}) {
	# To minimize memory, we can operate on sorted input by 
	# keeping the key in final sorted order and keeping only 
	# the minimum needed
	#
	# Structure of the array: we keep in the first element the current 
	# group key, the second element is the current group element data.
	# After this we have the processed group elements data in sort order
	# and up the the limit attribute.
	# [ current_key, current_group_data, sorted_group_data, sorted_group_data, ... ]
	return [];
    } else {
	# For better performance, at the cost of higher memory footprint,
	# we don't need sorted input and keep related keys using a hash.
	return {}
    }
}

sub init_key_data {
    my ( $self, $dlf ) = @_;

    my $key_data = [];

    my $i = 0;
    foreach my $f ( @{$self->group_fields} ) {
	$key_data->[$i++] = $dlf->[$f->field->pos];
    }

    foreach my $op ( @{$self->ops} ) {
	$key_data->[$i++] = $op->init_group_data();
    }

    return $key_data;
}

sub update_group_data {
    my ( $self, $dlf, $data ) = @_;

    my $key	    = $self->{key_maker}->( $dlf );
    my $key_data;
    if ( $self->{min_memory} ) {
	my $last_key = $data->[0];
	$key_data    = $data->[1];
	if ( ! defined $last_key || $key ne $last_key ) {
	    $self->sort_current_group_element( $data )
	      if defined $last_key;

	    $last_key = $data->[0] = $key;
	    $key_data = $data->[1] = $self->init_key_data( $dlf );
	} 
    } else {
	$key_data = $data->{$key};
	unless ( exists $data->{$key} ) {
	    $key_data = $data->{$key} = $self->init_key_data( $dlf );
	}
    }

    my $i = @{$self->group_fields};
    foreach my $op ( @{$self->ops} ) {
	$op->update_group_data( $dlf, $key_data->[$i++] );
    }
}

sub item_data2sort_key {
   my $key = [];
   foreach my $f ( @{$_[0]} ) {
       if ( ref $f eq 'SCALAR' ) {
	   push @$key, $$f;
       } elsif (ref $f eq 'ARRAY' ) {
	   push @$key, $f->[0];
       } elsif (ref $f eq 'HASH' ) {
	   push @$key, $f->{value};
       } else {
	   push @$key, $f;
       }
   }
   return $key;
}

=pod

=head2 binary_insert

Recursive function that uses a binary search to insert 
$item in $array using $cmp as comparison operator.
$first_idx and $max_idx specify the boundaries of the search.
Search ends when $first_idx == $max_idx or when $sort_key
sorts at or before $first_idx or at or after $max_idx

=cut

sub binary_insert {
    my ( $item, $sort_key, $array, $cmp, $first_idx, $max_idx ) = @_;

    my $mid_idx = int( ($max_idx - $first_idx) / 2) + $first_idx;
    my $sort = $cmp->( $sort_key, item_data2sort_key( $array->[$mid_idx] ) ); 
    if ( ($first_idx == $mid_idx && $sort <= 0)  ||
	 ($max_idx  == $mid_idx && $sort >= 0 )  || 
	 $sort == 0
       ) 
    {
	# Search has ended, insert according to sort order
	if ( $sort < 0 ) {
	    # Sort before mid_idx
	    splice( @$array, $mid_idx, 0, $item);
	} else {
	    # Sort right after mid_idx
	    splice( @$array, $mid_idx + 1, 0, $item);
	}
    } else {
	# Recurse
	if ( $sort < 0 ) {
	    binary_insert( $item, $sort_key, $array, $cmp,
			   $first_idx, $mid_idx - 1 );
	} else {
	    binary_insert( $item, $sort_key, $array, $cmp,
			   $mid_idx + 1, $max_idx );
	}
    }
}

sub sort_current_group_element {
    my ( $self, $data ) = @_;

    # Case where @$data is empty
    return unless @$data;

    my $item   = $data->[1];

    # This data item is ended
    my $i = @{$self->group_fields};
    foreach my $op ( @{$self->ops} ) {
	$op->end_group_data( $item->[$i++] );
    }

    if ( $self->{sort_cmp}) {
	my $key = item_data2sort_key( $item );
	my $cmp = $self->{sort_cmp};
	if ( @$data == 2 ) {
	    push @$data, $item;

	# Small optimization: check for before or at end of array condition
	} elsif ( $cmp->( $key, item_data2sort_key( $data->[2] ) ) <= 0 ) {
	    splice @$data, 2, 0, $item;
	} elsif ( $cmp->( $key, item_data2sort_key( $data->[$#$data])) >= 0 ) {
	    push @$data, $item;
	} else {
	    binary_insert( $item, $key, $data, $cmp, 2, $#$data );
	}
    } else {
	# Push at end
	push @$data, $item;
    }

    # Keep only limit records
    if ( $self->{limit_num} ) {
	my $max_count = $self->{limit_num} + 2; # last_key, current_element
	splice @$data, $max_count
	  if $max_count < @$data ;
    }
}

sub end_group_data {
    my ( $self, $data ) = @_;

    if ( $self->{min_memory} ) {
	$self->sort_current_group_element( $data );

	# Remove last_key, current_element
	splice @$data,0,2;
    } else {
	foreach my $key ( keys %$data ) {
	    my $item = $data->{$key};
	    my $i = @{$self->group_fields};
	    foreach my $op ( @{$self->ops} ) {
		$op->end_group_data( $item->[$i++] );
	    }
	}

	# Sort the keys according to the sort value
	my @sorted_keys;
	if ( $self->sort_fields ) {
	    my $cmp = $self->{sort_cmp};
	    # This uses schwartzian transform
	    @sorted_keys =  map { $_->[0] } sort $cmp 
	      map { [ $_, item_data2sort_key( $data->{$_} ) ] } keys %$data;
	} else {
	    @sorted_keys = keys %$data;
	}

	# Keep only limit records
	if ( $self->{limit_num} ) {
	    my $limit = $self->{limit_num};
	    splice @sorted_keys, $limit
	      if ($limit < @sorted_keys );
	}
	
	# Delete unused keys
	%$data = map { $_ => $data->{$_} } @sorted_keys;
	$data->{_lr_sorted_keys} = \@sorted_keys;
    }

    $data;
}

sub create_entries {
    my ( $self, $group, $data ) = @_;

    $data ||= $self->{data};

    $group->show( $self->{limit_view} )
      if $self->{limit_view};

    # Either to the sorted group data
    # or the sorted keys
    my $array_ref;
    if ( $self->{min_memory} ) {
	$array_ref = $data;
    } else {
	$array_ref = $data->{_lr_sorted_keys};
    }

    my $field_count = @{$self->group_fields};
    foreach my $elmnt ( @$array_ref ) {
	my $item;
	if ( $self->{min_memory} ) {
	    $item = $elmnt;
	} else {
	    $item = $data->{$elmnt};
	}

	my $entry = new Lire::Report::Entry;
	my $i = 0;
	while  ( $i < $field_count ) {
	    $entry->add_name( $item->[$i++] );
	}
	foreach my $op ( @{$self->ops} ) {
	    $op->add_entry_value( $entry, $item->[$i++] );
	}
	$group->add_entry( $entry );
    }
}

# keep perl happy
1;

__END__

=pod

=head1 VERSION

$Id: Group.pm,v 1.27 2002/06/23 19:23:40 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2001, 2002 Stichting LogReport Foundation LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html or write to the Free Software 
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

=head1 AUTHOR

Francis J. Lacoste <flacoste@logreport.org>

=cut

