#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell
#
# $Id: paraget,v 1.18 2001/05/10 04:47:54 lrclause Exp $
#

use strict;
use English;
use IO::File;
use Fatal qw/ chdir mkdir /;
use File::Spec;
use File::Basename;
use Getopt::Long;
use vars qw/ $VERSION /;
use File::Copy;
use sigtrap 'handler' => 'cleanup_signal_handler', 'normal-signals';

use URI;
use Net::FTP;

use Net::Paraget::AssignmentManager;
use Net::Paraget::Context;
use Net::Paraget::ClientManager;
use Net::Paraget::Display;
use Net::Paraget::Interval;
use Net::Paraget::IntervalList;
use Net::Paraget::IntervalManager;
use Net::Paraget::Mirror;
use Net::Paraget::MirrorSet;
use Net::Paraget::MirrorSetGatherer;
use Net::Paraget::Server;
use Net::Paraget::ServerList;
use Net::Paraget::ServerQueue;

my $debug = 0;

$OUTPUT_AUTOFLUSH = 1;
$VERSION = '0.3.0';

my %options;
GetOptions( \%options,
	    'mirror=s@',
	    'homedir=s', 'mirror-set=s', 'output=s', 'leave-parts!',
	    'tmpdir=s',
	    'quiet', 'debug=i',
	    'size=i',
	    'child-program=s',
	    'testing',
	  );

my $file_in_mirror = shift or usage();
my $set_name = $options{'mirror-set'};
$debug = $options{debug} if defined $options{debug};

select STDERR if $options{testing};

my $mirror_set = Net::Paraget::MirrorSet->new();

my $user_homedir = $ENV{HOME} or die "cannot determine user home directory";

my $homedir;

if ( defined $options{homedir} )
{
    $homedir = $options{homedir};
}
else
{
    my $user_homedir = $ENV{HOME};
    defined $user_homedir or die 'cannot determine homedir: $HOME is not set';
    $homedir = File::Spec->catdir( $user_homedir, '.paraget' );
}

if ($set_name) 
{
    my $mirror_gatherer = Net::Paraget::MirrorSetGatherer->new();
    
    if (not -d "$homedir") {
	print "Initializing paraget...\n";
	my $mirrorsetdir = File::Spec->catdir( $homedir, "mirror-sets" );
	mkdir $homedir, 0777;
	mkdir $mirrorsetdir, 0777;
	chdir $mirrorsetdir;		
	system qw!wget --quiet ftp://paraget.sourceforge.net/pub/paraget/mirror-sets/*!;
	if ($CHILD_ERROR) {
	    die "Error getting mirror-sets: \n";
	}
	print "Initialization done.\n";
    }

    my $default_set_dir
	= File::Spec->catfile( $homedir, 'mirror-sets' );

    $mirror_gatherer->push_set_dirs( $default_set_dir );

    $mirror_set->add( $mirror_gatherer->gather( $set_name ) );
}

if ( $options{mirror} )
{
    foreach my $url ( @{ $options{mirror} } )
    {
	my $mirror = Net::Paraget::Mirror->new( $url );
	$mirror->type( 'argument' );
	$mirror_set->add( $mirror );
    }
}

die "no mirrors defined" unless $mirror_set->count_mirrors() > 0;

my $servers_file = File::Spec->catfile( $homedir, "servers.xml");

my $output
  = defined $options{output} ? $options{output} : basename $file_in_mirror;

########################################################################

my $server_queue = Net::Paraget::ServerQueue->new();
my $server_list  = Net::Paraget::ServerList->new();

$server_list->read_in_servers($servers_file);

foreach my $mirror ( $mirror_set->mirrors() )
{
    next if $mirror->disabled();
    
    my $server_id = $mirror->server_id();
    
    my $server = $server_list->servers( $server_id );
    
    unless ( $server )
    {
	$server = Net::Paraget::Server->new( host   => $mirror->url->host(),
					     scheme => $mirror->url->scheme(),
					     list   => $server_list,
					   );
	$server_list->servers( $server_id => $server );
    }
    
    $server->push_mirrors( $mirror );
    $mirror->server( $server );
    
    state( "got server $server_id", 3 );
}


$server_queue->add( $server_list->servers_values() );

##########################################################################

my $size = $options{size} || get_size( $file_in_mirror, $mirror_set );
defined $size or die "cannot determine the size of $file_in_mirror";
state( "size of $file_in_mirror is $size", 1 );

##########################################################################


my $assignment_manager = Net::Paraget::AssignmentManager->new();
my $client_manager     = Net::Paraget::ClientManager->new();
my $display            = Net::Paraget::Display->new();
my $interval_manager   = Net::Paraget::IntervalManager->new();

my $context = Net::Paraget::Context->new
  ( 
   assignment_manager  => $assignment_manager,
   client_manager      => $client_manager,
   display             => $display,
   interval_manager    => $interval_manager,
   mirror_set          => $mirror_set,
   server_list         => $server_list,
   server_queue        => $server_queue,
  );


$assignment_manager->hash_init( context => $context,
				file    => $file_in_mirror,
				min_splittable_eta => 5,
			      );

$client_manager->authoritative_size( $size );

$client_manager->child_program( $options{'child-program'} )
  if defined $options{'child-program'};

$client_manager->tmp_dir( $options{tmpdir} || $ENV{TMPDIR} || '/tmp' );

$interval_manager->interval_list( Net::Paraget::IntervalList->new() );

my $primary_interval = Net::Paraget::Interval->new( start => 0,
						    end   => $size,
						  );

$interval_manager->interval_list->initialize( $primary_interval );

$display->context( $context );

#########################################################################

while ( $interval_manager->uncompleted() > 0 )
{
    $interval_manager->cleanup_uncompleted();
    my $uncompleted_count = $interval_manager->uncompleted();
    
    state( "*****", 2 );
    state( "*****Doing an iteration; there are $uncompleted_count uncompleted intervals left", 2 );
    
    unless ( $mirror_set->count_able_mirrors() > 0 )
    {
	die "no more able mirrors available";
    }
    
    {
	my $percentage_completed
	  = int( 100 * ( 1 - ( $interval_manager->total_uncompleted_size() / $size )
		       ) 
	       );
	
	$percentage_completed = ( ( ' ' x ( 3 - length( $percentage_completed ) ) )
				  . $percentage_completed
				);

	my $servers_active = $client_manager->count_clients();

	$servers_active = ( ( ' ' x ( 2 - length( $servers_active ) ) )
			 . $servers_active
			 );
	
	my $graphic_width = 72 - 5;
	print "${percentage_completed}% ", $display->ascii_status_graphic( $graphic_width ), "\n";
    }

 KILL_STALLED:
    {
	my $average_speed = $server_queue->ranked_average_speed();
	last KILL_STALLED if not defined $average_speed;
	# Only if we have >0 speed have we any servers available
	foreach my $client ( $client_manager->clients() )
	{
	    next if not $client->alive();
	    if ( $client->speed() < $average_speed * 1/10 ) 
	    {
		state("Killing client ".$client->info()." with speed ".$client->speed()." for average speed ".$average_speed, 1);
		$client->stop();
		last KILL_STALLED; # We only assign one at a time
	    }
	}
    }
    
    my @assignments = $assignment_manager->get_n_assignments( 1 );

    foreach ( @assignments ) 
    {
	state( "assignment: " . $_->info(), 2 );
    }
    
    $client_manager->assign_clients( @assignments );
    
    my @servers_to_add;
    
    foreach my $client ( $client_manager->clients() )
    {
	next if $client->no_further_updates();
	
	my $info = $client->info();
	state( "***checking on client with $info", 3 );
	
	$client->checkup();
	
	my $report           = $client->report();
	my $assignment       = $report->assignment();
	my $interval         = $assignment->interval();
	my $mirror           = $assignment->mirror();
	my $server           = $assignment->server();
	my $server_id        = $server->id();
	my $amount_completed = $report->amount_completed();
	
	eval {
	    $interval->resize_from_report( $report );
	};

	if ($EVAL_ERROR) {
	    my $error = $EVAL_ERROR;
	    $interval_manager->dump();
	    die $error;
	}
	$server->update_from_report( $report );
	
	# refresh info after resize
	$info = $client->info();
	state( "new client info is $info", 3 );
	
	if ( $client->alive() )
	{
	    my $speed = $server->speed();
	    $speed = 'undefined' if not defined $speed;
	    
	    if ( $client->fulfilled_assignment() )
	    {
		state( "stopping fulfilled client", 2 );
		$client->stop();
	    } else
	    {
		state( "$server_id is alive, completed $amount_completed, speed $speed", 2 );
	    }
	}
	
	# note this is not the "else" of the check above, since the
	# stop could make the client not alive
	if ( not $client->alive() )
	{
	    state( "client is not alive and completed $amount_completed", 2 );
	    
	    if ( $client->had_error() )
	    {
		$mirror->set_disabled();
		state( "disabling mirror " . $mirror->as_string(), 2 );
		state( "setting as unassigned interval " . $interval->info(), 3 );
		$interval->clear_assignment();
		$assignment->clear_client();
	    }
	    
	    state( "What to do with $server_id now?", 3 );
	    
	    if ( not ( ( my $count = $server->count_able_mirrors() ) > 0 ) )
	    {
		state( "has no valid mirrors; disabling", 3 );
		$server->set_disabled();
	    } else
	    {
		state ( "has $count able mirrors; adding to queue", 3 );
		push @servers_to_add, $server;
	    }
	    
	    $client->set_no_further_updates();
	}
	
    }
    
    $server_queue->add( @servers_to_add );
    
    sleep 2;
}

$server_list->write_out_servers($servers_file);

if ( $interval_manager->uncompleted() == 0 )
{
    print "Server Reports:\n";
    my %servers = ();
    foreach my $client ( $client_manager->clients() )
    {
	my $server = $client->assignment()->server();
	next if not defined $server->speed();
	$servers{$server->as_string()} = $server;
    }

    my @sorted_servers = sort { $b->speed() <=> $a->speed() }
                              ( values %servers );

    foreach my $server ( @sorted_servers )
    {
	my $speed = $server->speed() or next;
	$speed /= 1024;
	$speed = int( $speed );
	my $amount = $server->amount_completed();
	$amount /= 1024;
	$amount = int( $amount );
	print '  ', $server->as_string(), " : $amount KB at $speed KB/s\n";
    }
    
    merge_outputs( $interval_manager, $output );
    
    foreach my $client ( $client_manager->clients() )
    {
	my $output_file = $client->output_file();
	next unless -e $output_file;
	
	unlink $output_file
	  or warn "error removing $output_file: $ERRNO";
    }
} else
{
    state( "did not complete downloads successfully", 1 );
    exit 1;
}

exit 0;

#########################################################################


sub state
{
    my ( $state, $level ) = @_; 
    warn "too many args to to state()" if @_ > 2;
    
    print STDERR $state, "\n" if not defined $level or $debug >= $level;
}



sub usage
{
    print STDERR "usage: $PROGRAM_NAME --mirror-set <name> ... <filepath>\n";
    exit 1;
}

sub max
{
    my ($a, $b) = @_;

    return $a < $b ? $b : $a;
}

sub get_size
{
    my ( $file_in_mirror, $mirror_set ) = @_;
    
    # We want it sorted in order of increasing overhead
    my @mirrors = sort 
	{ $a->server()->an_overhead() <=> $b->server()->an_overhead() }
    $mirror_set->mirrors();

    my $skip_untried = 0;

    foreach my $mirror ( @mirrors )
    {
	if ($mirrors[0]->server()->tries() > 0) {
	    $skip_untried = 1;
	}
    }

    foreach my $mirror ( @mirrors )
    {
	next if $skip_untried and $mirror->server()->tries() == 0;

	my $url = URI->new( $mirror->url()."/".$file_in_mirror );
	
	state( "checking the size of $url", 2 );
	
	my $protocol = $url->scheme();
	my $host     = $url->host()     or die "no host in URL $url";
	my $user     = $url->user()     || 'anonymous';
	my $pass     = $url->password   || 'anonymous@anonymous.org';
	my $path     = $url->path()     or die "no path to get in URL in $url";
	
	unless ( $protocol =~ /^ftp$/i )
	{
	    warn "protocol of $url not supported";
	    next;
	}
	
	my $ftp = Net::FTP->new( $host ) or next;
	
	unless ( $ftp->login( $user, $pass )
		 and $ftp->binary()
	       )
	{
	    warn ( 'error accessing ', $mirror->server->id(), ': ',
		   $ftp->message()
		 );
	    next;
	}
	
	return $size if defined ( $size = $ftp->size( $path ) );
	
	warn "error determing the size of $url: ", $ftp->message();
    }
    
    return undef;
}


sub merge_outputs
{
    my ( $interval_manager, $output_file ) = @_;
    state( "performing final merge", 3 );
    
    my $how_far = 0;
    
    my $output = IO::File->new( ">$output_file" )
      or die "cannot open $output_file for writing: $ERRNO";
    
    foreach my $interval ( sort { $a->start() <=> $b->start() }
			   $interval_manager->interval_list->as_list()
			 )
    {
	next unless $interval->completed();
	next unless $interval->end() > $how_far;
	
	my $interval_start = $interval->start();
	my $start_reading_at = $how_far - $interval_start;
	
	die "error: interval range from $interval_start to $how_far was missed somehow"
	  if $start_reading_at < 0;
	
	my $input_file = $interval->assignment->client->output_file();
	
	my $input = IO::File->new( "<$input_file" );
	
	$input->seek( $start_reading_at, SEEK_SET )
	  or die "error seeking: $ERRNO";
	
	xcopy( $input, $output );
	
	my $amount_copied = ( -s $input_file ) - $start_reading_at;
	$how_far += $amount_copied;
	
    }
}


sub xcopy
{
    my ( $input, $output ) = @_;
    my $buff;
    while ( 1 )
    {
	my $return = read $input, $buff, 4096;
	die "error reading: $ERRNO" if not defined $return;
	$output->print( $buff );
	last if $return == 0;
    }
}

sub cleanup 
{
    if (defined $client_manager)
    {
	foreach my $client ( $client_manager->clients() )
	{
	    if (-e $client->output_file())
	    {
		my $file = $client->output_file();
		unlink $file or warn "Couldn't remove $file: $ERRNO";
	    }
	}
    }
}

sub cleanup_signal_handler
{
    my ( $signal_name ) = @_;

    print "Caught signal $signal_name, cleaning up.\n";

    cleanup();

    exit(1);
}

__END__


########################################################################


=head1 NAME

paraget - parallel file get

=head1 SYNOPSIS

  paraget [options] --mirror-set name filepath

=head1 EXAMPLES

  paraget --mirror-set cpan modules/by-module/XML/XML-Parser-2.29.tar.gz

=head1 DESCRIPTION

B<paraget> implements getting a single file in parallel from
different mirror sites.

The first time you run paraget, it will
download a list of mirrors for various mirror-sets, such as
CPAN and Debian from paraget's anonymous FTP space.

=head1 ARGUMENTS

=over 4

=item filepath

Path to a file underneath the root of a mirrored tree.

=item --mirror-set name

Name of a mirror set.  See L<"~/.paraget/mirror-sets/">
for more information.

=back

=head1 OPTIONS

=over 4

=item --size filesize

In order to eliminate the step of determining the filesize
of the file to retrieve, one can pass it in as an argument.
This is most likely to be used if paraget is called
in an automated fashion.

=item --tmpdir I<directory>

Use I<directory> for temporary files.
Falls back to $TMPDIR, then F</tmp>.

=back

=head1 PERFORMANCE TIPS

In order to use the best mirrors for you, you should remove
what you think are probably bad mirrors in your mirror-sets-files,
those in F<~/.paraget/mirror-sets/>.

=head1 HEURISTIC

Currently, the heuristic paraget uses to choose how to assign
servers to B<intervals> is done by assigning the fastest server
to the interval that is taking the longest to complete.

=head1 FILES

=over 4

=item F<~/.paraget/mirror-sets/>

The default directory for finding mirror sets.  Underneath
this should be files named as mirror sets, which each
file containing a list of URL's that are mirrors for
a particular tree.  Lines can be commented out using
hash-marks (#).

=back

=head2 Examples

This could be for CPAN: ~/.paraget/mirror-sets/cpan:

  ftp://ftp.cpan.org/pub/CPAN
  ftp://download.sourceforge.net/pub/mirrors/CPAN

=head1 TODO

=head2 Persistent statistics

Implement persistent records-keeping, so that the speed of servers
is remembered across sessions.  Perhaps this could be offered
as an online-service, so that each paraget install would not
have to learn it on its own.

=head2 Heuristic tweaking/modularity

The heuristics that paraget uses are very hokey.

=head2 HTTP support

Currently, only FTP is supported; HTTP supports GET's with
a file offset, so we should be able to add this easily.

=head1 KNOWN BUGS

Intervals sometimes have negative intervals; parget dies
horribly.

Sometimes doesn't complete getting a file; stalls near the end.
Just give up and kill paraget.  We're very sorry.

Currently, we leave a dirty mess in the temporary directory.
This could be used if we implemented re-getting of files, but
since we don't, we should clean up after ourselves.

=head1 ABOUT

The homepage of this program is http://paraget.sourceforge.net/

=head1 COPYRIGHT

Copyright (C) 2000,2001
Lars Clausen <lrclause@uiuc.edu> and
Frank J. Tobin <ftobin@uiuc.edu>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, visit the following URL:
http://www.gnu.org/copyleft/gpl.html

=cut
