#!/usr/bin/perl -w

# Newsgrab - The Newsgrabber by Jesper L. Nielsen <lyager@phunkbros.dk>
# Newsrc support added by Roger Knobbe <rogerk+newsgrab@tislabs.com>
#
# Released under the GNU public license.
#
# $Id: newsgrab.pl,v 1.12 2001/10/29 21:33:42 jln Exp $
#

use strict;
use News::NNTPClient;
use News::Newsrc;

# Added for authentication.
use Term::ReadKey;

$|=1;	# Set autoflush for progressbar

# Forward declarations: both subs are defined at the end of the file but are
# called earlier (stat_print is also called without parentheses).
sub progress_sub($$);
sub stat_print;

# Bind function for handling calls to Carp() in News::NNTPClient.
# NOTE: a __WARN__ hook receives every warning raised while the script runs,
# not only those issued by the NNTP client.
$SIG{__WARN__} = \&warn_sig;

# ------------------------------------------ global variable initialization ---
# File-scoped configuration. The option parser below and the subs at the end
# of the file read and update these variables.

# True: only list matching postings. Cleared by -r to retrieve them.
my $LIST = 1;
# News server; defaults to $NEWSSERVER, asked for interactively if empty.
my $NNTP_SERVER = $ENV{NEWSSERVER};
# Port handed to the NNTP client together with the server name.
my $NNTP_PORT = '119';
# Subject pattern handed to XPAT; replaced by the non-option argument.
my $NNTP_REGEX = '*';
# Credentials, filled in by auth_sub() when -u is given.
my $NNTP_USER = '';
my $NNTP_PASS = '';
# Group to work on; when empty the groups on the server are listed instead.
my $NNTP_GROUP = '';
# newsrc file used to remember which articles have been retrieved.
my $NNTP_RC = $ENV{HOME}."/.newsrc.newsgrab";
# Set by -i: retrieve postings even if the newsrc marks them as read.
my $IGNORE_RC = 0;
# Set by -v: enables the progress bar drawn by progress_sub().
my $VERBOSE = 0;

# Gather parameters
#
# Options are compared as whole words. Substring matching would treat any
# argument that merely contains e.g. "-s" or "-p" (such as the search
# expression "*-part*") as an option. The loop tests for definedness so that
# an argument of "0" does not end the parsing early.
#
# The first argument that is not a known option becomes the search expression.
while (defined(my $arg = shift(@ARGV))) {
	if ($arg eq '-h' || $arg eq '--help') {
		usage_sub();
		next;
	}
	if ($arg eq '--version') {
		version_sub();
		next;
	}
	if ($arg eq '-v' || $arg eq '--verbose') {
		$VERBOSE= 1;
		next;
	}
	if ($arg eq '-u' || $arg eq '--user') {
		# Prompts for user name and password
		auth_sub();
		next;
	}
	if ($arg eq '-p' || $arg eq '--port') {
		$NNTP_PORT = shift(@ARGV);
		next;
	}
	if ($arg eq '-s' || $arg eq '--server') {
		$NNTP_SERVER = shift(@ARGV);
		next;
	}
	if ($arg eq '-g' || $arg eq '--group') {
		$NNTP_GROUP = shift(@ARGV);
		next;
	}
	if ($arg eq '-l' || $arg eq '--list') {
		print "Listing (-l) is now default, use -r to retrieve\n";
		next;
	}
	if ($arg eq '-r' || $arg eq '--retrieve') {
		$LIST = 0;
		next;
	}
	if ($arg eq '-i' || $arg eq '--ignore') {
		$IGNORE_RC = 1;
		next;
	}
	# Anything else is the expression to search the subjects for
	$NNTP_REGEX = $arg;
}

# ------------------------------------------- global variable verification ---

# No server from the environment or the command line: ask for one
unless ($NNTP_SERVER) {
	print "Server: ";
	chomp($NNTP_SERVER = <STDIN>);
	print "\n";
}

my ($mid, $head, %arts);

printf "Connecting to %s%s:%s\n",
	($NNTP_GROUP ? $NNTP_GROUP." \@ " : ""), $NNTP_SERVER, $NNTP_PORT;

# $connected is cleared by warn_sig() when the server reports a 5xx error
my $connected = 1;
my $c = News::NNTPClient->new($NNTP_SERVER, $NNTP_PORT);

# Only authenticate when -u supplied a user name
$c->authinfo($NNTP_USER, $NNTP_PASS) if $NNTP_USER;

# Read the newsrc file and report how many groups it contained
my $rc = News::Newsrc->new;
stat_print("newsrc", "Reading newsrc $NNTP_RC");
stat_print("newsrc", $rc->load($NNTP_RC)
	? "loaded ".$rc->num_groups." group(s) from $NNTP_RC"
	: "no groups in $NNTP_RC");

# Make sure the requested group has an entry in the newsrc data
if (!$rc->exists($NNTP_GROUP)) {
	$rc->add_group($NNTP_GROUP);
} else {
	stat_print("newsrc", "$NNTP_GROUP loaded ok");
}

# ------------------------------------------------------------ main program ---	

# If no group, list all groups on server and EXIT
if (!$NNTP_GROUP) {
	print "No group specified, listing all groups available on the server\n";
	foreach my $active_line ($c->list('active')) {
		# Only the first whitespace separated field (the group name) is
		# printed. split ' ' also skips leading whitespace, so a blank line
		# gives no name at all and is ignored instead of printing an
		# undefined value.
		my ($newsgroup) = split(' ', $active_line);
		next unless defined $newsgroup;
		print $newsgroup."\n";
	}
	exit(0);
}

# Take all multipart articles, and join them in a hash (one hash key
# represents each complete file). The value is an array reference whose
# slot n holds a hash describing part n of the posting.
print "Finding all subject in $NNTP_GROUP matching '$NNTP_REGEX'\n";
print '-'x80;
print "\n";
my ($first, $last) = $c->group($NNTP_GROUP);
foreach $head ($c->xpat("Subject", $first, $last, $NNTP_REGEX)) {
	# Attempt to split the result from XPAT into:
	#   article number, subject text before the part marker, part number,
	#   total number of parts, rest of the subject after the marker.
	# The part marker looks like "(3/12)" or "[3/12]".
	#
	# The result of the match has to be checked: after a failed match the
	# capture variables do not describe this line. Lines without a part
	# marker can not be indexed and are skipped.
	next unless $head =~ /^([0-9]+)\s(.*)[\(\[]([0-9]+)\/([0-9]+)[\)\]](.*)$/;
	my ($art_id, $name, $part_num, $part_total, $rest) = ($1, $2, $3, $4, $5);

	# Our index will be the subject minus the multipart section (meaning
	# beginning part of subject concatenated with the end part of the subject)
	next unless ($name || $rest);	# Skip if we can't make index

	if (!$part_total) {
		# "(n/0)" gives no usable part information: treat as a single post
		$part_num = 1;
		$part_total = 1;
	}
	# A part numbered 0 of a real multipart posting keeps slot 0. Filing it
	# as "part 1 of 1" would overwrite the real first part and make the
	# posting look complete. Retrieval only walks the slots 1..total.

	$arts{$name.$rest}[$part_num] = {
		'mid'        => $art_id,
		'part_num'   => $part_num,
		'part_total' => $part_total,
	};
}

print "\n";

# Now all the multiparted articles have an element in the
# %arts hash, and we can retrieve them, if they are complete
my ($key, $val, $state, $art_no, $parts, $total_size);
foreach $key (sort(keys %arts)) {
	# $key    : Subject of the posting (index built while collecting)
	# $val    : Array reference, slot n describes part n of the posting
	# $state  : State of the posting:
	#             'I' incomplete (not all parts were found)
	#             'C' complete
	#             'R' already retrieved according to the newsrc file
	# $art_no : Part counter
	# $parts  : Number of parts announced by part 1 (0 if part 1 is missing)

	$val = $arts{$key};
	if (!$val->[1]) {
		# Part 1 is missing, add a placeholder to record the state in
		$val->[1] = {};
		$parts = 0;
	} else {
		$parts = $val->[1]{'part_total'};
	}

	# A posting counts as retrieved when its first part is marked as read
	my $seen = ($parts && $rc->marked($NNTP_GROUP, $val->[1]{'mid'})) ? 1 : 0;

	# Completeness is checked independently of the read mark. Otherwise -i
	# would start retrieving a posting with missing parts.
	# $art_no stops at the first missing part, or at the last part.
	my $complete = 0;
	for ($art_no = 1; $art_no <= $parts; $art_no++) {
		last if !$val->[$art_no];
		if ($art_no == $parts) {
			$complete = 1;
			last;
		}
	}

	$state = $seen ? 'R' : ($complete ? 'C' : 'I');
	$val->[1]{'state'} = $state;

	# Postings already retrieved are reported as "(1/total)"
	stat_print $key."(".($seen ? 1 : $art_no)."/$parts)", $state;

	# Skip if list mode
	next if $LIST;
	# If already retrieved, skip unless $IGNORE_RC was set
	next if $seen && !$IGNORE_RC;
	# Skip if incomplete
	next unless $complete;

	# Retrieve the parts in order and uudecode them into one file.
	# $file and $mode come from the "begin <mode> <name>" line, $out is the
	# handle of the file being written (undefined until that line is seen).
	my ($file, $mode, $out);
	my @markable;
	my $skipping = 0;

	for ($art_no = 1; $art_no <= $parts; $art_no++) {
		my $artnum = $val->[$art_no]{'mid'};
		push @markable, $artnum;

		if (!$skipping) {
			if (!$connected) {
				# warn_sig() cleared $connected after a server error
				$c->connect;
				$c->group($NNTP_GROUP);
				$connected = 1;
			}
			foreach my $line ($c->body($artnum)) {
				if (!defined $file) {
					# We haven't hit first line of the UU-message yet
					# keep looking for it. Mode and name are both required,
					# so ordinary text starting with "begin" is not taken
					# for a header.
					next unless $line =~ /^begin\s+([0-7]+)\s+(.+?)\s*$/;
					my ($hdr_mode, $hdr_name) = ($1, $2);

					# The name is taken from the posting and can not be
					# trusted: only keep the last path component so nothing
					# is written outside the current directory.
					$hdr_name =~ s{^.*[/\\]}{};
					next if $hdr_name eq '' || $hdr_name eq '.' || $hdr_name eq '..';

					($mode, $file) = ($hdr_mode, $hdr_name);
					if (-e $file) {
						print STDERR "File: '$file' already exists. skipping\n";
						$skipping = 1;
						last;
					}
					open($out, '>', $file) || die "Couldn't create $file: $!\n";
					binmode($out);
					next;	# The header line itself carries no data
				}

				last if $line =~ /^end/;
				next if $line =~ /[a-z]/;
				# Only decode lines whose length fits their length character
				next unless int((((ord($line) - 32) & 077) + 2) / 3) ==
					int(length($line) / 4);
				print {$out} unpack("u", $line) or die "Couldn't write $file: $!\n";
			}
		}

		# Done fetching, upgrade progress_bar
		progress_sub($art_no, $parts);
	}

	# Only touch the file if we created it ourselves; an already existing
	# file that was skipped keeps its permissions.
	if ($out) {
		close($out) || die "Couldn't write $file: $!\n";
		# The mode comes from the posting, keep the permission bits only
		chmod(oct($mode) & 0777, $file);
	}

	# Mark articles as read in the newsrc file
	$rc->mark_list($NNTP_GROUP, \@markable);
	$rc->save;
}



# Everything was processed without a fatal error: report success. Errors
# leave through die or the exit(1) in warn_sig.
exit(0);

# -------------------------------------------------------------- functions ---

# Print the command line help on STDERR and terminate with exit status 0.
sub usage_sub {

	print STDERR
		"Usage: $0 [OPTIONS] [-g group] [exp]\n\n",
		"\tOPTIONS:\n",
		"\t-h: This help (and quit)\n",
		"\t-v: Verbose, show a progress bar while retrieving\n",
		"\t-l: List files that match <exp> (default)\n",
		"\t-r: Retrieve files that match <exp>\n",
		"\t-u: Prompts for username and password\n",
		"\t-p <port>: Port to connect to (default: 119)\n",
		"\t-s <server>: Server to connect to (default: \$NEWSSERVER)\n",
		"\t-i: Ignore what .newsrc says\n",
		"\t--version: Print version information, then quit\n",
		"\n",
		"\t-g <group>: Group to retrieve from (no group lists all)\n",
		"\texp: Regular expression to grab (default: *)\n",
		"\n";
	exit(0);
}

# Ask for the user name and password on the terminal and store them in
# $NNTP_USER and $NNTP_PASS. The password is read with echo switched off.
sub auth_sub {

	print "Username: ";
	$NNTP_USER = <STDIN>;
	# End of input gives undef, treat it as an empty answer
	$NNTP_USER = '' unless defined $NNTP_USER;
	chomp $NNTP_USER;

	print "Password: ";
	ReadMode('noecho');
	$NNTP_PASS = ReadLine(0);
	# Switch the terminal back. Otherwise echo stays off for the server
	# prompt and for the shell after the program has ended.
	ReadMode('restore');
	$NNTP_PASS = '' unless defined $NNTP_PASS;
	chomp $NNTP_PASS;
	print "\n";
}
	
# Print the program version on STDERR and terminate with exit status 0.
sub version_sub {
	my $banner = "NewsGrab Version 0.1.4\n";
	print STDERR $banner;
	exit(0);
}

# Handler installed in $SIG{__WARN__}.
#
# It is called with the text of the warning. The decision what to do is
# based on the last response code of the NNTP connection in $c:
#   2xx        : keep going
#   5xx        : print the server message and clear $connected, so that the
#                retrieval loop reconnects
#   otherwise  : exit with status 1 (also when there is no connection
#                object, i.e. the connection could not be set up)
#
# The warning text itself is passed on to STDERR. The hook sees every
# warning raised by the script, and without the text a warning that has
# nothing to do with the server can not be told apart from a server error.
sub warn_sig {
	my ($warning) = @_;

	# Ask the client only once for the response code
	my $code = $c ? $c->code : undef;

	print "Got unexpected answer from server ";
	if ($c) {
		print "(Code: ".(defined $code ? $code : '').")";
	}
	print "\n";

	if (defined $warning && length $warning) {
		$warning .= "\n" unless $warning =~ /\n\z/;
		print STDERR "Warning: ".$warning;
	}

	if (defined $code && ($code >= 200) && ($code < 300)) {
		return;
	}
	if (defined $code && ($code >= 500)) {
		print "Error: ".$c->message()."\n";
		$connected = 0;
		return;
	}

	if (!$c)
	{
		# Connection object not created, we were unable to connect
		# to the news server
		print "Error: Unable to connect to $NNTP_SERVER:$NNTP_PORT\n";
	}
	exit(1);
}

# Print one status line: the text left justified, followed by the status
# in square brackets.
#
#   $label  : text to show, cut off if it does not fit
#   $status : short status string shown as " [status]"
#
# Text and status share 77 columns, so the text gets whatever the status
# leaves over.
sub stat_print {
	my ($label, $status) = @_;

	my $width = 77 - length($status);
	my $shown = substr($label, 0, $width);
	my $padded = sprintf('%-*s', $width, $shown);

	printf "%s [%s]\n", $padded, $status;
}


{

# Number of columns already drawn for the posting in progress. Kept between
# calls, private to progress_sub.
my $last_res;

# Draw the progress bar for the retrieval of one posting. Does nothing
# unless $VERBOSE is set.
#
#   $current_no : number of the part that was just fetched (counting from 1)
#   $total_no   : total number of parts
#
# The bar is 80 columns wide when the last part is done. Each call prints
# the columns that were gained since the previous call, and a newline
# after the last part.
sub progress_sub($$) {

	return unless $VERBOSE;
	my ($current_no, $total_no) = @_;
	my $cols = 80;

	# Nothing to draw without parts (and nothing to divide by)
	return unless $total_no > 0;

	# First article, initialize values
	if ($current_no == 1 || !defined $last_res) {
		$last_res = 0;
	}

	# Work in whole columns. With fractions every call loses the part after
	# the decimal point and the finished bar ends up too short.
	my $res = int(($current_no * $cols) / $total_no);
	$res = $cols if $res > $cols;
	print "X" x ($res - $last_res) if $res > $last_res;

	# Last article, set newline
	if ($current_no == $total_no) {
		print "\n";
	}
	$last_res = $res;
}
}
