#!/usr/local/bin/perl -w

# Newsgrab - The Newsgrabber by Jesper L. Nielsen <lyager@phunkbros.dk>
# Newsrc support added by Roger Knobbe <rogerk+newsgrab@tislabs.com>
#
# Released under the GNU public license.
#
# $Id: newsgrab.pl,v 1.16.2.1 2002/01/08 20:15:06 jln Exp $
#

use strict;
use News::NNTPClient;
use News::Newsrc;
use Time::ParseDate;

# Added for authentication.
use Term::ReadKey;

# Forward declarations: the prototypes (and the paren-less call form of
# stat_print) must be known to the parser before the first call below.
sub progress_sub($$);
sub xover_fetch($);
sub xpat_fetch($);
sub stat_print;

# Unbuffered output, so the progress bar shows up while it is being drawn.
$| = 1;

# Every warning raised in this process (including the ones News::NNTPClient
# emits through Carp) is handed to warn_sig instead of being printed.
$SIG{'__WARN__'} = \&warn_sig;

# ------------------------------------------ global variable initialization ---

my $LIST = 1;                                   # 1: only list postings, 0: retrieve
my $NNTP_SERVER = $ENV{NEWSSERVER};             # may be undef; prompted for later
my $NNTP_PORT = '119';
my $NNTP_REGEX = '';                            # subject filter (last bare argument)
my $NNTP_USER = '';
my $NNTP_PASS = '';
my $NNTP_GROUP = '';
my $NNTP_RC = $ENV{HOME}."/.newsrc.newsgrab";   # server name is appended later
my $SORT = 0;
my $SORT_DATE = 0;
my $IGNORE_RC = 0;
my $VERBOSE = 0;

# Gather parameters.
#
# The loop runs on the number of remaining arguments rather than on the
# truth of the argument itself, so an argument of "0" or "" no longer ends
# parsing early.  Options that take a value show the usage text (and quit)
# when the value is missing instead of silently storing undef.
# Anything that is not a recognised option becomes the subject filter.
while (@ARGV) {
	my $arg = shift(@ARGV);

	if ($arg =~ /^(?:-h|--help)$/) {
		usage_sub();
		next;
	}
	if ($arg =~ /^--version$/) {
		version_sub();
		next;
	}
	if ($arg =~ /^(?:-v|--verbose)$/) {
		$VERBOSE = 1;
		next;
	}
	if ($arg =~ /^(?:-u|--user)$/) {
		auth_sub();
		next;
	}
	if ($arg =~ /^(?:-p|--port)$/) {
		usage_sub() unless @ARGV;
		$NNTP_PORT = shift(@ARGV);
		next;
	}
	if ($arg =~ /^(?:-s|--server)$/) {
		usage_sub() unless @ARGV;
		$NNTP_SERVER = shift(@ARGV);
		next;
	}
	if ($arg =~ /^(?:-g|--group)$/) {
		usage_sub() unless @ARGV;
		$NNTP_GROUP = shift(@ARGV);
		next;
	}
	if ($arg =~ /^(?:-l|--list)$/) {
		print "Listing (-l) is now default, use -r to retrieve\n";
		next;
	}
	if ($arg =~ /^(?:-r|--retrieve)$/) {
		$LIST = 0;
		next;
	}
	if ($arg =~ /^(?:-i|--ignore)$/) {
		$IGNORE_RC = 1;
		next;
	}
	if ($arg =~ /^--sort$/) {
		# 'd' (date) is the only supported sort key; anything else,
		# including a missing key, prints the usage text and quits.
		my $sort_after = shift(@ARGV);
		usage_sub() unless defined($sort_after) && $sort_after eq 'd';
		$SORT = 1;
		$SORT_DATE = 1;
		next;
	}
	$NNTP_REGEX = $arg;
}

# ------------------------------------------- global variable verification ---

# Get server if missing: neither -s nor $NEWSSERVER supplied one, so ask.
if (!$NNTP_SERVER) {
	print "Server: ";
	$NNTP_SERVER = <STDIN>;
	# EOF on STDIN: stop with a clear message instead of letting the
	# resulting warning end up in warn_sig as a bogus connection error.
	die "No news server given (use -s <server> or set \$NEWSSERVER)\n"
		unless defined $NNTP_SERVER;
	chomp $NNTP_SERVER;
	print "\n";
}

print "Connecting to ".($NNTP_GROUP ? $NNTP_GROUP." \@ " : "").$NNTP_SERVER.":".$NNTP_PORT."\n";

# $connected is cleared by warn_sig on a 5xx reply and checked again before
# each article is fetched.
my $connected=1;
# $c stays undefined while the constructor runs; warn_sig uses that to tell
# a failed connection apart from a later server error.
my $c = News::NNTPClient->new($NNTP_SERVER, $NNTP_PORT);

if ($NNTP_USER) {
	$c->authinfo($NNTP_USER,$NNTP_PASS);
}

# Load newsrc file (one file per server).
# stat_print prints "[<second argument>] <first argument>", so the short
# "newsrc" tag goes second and the message first.
$NNTP_RC .= ".".$NNTP_SERVER;
my $rc = News::Newsrc->new;
stat_print("Reading newsrc $NNTP_RC", "newsrc");
if ($rc->load($NNTP_RC)) {
	stat_print("loaded ".$rc->num_groups." group(s) from $NNTP_RC", "newsrc");
} else {
	stat_print("no groups in $NNTP_RC", "newsrc");
}

# Add group to newsrc file, if it doesn't exist.  Nothing is added when no
# group was requested (that case only lists the server's groups).
if ($NNTP_GROUP) {
	if ($rc->exists($NNTP_GROUP)) {
		stat_print("$NNTP_GROUP loaded ok", "newsrc");
	} else {
		$rc->add_group($NNTP_GROUP);
	}
}

# ------------------------------------------------------------ main program ---	

# If no group, list all groups on server and EXIT
if (!$NNTP_GROUP) {
	print "No group specified, listing all groups available on the server\n";
	foreach my $entry ($c->list('active')) {
		# The group name is the first field of each line.
		my ($newsgroup) = split(/[\t|\s]/, $entry);
		next unless defined $newsgroup;
		print $newsgroup."\n";
	}
	exit(0);
}

# Take all multipart articles, and join them in a hash (one hash key
# represents each complete file)
if ($NNTP_REGEX) {
	print "Finding all subjects in $NNTP_GROUP matching '$NNTP_REGEX'\n";
} else {
	print "Finding all subjects in $NNTP_GROUP\n";
}
print '-'x80;
print "\n";

# First and last article number of the group; also read by xover_fetch
# and xpat_fetch.
my ($first, $last) = $c->group($NNTP_GROUP);

# %arts is a collection of multipart messages, keyed by subject;
# @ordered_keys is the order in which they are reported/retrieved.
my %arts;
my @ordered_keys;
if ($SORT) {
	# Sorting needs the extra header fields that only XOVER delivers.
	%arts = xover_fetch($NNTP_REGEX);
	if ($SORT_DATE) {
		# Oldest first.  Compare $a against $b (the comparator used to
		# compare $a with itself, which left the list unsorted); a
		# missing date counts as 0 and equal dates fall back to the
		# subject so the order is stable.
		@ordered_keys = sort {
			($arts{$a}->{'date'} || 0) <=> ($arts{$b}->{'date'} || 0)
				|| $a cmp $b
		} keys %arts;
	} else {
		# Default is per key
		@ordered_keys = sort(keys %arts);
	}
} else {
	%arts = xpat_fetch($NNTP_REGEX);
	@ordered_keys = sort(keys %arts);
}

print "\n";

# Every multipart posting now has an entry in %arts.  Report the state of
# each one and, unless we are only listing, download the complete ones.
foreach my $key (@ordered_keys) {
	# $key: subject of the posting with the "(part/total)" marker removed
	my $post  = $arts{$key};
	my $total = $post->{'total'};
	my $slots = scalar @{$post->{'message'}};

	# Some posts start with element 0, some don't.  If the message array
	# holds more slots than the total given in the subject, start checking
	# at the first slot that belongs to the numbered parts.
	my $art_no = ($slots > $total) ? $slots - $total : 0;

	# Start state is 'I' (incomplete); walking the parts can prove it wrong.
	for (; $art_no <= $total; $art_no++) {
		my $article = $post->{'message'}[$art_no];
		last if !$article;              # a part is missing
		if ($rc->marked($NNTP_GROUP, $article)) {
			$post->{'complete'} = 'R';  # newsrc says we already have it
			last;
		}
		if ($art_no == $total) {
			# Proved wrong. This posting is actually complete
			$post->{'complete'} = 'C';
		}
	}

	# Determine whether to download the file or skip it.
	# There are three states:
	# I: Incomplete (not all parts of the posting were found)
	# R: Already retrieved according to the newsrc file
	# C: Complete posting
	stat_print($key, $post->{'complete'});

	next if $LIST;                                       # list mode only
	next if $post->{'complete'} eq 'R' && !$IGNORE_RC;   # already have it
	next if $post->{'complete'} eq 'I';                  # parts missing

	# Retrieve mode: fetch every part and uudecode it into one file.
	my ($file, $mode, $out);
	my @markable;       # articles to mark as read in the newsrc file
	my $skipping = 0;   # set when the target file already exists
	my @wanted = grep { $_ } @{$post->{'message'}};
	my $done = 0;

	foreach my $artnum (@wanted) {
		push @markable, $artnum;

		if (!$skipping) {
			if (!$connected) {
				# warn_sig saw a 5xx reply; reconnect before going on.
				$c->connect;
				$c->group($NNTP_GROUP);
				$connected=1;
			}
			foreach my $line ($c->body($artnum)) {
				if (!defined $file) {
					# We haven't hit the first line of the UU-message
					# yet ("begin <octal mode> <name>"); keep looking.
					next unless $line =~ /^begin\s+([0-7]{3,4})\s+(\S.*?)\s*$/;
					my ($hdr_mode, $hdr_name) = ($1, $2);

					# The name comes from the article, i.e. from an
					# untrusted source: drop any directory part so the
					# file is always created in the current directory.
					$hdr_name =~ s{.*[/\\]}{};
					next if $hdr_name eq '' || $hdr_name eq '.' || $hdr_name eq '..';

					($mode, $file) = ($hdr_mode, $hdr_name);
					if (-e $file) {
						print STDERR "File: '$file' already exists. skipping\n";
						$skipping = 1;
						last;
					}
					open($out, '>', $file) || die "Couldn't create $file: $!\n";
					binmode($out);
					next;
				}

				last if $line =~ /^end/;
				next if $line =~ /[a-z]/;   # uuencoded data has no lowercase
				# The first character encodes the line length; ignore
				# lines whose real length does not agree with it.
				next unless int((((ord($line) - 32) & 077) + 2) / 3) ==
				            int(length($line) / 4);
				print {$out} unpack("u", $line) or die "Couldn't write $file: $!\n";
			}
		}

		# Done fetching this part, update the progress bar.
		progress_sub(++$done, scalar @wanted);
	}

	# Only touch the file when this run created it; permission bits only,
	# so a posting cannot ask for setuid/setgid/sticky.
	if ($out) {
		close($out) || die "Couldn't write $file: $!\n";
		chmod oct($mode) & 0777, $file;
	}

	# Mark articles as read in the newsrc file
	$rc->mark_list($NNTP_GROUP, \@markable);
	$rc->save;
}



# Normal completion.
exit(0);

# -------------------------------------------------------------- functions ---

# Print the command line summary to STDERR and terminate with status 0.
# Takes no arguments and never returns.
sub usage_sub {

	print STDERR
		"Usage: $0 [OPTIONS] [-g group] [match]\n\n",
		"  OPTIONS:\n",
		"  -h, --help: This help (and quit)\n",
		"  -v, --verbose: Show a progress bar while retrieving\n",
		"  -l, --list: List matching postings (this is the default)\n",
		"  -r, --retrieve: Retrieve files that match <exp>\n",
		"  -u, --user: Prompts for username and password\n",
		"  -p, --port <port>: Port to connect to (default: 119)\n",
		"  -s, --server <server>: Server to connect to (default: \$NEWSSERVER)\n",
		"  -i, --ignore: Ignore what .newsrc says\n",
		"  --version: Print version information, then quit\n",
		"  --sort: Sort output by: 'd' for date\n",
		"          (This requires more header information to be\n",
		"           retrieved, and is therefore slower)\n",
		"\n",
		"  -g, --group <group>: Group to retrieve from (no group lists all)\n",
		"  match: Subject must contain this string (default: 'none')\n",
		"\n";
	exit(0);
}

# Ask for the NNTP user name and password on the terminal and store them
# in $NNTP_USER / $NNTP_PASS.  The password is read with echo switched off.
# End-of-file on input yields an empty string rather than undef, so no
# warning is raised (warnings end up in warn_sig, which would report a
# connection failure).
sub auth_sub {

	print "Username: ";
	my $user = <STDIN>;
	$user = '' unless defined $user;
	chomp $user;
	$NNTP_USER = $user;

	print "Password: ";
	ReadMode('noecho');
	# Make sure the terminal gets its echo back even if reading fails.
	my $pass = eval { ReadLine(0) };
	my $read_error = $@;
	ReadMode(0);		# Reset readmode
	print "\n";
	die $read_error if $read_error;

	$pass = '' unless defined $pass;
	chomp $pass;
	$NNTP_PASS = $pass;
}
	
# Write the program name and version to STDERR, then quit with status 0.
sub version_sub {
	my $banner = "NewsGrab Version 0.1.4\n";
	print {*STDERR} $banner;
	exit(0);
}

# __WARN__ handler.  Reports the reply code of the NNTP connection (if one
# exists) and decides how to go on:
#   no connection object : report the failed connect and exit(1)
#   2xx reply            : return, nothing is wrong
#   5xx (or higher)      : print the server message, clear $connected, return
#   anything else        : exit(1)
# NOTE(review): the warning text passed in @_ is not looked at or printed.
sub warn_sig {
	print "Got unexpected answer from server ";
	my $reply = $c ? $c->code : undef;
	print "(Code: ".$reply.")" if $c;
	print "\n";

	unless ($c) {
		# Connection object not created, we were unable to connect
		# to the news server
		print "Error: Unable to connect to $NNTP_SERVER:$NNTP_PORT\n";
		exit(1);
	}

	return if $reply >= 200 && $reply < 300;
	exit(1) if $reply < 500;

	print "Error: ".$c->message()."\n";
	$connected = 0;
}

# Print $line followed by " [$stat]", wrapping $line onto continuation
# lines so that text plus status fit into 77 columns.
#   $line : text to print (nothing is printed when it is empty)
#   $stat : status tag, shown in brackets after the first chunk only
sub stat_print_right {
	my ($line, $stat) = @_;

	# $line_len is the room left for $line once $stat has taken its share
	my $column_width = 77;
	my $line_len = $column_width - length($stat);

	# A status of 77 or more characters leaves no room at all; a step of
	# zero or less would keep the loop below from ever finishing, so print
	# the line unwrapped in that case.
	if ($line_len < 1) {
		$line_len = length($line) || 1;
	}

	for (my $i = 0; $i < length($line); $i += $line_len) {
		my $chunk = substr($line, $i, $line_len);
		if ($i == 0) {
			printf "%s [%s]\n", $chunk, $stat;
		} else {
			print $chunk."\n";
		}
	}
}

# Print "[$stat] $line", wrapping $line onto continuation lines that are
# indented to line up behind the status tag, so that tag plus text fit
# into 77 columns.
#   $line : text to print (nothing is printed when it is empty)
#   $stat : status tag, shown in brackets in front of the first chunk
sub stat_print {
	my ($line, $stat) = @_;

	# $line_len is the room left for $line once $stat has taken its share
	my $column_width = 77;
	my $line_len = $column_width - length($stat);

	# A status of 77 or more characters leaves no room at all; a step of
	# zero or less would keep the loop below from ever finishing, so print
	# the line unwrapped in that case.
	if ($line_len < 1) {
		$line_len = length($line) || 1;
	}

	# Continuation lines start below the text: "[" + $stat + "] "
	my $indent = " " x (length($stat) + 3);

	for (my $i = 0; $i < length($line); $i += $line_len) {
		my $chunk = substr($line, $i, $line_len);
		if ($i == 0) {
			printf "[%s] %s\n", $stat, $chunk;
		} else {
			print $indent.$chunk."\n";
		}
	}
}


# Single-line variant: print $line left-justified and padded, followed by
# " [$stat]".  Text that does not fit next to the status is cut off.
#   $line : text to print
#   $stat : status tag, shown in brackets at the end of the line
sub stat_print_old {
	my ($text, $status) = @_;

	# 77 columns are shared between the text and the status
	my $total_cols = 77;
	my $room = $total_cols - length($status);

	# Keep only what fits in front of the status
	my $clipped = substr($text, 0, $room);
	print sprintf("%-*s [%s]\n", $room, $clipped, $status);
}


# BLOCK
# The enclosing block gives progress_sub a private variable that keeps its
# value between calls.
{

# Number of bar characters already printed for the current file
my $last_res;

# Advance an 80 column progress bar on STDOUT.  Does nothing unless
# $VERBOSE is set.
#   $current_no : number of the part just finished, counting from 1
#   $total_no   : total number of parts
# Call number 1 starts a new bar; the call with $current_no == $total_no
# finishes it with a newline.  Calls without a positive part number or
# total are ignored, which also avoids a division by zero.
sub progress_sub($$) {

	return unless $VERBOSE;
	my ($current_no, $total_no) = @_;
	my $cols = 80;

	return unless $current_no && $total_no && $total_no > 0;

	# First article, initialize values
	if ($current_no == 1 || !defined $last_res) {
		$last_res = 0;
	}

	# Work in whole columns: the bar position is rounded down once and the
	# difference to the previous position is printed, so the steps always
	# add up to exactly $cols at the last part.
	my $res = int(($current_no * $cols) / $total_no);
	$res = $cols if $res > $cols;
	print "X" x ($res - $last_res) if $res > $last_res;

	# Last article, set newline
	print "\n" if $current_no == $total_no;

	$last_res = $res;
}
}

# Collect the multipart postings of the current group from the XOVER data
# of articles $first..$last.
#   argument : regular expression; only overview lines matching it are
#              used (an empty string means "no filter")
#   returns  : hash (as a list) keyed by the subject with its part marker
#              removed; each value is a hash ref with
#                date     - posting date as returned by parsedate()
#                complete - 'I' (incomplete) until the caller decides otherwise
#                total    - number of parts announced in the subject
#                message  - array ref, article number per part number
#                m_count  - number of overview lines seen for this posting
sub xover_fetch($) {
	my $NNTP_REGEX = shift;
	my %arts;
	foreach my $head ($c->xover($first, $last)) {
		# Only filter when a pattern was given: an empty pattern does not
		# mean "match everything" in Perl, it re-runs the last pattern
		# that matched successfully.
		next if defined($NNTP_REGEX) && length($NNTP_REGEX)
		        && $head !~ /$NNTP_REGEX/;

		# From RFC 2980: each line holds the article number followed by
		# the tab separated overview fields, in this order: subject,
		# author, date, message-id, references, byte count, line count
		# (optional fields may follow).  Only the article number, the
		# subject and the date are used here.
		my ($mid, $subject, undef, $date) = split(/\t/, $head);
		next unless defined $subject;

		# Split the subject around its part marker: "(2/10)" or "[2/10]"
		# (the character classes also accept '|' as a delimiter).
		# Subjects without a marker are skipped; the captures of an
		# earlier match must not be reused for them.
		next unless $subject =~ /^(.*)[(|\[]([0-9]+)\/([0-9]+)[)|\]](.*)$/;
		my ($before, $part, $total, $after) = ($1, $2, $3, $4);

		next unless ($before || $after); # Skip, we can't make index

		# The index is the subject minus the part marker
		my $name = $before.$after;

		# initialize
		if (!$arts{$name}) {
			$arts{$name}->{'date'} = parsedate($date);
			$arts{$name}->{'complete'} = 'I';	# Incomplete
			# Take the announced total even when the first part seen is
			# part 0; a total of 0 is treated as a single part.
			$arts{$name}->{'total'} = ($total > 0) ? $total : 1;
		}

		# Part N goes into slot N; with a total of 0 everything goes into
		# slot 0.
		my $slot = ($total > 0) ? ($part + 0) : 0;
		$arts{$name}->{'message'}[$slot] = $mid;
		$arts{$name}->{'m_count'}++;
	}
	return %arts;
}
	
# Collect the multipart postings of the current group with an XPAT query
# on the Subject header of articles $first..$last.
#   argument : text the subject must contain; it is sent to the server
#              wrapped in '*' wildcards
#   returns  : hash (as a list) keyed by the subject with its part marker
#              removed; each value is a hash ref with
#                complete - 'I' (incomplete) until the caller decides otherwise
#                total    - number of parts announced in the subject
#                message  - array ref, article number per part number
#                m_count  - number of result lines seen for this posting
sub xpat_fetch($) {
	my $NNTP_REGEX = shift;
	my %arts;
	foreach my $head ($c->xpat("Subject", $first, $last, '*'.$NNTP_REGEX.'*')) {
		# Split the result line from XPAT into
		#   article number, subject text before the part marker,
		#   part number, total number of parts, rest of the subject.
		# The marker looks like "(2/10)" or "[2/10]" (the character
		# classes also accept '|' as a delimiter).  Lines without a
		# marker are skipped; the captures of an earlier match must not
		# be reused for them.
		next unless
			$head =~ /^([0-9]+)\s(.*)[(|\[]([0-9]+)\/([0-9]+)[)|\]](.*)$/;
		my ($mid, $before, $part, $total, $after) = ($1, $2, $3, $4, $5);

		next unless ($before || $after);	# Skip if we can't make index

		# Our index will be the subject minus the multipart section
		# (meaning the beginning of the subject concatenated with the end
		# of the subject)
		my $name = $before.$after;

		# initialize
		if (!$arts{$name}) {
			$arts{$name}->{'complete'} = 'I';
			# Take the announced total even when the first part seen is
			# part 0; a total of 0 is treated as a single part.
			$arts{$name}->{'total'} = ($total > 0) ? $total : 1;
		}

		# Part N goes into slot N; with a total of 0 everything goes into
		# slot 0.
		my $slot = ($total > 0) ? ($part + 0) : 0;
		$arts{$name}->{'message'}[$slot] = $mid;
		$arts{$name}->{'m_count'}++;
	}
	return %arts;
}


