#!/usr/local/bin/perl5.00502
##    -*-perl-*-
##    ubh - The Usenet Binary Harvester - Perl console application which 
##          automatically discovers, downloads, and decodes single-part 
##          and multi-part Usenet binaries.
##
##    Copyright (C) 2000  Gerard Lanois
##                        gerard@users.sourceforge.net
##                        P.O. Box 507264
##                        San Diego, CA 92150-7264
##
##    This program is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program; if not, write to the Free Software
##    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

use strict;

############################################################################

package Ubh::Platform;

use vars qw($VERSION);

$VERSION = "1.5";

use Config;

sub new
{
    my ($class) = @_;

    my $ubhrcname;
    my $newsrcname;
    my $datadir;
    my $max_filename_length;
    my $separator;

    if ($Config{'osname'} =~ /^macos/i) {
        # Macintosh MacPerl
        $ubhrcname  = "hda:ubh:ubhrc"; #  suggest unquoted 'FORCEDIR = forced'
        $newsrcname = "hda:ubh:newsrc";
        $datadir    = "hda:ubh:data";
        $max_filename_length = 32;
        $separator  = ':'; 
        #  MacOS is GUI, so we need an elegant method to set @ARGV unless MPW tool
        if ($MacPerl::Version =~ /Application$/) {
            # We're running from the app  
            # (see http://www.macperl.com/depts/MacPerlFAQ.html section 5.3.3 )      
            my( $cmdLine, @args );
            $cmdLine = &MacPerl::Ask( "-arguments e.g. -A -n -C (-u for usage summary.)" );
            if ($cmdLine) {
                require "shellwords.pl";
                @args = &shellwords( $cmdLine );
                unshift( @ARGV, @args );
            }
        }
    }
    elsif ($Config{'osname'} =~ /^mswin/i) {
        # Windows
        $ubhrcname  = "ubhrc";
        $newsrcname = "newsrc";
        $datadir    = "data";
        $max_filename_length = 255;
        $separator   = '/';
    }
    else { 
        #Linux, Unix, MacOS X, all others 
        $ubhrcname  = ".ubhrc";
        $newsrcname = ".newsrc";
        $datadir    = "data";
        $max_filename_length = 255;
        $separator  = '/';
    }

    # dependent assignments after OS configuration.
    my $tempdir = $datadir;
    my $tempfilename = $tempdir . $separator . "ubhparts." . $$;

    bless {
        _ubhrcname           => $ubhrcname,
        _newsrcname          => $newsrcname,
        _datadir             => $datadir,
        _max_filename_length => $max_filename_length,
        _separator           => $separator,
        _tempdir             => $tempdir,
        _tempfilename        => $tempfilename
        }, $class;
}

sub get_ubhrcname
{ 
    $_[0]->{_ubhrcname}
}

sub set_ubhrcname
{
    my ($self, $ubhrcname) = @_;
    $self->{_ubhrcname} = $ubhrcname if ($ubhrcname);
}

sub get_newsrcname 
{ 
    $_[0]->{_newsrcname}  
}

sub set_newsrcname
{
    my ($self, $newsrcname) = @_;
    $self->{_newsrcname} = $newsrcname if ($newsrcname);
}

sub get_datadir    
{ 
    $_[0]->{_datadir}
}

sub set_datadir
{
    my ($self, $datadir) = @_;
    $self->{_datadir} = $datadir if ($datadir);
}

sub get_max_filename_length
{ 
    $_[0]->{_max_filename_length}
}

sub set_max_filename_length
{
    my ($self, $max_filename_length) = @_;
    $self->{_max_filename_length} = $max_filename_length if ($max_filename_length);
}

sub get_separator
{ 
    $_[0]->{_separator}     
}

sub set_separator
{
    my ($self, $separator) = @_;
    $self->{_separator} = $separator if ($separator);
}

sub get_tempdir
{ 
    $_[0]->{_tempdir}     
}

sub set_tempdir
{
    my ($self, $tempdir) = @_;
    if ($tempdir) {
        $self->{_tempdir} = $tempdir;
        $self->{_tempfilename} = $tempdir . $self->{_separator} . "ubhparts." . $$;
    }
}

sub get_tempfilename   
{ 
    $_[0]->{_tempfilename}    
}

1;

############################################################################

package Ubh::NNTP;

# This class acts much like a Net::NNTP, with the added benefit of
# automatically re-connecting a dropped connection.
#
# This class wraps an instance of a Net::NNTP via AUTOLOAD which
# sets up automatic delegation to the contained Net::NNTP instance, 
# in the manner prescribed by perlbot.
#
# This was necessary because I needed to keep track of the host
# name and other data necessary to re-connect on the fly.  Because
# the root class of Net::NNTP (class IO::Handle) merely blesses a
# a scalar instance, it is not possible to just add my extra stuff
# to the root class hash.

use vars qw($VERSION $AUTOLOAD);

$VERSION = "1.5";

use Net::NNTP;

{
    # This stuff is private.

    sub _connect 
    {
        my $self = shift;

        print "Connecting to ", $self->{_host}, "... ";
        
        my $try = 0;
        my $connected = 0;
        my $nntp;
        while ($try < $self->{_retries} && !$connected) {
            if ($nntp = Net::NNTP->new($self->{_host})) {
                $connected = 1;
            }
            else {
                print "\n   WARNING: Net::NNTP->new(", $self->{_host}, ") failed.";
                print "\n  Trying again.\n";
                sleep 1;
                $try++;
            }
        }

        if (!$connected) {
            print "\n   ERROR: Net::NNTP->new(", $self->{_host}, ") failed.";
            print "\n  Giving up.\n";
            exit(-1);
        }
        else {
            print "done.\n";

            # Authenticate user if necessary.
            if (defined $self->{_account} && 
                defined $self->{_password}) {
                print "Authenticating...";
                $nntp->authinfo($self->{_account}, $self->{_password}) 
                    or die "ERROR: Net::NNTP->authinfo() failed.\n";
                print "done.\n";
            }
        }

        $self->{_nntp} = $nntp;
    }
}

sub body
{
    my $self = shift;

    my $num = shift;

    my $try = 0;
    my $gotit = 0;
    my $body;
    my $time_start = time();
    while ($try < $self->{_retries} && !$gotit) {
        if ($body = $self->{_nntp}->body($num)) {
            $gotit = 1;
        }
        else {
            print "\n   WARNING: Net::NNTP->body($num) failed.\n";

            # There is a bug in Net::Cmd which does not set the code
            # when there is a disconnect.  Thus, we have to check
            # for defined fileno directly ourselves.
            if ($self->{_nntp}->code() eq "000" ||
                !defined fileno($self->{_nntp})) {
                # No response was recieved, so we've probably lost connection.
                undef $self->{_nntp};
                $self->_connect();
                $self->{_nntp}->group($self->{_group});
                $try = 0;
            }
            else {
                $try++;
            }
        }
    }

    if (!$gotit) {
        print "\n   ERROR: Net::NNTP->body($num) failed.  Giving up.\n";
        return undef;
    }

    # Calculate and report data rate for this body.
    my $bytes = 0;
    my $delta = (time() - $time_start);
    for (@{$body}) {
        $bytes += length($_);
    }

    # Add to grand total too.
    $self->{_bytes} += $bytes;

    my $rate;
    if ($delta > 0) {
        $rate = ($bytes / 1024.0) / ($delta);
    } else {
        $rate = 0.0;
    }
    print "[bytes = $bytes  secs = $delta  rate = " . sprintf("%.03f",$rate) . " KB/sec]\n";

    return $body;
}


sub group 
{
    my $self = shift;

    $self->{_group} = shift;

    my $try = 0;
    my $gotgroup = 0;
    my $num_articles;
    my $first;
    my $last;
    while ($try < $self->{_retries} && !$gotgroup) {
        if (($num_articles, $first, $last) = $self->{_nntp}->group($self->{_group})) {
            $gotgroup = 1;
        }
        else {
            print "\n   WARNING: Net::NNTP->group($self->{_group}) failed.\n";

            # There is a bug in Net::Cmd which does not set the code
            # when there is a disconnect.  Thus, we have to check
            # for defined fileno directly ourselves.
            if ($self->{_nntp}->code() eq "000" ||
                !defined fileno($self->{_nntp})) {
                # No response was recieved, so we've probably lost connection.
                undef $self->{_nntp}; # make sure the old connection is closed
                $self->_connect(); # and try to reconnect
                $try = 0;
            }
            else {
                $try++;
            }
        }
    }

    return ($num_articles, $first, $last) if ($gotgroup);

    print "\n   ERROR: Net::NNTP->group($self->{_group}) failed.  Giving up.\n";
    return undef;
}

sub new
{
    my $self = shift;
    my %arg  = @_;

    my $ref = bless {
        _nntp     => undef,
        _host     => $arg{host},
        _retries  => $arg{retries},
        _account  => $arg{account},
        _password => $arg{password},
        _group    => undef,
        _bytes    => 0,
        _time     => time()
    }, $self;

    $ref->_connect();

    return $ref;
}


sub get_stats
{
    my $self = shift;

    my $delta = time() - $self->{_time};
    my $rate;
    if ($delta > 0) {
        $rate = ($self->{_bytes} / 1024.0) / ($delta);
    } else {
        $rate = 0.0;
    }
    return ($self->{_bytes}, $delta, $rate);
}


sub reset_stats
{
    my $self = shift;
    $self->{_time} = time();
    $self->{_bytes} = 0;
}


sub AUTOLOAD
{
    my $self = shift;

    # Take care not to delegate DESTROY.  Only $self->DESTROY() should
    # call the contained instance's DESTROY, if necessary.
    return if $AUTOLOAD =~ /::DESTROY$/;

    # The $AUTOLOAD variable contains the method name prepended by 
    # the package name, so strip that stuff off, leaving just the
    # method name.
    $AUTOLOAD =~ s/^Ubh::NNTP:://;

    # Now delegate the method call to the contained instance.
    $self->{_nntp}->$AUTOLOAD(@_);
}

1;

############################################################################

package main;

use Getopt::Std;
use News::Newsrc;

# ----------------------------------------------------------------------
# Global configuration variables.  These are default values.
# ----------------------------------------------------------------------
my $nntpserver = "news";
my $nntpretries = 3;
my $account;
my $password;  
my $forcedir   = undef;
my $perms = 0777;
my $multi_ext   = '(?i)asf|avi|gif|jpg|mov|mpg|mpeg|rm|[cdertsu]\d\d|rar|ace|zip|\d\d\d|nfo|sfv|mp3';
my $single_ext  = '(?i)asf|avi|gif|jpg|mov|mpg|mpeg|rm|[cdertsu]\d\d|rar|ace|zip|\d\d\d|nfo|sfv|mp3';

# ----------------------------------------------------------------------
# Global variables.
# ----------------------------------------------------------------------
my $version = "1.5";
my $platform;
my $nntp;
my %opt;
my $exit_code = 0;
$SIG{INT} = \&grace;

# ----------------------------------------------------------------------
# Global constants.
# ----------------------------------------------------------------------
my $ARTICLES_PER_DOT = 1000;

# bitmap for matches hash
my $MATCH_EXT    = 0x1;
my $MATCH_SINGLE = 0x2;
my $MATCH_MULTI  = 0x4;

# bitmap for filters hash
my $FILTER_NOT_INCLUDED = 0x1;
my $FILTER_EXCLUDED     = 0x2;

# ----------------------------------------------------------------------
# Miscellaneous subroutines.
# ----------------------------------------------------------------------

sub welcome() {
    print "------------------------------------------------------------------------------\n";
    print "ubh ", $version, ", (C) 2000 Gerard Lanois\n";
    print "This software comes with ABSOLUTELY NO WARRANTY; ";
    print "for details use -w.\n";
    print "This is free software, and you are welcome to redistribute\n";
    print "it under certain conditions; see the file LICENSE for details.\n";
    print "------------------------------------------------------------------------------\n";
}


sub yesno($) {
    my $question = shift;

    print $question, " (y/n/q) [n]: ";
    my $key = <>;
    print "\n";

    if ($key =~ /^q/i) {
        return 2;
    }
    elsif ($key =~ /^y/i) {
        return 1;
    }
    else {
        return 0;
    }
}


sub print_warranty() {
    print << "EOW";
------------------------------------------------------------------------------
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.

IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
EOW
}


sub print_usage() {
    print << "EOU";
Usage: ubh [switches]

These options apply to single-part article processing:
    -S  Process only single part articles.
    -g  Greedy - download and process article even if the subject
        does not have a filename match.

These options apply to multi-part article processing:
    -M  Process only multiple part articles.
    -i  Interactive preselection of multipart articles.

These options apply to both single and multi part processing:
    -A Assemble articles on disk rather than in memory.
    -d Diagnostic mode; dumps all raw articles in group.
    -D Multipart diagnostic mode; dumps all parts of selected 
       multipart articles
    -c <file> Use file as config file instead of default (ubhrc).
    -a <num>  Process all articles (disregards newsrc).
    -f <num>  Process the first num articles (updates newsrc).
    -l <num>  Process the last num articles (updates newsrc).
    -n Updates newsrc after every article.
    -s Logs all subjects to subjects.log and complete binary subjects
       multiparts.log (disregards newsrc).  Does not download anything.
    -I <regexp> Inclusion search filter (double quote on command line)
    -X <regexp> Exclusion search filter (double quote on command line)
    -r Logs rejects to rejects.log and stores raw rejected articles.
    -C Clean filenames (change all non alphanumeric to '_')
    -L Long filenames (use subject as the filename)
    -O Overwrite instead of creating unique name
    -Z Produce lots and lots of logs (seriously)
    -z Mark articles that do not pass inclusion/exclusion
    -y chmod 0666 all output files
    -u Print out usage summary.
    -w Print out warranty information.

Examples:
    ubh -i -M -I "(?i)rem|r\\.e\\.m\\."
    ubh -S -l 1000
EOU
}


sub process_options() {
    my $error = !getopts('ugiAdDc:af:l:nrsSMI:X:wCOLyZz', \%opt);

    if ($opt{'c'}) {
        $platform->set_ubhrcname($opt{'c'});
    }

    my $count = 0;
    $count++ if ($opt{'a'});
    $count++ if ($opt{'s'});
    $count++ if ($opt{'f'});
    $count++ if ($opt{'l'});
    if ($count > 1) {
        print "ERROR: Can only specify one of -a, -s, -f, or -l\n";
        $error = 1;
    }

    if ($opt{'i'} && $opt{'S'}) {
        print "ERROR: Can't specify both -i and -S\n";
        $error = 1;
    }

    if ($opt{'I'}) {
        eval { "" =~ /$opt{'I'}/ };
        if ($@) {
            print "ERROR: invalid -I pattern: ", $@, "\n";
            $error = 1;
        }
    }
    
    if ($opt{'X'}) {
        eval { "" =~ /$opt{'X'}/ };
        if ($@) {
            print "ERROR: invalid -X pattern: ", $@, "\n";
            $error = 1;
        }
    }

    if ($opt{'w'}) {
        print_warranty();
        exit(-1);
    }

    if ($error || $opt{'u'}) {
        print_usage();
        exit(-1);
    }
}


sub read_config() {
    open(UBHRC, "< ".$platform->get_ubhrcname) 
        or die "ERROR: open(".$platform->get_ubhrcname.") failed.\n";

    while (<UBHRC>) {
        chomp;


        next if (/\s*\#.*/);

        if (m/(\w+)\s*=\s*(.*)$/) {

            my $keyword = $1;
            my $value   = $2;

            # That .* grabbed everything to the end-of-line, including
            # any trailing whitespace, so eat any such trailing whitespace.
            # (See Perl Cookbook, recipe 1.14 "Trimming Blanks from the
            # Ends of a String".)
            $value =~ s/\s+$//;

            if ($keyword =~ /NNTPSERVER/) {
                $nntpserver = $value;
            }
            elsif ($keyword =~ /NNTPRETRIES/) {
                # Always want to try at least once.
                $nntpretries = ($value <= 0) ? 1 : $value;
            }
            elsif ($keyword =~ /NEWSRCNAME/) {
                $platform->set_newsrcname($value);
            }
            elsif ($keyword =~ /DATADIR/) {
                $platform->set_datadir($value);
            }
            elsif ($keyword =~ /FORCEDIR/) {
                $forcedir = $value;
            }
            elsif ($keyword =~ /EXTENSIONS/) {
                $multi_ext = $value;
                $single_ext = $value;
            }
            elsif ($keyword =~ /MULTI_EXT/) {
                $multi_ext = $value;
            }
            elsif ($keyword =~ /SINGLE_EXT/) {
                $single_ext = $value;
            }
            elsif ($keyword =~ /ACCOUNT/) {
                $account = $value;
            }
            elsif ($keyword =~ /PASSWORD/) {
                $password = $value;
            }
            elsif ($keyword =~ /TEMPDIR/) {
                $platform->set_tempdir($value);
            }
            elsif ($keyword =~ /PERMISSION/) {
                $perms = $value;
            }
            elsif ($keyword =~ /OPT_g/) {
                $opt{'g'} = 1;
            }
            elsif ($keyword =~ /OPT_i/) {
                $opt{'i'} = 1;
            }
            elsif ($keyword =~ /OPT_A/) {
                $opt{'A'} = 1;
            }
            elsif ($keyword =~ /OPT_d/) {
                $opt{'d'} = 1;
            }
            elsif ($keyword =~ /OPT_D/) {
                $opt{'D'} = 1;
            }
            elsif ($keyword =~ /OPT_a/) {
                $opt{'a'} = $value;
            }
            elsif ($keyword =~ /OPT_f/) {
                $opt{'f'} = $value;
            }
            elsif ($keyword =~ /OPT_l/) {
                $opt{'l'} = $value;
            }
            elsif ($keyword =~ /OPT_n/) {
                $opt{'n'} = $value;
            }
            elsif ($keyword =~ /OPT_r/) {
                $opt{'r'} = 1;
            }
            elsif ($keyword =~ /OPT_s/) {
                $opt{'s'} = 1;
            }
            elsif ($keyword =~ /OPT_S/) {
                $opt{'S'} = 1;
            }
            elsif ($keyword =~ /OPT_M/) {
                $opt{'M'} = 1;
            }
            elsif ($keyword =~ /OPT_I/) {
                $opt{'I'} = $value;
            }
            elsif ($keyword =~ /OPT_X/) {
                $opt{'X'} = $value;
            }
            elsif ($keyword =~ /OPT_C/) {
                $opt{'C'} = 1;
            }
            elsif ($keyword =~ /OPT_L/) {
                $opt{'L'} = 1;
            }
            elsif ($keyword =~ /OPT_O/) {
                $opt{'O'} = 1;
            }
            elsif ($keyword =~ /OPT_Z/) {
                $opt{'Z'} = 1;
            }
            elsif ($keyword =~ /OPT_z/) {
                $opt{'z'} = 1;
            }
            elsif ($keyword =~ /OPT_y/) {
                $opt{'y'} = 1;
            }
        }
    }
    close(UBHRC);


    my $error = 0;
    if ((defined $account && !defined $password) || 
        (!defined $account && defined $password)) {
        print "ERROR: Need to specify both ACCOUNT and PASSWORD\n";
        $error = 1;
    }

    if ($error) {
        print_usage();
        exit(-1);
    }
}

# ----------------------------------------------------------------------
# Main routine.
# ----------------------------------------------------------------------

sub main() {
    $| = 1;

    welcome();

    $platform = Ubh::Platform->new;

    process_options();

    read_config();

    process_options();

    $nntp = Ubh::NNTP->new(
                           host => $nntpserver,
                           retries => $nntpretries,
                           account => $account,
                           password => $password
                           );

    process_groups();

    exit($exit_code);
}


# ----------------------------------------------------------------------
# Subject processing.
# ----------------------------------------------------------------------

sub get_subjects($$$$$$$$) {
    my $first    = shift;
    my $last     = shift;
    my $group    = shift;
    my $subjects = shift;
    my $filters  = shift;
    my $matches  = shift;
    my $groupdir = shift;
    my $newsrc   = shift;

    my $num_headers = 0;
    my $num_passed_headers = 0;

    print "Retrieving headers";

    # MBSlater
    # It would be nicer to get the headers by the various methods 
    # and then do one run thru to do the inclusion/exclusion & matching 
    # (use match bitmap)
    # or... abstract the 'subject' retrieval method into another function

    # The Net::NNTP 'X' methods return undef if the method fails.
    # Take advantage of this to select a faster way of downloading
    # the headers, (XHDR or XOVER) if available.  If not, just get 
    # them manually (via HEAD).
    if (defined $nntp->xhdr("Subject", "$first")) {

        # Use NNTP server xhdr to retrieve subjects.
        # This saves a lot of time, as only the Subject headers
        # are transferred.

        my $xhdrs;
        if (!$opt{'s'} && !$opt{'a'}) {
            # Download only those subjects not marked as already
            # read in the newsrc.

            # grab the unmarked articles list, convert it to a run_list and then step thru it
            my $set_articles = Set::IntSpan->new(join(",",$newsrc->unmarked_articles($group, $first, $last)));
            my $set_list = $set_articles->run_list;
            foreach my $num (split(",",$set_list)) {
                my $chunk_xhdr = $nntp->xhdr("Subject", "$num");
                foreach my $key (keys %$chunk_xhdr) {
                    $xhdrs->{$key} = $chunk_xhdr->{$key};
                    if (($num_headers++ % $ARTICLES_PER_DOT) == 0) { print '.'; }
                }
            }
        } else {
            $xhdrs = $nntp->xhdr("Subject", "$first-$last");
        }

        print " done.\n";
        print "   " . (scalar keys %$xhdrs) . " headers retrieved\n";
        print "Checking inclusion/exclusion";
        $num_headers = 0;
        foreach my $num (sort { $a <=> $b } keys %$xhdrs) {
            my $subject = $xhdrs->{$num};
            $subjects->{$num} = $subject;
            
            if (($num_headers++ % $ARTICLES_PER_DOT) == 0) { print '.'; }
            
            # Skip inclusion/exclusion filter if just logging all subjects.
            if (!$opt{'s'}) {
                # Skip this subject if it either doesn't match
                # the inclusion filter or it does match the
                # exclusion filter.
                if ($opt{'I'} && $subject !~ /$opt{'I'}/o) {
                    if (!defined $filters->{$num}) { $filters->{$num} = 0; }
                    $filters->{$num} |= $FILTER_NOT_INCLUDED;
                    next;
                }
                if ($opt{'X'} && $subject =~ /$opt{'X'}/o) {
                    if (!defined $filters->{$num}) { $filters->{$num} = 0; }
                    $filters->{$num} |= $FILTER_EXCLUDED;
                    next;
                }
            }
            $num_passed_headers++;
        }
    }
    elsif (defined $nntp->xover($first)) {

        # Use NNTP server xover to retrieve subjects.
        # This saves a little time, as only the overview
        # headers are transferred.

        # This code to retrieve the headers via xover is from Greg Bacon,
        # posted to comp.lang.perl.misc.
        my %oview_fmt;
        my $i = 0;
        for ( @{ $nntp->overview_fmt } ) {
            $oview_fmt{$_} = $i++;
        }
        
        # This retrieves the overview records.  $o is a reference
        # to a hash.  The keys to this hash are the article numbers.
        # The values in the hash are array references.  The arrays
        # contain the article headers, and are indexed according to
        # the overview_fmt hash (see above).

        # MBSlater - add code like xhdr

        my $o = $nntp->xover("$first-$last");
        for (keys %$o) {
            my $num = $_;
            my $subject = $$o{$num}->[$oview_fmt{"Subject:"}];
            $subject->{num} = $subject;
            
            # Disregard the newsrc test if certain options are in effect.
            if (!$opt{'a'} && !$opt{'s'}) {
                # Skip this subject if it is marked as already
                # read in the newsrc.
                next if ($newsrc->marked($group, $num));
            }
    
            # Skip this subject if it either doesn't match
            # the inclusion filter or it does match the
            # exclusion filter.
            if ($opt{'I'} && $subject !~ /$opt{'I'}/) {
                if (!defined $filters->{$num}) { $filters->{$num} = 0; }
                $filters->{$num} |= $FILTER_NOT_INCLUDED;
                next;
            }
            if ($opt{'X'} && $subject =~ /$opt{'X'}/) {
                if (!defined $filters->{$num}) { $filters->{$num} = 0; }
                $filters->{$num} |= $FILTER_EXCLUDED;
                next;
            }
        }
    }
    else {

        # The server supports neither XHDR nor XOVER.
        # So, have to manually collect subjects directly from headers.
        # This is alot slower, as *all* headers for each article
        # need to be requested and downloaded.

        # MBSlater - add code like xhdr

        for ($first..$last) {
            my $num = $_;
        
            # Get headers for this article.
            my $header = $nntp->head($num)
                or warn "WARNING: Net::NNTP->head($num) failed.\n";
            
            # Get Subject: for this article.
            next if (!grep(/^Subject:/, @{$header}));
            my @found_subjects = grep(/^Subject:/, @{$header});
            my $subject = $found_subjects[0];
            $subject =~ s/^Subject: //;
            $subjects->{$num} = $subject;

            # Disregard the newsrc test if certain options are in effect.
            if (!$opt{'a'} && !$opt{'s'}) {
                # Skip this subject if it is marked as already
                # read in the newsrc.
                next if ($newsrc->marked($group, $num));
            }
         
            # Skip this subject if it either doesn't match
            # the inclusion filter or it does match the
            # exclusion filter.
            if ($opt{'I'} && $subject !~ /$opt{'I'}/) {
                if (!defined $filters->{$num}) { $filters->{$num} = 0; }
                $filters->{$num} |= $FILTER_NOT_INCLUDED;
                next;
            }
            if ($opt{'X'} && $subject =~ /$opt{'X'}/) {
                if (!defined $filters->{$num}) { $filters->{$num} = 0; }
                $filters->{$num} |= $FILTER_EXCLUDED;
                next;
            }
        }
    }
    print " done.\n";
    print "   $num_passed_headers headers passed inclusion/exclusion\n";
}

sub evil_filename($) {
    my $filename = shift;

    # It's amazing the shit people specify as file names.
    # It's not uncommon to occasionally see full DOS path names,
    # complete with C:.
    $filename =~ tr/\"//d;
    $filename =~ tr/\\/_/d;
    $filename =~ tr/\//_/d;
    $filename =~ tr/:/_/d;
    $filename =~ tr/=/_/d;
    $filename =~ tr/?/_/d;

    if ($opt{'C'}) {
        # convert all non-alphanumeric to '_'
        $filename =~ s/([^\w.\/\_\-])/\_/g;
    }

    # fix silly files with '-' as the first character
    if ($filename =~ /^\-/) {
        $filename = "_" . $filename;
    }

    # check and fix filenames that are too long
    if (length($filename) > $platform->get_max_filename_length) {
        if ($filename =~ /^(.*)\.(.*)$/) {
            my $base = $1;
            my $ext = $2;
            if ($ext >= $platform->get_max_filename_length) {
                print "\n   WARNING: Cannot fix filename length (extension is too long).\n";
            } else {
                $base = substr $base, 0, ($platform->get_max_filename_length - length($ext) - 1);
                $filename = "$base.$ext";
            }
        } else {
            print "\n   WARNING: Cannot fix filename length (extension not found).\n";
        }
    }

    return $filename;
}


# Borrowed from MIME::Base64...
sub decode_line_base64($) {
    my $str = shift;
    my $res = "";

    my $ostr = $str;
    $str =~ tr|A-Za-z0-9+=/||cd;            # remove non-base64 chars
    if (length($str) % 4) {
        print "\n    BASE64 Length of base64 data not a multiple of 4.\n";
        print "    BASE64   original (", length($ostr),  ")=\"", $ostr, "\"\n";
        print "    BASE64 translated (", length($str), ")=\"", $str,  "\"\n";
    }
    $str =~ s/=+$//;                        # remove padding
    $str =~ tr|A-Za-z0-9+/| -_|;            # convert to uuencoded format
    while ($str =~ /(.{1,60})/gs) {
        my $len = chr(32 + length($1)*3/4); # compute length byte
        $res .= unpack("u", $len . $1 );    # uudecode
    }
    $res;
}

sub process_body_base64($$$$$) {
    my $num      = shift;
    my $skip     = shift;
    my $body     = shift;
    my $groupdir = shift;
    my $filename = shift;

    $filename = evil_filename($filename);
    my $filepath = $groupdir.$platform->get_separator.$filename;
    if (!$opt{'O'}) {
      if (-f $filepath) {
          # File exists.  Prepend article number to make it unique.
          $filepath = $groupdir.$platform->get_separator.$num."_".$filename;
      }
    }

    print "    BASE64 Writing ", $filepath, "...";

    open(OUTFILE, "> $filepath") or
        warn "ERROR: open($filepath) failed: $!";
    binmode OUTFILE;

    # Skip any cruft as required before decoding the body. 
    # This is necessary for MIME message/partial articles, since
    # the first article body has a bunch of stuff at the top.
    # All the rest of the parts are fine - just need to skip the
    # stuff at the top, when so directed.

    if ($opt{'A'}) {
        # Decode from file.
        open(B64FILE, "< $body") or die "ERROR: open($body) failed.\n";
        while ($skip && <B64FILE>) { 
            $skip--; 
        }
        while (<B64FILE>) {           
            print OUTFILE decode_line_base64($_);
        }
        close B64FILE;
    }
    else {
        # Decode from memory.
        while ($skip) { 
            my $skipped = shift @{$body};  
            $skip--; 
        }
        for (@{$body}) {
            print OUTFILE decode_line_base64($_);
        }
    }
    close OUTFILE;
    print "done.\n";

    chmod 0666, $filepath if ($opt{'y'});
}


sub decode_line_uu($$$$$$$)
{
    my $num      = shift;
    my $groupdir = shift;
    my $newfilename = shift;
    my $line = shift;
    my $fb = shift;
    my $fe = shift;
    my $filepath = shift;
   
    if (!$$fb) {

        # Look for the "begin".
        if ($line =~ /(?i)^begin\s+(\d+)\s+(.+)/) {
            my $mode = $1;
            my $file = $2;
            

            # You could probably stand to double-check the file
            # extension that you get.  I've actually seen HAPPY99.EXE
            # and lots of other junk come in via this route.
            if ($opt{'L'} && length($newfilename) > 0) {
                $file = $newfilename; 
            }
            $file = evil_filename($file);
            $$filepath = $groupdir.$platform->get_separator.$file;
            if (!$opt{'O'}) {
                if (-f $$filepath) {
                    # File exists.  Prepend article number to make it unique.
                    $$filepath = $groupdir.$platform->get_separator.$num."_".$file;
                }
            }

            print "    UUDECODE Writing ", $$filepath, "...";

            if (!open(OUT, "> $$filepath")) {
                warn "ERROR: open($$filepath) failed: $!";
                return;
            }

            binmode OUT;
            $$fb = 1;
        }
    }
    else {
        if ($line =~ /(?i)^end$/) {
            $$fe = 1;
            return;
        }
        return if ($line =~ /[a-z]/);
        return unless 
            int((((ord() - 32) & 077) + 2) / 3) 
                == int(length($line) / 4);
        print OUT unpack("u", $line) or die "ERROR: can't write to $$filepath: $!";
    }
}

sub process_body_uu($$$$) {
    my $num      = shift;
    my $body     = shift;
    my $groupdir = shift;
    my $newfilename = shift;

    my $fb = 0; # Found begin.
    my $fe = 0; # Found end.
    my $filepath;

    if ($opt{'A'}) {
        # Decode article from disk file.
        open(UUFILE, "< $body") or die "ERROR: open($body) failed.\n";       
        while (<UUFILE>) {
            decode_line_uu($num, $groupdir, $newfilename, $_, \$fb, \$fe, \$filepath);
        }
        close (UUFILE);
    }
    else {
        # Decode from memory.
        for (@{$body}) {
            decode_line_uu($num, $groupdir, $newfilename, $_, \$fb, \$fe, \$filepath);
        }
    }

    if (!$fb) {
        print "    UUDECODE Never saw \"begin\"\n";
    }
    else {
        close OUT or warn "ERROR: close($filepath) failed: $!";
        $fe or warn "WARNING: article ended before end found\n";

        chmod 0666, $filepath if ($opt{'y'});

        print "done.\n";
    }

    return $fb;
}

sub process_mime_mp_body($$$$$$$)
{
    my $state = shift;
    my $states = shift;
    my $line = shift;
    my $filename = shift;
    my $newfilename = shift;
    my $encoding = shift;
    my $result;

    if ($$state == $$states{CONTENTTYPE2}) {
        if ($line =~ /^Content-Type:\s*(\S+)\/(\S+)\;/i) {

            my ($content, $type) = ($1, $2);

            print "    MIME Message Content-Type is ";
            print $content, "/", $type, "\n";

            if (($content =~ /application/i) && ($type =~ /octet-stream/i)) {

                if ($line =~ /^Content-Type:\s*\S+\/\S+\;\s*name="*(.+)"*$/i) {
                    # Content type and name are all on one line.
                    $$filename = $1;

                    if ($opt{'L'} && length($newfilename) > 0) {
                        $$filename = $newfilename;
                    }

                    print "    MIME Found filename: ", $$filename, "\n";

                    # Is this a file type we want?  Bail if not.
                    if ($$filename !~ /(.+\.($multi_ext))/) {
                        $$result = 0;
                        return;
                    }

                    $$state = $$states{ENCODING};
                }
                else {
                    # Name is on a separate line.
                    $$state = $$states{NAME};
                }
            }
        }
    }
    elsif ($$state == $$states{NAME} && $line =~ /^\s*name="*(.+)"*\s*$/i) {

        # Found file name.
        $$filename = $1;

        if ($opt{'L'} && length($newfilename) > 0) {
            $$filename = $newfilename;
        }

        print "    MIME Found filename: ", $$filename, "\n";

        # Is this a file type we want?  Bail if not.
        if ($$filename !~ /(.+\.($multi_ext))/) {
            $$result = 0;
            return;
        }

        $$state = $$states{ENCODING};
    }
    elsif (($$state == $$states{ENCODING}) && 
           $line =~ /^Content-Transfer-Encoding:\s+(\S+)$/i) {

        # Found single part encoding.
        $$encoding = $1;
        print "    MIME Content-Transfer-Encoding is ", $$encoding, "\n";

        # Currently only base64 is supported.
        if ($$encoding !~ /base64/i) {
            $$result = 0;
            return;
        }

        $$state = $$states{BLANKLINE};
    }
    elsif ($$state == $$states{BLANKLINE}) {
        if ($line =~ /^\s*$/) {
            $$state = $$states{INBODY};
            $$result = 2;
        }
    }
    $$result = 1;
}

sub process_mime_mp($$$$$) {
    # Handle as "message/partial".  At this point the partials have all
    # been assembled in order in the body.  Pull the filename out of
    # the header and save off the binary.

    my $num      = shift;
    my $headers  = shift;
    my $body     = shift;
    my $groupdir = shift;
    my $newfilename = shift;  

    my $filename;
    my $encoding;
    my $type_mp = 0;
    my $mime_version = 0;

    my %states = (
                  CONTENTTYPE1  => 1,
                  CONTENTTYPE2  => 2,
                  NAME          => 3,
                  ENCODING      => 4,
                  BLANKLINE     => 5,
                  INBODY        => 6 );

    my $state = $states{CONTENTTYPE1};
    for (@{$headers}) {

        # MIME-Version sometimes comes after the Content-Type.
        if (/^MIME-Version:/i) {
            # Don't care what the version number is,
            # so long as it begins with "MIME-Version:".
            $mime_version = 1;
        }

        if ($state == $states{CONTENTTYPE1}) {
            if (/^Content-Type:\s*(\S+)\/(\S+)\;/i) {

                my ($content, $type) = ($1, $2);

                print "    MIME Message Content-Type is ";
                print $content, "/", $type, "\n";

                if (($content =~ /message/i) && ($type =~ /partial/i)) {

                    # It's a multipart message/partial MIME article.
                    $type_mp = 1;

                    # Look for file name in second content type.
                    $state = $states{CONTENTTYPE2};
                }
            }
        }
    }

    # Keeps track of how many lines of MIME in-band signalling 
    # to skip over when it comes time to begin decoding the body.
    my $skip = 0;

    my $body_result = 1;
    if ($opt{'A'}) {
        open(MMFILE, "< $body") or die "ERROR: open($body) failed.\n";       
        while (<MMFILE>) {
            $skip++;
            process_mime_mp_body(
                \$state, 
                \%states, 
                $_, 
                \$filename, 
                $newfilename, 
                \$encoding, 
                \$body_result);
            return 0 if ($body_result == 0);
            last if ($body_result == 2);
        }
        close(MMFILE);
    }
    else {
        for (@{$body}) {
            $skip++;
            process_mime_mp_body(
                \$state, 
                \%states, 
                $_, 
                \$filename, 
                $newfilename, 
                \$encoding, 
                \$body_result);
            return 0 if ($body_result == 0);
            last if ($body_result == 2);
        }
    }

    if ($type_mp && !$mime_version) {
        print "    MIME - Never saw MIME-Version header.\n";
    }

    my $decode_result = 0;
    if ($type_mp && $filename && $encoding && ($state == $states{INBODY})) {

        # Single part article - decode and save off all the lines in the body.
        process_body_base64($num, $skip, $body, $groupdir, $filename);
        $decode_result = 1;
    }
    return $decode_result;
}

sub process_mime_multipart_mixed($$$$$$$$$$$$$)
{
    my $groupdir = shift;
    my $num = shift;
    my $state  = shift;
    my $states = shift;
    my $boundary  = shift;
    my $encoding  = shift;
    my $line   = shift;
    my $part   = shift;
    my $filename = shift;
    my $newfilename = shift;
    my $filepath = shift;
    my $opened = shift;
    my $result = shift;

    if (($$state == $$states{FIRSTBOUNDARY}) &&
        ($line =~ /^--$boundary/)) {
        $$part++;
        $$state = $$states{CONTENTTYPE};
    }
    elsif ($$state == $$states{CONTENTTYPE}) {
        if ($line =~ /^Content-Type:\s*(\S+)\/(\S+)\;/i) {

            my ($content, $type) = ($1, $2);
            
            print "    MIME Part ", $$part, " Content-Type is ";
            print $content, "/", $type, "\n";

            $$state = $$states{ENCODING};

            if (($content =~ /image/i) ||
                ($content =~ /audio/i) ||
                ($content =~ /video/i) ||
                (($content =~ /application/) && 
                 ($type =~ /octet-stream/i))) {
                if (($line =~ /^Content-Type:\s*\S+\/\S+\;\s*name="*(.+)"*$/i) ||
                    ($line =~ /^Content-Type:\s*\S+\/\S+\;\s*charset=\S+;\s*name="*(.+)"*$/i)) {
                    # Content type and file name are all on one line.
                    $$filename = $1;

                    if ($opt{'L'} && length($newfilename) > 0) {
                        $$filename = $newfilename;
                    }

                    print "    MIME Found filename: ", $$filename, "\n";

                    # Is this a file type we want?  Keep looking if not.
                    if ($$filename !~ /(.+\.($single_ext))/) {
                        $$state = $$states{INBODY};
                    }
                }
                else {
                    # File name is on a separate line.
                    $$state = $$states{NAME};
                }
            }
        }
    }
    elsif ($$state == $$states{NAME} && 
           $line =~ /^\s*name="*(.+)"*\s*$/i) {

        # File name was on a separate line.
        $$filename = $1;

        if ($opt{'L'} && length($newfilename) > 0) {
            $$filename = $newfilename;
        }

        print "    MIME Found filename: ", $$filename, "\n";

        $$state = $$states{ENCODING};

        # Is this a file type we want?  Keep looking if not.
        if ($$filename !~ /(.+\.($single_ext))/) {
            $$state = $$states{INBODY};
        }
    }
    elsif ($$state == $$states{ENCODING}) {
        if ($line =~ /^Content-Transfer-Encoding:\s+(\S+)$/i) {
            $$encoding = $1;
            print "    MIME Content-Transfer-Encoding is ", $$encoding, "\n";

            $$state = $$states{BLANKLINE};

            # Currently only base64 is supported.
            if ($$encoding !~ /base64/i) {
                $$state = $$states{INBODY};
            }
        }
        elsif ($line =~ /^--$boundary/) {
            # Hit boundary, was expecting content encoding.  In this case, we
            # just skip this content type.  I wonder if the RFC requires a
            # content encoding?  For now, process the next part's content type.
            print "    MIME Content-Transfer-Encoding not found.\n";
            $$state = $$states{CONTENTTYPE};
        }
    }
    elsif ($$state == $$states{BLANKLINE}) {
        if ($line =~ /^\s*$/) {
            $$state = $$states{INBODY};
        }
    }
    elsif ($$state == $$states{INBODY}) {
        if ($$filename && !$$opened) {

            $$filename = evil_filename($$filename);
            $$filepath = $groupdir.$platform->get_separator.$$filename;
            if (!$opt{'O'}) {
                if (-f $$filepath) {
                    # File exists.  Prepend article number to make it unique.
                    $$filepath = $groupdir.$platform->get_separator.$num."_".$$filename;
                }
            }

            print "    MIME Writing ", $$filepath, "...";
            if (!open(OUTFILE, "> $$filepath")) {
                warn "ERROR: open($$filepath) failed: $!";
            }
            else {
                $$opened = 1;
            }
            binmode OUTFILE;
        }

        # Boundaries are prepended with '--'
        # The last boundary ends with '--'
        if ($line =~ /^--$boundary/) {
            $$part++;
            $$state = $$states{CONTENTTYPE};

            if ($$opened) {
                print "done.\n";
                close(OUTFILE);
                chmod 0666, $$filepath if ($opt{'y'});
                $$opened = 0;
                $$result = 1;
            }
        }
        else {
            if ($$opened) {
                print OUTFILE decode_line_base64($line);
            }
        }
    }
}

sub process_mime($$$$$) {
    my $num      = shift;
    my $headers  = shift;
    my $body     = shift;
    my $groupdir = shift;
    my $newfilename = shift;

    my $type_single = 0;
    my $type_multi = 0;

    my $part = 0;
    my $boundary;
    my $filename;
    my $filepath;
    my $encoding;
    my $mime_version = 0;
    my $opened = 0;
    my $result = 0;

    my %states = (
                  GETBOUNDARY   => 1,
                  BOUNDARY      => 2,
                  FIRSTBOUNDARY => 3,
                  CONTENTTYPE   => 4,
                  NAME          => 5,
                  ENCODING      => 6,
                  BLANKLINE     => 7,
                  INBODY        => 8 );

    # Search through the headers:
    #  1. To figure out if it's a multipart/mixed or a single
    #     part type that we are interested in.
    #  2. If it's multipart, get the boundary marker.
    #  3. If it's single part, get the file name and content
    #     encoding type.
    my $state = $states{CONTENTTYPE};
    for (@{$headers}) {

        # MIME-Version sometimes comes after the Content-Type.
        if (/^MIME-Version:/i) {
            # Don't care what the version number is,
            # so long as it begins with "MIME-Version:".
            $mime_version = 1;
        }

        if ($state == $states{CONTENTTYPE}) {
            if (/^Content-Type:\s*(\S+)\/(\S+)\;/i) {

                my ($content, $type) = ($1, $2);

                print "    MIME Message Content-Type is ";
                print $content, "/", $type, "\n";

                if (($content =~ /multipart/i) && ($type =~ /mixed/i)) {

                    # It's a multipart MIME article.
                    $type_multi = 1;

                    if (/^Content-Type:\s*\S+\/\S+\;\s*boundary="(\S+)"\s$/i) {
                        # All on one line.
                        $boundary = $1;
                    }
                    else {
                        $state = $states{BOUNDARY};
                    }
                }
                elsif (($content =~ /image/i) ||
                       ($content =~ /audio/i) ||
                       ($content =~ /video/i) ||
                       (($content =~ /application/) && 
                        ($type =~ /octet-stream/i))) {

                    # It's a single part mime article.
                    $type_single = 1;

                    if (/^Content-Type:\s*\S+\/\S+\;\s*name="*(.+)"*$/i) {
                        # Content type and name are all on one line.
                        $filename = $1;

                        if ($opt{'L'} && length($newfilename) > 0) {
                            $filename = $newfilename;
                        }

                        print "    MIME Found filename: ", $filename, "\n";

                        # Is this a file type we want?  Bail if not.
                        return 0 if ($filename !~ /(.+\.($single_ext))/);

                        $state = $states{ENCODING};
                    }
                    else {
                        # Name is on a separate line.
                        $state = $states{NAME};
                    }
                }
                else {
                    # Unsupported MIME message content type.
                    return 0;
                }
            }
        }
        elsif ($type_multi &&
               ($state == $states{BOUNDARY}) && 
               /^\s*boundary="(\S+)"\s*$/i) {

            # Found multi-part boundary.
            $boundary = $1;
        }
        elsif ($type_single && 
               $state == $states{NAME} && 
               /^\s*name="*(.+)"*\s*$/i) {

            # Found single part file name.
            $filename = $1;

            if ($opt{'L'} && length($newfilename) > 0) {
                $filename = $newfilename;
            }

            print "    MIME Found filename: ", $filename, "\n";

            # Is this a file type we want?  Bail if not.
            return 0 if ($filename !~ /(.+\.($single_ext))/);

            $state = $states{ENCODING};
        }
        elsif ($type_single && 
               ($state == $states{ENCODING}) && 
               /^Content-Transfer-Encoding:\s+(\S+)$/i) {

            # Found single part encoding.
            $encoding = $1;
            print "    MIME Content-Transfer-Encoding is ", $encoding, "\n";

            # Currently only base64 is supported.
            return 0 if ($encoding !~ /base64/i);
        }
    }

    if (($type_single || $type_multi) && !$mime_version) {
        print "    MIME - Never saw MIME-Version header.\n";
    }

    if ($type_single && $filename && $encoding) {

        # Single part article - decode and save off all the lines in the body.
        process_body_base64($num, 0, $body, $groupdir, $filename);

    }
    elsif ($type_multi && $boundary) {

        # Single part article - process each part in the body.

        $state = $states{FIRSTBOUNDARY};
        if ($opt{'A'}) {
            open(MM2FILE, "< $body") or die "ERROR: open($body) failed.\n";       
            while (<MM2FILE>) {
                process_mime_multipart_mixed(
                    $groupdir,
                    $num,
                    \$state, 
                    \%states, 
                    $boundary, 
                    \$encoding, 
                    $_, 
                    \$part, 
                    \$filename, 
                    $newfilename, 
                    \$filepath, 
                    \$opened,
                    \$result);
            }
            close(MM2FILE);
        }
        else {
            for (@{$body}) {
                process_mime_multipart_mixed(
                    $groupdir,
                    $num,
                    \$state, 
                    \%states, 
                    $boundary, 
                    \$encoding, 
                    $_, 
                    \$part, 
                    \$filename, 
                    $newfilename, 
                    \$filepath, 
                    \$opened,
                    \$result);
            }
        }
    }

    return $result;
}

sub clean_up() {
    if (-f $platform->get_tempfilename) {
         my $spacer = unlink $platform->get_tempfilename;
     }
}

sub decode_file($$$$$) {
    my $num         = shift;
    my $body        = shift;
    my $groupdir    = shift;
    my $newfilename = shift;
    my $single_part = shift;   # MBSlater - get rid of this (if possible)

    # First, assume it is uuencoded.  If not, then try MIME.
    if (process_body_uu($num, $body, $groupdir, $newfilename)) {
        $exit_code++;
    } 
    else {
        my $headers = $nntp->head($num)
            or warn "WARNING: Net::NNTP->head($num) failed.\n";
        if ($headers) {
            if ($single_part) {
                if (process_mime($num, $headers, $body, $groupdir, $newfilename)) {
                    $exit_code++;
                } 
                else {
                    reject_article($num, $groupdir, $headers, $body) if ($opt{'r'});
                }
            } 
            else {
                if (process_mime_mp($num, $headers, $body, $groupdir, $newfilename)) {
                    $exit_code++;
                } 
                else {
                    reject_article($num, $groupdir, $headers, $body) if ($opt{'r'});
                }
            }
        }
    }
}


sub process_multiparts($$$$$$) {
    my $group    = shift;
    my $subjects = shift;
    my $filters  = shift;
    my $matches  = shift;
    my $groupdir = shift;
    my $newsrc   = shift;

    # The key is the name, the value is the number of parts.
    my %numparts;

    # The key is the name, the value is a reference
    # to a hash of the article numbers for the parts.
    my %articles;

    # The key is the name, the value is 1 if all parts were found.
    my %gotparts;

    # Pass 1 - assemble list of parts.
    print "Pass 1: Assembling";
    my $num_headers = 0;
    foreach my $num (sort { $a <=> $b } keys %$subjects) {

        if (($num_headers++ % $ARTICLES_PER_DOT) == 0) { print '.'; }

        my $subject = $subjects->{$num};

        if ($subject =~ /(.+\.($multi_ext))/) {

            my $match = $1;
            $matches->{$num} = $MATCH_EXT;

            if (defined $filters->{$num}) {
                $newsrc->mark($group,$num) if $opt{'z'};
                next;
            }

            # Process single part articles in then single-parts pass.
            next if (! ($subject =~ /\((\d+)\/(\d+)\)/ || $subject =~ /\[(\d+)\/(\d+)\]/) );
            my $part = $1;
            my $total = $2;
            
            # Process multi-part articles consisting of a single part in the single-parts pass.
            next if ($total == 1);
            
            # let everyone know that this was processed by multipart
            $matches->{$num} = $MATCH_MULTI;

            # Consider multipart articles if they are being processed
            # during this run.
            if (!$opt{'S'}) {
                
                # Kill leading zeros.
                $part =~ s/^0+(\d+)$/$1/;
                
                if ($part == 0) {
                    # Maybe someday save part 0 off as a text file?
                    # For now, just skip part 0's.
                    
                    # MBSlater - save 0 file
                }
                else {
                    
                    if (!exists($numparts{$match})) {
                        $numparts{$match} = $total;
                        $gotparts{$match} = 0;
                        $articles{$match} = {};
                    }
                    
                    $articles{$match}->{$part} = $num;
                    
                    my $items = keys(%{$articles{$match}});
                    if ($items == $numparts{$match}) {
                        $gotparts{$match} = 1;
                    }
                }
            }
        }
    }
    print " done.\n";

    if ($opt{'s'}) {
        # Just log the multipart subject hits for posterity.
        my $msubfile = $groupdir."/multiparts.log";
        open(MSUBJECTS, "> $msubfile") 
            or die "ERROR: open($msubfile) failed.\n";
        foreach my $match (sort keys %articles) {
            if ($gotparts{$match}) {

                print MSUBJECTS $articles{$match}->{1}, "|", $match, "\n";
#                print MSUBJECTS $match, "\n";
                
# This prints out the article numbers of the parts.
#                print MSUBJECTS $match, "|";
#                my $first = 1;
#                for (sort { $a <=> $b } keys %{$articles{$match}}) {
#                    print MSUBJECTS "," if (!$first); $first = 0;
#                    my $tpart = $_;
#                    my $num = $articles{$match}->{$tpart};
#                    print MSUBJECTS $num;
#                }
#                print MSUBJECTS "\n";
            }
        }        
        close(MSUBJECTS);
        return;
    }

    # [OPTIONAL] - prompt to see if they want binaries.
    # This is only used for when doing interactive multipart 
    # selection (opt{i}).
    # The key is the name, the value is 1 if they want this
    # article.  Otherwise, assume all multipart articles are
    # wanted.
    my %wanted;
    
    # Eliminate incompletes, since the prompting and sorting depends on the
    # existence of part 1 of every binary.
    foreach my $match (keys %articles) {
        if (!$gotparts{$match}) {
            my $items = keys(%{$articles{$match}});
            my $titems = $numparts{$match};
            print "   Not complete: [$match] ($items of $titems)\n";
            delete $articles{$match};
        }
    }
    
    if ($opt{'i'}) {
        print "Pass 2: Prompting...";
        foreach my $match (sort { $articles{$a}->{1} <=> $articles{$b}->{1} } keys %articles) {
            
            # Assume they don't want it.
            $wanted{$match} = 0;
            
            if ($gotparts{$match}) {
                print "Got all ", $numparts{$match};
                print " parts of ", $match, "\n";
                my $wanted = yesno("Want it?");
                last if ($wanted == 2); # Escape and get started downloading.
                $wanted{$match} = $wanted;
            }
        }
        print "done.\n";
    }
    
    # Pass 3 - download parts and assemble articles.
    if ($opt{'i'}) {
        print "Pass 3: Retrieval...\n";
    }
    else {
        print "Pass 2: Retrieval...\n";
    }
    
    # At this point the only articles that remain are complete.
    my $num_art = scalar keys %articles;
    my $curr_art = 0;
    foreach my $match (sort { $articles{$a}->{1} <=> $articles{$b}->{1} } keys %articles) {

        $curr_art++;
        
        # Skip unwanted articles in interactive mode.
        next if ($opt{'i'} && !$wanted{$match});
        
        # Retrieve article parts in order.
        print "Retrieving: [$curr_art of $num_art] ", $match, "\n";
        print "   Subject: [" . $subjects->{$articles{$match}->{1}} . "]\n";
        
        # MBSlater - this could be nicer if it were something like:
        #    $subjects->{$articles{$match}{1}} . "." . $extension;
        my $newfilename = $match;   
        
        my @mark_list;
        my @parts;
        my $gotparts = 0;
        
        if ($opt{'A'}) {
            # Create empty parts file.
            open(PARTSFILE, "> ".$platform->get_tempfilename) 
                or die "ERROR: open(".$platform->get_tempfilename.") failed.\n";
            close(PARTSFILE);
        }

      PARTS: foreach my $tpart (sort { $a <=> $b } keys %{$articles{$match}}) {
          if ($opt{'A'}) {
              # Append this part to the parts file.
              open(PARTSFILE, ">> ".$platform->get_tempfilename) 
                  or die "ERROR: open(".$platform->get_tempfilename.") failed.\n";       
          }
          my $num = $articles{$match}->{$tpart};
          print "Part ", $tpart, " of ", $numparts{$match}, ", article ", $num, "... ";
          
          my $p = $nntp->body($num);
          if (!defined $p) {
              warn "\nWARNING: Net::NNTP->body($num) failed.\n";
              warn "\nWARNING: Skipping $match\n";
              last PARTS;
          }
          else {
              push(@mark_list,$num);
              if ($opt{'A'}) {
                  # Write this part out to the parts file.
                  print PARTSFILE @{$p};
              }
              else {
                  push(@parts, @{$p});
              }
              $gotparts++;
              if ($opt{'D'}) {
                  dump_body_part($group, $num, $p, $groupdir); 
              }
          }
          if ($opt{'A'}) {
              close (PARTSFILE);
          }
      }
        
        if ($gotparts == $numparts{$match}) {
            
            # Mark the articles as read, even if the decode fails, 
            # We got what we got.
            $newsrc->mark_list($group, \@mark_list);
            $newsrc->save if ($opt{'n'});

            # Notice how this passes the article number of the first part.
            my $num = $articles{$match}{1};
            if ($opt{'A'}) {
                decode_file($num, $platform->get_tempfilename, $groupdir, $newfilename, 0);
            }
            else {
                decode_file($num, \@parts, $groupdir, $newfilename, 0);
            }
        }
    }
}


sub dump_articles($$$) {
    my $group    = shift;
    my $subjects = shift;
    my $groupdir = shift;
    
    foreach my $num (sort { $a <=> $b } keys %$subjects) {
        print "Dumping ", $num, "...";
        my $article = $nntp->article($num);
        my $artfile = $groupdir.$platform->get_separator.$num;
        open(ARTFILE, "> $artfile") 
            or die "ERROR: open($artfile) failed.\n";
        for (@{$article}) {
            print ARTFILE $_;
        }
        close(ARTFILE);
        print "done.\n";
    }
}


sub dump_body_part($$$) {
    my $group    = shift;
    my $num      = shift;
    my $body     = shift;
    my $groupdir = shift;

    print "Dumping ", $num, "...";
    my $partfile = $groupdir.$platform->get_separator.$num;
    open(PARTFILE, "> $partfile") 
        or die "ERROR: open($partfile) failed.\n";

    my $headers = $nntp->head($num)
        or warn "WARNING: Net::NNTP->head($num) failed.\n";

    for (@{$headers}) {
        print PARTFILE $_;
    }
    print PARTFILE "\n";

    open(DUMPFILE, "< $body")
    or die "ERROR: open($body) failed.\n";       
    while (<DUMPFILE>) {
        print PARTFILE $_;
    }
    close(DUMPFILE);
    close(PARTFILE);
    print "done.\n";
}


sub process_article($$$$) {
    my $num = shift;
    my $groupdir = shift;
    my $newfilename = shift;
    my $group = shift;

    print "Considering single-part article ", $num, "\n";
    my $body = $nntp->body($num);
    if (!defined $body) {
        warn "\nWARNING: Net::NNTP->body($num) failed.\n";
        warn "\nWARNING: Skipping $num\n";
    }
    else {
        if ($opt{'A'}) {
            # Save body to disk file, and decode that.
            open(SINGFILE, "> ".$platform->get_tempfilename) 
                or die "ERROR: open(".$platform->get_tempfilename.") failed.\n";       
            print SINGFILE @{$body};
            close SINGFILE;
            decode_file($num, $platform->get_tempfilename, $groupdir, $newfilename, 1);
        }
        else {
            # Pass array to be decoded.
            decode_file($num, $body, $groupdir, $newfilename, 1);
        }
    }
}


sub process_singles($$$$$$) {
    my $group    = shift;
    my $subjects = shift;
    my $filters  = shift;
    my $matches  = shift;
    my $groupdir = shift;
    my $newsrc   = shift;

    if ($opt{'s'}) {
        # Just log the raw subjects for posterity.
        my $subfile = $groupdir."/subjects.log";
        open(SUBJECTS, "> $subfile") 
            or die "ERROR: open($subfile) failed.\n";
        foreach my $num (sort { $subjects->{$a} cmp $subjects->{$b} } keys %$subjects) {
            my $subject = $subjects->{$num};
            print SUBJECTS $num, "|", $subject, "\n";
        }        
        close(SUBJECTS);
    }
    else {
        foreach my $num (sort { $a <=> $b } keys %$subjects) {
            my $subject = $subjects->{$num};

            # Check for already processed in multi-part.
            next if (defined $matches->{$num} &&
                     $matches->{$num} == $MATCH_MULTI);

            # Toss if not included or excluded.
            if (defined $filters->{$num}) {
                $newsrc->mark($group,$num) if $opt{'z'};
                next;
            }
                
            if ($subject =~ /(.+\.($single_ext))/) {
                
                my $match = $1;
                $matches->{$num} = $MATCH_EXT;

                print "    Retrieving: $match\n";
                print "Article Number: [" . $num . "]\n";
                print "       Subject: [" . $subject . "]\n";
                
                process_article($num, $groupdir, $match, $group);
                $newsrc->mark($group, $num);
                $newsrc->save if ($opt{'n'});
            }
            else {
                if ($opt{'g'}) {
                    $matches->{$num} = $MATCH_SINGLE;

                    # Nothing interesting in the header.  Maybe
                    # there is an article in the body, so go ahead
                    # and process the body as a single part article.
                    process_article($num, $groupdir, "", $group);
                    $newsrc->mark($group, $num);
                    $newsrc->save if ($opt{'n'});
                }
                else {
                    reject_subject($num, $subject) if ($opt{'r'});
                }
            }
        }
    }
}


# ------------------------------------------------------------
# REJECT logging...
# ------------------------------------------------------------
sub open_reject($$) {
    my $group = shift;
    my $groupdir = shift;
        
    my $rejectfile = $groupdir."/rejects.log";
    open(REJECT, "> $rejectfile") 
        or die "ERROR: open($rejectfile) failed.\n";
}


sub close_reject() {
    close(REJECT);
}


sub reject_subject($$) {
    my $num = shift;
    my $subject = shift;
    print REJECT $num, "|SUBJECT|", $subject, "\n";
}


sub reject_article($$$$) {
    my $num        = shift;
    my $groupdir   = shift;
    my $headers    = shift;
    my $body       = shift;

    print REJECT $num, "|BODY|\n";

    my $rejectpath = $groupdir.$platform->get_separator.$num;
    open(REJECTARTICLE, "> $rejectpath") or warn "Can't open $rejectpath: $!";

    print REJECTARTICLE @{$headers};

    # Blank line to separate headers and body.
    print REJECTARTICLE "\n"; 

    if ($opt{'A'}) {
        open(REJFILE, "< $body") or die "ERROR: open($body) failed.\n";       
        while (<REJFILE>) {
            print REJECTARTICLE $_;
        }
        close REJFILE;
    }
    else {
        print REJECTARTICLE @{$body};
    }
    close REJECTARTICLE or warn "Can't close $rejectpath: $!";
}

sub save_logs($$$$$) {
    my $group    = shift;
    my $subjects = shift;
    my $filters  = shift;
    my $matches  = shift;
    my $groupdir = shift;


    print "Saving logs... ";

    # MBSlater - lots and lots of logging here:
    # - all subjects
    # - passed filters subjects
    # - failed filters subjects
    # - matched extension but didn't pass filters
    
    # todo - make these append as an option?

    # my $fn_all = "$groupdir/$group.all.log";
    # open(FP_ALL, "> $fn_all")   or warn "Can't open $fn_all: $!";

    my $fn_exclude = "$groupdir/$group.excluded.log";
    open(FP_EXCLUDE, "> $fn_exclude")   or warn "Can't open $fn_exclude: $!";
    # print FP_EXCLUDE "--- new ----------------------------------------\r\n";

    my $fn_extension = "$groupdir/$group.matched_extension_but_failed_filter.log";
    open(FP_EXTENSION, "> $fn_extension")   or warn "Can't open $fn_extension: $!";
    # print FP_EXTENSION "--- new ----------------------------------------\r\n";

    # MBSlater
    # This approach causes the HUGE memory hit... shrug.  why?
    # foreach my $num (sort { $subjects->{$a} cmp $subjects->{$b} } keys %$subjects) {

    # fairly stoopid way to do it, but it doesn't eat up tons of memory
    my (@lexclude, @lextension);
    foreach my $num (keys %$subjects) {
        # print FP_ALL "$num|" . $subjects->{$num} . "\r\n";
        if (defined $filters->{$num}) {
            if ($filters->{$num} & $FILTER_EXCLUDED) {
                push(@lexclude,"$num|" . $subjects->{$num});
            }
        }
         if (defined $matches->{$num}) {
            if ($matches->{$num} == $MATCH_EXT) {
                my $subject = $subjects->{$num};
                if ($subject =~ /\((\d+)\/(\d+)\)/ || $subject =~ /\[(\d+)\/(\d+)\]/) {
                    my $part = $1;
                    my $total = $2;
                    $part =~ s/^0+(\d+)$/$1/;
                    if ($part == 1) {
                        push(@lextension,"$num|" . $subjects->{$num});
                    }
                } else {
                    push(@lextension,"$num|" . $subjects->{$num});
                }
            }
        }
    }
    
    # sort the lists, print the logs
    foreach my $i (sort { (split('\|',$a))[1] cmp (split('\|',$b))[1] } @lexclude) {
        print FP_EXCLUDE "$i\n";
    }
    foreach my $i (sort { (split('\|',$a))[1] cmp (split('\|',$b))[1] } @lextension) {
        print FP_EXTENSION "$i\n";
    }

    # close(FP_ALL)       or warn "Can't close $fn_all: $!";
    close(FP_EXCLUDE)   or warn "Can't close $fn_exclude: $!";
    close(FP_EXTENSION) or warn "Can't close $fn_extension: $!";

    print "done.\n";
}

# ------------------------------------------------------------
# Group processing.
# ------------------------------------------------------------
sub process_group($$$) {
    my $group    = shift;
    my $newsrc   = shift;
    my $groupdir = shift;

    open_reject($group, $groupdir) if ($opt{'r'});

    print "------------------------------------------------------------\n";
    print "Group ", $group, "\n";

    defined $nntp->group($group) or return;

    my ($num_articles, $first, $last) = $nntp->group($group);

    if ($num_articles > 0) {

        # Consider all headers unless -f or -l are in effect.
        if ($opt{'f'} && ($first + $opt{'f'} <= $last)) {
            $last = $first + $opt{'f'};
        }
        if ($opt{'l'} && ($last - $opt{'l'} >= 0)) {
            $first = $last - $opt{'l'};
        }

        print "   $num_articles articles, from $first to $last\n";

        # clean up the newsrc file a bit, mark articles that have expired
        if ($first > 1) {
            $newsrc->mark_range($group,1,$first-1);
            $newsrc->save;
        }

        # The key is the article number, the value is the Subject: header.
        my (%subjects, %filter, %match);

        get_subjects($first, $last, $group, \%subjects, \%filter, \%match, $groupdir, $newsrc);

        if ($opt{'d'}) {
            dump_articles($group, \%subjects, $groupdir);
        }
        else {

            $nntp->reset_stats();

            # Need to make a pass through the multiparts, even if they
            # only want single parts, to cull the multiparts out.
            process_multiparts($group, \%subjects, \%filter, \%match, $groupdir, $newsrc);

            if (!$opt{'M'}) { 
                process_singles($group, \%subjects, \%filter, \%match, $groupdir, $newsrc);
            }

            if (!$opt{'a'} && !$opt{'s'}) {
                # Don't catch up if just logging the subjects,
                # or if scanning all articles.
                # Otherwise, save all the marks we have made.
                $newsrc->save;
            }

            if ($opt{'Z'}) {
                save_logs($group, \%subjects, \%filter, \%match, $groupdir);
            }

            my ($bytes, $delta, $rate) = $nntp->get_stats();
            print "STATS: total bytes = $bytes  secs = $delta  rate = " . sprintf("%.03f",$rate) . " KB/sec\n";
        }
    }

    close_reject() if ($opt{'r'});
}


sub process_groups() {
    # Ensure data subdirectory exists.
    if (! -d $platform->get_datadir) {
        mkdir($platform->get_datadir, $perms) 
            or die "ERROR: mkdir(".$platform->get_datadir.") failed: $!\n";
    }

    # Ensure temp subdirectory exists.
    if (! -d $platform->get_tempdir) {
        mkdir($platform->get_tempdir, $perms) 
            or die "ERROR: mkdir(".$platform->get_tempdir.") failed: $!\n";
    }

    # Get list of groups from newsrc.
    my $newsrc = new News::Newsrc 
        or die "ERROR: new News::Newsrc failed.\n";
    $newsrc->load($platform->get_newsrcname)
        or die "ERROR: News::Newsrc->load(".$platform->get_newsrcname.") failed.\n";
    my @groups = $newsrc->sub_groups()
        or die "ERROR: News::Newsrc->sub_groups() failed.\n";

    # Process each group in the newsrc.
    foreach my $group (@groups) {

        # Ensure group subdirectory exists.
        my $groupdir;
        if (defined $forcedir) {
            $groupdir = $platform->get_datadir.$platform->get_separator.$forcedir;
        } else {
            $groupdir = $platform->get_datadir.$platform->get_separator.$group;
        }
        if (! -d $groupdir) {
            mkdir($groupdir, $perms) 
                or die "ERROR: mkdir($groupdir) failed: $!\n";
        }

        process_group($group, $newsrc, $groupdir);
        clean_up();
    }

    clean_up();

    print "Quitting... ";
    $nntp->quit;
    print "done.\n";
}

sub grace() {
    clean_up();
    die "\n\nPlease check to make sure all ubhtemp files are deleted from your temp directory \n";;
} 

main();


