#!/usr/bin/perl
use LWP::UserAgent;
use strict;

# Perlreactor 0.2
# by Rickp


########## Configure here ################

# File listing the URLs to scan for ed2k links. A different file can be
# named on the command line.
my $CONFIGFILE = "$ENV{HOME}/.prconf";
# File that records the hash of every ed2k link already handled, one per
# line; links whose hash is found here are skipped.
my $HASHFILE = "$ENV{HOME}/.prhashfile";
# Base URL of the mlDonkey HTTP interface; download requests are sent to
# "$HTTPURL/submit?q=dllink+<link>".
my $HTTPURL = "http://localhost:4080";
# Credentials for HTTP basic authentication against $HTTPURL. They are only
# sent when both values are non-empty.
my $HTTPUSER = "";
my $HTTPPASS = "";

########## End of Configuration ##########

# Shared HTTP client used for every page fetch and for the mlDonkey requests.
# Constructed with a direct class-method call: indirect object syntax
# ("new Class") is parsed heuristically and can silently bind to a sub named
# "new" in the current package.
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/5.0(PerlReactor0.2)");

# Make sure the hashfile exists and is writable before doing any work.
# Opening for append creates the file when it is missing and never truncates
# an existing one.
if (! open(HFILE, '>>', $HASHFILE)) {
	print "Can not write to hashfile ($HASHFILE)\n";
	exit 1;
}
close(HFILE);

# Hashes recorded by earlier runs, one entry per line (newline kept).
my @hashfile;
# Request object that is reused for every HTTP request built further down.
my $req;

# A hashfile that cannot be read must stop the run: continuing with an empty
# list would treat every link as new and download everything again.
if (! open(HFILE, '<', $HASHFILE)) {
	print "Can not read hashfile ($HASHFILE)\n";
	exit 1;
}
@hashfile = <HFILE>;
close(HFILE);

# Keep the file open in append mode for the rest of the script; newly
# handled hashes are printed to HFILE as they occur.
if (! open(HFILE, '>>', $HASHFILE)) {
	print "Can not write to hashfile ($HASHFILE)\n";
	exit 1;
}

# Command line: an optional first argument starting with "-a" switches to
# add-only mode; any other first argument names the config file. A second
# argument, when present, always names the config file.
my $add = 0;

if (@ARGV) {
	if ($ARGV[0] =~ /^-a/) {
		$add = 1;
	}
	else {
		$CONFIGFILE = $ARGV[0];
	}
	$CONFIGFILE = $ARGV[1] if @ARGV > 1;
}

# Open the config file for reading. When that fails, try to create a
# commented template instead and stop with exit status 1 either way, since
# there is nothing to scan yet. On success CFILE stays open for the parsing
# code that follows.
#
# Three-argument open is used so that whitespace or mode characters in a
# user-supplied file name are never interpreted as part of the open mode.
if (! open(CFILE, '<', $CONFIGFILE)) {
	if (open(CFILE, '>', $CONFIGFILE)) {
		# The template names the script as './perlreactor' in both places
		# so the instructions it gives can actually be followed.
		print CFILE "# This is the url-file for perlreactor
#################################################################
# All the URLs in this File will be scanned for ed2klinks
# then all the files that are not already in your hashfile
# will be downloaded. If you put a '->' before the url, 
# perlreactor will dig 1 step into the site. 
# To add all the links on the pages to your hashfile 
# type './perlreactor -a'. Then you just
# have to type './perlreactor' in the future (or add a cron-job)
# to get all the new files on the pages.
#################################################################
# Here are a few examples (uncomment the urls to activate them):

# Sharereactor:
# Get every new Enterpise-episode from season 2:
# http://www.sharereactor.com/downloadrelease.php?id=2493
# Southpark season 6
# http://www.sharereactor.com/downloadrelease.php?id=1244
# Friends season 9
# http://www.sharereactor.com/downloadrelease.php?id=2606
#
# Get all the current releases on the screener-page:
#->http://www.sharereactor.com/category.php?id=2

# Eselfilme (German Page)
# Get the Top Ten
# ->http://62.4.85.233/filme/videos_top10/html/


# Goldesel (German Page)
# Get the new Stuff
# ->http://209.50.236.70/content/news.html
";

		# Buffered write errors only surface at close, so report success
		# only after the handle has been closed cleanly.
		if (close(CFILE)) {
			print "I just created the config-file $CONFIGFILE for you. Please edit it\n";
		}
		else {
			print "Could not write the config file $CONFIGFILE: $!\n";
		}
	}
	else {
		print "The config file $CONFIGFILE doesn't exist and I can not create it\n";
	}
	exit 1;
}

# URLs that are scanned directly for ed2k links.
my @urllist;
# URLs marked with '->': their pages are fetched first and the pages they
# link to are added to @urllist.
my @fetchlist;

# Read the config file line by line. Only lines that start (after optional
# leading blanks) with "http://" or "->http://" are used; everything else,
# including '#' comments, is ignored.
while (my $configline = <CFILE>) {
	# $1 is the optional '->' marker, $2 the URL. The trailing \s* drops the
	# newline as well as stray blanks or a carriage return, which would
	# otherwise become part of the requested URL.
	next unless $configline =~ /^ *(->)?(http:\/\/.*?)\s*$/;
	if (defined $1) {
		push(@fetchlist, $2);
	}
	else {
		push(@urllist, $2);
	}
}

# The config file is not needed any more; release the handle.
close(CFILE);


# Dig one step into every '->' page: fetch it and add the pages it links to
# to @urllist, so that they are scanned for ed2k links afterwards.
foreach my $htmlpart (@fetchlist) {
	$req = HTTP::Request->new(GET => $htmlpart);
	$req->header('Accept' => 'text/html');
	my $html = $ua->request($req);
	if (! $html->is_success) {
		# Report the failure the same way the main scan loop does instead
		# of skipping the page silently.
		print "Error while fetching $htmlpart\n";
		next;
	}

	# Split the page URL into "http://host[:port]" ($domain) and its path
	# without query string. Every entry of @fetchlist starts with
	# "http://", so the match only fails on unexpected input.
	my ($domain, $path) = $htmlpart =~ m{^(http://[^/?\#]*)([^?\#]*)};
	next unless defined $domain;

	# $baseurl is the directory of the page: drop the last path segment
	# whatever characters it contains, and fall back to "/" for a URL that
	# consists of the host only.
	$path =~ s{[^/]*$}{};
	$path = "/" if $path eq "";
	my $baseurl = $domain . $path;

	# Attribute values are enclosed in double quotes, so splitting on them
	# yields every quoted value (among other fragments) as its own element.
	my @htmlarray = split(/\"/, $html->content);
	foreach my $srcurl (@htmlarray) {
		if ($domain =~ /sharereactor\.com/) {
			# On sharereactor only the download.php links are followed;
			# they are appended to the page's directory as found.
			if ($srcurl =~ /download\.php\?/) {
				push(@urllist, $baseurl . $srcurl);
			}
			next;
		}

		# Elsewhere follow anything that looks like a static page or a
		# php/asp script with a query string.
		next unless ($srcurl =~ /.+(\.htm|\.html)$/) || ($srcurl =~ /.+(\.php|\.asp)\?.*/);

		if ($srcurl !~ /^http/) {
			# Host-relative links hang off the domain, all other relative
			# links off the directory of the current page.
			if ($srcurl =~ /^\//) {
				$srcurl = $domain . $srcurl;
			}
			else {
				$srcurl = $baseurl . $srcurl;
			}
		}
		push(@urllist, $srcurl);
	}
}
	
# Announce what is about to happen together with the number of URLs queued.
my $status = $add
	? "Adding links from " . scalar(@urllist) . " urls to the hashfile\n"
	: "Checking " . scalar(@urllist) . " urls\n";
print $status;
		
# Scan every queued URL for ed2k file links. Each link whose hash is not yet
# known is either submitted to mlDonkey (default) or merely recorded (-a);
# in both cases its hash is appended to the hashfile.
foreach my $url (@urllist) {
	$req = HTTP::Request->new(GET => "$url");
	$req->header('Accept' => 'text/html');
	my $html = $ua->request($req);
	if (! $html->is_success) {
		print "Error while fetching $url\n";
		next;
	}

	# Attribute values are enclosed in double quotes, so splitting on them
	# isolates each one; keep those that contain an ed2k file link.
	my @linklist;
	foreach my $htmlpart (split(/"/, $html->content)) {
		if ($htmlpart =~ /ed2k:\/\/\|file\|/) {
			# Turn the first "|/" (normally the link terminator) into "|".
			$htmlpart =~ s/\|\//\|/;
			push(@linklist, $htmlpart);
		}
	}

	LINK: foreach my $ed2klink (@linklist) {
		# Fields of "ed2k://|file|<name>|<size>|<hash>|":
		# index 2 is the file name, index 4 the hash.
		my @elements = split(/\|/, $ed2klink);
		my $hash = $elements[4];

		# A link without a hash field cannot be tracked. Reject it here
		# rather than letting an empty value reach the lookup below.
		if (!defined($hash) || $hash eq "") {
			print "Malformed ed2k-link!\n";
			next LINK;
		}

		# Skip links whose hash occurs in a known hashfile line. A plain
		# substring search is used so that the hash is never interpreted
		# as a regular expression.
		my $gotit = 0;
		foreach my $known (@hashfile) {
			if (index($known, $hash) >= 0) {
				$gotit = 1;
				last;
			}
		}
		next LINK if $gotit;

		# Validate the overall shape on the unescaped link, using a lexical
		# variable so the global $_ is left untouched.
		my $unescaped = URI::Escape::uri_unescape($ed2klink);
		if ($unescaped !~ /^ed2k:\/\/\|file\|.*\|.*\|.*\|$/) {
			print "Malformed ed2k-link!\n";
			next LINK;
		}

		if (! $add) {
			# NOTE(review): the link is placed in the query string exactly
			# as it was found in the page; confirm that mlDonkey expects it
			# in that form.
			$req = HTTP::Request->new(GET => "$HTTPURL/submit?q=dllink+$ed2klink");
			if ($HTTPUSER && $HTTPPASS) {
				$req->authorization_basic($HTTPUSER, $HTTPPASS);
			}
			my $response = $ua->request($req);
			if (! $response->is_success) {
				print "Can not connect to mlDonkey.\nPlease start it and set up the right server in perlreactor\n";
				exit 1;
			}
			print HFILE $hash . "\n";
			print "Downloading $elements[2]\n";
		}
		else {
			print HFILE $hash . "\n";
			print "Adding $elements[2] to reactorfile\n";
		}

		# Remember the hash for the rest of this run as well, so the same
		# link appearing again (on this page or a later one) is not
		# submitted and recorded a second time.
		push(@hashfile, $hash . "\n");
	}
}

# Buffered write errors only surface at close; report them instead of
# silently losing the newly recorded hashes.
if (! close(HFILE)) {
	print "Can not write to hashfile ($HASHFILE)\n";
	exit 1;
}