#!/usr/bin/perl
#-----------------------------------------------------------------------------#
#
#  Find fastest CVSup server script for FreeBSD - version 0.2.6
#  Copyright (c) A.J.Robinson (ajr@subdimension.com) 2002
#  Distributed under the BSD license.
#
#-----------------------------------------------------------------------------#
#
# Changes:
# 0.2.6  - changed from freebsdmirrors.org to the online handbook.
# 0.2.5  - added support for other servers (NetBSD/OpenBSD) that don't match
#          the naming scheme
# 0.2.4  - ability to get CVSup mirrors from www.freebsdmirrors.org, script re-
#          writes itself if $REWRITE_SELF is set. Removed @ALL_CC.
# 0.2.3  - added exit codes, POD documentation, exit with usage() if no params
#          given, 'local' and 'all' targets,
# 0.2.2  - fixed stupid coding with double DNS lookup (PeerAddr=>$server)
# 0.2.1  - better error messages, wrap gethostbyname in eval block to timeout,
#          verbose and quiet modes
# 0.2.0  - generate server list from hash
# 0.1.0  - basic version - just about worked!
#
#-----------------------------------------------------------------------------#
# Load dependent modules

# We want to do this before the other modules so it fails quickly.
# Print a friendly error so newbies aren't completely stuck:
# (sorry it's a bit FreeBSD specific!)
BEGIN {
  eval('use Time::HiRes qw(gettimeofday)');
  if ($@) {
    die( '\nYou need the Time::HiRes perl module, you can either:\n\n',
         '1. Load it from CPAN:\n',
         '     # perl -MCPAN -e \'install Time::HiRes\'\n\n',
         '2. Fetch it as a package:\n',
         '     # pkg_add -r p5-Time-HiRes\n\n',
         '3. Compile it from the ports collection:\n',
         '     # cd /usr/ports/devel/p5-Time-HiRes ; make install clean\n\n',
       );
  }
}

# standard modules in perl distribution which _should_ be there

$|++; # (unbuffer STDOUT)
use strict;
use IO::Socket;
use Getopt::Std ('getopts');
my $HAVE_LWP = eval('use LWP::Simple qw(get)');

#-----------------------------------------------------------------------------#
# User Configurable Variables (you can change these):

# edit this for countries which are fairly close to you:

my @LOCAL_CC = qw( uk ie fr de fi no nl );  # (lower case, separated by space)

# network timeouts in seconds:

my $DNS_TIMEOUT = 5;   #  DNS lookups
my $CVS_TIMEOUT = 5;   #  connect to the CVS server

my $REWRITE_SELF = 0;  # shall I rewrite myself when new servers are found
                       # in the online Handbook? (only root can do this)

#-----------------------------------------------------------------------------#
# Other variables

# How many FreeBSD CVSup servers has each country got? (well, not strictly
# true... what's the highest cvsup number in use?

my %FREEBSD_SERVERS = (
  'ar' => 1,    # Argentina
  'at' => 1,    # Austria
  'au' => 3,    # Australia
  'br' => 4,    # Brazil
  'ca' => 2,    # Canada
  'cn' => 1,    # China
  'cz' => 1,    # Czech Republic
  'de' => 7,    # Germany
  'dk' => 1,    # Denmark
  'ee' => 1,    # Estonia
  'es' => 3,    # Spain
  'fi' => 2,    # Finland
  'fr' => 8,    # France
  'gr' => 2,    # Greece
  'ie' => 1,    # Ireland
  'is' => 1,    # Iceland
  'jp' => 6,    # Japan
  'kr' => 2,    # Korea
  'lt' => 2,    # Lithuania
  'lv' => 1,    # Latvia
  'nl' => 3,    # Netherlands
  'no' => 1,    # Norway
  'nz' => 1,    # New Zealand
  'pl' => 1,    # Poland
  'pt' => 1,    # Portugal
  'ro' => 1,    # Romania
  'ru' => 6,    # Russia
  'se' => 2,    # Sweden
  'si' => 1,    # Slovenia
  'sk' => 2,    # Slovak Republic
  'sm' => 1,    # San Marino
  'tw' => 3,    # Taiwan
  'ua' => 5,    # Ukraine
  'uk' => 5,    # United Kingdom
  'us' => 17,   # USA
  'za' => 2,    # South Africa
);

my %OTHER_SERVERS = (

  'netbsd'  => [ 'cvsup.de.netbsd.org',
                 'cvsup2.de.netbsd.org',
                 'cvsup.jp.netbsd.org',
                 'cvsup.pasta.cs.uit.no',
                 'cvsup.uk.netbsd.org',
               ],
  'openbsd' => [ 'cvsup.uk.openbsd.org',
                 'cvsup.de.openbsd.org',
                 'cvsup2.de.openbsd.org',
                 'cvsup.hu.openbsd.org',
                 'cvsup.fr.openbsd.org',
                 'cvsup.ca.openbsd.org',
                 'cvsup.usa.openbsd.org',
                 'cvsup.kr.openbsd.org',
                 'cvsup.no.openbsd.org',
                 'cvsup.pt.openbsd.org',
                 'anoncvs.de.openbsd.org',
                 'rt.fm',
                 'skeleton.phys.spbu.ru',
                 'cvsup.jp.openbsd.org',
                 'wiretapped.net',
               ],
);

my $CVSUP_PORT = 5999; # we attempt to connect on this port from an
                       # unprivaleged local port - can change this
                       # so we look for fastest FTP mirrors instead.
                       # But that's another script... ;)

# The URL to connect to when remotely fetching a list of CVSup servers:

my $URL="http://www.freebsd.org/doc/en_US.ISO8859-1/books/handbook/cvsup.html";


#-----------------------------------------------------------------------------#
# Usage() sub

sub Usage {
  my $error = shift;
  print "\n  fastest_cvsup - finds fastest CVSup server\n\n";
  print "  Error: $error\n" if $error;
  print "  Usage: $0 [-h] [-q] [-r] -c (country codes|local|all)\n",
        "  Where: -h           prints this screen\n",
        "         -q           quiet mode, only outputs fastest server\n",
        "         -r           uses remote server list from FreeBSD Handbook\n",
        "         -c aa,bb,cc  queries servers in countries aa,bb,cc\n",
        "            local     queries servers set as local in the script\n",
        "            all       queries *all* FreeBSD servers\n\n",
        "  See the man page, fastest_cvsup(7), for more details.\n\n";
  exit(1);
}

#-----------------------------------------------------------------------------#
# get user supplied options

# we need the ':' to store the actual values of the -c switch

my %opt = ();
getopts('hqrc:', \%opt);

# if no args given, or help page requested

Usage() if ( ! %opt or $opt{'h'} );

# shall we be quiet?

my $VERBOSE = 1;                   # let's be loud (default)
if ( $opt{'q'} ) { $VERBOSE = 0; } # only display fastest server

# shall we get a list of CVSup servers from $URL?

my $REMOTE_SERVERLIST = 0;    # default is no, use internal list
if ( $opt{'r'} ) { $REMOTE_SERVERLIST = 1; }

# which servers shall we query?

my @countries = ();

unless ( $opt{'c'} ) {

  # nothing to query, so we exit with an error message

  Usage('You need to specify which servers to query!');

}
else {

  $opt{'c'} = lc( $opt{'c'} );   # convert to lowercase

  if ( $opt{'c'} eq 'local' ) {  # shall we query local servers ?
    @countries = @LOCAL_CC;
  }

  elsif ( $opt{'c'} eq 'all' ) { # shall we query all servers ?
    @countries = sort ( keys %FREEBSD_SERVERS );
  }

  else {  # build a list from the countries specified

    # count number of times the country crops up, if it's greater than 2
    # it's also in the ALL_CC array (valid input) so push it into the
    # countries array (also, specifying -c uk,uk,uk will only query uk
    # servers once)

    my %i = ();                                            # (just a counter)
    for ( split(/,/,$opt{'c'}) ) { $i{$_}++ }              # count times
    for ( keys %i ) { push(@countries,$_) if $i{$_} >= 1 } # build array

  }
}

#-----------------------------------------------------------------------------#

if ( $REMOTE_SERVERLIST ) {

  print ">> Fetching server list..." if $VERBOSE;
  my $html;

  # for testing
  #if (open(F,"<sites.htm")){while(<F>){$html.=$_}close(F)};

  if ( $HAVE_LWP ) {
    $html = get($URL);
  }
  else {
    $html = `fetch -pqo- $URL`;
  }

  if ( $html ) { # did we get anything?

    print "OK!\n" if $VERBOSE;

    my %srv = ();  # temporary hash before transfering into %FREEBSD_SERVERS
    my %cc_name = ();  # hash to hold the full country name

    # get the servers from the handbook - it's a bit more messy, but as
    # freebsdmirrors.org relies on DNS zone transfers (which don't seem to
    # work anymore), it's the next best thing.

    # non-greedy regex to grab what's between the <dl> tags just after the
    # name anchor.
    $html =~ s|.*name="CVSUP-MIRRORS".*?<dl>(.*?)</dl>.*|$1|s;

    # split it up into countries, between '<dt>' tags
    for ( split('<dt>',$html) ) {

      # extract the country name and the rest (what a mess!)

      if ( m/^(\w+)(\s{1}\w+)?\s*(.*)/s ) {
        my $country = $1.$2; # for two word names
        my $mess = $3;       # the rest

        # get rid of crap and the first '<li>'
        $mess =~ s/.*?<li>(.*)/$1/s;

        for ( split('<li>',$mess) ) {

          # extract server name
          my $server;
          ($server = $_ ) =~ s/.*<p>([\w\.]+).*/$1/s;

          # parse non-US servers

          if ( $server =~ /cvsup([\d]*)\.([\w]+)\.freebsd\.org/i ) {
            my $i = $1 || 1;
            my $cc = lc($2);
            if ( !$srv{$cc} or $srv{$cc} < $i ) { $srv{$cc} = $i }

            # save country name
            unless ( $cc_name{$cc} ) { $cc_name{$cc} = $country }
          }

          # parse US servers

          elsif ( $server =~ /cvsup([\d]*)\.freebsd\.org/i ) {
            my $i = $1 || 1;
            if ( !$srv{'us'} or $srv{'us'} < $i ) { $srv{'us'} = $i }
            unless ( $cc_name{'us'} ) { $cc_name{'us'} = $country }
          }
        }
      }
    }

    # check to see if the internal list needs updating...

    my $needs_updating = ();
    for ( keys %srv ) {
      if ( $srv{$_} ne $FREEBSD_SERVERS{$_} ) {
        $needs_updating++;
        last;
      }
    }

    if ( $needs_updating ) {

      if ( $REWRITE_SELF ) {
         
        if ( scalar(getpwuid $<) eq 'root' ) {

          # build a new FREEBSD_SERVERS hash

          my $newstring = "my \%FREEBSD_SERVERS = (\n";
          for ( sort( keys %srv ) ) {

            # 'cc' => number,
            $newstring .= "  \'$_\' => $srv{$_},";

            # variable length spacer
            $newstring .= ( " " x ( 5 - length($srv{$_}) ) );

            # commented out country name
            $newstring .= "# $cc_name{$_}\n";
          }
          $newstring .= ");";

          # read in _this_ file (!)

          open(OLD, "< $0") or die "Error, can't open $0: $!";
          my $bigstring;
          while (<OLD>) { $bigstring .= $_ }
          close(OLD) or die "Error, can't close $0: $!";

          # substitute the variable (non-greedy regex again)

          $bigstring =~ s/(my \%FREEBSD_SERVERS = .*?;)/$newstring/gs;

          # write it out to a new file

          open(NEW, "> $0.new") or die "Error, can't open $0.new: $!";
          print NEW $bigstring or die "Error, can't write $0.new: $!";
          close(NEW) or die "Error, can't close $0.new: $!";

          # swap them over

          rename($0, "$0.orig") or die "Error, can't rename $0 to $0.orig: $!";
          rename("$0.new", $0) or die "Error, can't rename $0.new to $0: $!";

          # set permissions on new file

          chmod(0755,$0) or die "\nCannot chmod(0755,$0): $!\n";

          print ">> The internal CVSup list has been updated.\n" if $VERBOSE;

        }
        else {
          print ">> The internal CVSup list can ONLY be updated by ROOT.\n" if $VERBOSE;
        }
      }
      else {
        # we're not updating automagically, so you get a message to do
        # it yourself!
        print ">> The internal CVSup list needs updating!\n" if $VERBOSE;
      }
    }

    # use the new list we've downloaded
    %FREEBSD_SERVERS = %srv;

    # and we might need to update this if it's changed
    if ( $opt{'c'} eq 'all' ) { @countries = sort ( keys %FREEBSD_SERVERS ) }

  }
  else {
    print "Failed! (Using internal list)\n" if $VERBOSE;
  }
}

#-----------------------------------------------------------------------------#
# now we build an array of servers

my (@servers,@first,@others) = ();

foreach my $cc ( @countries ) {

  # For all the countries except the US, the server names follow the pattern
  # cvsupXX.country.freebsd.org, with the first server omitting the XX digit.
  # US servers don't include the country, but include the first digit.

  for ( my $i=1; $i<=$FREEBSD_SERVERS{$cc}; $i++ ) {

    if ( $cc eq 'us' ) { # use no country code, include the '1'
      if ( $i == 1 ) { push(@first,"cvsup$i.freebsd.org"); }
      else           { push(@others,"cvsup$i.freebsd.org"); }
    }
    else {
      if ( $i == 1 ) { push(@first,"cvsup.$cc.freebsd.org"); }
      else           { push(@others,"cvsup$i.$cc.freebsd.org"); }
    }
  }

  if ( $OTHER_SERVERS{$cc} ) {
    push( @others, @{ $OTHER_SERVERS{$cc} } );
  }

}

# put the first server of each country at the start of the servers array and
# the rest at the end - this gives the DNS server more time to reply. With
# slow connections and caching DNS (looking up the root), this is better, I
# think. Any comments...?

@servers = (@first,@others);


#-----------------------------------------------------------------------------#
# now we time the servers in the array

print ">> Querying servers in countries: @countries\n" if $VERBOSE;

my %time = ();

foreach my $server (@servers) {

  my $ip_packed = ();  # get ip address before attempting to connect, and
  my $ip_addr   = ();  # wrap gethostbyname in an eval block to timeout. IP
                       # address is packed in memory, hence these vars.

  sub timeout { die "TIMEOUT\n" }; # replacement signal
  $SIG{'ALRM'} = \&timeout;

  eval {
    alarm($DNS_TIMEOUT);                       # set timeout...
    $ip_packed = (gethostbyname($server))[4];  # lookup server within $TIMEOUT
    alarm(0);                                  # done, cancel alarm
  };

  if ($@ =~ /^TIMEOUT/ ) {  # we timed out
    print "==> DNS lookup timed out for $server\n" if $VERBOSE;
    next;
  }
  elsif (! $ip_packed ) {   # no such host
    print "==> DNS lookup failed for $server\n" if $VERBOSE;
    next;
  }

  $ip_addr = join(".",unpack("C4",$ip_packed)); # get dotted quad ip

  # connect to IP address of server
  print "==> Connecting to $server [$ip_addr]...\n" if $VERBOSE;

  my $time_before = gettimeofday();    # start timing....

  my $remote = IO::Socket::INET->new( Proto=>'tcp',
                                      PeerAddr=>$ip_addr,
                                      PeerPort=>$CVSUP_PORT,
                                      Reuse=>1,
                                      Timeout=>$CVS_TIMEOUT,
                                    );

  my $time_after = gettimeofday();     # .... end timing

  if ( $@ ) {  # was there an error?
  
    if ( $VERBOSE ) { # And do we want to know about it?
      my $error = ();
      ($error = $@) =~ s/^IO::Socket::INET\s*:\s*(.*)/$1/;
      print "    - error: $error\n";
    }

    next; # skip to the next one
  }

  if ( defined($remote) and $remote->opened ) {   # did we get a connection?
    my $reply = $remote->getline;                 # what did the server say?
    chomp($reply);                                # remove newline
    close $remote;                                # close connection
    my $time_taken = $time_after - $time_before;  # calculate time

    # if the server replied 'OK', i.e. not overloaded, save the timing
    $time{$server} = $time_taken if ( $reply =~ m/^OK/ );

    if ( $VERBOSE ) { # print pretty display

      my $ms = sprintf("%.2f", 1000 * $time_taken); # time in ms
      print "    - server replied: $reply\n";
      print "    - time taken: $ms ms\n";

    }
  }
}

#-----------------------------------------------------------------------------#
# now we output the fastest

if ( %time ) { # did we get *any* servers?

  # sort for the fastest
  my @fastest = ();
  for ( sort { $time{$a} <=> $time{$b} } keys %time ) { push(@fastest,$_) }

  # if we are verbose, print the gold, silver and bronze medalists

  if ( $VERBOSE ) {
    print "\n>> Speed Daemons:\n"    if $fastest[0]; # just checking ;)
    print "    - 1st: $fastest[0]\n" if $fastest[0];
    print "    - 2nd: $fastest[1]\n" if $fastest[1];
    print "    - 3rd: $fastest[2]\n" if $fastest[2];
  }
  else {  # otherwise just output the fastest.
    print $fastest[0];
  }
}
else {

  # we didn't get any servers, boo hoo.

  print ">> No servers were found :( \n" if $VERBOSE;
  exit(1);
}

exit(0);

#-----------------------------------------------------------------------------#
# POD documentation follows...

__END__

=head1 NAME

fastest_cvsup - find fastest FreeBSD CVSup server

=head1 SYNOPSIS

B<fastest_cvsup> [B<-h>] [B<-q>] [B<-r>] B<-c> (B<country codes>|B<local>|B<all>)

=head1 DESCRIPTION

Displays the 3 fastest CVSup servers in user specified countries. It can just
return the fastest for use in automated shell scripts.

It uses C<Time::HiRes> for timings and C<IO::Socket::INET> to make a socket
connection to the target server. Unlike some shell scripts it does not rely on
pings to measure network speed.

=head1 USAGE

=item B<-h>   Displays usage.

=item B<-q>   Quiet mode, only returns the fastest server.

=item B<-r>   Uses remote CVSup server list from freebsd.org

If the variable $REWRITE_SELF is set to 1 in the script then (as if by magic) it
re-writes itself with the new server list. This only works as root and DOES NOT
DO IT BY DEFAULT!

=item B<-c (country codes)>

Times the servers in the specified countries, this is a comma separated
list with no spaces. The full list of countries with CVS mirrors is
available from the FreeBSD handbook.

For example, to time servers in the United Kingdom, France and
Germany use:

C<$> fastest_cvsup C<-c> uk,fr,de

=item B<-c local>

Uses the countries specifed in the C<@LOCAL_CC> array, edit the script to
specify which countries are considered local.

=item B<-c all>

Uses all the FreeBSD servers. This may take some time, but is quite interesting!

=head1 EXAMPLES

This can be used in shell scripts, for example:

C< #!/bin/sh>
C< echo 'Finding fastest CVSup server... '>
C< if SERVER=`fastest_cvsup -q -c uk,fr,de`; then>
C<   echo $SERVER>
C<   cvsup -g -L2 -h $SERVER /usr/local/etc/cvsup/supfile>
C< fi>

=head1 RETURN VALUES

Returns 0 without any errors, 1 with errors.

=head1 BUGS

None known as yet... ;)

=head1 AUTHOR

A.J.Robinson, E<lt>ajr@subdimension.comE<gt>

=cut
