#!/usr/bin/perl
#-----------------------------------------------------------------------------#
#
#  Find fastest CVSup server script for FreeBSD - version 0.2.8
#  Copyright (c) A.J.Robinson (ajr@subdimension.com) 2002
#  Distributed under the BSD license.
#
#-----------------------------------------------------------------------------#
#
# Changes:
# 0.2.8  - fixed the progress meter, timestamped %FREEBSD_SERVERS
# 0.2.7  - updated documentation, usage message and progress meter.
# 0.2.6  - changed from freebsdmirrors.org to the online handbook.
# 0.2.5  - added support for other servers (NetBSD/OpenBSD) that don't match
#          the naming scheme.
# 0.2.4  - ability to get CVSup mirrors from www.freebsdmirrors.org, script re-
#          writes itself if $REWRITE_SELF is set. Removed @ALL_CC.
# 0.2.3  - added exit codes, POD documentation, exit with usage() if no params
#          given, 'local' and 'all' targets.
# 0.2.2  - fixed stupid coding with double DNS lookup (PeerAddr=>$server).
# 0.2.1  - better error messages, wrap gethostbyname in eval block to timeout,
#          verbose and quiet modes.
# 0.2.0  - generate server list from hash.
# 0.1.0  - basic version - just about worked!
#
#-----------------------------------------------------------------------------#
# Load dependent modules

# We want to do this before the other modules so it fails quickly.
# Print a friendly error so newbies aren't completely stuck:
# (sorry it's a bit FreeBSD specific!)
BEGIN {
  eval('use Time::HiRes qw(gettimeofday)');
  if ($@) {
    die( "\nYou need the Time::HiRes perl module, you can either:\n\n",
         "1. Load it from CPAN:\n",
         "     # perl -MCPAN -e \'install Time::HiRes\'\n\n",
         "2. Fetch it as a package:\n",
         "     # pkg_add -r p5-Time-HiRes\n\n",
         "3. Compile it from the ports collection:\n",
         "     # cd /usr/ports/devel/p5-Time-HiRes ; make install clean\n\n",
       );
  }
}

# standard modules in perl distribution which _should_ be there

$|++; # (unbuffer STDOUT)
use strict;
use IO::Socket;
use Getopt::Std ('getopts');

# Works better with the port: www/p5-libwww, but not essential.
my $HAVE_LWP = eval('require LWP::Simple qw(get)');

#-----------------------------------------------------------------------------#
# User Configurable Variables (you can change these):

# edit this for countries which are fairly close to you:

my @LOCAL_CC = qw( uk ie fr de fi no nl );  # (lower case, separated by space)

# network timeouts in seconds:

my $DNS_TIMEOUT = 5;   #  DNS lookups
my $CVS_TIMEOUT = 5;   #  connect to the CVS server

my $REWRITE_SELF = 0;  # shall I rewrite myself when new servers are found
                       # in the online Handbook? (only root can do this)

#-----------------------------------------------------------------------------#
# Other variables

# How many FreeBSD CVSup servers has each country got? (well, not strictly
# true... what's the highest cvsup number in use?

my %FREEBSD_SERVERS = (     # last updated: Thu Sep 19 12:56:23 2002
  'ar' => 1,    # Argentina
  'at' => 1,    # Austria
  'au' => 5,    # Australia
  'br' => 4,    # Brazil
  'ca' => 2,    # Canada
  'cn' => 1,    # China
  'cz' => 1,    # Czech Republic
  'de' => 7,    # Germany
  'dk' => 1,    # Denmark
  'ee' => 1,    # Estonia
  'es' => 3,    # Spain
  'fi' => 2,    # Finland
  'fr' => 8,    # France
  'gr' => 2,    # Greece
  'hu' => 1,    # Hungary
  'ie' => 1,    # Ireland
  'is' => 1,    # Iceland
  'jp' => 6,    # Japan
  'kr' => 2,    # Korea
  'lt' => 2,    # Lithuania
  'lv' => 1,    # Latvia
  'nl' => 3,    # Netherlands
  'no' => 1,    # Norway
  'nz' => 1,    # New Zealand
  'pl' => 1,    # Poland
  'pt' => 1,    # Portugal
  'ro' => 1,    # Romania
  'ru' => 6,    # Russia
  'se' => 2,    # Sweden
  'si' => 1,    # Slovenia
  'sk' => 2,    # Slovak Republic
  'sm' => 1,    # San Marino
  'tw' => 3,    # Taiwan
  'ua' => 5,    # Ukraine
  'uk' => 5,    # United Kingdom
  'us' => 17,   # USA
  'za' => 2,    # South Africa
);

my %OTHER_SERVERS = (

  # from: http://www.netbsd.org/mirrors/#cvsup

  'netbsd'  => [ 'cvsup.de.netbsd.org',
                 'cvsup2.de.netbsd.org',
                 'cvsup.jp.netbsd.org',
                 'cvsup.pasta.cs.uit.no',
                 'cvsup.uk.netbsd.org',
               ],

  # from: http://www.openbsd.org/cvsup.html#CVSROOT

  'openbsd' => [ 'cvsup.uk.openbsd.org',
                 'cvsup.de.openbsd.org',
                 'cvsup2.de.openbsd.org',
                 'cvsup.hu.openbsd.org',
                 'cvsup.fr.openbsd.org',
                 'cvsup.ca.openbsd.org',
                 'cvsup.usa.openbsd.org',
                 'cvsup.kr.openbsd.org',
                 'cvsup.no.openbsd.org',
                 'cvsup.pt.openbsd.org',
                 'anoncvs.de.openbsd.org',
                 'rt.fm',
                 'skeleton.phys.spbu.ru',
                 'cvsup.jp.openbsd.org',
                 'wiretapped.net',
               ],
);

my $CVSUP_PORT = 5999; # we attempt to connect on this port from an
                       # unprivaleged local port - can change this
                       # so we look for fastest FTP mirrors instead.
                       # But that's another script... ;)

# The URL to connect to when remotely fetching a list of CVSup servers:

my $URL="http://www.freebsd.org/doc/en/books/handbook/cvsup.html";

# What shall we display?

# verbose messages:
my $GENERAL_INFO    = '>>  ';
my $GENERAL_WARNING = '**  ';
my $SERVER_INFO     = '--> ';
my $SERVER_WARNING  = '==> ';
my $SERVER_RESULT   = '    - ';
my $SERVER_ERROR    = '    * ';

#-----------------------------------------------------------------------------#
# Usage() sub

sub Usage {
  my $error = shift;
  print "\n  fastest_cvsup - finds fastest CVSup server\n\n";
  print "  Error: $error\n" if $error;
  print "  Usage: $0 [-h] [-q|Q] [-r] -c (country codes|local|all)\n",
        "  Where: -h           prints this screen\n",
        "         -q           quiet mode, only outputs fastest server\n",
        "         -Q           very quiet mode, no progress meter\n",
        "         -r           uses remote server list from FreeBSD Handbook\n",
        "         -c aa,bb,cc  queries servers in countries aa,bb,cc\n",
        "            local     queries servers set as local in the script\n",
        "            all       queries all FreeBSD servers\n\n",
        "  See the man page, fastest_cvsup(7), for more details.\n\n";
  exit(1);
}

#-----------------------------------------------------------------------------#
# get user supplied options

# we need the ':' to store the actual values of the -c switch

my %opt = ();
getopts("hrqQc:", \%opt);

# if no args given, or help page requested

Usage() if ( ! %opt or $opt{'h'} );

# how noisy shall we be?

my $VERBOSE = 1;          # let's be loud (default)
my $SHOW_PROGRESS = 1;    # shall I show a small status bar? (default yes)

if ( $opt{'q'} or $opt{'Q'} ) { $VERBOSE = 0 }  # only display fastest
if ( $opt{'Q'} ) { $SHOW_PROGRESS = 0 }         # no progress meter

# shall we get a list of CVSup servers from $URL?

my $REMOTE_SERVERLIST = 0;    # default is no, use internal list
if ( $opt{'r'} ) { $REMOTE_SERVERLIST = 1; }

# which servers shall we query?

my @countries = ();

unless ( $opt{'c'} ) {

  # nothing to query, so we exit with an error message

  Usage('You need to specify which servers to query!');

}
else {

  $opt{'c'} = lc( $opt{'c'} );   # convert to lowercase

  if ( $opt{'c'} eq 'local' ) {  # shall we query local servers ?
    @countries = @LOCAL_CC;
  }

  elsif ( $opt{'c'} eq 'all' ) { # shall we query all servers ?
    @countries = sort ( keys %FREEBSD_SERVERS );
  }

  else {  # build a list from the countries specified

    # count number of times the country crops up, if it's greater than 1
    # push it into the countries array (specifying -c uk,uk,uk will only
    # query uk servers once)

    my %i = ();                                            # (just a counter)
    for ( split(',',$opt{'c'}) ) { $i{$_}++ }              # count times
    for ( keys %i ) { push(@countries,$_) if $i{$_} >= 1 } # build array

  }
}

#-----------------------------------------------------------------------------#

if ( $REMOTE_SERVERLIST ) {

  print $GENERAL_INFO, "Fetching server list... " if $VERBOSE;
  my $html;

  # for testing
  #if (open(F,"<sites.htm")){while(<F>){$html.=$_}close(F)};

  if ( $HAVE_LWP ) {
    $html = get($URL);
  }
  else {
    $html = `fetch -qo- $URL`;
  }

  if ( $html ) { # did we get anything?

    print "OK!\n" if $VERBOSE;

    my %srv = ();  # temporary hash before transfering into %FREEBSD_SERVERS
    my %cc_name = ();  # hash to hold the full country name

    # get the servers from the handbook - it's a bit more messy, but as
    # freebsdmirrors.org relies on DNS zone transfers (which don't seem to
    # work anymore), it's the next best thing.

    # non-greedy regex to grab what's between the <dl> tags just after the
    # name anchor.
    $html =~ s|.*name="CVSUP-MIRRORS".*?<dl>(.*?)</dl>.*|$1|s;

    # split it up into countries, between '<dt>' tags
    for ( split('<dt>',$html) ) {

      # extract the country name and the rest (what a mess!)

      if ( m/^(\w+)(\s{1}\w+)?\s*(.*)/s ) {
        my $country = $1.$2; # for two word names
        my $mess = $3;       # the rest

        # get rid of crap and the first '<li>'
        $mess =~ s/.*?<li>(.*)/$1/s;

        for ( split('<li>',$mess) ) {

          # extract server name
          my $server;
          ($server = $_ ) =~ s/.*<p>([\w\.]+).*/$1/s;

          # parse non-US servers

          if ( $server =~ /cvsup([\d]*)\.([\w]+)\.freebsd\.org/i ) {
            my $i = $1 || 1; # set to 1 if no initial digit
            my $cc = lc($2); # convert to lower case

            # save the highest number encountered
            if ( !$srv{$cc} or $srv{$cc} < $i ) { $srv{$cc} = $i }

            # save country name
            unless ( $cc_name{$cc} ) { $cc_name{$cc} = $country }
          }

          # parse US servers

          elsif ( $server =~ /cvsup([\d]*)\.freebsd\.org/i ) {
            my $i = $1 || 1;
            if ( !$srv{'us'} or $srv{'us'} < $i ) { $srv{'us'} = $i }
            unless ( $cc_name{'us'} ) { $cc_name{'us'} = $country }
          }
        }
      }
    }

    # check to see if the internal list needs updating... (this assumes that
    # the Handbook will be more up to date)

    my $needs_updating = ();
    for ( keys %srv ) {
      if ( $srv{$_} ne $FREEBSD_SERVERS{$_} ) {
        $needs_updating++;
        last;
      }
    }

    if ( $needs_updating ) {

      if ( $REWRITE_SELF ) {

      # XXX: should really stick this in __DATA__

        if ( $< == 0 ) {  #same as: if ( scalar(getpwuid $<) eq 'root' ) {

          # build a new FREEBSD_SERVERS hash

          my $time = scalar(localtime);
          my $newstring = "my \%FREEBSD_SERVERS = (     # last updated: $time\n";
          for ( sort( keys %srv ) ) {

            # 'cc' => number,
            $newstring .= "  \'$_\' => $srv{$_},";

            # variable length spacer, basically for lining up USA!
            $newstring .= ( " " x ( 5 - length($srv{$_}) ) );

            # commented out country name
            $newstring .= "# $cc_name{$_}\n";
          }
          $newstring .= ");";

          # read in _this_ file (!)

          open(OLD, "< $0") or die "Error, can't open $0: $!";
          my $bigstring;
          while (<OLD>) { $bigstring .= $_ }
          close(OLD) or die "Error, can't close $0: $!";

          # substitute the variable (non-greedy regex again)

          $bigstring =~ s/(my \%FREEBSD_SERVERS = .*?;)/$newstring/gs;

          # write it out to a new file

          open(NEW, "> $0.new") or die "Error, can't open $0.new: $!";
          print NEW $bigstring or die "Error, can't write $0.new: $!";
          close(NEW) or die "Error, can't close $0.new: $!";

          # swap them over

          rename($0, "$0.orig") or die "Error, can't rename $0 to $0.orig: $!";
          rename("$0.new", $0) or die "Error, can't rename $0.new to $0: $!";

          # set permissions on new file

          chmod(0755,$0) or die "\nCannot chmod(0755,$0): $!\n";

          print $GENERAL_INFO, "The internal CVSup list has been updated.\n" if $VERBOSE;

        }
        else {
          print $GENERAL_WARNING, "The internal CVSup list can ONLY be updated by ROOT.\n" if $VERBOSE;
        }
      }
      else {
        # we're not updating automagically, so you get a message to do
        # it yourself!
        print $GENERAL_INFO, "The internal CVSup list needs updating!\n" if $VERBOSE;
      }
    }

    # use the new list we've downloaded
    %FREEBSD_SERVERS = %srv;

    # and we might need to update this if it's changed
    if ( $opt{'c'} eq 'all' ) { @countries = sort ( keys %FREEBSD_SERVERS ) }

  }
  else {
    print "Failed! (Using internal list)\n" if $VERBOSE;
  }
}

#-----------------------------------------------------------------------------#
# now we build an array of servers

my (@servers,@first,@others) = ();

foreach my $cc ( @countries ) {

  # For all the countries except the US, the server names follow the pattern
  # cvsupXX.country.freebsd.org, with the first server omitting the XX digit.
  # US servers don't include the country, but include the first digit.

  for ( my $i=1; $i<=$FREEBSD_SERVERS{$cc}; $i++ ) {

    if ( $cc eq 'us' ) { # use no country code, include the '1'
      if ( $i == 1 ) { push(@first,"cvsup$i.freebsd.org"); }
      else           { push(@others,"cvsup$i.freebsd.org"); }
    }
    else {
      if ( $i == 1 ) { push(@first,"cvsup.$cc.freebsd.org"); }
      else           { push(@others,"cvsup$i.$cc.freebsd.org"); }
    }
  }

  if ( $OTHER_SERVERS{$cc} ) {
    push( @others, @{ $OTHER_SERVERS{$cc} } );
  }

}

# put the first server of each country at the start of the servers array and
# the rest at the end - this gives the DNS server more time to reply. With
# slow connections and caching DNS (looking up the root), this is better, I
# think. Any comments...?

@servers = (@first,@others);


#-----------------------------------------------------------------------------#
# now we time the servers in the array

print $GENERAL_INFO, "Querying servers in countries: @countries\n" if $VERBOSE;

my %time = ();

my $srvs = scalar(@servers) - 1; # number of servers (minus one)
my $done  = 0;                   # number of servers timed

foreach my $server (@servers) {

  if ( $SHOW_PROGRESS && ! $VERBOSE ) {

    # display simple progress bar (much simplified from earlier versions)

    my $left = $srvs - $done;
    print STDERR "\r",(' ' x 80),"\r"; # beginning of line, overwrite with ' ' 80 times, back to beginning
    print STDERR " -=(\033[32mo",('o' x $done),"\033[m\033[31m",('o' x $left),"\033[m)=($server)=- ";
    if ($srvs == $done) {print STDERR "\n"}
    $done++;
  }

  my $ip_packed = ();  # get ip address before attempting to connect, and
  my $ip_addr   = ();  # wrap gethostbyname in an eval block to timeout. IP
                       # address is packed in memory, hence these vars.

  sub timeout { die "TIMEOUT\n" }; # replacement signal
  $SIG{'ALRM'} = \&timeout;

  eval {
    alarm($DNS_TIMEOUT);                       # set timeout...
    $ip_packed = (gethostbyname($server))[4];  # lookup server within $TIMEOUT
    alarm(0);                                  # done, cancel alarm
  };

  if ($@ =~ /^TIMEOUT/ ) {  # we timed out
    print $SERVER_WARNING, "DNS lookup timed out for $server\n" if $VERBOSE;
    next;
  }
  elsif (! $ip_packed ) {   # no such host
    print $SERVER_WARNING, "DNS lookup failed for $server\n" if $VERBOSE;
    next;
  }

  $ip_addr = join(".",unpack("C4",$ip_packed)); # get dotted quad ip

  # connect to IP address of server
  print $SERVER_INFO, "Connecting to $server [$ip_addr]...\n" if $VERBOSE;

  my $time_before = gettimeofday();    # start timing....

  my $remote = IO::Socket::INET->new( Proto=>'tcp',
                                      PeerAddr=>$ip_addr,
                                      PeerPort=>$CVSUP_PORT,
                                      Reuse=>1,
                                      Timeout=>$CVS_TIMEOUT,
                                    );

  my $time_after = gettimeofday();     # .... end timing

  if ( $@ ) {  # was there an error?

    if ( $VERBOSE ) { # And do we want to know about it?
      my $error = ();
      ($error = $@) =~ s/^IO::Socket::INET\s*:\s*(.*)/$1/;
      print $SERVER_ERROR, "error: $error\n";
    }

    next; # skip to the next one
  }

  if ( defined($remote) and $remote->opened ) {   # did we get a connection?
    my $reply = $remote->getline;                 # what did the server say?
    chomp($reply);                                # remove newline
    close $remote;                                # close connection
    my $time_taken = $time_after - $time_before;  # calculate time

    # if the server replied 'OK', i.e. not overloaded, save the timing
    $time{$server} = $time_taken if ( $reply =~ m/^OK/ );

    if ( $VERBOSE ) { # print pretty display

      my $ms = sprintf("%.2f", 1000 * $time_taken); # time in ms
      print $SERVER_RESULT, "server replied: $reply\n";
      print $SERVER_RESULT, "time taken: $ms ms\n";

    }
  }
}

#-----------------------------------------------------------------------------#
# now we output the fastest

if ( %time ) { # did we get *any* servers?

  # sort for the fastest
  my @fastest = ();
  for ( sort { $time{$a} <=> $time{$b} } keys %time ) { push(@fastest,$_) }

  # if we are verbose, print the gold, silver and bronze medalists

  if ( $VERBOSE ) {
    print "\n", $GENERAL_INFO, "Speed Daemons:\n" if $fastest[0]; # just checking ;)
    print $SERVER_RESULT, "1st: $fastest[0]\n" if $fastest[0];
    print $SERVER_RESULT, "2nd: $fastest[1]\n" if $fastest[1];
    print $SERVER_RESULT, "3rd: $fastest[2]\n" if $fastest[2];
  }
  else {  # otherwise just output the fastest.
    print $fastest[0];
    print STDERR "\n";  # ending newline
  }
}
else {
  # we didn't get any servers, boo hoo. Send this to STDERR.
  print STDERR $GENERAL_WARNING, "No servers were found :( \n";
  exit(1);
}

exit(0);

#-----------------------------------------------------------------------------#
# POD documentation follows...

__END__

=head1 NAME

B<fastest_cvsup> - find fastest CVSup server

=head1 SYNOPSIS

B<fastest_cvsup> [B<-h>] [B<-(q|Q)>] [B<-r>] B<-c> (B<country codes>|B<local>|B<all>)

=head1 DESCRIPTION

Displays the 3 fastest CVSup servers in user specified countries. It can just
return the fastest for use in automated shell scripts.

It uses Time::HiRes for timings and IO::Socket::INET to make a socket
connection to the target server, the server response is taken notice of. Unlike
some shell scripts it does not rely on 'pings' to measure network speed.

=head1 USAGE

=over 5

=item B<-h>

displays usage.

=item B<-q>

quiet mode - only returns the fastest server and prints a progress meter.

=item B<-Q>

very quiet mode - in addition the progress meter is not shown.

=item B<-r>

uses remote CVSup server list from the online FreeBSD Handbook.

If the variable C<$REWRITE_SELF> is set to 1 in the script then (as if by
magic) it re-writes itself with the new server list. This only works as root
and does not do it by default.

=item B<-c country codes>

this is a list (comma separated, no spaces) of server groups to time.

FreeBSD servers are grouped into country codes - see the script or the FreeBSD
Handbook for a full list.

NetBSD and OpenBSD servers are lumped together under the 'netbsd' and
'openbsd' codes respectively.

=item B<-c local>

uses the countries specifed in the C<@LOCAL_CC> array. Edit the script to
specify which countries are considered local.

=item B<-c all>

uses all the FreeBSD servers. This may take some time, but is quite interesting!

=back

=head1 EXAMPLES

=over 5

=item 1

times the FreeBSD CVSup servers in the United Kingdom, France and Germany:

    $ fastest_cvsup -c uk,fr,de

=item 2

times the OpenBSD and NetBSD CVSup servers:

    $ fastest_cvsup -c openbsd,netbsd

=item 3

shell script, finds the fastest UK FreeBSD CVSup server, then runs cvsup using
that server:

    #!/bin/sh
    if SERVER=`fastest_cvsup -q -c uk`; then
      cvsup -h $SERVER /usr/local/etc/cvsup/supfile
    fi

=back

=head1 RETURN VALUES

Returns 0 without any errors, 1 with errors.

=head1 AUTHOR

A.J.Robinson, E<lt>ajr@subdimension.comE<gt>

=cut
