#! /usr/bin/perl -wT
# Copyright (c) 2001 Martin Kammerhofer <mkamm@gmx.net>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

# $Id: commitlog2cvsweb.pl,v 1.6 2001/02/25 18:41:07 mkamm Exp $
# Convert FreeBSD "commitlog" file(s) into HTML with links to cvsweb.cgi.
# This script parses CVS commitlogs assembled by the log_accum.pl script.

# WARNING: The following code is very poor style. It has been hacked
# a few times and needs a rewrite. However it does still work for me.

require 5.003;
use strict;
use Carp;
use Getopt::Long;
use URI::Escape;
use Compress::Zlib;
use File::Basename;

# Optional site configuration; when readable it is loaded with require()
# before the command line is parsed.
my $configfile = "/usr/local/etc/commitlog2cvsweb.conf";

# Debug level (currently 2..5 are useful, see options -d, -debug).
my $debug = 0;

# Start time of this run; used to turn -M file ages into absolute times.
my $script_started = time;

# The script runs in taint mode (-T): remove the environment variables
# that influence how a shell or exec would behave.
foreach my $unsafe_var ('IFS', 'CDPATH', 'ENV', 'BASH_ENV', 'PATH') {
    delete $ENV{$unsafe_var};
}

# DEFAULT PARAMETERS
#
# All tunables are package globals declared with "use vars".  They are
# given their built-in defaults here, one assignment per variable.

use vars qw(
    $verbose $pfx $self_prefix $unknown_prefix $stdin_with_pfx
    $cvsweb $urlsuffix $branch_wanted $show_all_branches
    $query_pr_cgi $committer_wanted $maximum_output
    $hyperlink_committers $and_patterns $case_insensitive
    $number_commits $hr $html_header_and_footer $body_attributes
    $extra_font1 $extra_font2 $branch_font1 $branch_font2
    $log_font1 $log_font2 $file_headers
    $commits_are_time_sequential $sort_arguments $start_date
    $end_date $outfile @logmsg_patterns
);

# DO NOT EDIT THE DEFAULTS HERE - use $configfile instead!

# --- diagnostics ---
$verbose = 0;                   # verbose flag (file summary lines to STDERR)

# --- repository layout ---
$pfx = "src/";                  # default prefix for directories inside repository
$self_prefix = "CVSROOT|distrib|doc|ports|www";
                                # these files get their own name as prefix
$unknown_prefix = "other";      # no known prefix for these files
$stdin_with_pfx = 0;            # use ${pfx} on --stdin too

# --- generated links ---
$cvsweb = "http://www.FreeBSD.org/cgi/cvsweb.cgi";  # URL of cvsweb.cgi
$urlsuffix = "";                # suffix to URL e.g. "cvsroot=myproject"
$branch_wanted = "";            # RCS branch e.g. "MAIN"
$show_all_branches = 0;         # show all branches in cvsweb, even if only
                                # commits to a single branch are requested
                                # with $branch_wanted
$query_pr_cgi = "http://www.FreeBSD.org/cgi/query-pr.cgi";  # PR cgi

# --- commit selection ---
$committer_wanted = "";         # case insensitive committer pattern,
                                # e.g. "joe|fred"
$maximum_output = 0;            # max. number of commit messages to output
                                # (0 for no limit)
$hyperlink_committers = 0;      # add hyperlinks to next/previous commit by
                                # same committer
$and_patterns = 0;              # ANDing of log message patterns
$case_insensitive = 0;          # case insensitive pattern matching

# --- HTML layout ---
$number_commits = 1;            # number all commits
$hr = 1;                        # rules before each commit
$html_header_and_footer = 1;    # print HTML header and footer
                                # (<HEAD> and <BODY>)
$body_attributes = "bgcolor=white";     # attributes for HTML <BODY> tag
$extra_font1 = "<font size=-1>";        # opening tag for per committer
                                        # links and commit numbers
$extra_font2 = "</font>";               # matching closing tag
$branch_font1 = "<font color=green>";   # opening tag for branch
$branch_font2 = "</font>";              # matching closing tag
$log_font1 = "<font color=red>";        # opening tag for searched text in log
$log_font2 = "</font>";                 # matching closing tag
$file_headers = 1;              # whether to write <h1> file headers

# --- input handling ---
$commits_are_time_sequential = 1;   # commit times are always increasing
                                    # in input files
$sort_arguments = 0;            # sort @ARGV according to modification
                                # dates (-M)
$start_date = undef;            # start of the wanted date interval
$end_date = undef;              # end of the wanted date interval
$outfile = "-";                 # default outputfile

# Maps each primary option name to the variable that GetOptions() fills in.
# Options without an entry here (e.g. "version") end up as plain values
# stored in this hash.
my %optctl = (
    # which commits to show
    "branch"            => \$branch_wanted,
    "committer"         => \$committer_wanted,
    "logmessage"        => \@logmsg_patterns,
    "and"               => \$and_patterns,
    "insensitive"       => \$case_insensitive,
    "from"              => \$start_date,
    "to"                => \$end_date,
    "maximum"           => \$maximum_output,
    "sequential"        => \$commits_are_time_sequential,

    # how links are built
    "cvsweb"            => \$cvsweb,
    "urlsuffix"         => \$urlsuffix,
    "queryprcgi"        => \$query_pr_cgi,
    "showallbranches"   => \$show_all_branches,
    "prefix"            => \$pfx,
    "stdin-with-prefix" => \$stdin_with_pfx,

    # what the HTML looks like and where it goes
    "outfile"           => \$outfile,
    "headfoot"          => \$html_header_and_footer,
    "body-attributes"   => \$body_attributes,
    "fileheaders"       => \$file_headers,
    "hr"                => \$hr,
    "number-commits"    => \$number_commits,
    "link-committers"   => \$hyperlink_committers,

    # input order and diagnostics
    "sort-arguments"    => \$sort_arguments,
    "verbose"           => \$verbose,
    "debuglevel"        => \$debug,     # set the level ...
    "d"                 => \$debug,     # ... or increment it
);

# Program name used in messages: the basename of $0 without a trailing
# ".pl" or ".perl".  The suffix pattern is single-quoted on purpose: inside
# a double-quoted string "\." collapses to ".", and the pattern would then
# strip any character followed by "pl" (turning e.g. "simpl" into "si").
my ($prog, $prog_dir) = fileparse($0, '\.p(?:er)?l');

# Version string: the RCS Id keyword below with its leading "Id: " marker
# and trailing " $" removed.
my $version = '$Id: commitlog2cvsweb.pl,v 1.6 2001/02/25 18:41:07 mkamm Exp $'; # '
$version =~ s/^\s*\$Id: //;
$version =~ s/ \$\s*$//;

# Print the command line synopsis to STDERR and terminate the program
# with exit status 64.  Never returns.
sub usage () {
    my @synopsis = (
        "usage: $prog [--cvsweb=URL] [--urlsuffix=SFX] [--branch=TAG]\n",
        " [--showallbranches] [--outfile=FILE] [--committer=PATTERN]\n",
        " [--and] [--insensitive] [--logmessage=PATTERN]...\n",
        " [--link-committers] [--queryprcgi=URL] [--number-commits]\n",
        " [--maximum=NUM] [--sort-arguments]  commitlog-file...\n",
        "or: $prog --version\n",
    );
    print STDERR @synopsis;
    exit 64;
}

# Return a copy of the argument in which the characters & " > < are
# replaced by the entities &amp; &quot; &gt; &lt;.  The argument itself is
# not modified.
#
# Croaks with "Missing argument" when called with an undefined value.
sub html_escape ($) {
    my ($text) = @_;
    # Test for definedness, not truth: "0" and "" are valid text (think of
    # a file named "0") and must be passed through instead of croaking.
    croak "Missing argument" unless defined $text;
    $text =~ s/&/&amp;/g;       # first, so the entities added below survive
    $text =~ s/"/&quot;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/</&lt;/g;
    return $text;
}

# OPTIONS PROCESSING

# Load the site configuration first so that the command line can override
# it.  The file is refused when its mode has the group or other write bit
# set (mask 022).
if (-r $configfile) {
    my $mode = (stat(_))[2];
    if ($mode & 022) {
        die "$prog: $configfile must'nt be writable by others\n";
    }
    require $configfile;
}

# Option specifications in Getopt::Long syntax; alternatives separated by
# "|" are aliases of the first name.
my @option_specs = (
    "cvsweb|url=s",
    "branch|tag=s",
    "version!",
    "urlsuffix|suffix=s",
    "outfile|output=s",
    "committer=s",
    "showallbranches!",
    "debuglevel=i",
    "d+",
    "queryprcgi=s",
    "logmessage|logstring|logpattern|logmsg=s@",
    "link-committers!",
    "and!",
    "number-commits!",
    "insensitive!",
    "body-attributes=s",
    "headfoot!",
    "from|after|since=s",
    "to|upto|before=s",
    "verbose!",
    "hr!",
    "maximum|atmost=i",
    "prefix=s",
    "stdin-with-prefix!",
    "fileheaders!",
    "sequential!",
    "sort-arguments!",
);

# Bad options are a usage error ...
usage() unless GetOptions(\%optctl, @option_specs);
# ... and so is "no input files" while standard input is a terminal
# (unless only the version was asked for).
usage() if !@ARGV && -t STDIN && !$optctl{version};

if ($optctl{version}) {
    print "$version\n";
    exit 0;
}

# any debugging implies verbose output
if ($debug) {
    $verbose = 1;
}

# from here on $hr is the markup to emit, not a flag
$hr = $hr ? "<HR>" : "";

# Standard input is processed under the name "-"; unless the user asked
# for the default prefix on stdin, treat it like the files without a known
# prefix.
$unknown_prefix = "-|$unknown_prefix" if !$stdin_with_pfx;

# Convert the "from ... to" time interval into seconds since the epoch.
# $date_wanted tells the parser whether commits have to be filtered by date.
my $date_wanted = ($start_date || $end_date) ? 1 : 0;
if ($date_wanted) {
    # Time::ParseDate is only needed (and only loaded) for date filtering.
    eval { require Time::ParseDate; };
    die "$prog: required module Time::ParseDate not available\n" if $@;
    Time::ParseDate->import(qw(parsedate));

    # parse the date(s), check start <= end
    my %opts = (WHOLE => 1, PREFER_PAST => 1, VALIDATE => 1);
    my $err;
    if ($start_date) {
        ($start_date, $err) = parsedate($start_date, %opts);
        $start_date or die "$prog: bad date: $err\n";
    } else {
        $start_date = 0;                # open interval: from the epoch
    }
    if ($end_date) {
        ($end_date, $err) = parsedate($end_date, %opts);
        $end_date or die "$prog: bad date: $err\n";
    } else {
        $end_date = time() + 86400;     # open interval: until tomorrow
    }

    my $from_text = scalar(localtime($start_date));
    my $to_text   = scalar(localtime($end_date));
    if ($start_date > $end_date) {
        die "$prog: negative time interval:\n\t'$from_text' ==> '$to_text'\n";
    }
    if ($verbose) {
        print STDERR "$prog: '$from_text' ==> '$to_text'\n";
    }
}

# Compile the --logmessage patterns once, so that grepping the log
# messages does not recompile them for every commit.
my @compiled_logmsg_patterns;
foreach my $pattern (@logmsg_patterns) {
    push @compiled_logmsg_patterns,
        ($case_insensitive ? qr/$pattern/ims : qr/$pattern/ms);
}

# Send all further output to $outfile unless it is "-" (standard output).
unless ($outfile eq "-") {
    close(STDOUT) or die;
    # Untaint the name.  The pattern accepts any single-line value, so this
    # merely satisfies taint mode; it does not validate the file name.
    if ($outfile =~ /^(.*)$/) {
        $outfile = $1;
    }
    open(STDOUT, "> $outfile")
        or die "$prog: redirect output to '$outfile': $!.\nStopped";
}

# VARIABLES
#
# File-scoped state shared between the main loop and parse_line().

my @f;              # whitespace separated fields of the current line
my $fname;
my ($uri, $htm);    # scratch: URI-escaped / HTML-escaped text for a link
my ($dir, $found_dir);  # directory as used in links / as found in the input
my $logmessage;     # raw log text of the commit being buffered
my $headline;       # per-file header, printed before the first output

# Query string appended to cvsweb links: the branch restriction (unless
# all branches are to be shown) followed by the user supplied suffix.
my $cvsweb_suffix = ($branch_wanted && !$show_all_branches)
    ? "?only_with_tag=" . $branch_wanted
    : "";
if ($urlsuffix) {
    my $joiner = $cvsweb_suffix ? "&" : "?";
    $cvsweb_suffix .= $joiner . $urlsuffix;
}

my $copy = 0;       # copy from input flag: 0..suppress, 1..buffer, 2..copy
my $buf = "";       # output buffer (until copy/suppress decision is done)
my $files = 0;      # 1 if inside added/modified/removed files section
my $RCP = 0;        # 1 if inside "Revision Changes Path" section
my $log = 0;        # 1 if inside "Log:" section
my $commit_count_in = 0;    # commits seen in the current file
my $commit_count_out = 0;   # commits accepted for output from that file
my ($committer, $date, $time, $tz);     # fields of the commit headline
my %by_committer;   # number of accepted commits per committer
my $branchtag;      # branch parsed from the commit message
# (do not confuse the parsed $branchtag with $branch_wanted from commandline)


# parse the line in $_
#
# parse_line() handles one line of a commitlog.  It takes no arguments: the
# line is read from and rewritten (as HTML) in $_, and all parser state is
# kept in the file-scoped variables ($copy, $files, $RCP, $log, $buf, ...).
#
# The body is a bare block with a "continue" block.  Every "next" in the
# bare block ends the parsing of this line and falls into the continue
# block, which prints, buffers or drops $_ depending on $copy.  "last LINE"
# leaves the LINE loop of the caller, i.e. stops reading the current file.
#
# Patterns that contain "/" use brace delimiters (m{...}, s{...}{...}).
#
# NOTE(review): uri_escape() is applied to complete URLs.  That only yields
# usable links if the installed URI::Escape leaves reserved characters such
# as ":" "/" "?" "&" alone by default - confirm against the installed
# version.
# NOTE(review): a commit that is still being buffered ($copy == 1) is only
# finished when the next commit headline arrives; nothing in the main loop
# flushes it at end of input, so the last buffered commit appears to get
# lost - confirm.

sub parse_line() {{
    print STDERR if $debug >= 5;
    if ( /^\s*$/ ) {
        $files = $RCP = 0; # empty lines close "files" or "RCP" sections
        next;
    }
    @f = split;
    # check for begin of new commit message:
    # look for a "committer" "date" "time" "tz" line
    if (($#f == 2 || $#f == 3) && $f[0] =~ /^\S+$/
        && $f[1] =~ m{^(\d\d)?\d\d/\d\d/\d\d$}
        && $f[2] =~ /^\d\d:\d\d:\d\d$/
        # && ($#f == 2 || $f[3] =~ /^[A-Z]{3,4}$/ )
        && /^\S+/ # committer name must start in first column
        ) {
        # finish last commit?
        if ($copy == 1) {
            # is the branch correct?
            if ($branch_wanted && $branch_wanted ne $branchtag) {
                # we take this code path only for "ancient" imports...
                $copy = 0;
                --$by_committer{$committer}; # adjust per committer count
                print STDERR "not branch '$branch_wanted' (???)\n"
                    if $debug >= 3;
            }
            # grep the log message?  Only if the branch test above did not
            # already reject the commit: otherwise a commit on the wrong
            # branch could still be printed, or its committer count be
            # decremented a second time.
            if ($copy == 1 && @logmsg_patterns) {
                my $accepted = 0;
                if ($logmessage) { # an empty log message never matches
                    if ($and_patterns) {
                        # all patterns ANDed
                        $accepted = 1;
                        foreach my $pat (@compiled_logmsg_patterns) {
                            if ($logmessage !~ /$pat/ ) {
                                $accepted = 0; last;
                            }
                        }
                    } else {
                        # all patterns ORed
                        foreach my $pat (@compiled_logmsg_patterns) {
                            if ($logmessage =~ /$pat/ ) {
                                $accepted = 1; last;
                            }
                        }
                    }
                }
                if ($accepted) {
                    $commit_count_out++;
                    print $headline, $buf;
                    $headline = $buf = "";
                    print STDERR "ACCEPTED (log) $commit_count_out\n"
                        if $debug >= 3;
                } else {
                    --$by_committer{$committer}; # adjust per committer count
                    print STDERR "not logmessage\n" if $debug >= 3;
                }
                $copy = 0; # don't accept twice
            }
            die "internal error" if $copy; # something wrong here?
        } # finished last commit

        # initialize all those state variables :(
        $copy = $files = $log = $RCP = 0; # not inside any section yet
        $branchtag = $buf = $logmessage = ""; # not known yet
        $dir = $found_dir = undef;
        # output limit reached?
        if ($maximum_output && $commit_count_out >= $maximum_output) {
            print STDERR "LIMIT $commit_count_out reached\n" if $debug >= 3;
            print "<h3>limit of $commit_count_out commits reached</h3>\n";
            # leaving the sub through "last" warns under -w; silence that
            local $^W = 0; last LINE;
        }
        # PROCESS THE COMMIT MESSAGE
        $commit_count_in++;
        print STDERR "found commit $commit_count_in: @f\n" if $debug >= 2;
        ($committer, $date, $time, $tz) = @f;
        # $tz may be undefined or junk if we have a "historic" commit message
        $tz = "" if !defined($tz) || $tz !~ /[A-Z]{3,4}/ ;
        # filter by committer
        if ($committer_wanted
            && $committer !~ m{^(?i:$committer_wanted)$} ) {
            # we are not for this committer
            print STDERR "not committers '$committer_wanted'\n" if $debug >= 3;
            next;
        }
        # filter by date (very stupid and inefficient algorithm)
        if ($date_wanted) {
            my $datetime = "$date $time $tz";
            my $commit_date = parsedate($datetime) || die;
            if ($commit_date < $start_date) {
                print STDERR "too early '$datetime'\n" if $debug >= 3;
                next;
            }
            if ($commit_date > $end_date) {
                local $^W = 0;
                print STDERR "too late '$datetime'\n" if $debug >= 3;
                # with sequential input nothing later in this file can match
                last LINE if $commits_are_time_sequential;
                next;
            }
        }
        if ($branch_wanted || @logmsg_patterns) {
            $copy = 1; # buffer output until we have seen the branch / log
        } else {
            $copy = 2;
            ++$commit_count_out;
            print STDERR "ACCEPTED (head) $commit_count_out\n" if $debug >= 3;
        }
        my $count = ++$by_committer{$committer}; # num. of commits by committer
        # (The above count will have to be decremented if we decide to filter
        # this commit message later.)
        my ($next, $prev) = ($count+1, ($count>1 ? $count-1 : undef));
        if ($hyperlink_committers) {
            $uri = uri_escape($committer);
            $htm = html_escape($committer);
            # insert a link to this committer's next commit
            s{^\Q$committer\E}{<A NAME="$uri$count"><A HREF="#$uri$next">$htm$extra_font1&nbsp;($count)$extra_font2</A></A>};
            if ($prev) {
                # insert a link to this committer's previous commit
                s{$}{&nbsp;<A HREF="#$uri$prev">$extra_font1(previous)$extra_font2</A>};
            }
        }
        # make an anchor for this commit message
        if ($number_commits) {
            my ($anchor);
            if ($ARGV =~ /^(\w+(\.\d+)?)/ ) {
                $anchor = $1;
                $anchor =~ tr/.//d;
            } else {
                $anchor = "commit";
            }
            $anchor .= "/$commit_count_in";
            s{^}{<A NAME="$anchor">$extra_font1($anchor) $extra_font2</A>$hr};
        } elsif ($hr) {
            s/^/$hr/;
        }
        next;
    } # end of headline processing
    # only commit headlines (see above) start in column 1:
    die "Bad input format: $_\nStopped" if /^\S/;
    # No need to parse further?
    next unless $copy;
    # check for a new repository directory or imported sources
    if ( /^  (\S+) - New directory$/ ||
         /^  (\S+) - Imported sources$/ ) {
        $found_dir = $1;
        if ($copy == 1) {
            if ($branch_wanted && $branch_wanted ne "MAIN") {
                --$by_committer{$committer}; # adjust per committer count
                $copy = 0; # suppress this commit
                print STDERR "not branch '$branch_wanted' (new dir/import)\n"
                    if $debug >= 3;
                next;
            } elsif (@logmsg_patterns) {
                --$by_committer{$committer}; # adjust per committer count
                $copy = 0; # suppress this commit
                print STDERR "not log (new dir/import)\n" if $debug >= 3;
                next;
            } else {
                $copy = 2; # copy out
                $commit_count_out++;
                print STDERR ( /New/ ? "NEW DIR" : "IMPORTED",
                               " $commit_count_out\n")
                    if $debug >= 3;
            }
        }
        # hyperlink the directory
        if ($ARGV !~ /^($unknown_prefix)/o ) {
            $htm = html_escape($found_dir);
            $uri = uri_escape("$cvsweb/$found_dir$cvsweb_suffix");
            s{\Q$found_dir\E}{<A HREF="$uri">$htm</A>};
        }
        next;
    }
    # check for (another) begin of "files" section
    # (there can be 1..3 per commit)
    if ($#f >= 1 && $f[0] =~ /^(Added|Branch:|Modified|Removed):?$/
        && ($f[0] =~ /:$/ || $f[1] eq 'files:')) {
        $files = 1; $log = $RCP = 0;
        $found_dir = undef;
        if ($#f == 3 && $f[2] eq "(Branch:") {
            $branchtag = $f[3];
            $branchtag =~ s/\)$//; # remove ')'
        } elsif ($f[0] eq "Branch:") {
            # old style commit message format
            $branchtag = $f[-1];
        } else {
            $branchtag = "MAIN";
        }
        $htm = html_escape($branchtag);
        s{\b(\Q$branchtag\E)\b}{$branch_font1$htm$branch_font2};
        # filter on branch
        if ($copy == 1) {
            if ($branch_wanted && $branch_wanted ne $branchtag) {
                --$by_committer{$committer} if $copy; # adjust committer count
                $copy = 0;
                print STDERR "not branch '$branch_wanted'\n" if $debug >= 3;
            } elsif (!@logmsg_patterns) {
                $copy = 2;
                ++$commit_count_out;
                print STDERR "ACCEPTED (branch) $commit_count_out\n"
                    if $debug >= 3;
            }
        }
        if ($f[0] =~ /:$/ ) {
            # old style commit message: the directory is on the same line,
            # so fall through to the "files" handling below
            if ($ARGV =~ /^($unknown_prefix)/o ) {
                if ($f[1] =~ m{/ncvs/(\S+)$} ) {
                    $found_dir = $f[1];
                    $dir = $1;
                } else {
                    $dir = $found_dir = ""; # we do not know the prefix
                }
            } else {
                $found_dir = $f[1];
                $dir = "${pfx}$found_dir";
            }
        } else {
            $dir = $found_dir = undef;
            next;
        }
    }
    # check for begin of "Log:" section
    if ( /^  Log:$/ ) {
        $files = $RCP = 0; # begin of "log" section closes "files" section
        $log = 1;
        next;
    }
    # check for begin of "RCP" section
    if ($#f == 2 && $f[0] eq "Revision" && $f[1] eq "Changes"
        && $f[2] eq "Path" && /^  Revision\b/ ) {
        $RCP = 1;
        $log = $files = 0; # begin of "RCP" section closes "Log:" section
        warn "Zero logmessage: $committer $date $time\n"
            if ! $logmessage && $debug >= 4;
        next;
    }
    # handle data inside "files" section
    if ($files) {
        # we are inside the "files" section
        if ( m{^(?: {4}| {15})(\S+)\s} ) {
            # find directory depending on leading whitespace:
            # NEW: /^ {4}(\S+)/ OR OLD: /^ {15}(\S+)/
            $dir = $found_dir = $1;
            if ($ARGV =~ /^($self_prefix)/o ) {
                $dir = "$1/$dir";
            } elsif ($ARGV =~ /^($unknown_prefix)/o ) {
                if ($found_dir =~ m{/ncvs/(\S+)$} ) {
                    $dir = $1;
                } else {
                    $dir = ""; # we do not know the prefix
                }
            } else {
                $dir = "${pfx}$dir";
            }
            print STDERR "found directory: '$dir'\n" if $debug >= 4;
        }
        if (!defined($dir) || !defined($found_dir)) {
            # Format error (maybe a commit message pasted into the log
            # of another one...): carry on as if a "Log:" line was seen.
            $files = $RCP = 0;
            $log = 1;
            next;
        }
        next unless $dir;
        foreach my $f (@f) {
            if ($f eq $branchtag
                || $f =~ /^(Added|Branch|Modified|Removed):$/ # old style
                || $f =~ /\.$/
                ) {
                next;
            }
            $htm = html_escape("$f");
            if ($f eq $found_dir) {
                $uri = uri_escape("$cvsweb/$dir$cvsweb_suffix");
            } else {
                $uri = uri_escape("$cvsweb/$dir/$f$cvsweb_suffix");
            }
            s{\b\Q$f\E(?=\s)}{<A HREF="$uri">$htm</A>};
        }
    } # end of code for files section
    # handle data inside "Log:" section
    if ($log) {
        # handle old style import lines
        if ( m{^  [A-Z] (\S+/\S+)$} ) {
            if ($dir) {
                $htm = html_escape($1);
                $uri = uri_escape("$cvsweb/$1$cvsweb_suffix");
                s{\b\Q$1\E\b}{<A HREF="$uri">$htm</A>};
            }
            next; # this is not part of the log message
        }
        $logmessage .= $_ if $copy == 1; # accumulate logmessage
        $_ = html_escape($_); # html_escape logmessage text
        # hyperlink http:// and ftp:// URLs
        s{\b((?i:https?|ftp)://\S+)}
         {'<A HREF="'.uri_escape($1).'">'.html_escape($1)."</A>"}eg;
        # hyperlink PR number (only one)
        if ( m{\bPR\s*[:\#]\s*((?:\w+/)?\d+)\b} ) {
            $uri = uri_escape("$query_pr_cgi?pr=$1");
            $htm = html_escape("$1");
            s{\b$1\b}{<A HREF="$uri">$htm</A>};
        }
        # hyperlink submitter email address(es)
        if ( /^\s+[A-Z][a-z]+.*(?i:by|from):/g ) {
            # "Submitted by:" style lines: any address on the line
            s{\b([\w+=\-.!]+@[\w\-]+(\.[\w\-]+)+)\b}
             {'<A HREF="' . uri_escape("mailto:$1") . '">'
                  . html_escape($1) . "</A>"}eg;
        } else { # only within '<' '>'
            s{(?:<|&lt;)([\w+=\-.!]+@[\w\-]+(\.[\w\-]+)+)(?:>|&gt;)}
             {'&lt;<A HREF="' . uri_escape("mailto:$1") . '">'
                  . html_escape($1) . '</A>&gt;'}eg;
        }
        # emphasize searched text (you'd rather say '.+' than '.' for testing)
        for my $pat (@compiled_logmsg_patterns) {
            s{($pat)}{$log_font1$1$log_font2}g;
        }
    }
    # handle data inside "RCP" section
    # lines have this format: "rev"  "+x" "-y"  "path"
    if ($RCP) {
        if (! /^\s+(\d+(?:\.\d+)+)\s+(\+\d+ -\d+)\s*(\S+)\s*$/) {
            # Format error (maybe a commit message pasted into the log
            # of another one...): carry on as if a "Log:" line was seen.
            $files = $RCP = 0;
            $log = 1;
            next;
        }
        my ($rev, $plusminus, $path) = ($1, $2, $3);
        my $htm = html_escape($path);
        my $prev = $rev; # previous revision
        if ($prev =~ /\.(\d+)$/) {
            my $lsn = $1; # least significant number in revision
            $lsn -= 1;
            if ($lsn) { # predecessor of x.y is x.(y-1)
                $prev =~ s/$1$/$lsn/;
            } else { # first revision on a branch: x.y.z.1 follows x.y
                $prev =~ s/\.\d+\.\d+$//;
            }
            my $suffix = ($cvsweb_suffix ? "$cvsweb_suffix&" : "?");
            # order of substitutions (line end to begin) is important here!
            # hyperlink the source file
            $uri = uri_escape("$cvsweb/$path$cvsweb_suffix");
            s{(?<=\d|\s)\Q$path\E\b}{<A HREF="$uri">$htm</A>};
            # hyperlink the delta
            $uri = uri_escape("$cvsweb/$path.diff${suffix}r1=$prev&r2=$rev");
            s{\Q$plusminus\E}{<A HREF="$uri">$plusminus</A>};
            # hyperlink the new revision
            $uri = uri_escape("$cvsweb/$path") . "#"
                . uri_escape("rev$rev$cvsweb_suffix");
            s{\b\Q$rev\E\b}{<A HREF="$uri">$rev</A>};
        }
    }
} continue {
    # output stage, reached at the end of the block and by every "next"
    if ($copy == 2) { # copy out
        if ($headline) {
            print $headline;
            $headline = "";
        }
        if ($buf) {
            print $buf;
            $buf = "";
        }
        print;
    } elsif ($copy == 1) { # accumulate into $buf
        $buf .= $_;
    } else { # suppress output
        $buf = "";
    }
}} # end of parse_line()


######### MAIN #########

# Page title: the program name followed by the basenames of all arguments.
my $title = "$prog";
foreach my $arg (@ARGV) {
    $title .= " " . basename($arg);
}
# html_escape() returns the escaped copy and leaves its argument untouched,
# so the result must be assigned back for the <TITLE> to be escaped.
$title = html_escape($title);
print <<EndOfHeader if $html_header_and_footer;
<HTML><HEAD>
<META NAME="generator" CONTENT="$version">
<TITLE>$title</TITLE></HEAD>
<BODY $body_attributes><PRE>
EndOfHeader

if (!@ARGV) {
    # no file arguments: read standard input, without per-file headers
    $file_headers = 0;
    unshift(@ARGV, '-');
} elsif ($sort_arguments) {
    # oldest file first (-M yields the age in days, larger means older)
    local $^W = 0;
    @ARGV = sort { -M $b <=> -M $a } @ARGV;
}
FILE:
# defined() keeps an argument named "0" from ending the loop early
while (defined($ARGV = shift)) {
    if ($ARGV =~ /^(\+?>|\|)/ || -d $ARGV) {
        # looks like an output redirection / pipe, or is a directory
        print STDERR "$prog: ignoring '$ARGV'\n";
        next;
    }
    # See if we can skip a file because of its age
    if ($date_wanted) {
        my $fileage = -M $ARGV;
        if ($fileage && $start_date
            && $script_started - $fileage * 86400 < $start_date) {
            # this input file is too old, we can skip it
            print STDERR "$prog: file '$ARGV' is too old, skipping it\n"
                if $debug >= 2;
            next FILE;
        }
    }
    # two-argument open on purpose: it makes "-" mean standard input
    if (open(ARGV, "$ARGV")) {
        $. = $commit_count_in = $commit_count_out = 0;
        my $gz = gzopen(\*ARGV, "rb") or die $ARGV;
        $headline = '<A NAME="' . html_escape(basename($ARGV)) . '"></A>';
        $headline .= "<H1>" . html_escape($ARGV) . "</H1>\n" if $file_headers;
        $ARGV = basename($ARGV);
      LINE:
        # parse_line() may leave this loop with "last LINE"
        while ($gz->gzreadline($_) > 0) {
            $.++;
            parse_line();
        }
        # "not gzip '$file' ?\n" if $gzerrno != Z_STREAM_END;
        # NOTE(review): only the stream of the last file is closed
        # explicitly; why the others are not is not visible here - confirm.
        $gz->gzclose() unless @ARGV; # close last one
        if ($commit_count_in == 0) {
            warn "$prog: no commit messages found in $ARGV\n"
                if $debug >= 2;
        } elsif ($commit_count_out == 0) {
            warn "$prog: no commit messages converted from $ARGV\n"
                if $commit_count_in > 1 && $verbose || $debug >= 2;
        } elsif ($commit_count_in != $commit_count_out) {
            warn "$prog: $ARGV: $commit_count_out of $commit_count_in commit messages converted\n" if $verbose;
            print "$extra_font1$commit_count_out of $commit_count_in commit messages displayed$extra_font2<p>\n"
                if $file_headers;
        }
    } else {
        print STDERR "$prog: $ARGV: $!\n";
    }
}


print <<'EndOfFooter' if $html_header_and_footer;
</PRE></BODY></HTML>
EndOfFooter

# STDOUT was redirected to $outfile: make sure buffered output reached it
if ($outfile ne "-") {
    close(STDOUT) or die;
}

__END__;

=head1 NAME

commitlog2cvsweb - convert FreeBSD commitlog files to HTML

=head1 SYNOPSIS

=over 4

=item .

commitlog2cvsweb [--cvsweb=I<URL>] [--urlsuffix=I<SFX>] [--branch=I<TAG>]
[--showallbranches] [--outfile=I<FILE>] [--committer=I<PATTERN>]
[--and] [--insensitive] [--logmessage=I<PATTERN>]...
[--link-committers] [--queryprcgi=I<URL>] [--number-commits]
[--maximum=I<NUM>] [--sort-arguments]  I<commitlog-file>...

=item .

commitlog2cvsweb --version

=back

=head1 DESCRIPTION

The commitlog2cvsweb program takes one or more commitlog files and
converts them into HTML. The commitlog files are usually stored under
F</home/ncvs/CVSROOT/commitlogs/> and can be retrieved by cvsup-ing
the src-base collection in CVS mode. (See L<cvsup>,
http://www.FreeBSD.org/handbook/cvsup.html and L</EXAMPLES>.)

The generated HTML has hyperlinks to the C<cvsweb.cgi> script. This
means you can click on any of the changed files and see the CVS log
(change history) and have access to all the revisions and deltas.

(The cgi-script C<cvsweb.cgi> was originally written by Bill Fenner
<fenner@freebsd.org> for the FreeBSD project. It allows browsing of
CVS-repositories with a HTML-browser. CVS is a popular version control
system. C<cvsup> is written by John Polstra <jdp@polstra.com>. It is
a network distribution package for CVS repositories.)

Options may be abbreviated to a unique prefix. The options are as
follows:

=over 4

=item --cvsweb=I<URL>

Specify URL of cvsweb.cgi script.

=item --urlsuffix=I<SFX>

Specify some extra information for appending to generated URLs. (You
should not type a leading C<?> or C<&> character because it will be
added automatically.)

=item --branch=I<TAG>

Output only commit messages from branch I<TAG>.

=item --showallbranches

Tell C<cvsweb.cgi> that you want to see file revisions on all
branches. The default behaviour is to show only revisions on the
branch selected with C<--branch>.

=item --outfile=I<FILENAME>

Specify the output file. If no output file is specified standard
output is used.

=item --committer=I<PATTERN>

Output only commit messages from committer I<PATTERN>. More than one
committer is requested by separating the committer names with a bar.
Committer names in I<PATTERN> are case insensitive and must match the
full committer name.

=item --logmessage=I<PATTERN>

Output only commits with matching commit log messages. This option may
be specified multiple times.

=item --and

Output only commits matching all logmessage patterns (AND). The
default is to output commits matching any pattern (OR).

=item --insensitive

Ignore case when matching logmessage patterns.

=item --link-committers

Add hyperlinks to each committer's previous/next commit.

=item --number-commits

Sequentially number all commits. (Numbering is done on a per file
basis.)

=item --queryprcgi=I<URL>

Use I<URL> to query problem reports. Default:
http://www.FreeBSD.org/cgi/query-pr.cgi

=item --maximum=I<NUM>

Output at most I<NUM> commits per input file.

=item --body-attributes=I<ATTRIBUTES>

Specify attributes for the html BODY tag.

=item --noheadfoot

Suppress output of HTML, HEAD and BODY tags. This is for including the
output into another html file.

=item --from=I<DATE>

Output only commit messages made after I<DATE>. Options C<--after> and
C<--since> are aliases for C<--from>.

=item --to=I<DATE>

Output only commit messages made before I<DATE>. Options C<--before>
and C<--upto> are aliases for C<--to>. If I<DATE> does not contain a
timezone then the date is interpreted according to the local timezone.
Check L<Time::ParseDate> for supported date/time formats.

=item --nofileheaders

Do not emit H1 tags for each input file.

=item --sort-arguments

Arguments (input files) are processed in order of their modification
times. This is useful to process several commitlog files in
chronological order.

=item --version

Print version information and exit.

=back

Input files may be compressed with C<gzip>.

=head1 FILES

Default values for all options will be read from
F</usr/local/etc/commitlog2cvsweb.conf> if it exists.

=head1 EXAMPLES

=head2 Step1: Retrieving the commitlog files

As a first step you need to get commitlog files. Retrieve them with
cvsup(1). An example supfile is:

     # Change the next line to use your nearest CVSup mirror site
     # listed at http://www.FreeBSD.org/handbook/cvsup.html .
     *default host=cvsup.at.FreeBSD.org base=/usr prefix=/home/ncvs
     *default release=cvs delete use-rel-suffix
     # If you have a fast network link, comment out the following line!
     *default compress
     src-base

Store this as file F<base-supfile> and run cvsup:

C<cvsup -i CVSROOT/commitlogs base-supfile>

This will retrieve all commitlog files. That is some 17MB as of
February 2001. If you are not interested in old logs use a refuse file
as described in L<cvsup> or a more selective C<-i> option in the above
command line example.

=head2 Step2: HTMLizing with commitlog2cvsweb

To check what was going on in the area of FreeBSD kernel development
since last Monday run this command:

C<commitlog2cvsweb --since "monday 0:00" -o sys.html
    /home/ncvs/CVSROOT/commitlogs/sys>

and open F<sys.html>.

To see all kernel commits mentioning C<sound> or C<pcm> dating from
the year 2000 run:

C<commitlog2cvsweb --since "2000-01-01" --before "2001-01-01"
-o sound2K.html -i --log sound --log 'pcm\b' --sort
/home/ncvs/CVSROOT/commitlogs/sys*>

Note how C<\b> (word boundary) was used to avoid matching C<pcmcia>.

=head1 BUGS

This implementation is way too slow. (Do not even think of using it as
a CGI program!)

Commit messages pasted into logs of other commits may confuse the
parser.

You cannot show revisions and diffs of files which have been removed
in the MAIN branch. (This is a limitation of C<cvsweb.cgi>.)

Timestamps in commitlog files without a timezone are interpreted
according to the local timezone. (These timestamps occur only in very
old FreeBSD commit logs.)

Option C<--link-committers> always adds a link to the next commit made
by the same committer. This is wrong for the last one. (This bug is a
consequence of single pass processing.)

=head1 AUTHOR

Martin Kammerhofer <mkamm@gmx.net>

