#! @PERL5@ -wT
# @configure_input@
# Copyright  2001 Martin Kammerhofer <mkamm@gmx.net>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

# Convert FreeBSD "commitlog" file(s) into HTML with links to cvsweb.cgi.
# This script parses CVS commitlogs assembled by the log_accum.pl script.
# @(#)$Id: commitlog2cvsweb.pl.in,v 1.15 2001/03/17 03:22:04 mkamm Exp $

# WARNING: The following code is very poor style. It has been hacked
# a few times and needs a rewrite. However it does still work for me.

require 5.003;
use strict;
use Carp;
use Getopt::Long;
use URI::Escape;
use Compress::Zlib;
use File::Basename;
use Config::IniFiles;
use constant CONFENV =>  "CVSWEB_CONVERTERS_CONF";	# ENV variable
use constant CONFNAME => "cvsweb-converters.conf";	# file basename
use constant COMMON => "common";	# default section in ini-file

# Program name (used in all diagnostics) and the directory it lives in.
# The suffix pattern is single quoted on purpose: inside double quotes
# "\." collapses to "." (any character), which also strips suffixes
# such as "xpl" and triggers an "unrecognized escape" warning under -w.
my ($prog, $prog_dir) = fileparse($0, '\.p(er)?l');
# Reduce the RCS keyword to "file,v revision date time author state".
my $version = '$Id: commitlog2cvsweb.pl.in,v 1.15 2001/03/17 03:22:04 mkamm Exp $'; # ';
$version =~ s/^\s*\$Id: //;
$version =~ s/ \$\s*$//;
my $debug = 0; # debuglevel (currently 2..5 are useful, see options -d, -debug)

# Running with -T: drop the environment variables that influence how
# child processes are located and started.
delete @ENV{qw(IFS CDPATH ENV BASH_ENV PATH)};

# Configurable parameters and their built-in defaults.
# The variable list between the two PARAM marker comments below is read
# back from this script's own source text by the config file code
# further down: every scalar or array named there can be set from the
# config file under the same name. Keep the markers and the order of the
# variable list in sync with the list of default values.
# <PARAM>	keep this comment line here!
my ($verbose, $pfx, $self_prefix, $unknown_prefix, $stdin_with_pfx,
    $cvsweb, $urlsuffix, $branch_wanted, $show_all_branches,
    $query_pr_cgi, $committer_wanted, $maximum_output,
    $hyperlink_committers, $and_patterns, $case_insensitive,
    $number_commits, $hr, $html_header_and_footer, $body_attributes,
    $extra_font1, $extra_font2, $branch_font1, $branch_font2,
    $log_font1, $log_font2, $file_headers,
    $commits_are_time_sequential, $sort_arguments, $start_date,
    $end_date, $perldoc, $outfile, @logmsg_patterns) =
# </PARAM>	keep this comment line here!

    # DO NOT EDIT THE DEFAULTS HERE - use the configfile instead!
    # (Otherwise you have to start all over when you upgrade this script.)
    (
     0,		# verbose flag (file summary lines to STDERR)
     "src/",	# default prefix ${pfx} for directories inside repository
     # the following files get their name as prefix
     "CVSROOT|distrib|doc|ports|www",
     "other",	# no known prefix for these files
     0,		# use ${pfx} on --stdin too
     "http://www.FreeBSD.org/cgi/cvsweb.cgi", # URL of cvsweb.cgi
     "",	# suffix to URL e.g. "cvsroot=myproject"
     "",	# RCS branch e.g. "MAIN"
     0,		# show all branches in cvsweb, even if only commits to a
		# single branch are requested with $branch_wanted
     "http://www.FreeBSD.org/cgi/query-pr.cgi", # PR cgi
     "",	# case insensitive committer pattern, e.g. "joe|fred"
     0,		# max. number of commit messages to output (0 for no limit)
     0,		# add hyperlinks to next/previous commit by same committer
     0,		# ANDing of log message patterns
     0,		# case insensitive pattern matching
     1,		# number all commits
     1,		# rules before each commit
     1,		# print HTML header and footer (<HEAD> and <BODY>)
     "bgcolor=white",	# attributes for HTML <BODY> tag:
     "<font size=-1>",	# "<font>"  tag for per committer links and commit #s
     "</font>",		# "</font>" tag for per committer links and commit #s
     "<font color=green>",	# "<font>"  tag for branch
     "</font>",			# "</font>" tag for branch
     "<font color=red>",	# "<font>"  tag for searched text in log
     "</font>",			# "</font>" tag for searched text in log
     1,		# whether to write <h1> file headers
     1,		# commit times are always increasing in input files
     0,		# sort @ARGV according to modification dates (-M)
     undef, undef,	# start/end date
     "perldoc",	# pathname of perldoc script (if not in default PATH)
     "-",	# outputfile
     (),	# grep patterns
     );

# Destination table handed to GetOptions(): each primary option name
# maps to a reference to the variable that receives its value. Options
# without an entry here (help, version) end up as plain values in this
# hash.
my %optctl = (
    "and"               => \$and_patterns,
    "body-attributes"   => \$body_attributes,
    "branch"            => \$branch_wanted,
    "committer"         => \$committer_wanted,
    "cvsweb"            => \$cvsweb,
    # both spellings feed the same variable: set (=i) or increment (+)
    "debuglevel"        => \$debug,
    "d"                 => \$debug,
    "fileheaders"       => \$file_headers,
    "from"              => \$start_date,
    "headfoot"          => \$html_header_and_footer,
    "hr"                => \$hr,
    "insensitive"       => \$case_insensitive,
    "link-committers"   => \$hyperlink_committers,
    "logmessage"        => \@logmsg_patterns,
    "maximum"           => \$maximum_output,
    "number-commits"    => \$number_commits,
    "outfile"           => \$outfile,
    "prefix"            => \$pfx,
    "queryprcgi"        => \$query_pr_cgi,
    "sequential"        => \$commits_are_time_sequential,
    "showallbranches"   => \$show_all_branches,
    "stdin-with-prefix" => \$stdin_with_pfx,
    "sort-arguments"    => \$sort_arguments,
    "to"                => \$end_date,
    "urlsuffix"         => \$urlsuffix,
    "verbose"           => \$verbose,
);

# Write the command line synopsis to STDERR and terminate the program
# with exit status 64. Never returns.
sub usage () {
    my @synopsis = (
	"usage: $prog [--cvsweb=URL] [--urlsuffix=SFX] [--branch=TAG]\n",
	" [--showallbranches] [--outfile=FILE] [--committer=PATTERN]\n",
	" [--and] [--insensitive] [--logmessage=PATTERN]...\n",
	" [--link-committers] [--queryprcgi=URL] [--number-commits]\n",
	" [--maximum=NUM] [--sort-arguments]  commitlog-file...\n",
	"or: $prog --help\n",
	"or: $prog --version\n",
    );
    print STDERR @synopsis;
    exit 64;
}

# Replace this process with perldoc displaying the script's own POD.
# Falls back to the short usage message when perldoc cannot be started.
sub help () {
    # First attempt: the configured perldoc command. The bare block
    # around exec keeps perl from warning about the statements after it.
    { exec($perldoc, $0) };
    # Second attempt: a 'perldoc' installed next to the running perl.
    # PATH was removed earlier, so point it at that one directory; the
    # capture untaints the value.
    if (dirname($^X) =~ /^(.*)$/) {
	$ENV{PATH} = $1;
    }
    { exec('perldoc', $0) };
    # Both attempts failed.
    print STDERR "$prog: cannot exec '$perldoc'\n";
    goto &usage; # fallback
}

# html escape '<', '>', '&' and '"'
#
# Takes one string and returns an escaped copy; the argument itself is
# not modified. Croaks with "Missing argument" when the argument is
# undefined. Strings that are false in boolean context ("0" and "") are
# valid input and are returned escaped like any other text.
sub html_escape ($) {
    my ($text) = @_;
    croak "Missing argument" unless defined $text;
    $text =~ s/&/&amp;/g;	# must come first: later entities contain '&'
    $text =~ s/"/&quot;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/</&lt;/g;
    return $text;
}

# OPTIONS PROCESSING

# 1st step: try to locate and read a config file
# try 1. $ENV{CONFENV}		(constants CONF* are defined above)
#     2. "~/.CONFNAME"		(in home directory prefixed with a dot)
# and 3. "@PREFIX@/etc/CONFNAME"		(in "/PREFIX/etc")
# Pick the first configuration file candidate (see the list above).
my $configfile;
if (exists $ENV{+CONFENV}) {
    $configfile = $ENV{+CONFENV};
} elsif (exists $ENV{HOME} && -e ("$ENV{HOME}/." . CONFNAME)) {
    $configfile = "$ENV{HOME}/." . CONFNAME;
} elsif (-e "@PREFIX@/etc/" . CONFNAME) {
    $configfile = "@PREFIX@/etc/" . CONFNAME;
}
if ($configfile) {
    my %config;
    if (!tie %config, 'Config::IniFiles',
	(-file => $configfile, -default => COMMON)) {
	$" = "\n";
	die "$0: cannot tie to config file '$configfile'\n",
	"@Config::IniFiles::errors\n";
    }
    # get my (sub)section(s): the common section first, then every
    # section whose name contains the program name (case insensitive)
    my (@sections, %param_hash);
    push @sections, $config{+COMMON} if exists $config{+COMMON};
    while (my ($section, $hashref) = each %config) {
	push @sections, $hashref if $section =~ m{\Q$prog\E}io;
    }
    # get a list of all parameters from the source code text:
    # collect every sigil+name token on the non-comment lines between
    # the first pair of PARAM marker comments in this very file
    my @param_list;
    open SELF, "< $0" or die "$prog: cannot read '$0': $!\n";
    while (<SELF>) {
	next unless m{<PARAM>}i ... m{</PARAM>}i;
	push @param_list, m{[\$\@\%\*\&]\w+}g unless m{^\s*[;\#]};
	last if m{</PARAM>}i; # only the 1st counts!
    }
    close SELF or die;
    # read and assign my parameters
    my $parameter_count = 0;
    foreach (@param_list) {
	my ($type, $name) = m{^(.)(\w+)};
	next unless $type =~ m{[\$\@]}; # only scalars and arrays possible
	$param_hash{$name} = undef; # remember parameter names for code below
	foreach my $section (@sections) {
	    if (exists $section->{$name}) {
		# The variable is addressed by name, hence the string
		# eval. $name can only consist of word characters (see
		# the pattern above), so nothing from the config file is
		# ever compiled as code.
		if ($type eq "\$") { # assign scalar
		    eval qq{ \$$name = \$section->{'$name'} };
		} elsif ($type eq '@') { # split and assign to array
		    eval qq{ \@$name = split(\$/, \$section->{'$name'}) };
		}
		die "$prog: cannot assign to $type$name: $@\n" if $@;
		$parameter_count++;
		# debugging aid; the list separator is localized so the
		# change does not leak into later output
		eval qq{ local \$\"=", \\n\\t"; print STDERR "\\$type$name = '$type$name'\\n" }
		    if $debug >= 5;
	    }
	}
    }
    print STDERR "$prog: WARNING: no parameters read from '$configfile'\n"
	unless $parameter_count;
    # warn about unrecognized parameters (uses %param_hash built above)
    my @unrecognized;
    foreach my $section (@sections) {
	foreach my $parameter (keys %{$section}) {
	    push @unrecognized, $parameter
		unless exists $param_hash{$parameter};
	}
    }
    if (@unrecognized) {
	@unrecognized = sort @unrecognized;
	print STDERR "$prog: WARNING: ",
	"the following parameters are not recognized:\n",
	"@unrecognized\n";
    }
    # untie
    untie %config or die "untie failed";
} # done with $configfile

# 2nd step: process command line options
# Option specifications in Getopt::Long syntax; the destinations come
# from %optctl.
my @option_specs = (
    "cvsweb|url=s", "branch|tag=s", "version!",
    "urlsuffix|suffix=s", "outfile|output=s",
    "committer=s", "showallbranches!", "debuglevel=i", "d+",
    "queryprcgi=s", "logmessage|logstring|logpattern|logmsg=s@",
    "link-committers!", "and!", "number-commits!",
    "insensitive!", "body-attributes=s", "headfoot!",
    "from|after|since=s", "to|upto|before=s",
    "verbose!", "hr!", "maximum|atmost=i",
    "prefix=s", "stdin-with-prefix!", "fileheaders!",
    "sequential!", "sort-arguments!", "help!",
);
GetOptions(\%optctl, @option_specs) or usage();

help() if $optctl{help};
if ($optctl{version}) {
    print "$version\n";
    exit 0;
}
# no input files and standard input is a terminal: nothing to convert
usage() if !@ARGV && -t STDIN;

# any debug level implies verbose output
$verbose = 1 if $debug;
# turn the boolean into the markup that is emitted before each commit
$hr = $hr ? "<HR>" : "";
# unless requested otherwise, standard input ('-') is treated like a
# file without known prefix
$unknown_prefix = "-|$unknown_prefix" unless $stdin_with_pfx;

# Turn the textual "from ... to" limits into seconds since the epoch.
# Time::ParseDate is loaded only when a date filter was requested.
my $date_wanted = ($start_date || $end_date) ? 1 : 0;
if ($date_wanted) {
    eval "require Time::ParseDate";
    die "$prog: required module Time::ParseDate not available\n" if $@;
    Time::ParseDate->import(qw(parsedate));

    my %opts = (WHOLE => 1, PREFER_PAST => 1, VALIDATE => 1);
    # parse one date string or terminate with the parser's message
    my $to_epoch = sub {
	my ($text) = @_;
	my ($seconds, $err) = parsedate($text, %opts);
	die "$prog: bad date: $err\n" unless $seconds;
	return $seconds;
    };
    # a missing start means "since the epoch", a missing end means
    # "until tomorrow"
    $start_date = $start_date ? $to_epoch->($start_date) : 0;
    $end_date = $end_date ? $to_epoch->($end_date) : time() + 86400;

    # check start <= end
    my ($from_text, $to_text) =
	map { scalar(localtime($_)) } ($start_date, $end_date);
    die "$prog: negative time interval:\n\t'$from_text' ==> '$to_text'\n"
	if $start_date > $end_date;
    print STDERR "$prog: '$from_text' ==> '$to_text'\n" if $verbose;
}

# Compile each log message pattern once; they are matched against
# every buffered commit later on.
my @compiled_logmsg_patterns;
foreach my $pattern (@logmsg_patterns) {
    push @compiled_logmsg_patterns,
	($case_insensitive ? qr/$pattern/ims : qr/$pattern/ms);
}

# Send all further output to the requested file ("-" keeps STDOUT).
unless ($outfile eq "-") {
    close(STDOUT) or die;
    if ($outfile =~ /^(.*)$/) {
	$outfile = $1; # untaint
    }
    open(STDOUT, "> $outfile") or
	die "$prog: redirect output to '$outfile': $!.\nStopped";
}

# VARIABLES

# Scratch variables shared between parse_line() and the main program.
my (@f, $fname, $uri, $htm, $dir, $found_dir, $logmessage, $headline);

# Query string appended to every cvsweb URL: an optional branch
# restriction followed by the user supplied suffix, "" if neither applies.
my $cvsweb_suffix = do {
    my @query_parts;
    push @query_parts, "only_with_tag=" . $branch_wanted
	if $branch_wanted && !$show_all_branches;
    push @query_parts, $urlsuffix if $urlsuffix;
    @query_parts ? "?" . join("&", @query_parts) : "";
};

# Parser state
my $copy = 0; # copy from input flag: 0..suppress, 1..buffer, 2..copy
my $buf = ""; # output buffer (until copy/suppress decision is done)
my $files = 0;	# 1 if inside added/modified/removed files section
my $RCP = 0;	# 1 if inside "Revision Changes Path" section
my $log = 0;	# 1 if inside "Log:" section
my $commit_count_in = 0;
my $commit_count_out = 0;
my ($committer, $date, $time, $tz, %by_committer, $branchtag);
# (do not confuse the parsed $branchtag with $branch_wanted from commandline)


# parse the line in $_
#
# Called once per input line by the main loop. The line in $_ is
# classified (commit headline, "files" section, "Log:" section,
# "Revision Changes Path" section), decorated with HTML links in place
# and then printed, buffered or dropped by the continue block at the
# end, depending on $copy (0..suppress, 1..buffer, 2..copy).
#
# Control flow: the body is a bare block inside the sub, so "next"
# leaves that block and runs the continue block, whereas "last LINE"
# terminates the line loop of the caller.
#
# NOTE(review): a buffered commit ($copy == 1) is only decided on when
# the next headline arrives. The caller does not seem to flush after the
# last line of a file, so the final buffered commit may be carried over
# or lost - confirm.
sub parse_line() {{
    print STDERR if $debug >= 5;
    if ( /^\s*$/ ) {
        $files = $RCP = 0; # empty lines close "files" or "RCP" sections
        next;
    }
    @f = split;
    # check for begin of new commit message:
    # look for a "committer" "date" "time" "tz" line
    if (($#f == 2 || $#f == 3) && $f[0] =~ /^\S+$/
        && $f[1] =~ m{^(\d\d)?\d\d/\d\d/\d\d$}
        && $f[2] =~ /^\d\d:\d\d:\d\d$/
        # && ($#f == 2 || $f[3] =~ /^[A-Z]{3,4}$/ )
        && /^\S+/ # committer name must start in first column
        ) {
        # finish last commit?
        if ($copy == 1) {
            if ($branch_wanted && $branch_wanted ne $branchtag) {
                # is the branch correct?
                # we take this code path only for "ancient" imports...
                $copy = 0;
                --$by_committer{$committer}; # adjust per committer count
                print STDERR "not branch '$branch_wanted' (???)\n"
                    if $debug >= 3;
            } elsif (@logmsg_patterns) {
                # grep the log message (only for commits that were not
                # already rejected because of their branch)
                my $accepted = 0;
                if ($logmessage) {
                    if ($and_patterns) {
                        # all patterns ANDed
                        $accepted = 1;
                        foreach my $pat (@compiled_logmsg_patterns) {
                            if ($logmessage !~ /$pat/ ) {
                                $accepted = 0; last;
                            }
                        }
                    } else {
                        # all patterns ORed
                        foreach my $pat (@compiled_logmsg_patterns) {
                            if ($logmessage =~ /$pat/ ) {
                                $accepted = 1; last;
                            }
                        }
                    }
                }
                if ($accepted) {
                    $commit_count_out++;
                    print $headline, $buf;
                    $headline = $buf = "";
                    print STDERR "ACCEPTED (log) $commit_count_out\n"
                        if $debug >= 3;
                } else {
                    # no log message at all or no match
                    --$by_committer{$committer}; # adjust per committer count
                    print STDERR "not logmessage\n" if $debug >= 3;
                }
                $copy = 0; # don't accept twice
            }
            die "internal error" if $copy; # something wrong here?
        } # finished last commit

        # initialize all those state variables :(
        $copy = $files = $log = $RCP = 0; # not inside any section yet
        $branchtag = $buf = $logmessage = ""; # not known yet
        $dir = $found_dir = undef;
        # output limit reached?
        if ($maximum_output && $commit_count_out >= $maximum_output) {
            print STDERR "LIMIT $commit_count_out reached\n" if $debug >= 3;
            print "<h3>limit of $commit_count_out commits reached</h3>\n";
            # leaving the sub via "last" warns under -w; silence that
            local $^W = 0; last LINE;
        }
        # PROCESS THE COMMIT MESSAGE
        $commit_count_in++;
        print STDERR "found commit $commit_count_in: @f\n" if $debug >= 2;
        ($committer, $date, $time, $tz) = @f;
        # $tz may be undefined or junk if we have a "historic" commit message
        $tz = "" if !defined($tz) || $tz !~ /[A-Z]{3,4}/ ;
        # filter by committer
        if ($committer_wanted
            && $committer !~ m{^(?i:$committer_wanted)$} ) {
            # we are not for this committer
            print STDERR "not committers '$committer_wanted'\n" if $debug >= 3;
            next;
        }
        # filter by date (very stupid and inefficient algorithm)
        if ($date_wanted) {
            my $datetime = "$date $time $tz";
            my $commit_date = parsedate($datetime) || die;
            if ($commit_date < $start_date) {
                print STDERR "too early '$datetime'\n" if $debug >= 3;
                next;
            }
            if ($commit_date > $end_date) {
                local $^W = 0;
                print STDERR "too late '$datetime'\n" if $debug >= 3;
                last LINE if $commits_are_time_sequential;
                next;
            }
        }
        if ($branch_wanted || @logmsg_patterns) {
            $copy = 1; # buffer output until we have seen the branch / log
        } else {
            $copy = 2;
            ++$commit_count_out;
            print STDERR "ACCEPTED (head) $commit_count_out\n" if $debug >= 3;
        }
        my $count = ++$by_committer{$committer}; # num. of commits by committer
        # (The above count will have to be decremented if we decide to filter
        # this commit message later.)
        my ($next, $prev) = ($count+1, ($count>1 ? $count-1 : undef));
        if ($hyperlink_committers) {
            $uri = uri_escape($committer);
            $htm = html_escape($committer);
            # insert a link to this committer's next commit
            s{^\Q$committer\E}{<A NAME="$uri$count"><A HREF="#$uri$next">$htm$extra_font1&nbsp;($count)$extra_font2</A></A>};
            if ($prev) {
                # insert a link to this committer's previous commit
                s{$}{&nbsp;<A HREF="#$uri$prev">$extra_font1(previous)$extra_font2</A>};
            }
        }
        # make an anchor for this commit message
        if ($number_commits) {
            my ($anchor);
            if ($ARGV =~ /^(\w+(\.\d+)?)/ ) {
                $anchor = $1;
                $anchor =~ tr /.//d;
            } else {
                $anchor = "commit";
            }
            $anchor .= "/$commit_count_in";
            s{^}{<A NAME="$anchor">$extra_font1($anchor) $extra_font2</A>$hr};
        } elsif ($hr) {
            s/^/$hr/;
        }
        next;
    } # end of headline processing
    # only commit headlines (see above) start in column 1:
    die "Bad input format: $_\nStopped" if /^\S/;
    # No need to parse further?
    next unless $copy;
    # check for a new repository directory or imported sources
    if ( /^  (\S+) - New directory$/ ||
         /^  (\S+) - Imported sources$/ ) {
        $found_dir = $1;
        if ($copy == 1) {
            if ($branch_wanted && $branch_wanted ne "MAIN") {
                --$by_committer{$committer}; # adjust per committer count
                $copy = 0; # suppress this commit
                print STDERR "not branch '$branch_wanted' (new dir/import)\n"
                    if $debug >= 3;
                next;
            } elsif (@logmsg_patterns) {
                --$by_committer{$committer}; # adjust per committer count
                $copy = 0; # suppress this commit
                print STDERR "not log (new dir/import)\n" if $debug >= 3;
                next;
            } else {
                $copy = 2; # copy out
                $commit_count_out++;
                print STDERR ( /New/ ? "NEW DIR" : "IMPORTED",
                               " $commit_count_out\n")
                    if $debug >= 3;
            }
        }
        # hyperlink the directory
        if ($ARGV !~ /^($unknown_prefix)/o ) {
            $htm = html_escape($found_dir);
            $uri = uri_escape("$cvsweb/$found_dir$cvsweb_suffix");
            s{\Q$found_dir\E}{<A HREF="$uri">$htm</A>};
        }
        next;
    }
    # check for (another) begin of "files" section
    # (there can be 1..3 per commit)
    if ($#f >= 1 && $f[0] =~ /^(Added|Branch:|Modified|Removed):?$/
        && ($f[0] =~ /:$/ || $f[1] eq 'files:')) {
        $files = 1; $log = $RCP = 0;
        $found_dir = undef;
        if ($#f == 3 && $f[2] eq "(Branch:") {
            $branchtag = $f[3];
            $branchtag =~ s/\)$//; # remove ')'
        } elsif ($f[0] eq "Branch:") {
            # old style commit message format
            $branchtag = $f[-1];
        } else {
            $branchtag = "MAIN";
        }
        $htm = html_escape($branchtag);
        s{\b(\Q$branchtag\E)\b}{$branch_font1$htm$branch_font2};
        # filter on branch
        if ($copy == 1) {
            if ($branch_wanted && $branch_wanted ne $branchtag) {
                --$by_committer{$committer} if $copy; # adjust committer count
                $copy = 0;
                print STDERR "not branch '$branch_wanted'\n" if $debug >= 3;
            } elsif (!@logmsg_patterns) {
                $copy = 2;
                ++$commit_count_out;
                print STDERR "ACCEPTED (branch) $commit_count_out\n"
                    if $debug >= 3;
            }
        }
        if ($f[0] =~ /:$/ ) {
            # old style commit message
            if ($ARGV =~ /^($unknown_prefix)/o ) {
                if ($f[1] =~ m{/ncvs/(\S+)$} ) {
                    $found_dir = $f[1];
                    $dir = $1;
                } else {
                    $dir = $found_dir = ""; # we do not know the prefix
                }
            } else {
                $found_dir = $f[1];
                $dir = "${pfx}$found_dir";
            }
        } else {
            $dir = $found_dir = undef;
            next;
        }
    }
    # check for begin of "Log:" section
    if ( /^  Log:$/ ) {
        $files = $RCP = 0; # begin of "log" section closes "files" section
        $log = 1;
        next;
    }
    # check for begin of "RCP" section
    if ($#f == 2 && $f[0] eq "Revision" && $f[1] eq "Changes"
        && $f[2] eq "Path" && /^  Revision\b/ ) {
        $RCP = 1;
        $log = $files = 0; # begin of "RCP" section closes "Log:" section
        warn "Zero logmessage: $committer $date $time\n"
            if ! $logmessage && $debug >= 4;
        next;
    }
    # handle data inside "files" section
    if ($files) {
        # we are inside the "files" section
        # if a string contains a slash (/) it is the directory name
        if ( m{^(?: {4}| {15})(\S+)\s} ) {
            # find directory depending on leading whitespace:
            # NEW: /^ {4}(\S+)/ OR OLD: /^ {15}(\S+)/
            $dir = $found_dir = $1;
            if ($ARGV =~ /^($self_prefix)/o ) {
                $dir = "$1/$dir";
            } elsif ($ARGV =~ /^($unknown_prefix)/o ) {
                if ($found_dir =~ m{/ncvs/(\S+)$} ) {
                    $dir = $1;
                } else {
                    $dir = ""; # we do not know the prefix
                }
            } else {
                $dir = "${pfx}$dir";
            }
            print STDERR "found directory: '$dir'\n" if $debug >= 4;
        }
        if (!defined($dir) || !defined($found_dir)) {
            # Format error (maybe a commit message pasted into the log
            # of another one...): continue as if a "Log:" section had
            # started here.
            $files = $RCP = 0;
            $log = 1;
            next;
        }
        next unless $dir;
        foreach my $f (@f) {
            if ($f eq $branchtag
                || $f =~ /^(Added|Branch|Modified|Removed):$/ # old style
                || $f =~ /\.$/
                ) {
                next;
            }
            $htm = html_escape("$f");
            if ($f eq $found_dir) {
                $uri = uri_escape("$cvsweb/$dir$cvsweb_suffix");
            } else {
                $uri = uri_escape("$cvsweb/$dir/$f$cvsweb_suffix");
            }
            s{\b\Q$f\E(?=\s)}{<A HREF="$uri">$htm</A>};
        }
    } # end of code for files section
    # handle data inside "Log:" section
    if ($log) {
        # handle old style import lines
        if ( m{^  [A-Z] (\S+/\S+)$} ) {
            if ($dir) {
                $htm = html_escape($1);
                $uri = uri_escape("$cvsweb/$1$cvsweb_suffix");
                s{\b\Q$1\E\b}{<A HREF="$uri">$htm</A>};
            }
            next; # this is not part of the log message
        }
        $logmessage .= $_ if $copy == 1; # accumulate logmessage
        $_ = html_escape($_); # html_escape logmessage text
        # hyperlink http:// and ftp:// URLs
        s{\b((?i:https?|ftp)://\S+)}
         {'<A HREF="'.uri_escape($1).'">'.html_escape($1)."</A>"}eg;
        # hyperlink PR number (only one)
        if ( m{\bPR\s*[:\#]\s*((?:\w+/)?\d+)\b} ) {
            $uri = uri_escape("$query_pr_cgi?pr=$1");
            $htm = html_escape("$1");
            s{\b$1\b}{<A HREF="$uri">$htm</A>};
        }
        # hyperlink submitter email address(es)
        if ( /^\s+[A-Z][a-z]+.*(?i:by|from):/g ) {
            s{\b([\w+=\-.!]+@[\w\-]+(\.[\w\-]+)+)\b}
             {'<A HREF="' . uri_escape("mailto:$1") . '">'
                    . html_escape($1) . "</A>"}eg;
        } else { # only within '<' '>'
            s{(?:<|&lt;)([\w+=\-.!]+@[\w\-]+(\.[\w\-]+)+)(?:>|&gt;)}
             {'&lt;<A HREF="' . uri_escape("mailto:$1") . '">'
                    . html_escape($1) . '</A>&gt;'}eg;
        }
        # emphasize searched text (you'd rather say '.+' than '.' for testing)
        for my $pat (@compiled_logmsg_patterns) {
            s{($pat)}{$log_font1$1$log_font2}g;
        }
    }
    # handle data inside "RCP" section
    # lines have this format: "rev"  "+x" "-y"  "path"
    if ($RCP) {
        if (! /^\s+(\d+(?:\.\d+)+)\s+(\+\d+ -\d+)\s*(\S+)\s*$/) {
            # Format error (maybe a commit message pasted into the log
            # of another one...): continue as if a "Log:" section had
            # started here.
            $files = $RCP = 0;
            $log = 1;
            next;
        }
        my ($rev, $plusminus, $path) = ($1, $2, $3);
        my $htm = html_escape($path);
        my $prev = $rev; # previous revision
        if ($prev =~ /\.(\d+)$/) {
            my $lsn = $1; # least significant number in revision
            $lsn -= 1;
            if ($lsn) { # delta x.y => x.(y+1)
                $prev =~ s/$1$/$lsn/;
            } else { # delta x.y => x.y.z.1
                $prev =~ s/\.\d+\.\d+$//;
            }
            my $suffix = ($cvsweb_suffix ? "$cvsweb_suffix&" : "?");
            # order of substitutions (line end to begin) is important here!
            # hyperlink the source file
            $uri = uri_escape("$cvsweb/$path$cvsweb_suffix");
            s{(?<=\d|\s)\Q$path\E\b}{<A HREF="$uri">$htm</A>};
            # hyperlink the delta
            $uri = uri_escape("$cvsweb/$path.diff${suffix}r1=$prev&r2=$rev");
            s{\Q$plusminus\E}{<A HREF="$uri">$plusminus</A>};
            # hyperlink the new revision
            $uri = uri_escape("$cvsweb/$path") . "#"
                . uri_escape("rev$rev$cvsweb_suffix");
            s{\b\Q$rev\E\b}{<A HREF="$uri">$rev</A>};
        }
    }
} continue {
    # runs after the block above, also when it was left via "next"
    if ($copy == 2) { # copy out
        if ($headline) {
            print $headline;
            $headline = "";
        }
        if ($buf) {
            print $buf;
            $buf = "";
        }
        print;
    } elsif ($copy == 1) { # accumulate into $buf
        $buf .= $_;
    } else { # suppress output
        $buf = "";
    }
}} # end of parse_line()


######### MAIN PROGRAM #########

# Page title: program name followed by the basenames of all input
# files. html_escape() returns the escaped copy and leaves its argument
# alone, so the result has to be assigned; otherwise file names
# containing '<', '>', '&' or '"' would reach the <TITLE> unescaped.
my $title = html_escape(join(" ", $prog, map { basename($_) } @ARGV));
print <<EndOfHeader if $html_header_and_footer;
<HTML><HEAD>
<META NAME="generator" CONTENT="$version">
<TITLE>$title</TITLE></HEAD>
<BODY $body_attributes><PRE>
EndOfHeader

# Without file arguments read standard input ('-') and omit the per
# file headers; otherwise optionally order the files by their age.
if (@ARGV) {
    if ($sort_arguments) {
	local $^W = 0;
	@ARGV = sort { -M $b <=> -M $a } @ARGV;
    }
} else {
    $file_headers = 0;
    unshift(@ARGV, '-');
}

# Convert one input file after the other. The LINE label on the inner
# loop is required: parse_line() leaves that loop with "last LINE".
FILE:
while ($ARGV = shift) {
    # refuse directories and names that would make open() write or pipe
    if ($ARGV =~ /^(\+?>|\|)/ || -d $ARGV) {
	print STDERR "$prog: ignoring '$ARGV'\n";
	next;
    }
    # See if we can skip a file because of its age
    if ($date_wanted) {
	my $fileage = -M $ARGV;
	my $too_old = $fileage && $start_date
	    && $^T - $fileage * 86400 < $start_date;
	if ($too_old) {
	    # last modification predates the requested interval
	    print STDERR "$prog: file '$ARGV' is too old, skipping it\n"
		if $debug >= 2;
	    next FILE;
	}
    }
    unless (open(ARGV, "$ARGV")) {
	print STDERR "$prog: $ARGV: $!\n";
	next FILE;
    }

    $. = $commit_count_in = $commit_count_out = 0;
    # read through zlib so that gzip compressed files work as well
    my $gz = gzopen(\*ARGV, "rb") or die $ARGV;
    $headline = '<A NAME="' . html_escape(basename($ARGV)) . '"></A>';
    if ($file_headers) {
	$headline .= "<H1>" . html_escape($ARGV) . "</H1>\n";
    }
    $ARGV = basename($ARGV);
  LINE:
    while ($gz->gzreadline($_) > 0) {
	$.++;
	parse_line();
    }
    # There are errors returned here when closing. Since we are only
    # reading we don't care. There could be a memory leak in Perl or
    # Compress::Zlib (I didn't check) however there is no descriptor
    # leak (verified with lsof).
    $gz->gzclose();
    close(ARGV);

    # per file summary
    if ($commit_count_in == 0) {
	warn "$prog: no commit messages found in $ARGV\n"
	    if $debug >= 2;
    } elsif ($commit_count_out == 0) {
	warn "$prog: no commit messages converted from $ARGV\n"
	    if ($commit_count_in > 1 && $verbose) || $debug >= 2;
    } elsif ($commit_count_in != $commit_count_out) {
	warn "$prog: $ARGV: $commit_count_out of $commit_count_in commit messages converted\n"
	    if $verbose;
	print "$extra_font1$commit_count_out of $commit_count_in commit messages displayed$extra_font2<p>\n"
	    if $file_headers;
    }
}


# Close the elements opened by the HTML header.
if ($html_header_and_footer) {
    print "</PRE></BODY></HTML>\n";
}

# Output went to a file: make sure everything was written.
unless ($outfile eq "-") {
    close(STDOUT) or die;
}

__END__;

=head1 NAME

commitlog2cvsweb - convert FreeBSD commitlog files to HTML

=head1 SYNOPSIS

=over 4

=item .

commitlog2cvsweb [--cvsweb=I<URL>] [--urlsuffix=I<SFX>] [--branch=I<TAG>]
[--showallbranches] [--outfile=I<FILE>] [--committer=I<PATTERN>]
[--and] [--insensitive] [--logmessage=I<PATTERN>]...
[--link-committers] [--queryprcgi=I<URL>] [--number-commits]
[--maximum=I<NUM>] [--sort-arguments]  I<commitlog-file>...

=item .

commitlog2cvsweb --version

=back

=head1 DESCRIPTION

The commitlog2cvsweb program takes one or more commitlog files and
converts them into HTML. The commitlog files are usually stored under
F</home/ncvs/CVSROOT/commitlogs/> and can be retrieved by cvsup-ing
the src-base collection in CVS mode. (See L<cvsup>,
http://www.FreeBSD.org/handbook/cvsup.html and L</EXAMPLES>.)

The generated HTML has hyperlinks to the C<cvsweb.cgi> script. This
means you can click on any of the changed files and see the CVS log
(change history) and have access to all the revisions and deltas.

(The cgi-script C<cvsweb.cgi> was originally written by Bill Fenner
<fenner@freebsd.org> for the FreeBSD project. It allows browsing of
CVS-repositories with a HTML-browser. CVS is a popular version control
system. C<cvsup> is written by John Polstra <jdp@polstra.com>. It is
a network distribution package for CVS repositories.)

Options may be abbreviated to a unique prefix. The options are as
follows:

=over 4

=item --cvsweb=I<URL>

Specify URL of cvsweb.cgi script.

=item --urlsuffix=I<SFX>

Specify some extra information for appending to generated URLs. (You
should not type a leading C<?> or C<&> character because it will be
added automatically.)

=item --branch=I<TAG>

Output only commit messages from branch I<TAG>.

=item --showallbranches

Tell C<cvsweb.cgi> that you want to see file revisions on all
branches. The default behaviour is to show only revisions on the
branch selected with C<--branch>.

=item --outfile=I<FILENAME>

Specify the output file. If no output file is specified standard
output is used.

=item --committer=I<PATTERN>

Output only commit messages from committer I<PATTERN>. More than one
committer is requested by separating the committer names with a bar.
Committer names in I<PATTERN> are case insensitive and must match the
full committer name.

=item --logmessage=I<PATTERN>

Output only commits with matching commit log messages. This option may
be specified multiple times.

=item --and

Output only commits matching all logmessage patterns (AND). The
default is to output commits matching any pattern (OR).

=item --insensitive

Ignore case when matching logmessage patterns.

=item --link-committers

Add hyperlinks to each committer's previous/next commit.

=item --number-commits

Sequentially number all commits. (Numbering is done on a per file
basis.)

=item --queryprcgi=I<URL>

Use I<URL> to query problem reports. Default:
http://www.FreeBSD.org/cgi/query-pr.cgi

=item --maximum=I<NUM>

Output at most I<NUM> commits per input file.

=item --body-attributes=I<ATTRIBUTES>

Specify attributes for the html BODY tag.

=item --noheadfoot

Suppress output of HTML, HEAD and BODY tags. This is for including the
output into another html file.

=item --from=I<DATE>

Output only commit messages made after I<DATE>. Options C<--after> and
C<--since> are aliases for C<--from>.

=item --to=I<DATE>

Output only commit messages made before I<DATE>. Options C<--before>
and C<--upto> are aliases for C<--to>. If I<DATE> does not contain a
timezone then the date is interpreted according to the local timezone.
Check L<Time::ParseDate> for supported date/time formats.

=item --nofileheaders

Do not emit H1 tags for each input file.

=item --sort-arguments

Arguments (input files) are processed in order of their modification
times. This is useful to process several commitlog files in
chronological order.

=item --version

Print version information and exit.

=back

Input files may be compressed with C<gzip>.

=head1 FILES

C<commitlog2cvsweb> looks for a configuration file in three places.

=over 4

=item *

If the variable C<CVSWEB_CONVERTERS_CONF> is set in the environment its
content is interpreted as the name of the configuration file, otherwise

=item *

the file F<~/.cvsweb-converters.conf> is examined, and finally

=item *

F<@PREFIX@/etc/cvsweb-converters.conf> is tried.

=back

Only the first existing file is used.

=head1 EXAMPLES

=head2 Step1: Retrieving the commitlog files

As a first step you need to get commitlog files. Retrieve them with
cvsup(1). An example supfile is:

     # Change the next line to use your nearest CVSup mirror site
     # listed at http://www.FreeBSD.org/handbook/cvsup.html .
     *default host=cvsup.at.FreeBSD.org base=/usr prefix=/home/ncvs
     *default release=cvs delete use-rel-suffix
     # If you have a fast network link, comment out the following line!
     *default compress
     src-base

Store this as file F<base-supfile> and run cvsup:

C<cvsup -i CVSROOT/commitlogs base-supfile>

This will retrieve all commitlog files. That is some 17MB as of
February 2001. If you are not interested in old logs use a refuse file
as described in L<cvsup> or a more selective C<-i> option in the above
command line example.

=head2 Step2: HTMLizing with commitlog2cvsweb

To check what was going on in the area of FreeBSD kernel development
since last monday run this command:

C<commitlog2cvsweb --since "monday 0:00" -o sys.html
    /home/ncvs/CVSROOT/commitlogs/sys>

and open F<sys.html>.

To see all kernel commits mentioning C<sound> or C<pcm> dating from
the year 2000 run:

C<commitlog2cvsweb --since "2000-01-01" --before "2001-01-01"
-o sound2K.html -i --log sound --log 'pcm\b' --sort
/home/ncvs/CVSROOT/commitlogs/sys*>

Note how C<\b> (word boundary) was used to avoid matching C<pcmcia>.

=head1 BUGS

This implementation is way too slow. (Do not even think of using it as
a CGI program!)

Commit messages pasted into logs of other commits may confuse the
parser.

Timestamps in commitlog files without a timezone are interpreted
according to the local timezone. (These timestamps occur only in very
old FreeBSD commit logs.)

Option C<--link-committers> always adds a link to the next commit made
by the same committer. This is wrong for the last one. (This bug is a
consequence of single pass processing.)

=head1 AUTHOR

Martin Kammerhofer <mkamm@gmx.net>

=cut

# Local Variables:
# mode: perl
# End:
#EOF
