#!/usr/bin/perl -w
# Copyright (c) 2000, 2001 Udo Erdelhoff. All rights reserved.
# Written for the FreeBSD German Documentation Project
#
# Redistribution and use in source and compiled forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#  1. Redistributions of source code must retain the above
#     copyright notice, this list of conditions and the following
#     disclaimer as the first lines of this file unmodified.
#
#  2. Redistributions in compiled form must reproduce the above
#     copyright notice, this list of conditions and the following
#     disclaimer in the documentation and/or other materials provided
#     with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY UDO ERDELHOFF "AS IS" AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL UDO ERDELHOFF BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# $Id: makemanent,v 1.6 2001/01/07 21:36:10 ue Exp $

use strict;
use Getopt::Long;

# User-configurable settings. Every one of them can be overridden from the
# command line.

# Cutoff date: manpages older than this are ignored. Empty means no limit;
# otherwise it must be a valid argument for touch -t
# (i.e. [[CC]YY]MMDDhhmm[.SS]).
my $cutoffdate = "";

# Print the old entity definitions along with the new ones?
# 0=no, everything else=yes
my $includeold = 0;

# Entity files that must be read. An empty list selects the default
# defined below.
my @entityfiles = ();

# Directories that must be searched. An empty list selects the default
# defined below.
my @mandirs = ();

# Generate manpage groups (names sharing an inode)?
# 0=no, everything else=yes
my $usegroups = 0;

##########################################################################

# Print a short synopsis of the command line to STDERR and abort the
# program via die. Takes no arguments and never returns.
sub usage () {
	print STDERR "usage: makemanent [--includeold] [--usegroups] ";
	print STDERR "[--cutoffdate [[CC]YY]MMDDhhmm[.ss]] ";
	# --entityfile and --mandir both require an argument.
	print STDERR "[--entityfile file] [--mandir dir]\n";
	print STDERR "Second dash is optional, abbreviation is possible\n";
	print STDERR "-e and -m may be specified more than once\n";
	die;
}

# Parse the command line. If GetOptions reports a problem, print the usage
# text and abort.
GetOptions (
	'cutoffdate=s'	=> \$cutoffdate,
	'entityfile=s'	=> \@entityfiles,
	'includeold'	=> \$includeold,
	'mandir=s'	=> \@mandirs,
	'usegroups'	=> \$usegroups,
) or usage();

# No --entityfile given: read the DEFAULT entity file.
unless (@entityfiles) {
	@entityfiles = qw(/usr/doc/share/sgml/man-refs.ent);
}

# No --mandir given: search the DEFAULT directories.
unless (@mandirs) {
	@mandirs = qw(
	    /usr/share/man/man1 /usr/share/man/man1aout
	    /usr/share/man/man2 /usr/share/man/man3
	    /usr/share/man/man4 /usr/share/man/man5
	    /usr/share/man/man6 /usr/share/man/man7
	    /usr/share/man/man8 /usr/share/man/man9
	    /usr/share/man/mann
	);
}

# State shared by the main program and the subroutines below.

# The old entities: entity name => complete definition line as read.
my %oldentities;
my $entityfile;		# entity file currently being read
my $oldcount;		# definitions accepted from the current entity file

# The unholy trinity: entity name, manpage name and volume.
my $entity;
my $page;
my $vol;

# The new entities: entity name => "page volume".
my %newentities;
my $newcount = 0;

# The new entity groups: key from %temp => blank-separated member names.
my %groups;
my $grpcount = 0;

# Various stuff.
my $cmd;		# command line currently being run
my $dir;		# directory currently being searched
my $key;		# key of %temp / %groups currently being handled
my %temp;		# everything found: file name => file name, or
			# inode => blank-separated file names
my $result;		# return value of the most recent helper call

# Create the mark file whose modification time is the cutoff date. The
# directory scan compares manpages against it with find's -newer.
if ($cutoffdate ne "") {
	# The value comes straight from the command line; accept nothing but
	# the documented touch -t format.
	unless ($cutoffdate =~ /\A(?:(?:\d\d)?\d\d)?\d{8}(?:\.\d\d)?\z/) {
		die "Invalid cutoff date '$cutoffdate', " .
		    "expected [[CC]YY]MMDDhhmm[.SS]\n";
	}

	# NOTE(review): the file name is predictable. It is kept as is
	# because the scanning and clean-up code refer to the same path.
	my @touch = ("touch", "-t", $cutoffdate, "/tmp/mme-mark.$$");
	$cmd = join (" ", @touch);

	# List form of system: no shell is involved.
	$result = system @touch;
	if ($result == -1) {
		# touch could not be started at all; only here is $! meaningful.
		die "Can't create mark file: can't run $cmd: $!\n";
	}
	if ($result != 0) {
		print STDERR "$cmd returned $result\n";
		if ($result & 127) {
			die "Can't create mark file: touch killed by signal " .
			    ($result & 127) . "\n";
		}
		die "Can't create mark file: touch exited with status " .
		    ($result >> 8) . "\n";
	}
}


# Read the existing entity definitions ("<!ENTITY man. ...") from every
# entity file into %oldentities (entity name => complete definition line).
# Unreadable files and malformed definitions are reported on STDERR and
# skipped.
foreach $entityfile (@entityfiles) {
	$oldcount = 0;

	# Three-argument open: the file name comes from the command line and
	# must never be interpreted as a mode or as a command to run.
	my $oldent;
	unless (open ($oldent, '<', $entityfile)) {
		print STDERR "Can't read old entities from $entityfile: $!\n";
		next;
	}
	while (my $line = <$oldent>) {
		next unless $line =~ /^<!ENTITY man\./;
		chomp $line;
		# Captures: entity name, manpage name, volume.
		($entity, $page, $vol) = $line =~ m%^<!ENTITY (.*?) \"<citerefentry/<refentrytitle/(.*?)/<manvolnum/([0-9nl])//\">$%;	# "
		unless (defined $entity && defined $page && defined $vol) {
			print STDERR "malformed entity definition $line\n";
			next;
		}
		$oldentities{$entity} = $line;
		$oldcount++;
	}
	close ($oldent);
}

# Scan every manual directory with find(1). Depending on $usegroups the
# output is collected by get_files() or get_inode_groups(); both read the
# command from $cmd and store what they find in %temp.
foreach $dir (@mandirs) {
	# The directory may come from the command line, so quote it for the
	# shell: wrap it in single quotes and write every embedded single
	# quote as '\''.
	(my $quoted = $dir) =~ s/'/'\\''/g;
	$cmd = "find '$quoted' -xdev -type f";

	# Only consider files newer than the mark file, if there is one.
	if ($cutoffdate ne "") {
		$cmd = "$cmd -newer /tmp/mme-mark.$$";
	}

#	printf STDERR "Scanning directory %-30s", "$dir...";
	# The trailing "|" makes open() run the command and read its output.
	if ($usegroups == 0) {
		$cmd = "$cmd -print |";
		$result = get_files();
#		if ($result >= 0) {
#			printf STDERR "%4d manpages found\n", $result;
#		}
	} else {
		$cmd = "$cmd -ls |";
		$result = get_inode_groups();
#		if ($result >= 0) {
#			printf STDERR "%4d manpage groups found\n", $result;
#		}
	}
}

# The mark file is no longer needed once all directories are scanned.
if ($cutoffdate ne "") {
	unlink ("/tmp/mme-mark.$$");
}

# Sort everything that was found into new single entities (%newentities),
# new groups (%groups) and things that are already known (dropped).
$newcount = 0;
$grpcount = 0;
#print STDERR "Looking for duplicates...";
foreach $key (sort keys %temp) {
	# A blank in the value means that several names were collected
	# under one key.
	$result = ($temp{$key} =~ / /)
	    ? check_group($temp{$key})
	    : check_single($temp{$key});

	if ($result == 1) {			# New entry
		($page, $vol) = ($temp{$key} =~ /^(.*)\.([0-9])$/);
		# The entity name is the page name with every "_", "[" and
		# "+" replaced by ".".
		$entity = $page;
		$entity =~ tr/_\[\+/.../;
		$entity = "man.$entity.$vol";
		$newentities{$entity} = "$page $vol";
		$newcount++;
	} elsif ($result == 2) {		# New group
		$groups{$key} = $temp{$key};
		$grpcount++;
	}
	# Everything else is an old entry; nothing to do.
}
#print STDERR " done\n";

# Write the results to STDOUT: the new entities, optionally the old
# definitions, and finally the new entity groups.
#print STDERR "Generating $newcount entities and $grpcount new entity groups\n";
foreach $entity (sort keys %newentities) {
	# Each value was stored as "page volume".
	($page, $vol) = split (/ /, $newentities{$entity});
	print "<!ENTITY $entity \"<citerefentry/<refentrytitle/",
	    "$page/<manvolnum/$vol//\">\n";
}

# Old definitions are repeated verbatim, separated by an empty line.
unless ($includeold == 0) {
	print "\n";
	foreach $entity (sort keys %oldentities) {
		print "$oldentities{$entity}\n";
	}
}

print "\n";
foreach $key (sort keys %groups) {
	# A group is a blank-separated list of "page.volume" names.
	my @members = split / /, $groups{$key};
	$result = scalar @members;
	print "\n<!-- entity group $key, $result members -->\n";
	foreach my $member (@members) {
		($page, $vol) = ($member =~ m/^(.*)\.([0-9])$/);
		# Same name mangling as for single entities: "_", "[" and
		# "+" become ".".
		$entity = $page;
		$entity =~ tr/_\[\+/.../;
		$entity = "man.$entity.$vol";
		print "<!ENTITY $entity \"<citerefentry/<refentrytitle/",
		    "$page/<manvolnum/$vol//\">\n";
	}
}

# Decide whether a group of manpage names is new.
#
# Argument: the blank-separated names of the group members.
# Returns 2 if check_single() accepts every member, 0 as soon as it
# rejects one of them.
sub check_group {

	# Iterate via $_ on purpose: check_single() has been reporting the
	# offending name through $_.
	foreach (split / /, $_[0]) {
		return (0) if (check_single ($_) == 0);
	}
	return (2);
}

# Decide whether a single manpage still needs an entity.
#
# Argument: a name of the form "page.volume", volume being one digit.
# Returns 1 if neither %oldentities nor %newentities contains the matching
# entity. Returns 0 if the entity is already known or if the name cannot be
# parsed; the latter is reported on STDERR.
sub check_single {
	my ($name) = @_;

	# Either both captures are defined or neither is, so one test is
	# enough. Report the argument itself: $_ is only set to the name
	# when the caller happens to iterate with it.
	# NOTE(review): get_files() also lets names ending in ".n" and ".l"
	# through, and they end up being reported here. Confirm whether
	# these volumes are meant to be supported.
	my ($page, $vol) = ($name =~ m/^(.*)\.([0-9])$/);
	unless (defined $page && defined $vol) {
		print STDERR "Found no manpage in $name!\n";
		return 0;
	}

	# The entity name is the page name with every "_", "[" and "+"
	# replaced by ".".
	(my $entity = $page) =~ tr/_\[\+/.../;
	$entity = "man.$entity.$vol";
	if (defined $oldentities{$entity} || defined $newentities{$entity}) {
		return 0;
	}
	return 1;
}

# Run the command in $cmd (a "find ... -ls |" pipe) and collect the
# manpages it lists in %temp, keyed by inode number. Names that share an
# inode are stored as one blank-separated list.
#
# Returns the number of inodes added to %temp, or -1 if the command could
# not be started.
sub get_inode_groups {

	my ($inode, $inodecount, $file);

	$inodecount = 0;

	unless (open (NEWENT, $cmd)) {
		print STDERR "Can't launch $cmd: $!\n";
		return (-1);
	}

	while (<NEWENT>) {
		chomp;
		# The first field is the inode number, the last
		# blank-separated field the path. Both captures are defined
		# whenever the pattern matches, so one test covers them.
		($inode, $file) = m/^\s*([0-9]+) .* (.*?)$/;
		unless (defined $inode) {
			print STDERR "Found no inode in $_\n";
			next;
		}
		$file =~ s%^.*/%%;		# strip the directory
		$file =~ s/\.gz$//;
		$file =~ s/aout$//;
		next unless ($file =~ m/^.*\.[0-9]+/);
		if (defined $temp{$inode}) {
			# Compare whole names. A pattern built from $file would
			# treat ".", "+" and "[" as regex syntax, and with /o
			# it would keep testing the first name ever seen.
			unless (grep { $_ eq $file } split (/ /, $temp{$inode})) {
				$temp{$inode} = "$temp{$inode} $file";
			}
		} else {
			$temp{$inode} = "$file";
			$inodecount++;
		}
	}
	close (NEWENT);
	return ($inodecount);
}

# Run the command in $cmd (a "find ... -print |" pipe) and record every
# manpage it prints in %temp, keyed by file name.
#
# Returns the number of names that were not yet in %temp, or -1 if the
# command could not be started.
sub get_files {
	my $filecount = 0;

	unless (open (NEWENT, $cmd)) {
		print STDERR "Can't launch $cmd: $!\n";
		return (-1);
	}

	while (<NEWENT>) {
		chomp;

		# Everything behind the last slash is the file name.
		my ($file) = m/^.*\/(.*?)$/;
		if (!defined $file) {
			print STDERR "Found no filename in $_\n";
			next;
		}

		# Strip ".gz" first, then a trailing "aout".
		foreach my $suffix (qr/\.gz$/, qr/aout$/) {
			$file =~ s/$suffix//;
		}

		# Only names ending in a one-character volume are manpages.
		next unless ($file =~ /^.*?\.[0-9nl]$/);

		$filecount++ unless (defined $temp{$file});
		$temp{$file} = "$file";
	}
	close (NEWENT);
	return ($filecount);
}
