#!/usr/bin/perl
#
# Copyright (c) 1995 Wolfram Schneider, Berlin
# All rights reserved. Alle Rechte vorbehalten.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. All advertising materials mentioning features or use of this software
#    must display the following acknowledgement:
#    This product includes software developed by Wolfram Schneider
# 4. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $Id: manck.pl,v 1.2 1996/05/30 16:22:08 wosch Exp $
#
# manck - check manual pages

sub var {
    $ENV{'PATH'} = '/bin:/usr/bin';
    $path = '/bin:/sbin:/usr/bin:/usr/sbin';
    $manpath = '/usr/share/man';
    $incpath = '/usr/include:/usr/X11/include:/usr/local/include';

    $ext = '.gz';
    $zcat = 'gzcat';
    $noman = 'No manual entry for'; # error message

    # 0: only errors 1: some more information 2: verbose
    $debug = 0;

    # "manck.files": scan manck.files for list of missing
    # files that are "ok" to be missing.
    # 0: report all missing files.
    $allfiles = 0; #'manck.files';

    # "manck.pages": scan manck.pages for list of missing
    # pages that are "ok" to be missing.
    # 0: report all missing pages.
    $allpages = 0; #'manck.pages';

    require "stat.pl";
    $| = 1;

    # bogus sections
    %s_bogus =
	(
	 '1-UCB', '1',
	 '1M',	  '1',
	 '1m',	  '1',
	 '3C',	  '3',
	 '3C++',  '3',
	 '3N',	  '3',
	 '3S',	  '3',
	 '3t',	  '3',
	 '3X',	  '3',
	 '3X11',  '3',
	 '3Xt',	  '3',
	 '8C',	  '8',
	 '8c',	  '8',
	 );

    # common sections
    %sh =
	(
	 'AUTHOR',	      0,
	 'AUTHORS',	      0,
	 'BUGS',	      0,
	 'CAVEATS',	      0,
	 'COMPATIBILITY',     0,
	 'DESCRIPTION',	      0,
	 'DIAGNOSTICS',	      0,
	 'ENVIRONMENT',	      0,
	 'ERRORS',	      0,
	 'EXAMPLE',	      0,
	 'EXAMPLES',	      0,
	 'FILES',	      '&files',
	 'HISTORY',	      0,
	 'NAME',	      '&name',
	 'NOTES',	      0,
	 'OPTIONS',	      0,
	 'RETURN VALUE',      0,
	 'RETURN VALUES',     0,
	 'SEE ALSO',	      '&see_also',
	 'STANDARDS',	      0,
	 'SYNOPSIS',	      '&synopsis',
	 'XREF',	      0, # cross references in whole file
	 'VFILES',	      0, # file names in whole file
	 );
}

sub usage {

    select STDERR;
    print <<EOF;
usage: manck [-d|-debug] [-d|-debug] [-h|-help] [-bin path]
	     [-i|-include includepath] [-u=section] [-xref] [-files]
	     [-M mandir] [-exclude_files file] [-exclude_pages file]
             [manpages ...]
EOF

    print "\nchecked sections: ";
    while(($key,$val) = each %sh) {
	print "'$key' " if $val;
    }
    print "\n";

    exit 1;
}

sub parse {
    local(*argv) = @_;

    while ($_ = $argv[0], /^-/) {
	shift @argv;
	last if /^--$/;
	if    (/^--?(d|debug)$/)	{ $debug++ }
	elsif (/^--?(h|help|\?)$/)	{ &usage }
	elsif (/^--?(b|bin)$/)		{ exit(&bin($argv[0] || $path));  }
	elsif (/^--?M$/)		{ $manpath = $argv[0]; shift @argv;
					  $manpath =~ s=/+$==; }
	elsif (/^-i|-include$/)		{ $incpath = $argv[0]; shift @argv; }
	elsif (/^--?(exclude_files)$/)  { $allfiles = $argv[0]; shift @argv; }
	elsif (/^--?(exclude_pages)$/)  { $allpages = $argv[0]; shift @argv; }
	elsif (/^-u=(.+)$/)		{ if (!defined $sh{$1}) {
					      warn "unknown section: $1\n";
					      &usage;
					  }
					  $sh{$1} = 0 }
	elsif (/^-xref/)		{ $sh{'XREF'} = '&xref'; }
	elsif (/^-files/)		{ $sh{'VFILES'} = '&files'; }
	else				{ &usage }
    }

    @include = split(/:/, $incpath);

    # use absolute path
    if ($manpath !~ "^/") {
	$manpath = ($ENV{'PWD'} || `pwd`) . '/' . $manpath;
	$manpath =~ s/\n//g;	# chop
	$manpath =~ s%/./%/%g;	# foo/./bla -> foo/bla
    }

    # scan for list of missing files that are "ok" to be missing.
    $allfiles && &read_exclude_files($allfiles);

    # scan for list of missing pages that are "ok" to be missing.
    $allpages && &read_exclude_pages($allpages);

}

#
# check if binaries in $path have their own manpages
#
sub bin {
    local($path) = @_;
    local($dir);

    # common man section for unix commands
    #  '[168ln]([a-z]+)?';

    &cache_manpages(*man);

    foreach $dir (split(/:/, $path)) {
	print "$dir\n";
	opendir(DIR, $dir) || die "opendir: $dir $!\n";
	foreach (sort readdir(DIR)) {
	    next if $_ eq '.';
	    next if $_ eq '..';

	    if (! -e "$dir/$_" && -l "$dir/$_") {
		print "$_ is an empty symlink\n";
	    } elsif (! -x _ && -f _) {
		print "$_ is not executable for you\n";
	    }

	    if (!defined $man{$_}) {
		print "$noman ``$_''\n" unless defined $okpages{$_};
	    } elsif ($man{$_} =~ m%man/man[168ln][a-z]*/%) {
		print "$_ in $man{$_}\n" if $debug > 0;
	    }

	    else {
		print "Found $_ in $man{$_}\n";
	    }
	}
	closedir DIR;
    }
}

sub uniq_without_sort {
    local(@list) = @_;
    local(%found, $path, @list_new);

    while($path = shift @list) {
	if (!$found{$path}) {
	    push(@list_new, $path);
	}
	$found{$path} = 1;
    }

    @list_new;
}


#
# readdir all manpages from
# */man/man* directories
#
sub cache_manpages {
    local(*m) = @_;
    local($dir, $subdir, $file);
    local($mpath) = $manpath . ':' . $ENV{'MANPATH'};

    foreach $dir  (&uniq_without_sort(split(/:/, $mpath))) {
	opendir(DIR, $dir) || warn "$dir: $!\n";
	foreach $subdir (sort(readdir(DIR))) {
	    next if $subdir !~ /^man/;

	    opendir(SUBDIR, "$dir/$subdir") || warn "$dir/$subdir: $!\n";
	    foreach (readdir(SUBDIR)) {
		($file = $_) =~ s/$ext$//o;

		if ($file =~ /^(.+)\.[^\.]+$/) {
		    if (defined $m{$1}) {
			$m{$1} .= ':' . "$dir/$subdir/$_";
		    } else {
			$m{$1} .= "$dir/$subdir/$_";
		    }
		}
	    }
	    closedir SUBDIR;
	}
	closedir DIR;
    }
}

#
# like package &find()
#
sub read_manpages {
    local($mpath) = @_;
    local($dir, $subdir, $file);

    foreach $dir  (&uniq_without_sort(split(/:/, $mpath))) {
	opendir(DIR, $dir) || warn "$dir: $!\n";
	foreach $subdir (sort(readdir(DIR))) {
	    next if $subdir !~ /^man/;

	    opendir(SUBDIR, "$dir/$subdir") || warn "$dir/$subdir: $!\n";
	    foreach (sort readdir(SUBDIR)) {
		&manpage("$dir/$subdir/$_")
		    if /[^.]\.[^.]/;
	    }
	    closedir SUBDIR;
	}
	closedir DIR;
    }
}

# delete bold/italic etc. commands
sub strip {
    s=\\f[BRPI]==g;
    s=\\\&==g;
}

#
# parse single manpage
# and analyze
#
sub manpage {
    local($file) = @_;
    local(@stat, $do, @list, $file2,@xlist, @flist);

    unless ((@stat = stat($file))) {
	&err("$!\n");
	return 0;
    };

    if (! -f _) {
	&err("Not a plain file\n");
	return 0;
    }


    # cache
    if (defined $cached_file{$stat[$ST_DEV],$stat[$ST_INO]}) {
	&err("Already visit: " .
	    "$cached_file{$stat[$ST_DEV],$stat[$ST_INO]}\n") if $debug > 1;
	return 1;
    }
    $cached_file{$stat[$ST_DEV],$stat[$ST_INO]} = $file;

    if ($file =~ /$ext$/o) {
	$file2 = "$zcat $file |";
    } else {
	$file2 = $file;
    }
    open(MANPAGE, "$file2") || do {
	warn "open: $file2 $!\n";
	return 0;
    };

    $statistic{'manpage'}++;

    while(<MANPAGE>) {
	next if /^\.\\\"/o;	#" comment

	# Cross references in whole man page
	if ($sh{'XREF'} && (/^\.Xr\s/ || /^\.Fn\s/)) {
	    &strip;
	    s/\s+[\.\,\)]+\s*\n$/\n/;
	    push(@xlist, $_);
	}

	# File names in whole man page
	elsif ($sh{'VFILES'} && (/^\.It Pa\s/ || /^\.Pa\s/)) {
	    &strip;
	    s/\^.It Pa/.Pa/;
	    s/[,\.]$//;
	    push(@flist, $_);
	}


	if (/^\.S[hH]\s+/o) {	# .SH or .Sh
	    # start action $do with following lines (until next .SH)
	    eval $do;

	    # reset action and array
	    $do = ''; @list = ();

	    $name = $_;
	    $name =~ s/^\.S[hH]\s+\"?//; #"
	    $name =~ s/\"?\s*$//;	 #"

	    if ($sh{$name}) {
		&err("parse   $name\n") if $debug > 0;
		$do = $sh{$name}; # save action

	    } elsif (defined $sh{$name}) {
		&err("defined $name\n") if $debug > 1;
	    } else {
		&err("undef   $name\n") if $debug > 1;
	    }
	} elsif ($do) {
	    # \fBcurs_kernel\fR
	    &strip;

	    push(@list, $_);
	}
    }
    eval $do;			# last .SH in file

    $name = 'XREF';
    if ($sh{$name}) {
	# make uniq list
	local($prev) = '/nonesuch';
	@list = grep($_ ne $prev && ($prev = $_), sort @xlist);

	eval $sh{$name};
    }

    $name = 'VFILES';
    if ($sh{$name}) {
	# make uniq list
	local($prev) = '/nonesuch';
	@list = grep($_ ne $prev && ($prev = $_), sort @flist);

	&err("parse   VFILES\n") if $debug > 0;
	eval $sh{$name};
    }

    close MANPAGE;
}

#
# locate manpage,
# may be from cache or with stat
#
sub locate {
    local($manpage, $section) = @_;

    # ignore error
    return 0 if $okpages{"$manpage($section)"} ||
	$okpages{"$manpage"}; 

    # failed
    if (!defined $man{$manpage}) {

	# capitalize man page
	if ($manpage =~ /^[A-Z][a-z]/ && defined $man{&tolower($manpage)}) {
	    &errS("$noman ``$manpage($section)'', guess ``" .
		  &tolower($manpage) . "''\n") if $debug >= 1;
	    return (&tolower($manpage, $section));
	} else {
	    &errS("$noman ``$manpage($section)'' or ``$manpage''\n");
	    $cached_mp{"$manpage($section)"} -= 1;
	}
	return 0;
    }


    #
    # found manpage, cache = 1
    #

    # in specified section
    elsif ($man{$manpage} =~
	   /man\/man$section\/$manpage\.$section($ext)?(:|$)/) {
	&errS("Found ``$manpage($section)'' in ``$man{$manpage}''\n")
	    if $debug > 1;
    }

    # in approximated section
    elsif (defined $s_bogus{$section} &&
	   $man{$manpage} =~ /man\/man$s_bogus{$section}\/$manpage\.$s_bogus{$section}($ext)?(:|$)/) {
	   &errS("No man page for ``$manpage($section)'', possible alternate " .
	       "section ``$s_bogus{$section}''\n");

    # somewhere else
    } else {
	&errS("No man page for ``$manpage($section)'', possible alternates " .
	      "``$man{$manpage}''\n") if ($debug > 0 && length($section) > 2);
	# section may be 'int', 'double' etc.

    }

    $cached_mp{"$manpage($section)"}++;
    return 1;
}


# print error messages
sub err {
    if (!$cache_err{$file}) {
	print "$file\n";
	$cache_err{$file} = 1;
    }
    print "\t";
    print @_;
}

# count errors
sub errS {
    $statistic{$name}++;
    &err(@_);
}

#
# print statistic about references
# + successfully
# - failed
# 0 no references
#
sub references {
    local(@list) = ();
    local($key, $val);

    # + or -
    while(($key, $val) = each %cached_mp) {
	push(@list, sprintf("%-4d %s", $val, $key));
    }

    # find parsed man pages whithout a reference
    # to this pages
    while(($key, $val) = each %cached_file) {
	# /usr/share/man/man1/(ls).1.gz
	if ($val =~ /\/([^\/]+)\.([^\.]+)($ext)?$/o &&
	    !defined $cached_mp{"$1($2)"}) {
	    push(@list, sprintf("%-4d %s", 0, $val));
	}
    }

    print "\n" . "References count: successfully(+), failed(-), " .
	"whithout reference(0)\n" .
	join("\n",  reverse sort{$a <=> $b} @list) .
	    "\n";

    local($total) = $statistic{'manpage'};
    local($sum) = 0;
    delete $statistic{'manpage'};
    print "\nErrors in section\n";

    while(($key,$val) = each %statistic) {
	if ($sh{"$key"}) {
	    print sprintf("%-10s %5d\n", $key, $val);
	    $sum += $val;
	}
    }
    print "Manual pages absolut: $total errors absolut: $sum\n";
}


#
# .SH SEE ALSO
# test if manpages exist
#
sub see_also {
    foreach (@list) {
	s/\s+$//;		# strip ending blanks
	s/\"//g;		# remove '"'

	#if (/^$/) {
	#    &err("stop SEE ALSO empty line\n") if $debug > 1;
	#    return;
	#}

	foreach (split(/,/)) {
	    s/^\s+//;

	    # BSD
	    if (/^\.(Xr|Fn)\s+(\S+)\s+(\S+)/o) {
		&locate($2, $3);
	    }

	    # GNU
	    elsif (/^(\.[BI]R\s)?\s*(\S+)\s*\(\s*(\S+)\s*\)/o) {
		&locate($2, $3);
	    }

	    # Doku follow, break
	    elsif (/^\.(sp|Rs|LP|br|TP|RB|PP|Pp|%)/o ||
		   /^RFC/o) {
		&err("stop SEE ALSO: $_\n") if $debug > 1;
		return;
	    }

	    # Hm
	    elsif (/^\.(Bl|It)/) {
		&err("skip: ``$_''\n") if $debug > 0;
	    }

	    # Garbage
	    else {
		if ($debug == 1) {
		    &err("unknown: ``$_''\n");
		} elsif ($debug > 1) {
		    &errS("unknown: ``$_''\n");
		}
	    }
	}
    }
}

#
# Cross references in whole man page
#
sub xref {
    local($name) = 'XREF';
    &err("parse   $name\n") if $debug > 0;

    foreach (@list) {
	s/\s+$//;		# strip ending blanks
	s/\"//g;		# remove '"'

	foreach (split(/,/)) {
	    s/^\s+//;

	    # BSD
	    if (/^\.(Xr|Fn)\s+(\S+)\s+(\S+)/o) {
		&locate($2, $3);
	    } elsif (/^\.(Xr|Fn)\s+(\S+)/o) {
		$a = $2;
		if ($man{$a} =~ /\.([^\.]+)($ext)?$/) {
		    &locate($a, $1);
		} else {
		    &locate($a, '');
		}
	    }


	    else {
		&err("unknown: ``$_''\n");
	    }
	}
    }
}

#
# .SH FILES
# test if files exist
#
sub files {
    foreach (@list) {
	next if /^\.(Bl|El|\\\")/;

	# .TP \w'/etc/manpath.config'u+2n
	s/\s\\w'([^\']+)'.*/ $1/; #"

	if (s=.*\s(\S+)\s*$=$1= && /\//) {
	    s/\s+$//;
	    s/[\s\'\"].*//;	# "

	    next unless $_;

	    # skip some file for debug=0
	    if ($debug < 1) {
		next if !/^\//;	# only absolute path, no $HOME or ~/.foobar
		next if m%^/var/%; # no /var/
		next if !m%^[A-Za-z0-9._/-]+$%;	# no meta chars
		next if /X+$/;	# no bla.XXXX
	    }

	    if (defined $okfiles{$_})  {
		&err("ignore file ``$_''\n") if $debug > 1;
		next;
	    }

	    &err("test -e ``$_''\n") if $debug > 1;
	    if (! -e $_) {
		&errS("$_: file does not exist STAT\n");
	    }
	}
    }
}

#
# .SH SYNOPSIS
# test for include files
#
sub synopsis {
    local($inc);

    foreach (@list) {
	if (/\#include\s+[<"](\S+)[>"]/) { #
	    if ($cached_file{$1}) {
		&err("CACHED file: ``$1''\n") if $debug > 2;
		next;
	    }

	    $cached_file{$1} = 1;

	    &errS("$1: include does not exist in: @include\n")
		unless &include_test($1);
	}
    }
}

sub include_test {
    foreach $inc (@include) {
	&err("test -f ``$inc/$1''\n") if $debug > 1;
	return 1
	    if -f "$inc/$1";
    }
    return 0;
}

#
# .SH NAME
# test for NAME values
#
sub name {

    while ($_ = shift @list) {

	# BSD/doc style
	# .Nm name ...
	if (/^\.Nm\s+(\S+)/) {
	    &name_test($1);
	    next;
	} elsif (/^\.Nd/) {
	    return;		# ignore description
	}

	# GNU
        # .B bla
	# \- description
	if (/^\.B\s(\S+)/) {	
	    &name_test($1);
	    shift @list;
	}

	# line break
	elsif (/^\.br/) {
	}
	# name -   ...
	# name \-  ...
	elsif (/^\s*([a-zA-Z0-9].*)\s+(\\)?\-\s/) {
	    &name_test($1);
	}
	# name \(em ...
	# name \(mi ...
	elsif (/^\s*([a-zA-Z0-9_].*)\s+\\\((em|mi)/) {
	    &name_test($1);
	}
	# name, ...
	elsif (/^\s*([^,]+)[,\.]\s+/) {
	    &name_test($1);
	}


	# rest, except empty lines
	elsif (!/^\s*$/) {
	    &err("$_");
	}

    }
}

sub name_test {
    local($page) = @_;

    foreach (split(/,/, $page)) {
	s/^\s+//; s/\s+$//;	# blanks

	if ($man{$_}) {
	    &err("Found name ``$_''\n") if $debug > 1;
	} else {
	    &errS("Manpage ``$_'' does not exist\n") unless $okpages{$_};
	}
    }
}

# read file names which should not reported as 'errors'
sub read_exclude_files {
    local($excl_file) = @_;

    open(FILELIST, "$excl_file") || do {
	warn "open: $excl_file: $!\n";
	return 0;
    };

    while(<FILELIST>) {
	next if /^\#/;		# skip comments

	chop;
	$okfiles{$_} = 1;
    }
    close FILELIST;
}

# read pages which should not reported as 'errors'
sub read_exclude_pages {
    local($excl_file) = @_;

    open(FILELIST, "$excl_file") || do {
	warn "open: $excl_file: $!\n";
	return 0;
    };

    while(<FILELIST>) {
	next if /^\#/;		# skip comments

	chop;
	$okpages{$_} = 1;
    }
    close FILELIST;
}

sub tolower {
    local($name) = @_;

    $name =~ y/A-Z/a-z/;
    return $name;
}

##
## Main
##
# read enviroment
&var;
&parse(*ARGV);

# single manpages
if ($#ARGV >= 0) {
    &cache_manpages(*man);
    foreach (@ARGV) { &manpage($_); }
}

# parse tree
else {
    &cache_manpages(*man);
    &read_manpages($manpath);
}

&references if $debug > 0;
