#!/usr/local/bin/perl

# $Id: sgmlfmt.pl,v 1.26 1997/05/12 14:16:48 jfieber Exp $

#  Copyright (C) 1996
#       John R. Fieber.  All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
#  THIS SOFTWARE IS PROVIDED BY JOHN R. FIEBER AND CONTRIBUTORS ``AS IS'' AND
#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#  ARE DISCLAIMED.  IN NO EVENT SHALL JOHN R. FIEBER OR CONTRIBUTORS BE LIABLE
#  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
#  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
#  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
#  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
#  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
#  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#  SUCH DAMAGE.


# Format an sgml document tagged according to the linuxdoc DTD.
# by John Fieber <jfieber@freebsd.org> for the FreeBSD documentation
# project.  


require 'newgetopt.pl';

#
# Where to find DTDs and related files
#

$sgmldir = '/usr/local/share/sgml';

#
# External programs: the SGML parser and the translation engine.
#

($sgmls, $instant) = ('nsgmls', 'instant');

#
# Scratch files that the signal handler deletes on abnormal exit.
#

@cleanfiles = ();

#
# Interrupt handler, remove scratch files.
#

sub sighandler {
    # Signal handler: delete every scratch file recorded in
    # @cleanfiles, then terminate with exit status 1.  The signal
    # name handed in by perl is accepted but not otherwise used.
    local($signame) = @_;

    unlink(@cleanfiles);
    exit 1;
}

# Route the HUP, INT and QUIT signals to the cleanup handler.
foreach $signame ('HUP', 'INT', 'QUIT') {
    $SIG{$signame} = 'sighandler';
}

#
# Display a usage message.
#

sub usage {
    # Print the command-line synopsis on the currently selected
    # output handle (standard output unless changed).
    print
        "Usage:\n",
        "sgmlfmt [-d <doctype>] -f <format> [-i <namea> ...] [-links]\n",
        "    [-e encoding] [-hdr file] [-ftr file] file\n",
        "where <doctype> is one of: linuxdoc (default), docbook.\n",
        "and <format> is one of: ascii, html, koi8-r, latin1, ps, roff\n";
}

#
# Look for the file specified on the command line
#

sub getfile {
    # Locate the input document named by $filearg.  "$filearg.sgml"
    # is tried first, then $filearg itself.
    #
    # On success the globals $file (the name that was found),
    # $fileroot (that name without directory and extension) and
    # $filepath (its directory part, possibly empty) are set; when
    # $filepath is non-empty, ":$filepath/%S:%S" is appended to
    # $ENV{SGML_PATH}.  Returns 1.
    #
    # Returns 0, leaving those globals untouched, when neither
    # candidate is a plain file.
    local($filearg) = @_;
    local($found) = (-f "$filearg.sgml") ? "$filearg.sgml"
                  : (-f $filearg)        ? $filearg
                  :                        "";

    return 0 if $found eq "";

    $file = $found;
    ($fileroot = $file) =~ s/.*\///;        # drop the path
    $fileroot =~ s/\.[^\.]*$//;             # drop the extension
    ($filepath = $file) =~ s/\/*[^\/]*$//;  # keep the directory part
    $ENV{"SGML_PATH"} .= ":$filepath/%S:%S" unless $filepath eq "";
    return 1;
}

#
# A function to run sgmls and instant on the input file.
#
# Arguments:
#   1. A file handle for the output
#   2. A translation file
#

sub sgmlparse {
    # Open the filehandle named by $ifhandle for reading from the
    # shell pipeline
    #     $sgmls ... $file | sed ... | $instant ...
    # where $instant is given the translation spec
    # $sgmldir/transpec/<dtd>-<replacement>.ts.
    #
    # Every element of @opt_i is passed to the parser as a "-i"
    # option; the resulting option string is left in the global
    # $defines.  The result of open() is the return value.
    local($ifhandle, $replacement) = @_;
    local($pipeline);

    $defines = join(" -i ", @opt_i);
    $defines = "-i $defines" unless $defines eq "";

    $pipeline = join(" | ",
        "$sgmls $defines $decl $file",
        "sed -e 's/^f\<OSFILE\>/f/'",
        "$instant -Dfilename=$fileroot $instantopts " .
            "-t $sgmldir/transpec/${dtd}-${replacement}.ts");

    open($ifhandle, "$pipeline |");
}

#
# Generate roff output
#

sub gen_roff {
    # Translate the document with the "roff" translation spec and
    # copy the result, unchanged, into "$fileroot.roff".  The output
    # file name is also recorded in @cleanfiles.
    push(@cleanfiles, "${fileroot}.roff");

    open(outfile, ">$fileroot.roff");
    &sgmlparse("infile", "roff");
    print outfile $_ while <infile>;
    close(infile);
    close(outfile);
}

#
# Generate something from roff output
#

sub do_groff {
    # Produce "$fileroot.$driver" by running groff with output device
    # $driver over the roff translation of the document.  $postproc
    # is appended verbatim to the final groff command line (for
    # example "| col").
    #
    # groff is run twice: first with -z and -rRef=1, keeping only the
    # output lines that start with "." in "$fileroot.qrf"; then again
    # to produce the real output.  The two intermediate files are
    # removed afterwards.
    local($driver, $postproc) = @_;
    local($roff, $xrefs, $result) =
        ("${fileroot}.trf", "${fileroot}.qrf", "${fileroot}.${driver}");

    push(@cleanfiles, $roff, "${fileroot}.tmp", $xrefs, $result);

    open(outfile, ">$roff");
    &sgmlparse("infile", "roff");
    while (<infile>) {
        # On .GETHN/.GETPN request lines, turn \(ru into an underscore.
        s,\\\(ru,_,g if /^\.GET[HP]N/;
        # Refer to the macro package as m.tmac rather than tmac.m.
        s,tmac\.m,m\.tmac,g;
        print outfile;
    }
    close(infile);
    close(outfile);

    system("groff -z -rRef=1 -T ${driver} -t $roff 2>&1 | egrep '^\\.' > $xrefs");
    system("groff -T ${driver} -t $roff ${postproc} > $result");

    unlink($xrefs);
    unlink($roff);
}

#
# Generate HTML output.
#
# HTML is generated in two passes.  
#
# The first pass takes the output from sgmlsasp and gathers information
# about the structure of the document that is used in the second pass
# for splitting the file into separate files.  Targets for cross
# references are also stored in this pass.
#
# Based on the information from the first pass, the second pass
# generates a numbered series of HTML files, a "toplevel" file
# containing the title, author, abstract and a brief table of
# contents.  A detailed table of contents is also generated.  The
# second pass generates links for cross references and URLs.

#
# Tunable parameters
#
#   $maxlevel  - max section depth
#   $num_depth - depth of numbering
#   $m_depth   - depth of menus
($maxlevel, $num_depth, $m_depth) = (3, 4, 2);

# Running totals: section counter and file counter.
($sc, $filecount) = (0, 0);

# Boilerplate used when building the generated HTML pages.
$doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">';
$BODY    = '<BODY text="#000000" bgcolor="#ffffff">';

# Other variables:
#
#  st_xxxx  - Section Table.  Arrays containing information about a
#             given section.  To be accessed via the section counter $sc.
#             
#  st_ol    - The output level of the given section.  I.E. how many
#             levels from the table of contents does it lie in terms
#             of HTML files which is distinct from <sect1>, <sect2> etc.
#             levels. 
#
#  st_sl    - The absolute depth of a section.  Contrast st_ol.
# 
#  st_num   - The section number in the form X.Y.Z....
#
#  st_file  - The HTML file the section belongs to.
#
#  st_header - The text of the section title.
# 
#  st_parent - The section number of the given section's parent.

sub gen_html {
    # HTML generation for linuxdoc documents, pass number 1.
    #
    # Runs the parser pipeline with the "html" translation spec and
    # saves its output in a scratch file.  While copying, it fills in
    # the section tables (@st_sl, @st_parent, @st_num, @st_ol,
    # @st_file, @st_pl, @st_header), counts the HTML files that will
    # be produced ($filecount) and records in %references the file
    # number in which each label occurs.  With -links, every label is
    # also passed to &extlink.  The scratch file is then handed to
    # &html2html for pass 2 and removed.
    #
    # Takes no arguments; works on global state ($fileroot,
    # $opt_links, the tunable parameters).  Dies if the scratch file
    # cannot be created or read back.
    #
    # NOTE(review): the scratch file name is predictable
    # (/tmp/sgmlf.<pid>); consider a safer temporary-file scheme.
    local($i, $sl);

    # Remove any lingering link file
    unlink("${fileroot}.ln");

    $tmpfile = "/tmp/sgmlf.$$";
    @cleanfiles = (@cleanfiles, "$tmpfile", "${fileroot}.html",
        "${fileroot}_toc.html", "${fileroot}.ln");
    open(bar, ">$tmpfile")
        || die "sgmlfmt: cannot create $tmpfile: $!\n";
#    print STDERR "(Pass 1...";
    &sgmlparse(foo, "html");
    while (<foo>) {
        print bar;
        # count up the number of files to be generated
        # and gather assorted info about the document structure
        if (/^<\@\@sect>/) {
            $sl++;              # current section level
            $sc++;              # current section number
            $st_sl[$sc] = $sl;

            # In case this section has subsections, set the parent
            # pointer for this level to point at this section.
            $parent_pointer[$sl] = $sc;

            # Figure out who is the parent of this section.
            $st_parent[$sc] = $parent_pointer[$sl - 1];

            # Per level counters
            $counter[$sl]++;
            $counter[$sl + 1] = 0;

            # calculate the section number in the form x.y.z.
            if ($sl <= $num_depth) {
                $st_num[$sc] = $st_num[$st_parent[$sc]] . "$counter[$sl].";
            }

            # calculate the file number and output level
            if ($sl <= $maxlevel) {
                $filecount++;
                $st_ol[$sc] = $sl;
                @cleanfiles = (@cleanfiles, "${fileroot}${filecount}.html");
            }
            else {
                $st_ol[$sc] = $maxlevel;
            }

            $st_file[$sc] = $filecount;

            # Calculate the highest level node in which this
            # node should appear as a menu item.
            $st_pl[$sc] = $sl - $m_depth;
            if ($st_pl[$sc] < 0) {
                $st_pl[$sc] = 0;
            }
            if ($st_pl[$sc] > $maxlevel) {
                $st_pl[$sc] = $maxlevel;
            }
        }
        if (/^<\@\@endsect>/) {
            $sl--;
        }

        # record section titles
        if (/^<\@\@head>/) {
            chop;
            s/^<\@\@head>//;
            $st_header[$sc] = $_;
        }

        # record the section number that a label occurs in
        if (/^<\@\@label>/) {
            chop;
            s/^<\@\@label>//;
            if ($references{$_} eq "") {
                $references{$_} = "$filecount";
                if ($opt_links) {
                    &extlink($_, "${fileroot}${filecount}.html");
                }
            }
            else {
                print STDERR "Warning: the label `$_' is multiply-defined.\n";
            }
        }
    }
    # Close the pipeline so the child processes are reaped, and flush
    # the scratch file before it is read back.
    close(foo);
    close(bar);

    if (!open(foofile, $tmpfile)) {
        local($why) = "$!";
        unlink($tmpfile);
        die "sgmlfmt: cannot read $tmpfile: $why\n";
    }
    &html2html(foofile, "boo");
    close(foofile);

    unlink($tmpfile);
}

#
# HTML conversion, pass number 2
#

sub html2html {
    # HTML conversion, pass number 2.
    #
    # Reads the pass-1 output line by line from the filehandle named
    # by $infile and distributes it over the generated files:
    #   ${fileroot}_toc.html   the detailed table of contents,
    #   ${fileroot}<n>.html    one file per section down to $maxlevel,
    #   ${fileroot}.html       the top-level page.
    # Lines that start with an <@@...> marker (title, head, endhead,
    # part, sect, endsect, label, ref, endref, refnam) drive the
    # splitting and cross-referencing; every other line is appended
    # to the text of the current output level.
    #
    # $outfile is accepted but not used.  The section tables filled
    # in by pass 1 (@st_ol, @st_sl, @st_file, @st_pl, @st_num,
    # %references) are read from global state.
    local($infile, $outfile) = @_;
    local($i);
    # True between a <@@ref> whose target is unknown and its <@@endref>.
    local($ref_unresolved) = 0;

    $sc = 0;
    push(@scs, $sc);

    open(tocfile, ">${fileroot}_toc.html");
    print tocfile "$doctype\n<HTML>\n";

    while (<$infile>) {
        # change `<' and `>' to `&lt;' and `&gt;' in <pre></pre>
        if (/<pre>/.../<\/pre>/) {
            s/</\&lt;/g;
            s/\&lt;([\/]*)pre>/<${1}pre>/g;
            s/>/\&gt;/g;
            s/<([\/]*)pre\&gt;/<${1}pre>/g;
        }

        # remove extraneous empty paragraphs (it is arguable that this
        # is really a bug with the DTD, but changing it would break
        # almost every document written to this DTD.)
        s/<p><\/p>//;

      tagsw: {
          # titles and headings
          if (s/^<\@\@title>//) {
              chop;
              $st_header[0] = $_;
              $st_parent[0] = -1;
              # $t is the title with markup stripped, for <TITLE>.
              $t = $st_header[0];
              $t =~ s|<[a-zA-Z/][^>]*>||g;
              print tocfile "<HEAD>\n$html_encoding\n<TITLE>$t</TITLE>\n" .
                  "</HEAD>\n";
              print tocfile "<H1>$st_header[0]</H1>\n";

              $header[$st_ol[$sc]] =
                  "$doctype\n<HTML>\n<HEAD>\n$html_encoding\n<TITLE>$t</TITLE>\n" .
                      "</HEAD>\n$BODY\n";
              $header[$st_ol[$sc]] .= $html_header;
              $header[$st_ol[$sc]] .= "\n<H1>$st_header[0]</H1>\n";

              $footer[$st_ol[$sc]] = "\n";
              $footer[$st_ol[$sc]] .= $html_footer;
              $footer[$st_ol[$sc]] .= "\n</BODY>\n</HTML>\n";
              last tagsw;
          }

          #
          # HEADER begin
          #
          if (s/^<\@\@head>//) {
              chop;

              # The heading of a <part> goes on the top-level page only.
              if ($part == 1) {
                  $text[0] .= "<H1>Part $partnum:<BR>$_";
                  last tagsw;
              }

              $href = "\"${fileroot}$st_file[$sc].html#$sc\"";

              # set up headers and footers
              if ($st_sl[$sc] > 0 && $st_sl[$sc] <= $maxlevel) {
                  $t = $_;
                  $t =~ s|<[a-zA-Z/][^>]*>||g;
                  $header[$st_ol[$sc]] =
                      "$doctype\n<HTML>\n<HEAD>\n$html_encoding\n<TITLE>$t</TITLE>\n" .
                          "</HEAD>\n$BODY\n";
                  $header[$st_ol[$sc]] .= $html_header;
                  $header[$st_ol[$sc]] .= "\n$navbar[$st_ol[$sc]]\n<HR NOSHADE>\n";
                  $footer[$st_ol[$sc]] = "<HR NOSHADE>\n$navbar[$st_ol[$sc]]\n";
                  $footer[$st_ol[$sc]] .= $html_footer;
                  $footer[$st_ol[$sc]] .= "\n</BODY>\n</HTML>\n";
              }

              # Add this to the master table of contents
              print tocfile "<DD>$st_num[$sc] " .
                  "<A HREF=$href>$_";

              # Calculate the <H?> level to use in the HTML file
              $hlevel = $st_sl[$sc] - $st_ol[$sc] + 2;
              $shlevel = $st_sl[$sc] - $st_ol[$sc] + 3;

              $i = $st_ol[$sc];

              # Add the section header
              $text[$i] .= "<H$hlevel><A NAME=\"$sc\"></A>$st_num[$sc] $_";
              $i--;

              # And also to the parent
              if ($st_sl[$sc] == $st_ol[$sc] && $i >= 0) {
                  $text[$i] .= "<H$shlevel>$st_num[$sc] " .
                      "<A HREF=$href>$_";
                  $i--;
              }

              # and to the grandparents
              for (; $i >= $st_pl[$sc];  $i--) {
                  $text[$i] .= "<DD>$st_num[$sc] " .
                      "<A HREF=$href>$_";
              }

              last tagsw;
          }

          #
          # HEADER end
          #
          if (s/^<\@\@endhead>//) {
              if ($part == 1) {
                  $text[0] .= "</H1>\n";
                  $part = 0;
                  last tagsw;
              }
              print tocfile "</A>\n";

              $i = $st_ol[$sc];

              # Close the section header
              $text[$i] .= "</H$hlevel>\n";
              $i--;

              # in the parent...
              if ($st_sl[$sc] == $st_ol[$sc] && $i >= 0) {
                  $text[$i] .= "</A></H$shlevel>\n";
                  $i--;
              }

              # in the grandparent...
              for (; $i >= $st_pl[$sc];  $i--) {
                  $text[$i] .= "</A></DD>\n";
              }
              last tagsw;
          }

          # sectioning
          if (s/^<\@\@part>//) {
              $part = 1;
              $partnum++;
              last tagsw;
          }

          #
          # BEGINNING of a section
          #
          if (s/^<\@\@sect>//) {
              # Increment the section counter and save it on a stack
              # for future reference.
              $sc++;
              push(@scs, $sc);

              # Set up the navigation bar
              if ($st_file[$sc] > $st_file[$sc - 1]) {
                  &navbar($st_file[$sc], $filecount, $sc);
              }

              # Prepare for menu entries in the table of contents and
              # parent file(s).
              if ($st_sl[$sc - 1] < $st_sl[$sc]) {
                  print tocfile "<DL>\n";
                  $i = $st_ol[$sc] - 1 - ($st_sl[$sc] == $st_ol[$sc]);
                  for (; $i >= $st_pl[$sc];  $i--) {
                      $text[$i] .= "<DL>\n";
                  }
              }
              last tagsw;
          }

          #
          # END of a section
          #
          if (s/^<\@\@endsect>//) {

              # Remember the section number! Subsections may have
              # altered the global $sc variable.
              local ($lsc) = pop(@scs);

              # Close off subsection menus we may have created in
              # parent file(s).
              if ($st_sl[$lsc] > $st_sl[$sc + 1]) {
                  print tocfile "</DL>\n";
                  if ($st_sl[$lsc] > 1) {
                      print tocfile "</DD>\n";
                  }
                  $i = $st_ol[$lsc] - 1 - ($st_sl[$lsc] == $st_ol[$lsc]);
                  for (; $i >= $st_pl[$lsc];  $i--) {
                      $text[$i] .= "</DL>\n";
                  }
              }

              # If this section is below $maxlevel, write it now.
              if ($st_sl[$lsc] <= $maxlevel) {
                  open(SECOUT, ">${fileroot}$st_file[$lsc].html");
                  print SECOUT "$header[$st_ol[$lsc]]  $text[$st_ol[$lsc]] " .
                      "$footer[$st_ol[$lsc]]";
                  $text[$st_ol[$lsc]] = "";
                  close(SECOUT);
              }
              last tagsw;
          }

          # cross references
          if (s/^<\@\@label>//) {
              chop;
              $text[$st_ol[$sc]] .= "<A NAME=\"$_\"></A>";
              last tagsw;
          }
          if (s/^<\@\@ref>//) {
              chop;
              $refname = $_;
              $ref_unresolved = ($references{$_} eq "");
              if ($ref_unresolved) {
                  # Diagnostics belong on STDERR, like the duplicate
                  # label warning issued during pass 1.
                  print STDERR
                      "Warning: Reference to $_ has no defined target\n";
              }
              else {
                  $text[$st_ol[$sc]] .=
                      "<A HREF=\"${fileroot}$references{$_}.html#$_\">";
              }
              last tagsw;
          }
          if (s/^<\@\@endref>//) {
              # A reference without a target never opened an anchor,
              # so there is nothing to close for it.
              $text[$st_ol[$sc]] .= "</A>" unless $ref_unresolved;
              $ref_unresolved = 0;
              last tagsw;
          }
          if (s/^<\@\@refnam>//) {
              $text[$st_ol[$sc]] .= "$refname";
              last tagsw;
          }

          # If nothing else did anything with this line, just print it.
          $text[$st_ol[$sc]] .= "$_";
      }
    }

    print tocfile "</HTML>\n";
    open(SECOUT, ">$fileroot.html");
    print SECOUT "$header[0] $text[0] $footer[0]";
    close(SECOUT);
    close tocfile;
}

# navbar
#
# Generate a navigation bar to go on the top and bottom of the page.

sub navbar {
    # Build the navigation bar for section $sc and store it in
    # $navbar[$st_ol[$sc]].  The bar holds the chain of enclosing
    # sections (starting at the document home page) followed by
    # "Previous" and "Next" links.  $fnum and $fmax are accepted but
    # not used; the global $filecount is consulted instead.
    local ($fnum, $fmax, $sc) = @_;
    local ($i, $itext, $prv, $nxt, $colon, $nav);

    $colon = "<b>:</b>";

    # Section hierarchy: home page, then the ancestors (collected from
    # the innermost outwards, hence prepended), then this section.
    $nav = "<A HREF=\"${fileroot}.html\"><EM>$st_header[0]</EM></A>\n";
    for ($i = $st_parent[$sc]; $i > 0; $i = $st_parent[$i]) {
        $itext = " $colon <A HREF=\"${fileroot}$st_file[$i].html\"><EM>$st_header[$i]</EM></A>\n$itext";
    }
    $nav .= "$itext $colon <EM>$st_header[$sc]</EM><BR>\n";

    # Previous: walk back over every section whose file number is at
    # least one less than the current file number, then step forward
    # again, which lands on the first section of the preceding file.
    for ($prv = $sc;
         $prv >= 0 && $st_file[$prv] >= $st_file[$sc] - 1;
         $prv--) {
    }
    $prv++;
    $nav .= "<b>Previous:</b> <A HREF=\"${fileroot}$st_file[$prv].html\"><EM>$st_header[$prv]</EM></A><BR>\n";

    # Next: the first section stored in a different file, or the home
    # page (section 0) when this section is in the last file.
    if ($st_file[$sc] == $filecount) {
        $nxt = 0;
    }
    else {
        $nxt = $sc;
        $nxt++ while $st_file[$nxt] == $st_file[$sc];
    }
    $nav .= "<b>Next:</b> <A HREF=\"${fileroot}$st_file[$nxt].html\"><EM>$st_header[$nxt]</EM></A>\n";

    $nav .= "\n";
    $navbar[$st_ol[$sc]] = $nav;
}

#
# Generate html output from docbook input
#

sub docbook_html {
    # Translate the document with the "html" translation spec and
    # copy the result, unchanged, into "$fileroot.html".  The output
    # file name is also recorded in @cleanfiles.
    push(@cleanfiles, "${fileroot}.html");

    open(outfile, ">$fileroot.html");
    &sgmlparse("infile", "html");
    print outfile $_ while <infile>;
    close(infile);
    close(outfile);
}

# extlink
#
# Build a shell script to create symbolic links from the name in 
# a reference to the numbered
# html file.  Since the file number that any given section has is 
# subject to change as the document goes through revisions, this allows
# for a fixed target that separate documents can hook into.
#
# Slashes (/) in the reference are converted to percents (%) while
# spaces ( ) are converted to underscores (_);

sub extlink {
    # Append one "ln -fs <fn> <ref>.html" command to the link script
    # "${fileroot}.ln", opening the script on first use.
    #
    #   $ref - reference (label) name; "/" becomes "%" and " "
    #          becomes "_" to form the link name.
    #   $fn  - name of the numbered HTML file the link points to.
    #
    # The link name is kept in a local variable so that the global
    # $file, which holds the name of the input document, is left
    # alone.  Dies if the link script cannot be created.
    local ($ref, $fn) = @_;
    local ($target);

    $ref =~ s/\//%/g;
    $ref =~ s/ /_/g;

    $target = "$ref.html";

    if (!defined(fileno(LINKFILE))) {
        open(LINKFILE, ">${fileroot}.ln")
            || die "sgmlfmt: cannot create ${fileroot}.ln: $!\n";
    }

    print LINKFILE "ln -fs $fn $target\n";
}

# Now, read the command line and take appropriate action

sub main {
    # Parse the command line, locate the input document, choose the
    # DTD and dispatch to the generator for the requested format.
    # Prints a message plus the usage text and exits with status 1
    # when the options are invalid, no input file is given, the file
    # cannot be found, or the output format is missing or unknown.

    # Check arguments
    if (!&NGetOpt('d=s', 'f=s', 'links', 'i:s@', 'hdr=s', 'ftr=s', 'e=s')) {
        &usage;
        exit 1;
    }
    if (@ARGV == 0) {
        print "An input file must be specified.\n";
        &usage;
        exit 1;
    }
    if (&getfile($ARGV[0]) == 0) {
        print "Cannot locate specified file: $ARGV[0]\n";
        &usage;
        exit 1;
    }

    # Figure out which DTD we are using
    if ($opt_d eq "docbook") {
        $dtd = "docbook";
        $decl = "-c " . $sgmldir . "/docbook/2.4.1/catalog " .
            "-c " . $sgmldir . "/docbook/3.0/catalog ";
    }
    else {
        $dtd = "linuxdoc";
        $decl = "-c " . $sgmldir . "/linuxdoc/catalog";
    }

    # Generate the output
    if ($opt_f eq 'html') {
        # Set the character encoding
        if (! $opt_e) {
            $opt_e = "iso-8859-1";
        }
        $html_encoding = "<META HTTP-EQUIV=\"Content-Type\" " .
            "CONTENT=\"text/html; charset=" . $opt_e . "\">";

        if ($dtd eq "docbook") {
            # For docbook the header/footer file names are passed on
            # to the translation engine as variable definitions.
            if ($opt_hdr) {$instantopts .= " -D \"inchdr=${opt_hdr}\"";}
            if ($opt_ftr) {$instantopts .= " -D \"incftr=${opt_ftr}\"";}
            &docbook_html();
        }
        else {
            # For linuxdoc the header/footer text is read here.
            if ($opt_hdr) {$html_header = &gethf($opt_hdr);}
            if ($opt_ftr) {$html_footer = &gethf($opt_ftr);}
            &gen_html();
        }
    }
    elsif ($opt_f eq 'roff') {
        &gen_roff();
    }
    elsif ($opt_f eq 'ascii' || $opt_f eq 'latin1' || $opt_f eq 'koi8-r') {
        # Character-cell devices: the groff device has the same name
        # as the format and the output is filtered through col.
        &do_groff($opt_f, "| col");
    }
    elsif ($opt_f eq 'ps') {
        &do_groff("ps", "");
    }
    else {
        if ($opt_f eq "") {
            # Single-line message; it previously carried an embedded
            # newline and indentation from a wrapped source line.
            print "An output format must be specified with the -f option.\n";
        }
        else {
            print "\"$opt_f\" is an unknown output format.\n";
        }
        &usage;
        exit 1;
    }
}

# Run the program; reaching the exit below means main returned normally.
&main();

exit(0);

sub getdate {
    # Return today's local date as "<Month> <day>, <year>", with the
    # English month name and a four-digit year.  The broken-down time
    # is left in the globals $sec, $min, $hour, $mday, $mon, $year,
    # $wday, $yday and $isdst.
    @months = (
        "January", "February", "March",
        "April",   "May",      "June",
        "July",    "August",   "September",
        "October", "November", "December",
    );
    ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) =
        localtime(time);
    $year += 1900;      # localtime counts years from 1900
    return join(" ", $months[$mon], "$mday,", $year);
}

sub gethf {
    # Read a header or footer file and return its contents as one
    # string.  The first "@@UPDATE@@" on each line is replaced by
    # "Updated <date>", where <date> comes from &getdate.
    #
    # If the file cannot be opened a warning is printed on STDERR and
    # the empty string is returned, so the page is still generated,
    # just without that header or footer.
    #
    # Side effects: sets the globals $date and $data.
    local ($file) = @_;

    $date = &getdate;
    $data = "";

    if (open(IN, $file)) {
        while (<IN>) {
            s/\@\@UPDATE\@\@/Updated $date/;
            $data .= $_;
        }
        close(IN);
    }
    else {
        print STDERR "Warning: cannot read $file: $!\n";
    }
    return $data;
}
