#!/usr/bin/perl

#
#	Copyright (C) 2001 Edwin Mons <info.to.html@edwinm.ik.nu>
#
#	This is free software; you can redistribute it and/or modify
#	it under the terms of the GNU General Public License as published by
#	the Free Software Foundation; either version 2, or (at your option)
#	any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#

use strict;

use Getopt::Std;
use Compress::Zlib;

my $INFODIR = "/usr/share/info";
my $OUTDIR  = ".";
my $ME = "info_to_html";
my $VERSION = "0.9.5";

my %opts = (
	"i" =>  $INFODIR,			          # infodir
	"d"	=>	$OUTDIR,								# top-level directory
	"v" =>  0,											# verbose?
);

my @nonwordarr;


sub usage()
{
	print <<"EOF";
Usage: $ME [-i infodir] [-d outputdir] info

  -i infodir    Directory to use as info-root (Default: $INFODIR)
  -d outputdir  Directory to use as output-root (Default: $OUTDIR)
  -v            Verbose

The infofile will be split by node and dumped in a subdirectory of outputdir,
e.g. if the infofile was named gcc, the output would go to outputdir/gcc.

EOF
	exit 1;
}


sub html($)
{
	my ($t) = @_;

	$t =~ s/&/\&amp;/g;
	$t =~ s/</\&lt;/g;
	$t =~ s/>/\&gt;/g;

	return $t;
}


sub unhtml($)
{
	my ($t) = @_;

	$t =~ s/&lt;/</g;
	$t =~ s/&gt;/>/g;
	$t =~ s/&amp;/\&/g;

	return $t;
}


sub chompstart($)
{
  my ($var) = @_;

  $var =~ s/^[  ]+//;

  return $var;
}


sub getinfofile($$)
{
	my ($infofile, $buffer) = @_;
	my @consider = (
		[ "%s", "plain" ],
		[ "%s.info", "plain" ],
		[ "%s.info.gz", "gzip" ],
		[ "$opts{i}/%s", "plain" ],
		[ "$opts{i}/%s.info", "plain" ],
		[ "$opts{i}/%s.info.gz", "gzip" ],
	);
	my ($file, $it, $type);
	my $done = 0;

	foreach $it (@consider)
	{
		$file = sprintf($it->[0], $infofile);
		$type = $it->[1];
		print "Considering file $file\n" if ($opts{v});
		if ( -f $file )
		{
			print "$file exists.  Using it.  The type is $type\n" if ($opts{v});
			$done = 1;
			last;
		}
	}

	if (!$done)
	{
		print "Cannot find an infofile for $infofile\n";
		exit 2;
	}

	$$buffer = "";
	if ( $type eq "gzip" )
	{
		my ($b, $gz);

		$gz = gzopen($file, "rb") || die "Cannot open $file: $gzerrno\n";
		while ($gz->gzread($b, 131072) > 0)
		{
			$$buffer .= $b;
		}
		if ($gzerrno != Z_STREAM_END)
		{
			die "Error reading from $file: $gzerrno\n";
		}
		$gz->gzclose();
	}
	else
	{
		my $b;
		open(FILE, $file) || die "Cannot open $file";
		binmode(FILE);
		while (read(FILE, $b, 131072) > 0)
		{
			$$buffer .= $b;
		}
		close(FILE);
	}

	return $buffer;
}


sub convert_nodename($)
{
	my ($node) = @_;

	$node =~ s/[^A-Za-z0-9+]/_/g;

	return $node;
}


sub convert_nodename_long($$)
{
	my ($infoname, $node) = @_;
	my ($dir) = "";

	if ($node =~ m!(DIR)!i)
	{
		return "../";
	}
	if ($node =~ m!^\(([^)]+)\)!)
	{
		($infoname) = split(/\./, $1);
		$node = substr($node, length($1) + 2);
		$dir = "../$infoname/";
	}

	return $dir . "$infoname.info." . convert_nodename($node) . ".html";
}


sub condense_nodedata($)
{
	my ($ndref) = @_;
	my @ret = ();
	my $lasttype = "";
	my $node;
	
	foreach $node (@$ndref)
	{
#		if ($node->{'type'} eq $lasttype && $lasttype eq "para")
#		{
#			$ret[scalar(@ret) - 1]->{'data'} .= "\n" . $node->{'data'};
#		}
#		elsif ($node->{'type'} eq $lasttype && $lasttype eq "empty")
#		{
#		}
#		elsif ($node->{'type'} eq 'para' && ($lasttype eq 'menu' || $lasttype eq 'index') && ($node->{'data'} =~ m/^\s+/))
		if ($node->{'type'} eq 'para' && ($lasttype eq 'menu' || $lasttype eq 'index') && ($node->{'data'} =~ m/^\s+/))
		{
			$ret[scalar(@ret) - 1]->{'data'}->{'desc'} .= "\n" . $node->{'data'};
		}
		elsif ($node->{'type'} eq 'empty' && ($lasttype eq 'menu' || $lasttype eq 'index'))
		{
			push(@ret, {
				'type' => $lasttype
			});
		}
		else
		{
			push(@ret, $node);
			$lasttype = $node->{'type'};
		}
	}

	return @ret;
}


sub init_nonwordarr()
{
	my $c;

	foreach $c (0..255)
	{
		if (($c < 48 || $c > 57) &&
				($c < 65 || $c > 90) &&
				($c < 97 || $c > 122) &&
				($c != 46))
		{
			$nonwordarr[$c] = 1;
		}
		else
		{
			$nonwordarr[$c] = 0;
		}
	}
}


sub count_nonword($)
{
		my ($s) = @_;
		my $it;
		my $wcnt = 0;
		my $l = length($s) - 1;

		foreach $it (0..$l)
		{
			$wcnt++ if ($nonwordarr[ord(substr($s, $it, 1))]);
		}

		return $wcnt;
}


sub convert_para($$)
{
	my ($item, $infoname) = @_;
  my $html = "";
	my ($line, $last, $wcnt, $it, $s);

	# First, check for special para types.
	my @lines = split("\n", $item->{'data'});
	
	# Check if it's a title...
	if (@lines == 2 && 
			length($lines[0]) == length($lines[1]) &&
			($lines[1] =~ m/^[\*,=\-\.]+$/))
	{
		$html .= "<h1>$lines[0]</h1>\n";
	}
	else
	{
		$wcnt = 0;
		$wcnt = count_nonword($item->{'data'});

		# if the ratio is less than 0.4, treat it as preformatted text
		if (($wcnt / length($item->{'data'})) < 0.6)
		{
			$html .= "<pre>" . html($item->{'data'}) . "</pre>\n";
		}
		else
		{
			my $last = 0;
			my $first = -1;
			$wcnt = 0;
			foreach $line (@lines)
			{
				if ($line =~ m/^([\s`]+)/)
				{
					if (length($1) > $last)
					{
						$last = length($1);
						$wcnt++;
					}
					$first = length($1) if ($first < 0);
				}
				last if ($wcnt > 1);
			}
			$item->{'data'} = html($item->{'data'});
			$item->{'data'} =~ s/^(\s+)(\*[Nn]ote)/"$1" . ($2 eq "*Note"? "See ": "see ") . "$2"/sge;
			$item->{'data'} =~ s/\*[Nn]ote(\s+)([^:]+)::[,.]/"<a href='" . convert_nodename_long($infoname, unhtml($2)) . "'>" . chompstart("$1$2") . "<\/a>"/sge;
			$item->{'data'} =~ s/\*[Nn]ote(\s+)([^:]+): (\([^)]+\))?([^.,]+)[.,]/"<a href='" . convert_nodename_long($infoname, unhtml((defined($3)?$3:"")."$4")) . "'>" . chompstart("$1$2: " . (defined($3)?$3:"") . "$4") . "<\/a>"/sge;
			$item->{'data'} =~ s!(\s)_([^ ]*)_([.,\s])!$1<u>$2</u>$3!sg;

			if ($wcnt > 1)
			{
				$item->{'data'} =~ s/\n/<br>\n/gs;
				$item->{'data'} =~ s/^(\s+)/"<font style='font-family: courier; whitespace: pre;'>" . " " x (length($1) - $first) . "<\/font>"/egm;
			}
			if ($last == 0)
			{
				$html .= "<p>";
			}
			else
			{
				$html .= "<p style='padding-left: " . (8*($first)) . "px;'>";
			}
			$html .= $item->{'data'} . "</p>\n";
		}
	}

	return $html;
}


sub node_to_html($$)
{
	my ($nodedata, $nodeinfo) = @_;
	my $lasttype = "";
	my $item;
	my $html = "";
	my ($nodename, $infoname);
	my $wcnt;
	my $it;
	my $s;
	my $i = 0;
	my $line;

	($infoname) = split(/\./, $nodeinfo->{'File'});
	$nodename = convert_nodename($nodeinfo->{'Node'});

	$html .= "<html>\n\n<head>\n  <title>$infoname.info: $nodeinfo->{'Node'}</title>\n</head>\n\n<body>\n<h1>$infoname.info: $nodeinfo->{'Node'}</h1>\n";

	if (exists($nodeinfo->{'Next'}))
	{
		$html .= "<b>Go forward to <a href='" . convert_nodename_long($infoname, $nodeinfo->{'Next'}) . "'>$nodeinfo->{'Next'}</a></b><br>\n";
	}
	
	if (exists($nodeinfo->{'Prev'}))
	{
		$html .= "<b>Go backward to <a href='" . convert_nodename_long($infoname, $nodeinfo->{'Prev'}) . "'>$nodeinfo->{'Prev'}</a></b><br>\n";
	}
	
	if (exists($nodeinfo->{'Up'}))
	{
		$html .= "<b>Go up to <a href='" . convert_nodename_long($infoname, $nodeinfo->{'Up'}) . "'>$nodeinfo->{'Up'}</a></b><br>\n";
	}

	if ($nodeinfo->{'Node'} ne 'Top')
	{
		$html .= "<b>Go to the top op <a href='" . convert_nodename_long($infoname, "Top") . "'>$infoname</a></b><br>\n";
	}
	
	foreach $item (@$nodedata)
	{
		if ($item->{'type'} ne $lasttype)
		{
			if ($lasttype eq 'menu' || $lasttype eq 'index')
			{
				$html .= "</table>\n";
			}
		}

		if ($item->{'type'} eq "para")
		{
			$html .= convert_para($item, $infoname);
		}
		elsif ($item->{'type'} eq "menuhead")
		{
			$html .= "<h3>Menu</h3>\n";
		}
		elsif ($item->{'type'} eq "menu")
		{
			if ($lasttype ne $item->{'type'})
			{
				$html .= "<table border='0'>\n";
			}
			if (!exists($item->{'data'}->{'node'}))
			{
				$html .= "<tr><td>&nbsp;</td><td>&nbsp;</td></tr>\n";
			}
			else
			{
				$html .= "<tr><td><a href='" . convert_nodename_long($infoname, $item->{'data'}->{'node'}) . "'>" . html($item->{'data'}->{'name'}) . "</a></td>";
				if (exists($item->{'data'}->{'desc'}))
				{
					$html .= "<td>" . html($item->{'data'}->{'desc'}) . "</td>";
				}
				$html .= "</tr>\n";
			}
		}
		elsif ($item->{'type'} eq "index")
		{
			if ($lasttype ne $item->{'type'})
			{
				$html .= "<table border='0'>\n";
			}
			if (!exists($item->{'data'}->{'node'}))
			{
				$html .= "<tr><td>&nbsp;</td><td>&nbsp;</td></tr>\n";
			}
			else
			{
				$html .= "<tr><td>" . html($item->{'data'}->{'desc'}) . "</td><td><a href='" . convert_nodename_long($infoname, $item->{'data'}->{'node'}) . "'>" . html($item->{'data'}->{'node'}) . "</a></td></tr>\n";
			}
		}

		$lasttype = $item->{'type'};
	}

	if ($lasttype eq 'menu' || $lasttype eq 'index')
	{
		$html .= "</table>\n";
	}
	$html .= "<font size='-2'><i>Created " . localtime() . " on " . $ENV{HOSTNAME} . " with $ME version $VERSION.</i></font>\n</body>\n</html>\n";

	return $html;
}


sub write_html($$)
{
	my ($html, $nodeinfo) = @_;
	my $fname;
	my ($infoname, $nodename);

	($infoname) = split(/\./, $nodeinfo->{'File'});
	$nodename = convert_nodename($nodeinfo->{'Node'});
	$fname = "$opts{d}/$infoname/$infoname.info.$nodename.html";

	if (! -d "$opts{d}/$infoname" )
	{
		print "Creating $opts{d}/$infoname\n" if ($opts{v});
		mkdir("$opts{d}/$infoname", 0777) || die "Cannot create directory $opts{d}/$infoname";
	}

	print "Writing to $fname\n" if ($opts{'v'});
	open(FILE, ">$fname");
	print FILE $$html;
	close(FILE);
}


sub split_node($$$$)
{
	my ($inforef, $node, $nodedata, $nodeinfo) = @_;
	my ($nodename);
	my ($line, @lines);
	my ($item, $lasttype);

	# @nodedata will contain an array with the following data:
	# [ "type" => one of 'menu', 'menuhead', 'index', 'para' or 'empty',
	#   "data" => the data of that field.  In case of index or menu:
	#							[ "node" => link to the node,
	#								"desc" => Description
	#							]
	#	]

	# First, test if it's a node
	if (!($$node =~ m/^File:/))
	{
		# if it doesn't start with 'File:', it's not a node.  Discard it.
		print "Node doesn't start with File:\n" if ($opts{'v'});
		return 0;
	}

	@lines = split("\n", $$node);
	$_ = shift @lines;
	foreach $item (split(/,\s+/))
	{
		my ($key, $val) = split(/:\s+/, $item);
		$nodeinfo->{$key} = $val;
	}

	$lasttype = "";
	foreach $line (@lines)
	{
		$_ = $line;

		# Check for empty lines
		if (m/^\s*$/)
		{
			if ($lasttype ne 'empty')
			{
				push(@$nodedata, { "type" => "empty" });
				$lasttype = 'empty';
			}
		}
		# Check for Menu heads
		elsif (m/^\* Menu:$/)
		{
			push(@$nodedata, { "type" => "menuhead" });
			$lasttype = 'menuhead';
		}
		# Check for menu items.  A menu cannot follow directly on an indexitem
		elsif (@$nodedata > 0 && ($nodedata->[@$nodedata - 1]->{'type'} ne 'index') && 
						m/^\* ([^:]*)::(\s*(.*))?$/)
		{
			push(@$nodedata, { 
				"type" => "menu",
				"data" => {
										"node" => "$1",
										"name" => "$1",
										"desc" => "$3",
									}
			});
			$lasttype = 'menu';
		}
		# this basically supercedes index type
		elsif (m/^\* (.*):\s*([^\.,]*)[\.,](\s+(.*))?$/)
		{
			push(@$nodedata, { 
				"type" => "menu",
				"data" => {
										"node" => "$2",
										"name" => "$1",
									}
			});
			if (defined($4))
			{
				$nodedata->[@$nodedata - 1]->{'data'}->{'desc'} = $4;
			}
			$lasttype = 'menu';
		}
		# Check for index items
		elsif (m/^\* (.*):\s*(.*)[\.,]$/)
		{
			push(@$nodedata, { 
				"type" => "index",
				"data" => {
										"node" => "$2",
										"desc" => "$1",
									}
			});
			$lasttype = 'index';
		}
		else
		{
			if ($lasttype eq 'para')
			{
				$nodedata->[@$nodedata - 1]->{'data'} .= "\n" . $_;
			}
			else
			{
				push(@$nodedata, {
					"type" => "para",
					"data" => $_,
				});
				$lasttype = 'para';
			}
		}
	}

	return 1;
}


sub info_to_html($)
{
	my ($inforef) = @_;
	my ($node);

	my @firstnodes = split("\037\n?", $$inforef);
	my @nodes;
	my $temp;
	my $indirect;

	# First, find indirects
	foreach $node (@firstnodes)
	{
		if ($node =~ m/^Indirect:/)
		{
			print "Found indirect node table\n" if ($opts{'v'});
			foreach $indirect (split("\n", $node))
			{
				my ($file, $pos);
				if (($file, $pos) = $indirect =~ m/^([^:]*):\s+(\d+)$/)
				{
					print "Reading indirect `$file' ($pos)\n" if ($opts{'v'});
					if (getinfofile($file, \$temp) <= 0)
					{
						print "Something went wrong while reading the infofile...\n";
					}
					my @nextnodes = split("\037\n?", $temp);
					push(@nodes, @nextnodes);
				}
			}
		}
		else
		{
			push(@nodes, $node);
		}
	}

	foreach $node (@nodes)
	{
		my (@nodedata, %nodeinfo);
		my ($html);

		@nodedata = ();
		%nodeinfo = ();

		next if (!split_node($inforef, \$node, \@nodedata, \%nodeinfo));

		# We scanned the node, now we convert it to HTML
		@nodedata = condense_nodedata(\@nodedata);
		$html = node_to_html(\@nodedata, \%nodeinfo);
		write_html(\$html, \%nodeinfo);
	}
}


sub main()
{
	getopts('vi:d:?', \%opts);

	if (@ARGV < 1 || $opts{'h'} || $opts{'?'})
	{
		usage();
	}

	if ( ! -d $opts{i} )
	{
		die "The info-root ($opts{i}) does not exist.";
	}
	if ( ! -d $opts{d} )
	{
		die "The directory-root ($opts{d}) does not exist.";
	}

	my $infofile = $ARGV[0];

	my $info;

	if (getinfofile($infofile, \$info) <= 0)
	{
		die "Something went wrong while reading the infofile...\n";
	}

	init_nonwordarr();
	info_to_html(\$info);
}


main();

# ex:set ts=2 sw=2 ai:
