#!/usr/bin/perl -w
# Copyright (c) 2000, 2001 Udo Erdelhoff. All rights reserved.
# Written for the FreeBSD German Documentation Project
#
# Redistribution and use in source and compiled forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#  1. Redistributions of source code must retain the above
#     copyright notice, this list of conditions and the following
#     disclaimer as the first lines of this file unmodified.
#
#  2. Redistributions in compiled form must reproduce the above
#     copyright notice, this list of conditions and the following
#     disclaimer in the documentation and/or other materials provided
#     with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY UDO ERDELHOFF "AS IS" AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL UDO ERDELHOFF BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS DOCUMENTATION, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
#
# $Id: slashexpand,v 1.5 2001/01/07 21:36:11 ue Exp $

use strict;
use Getopt::Long;

# Whitespace and/or newlines are significant inside these elements
my $specials = "<programlisting><screen><literallayout><address>";

# Hack to parse HTML-disguised-as-SGML
$specials = "$specials<pre>";

# These tags are never closed
my $singles = "<area><anchor><xref><co>";

# All other global variables
my ($in, $filetype, $chunk, $tag, $length, $realtag, $closer, @tagstack, $debug);

$debug = 0;

sub usage () {
        print STDERR "usage: slashexpand [--debug]\n";
        print STDERR "Second dash is optional, abbreviation is possible\n";
        die;
}

GetOptions ('debug'	=> \$debug);

# Sometimes, it's good to be bad...
$/ = "\0\0";		
$in = <>;
exit unless (defined $in);
chomp $in;
exit unless ($in ne "");

# Skip the FPI and any comments, brutal version
($filetype) = ($in =~ /^<!DOCTYPE\s+(.*?)\s/i);
unless (defined $filetype) {
	($filetype) = ($in =~ /^[\000-\377]*?<!DOCTYPE\s+(.*?)\s/i);
}
unless (defined $filetype) {
	print STDERR "Couldn't find opening <!DOCTYPE <type>\n";
	die "Cannot handle this document\n";
}
$filetype =~ tr/A-Z/a-z/;
($chunk) = ($in =~ /^([\000-\377]*?)<$filetype>/io);
unless (defined $chunk) {
	print STDERR "Could't find the opening <$filetype>\n";
	die "Cannot handle this document\n";
}
$length = length ($chunk);
substr ($in, 0, $length) = "";
print "$chunk";

# Let's deal out some damage ;->
while (defined $in && $in ne "") {
# Detect and handle SGML comments. We assume valid SGML...
	if ($in =~ /^<!--/) {
		($chunk) = ($in =~ /^(<!--[\000-\377]+?-->)/);
		unless (defined $chunk) {
			print STDERR "unclosed SGML comment found here: $in\n";
			die "Giving up\n";
		}
		$length = length ($chunk);
		substr ($in, 0, $length) = "";
		print "$chunk";
		next;
	}

# Detect and handle character data
	unless ($in =~ /^</) {
		($chunk) = ($in =~ /^([\000-\377]+?)</);
		$chunk = $in unless (defined $chunk);
		$length = length ($chunk);
		substr ($in, 0, $length) = "";
		print "$chunk";
		next;
	}

# Ladies and Gentlemen, we have a tag. Abuse it.
	($tag) = ($in =~ /^(<[\000-\377]+?>)/);
	unless (defined $tag) {
		print STDERR "impossible situation #2 found\n";
		print STDERR "Current input $in\n";
		die "Giving up!\n";
	}
	$realtag = $tag;
	if ($tag =~ /\s/) {
		($tag) = split (/\s/, $tag, -1);
		$tag = "$tag>";
	}
	$tag =~ tr/A-Z/a-z/;

	if ($specials =~ $tag) {				# Special tag?
		$closer = $tag;
		substr ($closer, 1, 0) = "/";
		($chunk) = ($in =~ /^($realtag[\000-\377]+?$closer)/);
		if (defined $chunk) {				# Good special
			$length = length ($chunk);
			substr ($in, 0, $length) = "";
			print "$chunk";
			next;
		}
		print "Yikes, special hack activated\n" if ($debug);
		($chunk) = ($in =~ /^($realtag[\000-\377]+?<\/>)/);
		unless (defined $chunk) {			# ***TILT***
			print STDERR "Unclosed special found: $in\n";
			die "Giving up!\n";
		}
		$length = length ($chunk);
		substr ($chunk, -3, 3) = $closer;
		substr ($in, 0, $length) = "";
		print "$chunk";
		next;
	}

	if ($singles =~ $tag) {					# Single tag?
		$length = length ($realtag);
		substr ($in, 0, $length) = "";
		print "$realtag";
		print "Special $realtag ignored, matched by $tag\n" if ($debug > 0);
		next;
	}

	$length = length ($realtag);				# Remember it
	if ($tag =~ m!^</!) {					# Closing tag?
		$closer = pop @tagstack;
		substr ($closer, 1, 0) = "/";
		print "Popped $closer\n" if ($debug > 0);
		if ($tag ne "</>") {
			if ($tag ne $closer) {			# Sanity check
				print STDERR "Stack is $closer\n";
				print STDERR "tag expects $tag\n";
				print STDERR "realtag expects $realtag\n";
				print STDERR "Input position is $in\n";
				die "Program lost in space -- Bad SGML?\n";
			}
		} else {					# Bad closer!
			print "Replacing </> with $closer\n" if ($debug > 0);
			$realtag = $closer;
		}
	} else {						# Opening tag!
		push @tagstack, $tag;
		print "Pushed $tag\n" if ($debug > 0);
	}

	substr ($in, 0, $length) = "";
	print "$realtag";
	next;
}
