#!/usr/bin/env python2
"""
doclifter: translate man/mdoc/ms/me/mm sources to DocBook.

by Eric S. Raymond, copyright 2002.

This comment is addressed to you if you want to add support for another
macro package to doclifter.  Or if you have encountered a bug in doclifter 
and need to understand the code in doclifter in order to fix it.  Or if
you just want to understand how it works.

Internally, doclifter consists mainly of a framework class called
TroffTranslator.  This class is instantiated and told to do its stuff
by a routine called compilerlike.sponge, which handles all I/O to disk
and gives doclifter its cc-like invocation protocol.  Underneath, it
passes TroffTranslator a string consisting of the entire text of the
file to be translated and accepts a translation string back.

TroffTranslator provides I/O and other basic services for a stack of request
interpreters.  Interpreters get added to the stack when TroffTranslator 
recognizes certain patterns in the input; see the table interpreter_dispatch
for details.  If a string pattern added to this table is length 2, 
TroffTranslator will assume it is a request name and check to make sure
that it's not a macro.

The interpreter stack always includes TroffInterpreter at the bottom.  This
request interpreter handles the small set of troff requests that we translate,
including .so, .nf, .fi, .if, etc.  It also handles macro and string expansion.

Each request interpreter is a class that provides methods and members to be
used by the framework.  Here they are:

name
   The name of the macro set

exclusive
   Whether this is a "major" macro set like man, mdoc, mm, ms, or me --
as opposed to a minor one like pod2man or TkMan.  Whichever major macro
set is triggered earliest in the file gets a lock on it; trigger patterns
from other exclusive macros are subsequently ignored.

toptag
   The top-level tag in the type of DocBook that this request interpreter
generates.

ignore_set
   Tags to ignore.  List here any presentation-level tags that don't have
structural implications.  They will be silently discarded.
   Note: there is a potential subtle gotcha in the handling of ignore
sets.  The code presently assumes that no tag in any interpreter's
ignore set is handled by any other interpreter.

complain_set
   Tags to complain about.  Put here things that can't be translated out
of presentation level but that might have structural meaning (such as
indentation changes).  The user will be warned on stderr when these 
come up.  Otherwise they're ignored.

parabreak_set
   The set of tags that forces a new paragraph without changing the
document section.  Used to recognize the end of lists.

sectionbreak_set
   The set of tags that forces a new document section.  Things that
are going to translate to a DocBook sect, refsect, or section tag 
should go here.  

translations
   Special-character to ISO literal mappings.  These are applied late
in the translation, *after* string and macro evaluation.
   It's also useful to know that your request interpreter can call the
function declare_body_start() to tell the framework class where the
body of the document starts (as opposed to the preamble full of troff
requests).  This information is used to restrict the scope of
character translations.

interpret
   The request interpreter.  Called on every input line that begins 
with a command character, that is . or ' not followed by another ' on the
same line.
   This method needs to be careful about troff continuation (\c)
characters.  If you add trailing markup to a line, or entirely replace the 
line, be sure to check for trailing \c first, remove it if present, and
paste it back on the end.

break_trap
   If not None, a function to be called immediately before interpreting
each command that performs a paragraph or section break.  You don't need
to supply this, but it may be useful.

preprocess, postprocess
   Pre-processing and postprocessing hooks.  Each takes a string
(assumed to be the entire file text) and returns a string.

reductions:
   A list of pairs of macro names.  In each pair, the first is to be
replaced by the second if this macro set is active and the definition of
the first contains the second.  This member is useful for replacing
stereotyped wrapper macros with standard constructs that the translator
knows how to handle.  
   Most frequent case: lots of man page authors define a .Pp macro that
does various funky things in troff but just expands to .PP in nroff.
If we replace this with .PP various nasty parsing situations suddenly
don't break.

The easiest way to write a full-blown new request interpreter is to
take an existing one and mutate it.  If the macro package you are
trying to support merely adds a few tags to an existing one, consider
writing an interpreter for just those tags and adding it to the stack
(this is the way the Pod2ManInterpreter code relates to
ManInterpreter).  

Also note that there is existing machinery for filtering a file
section with an external command that generates XML or SGML output
-- search for cmdfilter.

Warning: much of this code is grubby.  Alas, the grubbiness is
intrinsic, because the troff request language is grubby.

$Id: doclifter,v 1.829 2003/03/18 15:14:22 esr Exp $
"""
# Requires Python 2.2a
import sys, os, glob, re, sre, string, exceptions, copy, tempfile

blankline = re.compile(r"^\s*$")

# Start tag on a line by itself
endtag = re.compile("<[^>]*>$")

# Used in C syntax recognition
c_keywords = ("void", "char", "short", "int",
              "long", "float", "double", "signed",
              "unsigned", "typedef", "struct",
              "union", "enum", "const", "volatile",
              "inline", "restricted")	# C9X

# Used to strip headers off generated HTML documents.
xmlheader = re.compile(r"<\?.*\?>\n")
doctype = re.compile(r"<\!DOCTYPE[^>]*\>\n")

# These patterns are applied *after* special-character translation

# Match an RFC822 email address, possibly with surrounding <>.
# This is the right thing because the XSL stylesheets surround
# <email> content with <> on output.
email_re = "\b(?:&lt;)?(?P<email>[-\w_.]+@[-\w_.]+)(?:&gt;)?\b"	

# Match an URL. This pattern is carefully constructed not to eat
# a following period if (as is often the case) it occurs at the
# end of a sentence.
url_re   = r"(?P<url>\b(http|ftp|telnet|mailto)://[-_%\w/.~]+[-_%\w/])"

# Match a troff highlight
highlight = re.compile(r"(?<!\\)((\\[fF]\(..)|(\\[fF].))")
highlight_stripper = re.compile(r"^\.[BI] ")

# Match a glue token with all preceding and following whitespace
hotglue = re.compile(r"\s*@GLUE@\s*")
cleantag = re.compile(r"</([a-z]+)><\1>")

# Match an identifier token in C or Python
id_re = re.compile("[_a-zA-Z][_a-zA-Z0-9]*")

# List how troff specials that can appear as list tags map into
# DocBook mark types.  According to Norm Walsh's DSSL and XSL stylesheets,
# both toolchains have two styles available; bullet and box.
ip_tag_mapping = {
    "\(bu":"bullet",
    "\(sq":"box",
    }

def deemphasize(str):
    "Return the string with all troff font-change escapes removed."
    return highlight.sub("", str)

def is_command(line):
    "Is this line a troff request or macro invocation?"
    if len(line) <= 1:
        return False
    if line[0] == ".":
        return True
    # A leading ' counts only when no later ' follows; this works around
    # a common bug, a string-enclosing ' at the left margin.
    return line[0] == "'" and "'" not in line[1:]

def is_comment(line):
    "Recognize the many deformed ways people write troff comments."
    # Classify with all embedded whitespace removed; sloppy sources
    # sprinkle it freely inside the comment leader.
    line = line.replace(" ", "").replace("\t", "")
    if line == ".":
        return True
    return (line[:3] in (r'.\"', r'./"', r',\"', r".\'", '\'\\"', r'\'\"', r'\".', r"...", r"'''", r"\!.")
            or line[:2] in (r'."', r".'", r'\"', r"'#")
            or line[:4] == r'.\\"')

def match_command(line, tag):
    "Does this line invoke the given request, allowing spaces after the dot?"
    if line and line[0] in (".", "'"):
        fields = line[1:].split()
        return fields and fields[0] == tag
    return 0

def quoteargs(tokens):
    "Requote macro arguments so that a later re-parse sees the same tokens."
    if len(tokens) == 1:
        return tokens[0]
    # Double any embedded quotes, then wrap each argument in quotes.
    quoted = ['"%s"' % token.replace('"', '""') for token in tokens[1:]]
    return tokens[0] + " " + " ".join(quoted)

def untagged(pattern):
    "Compile pattern so that it refuses to match already-marked-up text."
    guarded = "(?<!>)%s(?!</)" % pattern
    return re.compile(guarded)

def fontclose(str):
    "Make sure we exit interpretation of the given string in normal font."
    last_font_escape = str.rfind(r'\f')
    # Use a slice rather than an index so a truncated trailing \f
    # (which used to raise IndexError) is treated as a non-R font.
    if last_font_escape > -1 and str[last_font_escape+2:last_font_escape+3] != "R":
        str += r"\fR"
    # Collapse a trailing font change immediately followed by the reset
    # (\fX\fR) into just the reset.  The backslashes must be doubled:
    # an unescaped \f in a pattern or replacement means formfeed to the
    # re module, so the original expression could never match.
    str = re.compile(r"\\f[^P]\\fR$").sub(r"\\fR", str)
    return str

def get_troff_char(str):
    "Extract a leading troff character or escape sequence from the string."
    if len(str) == 0:
        return ""
    elif str[0] != "\\":
        return str[0]
    # Escape forms: \x, \(xx, \*x, \*(xx.  Bounds-check each lookahead;
    # a truncated escape at end of string used to raise IndexError.
    take = 1
    if take < len(str) and str[take] == '*':	# string-interpolation escape
        take += 1
    if take < len(str) and str[take] == '(':	# two-character name follows
        take += 3
    else:
        take += 1
    return str[:take]

def make_comment(str):
    "Wrap text in an XML comment, stripping any troff comment leader."
    for leader in (r'.\"', r'.\\"'):
        str = str.replace(leader, "")
    # "--" is illegal inside XML comments, so escape the dashes.
    return "<!-- %s -->" % str.replace("-", r"\-")

def lineparse(line):
    "Parse arguments of a dot macro into a token list (leader char first)."
    if not is_command(line):
        return None
    #sys.stderr.write("About to parse: " + line + "\n")
    tokens = [line[0]]
    state = 'dot'		# Start after the dot in dot state
    for c in line[1:]:
        if state == 'dot':		# skipping whitespace after the dot
            if c in (" ", "\t"):
                continue
            else:
                tokens[-1] += c
                state = 'token'
        elif state == 'token':		# accumulating a token
            if c in (" ", "\t"):
                state = 'ws'
            else:
                tokens[-1] += c
        elif state == 'ws':	# in whitespace between tokens
            if c in (" ", "\t"):
                continue
            elif c == '"':
                tokens.append('"')
                state = 'string'
            elif c == '\\':
                state = 'leader?'
            else:
                tokens.append(c)
                state = 'token'
        elif state == 'string':		# in string
            tokens[-1] += c
            if c == '"':
                state = 'stringend'
        elif state == 'stringend':	# just saw end-of-string, what now?
            if c == '"':
                # "" inside a string is a literal quote; resume the string.
                state = 'string'
            elif c in (" ", "\t"):
                state = 'ws'
            elif c == '\\':
                state = 'leader?'
            else:
                # NOTE(review): this character is not appended to any
                # token before switching state; it is silently dropped.
                state = 'token'
        elif state == 'leader?':	#  possible comment leader
            if c == '"':
                # \" starts a trailing comment; discard the rest of the line.
                break
            else:
                tokens.append("\\" + c)
                state = 'token'
    # Special case: turn trailing brackets into an argument,
    # so e.g. ".if\{" parses as [".if", "\{"].
    if len(tokens) == 1:
        trailer = tokens[0][3:5]
        if trailer in (r"\{", r"\}"):
            tokens[0] = tokens[0][:3]
            tokens.append(trailer)
    return tokens

def stripquotes(arg):
    "Perform quote-stripping appropriate for macros and .ds commands."
    if isinstance(arg, list):
        # Strip each element; a list comprehension always yields a real
        # list (map() did too in Python 2, but this is unambiguous).
        return [stripquotes(element) for element in arg]
    else:
        if not arg:		# empty argument: nothing to strip
            return arg
        if arg[0] == '"':
            arg = arg[1:]
        # Re-test emptiness so a lone '"' doesn't raise IndexError.
        if arg and arg[-1] == '"':
            arg = arg[:-1]
        return arg

class LiftException(exceptions.Exception):
    "Error carrying a message and a shell-style return status."
    def __init__(self, message, retval=1):
        self.retval = retval
        self.message = message

class SemanticHintsRegistry:
    "Represent all the semantic information gathered during a run."
    def __init__(self):
        # Maps token string -> semantic tag (possibly with attributes).
        self.dictionary = {}
    def post(self, token, type):
        "Post an association of a string with a semantic markup type."
        #sys.stderr.write("Markup %s as %s\n" % (token, type))
        self.dictionary[token] = type
    def apply(self, text):
        "Apply all known hints to lift tokens in a text string."
        for (token, tag) in self.dictionary.items():
            with_hi = r"<emphasis\s+remap='[A-Z]+'>(%s)</emphasis>" % token
            try:
                ender = tag.split()[0]	# discard attributes
                text = re.compile(with_hi).sub(r"<%s>\1</%s>"%(tag,ender),text)
                # Guard the bare-token pattern with lookarounds so tokens
                # already lifted into markup are not wrapped again.  (The
                # original ended with "\b" in a non-raw string -- a
                # literal backspace -- and so never matched anything.)
                bare = r"(?<!>)\b(" + token + r")\b(?!</)"
                text = re.compile(bare).sub(r"<%s>\1</%s>" % (tag, ender), text)
            except re.error:
                # Tokens containing regexp metacharacters are skipped.
                # (re.error is the same class the old sre.sre_compile.error
                # named, and survives in Python 3.)
                pass
        return text
    def read(self, input):
        "Read in a hints string or file-like object as dumped by __repr__."
        if hasattr(input, "read"):
            # A file-like object: read it directly.  (The original passed
            # it to open(), which only accepts a filename.)
            data = input.read().split('\n')
        else:
            data = input.split('\n')
        for line in data:
            if line.startswith('.\\" |'):
                # Someday we'll have more declarations
                fields = line[5:].split()
                if len(fields) != 4:
                    continue	# malformed declaration; skip it
                (mark, token, connective, markup) = fields
                if mark != "mark" or connective != "as":
                    continue
                self.post(token, markup)
    def __repr__(self):
        "Dump a representation of hint info."
        out = '.\\" Begin doclifter hints.\n'
        for (token, tag) in self.dictionary.items():
            out += '.\\" | mark %s as %s\n' % (token, tag)
        out += '.\\" End doclifter hints.\n'
        return out

class DocLifter:
    "DocBook translation of generic troff macros."
    # In each tuple, the first element is an emphasis remap attribute.
    # The second element is a regexp to match to the tag content.
    # If the regexp matches, the bracketing emphasis tags are replaced
    # with the semantic tag in the third column.
    lift_highlights = (
        ("SM",	r"[A-Z.]*",	"acronym"),	# Historical -- SM is rare
        ("SM",	r"[A-Z]+_[A-Z_]+",	"envar"),	# In bison.1, cvs.1
        ("[BI]",r"-[^<]+",	"option"),	# likely command option man(7)
        ("[BI]",r"[0-9.]+",	"literal"),	# literal value
        ("[BI]",r"[a-zA-Z0-9.]+((\s|&nbsp;)--?[^<]+)+",	"userinput"),	# user command
        ("[BI]",r"\.[a-zA-Z][^<]*",	"markup"),	# roff markup
        ("[BI]",r"/[^<]+",	"filename"),	# Marked filenames
        ("[BI]",r"~/[^<]*",	"filename"),	# Home directory filenames
        ("[BI]",email_re,	"email"),	# email addresses
        ("[BI]",r"SIG[A-Z]+",	"symbol class='signal'"),
        ("[BI]",r"errno",	"symbol class='variable'"),
        ("[BI]",r"[a-z_]*_t",	"type"),
        ("[BI]",r"[a-z_]+(?:\(\))", "function"),
        # Error codes (errno values).  This is the Linux set.
        ("[BI]",r"E2BIG",	"errorcode"),
        ("[BI]",r"EACCES",	"errorcode"),
        ("[BI]",r"EAGAIN",	"errorcode"),
        ("[BI]",r"EBADF",	"errorcode"),
        ("[BI]",r"EBADMSG",	"errorcode"),
        ("[BI]",r"EBUSY",	"errorcode"),
        ("[BI]",r"ECANCELED",	"errorcode"),
        ("[BI]",r"ECHILD",	"errorcode"),
        ("[BI]",r"EDEADLK",	"errorcode"),
        ("[BI]",r"EDOM",	"errorcode"),
        ("[BI]",r"EEXIST",	"errorcode"),
        ("[BI]",r"EFAULT",	"errorcode"),
        ("[BI]",r"EFBIG",	"errorcode"),
        ("[BI]",r"EINPROGRESS",	"errorcode"),
        ("[BI]",r"EINTR",	"errorcode"),
        ("[BI]",r"EINVAL",	"errorcode"),
        ("[BI]",r"EIO",		"errorcode"),
        ("[BI]",r"EISDIR",	"errorcode"),
        ("[BI]",r"EMFILE",	"errorcode"),
        ("[BI]",r"EMLINK",	"errorcode"),
        ("[BI]",r"EMSGSIZE",	"errorcode"),
        ("[BI]",r"ENAMETOOLONG","errorcode"),
        ("[BI]",r"ENFILE",	"errorcode"),
        ("[BI]",r"ENODEV",	"errorcode"),
        ("[BI]",r"ENOENT",	"errorcode"),
        ("[BI]",r"ENOEXEC",	"errorcode"),
        ("[BI]",r"ENOLCK",	"errorcode"),
        ("[BI]",r"ENOMEM",	"errorcode"),
        ("[BI]",r"ENOSPC",	"errorcode"),
        ("[BI]",r"ENOSYS",	"errorcode"),
        ("[BI]",r"ENOTDIR",	"errorcode"),
        ("[BI]",r"ENOTEMPTY",	"errorcode"),
        ("[BI]",r"ENOTSUP",	"errorcode"),
        ("[BI]",r"ENOTTY",	"errorcode"),
        ("[BI]",r"ENXIO",	"errorcode"),
        ("[BI]",r"EPERM",	"errorcode"),
        ("[BI]",r"EPIPE",	"errorcode"),
        ("[BI]",r"ERANGE",	"errorcode"),
        ("[BI]",r"EROFS",	"errorcode"),
        ("[BI]",r"ESPIPE",	"errorcode"),
        ("[BI]",r"ESRCH",	"errorcode"),
        ("[BI]",r"ETIMEDOUT",	"errorcode"),
        ("[BI]",r"EXDEV",	"errorcode"),
        # Standard environment variables from environ(5).
        ("[BI]","USER",		"envar"),
        ("[BI]","LOGNAME",	"envar"),
        ("[BI]","HOME",		"envar"),
        ("[BI]","LANG",		"envar"),
        ("[BI]","PATH",		"envar"),
        ("[BI]","PWD",		"envar"),
        ("[BI]","SHELL",	"envar"),
        ("[BI]","TERM",		"envar"),
        ("[BI]","PAGER",	"envar"),
        ("[BI]","EDITOR",	"envar"),	# (was misspelled "EDITER")
        ("[BI]","VISUAL",	"envar"),
        ("[BI]","BROWSER",	"envar"),
        # Common library environment variables, also from environ(5)
        ("[BI]","LANG",		"envar"),	# (duplicate entry; harmless)
        ("[BI]","LANGUAGE",	"envar"),
        ("[BI]","NLSPATH",	"envar"),
        ("[BI]","LOCPATH",	"envar"),
        ("[BI]","LC_ALL",	"envar"),
        ("[BI]","LC_MESSAGES",	"envar"),
        ("[BI]","TMPDIR",	"envar"),
        ("[BI]","LD_LIBRARY_PATH",	"envar"),
        ("[BI]","LD_PRELOAD",	"envar"),
        ("[BI]","POSIXLY_CORRECT",	"envar"),
        ("[BI]","HOSTALIASES",	"envar"),
        ("[BI]","TZ",		"envar"),
        ("[BI]","TZDIR",	"envar"),
        ("[BI]","TERMCAP",	"envar"),
        ("[BI]","COLUMNS",	"envar"),
        ("[BI]","LINES",	"envar"),
        ("[BI]","PRINTER",	"envar"),
        ("[BI]","LPDEST",	"envar"),
    )
    # Applied to the whole text after special-character translation.
    post_translation_patterns = (
        # man(7)-style man-page references
        (r"<emphasis remap='[BI]'>([^ ]+)</emphasis>(?:&zerosp;|&thinsp;)?\(([0-9]+)\)",
         r"<citerefentry><refentrytitle>\1</refentrytitle><manvolnum>\2</manvolnum></citerefentry>"),
        # Here's where we fold all those continuation lines.
        ("\\\c\n",	""),
        )
    # Applied after the lift_highlights pass has run.
    post_lift_patterns = (
         # Find a highlight directly after an <option> makes it <replaceable>
        (r"(<option>[^ ]+</option>\s*)<emphasis remap='[BI]'>([^<]+)</emphasis>",
         r"\1<replaceable>\2</replaceable>"),
	# Find a replaceable in square brackets after an  option
        (r"(<option>[^ ]+</option>\s*)\[<emphasis remap='[BI]'>([^<]+)</emphasis>\]",
         r"\1<replaceable>\2</replaceable>"),
       ) 

    # Recognizes the argument of a .ta-style tab-stop declaration.
    tabset       = re.compile(r"tab *\(?(.)\)?")

    # Literal-string rewrites applied before translation proper.
    pretranslations = (
     # The ultimate in decompiling presentation markup...
     (r"e\h'-\w:e:u'\`", r"\(`e"),	# gawk.1
     (r"e\h'-\w:e:u'\'", r"\('e"),	# gawk.1
     (r"T\h'-.1667m'\v'.224m'E\v'-.224m'\h'-.125m'X", r"TeX"),	# geqn.1
     )

    # In each tuple, the first element is an emphasis remap attribute.
    # The second element is a regexp to match to the tag content.
    # If the regexp matches, and the content grouped by parentheses
    # looks like an id that was created during translation, then the
    # emphasis tags are replaced with a link tag with the target being
    # the result of converting the tag contents to an XML id.
    lift_links = (
        ("SM",	r"[A-Z ]+"),	# Used in RCS and others
        ("Em",	r"[A-Z ]+"),	# Used in csh.1
        ("B",	r"[A-Z ]+"),	# Used in java.1, refer.1
      )

    def __init__(self, xml=0, verbose=0, quiet=0, tm_enable=0, includepath=""):
        "Initialize translation state and options."
        # "/"[:xml] yields "/" when xml is true, "" otherwise; presumably
        # spliced into empty-element tags (XML <tag/> vs SGML <tag>) --
        # the consumer is not visible in this chunk; confirm.
        self.xml = "/"[:xml]
        self.verbose = verbose
        self.quiet = quiet
        self.includepath = includepath
        # Requests silently discarded; .tm output is kept only when
        # tm_enable is on.
        self.ignore_set = {"br":1, "nl":1}
        if not tm_enable: self.ignore_set["tm"] = True
        self.outsubst = []		# (old, new) output substitution pairs
        self.sectname = None		# title of current section, if any
        self.idlist = {}		# ids issued so far (for duplicates)
        self.listitem = False
        self.sectionhooks = []		# callables run at next section break
        self.fontfamily = ""		# current groff font family

        global errorcount
        errorcount = 0

    def body_section(self):
        "True when the current section is ordinary body text (not NAME/SYNOPSIS)."
        if not self.sectname:
            return self.sectname
        return self.sectname.upper() not in ("NAME", "SYNOPSIS")
    def declare_body_start(self):
        "Latch the location where the document body starts."
        # NOTE(review): body_start is not initialized in __init__ as far
        # as this chunk shows; presumably it is reset elsewhere before
        # each translation -- confirm.
        if not self.body_start:
            self.body_start = len(self.output)
            self.emit(make_comment("body begins here"))

    # I/O utility code
    def popline(self):
        "Pop a line off the input source, expanding strings and macros."
        while self.lines:
            line = self.lines.pop(0)
            # Not clear why this is needed...
            if line is None:
                return None
            self.lineno += 1
            # An integer in the line queue is a marker left by macro
            # expansion: its value is the line number to restore once
            # the expanded lines are exhausted.
            if type(line) == type(0):
                self.lineno = line
                self.troff.macroargs.pop()
                self.troff.macronames.pop()
                continue
            if self.verbose >= 4:
                self.notify("popped: " + line)
            return self.troff.expand(line)    
        return None
    def pushline(self, line):
        "Return a line to the front of the input queue."
        if self.verbose >= 4:
            self.notify("pushed: %s" % line)
        self.lines.insert(0, line)
        self.lineno -= 1
    def peekline(self):
        "Return the next input line without consuming it."
        # Must round-trip through popline so end-of-macro markers are
        # processed before we look at the line.
        nextline = self.popline()
        self.pushline(nextline)
        return nextline
    def macro_return(self):
        "Skip the remainder of the current macro."
        if not self.troff.macroargs:
            self.notify("warning: return outside of macro")
        else:
            # Discard queued lines until we hit the integer marker that
            # ends this macro expansion, then do the same state cleanup
            # popline() would have done on reaching it.
            while True:
                line = self.lines.pop(0)
                self.lineno += 1
                if type(line) == type(0):
                    self.lineno = line
                    self.troff.macroargs.pop()
                    self.troff.macronames.pop()
                    break
    def notify(self, msg):
        "Write a cc-style message to stderr; return the formatted text."
        if self.troff.macronames:
            context = ", expanding %s" % self.troff.macronames[-1]
        else:
            context = ""
        msg = '"%s", line %d%s: %s\n' % (self.file, self.lineno, context, msg)
        sys.stderr.write(msg)
        return msg
    def warning(self, msg):
        "Report a warning through the standard notification channel."
        return self.notify("warning: " + msg)
    def error(self, msg):
        "Report an error and bump the global error count."
        msg = self.notify("error: " + msg)
        global errorcount
        errorcount = errorcount + 1
        return msg
    def passthrough(self, tokens):
        "Preserve an untranslated request as an XML comment (unless quiet)."
        if self.quiet:
            return
        self.emit(make_comment(" ".join(tokens)))
    def emit(self, line, trans=1):
        "Emit output, applying outsubst substitutions unless trans is off."
        if self.verbose >= 5:
            self.notify("emit(%s, trans=%d)" % (line, trans))
        if trans and self.outsubst:
            # Scan character by character, suspending substitution inside
            # <...> spans so tag names are never rewritten.
            do_xlate = True
            translated = ""
            i = 0
            while i < len(line):
                if line[i] == '<':
                    do_xlate = 0
                if not do_xlate:
                    if line[i] == '>':
                        do_xlate = True
                    translated += line[i]
                    i += 1
                else:
                    # NOTE(review): after one pair substitutes and i
                    # advances, the remaining pairs are still tried at the
                    # new position within this same scan step, so several
                    # substitutions can be applied back to back.
                    substituted = False
                    for (old, new) in self.outsubst:
                        if line[i:i+len(old)] == old:
                            translated += new
                            i += len(old)
                            substituted = True
                    if not substituted:
                        translated += line[i]
                        i += 1
            line = translated
        # Presumably literal tabs were shielded as \t earlier in the
        # pipeline; restore them on the way out -- confirm upstream.
        self.diversion.append(line.replace(r"\t", "\t"))

    # Section-break handlers
    def end_paragraph(self, label="random"):
        "Leave paragraph context, clearing no-fill mode and pending <para>."
        if self.verbose >= 3:
            self.notify("end_paragraph(%s)" % label)
        self.need_para = False
        self.troff.nf = False
    def need_paragraph(self):
        "Arrange for <para> to be prepended to the next text line."
        if self.verbose >= 3:
            self.notify("need_paragraph()")
        self.need_para = True
    def paragraph(self, remap=""):
        "Replace generic paragraph-start macro with blank line."
        if self.verbose >= 5:
            self.notify("paragraph(remap='%s')" % remap)
        self.end_paragraph("paragraph")
        if remap:
            self.emit(make_comment(remap))
        else:
            self.emit("")
        self.need_paragraph()
    def pop_section(self, depth):
        "Close open sections down to the given nesting level."
        self.troff.nf = False	# (end_paragraph below clears this again)
        self.end_paragraph(label="pop_section")
        self.need_para = False
        # Execute any traps user might have planted.
        for hook in self.sectionhooks:
            hook()
        # Finally, emit end of section.
        # refentry documents close refsectN tags; everything else sectN.
        if self.interpreters[-1].toptag == "refentry":
            ref = "ref"
        else:
            ref = ""
        for i in range(self.sectiondepth - depth + 1):
            self.emit("</%ssect%d>" % (ref, self.sectiondepth - i))
        self.sectiondepth = depth
    def push_section(self, depth, title, makeid=1):
        "Start new section, closing any sections at or below this level."
        self.pop_section(depth)
        if self.interpreters[-1].toptag == "refentry":
            ref = "ref"
        else:
            ref = ""
        if makeid:
            id = " id='%s'" % self.make_id_from_title(title)
        else:
            id = ""
        # The trailing \fR forces the title to end in roman font.
        self.emit("\n<%ssect%d%s><title>%s\\fR</title>" % (ref, depth, id, title))
        self.need_paragraph()
        self.sectiondepth = depth
        self.sectname = title
        # A section change discards any pending section hooks.
        self.sectionhooks = []
    def paragraph_break(self, line):
        "Does this line invoke a command that breaks the current paragraph?"
        tokens = lineparse(line)
        if not tokens:
            tokens = ["." + line]
        if tokens[0] == ".end":
            return 1
        # Section breaks imply paragraph breaks, so check both sets.
        request = tokens[0][1:]
        for interpreter in self.interpreters:
            if request in interpreter.parabreak_set:
                return 1
            if request in interpreter.sectionbreak_set:
                return 1
        return 0
    def section_break(self, line):
        "Does this line invoke a command that starts a new section?"
        tokens = lineparse(line)
        if not tokens:
            tokens = ["." + line]
        if tokens[0] == ".end":
            return 1
        request = tokens[0][1:]
        for interpreter in self.interpreters:
            if request in interpreter.sectionbreak_set:
                return 1
        return 0

    def begin_block(self, markup, remap="", nofill=0):
        "Open a block-level DocBook element, optionally in no-fill mode."
        self.in_block = True
        self.end_paragraph(label="begin_block")
        attribute = ""
        if remap and not self.quiet:
            attribute = " remap='" + remap + "'"
        # literallayout already preserves layout; any other no-fill block
        # gets a nested literallayout to do the job.
        if nofill and markup != "literallayout":
            self.emit("<" + markup + attribute + "> <literallayout>")
        else:
            self.emit("<" + markup + attribute + ">")
        if nofill:
            self.troff.nf = True

    def end_block(self, markup, remap=""):
        "Close a block-level DocBook element opened by begin_block."
        comment = ""
        if remap and not self.quiet:
            comment = " <!-- remap='" + remap + "' -->"
        # Turn off all font highlights -- technically incorrect,
        # but almost always the right thing to do.
        self.emit(r"\fR</" + markup + ">" + comment)
        self.need_paragraph()
        self.troff.nf = False
        self.in_block = False

    # Highlight handling
    def change_highlight(self, htype, prefix='f'):
        "Process a \\f or \\F font escape; return the markup it implies."
        if prefix == 'F':	# groff font family change
            if htype == 'T':	# 'T' selects the default family
                htype = ''
            self.fontfamily = htype
            return ""
        else:			# ordinary font change
            #real = htype
            pop = False
            #self.notify("1: change_highlight(%s) requested; current is %s" % (real, self.highlight))
            # Map numeric font positions onto their conventional names.
            if htype == "0":
                pop = True
                htype = self.oldhighlight
            elif htype == "1":
                htype = "R"
            elif htype == "2":
                htype = "I"
            elif htype == "3":
                htype = "B"
            elif htype == "4":
                htype = "C"
            # P (like position 0 above) means "previous font".
            if htype == "P":
                pop = True
                htype = self.oldhighlight
            elif htype == self.highlight:
                #self.notify("2: change_highlight(%s) is a no-op" % real)
                return ""
            #self.notify("2: change_highlight(%s) mapped to %s" % (real,htype))
            # Close any open emphasis before opening the new one;
            # roman (R) is represented by no markup at all.
            if self.highlight == "R":
                newhi = ""
            else:
                newhi = "</emphasis>"
            if htype != "R":
                if pop:
                    newhi += "<emphasis remap='P->%s%s'>" % (self.fontfamily,htype)
                else:
                    newhi += "<emphasis remap='%s%s'>" % (self.fontfamily,htype)
            #self.notify("3: change_highlight(%s) from %s used %s" % (htype, self.highlight, newhi))
            self.oldhighlight = self.highlight
            self.highlight = htype
            return newhi
    def direct_highlight(self, highlight, args, trailer=""):
        "Translate man(7)-style ordinary highlight macros."
        if not args:
            line = self.popline()
            # Deals with broken stuff like the
            # .B
            # .BI -G num
            # on the gcc.1 man page.
            if line is None or is_command(line):
                self.pushline(line)
                return make_comment("%s elided" % highlight)
        else:
            line = " ".join(args)
        # Detach a troff continuation escape so it can be re-glued
        # after the closing font change.
        if not trailer and line[-2:] == "\\c":
            trailer = "\\c" 
            line = line[:-2]
        # Multi-character font names need the \f( form.
        if len(highlight) > 1:
            highlight = "(" + highlight
        transformed = r"\f" + highlight
        transformed += line
        if highlight != "R":
            # Occasionally we see \ at end-of-line as somebody's error.
            # Prevent it from screwing us up.
            if transformed[-1] == '\\':
                transformed = transformed[:-1]
            transformed += r"\fR"	# Yes, see the definition of an-trap.
        transformed += trailer
        return transformed
    def alternating_highlight(self, highlight, words, trailer=""):
        "Translate the screwy man(7)-style alternating highlight macros."
        if not words:
            next = self.popline()
            # Deals with broken stuff like the
            # .BR
            # .SH CUSTOMIZATION
            # on the MAKEDEV.8 manual page.
            if next is None or is_command(next) or blankline.search(next):
                if next is not None:
                    self.pushline(next)
                return make_comment("bogus %s elided" % highlight)
            else:
                words = next.split()
        # Detach a troff continuation escape; it is glued back on at the end.
        if not trailer and words[-1][-2:] == "\\c":
            trailer = "\\c" 
            words[-1] = words[-1][:-2]
        # Alternate between the two fonts named by the macro (e.g. .BR, .IB).
        count = 0
        line = ""
        for word in words:
            line += r"\f" + highlight[count % 2]
            line += word
            count += 1
        # Occasionally we see \ at end-of-line as somebody's error.
        # Prevent it from screwing us up.
        if line[-1] == '\\':
            line = line[:-1]
        line += r"\fR" + trailer
        return line
    def index(self, args):
        "Translate a generic index request into <indexterm> markup."
        # Some manpages (in LPRng, for example) pass blank index keys to IX
        # because macroexpansion will do funky things with them.  Foil this
        # in order to cut down on useless error messages.  (A list
        # comprehension keeps len() working and is py3-safe, unlike filter.)
        args = [arg for arg in args if arg != ""]
        if len(args) == 0:
            # Fix: this used to call a nonexistent global error(), which
            # would have raised NameError; report via the instance method.
            self.error("index macro must have at least one argument.")
        elif len(args) == 1:
            return "<indexterm><primary>%s</primary></indexterm>" % args[0]
        elif len(args) == 2:
            return "<indexterm><primary>%s</primary><secondary>%s</secondary></indexterm>" % (args[0], args[1])
        elif len(args) == 3:
            return "<indexterm><primary>%s</primary><secondary>%s</secondary><tertiary>%s</tertiary></indexterm>" % (args[0], args[1], args[2])
        else:
            self.warning("index macro has more than three arguments.")
            return "<indexterm><primary>%s</primary><secondary>%s</secondary><tertiary>%s</tertiary></indexterm> <!-- %s -->" % (args[0], args[1], args[2], " ".join(args[3:]))

    def id_from_title(self, str):
        "Turn a string into a section ID usable in link declarations."
        # First, remove any trailing section of the title in parens
        str = re.sub(r" \(.*", "", str)
        str = str.replace("&nbsp;", "-")
        # Smash out all characters that aren't legal in SGML ids, except spaces
        legal = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_ "
        squashed = "".join([c for c in str if c in legal])
        # IDs cannot begin with whitespace, a dash or digits
        squashed = squashed.strip()
        # Guard: a title containing no legal id characters at all would
        # otherwise raise IndexError on the first-character test below.
        if not squashed:
            squashed = "x"
        if squashed[0] in "_0123456789":
            squashed = "x" + squashed
        # Hack spaces, smash case, and enforce jade's length limit.
        # Allow four digits for disambiguating numbers.
        return squashed.replace(" ", "_").lower()[:40]
    def make_id_from_title(self, str):
        id = self.id_from_title(str)
        # We allow duplicate sections, but warn about them
        if not self.idlist.has_key(id):
            self.idlist[id] = 1
            return id
        else:
            self.idlist[id] += 1
            #self.error("more than one section is named %s" % str)
            return id + `self.idlist[id]`

    def cmdfilter(self, start, end, command):
        """Translate start/end-delimited section by filtering through command.

        Lines from start through the line matching the end request are
        written to a temporary file, piped through the shell command, and
        the command's output (minus any XML prolog) is emitted inline.
        """
        # mkstemp() rather than the insecure, race-prone mktemp(); and
        # guarantee the temporary file is removed even if the filter fails.
        (handle, tmp) = tempfile.mkstemp("doclifter")
        try:
            fp = os.fdopen(handle, "w")
            fp.write(start + "\n")
            while self.lines:
                line = self.popline()
                fp.write(line + "\n")
                # end is a request like ".PE"; match without the leading dot.
                if match_command(line, end[1:]):
                    break
            fp.close()
            pipe = os.popen(command + " <" + tmp, "r")
            output = pipe.read()
            pipe.close()
            # The filter emits a standalone document; strip its XML
            # declaration and doctype since we're splicing into ours.
            output = xmlheader.sub("", output)
            output = doctype.sub("", output)
            self.emit(output)
        finally:
            os.remove(tmp)

    def id_exists(self, id):
        "Test whether an id already exists"
        return self.idlist.has_key(id)
    def TBL(self, enddelim=".TE"):
        "Translate and emit a tbl(1) section as a DocBook informaltable."
        # First process global options
        alignment = 'center'
        allbox = box = expand = False
        tab = "\t"
        lastheaderline = -1
        options = self.lines[0]
        # NOTE(review): this looks like it was meant to test whether the
        # line *ends* with &semi;, which would be len(options)-6, not
        # len(options)-1 -- confirm against the preprocessing that
        # produces the &semi; entity before changing it.
        if options.find("&semi;") == len(options)-1:
            taboption = DocLifter.tabset.search(options)
            if taboption:
                tab = taboption.group(1)
            options = options.replace("&semi;", "").replace(",", " ").split()
            self.lines.pop(0)
            # box/frame variants all map to a plain box; we can't render
            # double-thickness rules in DocBook.
            for x in options:
                if   x == 'allbox':      allbox = True
                elif x == 'box':         box = True
                elif x == 'frame':       box = True
                elif x == 'center':      alignment = x
                elif x == 'doublebox':   box = True
                elif x == 'doubleframe': box = True
                elif x == 'expand':      expand = True
                elif x == 'left':        alignment = x
                elif x == 'right':       alignment = x
        # Now parse table format lines
        tbl = []
        fmtwidth = 0
        while True:
            line = self.lines.pop(0)
            fline = []
            fmt = ""
            for ch in line:
                if ch in "lrcnds^," and fmt:
                    fline.append(fmt)
                    fmt = ""
                fmt += ch
            fline.append(fmt)
            tbl.append(fline)
            if len(fline) > fmtwidth:
                fmtwidth = len(fline)
            if line.find(".") > -1:
                break
        # Fill in missing format elements as copies of the rightmost ones
        for i in range(len(tbl)):
            if len(tbl[i]) < fmtwidth:
                tbl[i] += [tbl[i][-1]] * (fmtwidth - len(tbl[i]))
        # Grab all the data -- critical step that lets us do vertical spanning.
        data = []
        rowsep = []
        datawidth = 0
        while self.lines:
            tline = self.lines.pop(0)
            if tline == enddelim:
                break
            elif tline == ".TH":
                lastheaderline = len(data)
            elif tline[-2:] == "T{":
                # Text block: accumulate continuation lines through the
                # matching T} terminator.
                self.pushline(tline)
                while self.lines:
                    continuation = self.lines.pop(0)
                    tline += "\n" + continuation
                    if continuation[:2] == "T}" and continuation[-2:] != "T{":
                        break
            if not tline in ("_", "="):
                fields = tline.split(tab)
                if len(fields) > datawidth:
                    datawidth = len(fields)
                for i in range(len(fields)):
                    if fields[i][:3] == "T{\n":
                        fields[i] = fields[i][3:]
                    if fields[i][-3:] == "\nT}":
                        fields[i] = fields[i][:-3]
                data.append(fields)
                rowsep.append(len(self.lines)>0 and self.lines[0] in ("_","="))
        # This code only runs if there is no TH and the table has a multiline
        # format spec.
        if lastheaderline == -1 and len(tbl) > 1:
            # Deduce location of last header format line.  It's the
            # format line before the last format line not to contain a
            # ^.  DocBook vertical spans can't cross the
            # table-header/table-body boundary, so there has to be at
            # least one table body format line not containing ^.
            lastheaderline = len(tbl) - 2
            # Exit both loops at the first format cell containing ^.
            # (Formerly done by raising a string exception, which Python
            # 2.6 and later reject with a TypeError, so the dropout
            # never worked there; a flag does the double break instead.)
            found = False
            for tbli in range(len(tbl)):
                for j in range(len(tbl[tbli])):
                    if tbl[tbli][j][0] == '^':
                        lastheaderline = tbli - 2
                        found = True
                        break
                if found:
                    break
            assert lastheaderline >= 0
        # Fill in missing data elements to match the widest data width
        for i in range(len(data)):
            data[i] += [""] * (datawidth - len(data[i]))
        # Fill in missing format elements to match the widest data width
        for i in range(len(tbl)):
            tbl[i] += [tbl[i][-1]] * (datawidth - fmtwidth)
        # Now that we have the data, copy the last format line down
        # enough times to cover all rows.
        tbl += [tbl[-1]] * (len(data) - len(tbl))
        # Compute table header
        tline = "\n<informaltable pgwide='%d'" % expand
        # The box, doublebox, frame, and doubleframe options
        # turn this on.  We can't actually do a doublebox,
        # so we map it into a single-thickness box instead.
        #
        # Unfortunately frame gives the wrong effect when using the
        # Norm Walsh's DSSSL and XSL modular stylesheets.  They set
        # BORDER=1 in the generated HTML if frame is anything other
        # than `none', which forces framing on all interior cells.
        # This is wrong -- according to the DocBook reference, it
        # looks like the frame attribute is only supposed to control
        # *exterior* framing.
        if allbox:
            tline += " frame='all'"
        else:
            tline += " frame='none'"
        tline += ">"
        self.emit(tline)
        tline = "  <tgroup cols='%d' align='%s'" % (fmtwidth, alignment)
        if self.xml and allbox:
            tline += " colsep='1' rowsep='1'"
        self.emit(tline + ">")
        for i in range(fmtwidth):
            self.emit("    <colspec colname='c%d'%s>" % (i+1, self.xml))
        if lastheaderline == -1:
            self.emit("    <tbody>")
        else:
            self.emit("    <thead>")
        # OK, finally ready to emit the table
        for i in range(len(data)):
            if rowsep[i]:
                self.emit("      <row rowsep='1'>")
            else:
                self.emit("      <row>")
            for j in range(len(data[i])):
                # Cells continued into by a span are not emitted.
                if "^" in tbl[i][j] or "s" in tbl[i][j]:
                    continue
                # Count horizontal span: following 's' format cells.
                colspec = 1
                for k in range(j+1, fmtwidth):
                    if "s" not in tbl[i][k]:
                        break
                    else:
                        colspec += 1
                # Count vertical span: following rows with '^' here.
                rowspan = 1
                if i < len(data) - 1 and '^' in tbl[i+1][j]:
                    for k in range(i+1, len(data)):
                        if "^" in tbl[k][j]:
                            rowspan += 1
                line = "        <entry"
                if   "c" in tbl[i][j]: line += " align='center'"
                elif "n" in tbl[i][j]: line += " align='right'"
                elif "r" in tbl[i][j]: line += " align='right'"
                elif "l" in tbl[i][j]: line += " align='left'"
                if (j < fmtwidth) and "|" in tbl[i][j]:
                    line += " colsep='1'"
                if colspec > 1:
                    line += " namest='c%d' nameend='c%d'" % (j+1, j+colspec)
                if rowspan > 1:
                    line += " morerows='%d'" % (rowspan-1)
                    if "t" in tbl[i][j]:
                        line += " valign='top'"
                    elif "d" in tbl[i][j]:
                        line += " valign='bottom'"
                    else:
                        line += " valign='middle'"
                line += ">"
                if data[i][j] != r'^':
                    if 'b' in tbl[i][j]:
                        line += r"\fB"
                    line += str(data[i][j])
                    if highlight.search(line) is not None:
                        line += r"\fR"
                self.emit(line + "</entry>")
            self.emit("      </row>")
            if i == lastheaderline:
                if lastheaderline > -1:
                    self.emit("    </thead>")
                self.emit("    <tbody>")
        # Done
        self.emit("    </tbody>")
        self.emit("  </tgroup>")
        self.emit("</informaltable>\n")

    def EQN(self):
        "Translate and emit an EQN section."
        # Someday translate this to MathML.
        # For now, do an ugly hack with literallayout
        collected = []
        substantive = 0
        while self.lines:
            line = self.popline()
            if match_command(line, "EN"):
                break
            if line[:5] == "delim":
                # A delim statement sets (or clears) the inline-equation
                # delimiters used later by interpret_block().
                tokens = line.split()
                if len(tokens) != 2:
                    self.eqnsub = None
                elif tokens[1] == "off":
                    self.eqnsub = None
                    self.emit(make_comment("eqn delimiters off."))
                else:
                    es = re.escape(tokens[1][0])
                    ee = re.escape(tokens[1][1])
                    self.eqnsub = re.compile("([^" + es + "]*)" + es + "([^" + ee + "]+)"+ ee +"(.*)")
                    self.emit(make_comment("eqn delimiters set to %s%s" % (tokens[1][0],tokens[1][1])))
            else:
                substantive += 1
            collected.append(line)
        # Only emit a wrapper if there was real equation content, not
        # just delimiter housekeeping.
        if substantive:
            if self.preamble:
                self.emit(make_comment(".EQ"))
                for line in collected:
                    self.emit(make_comment(line))
                self.emit(make_comment(".EN"))
            else:
                self.emit("<literallayout remap='EQN'>")
                self.emit(".EQ")
                for line in collected:
                    self.emit(line)
                self.emit(".EN")
                self.emit("</literallayout>")

    def PIC(self):
        "Translate a PIC section to SVG."
        # Feed everything between .PS and .PE through pic2plot(1) asking
        # for SVG output; cmdfilter() emits the filtered result inline.
        self.cmdfilter(".PS", ".PE", "pic2plot -T svg")

    def ignore(self, cmd):
        "Declare that we want to ignore a command."
        # Only membership in ignore_set matters; the value is a dummy.
        self.ignore_set[cmd] = True

    def unignore(self, cmd):
        "Declare that we want to stop ignoring a command."
        # Raises KeyError if cmd was never registered via ignore().
        del self.ignore_set[cmd]

    def ignorable(self, command, nocomplaints=0):
        "Can this command be safely ignored?"
        if not command:
            return 0
        command = command.split()[0]	# only look at first token
        if command[0] in ".'":
            command = command[1:]
        if command in self.ignore_set:
            return 1
        for interpreter in self.interpreters:
            if command in interpreter.ignore_set:
                return 1
        if nocomplaints:
            for interpreter in self.interpreters:
                if command in interpreter.complain_set:
                    return 1
        return 0

    def execute(self, command, tokens):
        """Try to interpret this command using each interpreter in the stack.

        Returns 1 if some interpreter consumed (or passed through) the
        command, 0 if nobody recognized it.
        """
        if command in self.ignore_set:
            self.passthrough(tokens)
            return 1
        if self.verbose >= 4:
            # repr() replaces the deprecated, Python-2-only backquotes.
            self.notify("after ignore check, interpreter sees: " + repr(tokens))
        # This has to be a separate loop from the interpreter check
        for interpreter in self.interpreters:
            if interpreter.break_trap and (command in ("br","nl") or command in interpreter.parabreak_set or command in interpreter.sectionbreak_set):
                interpreter.break_trap(command)
        if self.verbose >= 4:
            self.notify("after break check, interpreter sees: " + repr(tokens))
        # Here is where string expansion gets done:
        stripped = []
        for arg in stripquotes(tokens):
            if arg in self.troff.strings:
                stripped += self.troff.strings[arg].split()
            else:
                # Single non-string args map to single args
                stripped.append(arg)
        # This has to be a separate loop from the break trap check
        for interpreter in self.interpreters:
            if command in interpreter.ignore_set:
                self.passthrough(tokens)
                return 1
            elif command in interpreter.complain_set:
                self.passthrough(tokens)
                return 1
            else:
                # Macros string-strip their arguments, troff requests don't.
                if interpreter == self.troff:
                    args = tokens
                else:
                    args = stripped
                if interpreter.interpret(args, self):
                    return 1
        return 0

    def interpret_block(self, lines):
        """Translate a list of input lines, dispatching each to the
        interpreter stack; the current input source is pushed and
        restored around the block."""
        # Line-by-line translation
        self.pushdown.append(self.lines)
        self.lines = lines
        self.lineno -= len(lines)
        try:
            while self.lines:
                line = self.popline()
                if self.verbose >= 4:
                    self.notify("interpreter sees: %s" % line)
                if line is None:
                    break
                # Usually we want to treat blank lines in body sections as
                # paragraph breaks. Man treats them that way all the time,
                # but we can't because we have structured sections like
                # Synopsis to cope with.  Also, they have a different
                # significance inside lists -- a blank line is not
                # expected to end a list item in .TP, but .PP is.  So
                # We'll make up our own command and pass it through
                # for the interpreters to munch on.
                if line == '':
                    if self.body_section() and not self.troff.nf:
                        self.pushline(".blank")
                    # Treat blank lines in synopses as break commands;
                    # see cpio.1 for an example of why this is necessary.
                    elif self.sectname and self.sectname.upper() == "SYNOPSIS":
                        self.pushline(".br")
                    else:
                        self.emit('')
                    continue
                # Handle eqn delimiters
                # (eqnsub was compiled by EQN() from a delim statement;
                # loop until no more delimited spans remain on the line.)
                if self.eqnsub:
                    doit = True
                    while doit:
                        transformed = self.eqnsub.sub(r"\1<!-- start eqn -->\2<!-- end eqn -->\3", line)
                        doit = (line != transformed)
                        line = transformed
                # Could be a comment.  Handle various ugly undocumented things.
                if is_comment(line):
                    if line[3:]:
                        line = make_comment(line)
                    else:
                        line = ""
                    self.emit(line)
                    continue
                # Ugh...this is a nasty kluge intended to deal with people
                # who forget that the closing bracket of a conditional is
                # a command.  It's probably going to bite us someday.
                if line == r"\}":
                    self.pushline(".\}")
                    self.warning(r"adventitious \} should probably be .\}")
                    continue
                # If no command, emit, and go on.
                if not is_command(line):
                    # Note: This should be the only place where plain text
                    # is emitted.  When in doubt, use pushline() rather
                    # emit -- that will send the generated text back through
                    # here.
                    if self.need_para and line and not line[:4] == "<!--":
                        line = "<para>" + line
                        self.need_para = False
                    self.emit(line)
                    continue
                # We've got a dot command.  Try to interpret it as a request.
                tokens = lineparse(line)
                # sys.stderr.write("Command tokens:" + `tokens` + "\n")
                command = tokens[0][1:]
                # All macro sets accept TBL
                if command == "TS":
                    self.paragraph()
                    self.TBL()
                    self.paragraph()
                # All macro sets accept EQN
                elif command == "EQ":
                    self.paragraph()
                    self.EQN()
                    self.paragraph()
                # All macro sets accept PIC
                elif command == "PS":
                    self.paragraph()
                    self.PIC()
                    self.paragraph()
                # Our pseudo-troff end command.
                elif command == "end":
                    for interpreter in self.interpreters:
                        if interpreter.break_trap:
                            interpreter.break_trap(".end")
                elif not self.execute(command, tokens):
                    # We were not able to find an interpretation
                    # sys.stderr.write("Raw line:" + line + "\n")
                    # sys.stderr.write("Tokens:" + `tokens` + "\n")
                    self.emit(make_comment(line))
                    self.error("uninterpreted '%s' command" % command)
        except:
            # Pass the exception upwards for debugging purposes
            # (everything is re-raised below, so nothing is swallowed;
            # the bare except exists only to annotate with the line).
            (exc_type, exc_value, exc_traceback) = sys.exc_info()
            if self.verbose >= 4:
                self.error("exception %s on: %s" % (exc_type, line))
            if not exc_type in (LiftException, SystemExit):
                self.error("internal error on: %s" % line)
            raise exc_type, exc_value, exc_traceback
        self.lines = self.pushdown.pop()

    def find(self, str):
        "Does the string occur in text we haven't seen yet?"
        for line in self.lines:
            if type(line) == type(""):
                if line.find(str) > -1:
                    return 1
        return 0

    def hack_translations(self, line):
        "Apply per-interpreter text translations and expand font escapes."
        # Each interpreter may have its own text translations.
        # Comment lines pass through untouched.
        if line[:4] != "<!--":
            for interpreter in self.interpreters:
                for (special, translation) in interpreter.translations:
                    line = line.replace(special, translation)
            # Translate font escapes.  We do this late in order to get
            # uniform handling of those that were generated either by
            # macros or by inline font escapes in the source text.
            while True:
                esc = highlight.search(line)
                if not esc:
                    break;
                else:
                    esc = esc.start()
                # Assumes each match starts with a two-character escape
                # like \f, so esc+1 is the escape letter and esc+2 the
                # font designator -- TODO confirm against the highlight
                # regex defined elsewhere in this file.
                if line[esc+2] == "(":
                    # \f( introduces a two-character font name.
                    line = line[:esc]+self.change_highlight(line[esc+3:esc+5],line[esc+1])+line[esc+5:]
                else:
                    # Single-character font name.
                    line = line[:esc]+self.change_highlight(line[esc+2:esc+3],line[esc+1])+line[esc+3:]
        return line

    def lift_link(self, line):
        "Checks highlighted content to see if it's an XML id which exists"
        # Currently, matches only <emphasis> highlights
        if not re.compile("<emphasis").match(line):
            return line;
        for (link_highlight, re_content) in self.lift_links:
            lift = re.compile("<emphasis\s+remap='%s'>(%s)</emphasis>" % (link_highlight, re_content))
            if lift.match(line):
                content = lift.sub(r"\1", line)
                id = self.id_from_title(content)
                if self.id_exists(id):
                     return '<link linkend="%s">%s</link>' % (id, content)
        return line

    def is_active(self, macro_set):
        "Is a given macro set (specified by name) active?"
        return macro_set in map(lambda x: x.__class__.name, self.interpreters)

    def activate(self, macro_set):
        "Activate a given macro set."
        # Don't put duplicate instances in the interpreter list.
        if not self.is_active(macro_set.name):
            newinstance = macro_set(self, self.verbose)
            self.interpreters.append(newinstance)
            if self.verbose:
                sys.stderr.write("Uses %s macros...\n" % (macro_set.name))

    def close_tags(self, before, tag, tight):
        """Generate close tags for a block-level open tag.

        Scans the translated text and inserts \\fR</tag> wherever an open
        <tag> scope is terminated by the next block-level markup rather
        than by an explicit close tag.  With tight true, the close is
        inserted before trailing whitespace and comments.
        """
        # state 0: scanning for the next <tag>; state 1: inside a <tag>
        # scope, scanning for whatever ends it.
        state = 0
        after = ""
        # This an re in case a tag instance has attributes.
        opentag = re.compile("<" + tag + r"\b[^>]*>")
        closetag = "</" + tag + ">"
        closetaglength = len(closetag)
        closer = "\\fR" + closetag
        # Tags treated as inline: they do NOT terminate the scope of the
        # block-level tag being closed.
        inline = {"emphasis":1,"literal":1,"quote":1,"varname":1, "keycap":1,
                  "indexterm":1,"primary":1,"secondary":1,"tertiary":1,
                  #"entry":1,"row":1,"thead":1,"tbody":1,"tgroup":1,
                  #"informaltable":1, "tgroup":1, "colspec":1,
                  "subscript":1,"superscript":1,
                  "function":1,"type":1,"parameter":1,
                  "option":1,"command":1,"replaceable":1,
                  "firstname":1,"surname":1,"othername":1,"lineage":1,
                  "orgname":1,"affiliation":1,
                  "ulink":1,"link":1,"email":1,"footnote":1,
                  "citerefentry":1,"refentrytitle":1,"manvolnum":1,
                  "citetitle":1,"filename":1,"productname":1,
                  "phrase":1,"anchor":1,"acronym":1,
                  "mediaobject":1,"imageobject":1,"imagedata":1,
                  }
        while True:
            if state == 0:	# Looking for <tag>
                # Find the next tag to be closed
                nexttag = opentag.search(before)
                if nexttag is None:
                    after += before
                    break		# We're done
                else:
                    after += before[:nexttag.end()]
                    before = before[nexttag.end():]
                    state = 1
                    continue
            elif state == 1:	# Found <tag>, looking for next tag
                nexttag = 1 + before[1:].find("<")
                if nexttag == -1:
                    self.error("missing tag after <tag>!\n")
                after += before[:nexttag]
                before = before[nexttag:]
                # </tag> just closes the scope
                if before.startswith(closetag):
                    after += before[:closetaglength]
                    before = before[closetaglength:]
                    state = 0
                    continue
                # Any processing instruction means
                # keep looking for close of scope.
                elif before[1] in "?":
                    after += before[:2]
                    before = before[2:]
                    state = 1
                    continue
                # Comments require more handling, as they may contain < and >
                elif before.startswith("<!--"):
                    endc = before.find("-->") + 3
                    after += before[:endc]
                    before = before[endc:]
                # Otherwise we know it's a real tag.  Grab it.
                else:
                    btag = before[1:before.find(">")]
                    # Throw away attributes
                    tagtype = btag.split()[0]
                    # Is it a close tag?
                    close = tagtype[0] == "/"
                    # is it contentless?
                    if tagtype[-1] == "/":
                        trim = -1
                    else:
                        trim = len(tagtype)
                    # If it's not inline, close scope and keep going
                    if btag[close:trim] not in inline:
                        # Back up over any whitespace in the end
                        if not tight:
                            after += closer
                        else:
                            # Skip back over whitespace and comments
                            alen = len(after)
                            leader = after
                            while True:
                                if leader[-1] in " \n\t":
                                    leader = leader.rstrip()
                                elif leader.endswith("-->"):
                                    leader = leader[:leader.rfind("<!--")]
                                else:
                                    break
                            if leader != after:
                                after = leader+closer+after[len(leader)-alen:]
                            else:
                                after += closer
                        state = 0
                        continue
                    # If it's inline, skip tag and keep looking for close
                    else:
                        after += before[:len(btag)]
                        before = before[len(btag):]
                        state = 1
                        continue
        return after

    def __call__(self, name, file, text):
        "Translate a string containing troff source to DocBook markup."
        self.file = file
        self.eqnsub = None
        self.preamble = True
        self.body_start = 0
        self.highlight = "R"
        self.oldhighlight = "R"
        self.lines = None
        self.lineno = 0
        self.need_para = False
        self.sectiondepth = 0
        self.output = []
        self.inclusions = []
        self.pushdown = []		# Stack of input sources
        self.break_trap = None
        self.diversion = self.output
        # Set up the base troff interpreter
        self.troff = TroffInterpreter(self, self.verbose)
        self.interpreters = [self.troff]
        # FIXME: This argument should be self.xml.
        # But the DocBook install doesn't actually register entity files
        # for the XHTML sets yet.
        self.troff.register_translations(0)
        self.localhints = SemanticHintsRegistry()
        # Parse semantic hints, if present.  Yes, they should go to
        # the global registry.
        global globalhints
        globalhints.read(text)
        # Now determine which interpreter to use.  This code has the
        # elaborations it does because mixing macro sets (especially
        # using mdoc macros in a man page and vice-versa) is not an
        # uncommon error.
        triggers = []
        # Find uses of each trigger, Sort by position of first occurrence
        for (pattern, consumer) in interpreter_dispatch.items():
            # If the file has an extension, we can exclude some possibilities
            if "." in file:
                required = required_extensions.get(consumer)
                if required and not file.endswith(required):
                    continue
            # Otherwise look for first uses of trigger patterns
            if len(pattern) <= 5:
                if text.find("\n.de " + pattern) > -1:
                    where= -1			# Defined as a macro
                if text[1:1+len(pattern)] == pattern:
                    where = 0			# Occurs as the first request
                else:
                    where = text.find("\n." + pattern)
            else:
                where = text.find(pattern)
            if where > -1:
                triggers.append((where, consumer))
        triggers.sort(lambda x, y: x[0] - y[0])
        triggered = map(lambda x: x[1], triggers)
        # Now walk through the list from the front, doing exclusions
        exclusion_lock = False
        for consumer in triggered:
            # Only allow one exclusive macro set.  This is how we avoid, e.g.,
            # the presence of mdoc macros after a man page header causing
            # confusion.
            if consumer.exclusive:
                if exclusion_lock:
                    continue
                else:
                    exclusion_lock = True
            # Troff commands get evaluated first
            self.activate(consumer)
        # Nuke carriage returns (as in ogg123.1).
        text = re.sub(r"(?<!\\)\r", "", text)
        # Very grubby hack to get rid of some otherwise unfixable cases.
        for (ugly, entity) in DocLifter.pretranslations:
            text = text.replace(ugly, entity)
        # Allow the interpreters to preprocess the output.
        for interpreter in self.interpreters:
            text = interpreter.preprocess(text)
        # Split it into lines
        toplevel = text.split("\n")
        # Check for pure inclusions
        cmtlines = 0
        while is_comment(toplevel[cmtlines]):
            cmtlines +=1
        if toplevel[cmtlines][:4] == ".so ":
            inclusion = cmtlines
            cmtlines += 1
            while cmtlines < len(toplevel) and toplevel[cmtlines] == "\n":
                cmtlines += 1
            if cmtlines == len(toplevel) - 1:
                raise LiftException("see " + toplevel[inclusion].strip()[4:],1)
        # If it's not a pure inclusion, warn if we don't have a macro set.
        if len(self.interpreters) == 1:
            sys.stderr.write("doclifter: no macro set recognized\n")
        # Strip off trailing blank lines, they interact badly with the
        # paragraphing logic.
        while toplevel and toplevel[-1] == "":
            toplevel.pop()
        # This actually happens with some generated Perl pages
        # that stop after .TH
        if len(toplevel) == 0:
            raise LiftException("page is empty", 1)
        # Plant a sentinel at the end to force paragraph and list closes
        i = -1
        if not toplevel[i] and is_comment(toplevel[i]):
            i -= 1
        toplevel.insert(len(toplevel)-i, ".end")
        # Emit the top-level tag, with an id that will direct the
        # DocBook toolchain to do the right thing.
        if self.interpreters[-1].toptag:
            if self.file != "stdin":
                self.emit("<%s id='%s'>" % (self.interpreters[-1].toptag, self.make_id_from_title(os.path.basename(file))))
                
            else:
                self.emit("<%s>" % self.interpreters[-1].toptag)
        # Now interpret all the commands in the block
        self.lineno = len(toplevel) + self.lineno
        self.interpret_block(toplevel)
        # Wrap it up
        self.pop_section(1)
        for interpreter in self.interpreters:
            if hasattr(interpreter, "wrapup"):
                interpreter.wrapup()
        if self.interpreters[-1].toptag:
            self.emit("</%s>\n" % self.interpreters[-1].toptag)
        # Close paragraphs properly.  Note: we're going to run
        # all the lines together for this and split them up
        # again afterwards.  Because body_start is a line index,
        # we have to not insert or delete lines here.
        before = "\n".join(self.output)
        after = self.close_tags(before, "para", tight=1)
        after = self.close_tags(after, "literallayout", tight=0)
        self.output = after.split("\n")
        # Maybe it's all comments and blanks (result of a bare hints file).
        # In that case return None to suppress output
        if not filter(lambda x:x[:4]=="<--" or blankline.match(x),self.output):
            return None
        # Time for post-translations
        self.highlight = "R"
        for j in range(self.body_start, len(self.output)):
            self.output[j] = self.hack_translations(self.output[j])
            self.output[j] = self.lift_link(self.output[j])
        # OK, now do pattern-based markup lifting on the DocBook markup
        head = "\n".join(self.output[:self.body_start]) + "\n"
        body = "\n".join(self.output[self.body_start:]) + "\n"
        for (pattern, substitute) in DocLifter.post_translation_patterns:
            body = re.compile(pattern).sub(substitute, body)
        for (h, r, s) in DocLifter.lift_highlights:
            lift = re.compile("<emphasis\s+remap='%s'>(%s)</emphasis>" % (h,r))
            ender = s.split()[0]	# discard attributes
            body = lift.sub(r"<%s>\1</%s>" % (s, ender), body)
        for (pattern, substitute) in DocLifter.post_lift_patterns:
            body = re.compile(pattern).sub(substitute, body)
        # Semantic lifting based on the hints dictionary
        text = head + self.localhints.apply(globalhints.apply(body))
        # Allow the interpreters to postprocess the output
        for interpreter in self.interpreters:
            text = interpreter.postprocess(text)
        # Nuke the fake entity we created to represent zero-width space.
        text = text.replace("&zerosp;", "")
        # Check for bad escapes in the generated output only.
        # This avoids error messages based on (for example) untraversed
        # branches in .if and .ie constructs.
        badescapes = []
        commentless = re.compile("<!-- .* -->").sub("", text)
        for k in range(0, len(commentless)-1):
            if commentless[k]=='\\' and commentless[k+1] in "abdhklLoprsuvwxyz":
                count = 0
                while k - count >= 0:
                    if commentless[k - count] == '\\':
                        count += 1
                    else:
                        break
                if (count % 2) and not commentless[k+1] in badescapes:
                    badescapes.append(commentless[k+1])
        if badescapes:
            sys.stderr.write("warning: uninterpreted escape sequences \\" + ", \\".join(badescapes) + "\n")
        # Use entity_names to compute the preamble
        preamble = ""
        if self.xml:
            preamble = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n"
        preamble += "<!DOCTYPE %s PUBLIC \"-//OASIS//DTD DocBook " % self.interpreters[-1].toptag
        if self.xml:
            preamble += "XML V4.1.2//EN\"\n                   \"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd\""
        else:
            preamble += "V4.1//EN\""
        if self.inclusions:
            preamble += " [\n"
            for (entity, file) in self.inclusions:
                preamble += "<!ENTITY %s SYSTEM '%s'>\n" % (entity, file)
            preamble += "]"
        preamble += ">\n"
        preamble += "<!-- lifted from %s by doclifter -->\n" % "+".join(map(lambda x: x.__class__.name, self.interpreters))
        # All done...
        return preamble + text

class TroffInterpreter:
    "Interpret troff requests (does macroexpansion)."
    name = "troff"
    exclusive = False
    toptag = ""
    # Requests the framework treats as not overridable by macro definitions.
    # Empty for plain troff. (Presumably consulted by the dispatch machinery
    # elsewhere in this file -- confirm against the framework.)
    immutable_set = {}
    ignore_set = {
        # Ignore .. outside macro context, some people use it as a no-op.
        ".":1,
        # Just ignore most font/character-size/page controls
        "ps":1,"ss":1,"cs":1,"bd":1,"fp":1,"pl":1,"pn":1,"po":1,"ne":1,
        # Ignore most text filling and vertical spacing controls.
        "ad":1,"na":1,"vs":1,"ls":1,"sv":1,"os":1,"ns":1,"rs":1,
        # Line length, tabs, leaders, fields, ligature mode, hyphenation.
        "ll":1,"ta":1,"tc":1,"lc":1,"fc":1,"lg":1,"nh":1,"hy":1,"hc":1,"hw":1,
        # Ignore commands for page titles, exit, temporary indent, miscellanea
        "tl":1,"pc":1,"lt":1,"ex":1,"mc":1,"fl":1,"ti":1,
        # Also ignore diversions and register operations.
        # (A redundant second "lt" entry was removed from this line; "lt"
        # is already listed with the page-title requests above.)
        "di":1,"da":1,"wh":1,"ch":1,"dt":1,"af":1,
        # This is some nonstandard extension (see access.1) safe to ignore
        "rb":1,
        # Weird groff extensions.
        "hcode":1, "hym":1, "hys":1, "hla":1, "shc":1, "cp":1,
        # Policy decision; don't do font remapping, on the theory that
        # passing through the font names the user specified into remap
        # attributes probably carries forward more information.
        "ftr":1,
        # Policy decision; ignore .in, since about all complaining about it
        # gave us was a lot of noise about instances inside mh macros.
        "in":1,
        }
    complain_set = {
        # Complain about stuff that produces gross motions.
        # NOTE(review): "ne" also appears in ignore_set above; which set
        # wins depends on the framework's dispatch order -- confirm.
        "ne":1,"mk":1,"rt":1,"ce":1, #"in":1,
        # We could do much of section 10, but these are a sign of trouble.
        "ec":1,"eo":1,"uf":1,"cc":1,"c2":1,
        # We can't handle environments, insertions, or next file.
        "ev":1,"rd":1,"nx":1,"pi":1,
        }
    # Requests that force a paragraph break.
    parabreak_set = {"bp":1,}
    # Requests that force a section break (none for plain troff).
    sectionbreak_set = {}
    # Table of (troff escape, SGML entity) replacement pairs, applied in
    # order.  Because replacement is sequential, a pattern listed twice can
    # only fire on its first occurrence; several former dead duplicates in
    # the Greek section have been corrected to the escapes evidently
    # intended (see the inline notes).
    sgml_translations = (
        # The entire list of characters described in the troff/nroff reference
        # is included here. Where there are no ISO equivalents it is noted.
        # The only collision in this table is a three-way between
        # \(or, \(br, \(bv.
        #
        # Troff escapes (not handled here: \. \! \" \$ \* \[a-zA-Z]. \{, \})
        (r"\%", "&shy;"),		# ISOnum
        (r"\'", "&acute;"),	# ISOdia
        (r"\(!=", "&ne;"),	# ISOtech
        (r"\(**", "*"),
        (r"\(*a", "&agr;"),	# ISOgrk1
        (r"\(*A", "&Agr;"),	# ISOgrk1
        (r"\(*b", "&bgr;"),	# ISOgrk1
        (r"\(*B", "&Bgr;"),	# ISOgrk1
        (r"\(*d", "&dgr;"),	# ISOgrk1
        (r"\(*D", "&Dgr;"),	# ISOgrk1
        (r"\(*e", "&egr;"),	# ISOgrk1
        (r"\(*E", "&Egr;"),	# ISOgrk1
        (r"\(*f", "&phgr;"),	# ISOgrk1
        (r"\(*F", "&PHgr;"),	# ISOgrk1
        (r"\(*g", "&ggr;"),	# ISOgrk1
        (r"\(*G", "&Ggr;"),	# ISOgrk1
        (r"\(*h", "&thgr;"),	# ISOgrk1
        (r"\(*H", "&THgr;"),	# ISOgrk1
        (r"\(*i", "&igr;"),	# ISOgrk1
        (r"\(*I", "&Igr;"),	# ISOgrk1
        (r"\(*k", "&kgr;"),	# ISOgrk1
        (r"\(*K", "&Kgr;"),	# ISOgrk1
        (r"\(*l", "&lgr;"),	# ISOgrk1
        (r"\(*L", "&Lgr;"),	# ISOgrk1
        (r"\(*m", "&mgr;"),	# ISOgrk1
        (r"\(*M", "&Mgr;"),	# ISOgrk1
        (r"\(*n", "&ngr;"),	# ISOgrk1
        (r"\(*N", "&Ngr;"),	# ISOgrk1
        (r"\(*o", "&ogr;"),	# ISOgrk1
        (r"\(*O", "&Ogr;"),	# ISOgrk1
        (r"\(*p", "&pgr;"),	# ISOgrk1
        (r"\(*P", "&Pgr;"),	# ISOgrk1
        (r"\(*q", "&psgr;"),	# ISOgrk1
        # Was a dead duplicate of \(*q; uppercase psi is \(*Q in troff.
        (r"\(*Q", "&PSgr;"),	# ISOgrk1
        (r"\(*r", "&rgr;"),	# ISOgrk1
        (r"\(*R", "&Rgr;"),	# ISOgrk1
        (r"\(*s", "&sgr;"),	# ISOgrk1
        (r"\(*S", "&Sgr;"),	# ISOgrk1
        (r"\(*t", "&tgr;"),	# ISOgrk1
        (r"\(*T", "&Tgr;"),	# ISOgrk1
        (r"\(*u", "&ugr;"),	# ISOgrk1
        (r"\(*U", "&Ugr;"),	# ISOgrk1
        (r"\(*w", "&ohgr;"),	# ISOgrk1
        (r"\(*W", "&OHgr;"),	# ISOgrk1
        (r"\(*x", "&khgr;"),	# ISOgrk1
        (r"\(*X", "&KHgr;"),	# ISOgrk1
        # Were dead duplicates of \(*x/\(*X; xi is \(*c in troff's Greek map.
        (r"\(*c", "&xgr;"),	# ISOgrk1
        (r"\(*C", "&Xgr;"),	# ISOgrk1
        (r"\(*y", "&eegr;"),	# ISOgrk1
        # Was a dead duplicate of \(*y; uppercase eta is \(*Y.
        (r"\(*Y", "&EEgr;"),	# ISOgrk1
        (r"\(*z", "&zgr;"),	# ISOgrk1
        # Was a dead duplicate of \(*z; uppercase zeta is \(*Z.
        (r"\(*Z", "&Zgr;"),	# ISOgrk1
        (r"\(+-", "&plusmn;"),	# ISOnum
        (r"\(->", "&rarr;"),	# ISOnum
        (r"\(12", "&frac12;"),	# ISOnum
        (r"\(14", "&frac14;"),	# ISOnum
        (r"\(34", "&frac34;"),	# ISOnum
        (r"\(<-", "&larr;"),	# ISOnum
        (r"\(==", "&equiv;"),	# ISOtech
        (r"\(Fi", "&ffilig;"),	# ISOpub
        (r"\(Fl", "&ffllig;"),	# ISOpub
        (r"\(aa", "&acute;"),	# ISOdia
        (r"\(ap", "&sim;"),	# ISOtech
        (r"\(bl", "&phonexb;"),	# ISOpub
        (r"\(br", "&verbar;"),	# ISOnum
        (r"\(bu", "&bull;"),	# ISOpub
        (r"\(bv", "&verbar;"),	# ISOnum
        (r"\(ca", "&cap;"),	# ISOtech
        (r"\(ci", "&cir;"),	# ISOpub
        (r"\(co", "&copy;"),	# ISOnum
        (r"\(ct", "&cent;"),	# ISOnum
        (r"\(cu", "&cup;"),	# ISOtech
        (r"\(da", "&darr;"),	# ISOnum
        (r"\(de", "&deg;"),	# ISOnum
        (r"\(dg", "&dagger;"),	# ISOpub
        (r"\(dd", "&Dagger;"),	# ISOpub
        (r"\(di", "&divide;"),	# ISOnum
        (r"\(em", "&mdash;"),	# ISOpub
        (r"\(eq", "&equals;"),	# ISOnum
        (r"\(es", "&empty;"),	# ISOamso
        (r"\(ff", "&fflig;"),	# ISOpub
        (r"\(fi", "&filig;"),	# ISOpub
        (r"\(fl", "&fllig;"),	# ISOpub
        (r"\(fm", "&prime;"),	# ISOtech
        (r"\(ge", "&ge;"),	# ISOtech
        (r"\(gr", "&nabla;"),	# ISOtech
        (r"\(hy", "&hyphen;"),	# ISOnum
        (r"\(ib", "&sube;"),	# ISOtech
        (r"\(if", "&infin;"),	# ISOtech
        (r"\(ip", "&supe;"),	# ISOtech
        (r"\(is", "&int;"),	# ISOtech
        (r"\(le", "&le;"),	# ISOtech
        # No equivalent of \(lh, left hand
        # No equivalent of \(lb, left lower bracket curve
        # No equivalent of \(lc, left upper bracket corner
        # No equivalent of \(lf, left lower bracket corner
        # No equivalent of \(lk, left bracket nipple
        # No equivalent of \(lt, left upper bracket curve
        (r"\(mi", "&minus;"),	# ISOtech
        (r"\(mo", "&isin;"),	# ISOtech
        (r"\(mu", "&times;"),	# ISOnum
        (r"\(no", "&not;"),	# ISOnum
        (r"\(or", "&verbar;"),	# ISOnum
        (r"\(pl", "&plus;"),	# ISOnum
        (r"\(pt", "&prop;"),	# ISOtech
        (r"\(rg", "&trade;"),	# ISOnum
        # No equivalent of \(rb, right lower bracket curve
        # No equivalent of \(rc, right upper bracket corner
        # No equivalent of \(rf, right lower bracket corner
        # No equivalent of \(rh, right hand
        # No equivalent of \(rk, right bracket nipple
        # No equivalent of \(rt, right upper bracket curve
        # No equivalent of \(rn, overbar
        (r"\(ru", "&lowbar;"),	# ISOnum
        (r"\(sb", "&sub;"),	# ISOtech
        (r"\(sc", "&sect;"),	# ISOnum
        (r"\(sl", "/"),
        (r"\(sp", "&sup;"),	# ISOtech
        (r"\(sq", "&squf;"),	# ISOpub
        (r"\(sr", "&radic;"),	# ISOtech
        (r"\(ts", "&sfgr;"),	# ISOgrk1
        (r"\(ua", "&uarr;"),	# ISOnum
        (r"\(ul", "_"),
        (r"\(~=", "&cong;"),	# ISOtech
        (r"\0", "&numsp;"),	# ISOpub
        (r"\^", "&hairsp;"),	# ISOpub
        (r"\`", "&grave;"),	# ISOdia
        (r"\e", "&bsol;"),	# ISOnum
        (r"\|", "&thinsp;"),	# ISOpub
        # Extended specials supported by groff; see groff_char(7).
        # These are listed in the order they occur on that man page.
        (r"\(-D",	"&ETH;"),	# ISOlat1: Icelandic uppercase eth
        (r"\(Sd",	"&eth;"),	# ISOlat1: Icelandic lowercase eth
        (r"\(TP",	"&THORN;"),	# ISOlat1: Icelandic uppercase thorn
        (r"\(Tp",	"&thorn;"),	# ISOlat1: Icelandic lowercase thorn
        (r"\(AE",	"&AElig;"),	# ISOlat1
        (r"\(ae",	"&aelig;"),	# ISOlat1
        (r"\(OE",	"&OElig;"),	# ISOlat2
        (r"\(oe",	"&oelig;"),	# ISOlat2
        # Case fix: \(IJ is the uppercase Dutch ligature, \(ij the lowercase.
        (r"\(IJ",	"&IJlig;"), 	# ISOlat2: Dutch IJ ligature
        (r"\(ij",	"&ijlig;"), 	# ISOlat2: Dutch ij ligature
        (r"\(ss",	"&szlig;"),	# ISOlat1
        (r"\('A",	"&Aacute;"),	# ISOlat1
        (r"\('C",	"&Cacute;"),	# ISOlat2
        (r"\('E",	"&Eacute;"),	# ISOlat1
        (r"\('I",	"&Iacute;"),	# ISOlat1
        (r"\('O",	"&Oacute;"),	# ISOlat1
        (r"\('U",	"&Uacute;"),	# ISOlat1
        (r"\('Y",	"&Yacute;"),	# ISOlat1
        (r"\('a",	"&aacute;"),	# ISOlat1
        (r"\('c",	"&cacute;"),	# ISOlat2
        (r"\('e",	"&eacute;"),	# ISOlat1
        (r"\('i",	"&iacute;"),	# ISOlat1
        (r"\('o",	"&oacute;"),	# ISOlat1
        (r"\('u",	"&uacute;"),	# ISOlat1
        (r"\('y",	"&yacute;"),	# ISOlat1
        (r"\(:A",	"&Auml;"),	# ISOlat1
        (r"\(:E",	"&Euml;"),	# ISOlat1
        (r"\(:I",	"&Iuml;"),	# ISOlat1
        (r"\(:O",	"&Ouml;"),	# ISOlat1
        (r"\(:U",	"&Uuml;"),	# ISOlat1
        (r"\(:Y",	"&Yuml;"),	# ISOlat2
        (r"\(:a",	"&auml;"),	# ISOlat1
        (r"\(:e",	"&euml;"),	# ISOlat1
        (r"\(:i",	"&iuml;"),	# ISOlat1
        (r"\(:o",	"&ouml;"),	# ISOlat1
        (r"\(:u",	"&uuml;"),	# ISOlat1
        (r"\(:y",	"&yuml;"),	# ISOlat1
        (r"\(^A",	"&Acirc;"),	# ISOlat1
        (r"\(^E",	"&Ecirc;"),	# ISOlat1
        (r"\(^I",	"&Icirc;"),	# ISOlat1
        (r"\(^O",	"&Ocirc;"),	# ISOlat1
        (r"\(^U",	"&Ucirc;"),	# ISOlat1
        (r"\(^a",	"&acirc;"),	# ISOlat1
        (r"\(^e",	"&ecirc;"),	# ISOlat1
        (r"\(^i",	"&icirc;"),	# ISOlat1
        (r"\(^o",	"&ocirc;"),	# ISOlat1
        (r"\(^u",	"&ucirc;"),	# ISOlat1
        (r"\(`A",	"&Agrave;"),	# ISOlat1
        (r"\(`E",	"&Egrave;"),	# ISOlat1
        (r"\(`I",	"&Igrave;"),	# ISOlat1
        (r"\(`O",	"&Ograve;"),	# ISOlat1
        (r"\(`U",	"&Ugrave;"),	# ISOlat1
        (r"\(`a",	"&agrave;"),	# ISOlat1
        (r"\(`e",	"&egrave;"),	# ISOlat1
        (r"\(`i",	"&igrave;"),	# ISOlat1
        (r"\(`o",	"&ograve;"),	# ISOlat1
        (r"\(`u",	"&ugrave;"),	# ISOlat1
        (r"\(~A",	"&Atilde;"),	# ISOlat1
        (r"\(~N",	"&Ntilde;"),	# ISOlat1
        (r"\(~O",	"&Otilde;"),	# ISOlat1
        (r"\(~a",	"&atilde;"),	# ISOlat1
        (r"\(~n",	"&ntilde;"),	# ISOlat1
        (r"\(~o",	"&otilde;"),	# ISOlat1
        (r"\(vS",	"&Scaron;"),	# ISOlat2
        (r"\(vs",	"&scaron;"),	# ISOlat2
        (r"\(vZ",	"&Zcaron;"),	# ISOlat2
        (r"\(vz",	"&zcaron;"),	# ISOlat2
        (r"\(,C",	"&Ccedil;"),	# ISOlat1
        (r"\(,c",	"&ccedil;"),	# ISOlat1
        (r"\(/L",	"&Lstrok;"),	# ISOlat2: Polish L with a slash
        (r"\(/l",	"&lstrok;"),	# ISOlat2: Polish l with a slash
        (r"\(/O",	"&Oslash;"),	# ISOlat1
        (r"\(/o",	"&oslash;"),	# ISOlat1
        (r"\(oA",	"&Aring;"),	# ISOlat1
        (r"\(oa",	"&aring;"),	# ISOlat1
        # Pattern fixed: troff input reads \(a" with no backslash before the
        # quote; the old raw string r"\(a\"" could never match.  The ISOdia
        # double-acute entity is &dblac; (&dback; does not exist).
        ("\\(a\"",	"&dblac;"),	# ISOdia: double acute accent (Hungarian umlaut)
        (r"\(a-",	"&macr;"),	# ISOdia: macron or bar accent
        (r"\(a.",	"&dot;"),	# ISOdia: dot above
        (r"\(a^",	"&circ;"),	# ISOdia: circumflex accent
        (r"\(aa",	"&acute;"),	# ISOdia: acute accent
        (r"\(ga",	"&grave;"),	# ISOdia: grave accent
        (r"\(ab",	"&breve;"),	# ISOdia: breve accent
        (r"\(ac",	"&cedil;"),	# ISOdia: cedilla accent
        (r"\(ad",	"&uml;"),	# ISOdia: umlaut or dieresis
        (r"\(ah",	"&caron;"),	# ISOdia: caron (aka hacek accent)
        (r"\(ao",	"&ring;"),	# ISOdia: ring or circle accent
        (r"\(a~",	"&tilde;"),	# ISOdia: tilde accent
        (r"\(ho",	"&ogon;"),	# ISOdia: hook or ogonek accent
        (r"\(.i",	"&inodot;"),	# ISOlat2: i without a dot (also in ISOamso)
        (r"\(.j",	"&jnodot;"),	# ISOamso: j without a dot
        (r"\(Cs",	"&curren;"),	# ISOnum: currency sign
        (r"\(Do",	"&dollar;"),	# ISOnum
        (r"\(Po",	"&pound;"),	# ISOnum
        (r"\(Ye",	"&yen;"),	# ISOnum
        # (r"\(Fn",	"&florin;"),	# No entity for this
        (r"\(ct",	"&cent;"),	# ISOnum
        (r"\(Fo",	"&laquo;"),	# ISOnum
        (r"\(Fc",	"&raquo;"),	# ISOnum
        # (r"\(fo",	"&guilsinglleft;"),	# No entity for this
        # (r"\(fc",	"&guilsinglright;"),	# No entity for this
        (r"\(r!",	"&iexcl;"),	# ISOnum
        (r"\(r?",	"&iquest;"),	# ISOnum
        # (r"\(ff",	"&ff;"), 	# ISOpub: ff ligature (in old troff)
        # (r"\(fi",	"&fi;"), 	# ISOpub: fi ligature (in old troff)
        # (r"\(fl",	"&fl;"), 	# ISOpub: fl ligature (in old troff)
        # (r"\(Fi",	"&ffi;"),	# ISOpub: ffi ligature (in old troff)
        # (r"\(Fl",	"&ffl;"),	# ISOpub: ffl ligature (in old troff)
        (r"\(OK",	"&check;"),	# ISOpub
        (r"\(Of",	"&ordf;"),	# ISOnum
        (r"\(Om",	"&ordm;"),	# ISOnum
        (r"\(pc",	"&middot;"),	# ISOnum
        (r"\(S1",	"&sup1;"),	# ISOnum
        (r"\(S2",	"&sup2;"),	# ISOnum
        (r"\(S3",	"&sup3;"),	# ISOnum
        # (r"\(<-",	"&larr;"),	# ISOnum (in old troff)
        # (r"\(->",	"&rarr;"),	# ISOnum (in old troff)
        (r"\(<>",	"&iff;"),	# ISOtech: iff (horizontal double-headed arrow)
        (r"\(da",	"&darr;"),	# ISOnum
        (r"\(ua",	"&uarr;"),	# ISOnum
        (r"\(lA",	"&lArr;"),	# ISOtech
        (r"\(rA",	"&rArr;"),	# ISOtech
        # These don't seem to have ISO equivalents
        # (r"\(hA", "&arrowdblboth;"),	# horizontal double-headed double arrow
        # (r"\(dA",	"&arrowdbldown;"),
        # (r"\(uA",	"&arrowdblup;"),
        # (r"\(vA",	"&\&;"), 	# vertical double-headed double arrow
        (r"\(ba",	"&verbar;"),	# ISOnum
        (r"\(bb",	"&brvbar;"),	# ISOnum
        # (r"\(br", "&br;"), 	# box rule with traditional troff metrics
        # (r"\(ru", "&ru;"), 	# baseline rule
        # (r"\(ul",	"&ul;"), 	# underline with traditional troff metrics
        # (r"\(bv",	"&bv;"), 	# bar vertical
        # (r"\(bs",	"&bell;"),	# No ISO equivalent
        # (r"\(ci",	"&cir;"),	# ISOpub (in old troff)
        # (r"\(bu",	"&bull;"),	# ISOnum (in old troff)
        # (r"\(co",	"&copy;"),	# ISOnum (in old troff)
        # (r"\(rg",	"&reg;"),	# ISOnum (in old troff)
        (r"\(tm",	"&trade;"),	# ISOnum
        # (r"\(dd",	"&Dagger;"),	# ISOpub: double dagger sign
        # (r"\(dg",	"&dagger;"),	# ISOpub
        (r"\(ps",	"&para;"),	# ISOnum: paragraph or pilcrow sign
        (r"\(sc",	"&sect;"),	# ISOnum (in old troff)
        # (r"\(de",	"&deg;"),	# ISOnum (in old troff)
        # (r"\(em",	"&mdash;"),	# ISOpub: em dash (in old troff)
        (r"\(en",	"&ndash;"),	# ISOpub: en dash
        (r"\(%0",	"&permil;"),	# ISOtech: per thousand, per mille sign
        # (r"\(12",	"&frac12;"),	# ISOnum (in old troff)
        # (r"\(14",	"&frac14;"),	# ISOnum (in old troff)
        # (r"\(34",	"&frac34;"),	# ISOnum (in old troff)
        (r"\(f/",	"&horbar;"),	# ISOnum: horizontal bar for fractions
        # (r"\(fm",	"&prime;"),	# ISOtech: footmark, prime (in old troff)
        (r"\(sd",	"&Prime;"),	# ISOtech
        (r"\(ha",	"^"),		# ASCII circumflex, hat, caret
        (r"\(ti",	"~"), 		# ASCII tilde, large tilde
        # (r"\(hy",	"&hyphen;"),	# ISOnum (in old troff)
        (r"\(lB",	"&lsqb;"),	# ISOnum: left (square) bracket
        (r"\(rB",	"&rsqb;"),	# ISOnum: right (square) bracket
        (r"\(lC",	"&lcub;"),	# ISOnum: left (curly) brace
        (r"\(rC",	"&rcub;"),	# ISOnum: right (curly) brace
        (r"\(la",	"&lang;"),	# ISOtech: left angle bracket
        (r"\(ra",	"&rang;"),	# ISOtech: right angle bracket
        # (r"\(lh",	"&handleft;"),	# No ISO equivalent
        # (r"\(rh",	"&handright;"),	# No ISO equivalent
        # Case fix: \(Bq is the low *double* comma quote, \(bq the single;
        # the two entity replacements were swapped.
        (r"\(Bq",	"&ldquor;"),	# ISOpub: low double comma quote
        (r"\(bq",	"&lsquor;"),	# ISOpub: low single comma quote
        (r"\(lq",	"&ldquo;"),	# ISOnum
        (r"\(rq",	"&rdquo;"),	# ISOpub
        (r"\(oq",	"&lsquo;"),	# ISOnum: single open quote
        (r"\(cq",	"&rsquo;"),	# ISOnum: single closing quote (ASCII 39)
        (r"\(aq",	"'"),		# apostrophe quote
        (r"\(dq",	"\""),		# double quote (ASCII 34)
        # (r"\(or",	"&verbar;"),	# ISOnum (in old troff)
        (r"\(at",	"&commat;"),	# ISOnum
        (r"\(sh",	"&num;"),	# ISOnum
        (r"\(sl",	"/"),
        (r"\(rs",	"&bsol;"),	# ISOnum
        # (r"\(sq",	"&squf;"),	# ISOpub (in old troff)
        (r"\(3d",	"&there4;"),	# ISOtech
        (r"\(tf",	"&there4;"),	# ISOtech
        # Their table duplicates the Greek letters here.
        # We list only the variant forms here, mapping them into
        # the ISO Greek 4 variants (which may or may not be correct :-()
        (r"\(+f",	"&b.phiv;"),	# ISOgrk4: variant phi
        (r"\(+h",	"&b.thetas;"),	# ISOgrk4: variant theta
        (r"\(+p",	"&b.omega;"),	# ISOgrk4: variant pi, looking like omega
        # Variant forms end
        (r"\(~~",	"&sim;"),	# ISOtech
        # This appears to be an error in the groff table.
        # It clashes with the Bell Labs use of ~= for a congruence sign
        # (r"\(~=",	"&erDot;"),	# ISOamsr
        (r"\(!=",	"&ne;"),	# ISOtech
        (r"\(**",	"&lowast;"),	# ISOtech
        (r"\(-+",	"&mnplus;"),	# ISOtech
        # (r"\(+-",	"&plusmn;"),	# ISOnum (in old troff)
        (r"\(<=",	"&le;"),	# ISOtech
        # (r"\(==",	"&equiv;"),	# ISOtech
        # The groff table says this is "congruence".
        (r"\(=~",	"&erDot;"),	# ISOamsr
        (r"\(>=",	"&ge;"),	# ISOtech
        (r"\(AN",	"&and;"),	# ISOtech
        (r"\(OR",	"&or;"),	# ISOtech
        # (r"\(no",	"&not;"),	# ISOnum
        (r"\(te",	"&exist;"), 	# ISOtech: there exists, existential quantifier
        (r"\(fa",	"&forall;"), 	# ISOtech: for all, universal quantifier
        (r"\(Ah",	"&aleph;"),	# ISOtech
        (r"\(Im",	"&image;"),	# ISOamso: Fraktur I, imaginary
        (r"\(Re",	"&real;"),	# ISOamso: Fraktur R, real
        # (r"\(if",	"&infin;"),	# ISOtech
        (r"\(md",	"&middot;"),	# ISOnum
        # (r"\(mo",	"&isin;"),	# ISOtech (in old troff)
        # (r"\(mu",	"&times;"),	# ISOnum (in old troff)
        (r"\(nb",	"&nsub;"),	# ISOamsr
        (r"\(nc",	"&nsup;"),	# ISOamsn
        (r"\(ne",	"&nequiv;"),	# ISOamsn
        (r"\(nm",	"&notin;"),	# ISOtech
        # (r"\(pl",	"&plus;"),	# ISOnum: + in special font (in old troff)
        # (r"\(eq",	"&equa;"),	# ISOnum: = in special font (in old troff)
        # (r"\(pt",	"&prop;"),	# ISOtech (in old troff)
        (r"\(pp",	"&perp;"),	# ISOtech
        # (r"\(sb",	"&sub;"),	# ISOtech (in old troff)
        # (r"\(sp",	"&sup;"),	# ISOtech (in old troff)
        # (r"\(ib",	"&sube;"),	# ISOtech (in old troff)
        # (r"\(ip",	"&supe;"),	# ISOtech (in old troff)
        # (r"\(ap",	"&sim;"),	# ISOtech (in old troff)
        # (r"\(is",	"&int;"),	# ISOtech: integral sign (in old troff)
        # (r"\(sr",	"&radic;"),	# ISOtech: square root (in old troff)
        # (r"\(rn",	"&overline;"),  # No ISO equivalent
        (r"\(pd",	"&part;"),	# ISOtech: partial differentiation sign
        (r"\(c*",	"&otimes;"),	# ISOamsb: multiply sign in a circle
        (r"\(c+",	"&oplus;"),	# ISOamsb: plus sign in a circle
        # (r"\(ca",	"&cap;"),	# ISOtech: intersection, cap
        # (r"\(cu",	"&cup;"),	# ISOtech: union, cup
        # (r"\(di",	"&divide;"),	# ISOnum: division sign
        (r"\(-h",	"&planck;"),	# ISOamso: h-bar (Planck's constant)
        # (r"\(gr",	"&nabla;"),	# ISOtech (in old troff)
        # (r"\(es",	"&empty;"),	# ISOamso
        (r"\(CL",	"&clubs;"),	# ISOpub: club suit
        (r"\(SP",	"&spades;"),	# ISOpub: spade suit
        (r"\(HE",	"&hearts;"),	# ISOpub: heart suit
        (r"\(DI",	"&diams;"),	# ISOpub: diamond suit
        # (r"\(CR",	"&carriagereturn;"), # carriage return symbol
        # Entity reference fixed: was "&bepsi" with no closing semicolon.
        (r"\(st",	"&bepsi;"),	# ISOamsr: such that
        (r"\(/_",	"&ang;"),	# ISOamso
        (r"\<<",	"&Lt;"),	# ISOamsr
        (r"\>>",	"&Gt;"),	# ISOamsr
        (r"\(wp",	"&weierp;"),	# ISOamso
        (r"\(lz",	"&loz;"),	# ISOpub
        # (r"\(an",	"&arrowhorizex;"), # horizontal arrow extension
        # Added in groff 1.18
        (r"\(eu",	"&euro;"),	# ISOnum
        (r"\(mc",	"&micro;"),	# ISOnum
      )
    xml_translations = (
        # The entire list of characters described in the troff/nroff reference
        # is included here. Where there are no Unicode equivalents it is noted.
        # If a target character is not defined in any of the XHTML entity sets,
        # prefer its ISO name in SGML to the Unicode code point, on the theory
        # that XML-Docbook installations generally have the ISO entities 
        # available and they are easier to parse by eyeball than Unicode hex.
        # The only collision in this table is a three-way between
        # \(or, \(br, \(bv. 
        #
        # Troff escapes (not handled here: \. \! \" \$ \* \[a-zA-Z]. \{, \})
        (r"\%", "&shy;"),		# HTMLlat1
        (r"\'", "&acute;"),	# HTMLlat1
        (r"\(!=", "&ne;"),	# HTMLsymbol
        (r"\(**", "*"),		# ASCII
        (r"\(*a", "&alpha;"),	# HTMLsymbol
        (r"\(*A", "&Alpha;"),	# HTMLsymbol
        (r"\(*b", "&beta;"),	# HTMLsymbol
        (r"\(*B", "&Beta;"),	# HTMLsymbol
        (r"\(*d", "&delta;"),	# HTMLsymbol
        (r"\(*D", "&Delta;"),	# HTMLsymbol
        (r"\(*e", "&epsilon;"),	# HTMLsymbol
        (r"\(*E", "&Epsilon;"),	# HTMLsymbol
        (r"\(*f", "&phi;"),	# HTMLsymbol
        (r"\(*F", "&Phi;"),	# HTMLsymbol
        (r"\(*g", "&gamma;"),	# HTMLsymbol
        (r"\(*G", "&Gamma;"),	# HTMLsymbol
        (r"\(*h", "&theta;"),	# HTMLsymbol
        (r"\(*H", "&Theta;"),	# HTMLsymbol
        (r"\(*i", "&iota;"),	# HTMLsymbol
        (r"\(*I", "&Iota;"),	# HTMLsymbol
        (r"\(*k", "&kappa;"),	# HTMLsymbol
        (r"\(*K", "&Kappa;"),	# HTMLsymbol
        (r"\(*l", "&lambda;"),	# HTMLsymbol
        (r"\(*L", "&Lambda;"),	# HTMLsymbol
        (r"\(*m", "&mu;"),	# HTMLsymbol
        (r"\(*M", "&Mu;"),	# HTMLsymbol
        (r"\(*n", "&nu;"),	# HTMLsymbol
        (r"\(*N", "&Nu;"),	# HTMLsymbol
        (r"\(*o", "&omicron;"),	# HTMLsymbol
        (r"\(*O", "&Omicron;"),	# HTMLsymbol
        (r"\(*p", "&pi;"),	# HTMLsymbol
        (r"\(*P", "&Pi;"),	# HTMLsymbol
        (r"\(*q", "&psi;"),	# HTMLsymbol
        (r"\(*q", "&Psi;"),	# HTMLsymbol
        (r"\(*r", "&rho;"),	# HTMLsymbol
        (r"\(*R", "&Rho;"),	# HTMLsymbol
        (r"\(*s", "&sigma;"),	# HTMLsymbol
        (r"\(*S", "&Sigma;"),	# HTMLsymbol
        (r"\(*t", "&tau;"),	# HTMLsymbol
        (r"\(*T", "&Tau;"),	# HTMLsymbol
        (r"\(*u", "&upsilon;"),	# HTMLsymbol
        (r"\(*U", "&Upsilon;"),	# HTMLsymbol
        (r"\(*w", "&omega;"),	# HTMLsymbol
        (r"\(*W", "&Omega;"),	# HTMLsymbol
        (r"\(*x", "&chi;"),	# HTMLsymbol
        (r"\(*X", "&Chi;"),	# HTMLsymbol
        (r"\(*x", "&xi;"),	# HTMLsymbol
        (r"\(*X", "&Xi;"),	# HTMLsymbol
        (r"\(*y", "&eta;"),	# HTMLsymbol
        (r"\(*y", "&Eta;"),	# HTMLsymbol
        (r"\(*z", "&zeta;"),	# HTMLsymbol
        (r"\(*z", "&Zeta;"),	# HTMLsymbol
        (r"\(+-", "&plusmn;"),	# HTMLlat1
        (r"\(->", "&rarr;"),	# HTMLsymbol
        (r"\(12", "&frac12;"),	# HTMLlat1
        (r"\(14", "&frac14;"),	# HTMLlat1
        (r"\(34", "&frac34;"),	# HTMLlat1
        (r"\(<-", "&larr;"),	# HTMLsymbol
        (r"\(==", "&equiv;"),	# HTMLsymbol
        (r"\(Fi", "ffi"),		# ASCII
        (r"\(Fl", "ffl"),		# ASCII
        (r"\(aa", "&acute;"),	# HTMLlat1
        (r"\(ap", "&sim;"),	# HTMLsymbol
        (r"\(bl", "&phone;"),	# ISOpub
        (r"\(br", "&verbar;"),	# ISOnum
        (r"\(bu", "&bull;"),	# HTMLsymbol
        (r"\(bv", "&brvbar;"),	# HTMLlat1
        (r"\(ca", "&cap;"),	# HTMLsymbol
        (r"\(ci", "&cir;"),	# ISOpub
        (r"\(co", "&copy;"),	# HTMLlat1
        (r"\(ct", "&cent;"),	# HTMLlat1
        (r"\(cu", "&cup;"),	# HTMLsymbol
        (r"\(da", "&darr;"),	# HTMLsymbol
        (r"\(de", "&deg;"),	# HTMLlat1
        (r"\(dg", "&dagger;"),	# HTMLspecial
        (r"\(dd", "&Dagger;"),	# HTMLspecial
        (r"\(di", "&divide;"),	# HTMLlat1
        (r"\(em", "&mdash;"),	# HTMLspecial
        (r"\(eq", "="),		# ASCII
        (r"\(es", "&empty;"),	# HTMLspecial
        (r"\(ff", "ff"),		# ASCII
        (r"\(fi", "fi"),		# ASCII
        (r"\(fl", "fl"),		# ASCII
        (r"\(fm", "&prime;"),	# HTMLsymbol
        (r"\(ge", "&ge;"),	# HTMLsymbol
        (r"\(gr", "&nabla;"),	# ISOtech
        (r"\(hy", "&hyphen;"),	# ISOnum
        (r"\(ib", "&sube;"),	# HTMLsymbol
        (r"\(if", "&infin;"),	# HTMLsymbol
        (r"\(ip", "&supe;"),	# HTMLsymbol
        (r"\(is", "&int;"),	# HTMLsymbol
        (r"\(le", "&le;"),	# HTMLsymbol
        (r"\(lh", "&#x261E;"),	# Unicode: Miscellaneous Symbols
        # No equivalent of \(lb, left lower bracket curve
        # No equivalent of \(lc, left upper bracket corner
        # No equivalent of \(lf, left lower bracket corner
        # No equivalent of \(lk, left bracket nipple
        # No equivalent of \(lt, left upper bracket curve
        (r"\(mi", "&minus;"),	# HTMLsymbol
        (r"\(mo", "&isin;"),	# HTMLsymbol
        (r"\(mu", "&times;"),	# HTMLsymbol
        (r"\(no", "&not;"),	# HTMLlat1
        (r"\(or", "&verbar;"),	# ISOnum
        (r"\(pl", "+"),		# ASCII
        (r"\(pt", "&prop;"),	# HTMLsymbol
        (r"\(rg", "&trade;"),	# HTMLsymbol
        # No equivalent of \(rb, right lower bracket curve
        # No equivalent of \(rc, right upper bracket corner
        # No equivalent of \(rf, right lower bracket corner
        (r"\(rh", "&#x261C;"),	# Unicode: Miscellaneous symbols
        # No equivalent of \(rk, right bracket nipple
        # No equivalent of \(rt, right upper bracket curve
        (r"\(rn", "&macr;"),	# HTMLlat1
        (r"\(ru", "&lowbar;"),	# ISOnum
        (r"\(sb", "&sub;"),	# HTMLsymbol
        (r"\(sc", "&sect;"),	# HTMLlat1
        (r"\(sl", "&frasl;"),	# HTMLsymbol
        (r"\(sp", "&sup;"),	# HTMLsymbol
        (r"\(sq", "&squf;"),	# ISOpub
        (r"\(sr", "&radic;"),	# HTMLsymbol
        (r"\(ts", "&sfgr;"),	# HTMLsymbol
        (r"\(ua", "&uarr;"),	# HTMLsymbol
        (r"\(ul", "_"),		# ASCII
        (r"\(~=", "&cong;"),	# HTMLsymbol
        (r"\0", "&numsp;"),	# ISOpub
        (r"\^", "&hairsp;"),	# ISOpub
        (r"\`", "&grave;"),	# ISOdia
        (r"\e", "&bsol;"),	# ISOnum
        (r"\|", "&thinsp;"),	# HTMLspecial
        # Extended specials supported by groff; see groff_char(7).
        # These are listed in the order they occur on that man page.
        (r"\(-D",	"&ETH;"),	# HTMLlat1: Icelandic uppercase eth
        (r"\(Sd",	"&eth;"),	# HTMLlat1: Icelandic lowercase eth
        (r"\(TP",	"&THORN;"),	# HTMLlat1: Icelandic uppercase thorn
        (r"\(Tp",	"&thorn;"),	# HTMLlat1: Icelandic lowercase thorn
        (r"\(AE",	"&AElig;"),	# HTMLlat1
        (r"\(ae",	"&aelig;"),	# HTMLlat1
        (r"\(OE",	"&OElig;"),	# HTMLspecial
        (r"\(oe",	"&oelig;"),	# HTMLspecial
        (r"\(IJ",	"&ijlig;"), 	# ISOlat2: Dutch IJ ligature
        (r"\(ij",	"&IJlig;"), 	# ISOlat2: Dutch ij ligature
        (r"\(ss",	"&szlig;"),	# HTMLlat1
        (r"\('A",	"&Aacute;"),	# HTMLlat1
        (r"\('C",	"&Cacute;"),	# ISOlat2
        (r"\('E",	"&Eacute;"),	# HTMLlat1
        (r"\('I",	"&Iacute;"),	# HTMLlat1
        (r"\('O",	"&Oacute;"),	# HTMLlat1
        (r"\('U",	"&Uacute;"),	# HTMLlat1
        (r"\('Y",	"&Yacute;"),	# HTMLlat1
        (r"\('a",	"&aacute;"),	# HTMLlat1
        (r"\('c",	"&cacute;"),	# ISOlat2
        (r"\('e",	"&eacute;"),	# HTMLlat1
        (r"\('i",	"&iacute;"),	# HTMLlat1
        (r"\('o",	"&oacute;"),	# HTMLlat1
        (r"\('u",	"&uacute;"),	# HTMLlat1
        (r"\('y",	"&yacute;"),	# HTMLlat1
        (r"\(:A",	"&Auml;"),	# HTMLlat1
        (r"\(:E",	"&Euml;"),	# HTMLlat1
        (r"\(:I",	"&Iuml;"),	# HTMLlat1
        (r"\(:O",	"&Ouml;"),	# HTMLlat1
        (r"\(:U",	"&Uuml;"),	# HTMLlat1
        (r"\(:Y",	"&Yuml;"),	# HTMLspecial
        (r"\(:a",	"&auml;"),	# HTMLlat1
        (r"\(:e",	"&euml;"),	# HTMLlat1
        (r"\(:i",	"&iuml;"),	# HTMLlat1
        (r"\(:o",	"&ouml;"),	# HTMLlat1
        (r"\(:u",	"&uuml;"),	# HTMLlat1
        (r"\(:y",	"&yuml;"),	# HTMLlat1
        (r"\(^A",	"&Acirc;"),	# HTMLlat1
        (r"\(^E",	"&Ecirc;"),	# HTMLlat1
        (r"\(^I",	"&Icirc;"),	# HTMLlat1
        (r"\(^O",	"&Ocirc;"),	# HTMLlat1
        (r"\(^U",	"&Ucirc;"),	# HTMLlat1
        (r"\(^a",	"&acirc;"),	# HTMLlat1
        (r"\(^e",	"&ecirc;"),	# HTMLlat1
        (r"\(^i",	"&icirc;"),	# HTMLlat1
        (r"\(^o",	"&ocirc;"),	# HTMLlat1
        (r"\(^u",	"&ucirc;"),	# HTMLlat1
        (r"\(`A",	"&Agrave;"),	# HTMLlat1
        (r"\(`E",	"&Egrave;"),	# HTMLlat1
        (r"\(`I",	"&Igrave;"),	# HTMLlat1
        (r"\(`O",	"&Ograve;"),	# HTMLlat1
        (r"\(`U",	"&Ugrave;"),	# HTMLlat1
        (r"\(`a",	"&agrave;"),	# HTMLlat1
        (r"\(`e",	"&egrave;"),	# HTMLlat1
        (r"\(`i",	"&igrave;"),	# HTMLlat1
        (r"\(`o",	"&ograve;"),	# HTMLlat1
        (r"\(`u",	"&ugrave;"),	# HTMLlat1
        (r"\(~A",	"&Atilde;"),	# HTMLlat1
        (r"\(~N",	"&Ntilde;"),	# HTMLlat1
        (r"\(~O",	"&Otilde;"),	# HTMLlat1
        (r"\(~a",	"&atilde;"),	# HTMLlat1
        (r"\(~n",	"&ntilde;"),	# HTMLlat1
        (r"\(~o",	"&otilde;"),	# HTMLlat1
        (r"\(vS",	"&Scaron;"),	# HTMLspecial
        (r"\(vs",	"&scaron;"),	# HTMLspecial
        (r"\(vZ",	"&Zcaron;"),	# ISOlat2
        (r"\(vz",	"&zcaron;"),	# ISOlat2
        (r"\(,C",	"&Ccedil;"),	# HTMLlat1
        (r"\(,c",	"&ccedil;"),	# HTMLlat1
        (r"\(/L",	"&Lstrok;"),	# ISOlat2: Polish L with a slash
        (r"\(/l",	"&lstrok;"),	# ISOlat2: Polish l with a slash
        (r"\(/O",	"&Oslash;"),	# HTMLlat1
        (r"\(/o",	"&oslash;"),	# HTMLlat1
        (r"\(oA",	"&Aring;"),	# HTMLlat1
        (r"\(oa",	"&aring;"),	# HTMLlat1
        (r"\(a\"",	"&dback;"),	# ISOdia: double acute accent (Hungarian umlaut)
        (r"\(a-",	"&macr;"),	# HTMLlat1: macron or bar accent
        (r"\(a.",	"&dot;"),	# ISOdia: dot above
        (r"\(a^",	"&circ;"),	# HTMLspecial: circumflex accent
        (r"\(aa",	"&acute;"),	# HTMLlat1: acute accent
        (r"\(ga",	"&grave;"),	# ISOdia: grave accent
        (r"\(ab",	"&breve;"),	# ISOdia: breve accent
        (r"\(ac",	"&cedil;"),	# ISOdia: cedilla accent
        (r"\(ad",	"&uml;"),	# HTMLlat1: umlaut or dieresis
        (r"\(ah",	"&caron;"),	# ISOdia: caron (aka hacek accent)
        (r"\(ao",	"&ring;"),	# ISOdia: ring or circle accent
        (r"\(a~",	"&tilde;"),	# ISOdia: tilde accent
        (r"\(ho",	"&ogon;"),	# ISOdia: hook or ogonek accent
        (r"\(.i",	"&inodot;"),	# ISOlat2: i without a dot (also in ISOamso)
        (r"\(.j",	"&jnodot;"),	# ISOamso: j without a dot
        (r"\(Cs",	"&curren;"),	# HTMLlat1: currency sign
        (r"\(Do",	"$"),		# ASCII
        (r"\(Po",	"&pound;"),	# HTMLlat1
        (r"\(Ye",	"&yen;"),	# HTMLlat1
        (r"\(Fn",	"&fnof;"),	# HTMLsymbol
        (r"\(ct",	"&cent;"),	# HTMLlat1
        (r"\(Fo",	"&laquo;"),	# HTMLlat1
        (r"\(Fc",	"&raquo;"),	# HTMLlat1
        # (r"\(fo",	"&guilsinglleft;"),	# No entity for this
        # (r"\(fc",	"&guilsinglright;"),	# No entity for this
        (r"\(r!",	"&iexcl;"),	# HTMLlat1
        (r"\(r?",	"&iquest;"),	# HTMLlat1
        # (r"\(ff",	"&ff;"), 	# ISOpub: ff ligature (in old troff)
        # (r"\(fi",	"&fi;"), 	# ISOpub: fi ligature (in old troff)
        # (r"\(fl",	"&fl;"), 	# ISOpub: fl ligature (in old troff)
        # (r"\(Fi",	"&ffi;"),	# ISOpub: ffi ligature (in old troff)
        # (r"\(Fl",	"&ffl;"),	# ISOpub: ffl ligature (in old troff)
        (r"\(OK",	"&check;"),	# ISOpub
        (r"\(Of",	"&ordf;"),	# HTMLlat1
        (r"\(Om",	"&ordm;"),	# HTMLlat1
        (r"\(pc",	"&middot;"),	# HTMLlat1
        (r"\(S1",	"&sup1;"),	# HTMLlat1
        (r"\(S2",	"&sup2;"),	# HTMLlat1
        (r"\(S3",	"&sup3;"),	# HTMLlat1
        # (r"\(<-",	"&larr;"),	# HTMLsymbol (in old troff)
        # (r"\(->",	"&rarr;"),	# HTMLsymbol (in old troff)
        (r"\(<>",	"&iff;"),	# ISOtech: iff (horizontal double-headed arrow)
        (r"\(da",	"&darr;"),	# HTMLsymbol
        (r"\(ua",	"&uarr;"),	# HTMLsymbol
        (r"\(lA",	"&lArr;"),	# ISOtech
        (r"\(rA",	"&rArr;"),	# ISOtech
        (r"\(hA",	"&#x21D4;"),	# horizontal double-headed double arrow
        (r"\(dA",	"&#x21D3;"),	# double down-arrow
        (r"\(uA",	"&#x21D1;"),	# double up-arrow
        (r"\(vA",	"&#x21D5;"), 	# vertical double-headed double arrow
        (r"\(ba",	"&verbar;"),	# ISOnum
        (r"\(bb",	"&brvbar;"),	# HTMLlat1
        # (r"\(br",	"&br;"), 	# box rule with traditional troff metrics
        # (r"\(ru",	"&ru;"), 	# baseline rule
        # (r"\(ul",	"&ul;"), 	# underline with traditional troff metrics
        # (r"\(bv",	"&bv;"), 	# bar vertical
        # (r"\(bs",	"&phone;"),	# ISOpub: Use black-phone symbol for Bell logo
        # (r"\(ci",	"&cir;"),	# ISOpub (in old troff)
        # (r"\(bu",	"&bull;"),	# ISOnum (in old troff)
        # (r"\(co",	"&copy;"),	# ISOnum (in old troff)
        # (r"\(rg",	"&reg;"),	# ISOnum (in old troff)
        (r"\(tm",	"&trade;"),	# HTMLsymbol
        # (r"\(dd",	"&Dagger;"),	# HTMspecial: double dagger sign
        # (r"\(dg",	"&dagger;"),	# HTMLspecial
        (r"\(ps",	"&para;"),	# HTMLlat1: paragraph or pilcrow sign
        (r"\(sc",	"&sect;"),	# HTMLlat1 (in old troff)
        # (r"\(de",	"&deg;"),	# HTMLlat1 (in old troff)
        # (r"\(em",	"&mdash;"),	# HTMLspecial: em dash (in old troff)
        (r"\(en",	"&ndash;"),	# HTMLspecial: en dash
        (r"\(%0",	"&permil;"),	# HTMLspecial: per thousand, per mille sign
        # (r"\(12",	"&frac12;"),	# HTMLlat1 (in old troff)
        # (r"\(14",	"&frac14;"),	# HTMLlat1 (in old troff)
        # (r"\(34",	"&frac34;"),	# HTMLlat1 (in old troff)
        (r"\(f/",	"&horbar;"),	# ISOnum: horizintal bar for fractions
        # (r"\(fm",	"&prime;"),	# HTMLsymbol: footmark, prime (in old troff)
        (r"\(sd",	"&Prime;"),	# HTMLsymbol
        (r"\(ha",	"^"),		# ASCII circumflex, hat, caret
        (r"\(ti",	"~"), 		# ASCII tilde, large tilde
        # (r"\(hy",	"&hyphen;"),	# ISOnum (in old troff)
        (r"\(lB",	"&lsqb;"),	# ISOnum: left (square) bracket
        (r"\(rB",	"&rsqb;"),	# ISOnum: right (square) bracket
        (r"\(lC",	"&lcub;"),	# ISOnum: left (curly) brace
        (r"\(rC",	"&rcub;"),	# ISOnum: right (curly) brace
        (r"\(la",	"&lang;"),	# ISOtech: left angle bracket
        (r"\(ra",	"&rang;"),	# ISOtech: right angle bracket
        # (r"\(lh", "&#x261E;"),	# Unicode: Miscellaneous Symbols
        # (r"\(rh", "&#x261C;"),	# Unicode: Miscellaneous symbols
        (r"\(Bq",	"&bdquo;"),	# HTMLspecial: low double comma quote
        (r"\(bq",	"&sbquo;"),	# HTMLspecial: low single comma quote
        (r"\(lq",	"&ldquo;"),	# HTMLspecial
        (r"\(rq",	"&rdquo;"),	# HTMLspecial
        (r"\(oq",	"&lsquo;"),	# HTMLspecial: single open quote
        (r"\(cq",	"&rsquo;"),	# HTMLspecial: single closing quote
        (r"\(aq",	"'"),		# ASCII apostrophe quote
        (r"\(dq",	"\""),		# ASCII double quote (ASCII 34)
        # (r"\(or",	"&verbar;"),	# ISOnum (in old troff)
        (r"\(at",	"&commat;"),	# ISOnum
        (r"\(sh",	"&num;"),	# ISOnum
        (r"\(sl",	"/"),		# ASCII
        (r"\(rs",	"&bsol;"),	# ISOnum
        # (r"\(sq",	"&squf;"),	# ISOpub (in old troff)
        (r"\(3d",	"&there4;"),	# HTMLsymbol
        (r"\(tf",	"&there4;"),	# HTMLsymbol
        # Their table duplicates the Greek letters here.
        # We list only the variant forms here, mapping them into
        # the ISO Greek 4 variants (which may or may not be correct :-() 
        (r"\(+f",	"&b.phiv;"),	# ISOgrk4: variant phi
        (r"\(+h",	"&b.thetas;"),	# ISOgrk4: variant theta
        (r"\(+p",	"&b.omega;"),	# ISOgrk4: variant pi, looking like omega
        # Variant forms end
        (r"\(~~",	"&sim;"),	# HTMLsymbol
        # This appears to be an error in the groff table.  
        # It clashes with the Bell Labs use of ~= for a congruence sign
        # (r"\(~=",	"&erDot;"),	# ISOamsr
        (r"\(!=",	"&ne;"),	# HTMLsymbol
        (r"\(**",	"&lowast;"),	# HTMLsymbol
        (r"\(-+",	"&mnplus;"),	# ISOtech
        # (r"\(+-",	"&plusmn;"),	# HTMLlat1 (in old troff)
        (r"\(<=",	"&le;"),	# HTMLsymbol
        # (r"\(==",	"&equiv;"),	# HTMLsymbol
        # The groff table says this is "congruence".
        (r"\(=~",	"&erDot;"),	# ISOamsr
        (r"\(>=",	"&ge;"),	# HTMLsymbol
        (r"\(AN",	"&and;"),	# HTMLsymbol
        (r"\(OR",	"&or;"),	# HTMLsymbol
        # (r"\(no",	"&not;"),	# HTMLlat1
        (r"\(te",	"&exist;"), 	# HTMLsymbol: there exists, existential quantifier
        (r"\(fa",	"&forall;"), 	# HTMLsymbol: for all, universal quantifier
        (r"\(Ah",	"&aleph;"),	# HTMLsymbol
        (r"\(Im",	"&image;"),	# HTMLsymbol: Fraktur I, imaginary
        (r"\(Re",	"&real;"),	# HTMLsymbol: Fraktur R, real
        # (r"\(if",	"&infin;"),	# HTMLsymbol
        (r"\(md",	"&middot;"),	# HTMLlat1
        # (r"\(mo",	"&isin;"),	# HTMLsymbol (in old troff)
        # (r"\(mu",	"&times;"),	# HTMLsymbol (in old troff)
        (r"\(nb",	"&nsub;"),	# HTMLsymbol
        (r"\(nc",	"&nsup;"),	# HTMLsymbol
        (r"\(ne",	"&nequiv;"),	# ISOamsn
        (r"\(nm",	"&notin;"),	# HTMLsymbol
        # (r"\(pl",	"&plus;"),	# ISOnum: + in special font (in old troff)
        # (r"\(eq",	"&equa;"),	# ISOnum: = in special font (in old troff)
        # (r"\(pt",	"&prop;"),	# ISOtech (in old troff)
        (r"\(pp",	"&perp;"),	# HTMLsymbol
        # (r"\(sb",	"&sub;"),	# HTMLsymbol (in old troff)
        # (r"\(sp",	"&sup;"),	# HTMLsymbol (in old troff)
        # (r"\(ib",	"&sube;"),	# HTMLsymbol (in old troff)
        # (r"\(ip",	"&supe;"),	# HTMLsymbol (in old troff)
        # (r"\(ap",	"&sim;"),	# HTMLsymbol (in old troff)
        # (r"\(is",	"&int;"),	# HTMLsymbol: integral sign (in old troff)
        # (r"\(sr",	"&radic;"),	# HTMLsymbol: square root (in old troff)
        # (r"\(rn", "&macr;"),	# HTMLlat1
        (r"\(pd",	"&part;"),	# HTMLsymbol: partial differentiation sign
        (r"\(c*",	"&otimes;"),	# HTMLsymbol: multiply sign in a circle
        (r"\(c+",	"&oplus;"),	# HTMLsymbol: plus sign in a circle
        # (r"\(ca",	"&cap;"),	# HTMLsymbol: intersection, cap
        # (r"\(cu",	"&cup;"),	# HTMLsymbol: union, cup
        # (r"\(di",	"&divide;"),	# HTMLlat1: division sign
        (r"\(-h",	"&planck;"),	# ISOamso: h-bar (Planck's constant)
        # (r"\(gr",	"&nabla;"),	# ISOtech (in old troff)
        # (r"\(es",	"&empty;"),	# HTMLsymbol
        (r"\(CL",	"&clubs;"),	# HTMLsymbol: club suit
        (r"\(SP",	"&spades;"),	# HTMLsymbol: spade suit
        (r"\(HE",	"&hearts;"),	# HTMLsymbol: heart suit
        (r"\(DI",	"&diams;"),	# HTMLsymbol: diamond suit
        (r"\(CR",	"&crarr;"), 	# HTMLsymbol: carriage return symbol
        (r"\(st",	"&bepsi"),	# ISOamsr: such that
        (r"\(/_",	"&ang;"),	# HTMLsymbol
        (r"\<<",	"&Lt;"),	# ISOamsr
        (r"\>>",	"&Gt;"),	# ISOamsr
        (r"\(wp",	"&weierp;"),	# HTMLsymbol
        (r"\(lz",	"&loz;"),	# HTMLsymbol
        # (r"\(an",	"&arrowhorizex;"), # horizontal arrow extension
        # Added in groff 1.18
        (r"\(eu",	"&euro;"),	# HTMLspecial
        (r"\(mc",	"&micro;"),	# HTMLlat1
    )
    # Precompiled (pattern, replacement) pairs applied when emitting XML.
    xmlify_patterns = [(re.compile(pattern), replacement) for (pattern, replacement) in (
        # troff point-size changes simply vanish
        (r"\\s[+-]?[0-9]+",	""),
        # The order of these & substitutions is significant: they must
        # run early, before any tags generated by requests appear,
        # otherwise those tags would be mangled.
        (r";",		"&semi;"),
        (r"(?<!\\)&(?![a-z]+;)",	"&amp;"),
        (r"\\&",	"&zerosp;"),
        (r"<--",	""),
        (r"-->",	""),
        (r"<",		"&lt;"),
        (r">",		"&gt;"),
        (r"\\-",	"-"),
        (r"\\ ", 	"&nbsp;"),
        )]

    def register_translations(self, xml):
        "Set the Troff interpreter's translation step."
        if xml:
            TroffInterpreter.translations = TroffInterpreter.xml_translations
        else:
            TroffInterpreter.translations = TroffInterpreter.sgml_translations

    def __init__(self, source, verbose):
        self.source = source
        self.verbose = verbose
        self.strings = {}	# String table for ds, as, rm, rn
        self.macros = {}	# String table for de, ae, rm, rn
        self.registers = {}	# Register table for nr, rr, rnn
        self.macroend = ".."	# Maco ender character as set by .em
        self.macroargs = []	# Macro argument stack
        self.macronames = []	# Macro name stack (only used in error msgs)
        self.nf = False		# Initially we're filling and adjusting
        self.screen = False		# Initially we're not in a screen context
        self.in_block = False	# Initially we're not in a block context
        self.break_trap = None
        self.ifstack = []

    def expand(self, line):
        "Expand strings (and, inside a macro call, arguments) in the given line."
        # Replace \*x (one-character names) and \*(xx (two-character
        # names) with their .ds definitions.
        for (key, value) in self.strings.items():
            if len(key) == 1:
                line = line.replace(r"\*"+key, value)
            else:
                line = line.replace(r"\*("+key, value)
        # If we're evaluating a macro body, substitute the actual
        # arguments of the innermost call for \\$1 ... \\$8.  Bug fix:
        # guard on the number of arguments actually supplied; the old
        # code indexed positions 1..8 unconditionally and raised
        # IndexError on calls with fewer than eight arguments.
        if self.macroargs:
            actuals = self.macroargs[-1]
            for argnum in range(1, min(len(actuals), 8) + 1):
                line = line.replace(r"\\$%d" % argnum, actuals[argnum-1])
        return line

    def eval_expr(self, exp):
        "Evaluate expressions for use in groff conditionals."
        if self.source.verbose >= 2:
            self.source.notify("eval_expr(%s)" % exp)
        if exp == "":
            return exp
        # Accept ! prefix
        elif exp[0] == "!":
            return not self.eval_expr(exp[1:])
        # Evaluate built-in conditions
        if exp[0] == 'n':
            return "1"+self.eval_expr(exp[1:])	# We're an nroff-like device
        elif exp[0] == 't':
            return "0"+self.eval_expr(exp[1:])	# Not a troff-like device
        elif exp[0] == 'o':
            return "1"+self.eval_expr(exp[1:])	# Forever on page 1
        elif exp[0] == 'e':
            return "0"+self.eval_expr(exp[1:])	# No page breaks
        elif exp[0] == 'v':
            return "0"+self.eval_expr(exp[1:])	# This isn't vroff either
        elif exp[0] == 'c':
            # This ring-around-the-rosy is necessary to defeat some
            # machinery that groff uses to define tty graphics.  We
            # need to be sure, for example, that c\[if] evaluates
            # to 1.  To make this work, \[if] has to be canonicalized
            # to \(if and looked up in the translation tables of the
            # active interpreters.  Otherwise including tty-char.tmac
            # will cause grave confusion.
            if exp[1:3] == r"\*":
                glyph = exp[2]
                exp = exp[3:]
            elif exp[1:3] == r"\(":
                glyph = exp[2:4]
                exp = exp[4:]
            else: # exp[1:3] == r"\["
                rbracket = exp.find("]")
                glyph = exp[2:rbracket]
                exp = exp[rbracket+1:]
            if len(glyph) == 1:
                glyph = r"\*" + glyph
            elif len(glyph) == 2:
                glyph = r"\(" + glyph
            else:
                glyph = r"\[" + glyph + "]"
            for interpreter in self.source.interpreters:
                if glyph in interpreter.translations:
                    return self.eval_expr("1"+self.eval_expr(exp))
                else:
                    return self.eval_expr("0"+self.eval_expr(exp))
        # Evaluate numeric expressions
        elif exp.startswith(r"\n("):
            if exp[3:5] in self.registers:
                val = self.registers[exp[3:5]]
            else:
                val = 0
            return self.eval_expr(`val`+exp[5:])
        elif exp.startswith(r"\n"):
            if exp[3] in self.registers:
                val = self.registers[exp[3]]
            else:
                val = 0
            return self.eval_expr(`val`+exp[3:])
        # Could be a string comparison
        elif exp.count(exp[0]) >= 3:
            count = 0
            for i in range(len(exp)):
                if exp[i] == exp[0]:
                    count += 1
                if count == 3:
                    break
            remainder = exp[i:]
            exp = exp[:i-1]
            expparts = exp.split(exp[0])
            return self.eval_expr(`expparts[1] == expparts[2]`+remainder)
        # Numeric literal must be followed by an operator
        elif exp[0] in '0123456789':
            numeric = ""
            while exp:
                if exp[0] in '0123456789':
                    numeric += exp[0]
                    exp = exp[1:]
                else:
                    break
            numeric = int(numeric)
            if not exp:
                return numeric
            # Look for an operator 
            if exp.startswith("=="):
                return numeric == self.eval_expr(exp[2:])
            elif exp.startswith("+"):
                return numeric + self.eval_expr(exp[1:])
            elif exp.startswith("*"):
                return numeric * self.eval_expr(exp[1:])
            elif exp.startswith("-"):
                return numeric - self.eval_expr(exp[1:])
            elif exp.startswith("/"):
                return numeric / self.eval_expr(exp[1:])
            elif exp.startswith("%"):
                return numeric % self.eval_expr(exp[1:])
            elif exp.startswith("="):
                return numeric == self.eval_expr(exp[1:])
            elif exp.startswith("&gt;="):
                return numeric >= self.eval_expr(exp[5:])
            elif exp.startswith("&lt;="):
                return numeric <= self.eval_expr(exp[5:])
            elif exp.startswith("&gt;?"):
                return max(numeric, self.eval_expr(exp[5:]))
            elif exp.startswith("&lt;?"):
                return min(numeric, self.eval_expr(exp[5:]))
            elif exp.startswith("&gt;"):
                return numeric > self.eval_expr(exp[4:])
            elif exp.startswith("&lt;"):
                return numeric < self.eval_expr(exp[4:])
            elif exp.startswith("&amp;"):
                return numeric & self.eval_expr(exp[1:])
            elif exp.startswith(":"):
                return numeric | self.eval_expr(exp[1:])
            else:
                self.source.error('unknown operator in expression %s' % exp)
        # We don't know what's going on, just call it true.
        else:
            raise sys.stderr.write("bogus-looking conditional " + exp)
            return True

    def skiptoend(self, tokens):
        "Discard the body of a conditional arm we've decided to ignore."
        text = "".join(tokens)
        opener = text.find("{")
        # Nothing to do unless the line carries an unbalanced { --
        # i.e. one not immediately closed by } on the same line.
        if opener == -1:
            return
        if opener < len(text) - 1 and text[opener + 1] == "}":
            return
        # Multi-line arm: eat input until the matching \} brings the
        # nesting depth back to zero.
        depth = 1
        while self.source.lines:
            candidate = self.source.popline()
            if candidate[1:3] in ("if", "ie", "el"):
                depth += 1
            elif candidate.find(r"\}") > -1:
                depth -= 1
                if depth == 0:
                    return
    def interpret(self, tokens, caller):
        command = tokens[0][1:]
        args = tokens[1:]
        # .nl is apparently an undocumented synonym for .br in groff(1).
        if command == "br" or command == "nl":
            pass	# Might get interpreted as <sbr/> by the break trap
        elif command == "sp":
            # Treat this as a paragraph break in body text.
            if self.source.body_section() and not self.nf:
                self.source.end_paragraph(label="sp")
            # Always insert the space, it can't hurt and may help
            # (e.g in function synopses).
            lines = 1
            if len(args) > 0 and args[0]:
                try:
                    lines = int(args[0])
                except ValueError:
                    pass
            for i in range(lines):
                self.source.diversion.append("")
            if self.source.body_section() and not self.nf:
                self.source.need_para = True
        elif command == "bp":
            self.source.passthrough(tokens)
            # Do this when the stylesheets are ready (ddate.1 is an example)
            #self.source.emit("<beginpage%s>" % self.source.xml)
        elif command == "ft":
            # Ugh...this deals with sequences like
            # .ft CW
            # .in +4n
            # .nf
            # which frequently occur in attempts to simulate .DS/.DE and
            # are going to turn into <para><emphasis remap="CW"></para>
            # which is guaranteed to lose because the scope of <emphasis>
            # can't cross a paragraph boundary.  So just swap these...
            if args and args[0]!="R" and self.source.peekline():
                # skip any number of things like .in
                while self.source.ignorable(self.source.peekline(), nocomplaints=1):
                    self.source.emit(make_comment(self.source.popline()))
                if self.source.peekline()[1:3] == "nf":
                    self.source.popline()
                    self.source.end_paragraph(label="ft")
                    self.source.emit("<literallayout remap='.nf'>")
                    self.nf = True
            # The actual highlight change
            if self.nf:
                if len(tokens) == 1:
                    self.source.emit(r"\fP")
                elif len(tokens[1]) == 1:
                    self.source.emit(r"\f" + tokens[1])
                else:
                    self.source.emit(r"\f(" + tokens[1])
        elif command == 'fam':		# groff font family extension
            if not args or args[0] == 'T':
                self.source.emit(r"\FT")
            elif len(args[0]) == 1:
                self.source.emit(r"\F" + args[0])
            else:
                self.source.emit(r"\F(" + args[0])
        elif command in ("fi", "FI"):	# .FI is an oddly common typo
            if self.nf or self.screen:
                # Flip side of the above.  Sequences like
                # .fi
                # .in +4n
                # .ft
                # have to be inverted.
                if self.source.peekline():
                    while blankline.match(self.source.peekline()):
                        self.source.popline()
                        while self.source.ignorable(self.source.peekline(), nocomplaints=1):
                            self.source.emit(make_comment(self.source.popline()))
                    next = self.source.peekline()
                    if next and next[0:3] == ".ft":
                        cmd = lineparse(self.source.popline())
                        if len(cmd) == 1:
                            self.source.emit(r"\fR")
                        else:
                            self.source.emit(r"\f" + cmd[1])
                # End the literal layout.
                # Because emphasis can't cross a block-layout boundary,
                # we need to turn off highlights here.
                if self.screen:
                    self.source.emit(r"\fR</screen> <!-- .fi -->")
                    self.screen = False
                else:
                    self.source.emit(r"\fR</literallayout> <!-- .fi -->")
                    self.nf = False
                self.source.need_paragraph()
        elif command in ("nf", "NF"):	# .NF is an oddly common typo
            self.source.end_paragraph(label="nf")
            if self.source.peekline() == ".ft CW":
                self.source.popline()
                self.source.emit("<screen> <!-- .nf -->")
                self.screen = True
            else:
                self.source.emit("<literallayout remap='.nf'>")
                self.nf = True
        elif command in ("ul", "cu"):
            if args:
                try:
                    forlines = int(args)
                except (ValueError, TypeError):
                    forlines = 1
                    self.source.error("nonnumeric %s argument" % command)
            else:
                forlines = 1
            for i in range(min(forlines, len(self.source.lines))):
                self.source.lines[i] = r"\fU" + self.source.lines[i] + r"\fP"
        elif command == "tr":
            while True:
                frompart = get_troff_char(args[0])
                args[0] = args[0][len(frompart):]
                topart = get_troff_char(args[0])
                args[0] = args[0][len(topart):]
                if not frompart:
                    break
                if frompart and not topart:
                    topart = " "
                # Each interpreter may have its own translation of the to part.
                for interpreter in self.source.interpreters:
                    for (special, translation) in interpreter.translations:
                        if topart == special:
                            topart = translation
                if self.source.verbose:
                    self.source.notify("tr: %s -> %s" % (frompart, topart))
                self.source.outsubst.append((frompart, topart))
        elif command == "tm":
            sys.stderr.write(" ".join(args) + "\n")
        elif command == "mso" and args[0] in mso_dispatch:
            self.source.activate(mso_dispatch[args[0]])
        elif command in ("so", "mso"):
            file = tokens[1]
            path = ""
            if command == "so":
                searchpath = self.source.includepath
            elif command == "mso":
                searchpath = glob.glob("/usr/share/groff/*/tmac")
            # First, search by straight filename
            for dir in searchpath:
                maybe = os.path.join(dir, file)
                if os.access(maybe, os.R_OK):
                    path = maybe
                    break
            # Next, on an mso, by macro-set name
            if not path and command == "mso":
                for dir in searchpath:
                    maybe = os.path.join(dir, file + ".tmac")
                    if os.access(maybe, os.R_OK):
                        path = maybe
                        break
            # Found the file.  If it's all comments and commands, include it
            if path:
                try:
                    text = self.preprocess(open(path).read())
                    lines = map(string.rstrip, text.split("\n"))
                    if filter(lambda x: x and not (is_comment(x) or is_command(x)), lines):
                        self.source.warning(file + " contains text -- generating entity reference.")
                        path = None
                    else:
                        if self.verbose:
                            self.source.notify("including" + path)
                        lines = ["<!-- *** start include from %s *** -->" % path] \
                                + lines \
                                + ["<!-- *** end include from %s *** -->" % path]
                        self.source.lines = lines + self.source.lines
                except (IOError, OSError):
                    self.source.warning(file + " not found -- generating entity reference.")
                    path = None
            if not path:
                entity = tokens[1].replace("/", "_")
                while entity[0] == "_":
                    entity = entity[1:]
                self.source.inclusions.append((entity, file))
                self.source.emit("&" + entity + ";")
        # String and macro interpretation
        elif command == "ds":
            #self.source.notify("String definition: " + `tokens`)
            if len(tokens) < 3:
                self.strings[tokens[1]] = ""
            elif self.macronames:
                self.strings[tokens[1]] = " ".join(tokens[2:])
            else:
                self.strings[tokens[1]] = " ".join(tokens[2:])
        elif command == "as":
            if len(tokens) == 3:
                if self.macronames:
                    self.strings[tokens[1]] += tokens[2]
                else:
                    self.strings[tokens[1]] += tokens[2]
        elif command == "rm":
            if tokens[1] in self.strings:
                del self.strings[tokens[1]]
            if tokens[1] in self.macros:
                del self.macros[tokens[1]]
        elif command == "rn":
            oldname = tokens[1]
            newname = tokens[2]
            suppressed = False
            for interpreter in self.source.interpreters:
                if oldname in interpreter.immutable_set.keys():
                    suppressed = True
                    break
            if suppressed:
                # Warning rather than error because redefining an immutable
                # is invariably a presentation hack.
                self.source.warning("attempt to rename immutable macro %s" % oldname)
            else:
                if oldname in self.strings:
                    self.strings[newname] = self.strings[oldname]
                    del self.strings[oldname]
                if oldname in self.macros:
                    self.macros[newname] = self.macros[oldname]
                    del self.macros[oldname]
        elif command == "em":
            if len(tokens) == 1:
                self.macroend = ".."
            else:
                self.macroend = tokens[1]
        elif command in ("de", "am"):
            if self.verbose >= 2:
                self.source.notify("macro definition begins")
            name = tokens[1]
            if len(tokens) >= 3:
                endon = newname
            else:
                endon = self.macroend
            if command == ".de" or not name in self.macros:
                self.macros[name] = []
            isused = filter(lambda x: x[0:len(name)+1]=="."+name, self.source.lines)
            suppressed = False
            for interpreter in self.source.interpreters:
                if name in interpreter.immutable_set.keys():
                    suppressed = True
                    break
            # We don't want macro listings showing up in the Synopsis section.
            # They play hell with the Synopsis parser...
            listing = isused and self.source.body_section() and not suppressed and not self.source.quiet
            if listing:
                self.source.emit("<!-- Macro definition:")
                self.source.emit("%s %s" % (command, name))  
            while self.source.lines:
                line = self.source.popline()
                if line.replace(" ", "") == endon:
                    break
                # Filter out commands we're going to ignore anyway
                linetoks = lineparse(line)
                if linetoks:
                    for interpreter in self.source.interpreters:
                        if linetoks[0][1:] in interpreter.ignore_set:
                            line += '\t.\\" IGNORED'
                        elif linetoks[0][1:] in interpreter.complain_set:
                            line += '\t.\\" IGNORED'
                            if not suppressed:
                                self.source.error("cannot translate "+linetoks[0][1:])
                if listing:
                    self.source.emit(line)
                self.macros[name].append(line)
            if listing:
                self.source.emit("-->", trans=0)
            if suppressed:
                del self.macros[name]
                self.source.emit(make_comment("%s listing suppressed (immutable)"%name))
            elif not isused:
                self.source.emit(make_comment("%s listing suppressed (not used)"%name))
            # OK, now perform macro reduction.  Recognize macros that are
            # presentation-level hacks around various standard constructs
            # that we want to be able to recognize and elide.
            for interpreter in self.source.interpreters:
                if hasattr(interpreter, "reductions"):
                    map(lambda x: self.conditionally_replace(x[0], x[1]), interpreter.reductions.items())
        # Implementation of numeric registers
        elif command == "nr":
            reg = args[0]
            val = args[1]
            if val[0] in "-+":
                if reg in self.macros:
                    baseval = self.macros[reg]
                else:
                    baseval = '0'
                val = `eval(baseval+val)`
            self.registers[reg] = val
        elif command == "rr":
            reg = args[0]
            if reg in self.registers:
                del self.registers[reg]
        elif command == "rnn":		# Groff extension
            reg = args[0]
            new = args[1]
            if reg and new in self.registers:
                val = self.registers[reg]
                del self.registers[reg]
                self.registers[new] = val
        # OK, now process conditionals
        elif command in ("ie", "if"):
            if len(tokens) == 1:
                # Cope with a transposition typo...see vmstat(8) for example.
                if command == "if" and self.nf:
                    self.source.pushline(".fi")
                else:
                    self.source.error("malformed conditional %s" % command)
                return 1
            # Evaluate the guard
            guardval = self.eval_expr(tokens[1])
            if self.verbose >=2:
                self.source.notify("condition stack push %s from: %s" % (guardval, `tokens`))
            self.ifstack.append(guardval)
            if command == "ie":
                if self.verbose >=2:
                    self.source.notify("stack push from: " + `tokens`)
                self.ifstack.append(guardval)
            # If it's a one-liner and condition true, push back remaining text,
            # *unless* the first token past the guard is an .if command.  The
            # latter test deals with a nasty piece of boilerplate created
            # by Perl documentation tools that looks like this:
            #
            # .if \n(.H>23 .if \n(.V>19 \
            #
            # Evidently this is somebody's way of getting "and" in conditionals.
            if guardval:
                if len(tokens) > 2 and not tokens[2].startswith(r"\{") and tokens[2] != ".if":
                    self.source.pushline(r".\}")
                    if tokens[2].startswith(r"\{"):
                        tokens[2] = tokens[2][2:]
                    if self.verbose >=4:
                        self.source.notify("pushing back: " + `tokens[2:]`)
                    self.source.pushline(" ".join(tokens[2:]))
            else:
                # Kluge -- we don't want to trip on .ig terminators
                for i in range(1, len(args)):
                    if args[i] == '.ig' and len(args) > i:
                        self.source.ignore(args[i+1])
            # There may be a hanging { on the next line; if so, nuke it
            if self.source.peekline() in ("\\{", "\\{\\"):
                self.source.popline()
            # If condition is false we need to do a skip now
            if not guardval:
                self.skiptoend(tokens)
        elif command == "el":
            if not self.ifstack:
                # Urggh.  The way this happens is that when somebody who
                # should know better (such as Larry Wall) writes something
                # like
                #
                # .if n .Sh """Considerations"""
		# .el .Sh "``Considerations''"
                #
                # in the a2p man page, forgetting that this needs an .ie
                # rather than .if in order for the stack operations to
                # balance.  Let's not pop the stack and die.
                if self.verbose >= 1:
                    self.source.warning("unbalanced condition-stack operation")
                condition = True	# Works out right if the guard was true.
            else:
                condition = self.ifstack[-1]
            if self.verbose >=2:
                self.source.notify(".el %s" % condition)
            # If it's a one-liner and condition false, push back remaining text
            if not condition:
                if len(tokens) > 1 and tokens[1][:2] != r"\{":
                    self.source.pushline(r".\}")
                    self.source.pushline(" ".join(tokens[1:]))
            # If condition is true we need to do a skip now
            if condition:
                self.skiptoend(tokens)
            if self.verbose >=2:
                self.source.notify("stack state after .el: %s" % self.ifstack)
        elif command == r"\}":
            if self.ifstack:	# See above note on a2p
                if self.verbose >=2:
                    self.source.notify("stack pop from: " + `tokens`)
                self.ifstack.pop()
        elif command == "nop":	# groff extension
            if args:
                self.source.pushline(" ".join(args))
        elif command == "return":	# groff extension
            self.source.macro_return()
        elif command == "ig":
            if not args:
                args[0] = '.'
            if self.source.body_section():
                self.source.end_paragraph(label="ft")
            self.source.emit("<!-- " + " ".join(tokens))
            while self.source.lines:
                line = self.source.popline()
                if line.startswith("." + args[0]):
                    self.source.emit(".%s -->" % args[0], trans=0)
                    break
                self.source.emit(line)
            if self.source.body_section():
                self.source.need_paragraph()
        # Debugging
        elif command == "pm":	# For debugging
            sys.stderr.write("Strings: " + `self.strings` + "\n")
            sys.stderr.write("Macros: " + `self.macros` + "\n")
            sys.stderr.write("Registers: " + `self.registers` + "\n")
        elif command in self.macros:
            self.source.lineno = 0
            self.macroargs.append(stripquotes(tokens[1:]) + ([""] * 9))
            self.macronames.append(command)
            self.source.lines = self.macros[command] + [self.source.lineno] + self.source.lines
        # Extended groff macros
        elif command == "ab":
            if not args:
                args = ["User Abort"]
            sys.stderr.write(" ".join(args) + "\n")
            raise SystemExit, 1
        elif command == "als":
            # Implements soft link rather than hard; hard would be difficult
            # because the target would be the old macro name.
            self.strings[args[0]] = self.strings[args[1]]
        elif command == "do":
            pass
        elif command == "shift":
            tokens = tokens[1:]
            args = tokens[1:]
        elif command == "PSPIC":
            file = args[0]
            self.source.pushline('<mediaobject>\n<imageobject><imagedata fileref="%s" format="EPS"/></imageobject>\n</mediaobject>' % file)
        # We're done
        else:
            return 0
        # Was there a trailing close bracket?  Then push it back.
        if len(tokens) > 1 and tokens[-1] == r"\}":
            if self.verbose:
                self.source.notify("pushing back a trailing bracket")
            self.source.pushline(r".\}")
        return 1
    def conditionally_replace(self, wrapper, standard):
        "Replace a wrapper with a standard macro if the wrapper contains it."
        # Only fire when the wrapper macro is defined and its body
        # actually invokes the standard macro somewhere.
        body = self.macros.get(wrapper)
        if not body or not any(standard in macroline for macroline in body):
            return
        if not self.source.quiet:
            self.source.emit(make_comment("%s reduced to %s" % (wrapper, standard)))
        # Rewrite every invocation of the wrapper into the standard macro.
        # (The leading "." in the pattern is a wildcard, so both . and '
        # control characters are matched.)
        invocation = re.compile("^." + wrapper)
        self.source.lines = [invocation.sub("." + standard, x) for x in self.source.lines]
    def preprocess(self, text):
        "Repair common markup slips and XMLify the text, line by line."
        expanded = []
        # Performance fix: compile the ellipsis pattern once, not on
        # every line of the document as the original did.
        ellipsis = re.compile(r"^(\s+)(\.\.\..*)")
        for line in text.split("\n"):
            # Fix a common error -- beginning a line with a string quote
            # that isn't supposed to be a non-breaking request (example at
            # eject.1).
            if line and line[0]=="'" and not is_comment(line) and len(line)>3:
                if line[1] not in string.letters or line[2] not in string.letters:
                    line = r"\&" + line
            # Don't allow ellipses to be mistaken for
            # commands (TCL_EvalTokens.3).
            seen = ellipsis.match(line)
            if seen:
                line = seen.group(1) + r"\&" + seen.group(2)
            # XMLify everything
            for (pattern, substitute) in TroffInterpreter.xmlify_patterns:
                line = pattern.sub(substitute, line)
            expanded.append(line)
        return "\n".join(expanded)
    def postprocess(self, text):
        """Whole-document cleanups applied after all macro interpreters run.

        Restores the &semi; placeholder, turns troff vertical motions into
        superscript/subscript markup, and lifts literallayout blocks that
        look like source listings or screenshots into richer tags.
        """
        # We turned ; into a fake XML entity early on, in order to make
        # tokenization of synopses easier.  Turn it back.
        text = text.replace("&semi;", ";")
        # Convert vertical motions to superscript/subscript operations.
        # A small state machine scans left to right, pairing each upward
        # motion with the following downward motion and vice versa;
        # `direction` records which half of a pair we are inside.
        upmotion   = re.compile(r"\\v'\-\.[0-9]+[mnv]'|\\u(\.[0-9]+[mnv])?")
        downmotion = re.compile(r"\\v'\+?\.[0-9]+[mnv]'|\\d(\.[0-9]+[mnv])?")
        direction = None
        while True:
            upward = upmotion.search(text)
            downward = downmotion.search(text)
            if not (upward or downward):
                break
            # When both match, process only the leftmost one this pass.
            if upward and downward:
                if upward.start() < downward.start():
                    downward = None
                else:
                    upward = None
            if direction is None:
                # Not inside a pair yet: an up motion opens a superscript,
                # a down motion opens a subscript.
                if upward:
                    text = text[:upward.start()] \
                           + r"<superscript>" \
                           + text[upward.end():]
                    direction = 'up';
                    if self.verbose > 1:
                        print "Starting from None, I see upward", upward
                elif downward:
                    text = text[:downward.start()] \
                           + r"<subscript>" \
                           + text[downward.end():]
                    direction = 'down'
                    if self.verbose > 1:
                        print "Starting from None, I see downward", downward
                else:
                    self.source.error("error in vertical-motion match")
            elif direction == 'up':
                # Inside a superscript: only a downward motion (closing
                # the pair) is legal here.
                if upward:
                    self.source.error("two upward motions in a row")
                    raise SystemExit
                elif downward:
                    text = text[:downward.start()] \
                           + r"</superscript>" \
                           + text[downward.end():]
                    direction = None
                    if self.verbose > 1:
                        print "Starting from up, I see downward", downward
                else:
                    self.source.error("error in vertical-motion match (up)")
            elif direction == 'down':
                # Inside a subscript: only an upward motion may close it.
                if upward:
                    text = text[:upward.start()] \
                           + r"</subscript>" \
                           + text[upward.end():]
                    direction = None
                    if self.verbose > 1:
                        print "Starting from down, I see upward", upward
                elif downward:
                    self.source.error("two downward motions in a row")
                    raise SystemExit
                else:
                    self.source.error("error in vertical-motion match (down)")
        # Now some pattern lifting to be applied after all macro sets.
        # This hairy pattern matches the result of lifting .nf/.fi
        # sections, possibly with a highlight.  The %s slot receives the
        # keyword alternation for one target language.
        keyword_lifter = \
            "(<literallayout remap='.nf'>(?:\n*<emphasis remap='[A-Z]*'>)?)" \
            "([^<]*(%s)[^<]*)" \
            "((</emphasis>\n?)?</literallayout>)"
        # Start by recognizing source-code listings and screenshots
        # of command examples.  Each entry pairs telltale keywords with
        # the DocBook start tag to lift the literallayout into.
        literal_lifts = (
            (r"struct|typedef|#define",	"programlisting class='C'"),
            ("@_",			"programlisting class='Perl'"),
            ("\ndef|elif|try|except",	"programlisting class='Python'"),
            ("mov|jmp",			"programlisting class='assembler'"),
            ("\nbash$|\n$",		"screen")
            )
        for (keywords, type) in literal_lifts:
            listing = re.compile(keyword_lifter % keywords)
            # The end tag is just the element name, without attributes.
            ender = type.split()[0]
            text = listing.sub(r"<%s remap='.nf'>\2</%s>" % (type,ender), text)
        return text

#
# Some formatting functions are common across more than one macro set.
#

def skip_ignorables(source):
    "Skip blank lines and ignorable commands."
    while source.lines:
        line = source.popline()
        # Blank lines and ordinary paragraph breaks generate nothing.
        if line == "" or source.paragraph_break(line):
            continue
        tokens = lineparse(line)
        if not tokens:
            # A non-blank text line: push it back untouched and stop.
            source.pushline(line)
            break
        if source.ignorable(tokens[0]):
            continue
        # Significant command: push back its normalized form and stop.
        source.pushline(" ".join(tokens))
        break

def gather_lines(source):
    "Gather text lines until we hit a command."
    gathered = []
    uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    while source.lines:
        candidate = source.popline()
        if is_command(candidate):
            # A capitalized macro ends the gather; hand it back.
            if candidate[1] in uppercase:
                source.pushline(candidate)
                break
            # Ignorable commands are simply dropped.
            if source.ignorable(candidate):
                continue
        gathered.append(candidate)
    return gathered

def gather_item(source, tag=None):
    "Gather item, emitting opening and closing listitem tags."
    if tag:
        source.emit("<" + tag + ">")
    source.need_paragraph()
    savesect = []
    outlines = []
    # Discard commands that generate nothing
    skip_ignorables(source)
    # Now gather the list item proper
    source.listitem = True
    if source.verbose >= 4:
        sys.stderr.write("gathering list item\n")
    while source.lines:
        line = source.popline()
        # Maybe we're looking at a commented-out entry
        if line == ".ig":
            savesect.append(".ig")
            # NOTE(review): assumes a closing ".." is present; a missing
            # one would spin on exhausted input -- confirm upstream.
            while True:
                line = source.popline()
                savesect.append(line)
                if line == "..":
                    break
            continue
        elif line is None:
            break
        elif line.startswith(".blank"):
            # The point is not to end the list on these.
            savesect.append(".blank")
        elif source.section_break(line):
            # Push back any blank lines before the section break.
            # This avoids generating some spurious paragraph()
            # calls that can litter the output with extra close tags.
            # Bug fix: guard against savesect being (or becoming) empty,
            # which made savesect[-1] raise IndexError.
            while savesect and blankline.match(savesect[-1]):
                source.pushline(savesect[-1])
                savesect.pop()
            source.pushline(line)
            break
        elif source.paragraph_break(line):
            source.pushline(line)
            break
        else:
            savesect.append(line)
    if source.verbose >= 4:
        # Bug fix: this message was a copy-paste duplicate of the one
        # above; gathering is finished at this point.
        sys.stderr.write("list item gathered\n")
    #source.notify("Interpreting: " + `savesect`)
    source.diversion = outlines
    source.interpret_block(savesect)
    source.diversion = source.output
    # Bug fix: this check used the bare global `verbose` where every
    # other check in this function consults source.verbose.
    if source.verbose >= 4:
        sys.stderr.write("interpretation of savesect complete\n")
    # Emit the interpreted item unless it consists solely of comments
    # and empty lines.
    if any(x and x[:4] != "<!--" for x in outlines):
        for outline in outlines:
            source.emit(outline)
    else:
        source.emit("&nbsp; <!-- FIX-ME: empty list item -->")
    source.listitem = False
    source.end_paragraph(label="gather_listitem")
    if tag:
        source.emit(r"</" + tag + ">")

def gather_variablelist(cmd, source):
    "Translate to variablelist markup -- used in both man and me macros."
    ipcount = 0
    source.end_paragraph(cmd)
    source.emit("<variablelist remap='%s'>" % cmd)
    while len(source.lines):
        line = source.popline()
        tokens = lineparse(line)
        # Bug fix: lineparse() can return a false value on a plain text
        # line, which made tokens[0] blow up.  Treat that like a
        # non-matching command: push the line back and stop.
        if not tokens or tokens[0] != cmd or len(tokens) == 1:
            source.pushline(line)
            break
        else:
            term = tokens[1]	# Get the list tag
            if ipcount:
                source.emit("</varlistentry>")
            source.emit("<varlistentry>")
            source.emit("<term>%s</term>" % fontclose(term))
            ipcount += 1
            gather_item(source, "listitem")
    # Bug fix: only close a varlistentry if we actually opened one; the
    # original emitted an unbalanced </varlistentry> for an empty list.
    if ipcount:
        source.emit("</varlistentry>")
    trail = ""
    if not source.quiet:
        # Typo fix: the annotation used to read "gather_variableliist".
        trail = " <!-- " + cmd + " (gather_variablelist) -->"
    source.emit("</variablelist>" + trail)

def gather_simplelist(cmd, source):
    "Gather listitems, terminate when you see a dot command."
    while len(source.lines):
        intro = source.popline()
        if intro.startswith(cmd):
            gather_item(source, "listitem")
        else:
            # Not one of ours; hand it back and stop gathering.
            source.pushline(intro)
            break

def gather_itemizedlist(cmd, source, bullet):
    "Translate to bullet-list markup -- used in both man and me macros."
    opener = "<itemizedlist mark='%s'>" % bullet
    source.emit(opener)
    gather_simplelist(cmd, source)
    source.emit("</itemizedlist>\n")

def gather_orderedlist(cmd, source):
    "Translate to numbered-list markup."
    # Bug fix: the original interpolated an undefined name `bullet`
    # here (copy-paste from gather_itemizedlist), raising NameError as
    # soon as an ordered list was gathered.  DocBook orderedlist
    # numeration is automatic, so no mark attribute is needed.
    source.emit("<orderedlist>")
    gather_simplelist(cmd, source)
    source.emit("</orderedlist>\n")

def gather_term(hanging_tags, interpreter, hook):
    "Parse one or more hanging-tag constructions, possibly a command synopsis."
    # A hanging-tag item may be introduced by one tag line or by several
    # consecutive ones -- an idiom used for tagged lists with multiple
    # alternative keys per item, presented vertically stacked.  Since a
    # variablelist entry may have multiple terms, we can handle the
    # general case by joining however many we find.
    #
    # addr2line(1) makes a good test for this code
    # afmtodit(1) tests .TP generated as a result of macroexpansion.
    # apm(1) tests the case where .TP is just before a .SH.
    #
    termlines = []
    while True:
        tagline = interpreter.source.popline()
        #interpreter.source.notify("Looking at: " + tagline)
        if is_command(tagline) and interpreter.source.ignorable(tagline):
            continue
        if tagline[1:3] not in hanging_tags:
            # Not a hanging tag: hand it back and stop collecting.
            interpreter.source.pushline(tagline)
            break
        line = interpreter.source.popline()
        # Perform in-line transformations associated with this macro set
        tokens = lineparse(line)
        if tokens:
            transformed = hook(tokens[0][1:], tokens[1:])
            if transformed:
                line = transformed
        termlines.append(line)
    if not termlines:
        return None
    return "</term>\n<term>\n".join(map(fontclose, termlines))

def parse_name_section(nameline):
    "Parse a NAME -- description line."
    # Normalize the various separator spellings down to " \- ",
    # then split on it.
    cooked = deemphasize(nameline).replace("\t", " ")
    for variant in (" - ", r" \(em "):
        cooked = cooked.replace(variant, r' \- ')
    if r" \- " not in cooked:
        cooked = cooked.replace("--", r' \- ')
    return cooked.split(r' \- ')

#
# Synopsis-parsing machinery.
#

class ParseNode:
    "Node in a command-synopsis parse tree; repr() renders its DocBook."
    def __init__(self, type, token=None, choice="plain", repeat=0):
        # type: node kind -- "option", "replaceable", "arg", "group",
        # "@GLUE@", "redirect", or any other container type.
        self.type = type
        self.token = token
        self.choice = choice
        self.righthand = None	# Value part of --opt=value, if any
        self.repeat = repeat
        self.glue = None	# Trailing separator text for option nodes
        self.children = []
    def __repr__(self):
        if self.type == "option":
            if self.righthand:
                return "%s=<replaceable>%s</replaceable>" % (self.token, self.righthand)
            else:
                return self.token + self.glue
        elif self.type == "replaceable":
            return "<replaceable>%s</replaceable>" % (self.token)
        elif self.type in ("arg", "group"):
            pre = "<%s"  % self.type
            if self.choice:
                pre += " choice='%s'" % self.choice
            if self.repeat:
                pre += " rep='repeat'"
            pre += ">"
            post = "</%s>" % self.type
            res = ""
            for child in self.children:
                res += repr(child)
            return pre + res + post
        elif self.type == "@GLUE@":
            return "@GLUE@"
        elif self.type == "redirect":
            return "<arg>" + self.token + "</arg>"
        else:
            # Bug fix: res was never initialized on this fallthrough
            # path, so rendering any other container type raised a
            # NameError.  (Backtick repr was also replaced with repr(),
            # which behaves identically.)
            res = ""
            for child in self.children:
                res += repr(child)
            return res

def is_file_or_command_name(tok):
    """Does this token look like a file or command name?

    Accepts tokens beginning with a letter or slash.  Yes, some
    legitimate commands begin with digits; 411toppm is a good example --
    these are recognized by a leading digit plus a trailing letter.
    Returns None for an empty token.
    """
    if not tok:
        return None
    # Fix: use string.ascii_letters rather than the locale-dependent,
    # Python-2-only string.letters; command and file names tested here
    # are ASCII.
    return tok[0] in string.ascii_letters+"/" or \
           (tok[0] in string.digits and tok[-1] in string.ascii_letters)

class LineTokenizer:
    "Make a collection of lines available either as lines or tokens."
    def __init__(self, lines):
        self.lines = lines
        self.pretokenizer = None	# Optional hook applied before splitting
        self.token_index = 0		# Count of tokens consumed so far
        self.tokens = []		# Token buffer for the current line
        self.lookbehind = []		# Every token popped, in order
        self.tokenize()
    def popline(self):
        "Grab the next line and make it the token buffer."
        if not self.lines:
            #sys.stderr.write("Popline returns None\n")
            return None
        else:
            #sys.stderr.write("Popline starts with: %s\n" % self)
            res = self.lines[0]
            self.lines.pop(0)
            self.tokens = []
            if self.lines:
                self.tokenize(self.pretokenizer)
            #sys.stderr.write("In popline, I return %s: %s\n" % (repr(res), self))
            return res
    def pushline(self, line):
        "Replace the token buffer with the current line."
        # Bug fix: this read the nonexistent attribute self.line instead
        # of the line argument, so every call raised AttributeError.
        self.lines = [line] + self.lines
        self.tokenize(self.pretokenizer)
        #sys.stderr.write("Pushline leaves: %s\n" % self)
    def peekline(self):
        "Return the head line without consuming it."
        if not self.lines:
            return None
        else:
            return self.lines[0]
    def tokenize(self, new_pretokenizer=None):
        "Split the head line on whitespace, after any pretokenizer hook."
        self.pretokenizer = new_pretokenizer
        if self.lines:
            if self.pretokenizer:
                line = self.pretokenizer(self.lines[0])
            else:
                line = self.lines[0]
            self.tokens = line.split()
            #sys.stderr.write("In tokenize, I split: " + repr(self) + '\n')
    def restore_newlines(self):
        "Make line boundaries visible again as explicit newline tokens."
        self.tokens.append("\n")
        for i in range(len(self.lines)):
            self.lines[i] += "\n"
    def token_pop(self, count=1):
        """Get a token; with count > 1, intervening tokens are discarded.

        Refills the token buffer from the next line when it runs dry.
        """
        if not self.lines:
            return None
        res = self.tokens[0]
        self.tokens = self.tokens[count:]
        if not self.tokens:
            if not self.lines:
                return None
            self.popline()
        self.token_index += 1
        self.lookbehind.append(res)
        return res
    def token_peek(self):
        "Peek at the next token."
        if self.tokens:
            return self.tokens[0]
        else:
            return None		# list empty means we're out of data

    def token_unpop(self):
        "Push back a token."
        self.tokens = [self.lookbehind[-1]] + self.tokens
        # We do *not* alter the source line!
        self.token_index -= 1
    def __str__(self):
        "Display the state of the object."
        return "<tokens=%s, lines=%s>" % (self.tokens, self.lines)
    __repr__ = __str__

    def text(self):
        "Return all remaining lines as one newline-joined string."
        return "\n".join(self.lines)

class FunctionSynopsisSequenceParser:
    "Consume a function synopsis sequence and return markup."
    # Candidate lines for FuncSynopsisInfo: a match anywhere in a line
    # tags the whole line as preamble code in the given language.
    language_lines = (
        (re.compile(r"^\s*#\s*(define|include|ifn?def|endif|extern)"), "C"),
        (re.compile(r"^\s*/\*"),	"C"),
        (re.compile(r"^\s*struct\s"),	"C"),
        (re.compile(r"^\s*union\s"),	"C"),
        (re.compile(r"^\s*typedef\s"),	"C"),
        (re.compile(r"^\s*extern\s"),	"C"),
        (re.compile(r"^\s*import\s"),	"Python"),
        (re.compile(r"^\s*use\s.*;"),	"Perl"),
        (re.compile(r"#\s*perl"),	"Perl"),
        )
    # These patterns identify lines that are probably code fragments.
    language_fragments = (
        # This is looking for the stuff that one finds around the left
        # paren of a C declaration.  This is something we're quite unlikely
        # to see in running text.
        (re.compile(r"[a-z][a-z][a-z]\([_a-zA-Z][_a-zA-Z0-9]+[, ]"), "C"),
        # Look for lines led with C declarations
        (re.compile(r"^\s*(int|char|long)\s"),	"C"),
        # Someday, use these
        #(re.compile(r"^\s*def\s"),	"Python"),
        #(re.compile(r"^\s*class\s"),	"Python"),
        )
    # Balanced delimiter pairs: (opener, closer, language, description).
    token_pairs = (
        ("/*",		"*/",		"C",	"C comment"),
        ("struct",	"}&semi;",	"C",	"C struct"),
        ("union",	"}&semi;",	"C",	"C union"),
        ("typedef",	"}&semi;",	"C",	"C typedef"),
        ("extern",	"&semi;",	"C",	"C extern"),
        )
    def __init__(self, io, caller):
        """Try to parse the lines held by io as a function synopsis.

        On failure self.error holds a diagnostic; on a recognized but
        invalid synopsis self.output is set to a comment so the caller
        won't fall back to a command-synopsis parse.
        """
        self.io = io
        self.io.restore_newlines()
        self.caller = caller
        self.output = ""
        self.language = None	# Set once a source language is recognized
        self.error = None
        self.sbr = "<sbr%s>" % self.caller.source.xml
        # Shortcut:  assume "-" and | can never occur in a header.  Look for
        # it and return immediately if we find it.
        # NOTE(review): the truthiness of filter() here relies on
        # Python 2 semantics (filter returns a list).
        if filter(lambda x: "-" in x or "|" in x or x.find("] [")>-1, self.io.lines):
            if self.caller.source.verbose:
                self.caller.source.notify("can't be a function synopsis, contains - or |  or '] ['")
            self.error = "<!-- contains - or |  or '] [' -->"
            return
        # Another shortcut: to  be parseable C, headers must contain
        # either ( or ;.  Command synopses generally have neither.
        # NOTE(review): the messages below say ")" where the test above
        # checks "(" -- probably a typo in the message text.
        if not filter(lambda x: "(" in x or ";" in x, self.io.lines):
            if self.caller.source.verbose:
                self.caller.source.notify("can't be a function synopsis, does not contain either ) or ;")
            self.error = "<!-- does not contain either ) or ; -->"
            return
        # Otherwise time for a normal parse
        try:
            if self.caller.verbose > 1:
                self.caller.source.notify("parse_funcsynopsis_sequence() sees: " + `self.io`)
            self.__parse_funcsynopsis_sequence()
        except LiftException, e:
            self.error = "function synopsis parse failed on `%s' (%d): %s" % \
                         (self.io.token_peek(), self.io.token_index, e.message)
            # Since we can detect function synopses reliably, check here
            # and make self.output nonempty so we'll error out and not try
            # doing a command parse.
            if filter(self.is_sourcecode, self.io.lines):
                self.output = "<!-- invalid function synopsis -->"

    def __pretokenizer(self, line):
        "Pad C punctuation with spaces so split() yields it as tokens."
        for punct in (")", "(", ",", "*", "[", "]"):
            line = line.replace(punct, " %s " % punct)
        return line

    def __detokenize(self, line):
        "Undo the pretokenizer's padding in reassembled declaration text."
        line = line.replace("[ ]", "[]")
        return line.replace("* ", "*")

    def is_sourcecode(self, text):
        "Recognize that a line is source code."
        # Blank lines count as code context for our purposes.
        if blankline.search(text):
            return 1
        # Whole-line and fragment patterns both qualify a line.
        recognizers = FunctionSynopsisSequenceParser.language_lines \
                      + FunctionSynopsisSequenceParser.language_fragments
        for (pattern, lang) in recognizers:
            if pattern.search(text):
                return 1
        return 0

    def __parse_paramdef(self, arg):
        """We've been handed a formal argument; parse it into a ParamDef.

        arg is a token list.  For non-C languages (or a bare one-token
        formal) the whole first token is the parameter; otherwise we find
        the parameter name inside the declaration.
        """
        if self.language != "C" or len(arg) == 1:
            return "    <paramdef><parameter>"+arg[0]+"</parameter></paramdef>\n"
        # If there is a function prototype in the declaration, strip it.
        # No, this won't handle nested prototypes.
        def rindex(x, lst):
            # Rightmost index of x in lst, or -1 if absent.
            last = len(lst) - 1
            for i in range(0, last+1):
                if lst[last - i] == x:
                   return last - i
            return -1
        last = len(arg) - 1
        if arg[-1] == ')':
            last = rindex("(", arg)
        # Now look for the rightmost token that resembles a name.
        # There's your parameter.
        # NOTE(review): range(last) scans arg[last] down to arg[1] and
        # never considers arg[0] -- presumably because the leading token
        # must be the type; confirm intent.
        param_ind = -1
        for i in range(last):
            if arg[last - i][0].isalpha():
                param_ind = last - i
                break
        if param_ind == -1:
            # No name found: treat the whole formal as declaration text.
            prolog = " ".join(arg)
            var = ""
            epilog = ""
        else:
            prolog = " ".join(arg[:param_ind])
            var = arg[param_ind]
            epilog = " ".join(arg[param_ind+1:])
        prolog = self.__detokenize(prolog)
        epilog = self.__detokenize(epilog)
        self.caller.source.localhints.post(var, "symbol class='argument'")
        return "    <paramdef>" + prolog + " <parameter>" + var + "</parameter>" + epilog + "</paramdef>\n"

    def __parse_funcprototype(self):
        "Parse a function prototype."
        if self.caller.verbose:
            self.caller.source.notify("beginning function prototype parse, language %s" % self.language)
        if self.caller.verbose > 1:
            self.caller.source.notify("parse_funcprototype() sees: " + `self.io`)
        # Seek the name token.
        parendepth = 0
        name = None
        prolog = []
        first_token = self.io.token_pop()
        simple = (self.io.token_peek() == '(')
        self.io.token_unpop()
        # We may be able to extract some semantics from the type specifiers.
        if self.language == "C" and not simple:
            if self.caller.source.verbose:
                self.caller.source.notify("not simple")
            if self.io.token_peek() in ("struct", "union"):
                prolog.append(self.io.token_pop())
                if id_re.match(self.io.token_peek()):
                    self.caller.source.localhints.post("type",
                        first_token + r"\s*" + self.io.token_peek())
                    prolog.append(self.io.token_pop())
            else:
                if first_token not in c_keywords:
                    self.caller.source.localhints.post(first_token, "type")
        # Now search for the leftmost token that looks like an identifier
        # and is not a C reserved word.
        while True:
            tok = self.io.token_peek()
            # Cope with obnoxious Tcl sidebar marks
            if tok == "|":
                self.io.token_pop()
            elif tok not in c_keywords and id_re.match(tok):
                name = tok
                self.io.token_pop()
                break
            elif tok == '(':
                parendepth += 1
                prolog.append(self.io.token_pop())
            elif tok == ')':
                parendepth -= 1
                prolog.append(self.io.token_pop())
            else:
                prolog.append(self.io.token_pop())
        if name:
            if self.caller.source.verbose:
                self.caller.source.notify("Function name: " + name)
            self.caller.source.localhints.post("function", name)
        else:
            raise LiftException("missing name in apparent function declaration.")
        # Now that we know where it is, walk right to the next open paren
        # at the same nesting level.
        epilog = []
        while True:
            tok = self.io.token_peek()
            # No paren - can't be a function synopsis
            if tok is None:
                return ""
            # Cope with obnoxious Tcl sidebar marks
            elif tok == "|":
                self.io.token_pop()
            elif parendepth == 0 and tok == "(":
                self.io.token_pop()
                break
            else:
                if tok == '(':
                    parendepth += 1
                    epilog.append(self.io.token_pop())
                elif tok == ')':
                    parendepth -= 1
                    epilog.append(self.io.token_pop())
                else:
                    epilog.append(self.io.token_pop())

        else:
            raise LiftException("function building failed while looking for (")
        # Now we know how to segment things
        prolog = " ".join(prolog)
        if self.caller.verbose:
            self.caller.source.notify("Name: %s" % name)
        epilog = " ".join(epilog)
        prolog = "  <funcdef>%s <function>%s</function>%s</funcdef>\n" % (prolog, name, epilog)
        globalhints.post(name, "function")
        # Go get the prototype formals
        formal_args = ""
        while self.io.token_peek() != ')':
            formal = []
            while True:
                tok = self.io.token_pop()
                if self.caller.verbose > 1:
                    self.caller.source.notify("Token: %s" % tok)
                if tok is None:
                    raise LiftException(self.caller.source.error("unexpected end of token list"))
                elif tok == "\n":
                    continue
                elif tok == "(":
                    parendepth += 1
                    formal.append(tok)
                elif tok == ')':
                    if parendepth == 0:
                        self.io.token_unpop()
                        break			# End of formal and prototype
                    else:
                        formal.append(tok)
                        parendepth -= 1
                elif tok == ',':
                    if parendepth == 0:
                        break		# End of formal
                    else:
                        formal.append(tok)
                else:
                    formal.append(tok)
            # Formal argument should be complete. Hand it off for analysis
            if self.caller.verbose:
                self.caller.source.notify("Formal: %s" % formal)
            formal_args += self.__parse_paramdef(formal)
        # We've gatherered all the argument markup
        if formal_args == "<paramdef><parameter>void</parameter></paramdef>":
            formal_args = "  <void%s>\n" % self.caller.source.xml
        if formal_args == "<paramdef><parameter>...</parameter></paramdef>":
            formal_args =  "  <vaargs%s>\n" % self.caller.source.xml
        # Consume closing paren and any following semicolon
        self.io.token_pop()
        if self.io.token_peek() in ("&semi;", ";"):
            self.io.token_pop() 
        # Now assemble and return it.
        if prolog or formal_args:
            output = "<funcprototype>\n" + prolog + formal_args + "</funcprototype>\n"
        if self.caller.verbose:
            self.caller.source.notify("ending function prototype parse")
        return output
    def __detect_passthroughs(self, line):
        "Return the language of a line matching a passthrough pattern, else None."
        matched = [lang
                   for (regexp, lang) in FunctionSynopsisSequenceParser.language_lines
                   if regexp.search(line)]
        if matched:
            return matched[0]
        return None
    def __parse_function_synopsis(self):
        "Parse a C or Perl function synopsis, possibly preceded by inclusions."
        output = ""
        if self.caller.verbose:
            self.caller.source.notify("beginning function synopsis parse")
            if self.caller.verbose > 1:
                self.caller.source.notify("parse_function_synopsis() sees: " + `self.io`)
        # First, accept any number of preamble lines as a FuncSynopsisInfo
        # Have to chop every line, because they have their own linefeeds.
        # emit() supplies them
        while True:
            # Pass through blank lines
            line = self.io.peekline()
            if line is None:
                break
            if blankline.match(line):
                output += self.io.popline()[:-1]
                continue
            # Other things, like cpp directives, should pass through as well.
            lang = self.__detect_passthroughs(line)
            if lang:
                if self.caller.source.verbose:
                    self.caller.source.notify("language identified as %s\n"%lang)
                self.language = lang
                output += self.io.popline()[:-1]
                continue
            # Pass through line sequences bracketed by specified token pairs
            for (start, end, lang, errsmg) in FunctionSynopsisSequenceParser.token_pairs:
                if self.io.token_peek() == start:
                    language = lang
                    while True:
                        tok = self.io.token_pop()
                        if tok == "\n":
                            output += self.io.popline()[:-1]
                        # This is the magic that allows us to avoid elaborate
                        # tokenization rules.  Look for the terminator as the
                        # suffix of a token.
                        elif tok.endswith(end):
                            output += self.io.popline()[:-1]
                            # Consume blank lines and breaks
                            while True:
                                tok = self.io.token_peek()
                                if tok == self.sbr:
                                     self.io.popline()
                                elif tok == '\n':
                                    output += self.io.popline()[:-1]
                                else:
                                    break
                            break
                        elif tok is None:
                            raise LiftException(self.source.error("missing end token for " + errmsg))
                            continue
            else:
                # Nothing we recognize.  Stop, and don't pop the current line
                break
        self.io.tokenize()	# restore default tokenization
        # Easier to nuke <sbr/> here than prevent it from being issued.
        if output:
            output = "<funcsynopsisinfo>\n" + \
                       output.replace(self.sbr, "") + \
                      "</funcsynopsisinfo>\n"
        # If there is input left, we look for a function synopsis.
        # Some pages (like errno.3) have preamble but no functions.
        if self.io.token_peek():
            self.io.tokenize(self.__pretokenizer)
            while True:
                if self.io.token_peek() is None:
                    break
                elif self.io.token_peek() in ('\n', self.sbr):
                    self.io.token_pop() 
                    output += "\n"
                elif self.__detect_passthroughs(self.io.peekline()):
                    self.io.popline()
                    break
                else:
                    fs = self.__parse_funcprototype()
                    if fs:
                        output += fs
                    else:
                        break
            if output:
                output = "<funcsynopsis>\n" + output + "</funcsynopsis>\n"
        if self.caller.verbose:
            self.caller.source.notify("ending function synopsis parse")
        return output
    def __parse_funcsynopsis_sequence(self):
        "Parse and emit for one or more function synopses."
        if self.caller.verbose:
            self.caller.source.notify("beginning function synopsis sequence parse")
        collected = ""
        while self.io.lines:
            # Skip newline and break tokens separating synopses.
            while self.io.token_peek() in ('\n', self.sbr):
                self.io.token_pop()
                collected += "\n"
            collected += self.__parse_function_synopsis()
            if self.io.peekline() is None:
                break
            if not filter(self.is_sourcecode, self.io.lines):
                break
        if self.caller.verbose:
            self.caller.source.notify("ending function synopsis sequence parse")
        # If there was no content that looked like a potential function
        # synopsis, we'll get "" back.  Otherwise self.output will contain
        # a partial parse.
        self.output = collected
        self.io.tokenize()	# Restore normal tokenization

class CommandSynopsisSequenceParser:
    "Parse a sequence of command synopses."
    # Bracketed file extensions like "[.ext]"; protected from the
    # bracket-splitting done in __pretokenize below.
    opt_file_ext = re.compile(r"\[\.([a-zA-Z|.]+)\]")
    # A dash option at line start or after whitespace.
    option_dash = re.compile(r"^-\w|\s-\w")
    # A highlighted (.B/.I) or bare command word alone on a line.
    synopsis_command = re.compile(r"(\.[BI] ([a-zA-Z][a-zA-Z0-9+]*)$)|([\\a-zA-Z0-9+]+$)")

    def __init__(self, io, caller):
        # io is the line/token source; caller is the interpreter driving us.
        # All parsing happens here in the constructor: translated markup
        # accumulates in self.output, any failure message in self.error.
        self.io = io
        self.caller = caller
        self.output = ""
        self.language = None
        self.error = None
        self.sbr = "<sbr%s>" % self.caller.source.xml
        # Arrange for lexical analysis to work
        self.io.tokenize(self.__pretokenize)
        while True:
            next = self.io.peekline()   # NOTE: shadows the builtin 'next'
            if next is None:
                break
            elif next.startswith(self.sbr) or blankline.search(next):
                # Skip breaks and blank lines between synopses.
                self.io.popline()
                continue
            else:
                # Gather the chunk up to the next break; stop parsing
                # unless some line in it looks like a synopsis.
                nextpart = []
                for line in self.io.lines:
                    if line.startswith(self.sbr):
                        break
                    nextpart.append(line)
                if not filter(self.is_command_synopsis_line, nextpart):
                    break;
                output = self.parse_command_synopsis()
                if not output:
                    break
                self.output += output
                if self.error:
                    break
        self.io.tokenize()	# Restore normal tokenization

    def __pretokenize(self, ln):
        "Rewrite a synopsis line so whitespace-splitting yields grammar tokens."
        # Fix a perldoc problem
        ln = ln.replace(r"\*(--", "--")
        # Remove ordinary troff highlight macros
        ln = highlight_stripper.sub("", ln)
        # Convert . . . to ...
        ln = re.sub(r"\.\s+\.\s+\.", r"...", ln)
        # Grotty little hack to make lexical analysis trivial.  I got
        # this idea from something I read about the first FORTRAN compiler.
        ln = CommandSynopsisSequenceParser.opt_file_ext.sub(r".@LB@\1@RB@", ln)
        ln = ln.replace(r"|.", r"&verbar;.")
        ln = ln.replace("][", "] @GLUE@ [")
        ln = ln.replace("|", " | ").replace("...", " ... ")
        ln = ln.replace("[", " [ ").replace("]", " ] ")
        ln = ln.replace("{", " { ").replace("}", " } ")
        ln = ln.replace("@LB@", "[").replace("@RB@", "]")
        # Identify and split up redirections
        # Ooops...have to be smarter than this!
        #ln = ln.replace(" &lt;", " &lt; ").replace("&gt;", " &gt; ")
        return ln

    def is_command_synopsis_line(self, line):
        "Does this look like a command synopsis, not just a string of words?"
        # Look for special characters from the synopsis syntax
        for c in "{[]}|":
            if c in line:
                return 1
        # If the line begins with one of the command's aliases, always treat
        # as a synopsis line.  This catches the important special case where
        # the command name occurs alone on the line, followed by lines
        # describing options.  Also catches cases like "pf2afm fontfilename".
        tokens = line.split()
        if len(tokens) and tokens[0] in self.caller.refnames:
            return 1
        if CommandSynopsisSequenceParser.option_dash.search(line):
            return 1
        if CommandSynopsisSequenceParser.synopsis_command.match(line):
            return 1
        # If we see <command, it means this line was generated by Nm
        # during the first pass and does indeed start a synopsis.
        if line.find("<command") > -1:
            return 1
        # This is only called following a SYNOPSIS label.  In mdoc that's
        # reliable, because synopsis sections aren't allowed to contain
        # running text.
        if self.caller.source.is_active("mdoc"):
            return 1
        if self.caller.verbose:
            self.caller.source.notify("'%s' does not look like a synopsis line" % line.rstrip())
        return 0

    def parse_command_synopsis(self):
        "Translate a synopsis line -- here is where the heavy work starts."
        # Returns a <cmdsynopsis> string, or an XML comment carrying the
        # error diagnostic if the grammar parse fails.
        output = ""
        try:
            self.nesting_depth = 0
            first_token = self.io.token_peek()
            if first_token in self.caller.refnames or is_file_or_command_name(first_token):
                command = self.io.token_pop()
                globalhints.post(command, "command")
                # In case of the second or condition
                self.caller.refnames[first_token] = True
                output += ("  <command>%s</command>\n" % command)
            else:
                raise LiftException("first token in synopsis looks wrong.")
            # Don't terminate on <sbrk> alone.  Users may be employing it at
            # presentation level.  Do terminate when you see it followed by
            # a refname.
            while self.io.lines:
                arg = self.__compile_arg()
                if arg == None:
                    break
                output += "    " + `arg` + "\n"
            return "<cmdsynopsis>\n"+output+"</cmdsynopsis>\n"
        except LiftException, e:
            self.error = "command synopsis parse failed on `%s' (%d): %s" % \
                         (self.io.token_peek(), self.io.token_index, e.message)
            # Generate a useful error message:
            errmsg = "\n<!--\n" + self.error + "\n"
            errmsg += " ".join(self.io.lookbehind)
            errmsg += "\n-->\n"
            return errmsg

    # Lexical tests
    def __is_next_special(self):
        "Is the next token a metacharacter of the synopsis grammar?"
        return self.io.token_peek() in ("[", "]", "{", "}", "|", "...")
    def __is_next_option(self):
        "Does the next token start like a command option (- or +)?"
        return self.io.token_peek() and self.io.token_peek()[0] in ("-", "+")
    def __is_next_numeric(self):
        "Is the next token an integer literal?"
        try:
            int(self.io.token_peek())
            return 1
        except (ValueError, TypeError):
            return 0
    def __is_next_replaceable(self):
        "Can the next token be treated as a <replaceable> argument?"
        next = self.io.token_peek()
        # Good reasons for accepting funky leader characters:
        # @, % -- dig.1
        # :, ', " -- perlrun.1 and other manual pages
        # = -- as.1
        # , -- chmod.1
        # . -- date.1
        # # -- gphoto.1
        # ? -- cdecl.1 and other places where ? invokes help.
        # / -- dummy filename arguments
        if next is None:
            return None
        else:
            return next[0] in string.letters + "./=:'\"@%,#?" or (next[:4] == "&lt;" and next != "&lt;") or self.__is_next_numeric() or is_file_or_command_name(next)
    # Manual-synopsis grammar
    def __compile_arg(self):
        "Parse one argument, handling '...' repetition, '|' groups, and glue."
        if self.caller.verbose >= 2:
            self.caller.source.notify("compile_arg(" + `self.io.tokens` + ") <" + `self.nesting_depth` + ">")
        res = self.__compile_arg1()
        if res is None:
            return None		# Failure is signaled by throwing an exception 
        elif self.io.token_peek() == "...":
            self.io.token_pop()
            res.repeat = 1
        elif self.io.token_peek() == "|":
            # Alternation: wrap the alternatives in a "group" node.
            self.io.token_pop()
            first = res
            res = ParseNode("group")
            res.children.append(first)
            while self.io.tokens and self.io.token_peek() not in ("]", "}")  and not self.io.token_peek() in self.caller.refnames:
                res.children.append(self.__compile_arg1())
                if self.io.token_peek() == "|":
                    self.io.token_pop()
        elif self.io.token_peek() == "@GLUE@":
            res = ParseNode(self.io.token_pop())
        if self.caller.verbose >= 2:
            self.caller.source.notify("compile_arg() <%d> returns %s: tokens are %s" % (self.nesting_depth, `res`, self.io.tokens))
        return res
    def __compile_arg1(self):
        "Parse a primary: option, replaceable run, redirection, or [..]/{..} group."
        if self.caller.verbose >= 2:
            self.caller.source.notify("compile_arg1("+`self.io.tokens`+") <" + `self.nesting_depth` + ">")
        while self.io.token_peek() == self.sbr:
            next = self.io.token_pop()
            if self.nesting_depth==0 and is_file_or_command_name(next):
                return None
        if self.io.token_peek() is None:
            if self.nesting_depth == 0:
                return None
            else:
                raise LiftException("unbalanced group in synopsis markup")
        elif self.io.token_peek() in self.caller.refnames:
            # A refname at top level starts a new alternate command form.
            if self.nesting_depth == 0:
                return None
            else:
                raise LiftException("unbalanced group in synopsis markup")
        elif self.__is_next_option():
            option = self.io.token_pop()
            # @GLUE@ after an option marks an old-style run-together argument.
            oldstyle = self.io.token_peek() == "@GLUE@"
            if oldstyle:
                self.io.token_pop()
            res = ParseNode("arg")
            # GNU-style --opt=value splits at the equal sign.
            gnustyle = option.split("=")
            if len(gnustyle) > 1:
                optnode = ParseNode("option", gnustyle[0])
                res.children.append(optnode)
                optnode.righthand = gnustyle[1]
            else:
                optnode = ParseNode("option", option)
                res.children.append(optnode)
                if self.io.tokens and self.__is_next_replaceable():
                    res.children.append(ParseNode("replaceable",self.io.token_pop()))
            if oldstyle:
                optnode.glue = ""
            else:
                optnode.glue = " "
            self.caller.source.localhints.post(re.escape(optnode.token), "option")
        elif self.__is_next_replaceable():
            res = ParseNode("arg", None, "plain")
            # We have to stop on instances of command names because there
            # is no way to spot the beginning of an alternate command form
            # syntactically.
            while self.__is_next_replaceable() and not self.io.token_peek() in self.caller.refnames:
                part = ParseNode("replaceable", self.io.token_pop())
                if self.io.token_peek() == "...":
                    self.io.token_pop()
                    res.repeat = 1
                res.children.append(part)
        elif self.io.token_peek() and self.io.token_peek() in ("&lt;", "&gt;"):
            # Shell redirection: keep the operator and its target together.
            res = ParseNode("redirect", None, "plain")
            res.token = self.io.token_pop()
            if self.io.token_peek():
                res.token += self.io.token_pop()
        elif self.io.token_peek() in ("[", "{"):
            # Bracketed group: { } is required, [ ] is optional.
            self.nesting_depth += 1
            self.io.token_pop()
            if self.io.token_peek() == "{":
                required = "req"
            else:
                required = "opt"
            lst = []
            while self.io.token_peek() not in (None, "]", "}"):
                lst.append(self.__compile_arg())
            if len(lst) == 1:
                res = lst[0]
            else:
                res = ParseNode("arg")
                res.children = lst
            res.choice = required
            if self.io.token_peek() is None or self.io.token_peek() == self.sbr:
                raise LiftException("expecting ] or }")
            else:
                self.io.token_pop()
            self.nesting_depth -= 1
        else:
            raise LiftException("expecting argument")
        if self.caller.verbose >= 2:
            self.caller.source.notify("compile_arg1() returns %s: tokens are %s <%d>" % (res, " ".join(self.io.tokens), self.nesting_depth))
        return res


class SynopsisSectionParser:
    "Parse a synopsis section.  Handles disabling extraneous commands."
    old_style_option_glue = re.compile(r"([^A-Za-z]-[A-Za-z]*)(?:\f.)([A-Za-z])")
    unparseable = re.compile(r"\$[a-z]")		# Perl and other nightmares
    def __init__(self, caller):
        self.caller = caller
        self.synopses = []
        # Set up the parsing machinery so that ordinary line interpretation
        # will accumulates synopsis lines in this object, and not emit them.
        caller.source.diversion = self.synopses
        caller.break_trap = self.__synopsis_break_trap
        caller.source.ignore("nf")
        caller.source.ignore("fi")
        caller.source.ignore("ft")
        caller.source.ignore("in")
        caller.source.ignore("ti")
        caller.source.ignore("ce")
        caller.source.unignore("br")
        caller.source.unignore("nl")
        #caller.source.ignore("TP")	# Ugh...macro-set-specific
    def __wrap__(self):
        # Re-enable normal commands
        self.caller.break_trap = None
        self.caller.source.diversion = self.caller.source.output
        self.caller.source.unignore("nf")
        self.caller.source.unignore("fi")
        self.caller.source.unignore("ft")
        self.caller.source.unignore("in")
        self.caller.source.unignore("ce")
        self.caller.source.ignore("br")
        self.caller.source.ignore("nl")
        #self.caller.source.unignore("TP")	# Ugh...macro-set-specific
    def __synopsis_break_trap(self, cmd):
        self.synopses.append("<sbr%s>" % self.caller.source.xml)
    def __detroff(self, ln):
        # Remove markup generated by the Mdoc document macros
        ln = ln.replace("<replaceable>", "").replace("</replaceable>", "")
        ln = ln.replace("<command>", "").replace("</command>", "")
        ln = ln.replace("<command remap='Ic'>", "")
        ln = ln.replace("<command remap='Nm'>", "")
        ln = re.sub(r"<option>\s*", "-", ln).replace("</option>", "")
        # Some man pages (like afmtodit.1) run options together with their
        # following arguments together on the man page, with the boundary
        # marked by a highlight change.  Replace these with a glue token so
        # there will be a parseable boundary there.
        ln=SynopsisSectionParser.old_style_option_glue.sub(r"\1 @GLUE@ \3",ln)
        # We have now extracted all the semantic information we can from
        # highlight boundaries.
        ln = deemphasize(ln)
        # Apply the character translations associated with each
        # interpreter on the list.  We have to do this because of the
        # garbage pod2man often puts into synopses.  It won't get done
        # a second time because we're outside the man page body.
        # Note -- has to be done before we nuke backslashes below!
        ln = self.caller.source.hack_translations(ln)
        # Throw out the entity results of translating some confusing troff
        # characters.  Yes, some man pages (notably several associated with
        # nmh) throw soft hyphens in there for no obvious reason.
        ln = ln.replace("&thinsp;","").replace("&zerosp;","")
        ln = ln.replace("&nbsp;"," ").replace("&shy;", "").replace("\\", "")
        ln = ln.replace(r"-^-", "--").replace("&mdash;", "--")
        return ln
    def detect_unparseable_synopsis(self, lines):
        "Detect stuff we just shouldn't try to parse."
        # Like, anything that is Perl...
        if self.caller.source.is_active("pod2man"):
            return 1
        return filter(lambda x: SynopsisSectionParser.unparseable.search(x), lines)
    def parse_and_emit(self):
        "Parse and emit the Synopsis section we've gathered."
        if "nf" not in self.caller.source.ignore_set:
            return None
        # Undo redirection and re-enable normal commands.
        self.__wrap__()
        # First, fold the lines.  We have to handle continuations
        # explicitly, since we're outside the body section.
        processed = []
        for line in self.synopses:
            if line[:4] != "<!--":
                if not processed:
                    processed = [line+"\n"]
                elif processed[-1][-2:] == "\\c":
                    processed[-1] = processed[-1][:-2] + line
                else:
                    processed.append(line+"\n")
        # Translate troff characters and XMLlify everything.
        self.io = LineTokenizer(map(lambda x: self.__detroff(x), processed))

        # This code is failure-prone.  It is coping, badly, with a mess.
        #
        # The underlying problem is that from DocBook's point of view,
        # Synopsis sections in man pages come in three different flavors
        # that need to be marked up differently -- command synopses,
        # function synopses, and plain old text.
        #
        # Trying to analyze Synopsis sections raises three problems.  
        # One: man page authors don't maintain these neat distinctions.
        # in particular, it's fairly common for a text section to
        # follow a function or command synopsis.   We have to cope
        # with this case somehow.
        #
        # The grammar we assume here looks like this:
        #
        # [ function-synopsis | command-synopsis ] [text-section ]
        #
        # The second problem is that there are no airtight ways to
        # distinguish netween the three different sections.  This code
        # simply assumes we have predicates.  See the
        # is_command_synopsis(), is_function_synopsis, and
        # is_literal_text() functions above for discussion.
        #
        # The third thing is that there is stuff we shouldn't even bother
        # trying to parse because it's hopeless -- Perl synopses are the
        # largest subcategory of these.  We should detect these and pass
        # them through as plain-text Synopsis sections.
        #
        # Unfortunately, we can't resolve the problem before doing
        # first-pass macrointerprtation of the whole synopsis
        # section.  That's how we get mdoc macros evaluated -- and
        # there may be others the man-page author created.  This
        # means the text we're interpreting may contain <. >, and tags.

        # First, try consuming a sequence of function synopses.  Try
        # this first because the test for them is least likely to
        # throw a false positive.  Don't crap out of the function
        # synopsis parse unless we got far enough along to identidfy a
        # language.
        out = ""
        if self.detect_unparseable_synopsis(processed):
            err = None
            out = "<refsynopsisdiv id='%s'>\n" % self.caller.source.make_id_from_title('synopsis') \
                  + "<synopsis>\n" \
                  + self.io.text() \
                  + "</synopsis>\n" \
                  + "</refsynopsisdiv>\n"
            if self.caller.verbose:
                self.caller.source.notify("got unparseable synopsis ")
        else:
            # Parse a copy, so that if we error out the CommandSynopsis
            # parser gets a crack at the whole thing.
            func_copy = copy.deepcopy(self.io)
            obj = FunctionSynopsisSequenceParser(func_copy, self.caller)
            err = obj.error
            if not err:
                out = "<refsynopsisdiv id='%s'>\n" % (self.caller.source.make_id_from_title('synopsis')) \
                      + obj.output \
                      + "</refsynopsisdiv>\n"
                if self.caller.verbose:
                    self.caller.source.notify("got function synopsis")
                self.io = func_copy
            elif obj.output and obj.language:
                self.caller.source.error(obj.error)
            else:
                obj = CommandSynopsisSequenceParser(self.io, self.caller)
                err = obj.error
                if not err:
                    out = "<refsynopsisdiv id='%s'>\n"  % self.caller.source.make_id_from_title('synopsis') \
                          + obj.output + \
                          "</refsynopsisdiv>\n"
                    if self.caller.verbose:
                        self.caller.source.notify("got command synopsis '%s'"%out)
                elif obj.output:
                    out = obj.output
                    self.caller.source.error(err)
            # We've pulled as much of the section as we can into structured
            # markup.  If there's anything left, treat it as plain text.
            if self.io.lines:
                self.caller.source.warning("dubious content in Synopsis")
                if self.caller.source.verbose:
                    self.caller.source.notify("The content: "+`self.io`)
                err = None
                for i in range(len(self.io.lines)):
                    if self.io.lines[i].startswith("<sbr"):
                        self.io.lines[i] = "</para><para>"
                out += "<refsect1 id='%s'>"  % self.caller.source.make_id_from_title('Synopsis Notes')\
                      + "<title>Synopsis Notes</title>\n<para>" \
                      + "".join(self.io.lines) \
                      + "</para>\n</refsect1>"
                if self.caller.verbose:
                    self.caller.source.notify("got unknown section")
        # Postprocess the output to remove glue and clean up empty tags
        if out:
            out = hotglue.sub("", out)
            out = cleantag.sub("", out)
            self.caller.source.emit(out)
        return not err

class Author:
    "Represents an Author object."
    def __init__(self, iname=None, iaffil=None):
        # All name fields start empty; name()/affiliate() fill them in.
        self.firstname = None
        self.middle = None
        self.surname = None
        self.lineage = None
        self.affiliation = None
        if iname:
            self.name(iname)
        if iaffil:
            self.affiliate(iaffil)
    def name(self, name):
        "Parse a single name from a text line (English-language rules)."
        parts = name.split()
        count = len(parts)
        if count >= 4:
            self.lineage = parts[3]
        if count >= 3:
            self.firstname = parts[0]
            self.middle = parts[1]
            self.surname = parts[2]
        elif count >= 2:
            self.firstname = parts[0]
            self.middle = None
            self.surname = parts[1]
        else:
            # Single word: treat it as an othername.
            self.middle = parts[0]
    def affiliate(self, line):
        "Parse an affiliation from a line."
        self.affiliation = line		# Trivial, so far...
    def __repr__(self):
        # Emit the DocBook <author> markup for this person.
        pieces = ["<author>"]
        if self.firstname:
            pieces.append("<firstname>%s</firstname>" % self.firstname)
        if self.middle:
            # A trailing dot marks a middle initial.
            role = " role='mi'" if self.middle[-1] == '.' else ""
            pieces.append("<othername%s>%s</othername>" % (role, self.middle))
        if self.surname:
            pieces.append("<surname>%s</surname>" % self.surname)
        if self.lineage:
            pieces.append("<lineage>%s</lineage>" % self.lineage)
        if self.affiliation:
            pieces.append("<affiliation><orgname>%s</orgname></affiliation>" % self.affiliation)
        pieces.append("</author>")
        return "".join(pieces)

#
# Macro interpreters.
#

class ManInterpreter:
    "Interpret man(7) macros."
    name = "man"
    exclusive = True
    toptag = "refentry"
    # Requests whose definitions must not be clobbered by page-local macros.
    immutable_set = { "B":1, "I":1,"R":1,"SM":1,"BI":1,"BR":1,"IB":1,"IR":1,
                     "RB":1,"RI":1,"SB":1, "P":1,"PP":1,"LP":1,"HP":1,
                     "IP":1,"RS":1,"RE":1,"SH":1,"SS":1,"TP":1,
                      "UE":1,"UN":1,"UR":1,"IX":1,"BY":1,}
    ignore_set = {"PD":1, "DT":1, "RS":1, "RE":1,
                  # Undocumented and obscure
                  "LO":1, "PU":1, "UC":1, "l":1,
                  # Extensions from mtools doc set; we can safely ignore them
                  "iX":1, "lp":1,
                  # fm is some kind of attribution extension in MIT pages
                  "FM":1,
                  # .Id is used to embed RCS/SCCS IDs.
                  "Id":1,}
    complain_set = {}
    parabreak_set = {"blank":1,"P":1,"PP":1,"LP":1,"HP":1,"IP":1,"TP":1,"RS":1,}
    sectionbreak_set = {"SH":1,"SS":1,}
    translations = (
      (r"\*(Tm", "&trade;"),
      (r"\*R",  "&reg;"),
      (r"\*(lq", "&ldquo;"),
      (r"\*(rq", "&rdquo;"),
      )
    # Tricky interaction with pod2man here; the Ip reduction will get called if
    # there is an explicit Ip macro, but if pod2man is recognized there will
    # be no explicit definition.
    reductions = {"Pp":"PP", "Tp":"TP", "Ip":"IP",
                  # catch some mdoc-influenced mistakes we see occasionally...
                  "Nm":"B", "Sh":"SH", "Ss":"SS"}
    def __init__(self, source, verbose=0):
        self.source = source            # The framework/translator object
        self.verbose = verbose
        self.hack_urls = True           # Cleared when explicit .UR markup seen
        self.break_trap = None
        self.authors = None             # Set only by the undocumented .BY macro
        self.synopsis = None	# Never gets this value once one has been seen
        self.transplant = []            # Synopsis text found out of place
        self.volnum = []                # Manual volume number from .TH
        self.refnames = {}              # Entry points declared in NAME
        self.seen_synopsis = False
        self.seen_DS = False
        #self.systype = None
    def fold_highlights(self, cmd, args):
        "Translate man font-change macros; return None if cmd isn't one."
        # We need this to be a separate entry point for TP tag processing.
        # .R is not one of the documented font-change macros, but it is
        # occasionally used anyway (eg by sz.1) -- derived from Ultrix.
        if cmd in ("B", "I", "R", "SM"):
            return self.source.direct_highlight(cmd, args)
        elif cmd in ("BI", "BR", "IB", "IR", "RB", "RI", "SB"):
            return self.source.alternating_highlight(cmd, args)
        else:
            return None
    def end_synopsis(self):
        "Section hook: stop ignoring requests that are noise only in Synopsis."
        self.source.unignore("Ve")	# For Perl generated man pages
        self.source.unignore("Vb")	# For Perl generated man pages
        self.source.unignore("Ip")	# For Perl generated man pages
        self.source.unignore("HP")
        self.source.unignore("RS")
        self.source.unignore("RE")
    def flush_synopsis(self, caller):
        "Emit any gathered Synopsis material before entering the body."
        if self.synopsis and self.synopsis.parse_and_emit():
            self.synopsis = None
        else:
            # If there's a transplant, emit it now.
            if self.transplant:
                if self.verbose >= 1:
                    self.source.warning("transplanting Synopsis section")
                caller.interpret_block(self.transplant)
                self.transplant = []
        self.source.declare_body_start()
    def interpret(self, tokens, caller):
        "Interpret one man macro line; return 1 if handled, 0 otherwise."
        cmd = tokens[0][1:]
        args = tokens[1:]
        # Highlighting
        highlighted = self.fold_highlights(cmd, args)
        if highlighted:
            self.source.emit(highlighted)
        # Sectioning
        elif cmd in ("blank", "P","PP","LP","HP") or cmd=="IP" and (not args or not args[0] or args[0][0] in string.digits):
            if self.source.body_section():
                self.source.paragraph()
            return 1
        elif cmd == "SH":
            if not args:
                args = self.source.popline().split()
            # Ignore '.SH ""' -- yes, this actually happens on passwd.1,
            # apparently as a half-assed way to resume paragraphing after
            # a list.
            elif args[0] == "":
                self.source.need_paragraph()
                return 1
            # Handle nasty perversity in cvsversion.1 that might be repeated
            elif args[0].find("--") > -1:
                tokens = args[0].split()
                args[0] = tokens[0]
                self.source.pushline(" ".join(tokens))
            self.source.troff.nf = False
            # Skip blank lines and paragraph commands
            while True:
                line = self.source.popline()
                # Can't use paragraph_break() here lest we skip .TP or .IP
                if line and not line[:3] in (".PP", ".LP", ".P"):
                    self.source.pushline(line)
                    break
            # Now do processing that is specific to the section type.
            if args[0] in ("NAME", "Name"):
                self.source.sectname = "NAME"
                namelines = [""]
                self.source.ignore("nf")
                self.source.ignore("fi")
                while True:
                    line = self.source.popline()
                    if line is None or self.source.section_break(line):
                        self.source.pushline(line)
                        break
                    elif is_comment(line):
                        continue
                    elif not (is_command(line) and self.source.ignorable(line)):
                        if not line or self.source.paragraph_break(line):
                            continue
                        # This copes with cvspack.1.
                        # Maybe we ought to generate something from this?
                        elif match_command(line, "IX"):
                            continue
                        if "-" in line and line.find("-") > 0:
                            namelines.append(line)
                        else:
                            namelines[-1] += " " + line
                self.source.unignore("nf")
                self.source.unignore("fi")
                # Skip lines that are blank or consist of a leading dot only
                # (as in groff_mdoc(7)).  Guard against running off the end
                # of an entirely empty NAME section.
                while namelines and (not namelines[0] or namelines[0] == "."):
                    namelines.pop(0)
                try:
                    namesect = parse_name_section(" ".join(namelines))
                except:
                    raise LiftException("ill-formed NAME section, bailing out.", 2)
                # Split the entire name section on "-".  If it only contains
                # one of these, the part before could be a multi-line list of
                # entry points (which is OK).  If it contains more than one
                # "-", assume it consists of multiple name lines; parse the
                # first, pass through the rest.
                if len(namesect) == 2:
                    (name, description) = namesect[:2]
                else:
                    try:
                        (name, description) = parse_name_section(namelines[0])
                    except:
                        raise LiftException("ill-formed NAME section, bailing out.", 2)
                self.source.emit("<refnamediv id='%s'>" % self.source.make_id_from_title('name'))
                # Avoid shadowing the builtin id() here.
                for refname in map(lambda x: x.strip(), name.split(",")):
                    refname = highlight_stripper.sub("", refname)
                    self.refnames[refname] = True
                    self.source.emit("<refname>%s</refname>" % refname)
                self.source.emit("<refpurpose>%s</refpurpose>"%description)
                if len(namesect) > 2:
                    self.source.error("there are multiple name lines.")
                    # Wrap the extra lines in a FIX-ME comment; formerly the
                    # opener was commented out, leaving a dangling "-->".
                    self.source.emit("<!-- FIX-ME: multiple name lines")
                    for line in namelines[1:]:
                        self.source.emit(line)
                    self.source.emit("-->")
                self.source.emit("</refnamediv>")
            elif args == ["SYNOPSIS"]:
                self.seen_synopsis = True
                self.source.sectname = "SYNOPSIS"
                self.source.ignore("RS")
                self.source.ignore("RE")
                self.source.ignore("HP")
                self.source.ignore("Ve")	# For Perl generated man pages
                self.source.ignore("Vb")	# For Perl generated man pages
                self.source.ignore("Ip")	# For Perl generated man pages
                self.source.sectionhooks.append(self.end_synopsis)
                self.synopsis = SynopsisSectionParser(self)
            elif not self.seen_synopsis and self.source.find(".SH SYNOPSIS"):
                # Some section precedes Synopsis; save it for transplanting.
                self.transplant = [quoteargs(tokens)]
                while True:
                    line = self.source.popline()
                    if line is None:
                        break
                    if line.find("SYNOPSIS") > -1:
                        self.source.pushline(line)
                        break
                    self.transplant.append(line)
            else:
                self.flush_synopsis(caller)
                self.source.push_section(1, " ".join(args))
        elif cmd == "SS":
            if not args:
                args = self.source.popline().split()
            self.flush_synopsis(caller)
            if self.source.body_section():
                self.source.push_section(2, " ".join(args))
            else:
                # In case the Synopsis section contains a subsection,
                # as in cph.1, we want to start a new *first* level section.
                self.source.push_section(1, " ".join(args))
        elif cmd == "TH":
            # .TH title volnum ?date? ?source? ?manual?
            title = args[0]
            self.volnum = args[1]
            date = len(args)   >= 3 and args[2]
            msrc = len(args)   >= 4 and args[3]
            manual = len(args) >= 5 and args[4]
            self.source.preamble = False
            self.source.emit("<refmeta>")
            self.source.emit("<refentrytitle>%s</refentrytitle>" % title)
            self.source.emit("<manvolnum>%s</manvolnum>" % self.volnum)
            if date:
                self.source.emit("<refmiscinfo class='date'>%s</refmiscinfo>" % date)
            if msrc:
                self.source.emit("<refmiscinfo class='source'>%s</refmiscinfo>"% msrc)
            if manual:
                self.source.emit("<refmiscinfo class='manual'>%s</refmiscinfo>"% manual)
            self.source.emit("</refmeta>")
        # Lists
        elif cmd == "TP":
            # Brace yourself for an epic.  TP syntax is loose and people
            # do really perverse things with it.
            if not self.source.body_section(): return 1
            self.source.end_paragraph(label="TP")
            self.tpcount = 0
            self.source.pushline(".TP")
            while self.source.lines:
                # This conceals a great deal of hair, and may throw
                # an exception.  TQ is found in grefer.1.
                term = gather_term(("TP","TQ"), self, self.fold_highlights)
                if term is None:
                    break
                # We've got our term.  Gather the item body and translate.
                # Skip an ordinary paragraph command if it's the first thing
                # we see.
                if self.source.peekline() in (".P", ".PP"):
                    self.source.popline()
                # Also, if we hit a section break right after the tag line
                # assume the author was being brain-dead and arrange for the
                # tag to be emitted as an ordinary paragraph after breaking
                # out of the loop.
                if self.source.peekline() and self.source.peekline()[:3] in (".SH", ".SS"):
                    trail = ""
                    if not self.source.quiet:
                        trail = " <!-- telltale signs of broken .TP markup -->"
                    self.source.pushline("<para>" + term + "</para>" + trail)
                    break
                # Passed those tests. Looks like a real list item.
                if self.tpcount == 0:
                    self.source.emit("<variablelist remap='TP'>")
                else:
                    self.source.emit("</varlistentry>")
                self.source.emit("<varlistentry>")
                self.source.emit("<term>%s</term>" % fontclose(term))
                self.tpcount += 1
                # Gather item, emitting opening and closing listitem tags.
                gather_item(self.source, "listitem")
            if self.tpcount:
                self.source.emit("</varlistentry>")
                trail = ""
                if not self.source.quiet:
                    trail = " <!-- .TP -->"
                self.source.emit("</variablelist>" + trail)
        elif cmd == "IP":
            if not self.source.body_section(): return 1
            self.source.end_paragraph(label="IP")
            # If no tag is specified, treat as ordinary paragraph.
            if tokens[1] in ip_tag_mapping:
                self.source.pushline(quoteargs(tokens))
                gather_itemizedlist(".IP", self.source,
                                    ip_tag_mapping[tokens[1]])
            else:
                self.source.pushline(quoteargs(tokens))
                gather_variablelist(".IP", self.source)
        # Hyperlinks
        elif cmd == "UE":
            if self.source.body_section():
                self.source.pushline("</ulink>")
        elif cmd == "UN":
            if not args:
                self.source.error("UN macro requires an argument")
            elif self.source.body_section():
                self.source.pushline("<anchor id='%s'>" % self.source.make_id_from_title(tokens[1]))
        elif cmd == "UR":
            if not args:
                self.source.error("UR macro requires an argument")
            elif self.source.body_section():
                self.source.pushline("<ulink url='%s'>" % tokens[1])
            self.hack_urls = False
        # Indexing
        elif cmd == "IX":
            if self.source.body_section() and len(tokens) > 1:
                # Discard Perl section indicators.  (Formerly this sliced
                # the dead `tokens' variable, so the indicator was never
                # actually removed from the index entry.)
                if tokens[1] in ("Name","Title","Header","Subsection","Item"):
                    args = args[1:]
                self.source.pushline(self.source.index(map(deemphasize, args)))
        # Ultrix extensions.  Taken from groff's man.ultrix file
        # Some of these (like EX/EE) appear in Linux manual pages.
        # See http://www.geocrawler.com/archives/3/377/1992/10/0/2062814/
        # for an interesting historical sidelight.
        elif cmd == "CT":
            # .CT c renders a control-character keycap, <CTRL/c>; the
            # closing delimiter was formerly a second &lt; by mistake.
            self.source.pushline("&lt;CTRL/%s&gt;" % args[0])
        elif cmd == "Ds":
            self.source.emit("<literallayout remap='Ds'>")
        elif cmd == "De":
            self.source.emit("</literallayout> <!-- De -->")
        elif cmd == "EX":
            self.source.emit('<programlisting remap="EX">')
        elif cmd == "EE":
            self.source.emit("</programlisting> <!-- EE -->")
        elif cmd == "NT":
            self.source.emit("<note remap='NT'>")
        elif cmd == "NE":
            self.source.emit("</note> <!-- NE -->")
        elif cmd == "RN":
            self.source.pushline("<keycap>RETURN</keycap>")
        elif cmd == "PN":
            self.source.pushline("<filename>%s</filename>" % args[0])
        elif cmd == "MS":
            # A list is a single %-operand; convert the slice to a tuple so
            # both %s conversions are filled (formerly raised TypeError).
            self.source.pushline("<citerefentry><refentrytitle>%s</refentrytitle><manvolnum>%s</manvolnum></citerefentry>" % tuple(args[:2]))
        # Undocumented -- interpret args as comma-separated list of authors
        elif cmd == "BY":
            self.authors = " ".join(args).split(",")
        # Undocumented -- hangover from old Bell Labs and Berkeley macros
        elif cmd == "UX":
            self.source.pushline("Unix")
        elif cmd == "AT":
            pass
#            self.systype = "7th Edition"	# Also .AT 3
#            if len(args) > 0:
#                if args[0] == "4":
#                    self.systype = "System III"
#                elif args[0] == "5":
#                    if len(args) > 1:
#                        self.systype = "System V Release 2"
#                    else:
#                        self.systype = "System V"
        elif cmd == "UC":
            pass
#            self.systype = "3rd Berkeley Distribution":
#            if len(args) > 0:
#                if args[0] == "4":
#                    self.systype = "4th Berkeley Distribution"
#                elif args[0] == "5":
#                    self.systype = "4.2 Berkeley Distribution"
#                elif args[0] == "6":
#                    self.systype = "4.3 Berkeley Distribution"
#                elif args[0] == "7":
#                    self.systype = "4.4 Berkeley Distribution"
        # mtools man pages use these...ugh
        elif cmd == "(l":
            self.source.pushline(".nf")
        elif cmd == ")l":
            self.source.pushline(".fi")
        # DS/DE isn't part of the man macros.  Interpret it anyway,
        # as there is an obvious meaning that people try to use.
        elif cmd == "DS":
            # Catch an odd, pointless use of .DS that pops up on a number
            # of SANE manual pages (probably generated from something).
            if self.source.peekline() == ".sp\n":
                self.source.popline()
                self.source.popline()
            elif self.source.find("DE"):
                self.source.begin_block("literallayout", remap='DS', nofill=1)
                self.seen_DS = True
        elif cmd == "DE":
            if self.seen_DS:
                self.source.end_block("literallayout", remap='DE')
            else:
                return 0
        # Use our reductions as fallbacks
        elif cmd in ManInterpreter.reductions:
            replace_with = ManInterpreter.reductions[cmd]
            self.source.pushline("." + replace_with + " " + quoteargs(args))
        else:
            return 0
        return 1
    def wrapup(self):
        "Emit an Author section if we gathered authors from .BY."
        if self.authors:
            # Assumes man pages with "BY" don't have explicit AUTHOR parts.
            self.source.emit("<refsect1><title>Author</title>")
            self.source.emit("<para>" + ", ".join(self.authors) + "</para>")
            self.source.emit("</refsect1>")
    def preprocess(self, text):
        "Clean up pathological constructs before macro interpretation."
        # Ugh.  Some ISC man pages (for lwres*) actually make this necessary!
        # Who knew that troff allowed this?
        text = re.sub(r"\n\fR.SH", r".SH", text)
        # Nuke bogus empty sections (as in a2p.1 and dos2unix.1).
        text = re.sub(r'\n\.S[HS](.*)\n\.(\\"|IX|PP|LP).*\n\.S[HS]', r'\n.\\" Empty \1 section removed\n.SH', text)
        # We use [A-Z]* here to let by cvsversion
        text = re.sub(r"\n\.S[HS]([A-Z ]*)\n*\.SH", r'\n.\\" Empty \1 section removed\n.SH', text)
        return text
    def postprocess(self, text):
        "Pattern-based lifting applied to the translated output."
        # If there was no explicit URL markup, process implicit ones
        if self.hack_urls:
            text = untagged(url_re).sub(r"<ulink url='\g<url>'>\g<url></ulink>", text)
        # Pattern-based lifting in the final sections
        if not self.source.is_active("mwww"):
            foundit = text.rfind("AUTHOR")
            if foundit > -1:
                before = text[:foundit]
                after = text[foundit:]
                after = re.sub(email_re, r'<email>\1</email>', after)
                text = before + after
        # Lift cross references like foo(1) in SEE ALSO.
        foundit = text.rfind("SEE ALSO")
        if foundit > -1:
            before = text[:foundit]
            after = text[foundit:]
            after = re.sub(r'([a-z_]+)\(([0-9].?)\)', r'<citerefentry><refentrytitle>\1</refentrytitle><manvolnum>\2</manvolnum></citerefentry>', after)
            text = before + after
        # Wrap list terms in the FILES section with <filename>.
        foundit = text.rfind("FILES")
        if foundit > -1:
            before = text[:foundit]
            after = text[foundit:]
            following = ""
            endit = after.find("<refsect1")
            if endit > -1:
                following = after[endit:]
                after = after[:endit]
            after = re.sub(r'<term>([^<]*)</term>', r'<term><filename>\1</filename></term>', after)
            text = before + after + following
        return text

class Pod2ManInterpreter:
    "Interpret pod2man emulation macros."
    name = "pod2man"
    exclusive = False
    toptag = "refentry"
    immutable_set = {"Sp":1,"Ip":1,"Sh":1,"Vb":1,"Ve":1,}
    ignore_set = {}
    complain_set = {}
    parabreak_set = {"Sp":1, "Ip":1,}
    sectionbreak_set = {"Sh":1,}
    translations = (
      (r'\*(--',"&mdash;"),
      (r'\*(PI',"&pgr;"),
      (r'\*(L"',"&ldquo;"),
      (r'\*(R"',"&rdquo;"),
      (r'\*(C+',"C++;"),
      (r"\*(C'","'"),
      (r'\*(C`',"`"),
      )
    def __init__(self, source, verbose=0):
        self.source = source
        self.verbose = verbose
        self.break_trap = None
    def interpret(self, tokens, caller):
        "Process one pod2man macro call; return 1 if handled, 0 if not ours."
        cmd = tokens[0][1:]
        args = tokens[1:]
        # An .Ip with no tag, an empty tag, or a tag beginning with a digit
        # is just a paragraph break -- exactly like .Sp.
        untagged_ip = cmd == "Ip" and (not args or not args[0]
                                       or args[0][0] in string.digits)
        if cmd == "Sp" or untagged_ip:
            self.source.paragraph()
            return 1
        if cmd == "Sh":
            # Subsection heading: reduce to the man macro and requeue it.
            self.source.pushline(quoteargs([".SS"] + args))
            return 1
        if cmd == "Vb":
            # Verbatim-begin maps to an unfilled literal layout.
            if self.source.body_section():
                self.source.begin_block("literallayout", remap="Vb", nofill=1)
            return 1
        if cmd == "Ve":
            if self.source.body_section():
                self.source.end_block("literallayout", remap="Ve")
            return 1
        if cmd == "Ip":
            # Tagged item: requeue the line and let a list gatherer eat it.
            self.source.pushline(quoteargs(tokens))
            if tokens[1]:
                gather_variablelist(".Ip", self.source)
            else:
                gather_itemizedlist(".Ip", self.source, "bullet")
            return 1
        return 0
    def preprocess(self, text):
        "Strip out the pod2man prelude before translation begins."
        # Detect and strip out a pod2man header.  It does some very funky and
        # random stuff that is too hard for our troff emulation to cope with.
        # We can easily simulate the structural effect of its macros.
        # We'll emulate Sh, Sp, Ip, Vb, Ve, and provide translations for the
        # special characters \*(--, \*(PI, \*(L", \*(R", \*(C+, \*(C',
        # and \*(`.S
        lines = text.split("\n")
        while True:
            line = lines.pop(0)
            self.source.lineno += 1
            if re.match(r"\.[ST]H", line):
                break
        return "\n".join(lines)
    def postprocess(self, text):
        "No pattern-based lifting is needed for pod2man output."
        return text

class TkManInterpreter:
    "Interpret Tk manual emulation macros."
    name = "tkman"
    exclusive = False
    toptag = "refentry"
    immutable_set = {"AP":1,"AS":1,"BS":1,"BE":1,"CS":1,"CE":1,
                     "VS":1,"VE":1,"DS":1,"DE":1,"SO":1,"SE":1,"OP":1,
                     "UL":1,"^B":1,}
    ignore_set = {}
    complain_set = {}
    parabreak_set = {"AP":1,}
    sectionbreak_set = {}
    translations = ()
    def __init__(self, source, verbose=0):
        self.source = source
        self.verbose = verbose
        self.break_trap = None
    def interpret(self, tokens, caller):
        "Interpret one Tk macro line; return 1 if handled, 0 otherwise."
        cmd = tokens[0][1:]
        args = tokens[1:]
        # Documentation for these is taken from the wish.1 header.
        #
        # .AP type name in/out ?indent?
        #   Start paragraph describing an argument to a library procedure.
        #   type is type of argument (int, etc.), in/out is either "in", "out",
        #   or "in/out" to describe whether procedure reads or modifies arg,
        #   and indent is equivalent to second arg of .IP (shouldn't ever be
        #   needed;  use .AS below instead)
        if cmd == "AP":
            if not self.source.body_section(): return 1
            self.source.end_paragraph(label="AP")
            self.source.emit("<informaltable>\n<tgroup cols='3'>\n<tbody>\n")
            self.source.pushline(quoteargs(tokens))
            while self.source.lines:
                line = self.source.popline()
                tokens = lineparse(line)
                # Pad out short argument lines so the row always has 3 cells.
                while len(tokens) < 4:
                    tokens.append("")
                self.source.emit("<row><entry>%s</entry><entry>%s</entry><entry>%s</entry>" % (tokens[1], tokens[2], tokens[3]))
                if tokens[1] not in c_keywords:
                    globalhints.post(tokens[1], "type")
                gather_item(self.source, "entry")
                self.source.emit("</row>")
                if self.source.section_break(self.source.peekline()):
                    break
            self.source.emit("</tbody>\n</tgroup>\n</informaltable>\n")
        # .AS ?type? ?name?
        #   Give maximum sizes of arguments for setting tab stops.  Type and
        #   name are examples of largest possible arguments that will be passed
        #   to .AP later.  If args are omitted, default tab stops are used.
        elif cmd == "AS":
            self.source.passthrough(tokens)
        # .BS
        #   Start box enclosure.  From here until next .BE, everything will be
        #   enclosed in one large box.
        elif cmd == "BS":
            self.source.passthrough(tokens)
        # .BE
        #   End of box enclosure.
        elif cmd == "BE":
            self.source.passthrough(tokens)
        # .CS
        #   Begin code excerpt.
        elif cmd == "CS":
            self.source.begin_block("programlisting", remap="CS", nofill=1)
        # .CE
        #   End code excerpt.
        elif cmd == "CE":
            self.source.end_block("programlisting", remap="CE")
        # .VS ?version? ?br?
        #   Begin vertical sidebar, for use in marking newly-changed parts
        #   of man pages.  The first argument is ignored and used for recording
        #   the version when the .VS was added, so that the sidebars can be
        #   found and removed when they reach a certain age.  If another
        #   argument is present, then a line break is forced before starting
        #   the sidebar.
        elif cmd == "VS":
            # It's tempting to try translating this as a <sidebar>/
            # Problem is the usage pattern really is presentation-level;
            # .VS/.VE is frequently wrapped around entire major sections.
            # There are also nasty interactions with list markup.
            self.source.passthrough(tokens)
        # .VE
        #   End of vertical sidebar.
        elif cmd == "VE":
            self.source.passthrough(tokens)
        # .DS
        #   Begin an indented unfilled display.
        elif cmd == "DS":
            self.source.begin_block("blockquote", remap="DS", nofill=1)
        # .DE
        #   End of indented unfilled display.
        elif cmd == "DE":
            self.source.end_block("blockquote", remap="DE")
        # .SO
        #   Start of list of standard options for a Tk widget.  The
        #   options follow on successive lines, in four columns separated
        #   by tabs.
        # .SE
        #   End of list of standard options for a Tk widget.
        elif cmd == "SO":
            self.source.push_section(1, 'STANDARD OPTIONS')
            self.source.pushline("l l l l.")
            self.source.TBL(".SE")
        elif cmd == "OP":
        # .OP cmdName dbName dbClass
        #   Start of description of a specific option.  cmdName gives the
        #   option's name as specified in the class command, dbName gives
        #   the option's name in the option database, and dbClass gives
        #   the option's class in the option database.
            self.source.emit("<synopsis>")
            # The latter two format strings formerly contained \$2/\$3
            # instead of %s, so the % interpolations raised TypeError.
            self.source.emit("Command-Line Name:    \\fB\\%s\\fR" % args[0])
            self.source.emit("Database Name:        \\fB\\%s\\fR" % args[1])
            self.source.emit("Database Class:       \\fB\\%s\\fR" % args[2])
            self.source.emit("</synopsis>")
        # .UL arg1 arg2
        #   Print arg1 underlined, then print arg2 normally.
        elif cmd == "UL":
            # This class has no pushline method of its own; the line must go
            # through the framework (was a bare self.pushline -> crash).
            self.source.pushline("<emphasis remap='U'>%s</emphasis>%s"%(args[0],args[1]))
        else:
            return 0
        return 1
    def preprocess(self, text):
        "No preprocessing needed for TkMan macros."
        return text
    def postprocess(self, text):
        "No pattern-based lifting needed for TkMan output."
        return text

class MdocInterpreter:
    "Interpret mdoc(7) macros."
    name = "mdoc"
    exclusive = True
    toptag = "refentry"
    immutable_set = {}
    ignore_set = {"blank":1, "Bk":1, "Ek":1,}
    complain_set = {"Cd":1,"Db":1,}
    parabreak_set = {"Pp":1,}
    sectionbreak_set = {"Sh":1, "Ss":1}
    translations = (
        (r"\(Lq",	"&ldquo;"),	# ISOnum
        (r"\(Rq",	"&rdquo;"),	# ISOpub
        (r"\*q",	'"'),
        (r"\(Pi",	"&pgrk;"),
        (r"\(Ne",	"&ne;"),
        (r"\(Le",	"&le;"),
        (r"\(Ge",	"&ge;"),
        (r"\(Lt",	"&lt;"),
        (r"\(Gt",	"&gt;"),
        (r"\(Pm",	"&plusmn;"),
        (r"\(If",	"&infin;"),
        (r"\(Na",	"NaN"),
        (r"\(Ba",	"&verbar;"),
        )
    # These are listed in the order they appear on the mdoc(7) man page,
    # except for .Fl, .Nd, %N, %D, %O, Lb, St, which are missing from
    # those tables. Ai and Px are not documented at all.
    # Also we have to treat Sm as parseable even though it isn't.
    parsed = {"Ad":1,"Ai":1,"An":1,"Ar":1,"Cm":1,"Dv":1,"Er":1,"Ev":1,
              "Fl":1,"Fo":1,"Fc":1,"Ic":1,"Lb":1,"Li":1,"Nd":1,
              "Nm":1,"Op":1,"Oo":1,"Oc":1,"Ot":1,"Pa":1,"St":1,"Va":1,
              "Vt":1,"Xr":1,"%A":1,"%B":1,"%D":1,"%J":1,"%N":1,"%O":1,
              "%T":1,"Ac":1,"Ao":1,"Ap":1,"Aq":1,"Bc":1,"Bo":1,"Bq":1,
              "Bx":1,"Dc":1,"Do":1,"Dq":1,"Ec":1,"Em":1,"Eo":1,
              "No":1,"Ns":1,"Pc":1,"Pf":1,"Po":1,"Pq":1,"Px":1,"Qc":1,
              "Ql":1,"Qo":1,"Qq":1,"Sc":1,"Sm":1,"So":1,"Sq":1,
              "Sx":1,"Sy":1,"Ta":1,"Tn":1,"Ux":1,"Xc":1,"Xo":1,}
    # Macros that may be invoked from the argument list of another macro.
    # Note: "Oo" was previously misspelled "0o" (digit zero), which made
    # open-bracket enclosures uncallable.
    callable = {"Ad":1,"Ai":1,"An":1,"Ar":1,"Cm":1,"Dv":1,"Er":1,"Ev":1,
                "Fa":1,"Fd":1,"Fl":1,"Fo":1,"Fc":1,"Ic":1,"Lb":1,"Li":1,"Nm":1,
                "Oc":1,"Oo":1,"Op":1,"Ot":1,"Pa":1,"St":1,"Va":1,"Vt":1,"Xr":1,
                "%B":1,"%T":1,"Ac":1,"Ao":1,"Ap":1,"Aq":1,"Bc":1,"Bo":1,
                "Bq":1,"Bx":1,"Dc":1,"Do":1,"Dq":1,"Ec":1,"Em":1,"Eo":1,
                "No":1,"Ns":1,"Pc":1,"Po":1,"Pq":1,"Px":1,"Qc":1,"Ql":1,
                "Qo":1,"Qq":1,"Sc":1,"So":1,"Sq":1,"St":1,"Sx":1,"Sy":1,
                "Ta":1,"Tn":1,"Ux":1,"Xc":1,"Xo":1,}
    # Substitution strings for the St request
    st_dict = {
            # ANSI/ISO C
            "-ansiC-89":	"ANSI X3.159-1989 (ANSI C)",
            "-ansiC":		"ANSI X3.159-1989 (ANSI C)",
            "-isoC":		"ISO/IEC 9899:1990 (ISO C 89)",
            "-isoC-99":		"ISO/IEC 9899:1999 (ISO C 99)",
            # POSIX Part 1: System API
            "-p1003.1":		"IEEE Std 1003.1 (POSIX.1)",
            "-p1003.1-88":	"IEEE Std 1003.1-1988 (POSIX.1)",
            "-p1003.1-90":	"IEEE Std 1003.1-1990 (POSIX.1)",
            "-iso9945-1-90":	"IEEE Std 1003.1-1990 (POSIX.1)",
            "-p1003.1b-93":	"IEEE Std 1003.1b-1993 (POSIX.1)",
            "-p1003.1c-95":	"IEEE Std 1003.1c-1995 (POSIX.1)",
            "-p1003.1i-95":	"IEEE Std 1003.1i-1995 (POSIX.1)",
            "-p1003.1-96":	"ISO/IEC 9945-1:1996 (POSIX.1)",
            "-iso9945-1-96":	"ISO/IEC 9945-1:1996 (POSIX.1)",
            "-p1003.1g-2000":	"IEEE Std 1003.1g-2000 (POSIX.1)",

            # POSIX Part 2: Shell and Utilities
            "-p1003.2":		"IEEE Std 1003.2 (POSIX.2)",
            "-p1003.2-92":	"IEEE Std 1003.2-1992 (POSIX.2)",
            "-p1003.2a-92":	"IEEE Std 1003.2a-1992 (POSIX.2)",
            "-iso9945-2-93":	"ISO/IEC 9945-2:1993",

            # X/Open
            "-susv2":	"Version 2 of the Single UNIX Specification (SuSv2)",
            "-svid4":	"System V Interface Definition, Fourth Edition (SVID)",
            "-xbd5":	"X/Open System Interface Definitions Issue 5 (XBD 5)",
            "-xcu5":	"X/Open Commands and Utilities Issue 5 (XCU 5)",
            "-xcurses4.2":	"X/Open Curses Issue 4.2 (XCURSES 4.2)",
            "-xns5":		"X/Open Networking Services Issue 5 (XNS 5)",
            "-xns5.2":	"X/Open Networking Services Issue 5.2 (XNS 5.2)",
            "-xpg3":	"X/Open Portability Guide Issue 3 (XPG 3)",
            "-xpg4":	"X/Open Portability Guide Issue 4 (XPG 4)",
            "-xpg4.2":	"X/Open Portability Guide Issue 4.2 (XPG 4.2)",
            "-xsh5":	"X/Open System Interfaces and Headers Issue 5 (XSH 5)",

            # Miscellaneous
            "-ieee754":		"IEEE Std 754-1985",
            "-iso8802-3":	"ISO/IEC 8802-3:1989",
            }

    # Substitution strings for the Lb request
    lb_dict = {
        "libarm32":	"ARM32 Architecture Library (libarm32, -larm32)",
        "libc":		"Standard C Library (libc, -lc)",
        "libcompat":	"Compatibility Library (libcompat, -lcompat)",
        "libcrypt":	"Crypt Library (libcrypt, -lcrypt)",
        "libcurses":	"Curses Library (libcurses, -lcurses)",
        "libedit":	"Command Line Editor Library (libedit, -ledit)",
        "libi386":	"i386 Architecture Library (libi386, -li386)",
        "libipsec":	"IPsec Policy Control Library (libipsec, -lipsec)",
        "libkvm":	"Kernel Data Access Library (libkvm, -lkvm)",
        "libm":		"Math Library (libm, -lm)",
        "libmenu":	"Curses Menu Library (libmenu, -lmenu)",
        "libossaudio":	"OSS Audio Emulation Library (libossaudio, -lossaudio)",
        "libposix":	"POSIX Compatibility Library (libposix, -lposix)",
        "libresolv":    "DNS Resolver Library (libresolv, -lresolv)",
        "libtermcap":	"Termcap Access Library (libtermcap, -ltermcap)",
        "libutil":	"System Utilities Library (libutil, -lutil)",
        "libz":		"Compression Library (libz, -lz)",
        }
    # Punctuation treated specially by the mdoc argument parser.
    openers = {"(":1, "[":1}
    closers = {".":1, ",":1, "&semi;":1, ")":1, "]":1,}
    def __init__(self, source, verbose=0):
        "Set up per-document translation state for one mdoc page."
        self.source = source
        self.verbose = verbose
        self.break_trap = None
        # Filled in by the header macros (.Dd, .Dt, .Os, .Nm, .Nd)
        self.month = None
        self.day = None
        self.year = None
        self.title = None
        self.volnum = None
        self.os = None
        self.name = None
        self.desc = None
        self.synopsis = None
        # Token and list-nesting state
        self.tokens = []
        self.liststack = []
        self.suppress_callables = False
        self.spacemode = True
        # Bibliography accumulation (.Rs/.Re scope)
        self.biblio = []
        self.inref = False
        self.refnames = {}
        # True once the refmeta/refnamediv header has been emitted
        self.refmeta_flushed = False
    def flush_refmeta(self):
        "Emit the <refmeta> and <refnamediv> document headers, exactly once."
        if self.refmeta_flushed:
            return
        out = self.source.emit
        out("<refmeta>")
        out("<refentrytitle>%s</refentrytitle>" % self.title[0])
        out("<manvolnum>%s</manvolnum>" % self.volnum)
        out("</refmeta>")
        out("")
        out("<refnamediv id='%s'>" % self.source.make_id_from_title('purpose'))
        out("<refname>%s</refname>" % self.name)
        out("<refpurpose>%s</refpurpose>" % self.desc)
        out("</refnamediv>")
        self.refmeta_flushed = True
    def hasargs(self, cmd, args):
        "Check that the macro has at least one argument; report an error if not."
        # The trailing "return arg" that used to follow this if/else was
        # unreachable and referenced an undefined name; it has been removed.
        if not args:
            self.source.error("the %s macro requires arguments." % cmd)
            return 0
        else:
            return 1
    def interpret(self, tokens, caller):
        "Interpret one parsed mdoc request line; return 1 if handled, 0 if not."
        tokens[0] = tokens[0][1:]
        # First, collect any additional arguments implied by o/c enclosures.
        for c in "ABFDOPQSX":
            if c + "o" in tokens and c + "c" not in tokens:
                while True:
                    line = self.source.popline()
                    newtokens = lineparse(line)
                    tokens.append("\n")
                    if newtokens is None:
                        # Unparseable continuation; treat it as literal text.
                        tokens.append("No")
                        tokens.append(line)
                    else:
                        for fld in newtokens:
                            if fld[0] == '.':
                                tokens.append(fld[1:])
                            else:
                                tokens.append(fld)
                        if c + "c" in newtokens or "." + c + "c" in newtokens:
                            break
        # Now that we've folded ?o/?c pairs, interpret resulting command
        command = tokens[0]
        args = tokens[1:]
        # First, check parsed/callable macros
        if command in MdocInterpreter.parsed:
            self.source.emit(self.eval(tokens))
        # These aren't in the parsed/callable set in mdoc(7)
        elif command == "At":
            self.source.pushline("<productname>AT&amp;T Unix</productname>")
        elif command == "Bsx":
            if args:
                version = " " + args[0]
            else:
                version = ""
            self.source.pushline("<productname>BSD/OS%s</productname>" % version)
        elif command == "Fx":
            if args:
                version = " " + args[0]
            else:
                version = ""
            self.source.pushline("<productname>FreeBSD%s</productname>" % version)
        elif command == "Nx":
            if args:
                version = " " + args[0]
            else:
                version = ""
            self.source.pushline("<productname>NetBSD%s</productname>" % version)
        elif command == "Ox":
            if args:
                version = " " + args[0]
            else:
                version = ""
            self.source.pushline("<productname>OpenBSD%s</productname>" % version)
        elif command in ("Dl", "D1"):
            if self.hasargs(command, args):
                self.source.pushline("<phrase role='%s'>%s</phrase>" % (command, self.eval(["No"] + args)))
        elif command == "In":
            self.source.pushline("#include <%s>" % args[0])
        # Ex, Fa, Fd, Fn, Rv are not in the non-parseable exception list,
        # but usage on the groff_mdoc(7) list shows they should be.
        elif command == "Ex":
            if self.hasargs("Ex", args):
                if args[0] == "-std":
                    # Fixed: must go through self.source; the interpreter
                    # itself has no pushline() method (cf. the Rv case below).
                    self.source.pushline("<para>The %s utility exits 0 on success, and >0 if an error occurs.</para>" % args[1])
        elif command == "Fa":
            self.source.pushline(" ".join(args))
        elif command == "Fd":
            self.source.pushline(" ".join(args))
        elif command == "Fn":
            self.source.pushline(args[0] + "(" + ", ".join(args[1:]) + ");")
        elif command == "Ft":
            if self.hasargs("Ft", args):
                if self.source.body_section():
                    self.source.pushline(string.join(self.encloseargs(args,
                                     "<type remap='Ft'>", "</type>")))
                else:
                    self.source.pushline(" ".join(args))	# Feed the parser
        elif command == "Rv":
            if self.hasargs("Rv", args):
                if args[0] == "-std":
                    self.source.pushline("<para>The %s() function returns the value 0 if successful; otherwise the value ‐1 is returned and the global variable errno is set to indicate the error.</para>" % args[1])
        # Hyperlinks
        elif command == "UE":
            self.source.emit("</ulink>")
        elif command == "UN":
            self.source.pushline("<anchor id='%s'>" % self.source.make_id_from_title(args[0]))
        elif command == "UR":
            self.source.pushline("<ulink url='%s'>" % args[0])
            # Presumably read by the framework's URL-recognition pass to
            # avoid double-marking this URL -- confirm against TroffTranslator.
            self.hack_urls = False
        # Structure requests
        elif command == "Dd":
            (self.month, self.day, self.year) = args[:3]
            # Strip the trailing comma from e.g. "25," in ".Dd June 25, 2002"
            self.day = self.day[:-1]
        elif command == "Dt":
            self.title = args
            self.volnum = args[1]
        elif command == "Os":
            # NOTE(review): the with-args case stores the token list while
            # the default stores a string; consumers appear tolerant of both.
            if args:
                self.os = args
            else:
                self.os = "BSD"
        elif command == "Sh":
            if not args:
                args = self.source.popline().split()
            if args[0].upper() == "NAME":
                self.source.sectname = "Name"
                # Copes with lines that are blank or a dot only (groff_mdoc...)
                while self.source.peekline() in ("", "."):
                    self.source.popline()
                # Kluge -- it turns out that some web pages (like ash.1)
                # don't use the mandoc macros.  Instead they use a Linux-
                # style "NAME - description" header.  Test for this.
                line = self.source.popline()
                if is_command(line):
                    # Macros will handle it
                    self.source.pushline(line)
                else:
                    # Parse it explicitly.
                    (self.name, self.desc) = parse_name_section(line)
            elif args[0].upper() == "SYNOPSIS":
                self.flush_refmeta()
                self.source.preamble = True
                self.source.sectname = "Synopsis"
                self.synopsis = SynopsisSectionParser(self)
                return 1	# Declaration macros will do the work
            else:
                self.flush_refmeta()
                # in case someone forgets to close a list (see mktemp.1).
                # NOTE(review): this bails out after pushing a single .El,
                # dropping deeper nesting and the .Sh line itself -- confirm
                # this is the intended recovery.
                for lst in self.liststack:
                    self.source.pushline(".El")
                    return 1
                if self.source.preamble:
                    self.source.preamble = False
                    self.source.declare_body_start()
                if self.synopsis:
                    self.synopsis.parse_and_emit()
                    self.synopsis = None
                self.source.push_section(1, " ".join(args))
        elif command == "Ss":
            # in case someone forgets to close a list
            # NOTE(review): same single-.El bailout as the Sh case above.
            for lst in self.liststack:
                self.source.pushline(".El")
                return 1
            self.source.push_section(2, " ".join(args))
        elif command == "Pp":
            if self.source.body_section():
                self.source.paragraph()
        elif command == "Bd":
            if self.source.peekline() and self.source.peekline()[0:3] != ".Bl":
                self.source.begin_block("literallayout", remap="Bd", nofill=1)
        elif command == "Ed":
            if self.source.troff.nf:
                self.source.end_block("literallayout", remap="Bd")
            self.source.need_paragraph()
        # List markup
        elif command == "Bl":
            # There may  be leading text here that's not part of an item
            # (as in ash(1)).  Pass it through before emitting the list header.
            while True:
                nextline = self.source.popline()
                if match_command(nextline, "It"):
                    self.source.pushline(nextline)
                    break
                else:
                    self.source.interpret_block([nextline])
            self.source.end_paragraph(label="Bl")
            # Look ahead to El: count the non-.It lines (item bodies) so we
            # can tell a genuine list from a table of bare .It rows.
            hasbodies = 0
            ind = 0
            while ind < len(self.source.lines):
                nextline = self.source.lines[ind]
                ind += 1
                if match_command(nextline, "El"):
                    break
                if not match_command(nextline, "It"):
                    hasbodies += 1
            self.itemcount = 0
            if not hasbodies or "-column" in tokens[1:]:
                self.source.emit("<table remap='%s'><title></title>"% " ".join(tokens))
                columns = 0
                # Fixed: count columns from the request's own tokens; the
                # old code read self.tokens, which is always empty, so every
                # table came out with cols='0'.
                for fld in tokens[1:]:
                    if fld[0] != '-':
                        columns += 1
                self.source.emit("<tgroup cols='%d'><tbody>" % columns)
                self.liststack.append("</table>")
            elif "-bullet" in tokens[1:]:
                self.source.emit("<itemizedlist remap='%s' mark='bullet'>" % " ".join(tokens))
                self.liststack.append("</itemizedlist>")
            elif "-item" in tokens[1:]:
                self.source.emit("<itemizedlist remap='%s'>"% " ".join(tokens))
                self.liststack.append("</itemizedlist>")
            elif "-enum" in tokens[1:]:
                self.source.emit("<orderedlist remap='%s'>" % " ".join(tokens))
                self.liststack.append("</orderedlist>")
            elif "-tag" in tokens[1:]:
                self.source.emit("<variablelist remap='%s'>"% " ".join(tokens))
                self.liststack.append("</variablelist>")
            elif "-diag" in tokens[1:]:
                self.source.emit("<variablelist remap='%s'>"% " ".join(tokens))
                self.liststack.append("</variablelist>")
                self.suppress_callables = True
            elif "-hang" in tokens[1:]:
                self.source.emit("<variablelist remap='%s'>"% " ".join(tokens))
                self.liststack.append("</variablelist>")
            elif "-ohang" in tokens[1:]:
                self.source.emit("<variablelist remap='%s'>"% " ".join(tokens))
                self.liststack.append("</variablelist>")
            elif "-inset" in tokens[1:]:
                self.source.emit("<variablelist remap='%s'>"% " ".join(tokens))
                self.liststack.append("</variablelist>")
        elif command == "It":
            if args:
                tagline = self.eval(["No"] + args)
                args = tagline.split()
            else:
                tagline = ""
                args = [""]
            # Columns into tables: the arguments of the item macro are it
            if self.liststack[-1] == "</table>":
                self.source.emit("    <row>")
                for fld in args:
                    self.source.emit("      <entry>%s</entry>" % fontclose(fld))
                self.source.emit("    </row>")
            else:
                # Otherwise we may have to close a previous entry
                if self.itemcount:
                    self.source.end_paragraph(label="It")
                    self.source.emit("</listitem>")
                    if self.liststack[-1] == "</variablelist>":
                        self.source.emit("</varlistentry>")
                self.itemcount += 1
                # Gather consecutive .It lines into a multi-term entry.
                termlines = [tagline]
                while True:
                    nextline = self.source.popline()
                    if match_command(nextline, "It"):
                        digested = lineparse(nextline)
                        digested = self.eval(["No"] + digested[1:])
                        termlines.append(digested)
                    else:
                        self.source.pushline(nextline)
                        break
                term = "</term>\n<term>\n".join(termlines)
                # We certainly have to open a new entry.
                if self.liststack[-1] == "</variablelist>":
                    self.source.emit("<varlistentry>")
                    self.source.emit("<term>%s</term>" % fontclose(term))
                self.source.emit("<listitem>")
                self.source.need_paragraph()
        elif command == "El":
            if self.liststack[-1] == "</table>":
                self.source.emit("  </tbody></tgroup>")
            else:
                self.source.end_paragraph(label="El")
                self.source.emit("</listitem>")
                if self.liststack[-1] == "</variablelist>":
                    self.source.emit("</varlistentry>")
            self.source.emit(self.liststack.pop())
            self.source.need_paragraph()
        elif command == "Rs":
            # Open a new bibliography entry; %X macros accumulate into it.
            self.biblio.append({})
            self.biblio[-1]["id"] = repr(len(self.biblio))
            self.inref = True
        elif command == "Re":
            self.inref = False
            if self.source.output[-1] == "</variablelist>":
                # Coalesce with the variablelist the previous Re opened.
                self.source.output = self.source.output[:-1]
            else:
                self.source.end_paragraph(label="Re")
                self.source.emit("<variablelist>")
            # We'd like to emit a <bibliography> here, but the DocBook DTD
            # doesn't permit it.
            self.source.emit("<varlistentry id='%s'>" % self.source.make_id_from_title("ref" + repr(len(self.biblio))))
            self.source.emit("<term>[%s]</term>" % len(self.biblio))
            self.source.emit("<listitem><para>")
            for (fld, tag) in (
                ("A", None),
                ("Q", None),
                ("B", "citetitle"),
                ("V", None),
                ("J", None),
                ("N", None),
                ("P", None),
                ("T", "citetitle"),
                ("D", None),
                ("I", None),
                ("C", None),
                ("O", None),
                ):
                if self.biblio[-1].has_key(fld):
                    line = ""
                    if tag:
                        line += "<%s>" % tag
                    line += ", ".join(self.biblio[-1][fld])
                    if tag:
                        line += "</%s>" % tag
                    # (The old code appended ";" and immediately sliced it
                    # back off; that dead pair has been removed.)
                    self.source.emit(line)
            self.source.emit("</para></listitem>")
            self.source.emit("</varlistentry>\n")
            self.source.emit("</variablelist>\n")
        # Not documented, but present in the macro files
        elif command == "Ud":
            self.source.pushline("currently under development")
        else:
            return 0
        return 1
    # Machinery for evaluating parsed macros begins here
    def evalmacro(self, args):
        "Pop one macro and its operands off args; return the expansion list."
        #self.source.notify("evalmacro(%s)" % ", ".join(map(repr, args)))
        cmd = args.pop(0)
        if cmd == "Ad":
            # We don't care.  We're translating it...
            #self.source.warning("the Ad macro is deprecated.")
            return self.encloseargs(args,"<phrase role='address'>","</phrase>")
        elif cmd == "Ai":
            return ["<acronym>ANSI</acronym>"]
        elif cmd == "An":
            if self.hasargs("An", args):
                # Fixed: this must style the operands, not enclose them --
                # encloseargs() would emit the literal words "phrase" and
                # "role='author'" into the output.
                return self.styleargs(args, "phrase", "role='author'")
        elif cmd == "Aq":
            if self.hasargs("Aq", args):
                return self.encloseargs(args, "&lt;@GLUE@", "@GLUE@&gt;")
        elif cmd == "Ac":
            return self.replacemacro(args, "@GLUE@&gt;")
        elif cmd == "Ao":
            return self.replacemacro(args, "&lt;@GLUE@")
        elif cmd == "Ar":
            if not args:
                return ["<replaceable>file...</replaceable>"]
            else:
                return self.styleargs(args, "replaceable")
        elif cmd == "At":
            # Fixed: the ampersand must be entity-escaped, as in the
            # non-parsed At case of interpret().
            return ["<productname>AT&amp;T Unix</productname>"]
        elif cmd == "Bc":
            return self.replacemacro(args, "@GLUE@]")
        elif cmd == "Bo":
            return self.replacemacro(args, "[@GLUE@")
        elif cmd == "Bq":
            return self.encloseargs(args, "[@GLUE@", "@GLUE@]")
        elif cmd == "Bx":
            if not args:
                return ["BSD UNIX"]
            else:
                return self.process_punct(args, lambda x: ["-".join(["%sBSD" % x[0]] + x[1:])], 1)
        elif cmd == "Cm":
            if self.hasargs("Cm", args):
                return self.styleargs(args, "command")
        elif cmd == "Dc":
            return self.replacemacro(args, "@GLUE@&rdquo;")
        elif cmd == "Do":
            return self.replacemacro(args, "&ldquo;@GLUE@")
        elif cmd == "Dq":
            return  self.encloseargs(args, "&ldquo;@GLUE@", "@GLUE@&rdquo;")
        elif cmd == "Dv":
            if self.hasargs("Dv", args):
                return self.styleargs(args, "constant")
        elif cmd == "Em":
            if self.hasargs("Em", args):
                return self.styleargs(args, "emphasis", "remap='Em'")
        elif cmd == "Er":
            if self.hasargs("Er", args):
                return self.styleargs(args, "errorcode")
        elif cmd == "Ev":
            if self.hasargs("Ev", args):
                return self.styleargs(args, "envar")
        elif cmd == "Fc":
            # Fixed: every branch must return a list; a bare string gets
            # spliced character-by-character by eval()'s "result +=".
            return [");"]
        elif cmd == "Fl":
            if not args:
                return ["-"]
            else:
                return self.styleargs(args, "option", "", "-")
        elif cmd == "Fo":
            # Fixed: list, not bare string (see Fc above).
            return ["("]
        elif cmd == "Ic":
            if self.hasargs("Ic", args):
                return self.styleargs(args, "command", "remap='Ic'")
        elif cmd == "Lb":
            return self.process_punct(args, self.lbhook, 1)
        elif cmd == "Li":
            return self.styleargs(args, "literal")
        elif cmd == "Nd":
            # Gather the one-line description, diverting any continuation
            # lines up to the next .Sh into it.
            savesect = [" ".join(self.encloseargs(args, "", ""))]
            while True:
                line = self.source.popline()
                if match_command(line, "Sh"):
                    self.source.pushline(line)
                    break
                else:
                    savesect.append(line)
            lines = []
            self.source.diversion = lines
            self.source.interpret_block(savesect)
            self.source.diversion = self.source.output
            self.desc = " ".join(lines)
            if not self.source.body_section():
                return []
            else:
                # Fixed: list, not bare string (see Fc above).
                return [self.desc]
        elif cmd == "Nm":
            name = " ".join(self.encloseargs(args, "", ""))
            if not self.name:
                self.name = name
            self.refnames[name] = True
            if self.source.sectname and self.source.sectname.upper() == "NAME":
                return []
            else:
                if not name:
                    name = self.name
                # Fixed: use the local fallback computed just above; the
                # old code ignored it and always emitted self.name.
                return ["<command remap='Nm'>%s</command>" % name]
        elif cmd == "No":
            return self.replacemacro(args, "")
        elif cmd == "Ns":
            return self.replacemacro(args, "@GLUE@")
        elif cmd == "Oc":
            return self.replacemacro(args, "@GLUE@]")
        elif cmd == "Oo":
            return self.replacemacro(args, "[@GLUE@")
        elif cmd == "Op":
            return self.styleargs(args, ("[@GLUE@", "@GLUE@]"))
        elif cmd == "Pa":
            return self.styleargs(args, "filename")
        elif cmd == "Pc":
            return self.replacemacro(args, "@GLUE@)")
        elif cmd == "Pf":
            # We don't want punctuation processing here
            operands = []
            while args:
                if args[0] in MdocInterpreter.callable:
                    break
                this = args.pop(0)
                operands.append(this)
                if this == '\n':
                    break
            # Robustness: a Pf immediately followed by a callable (or with
            # no arguments at all) collects nothing.
            if not operands:
                return []
            if len(operands) > 1:
                return [operands[0],"@GLUE@"] + operands[1:]
            else:
                return [operands[0],"@GLUE@"]
        elif cmd == "Po":
            return self.replacemacro(args, "(@GLUE@")
        elif cmd == "Pq":
            return self.encloseargs(args, "(@GLUE@", "@GLUE@)")
        elif cmd == "Px":
            return ["<acronym>POSIX</acronym>"]
        elif cmd == "Ql":
            return self.encloseargs(args, "<literallayout>\n'", "'\n</literallayout>")
        elif cmd == "Qc":
            return self.replacemacro(args, "@GLUE@\"")
        elif cmd == "Qo":
            return self.replacemacro(args, "\"@GLUE@")
        elif cmd == "Qq":
            return self.encloseargs(args, '"@GLUE@', '@GLUE@"')
        elif cmd == "Sc":
            return self.replacemacro(args, "@GLUE@\'")
        elif cmd == "So":
            return self.replacemacro(args, "\'@GLUE@")
        elif cmd == "Sq":
            return self.encloseargs(args, '\`@GLUE@', '@GLUE@\'')
        elif cmd == "St":
            return self.process_punct(args, self.sthook, 1)
        elif cmd == "Sx":
            return self.process_punct(args, lambda x: ["<link linkend='%s'>%s</link>" % (self.source.id_from_title(" ".join(x)), " ".join(x))], 0)
        elif cmd == "Sy":
            return self.styleargs(args, "emphasis", 'remap="Sy"')
        elif cmd == "Ta":
            return self.replacemacro(args, "\t")
        elif cmd == "Tn":
            return self.styleargs(args, "acronym", "remap='Tn'")
        elif cmd == "Ux":
            return ["<productname>Unix</productname>"]
        elif cmd == "Va":
            return self.styleargs(args, "varname")
        elif cmd == "Xc":
            return self.replacemacro(args, "")
        elif cmd == "Xo":
            return self.replacemacro(args, "")
        elif cmd == "Xr":
            return self.process_punct(args, self.xrhook, 0)
        elif cmd[0] == "%":
            lst = self.process_punct(args, lambda x: self.bibliohook(cmd[1], x), 1)
            if self.inref:
                return []
            else:
                return lst
        # Sm is not officially parseable, but we have to treat it that way
        # in order for it to work inside Oo/Oc pairs (as in slogin.1). 
        elif cmd == "Sm":
            enable = self.extractargs(args)
            if "on" in enable:
                self.spacemode = True
            elif "off" in enable:
                self.spacemode = False
            else:
                self.source.error("unknown argument to Sm")
            return []
        else:
            self.source.error("unknown parseable macro " + repr(cmd))
            return []
    def bibliohook(self, field, lst):
        "Accumulate a %X bibliographic field, or resolve it to a reference link."
        ref = " ".join(lst)
        if self.inref:
            # If we're within the scope of an Rs/Re, accumulate entry.
            if not self.biblio[-1].has_key(field):
                self.biblio[-1][field] = []
            self.biblio[-1][field].append(ref)
        # Otherwise return the reference.
        for entry in self.biblio:
            # Fixed: guard the lookup -- not every entry defines every
            # field, and a bare entry[field] raised KeyError here.
            if entry.has_key(field) and ref in entry[field]:
                return ["<link linkend='ref%s'>[%s]</link>" % (entry["id"], entry["id"])]
        # Unresolved titles can simply turn into a title citation
        if field == "T":
            return ["<citetitle>%s</citetitle>" % (ref)]
        raise LiftException("unresolved reference to '%s'" % ref, 2)
    def sthook(self, args):
        "Expand a .St standards abbreviation into a citetitle."
        key = args[0]
        try:
            expansion = MdocInterpreter.st_dict[key]
        except KeyError:
            raise LiftException("unknown St macro '%s'" % key, 2)
        return ["<citetitle>" + expansion + "</citetitle>"]
    def lbhook(self, args):
        "Expand a .Lb library abbreviation into a citetitle."
        key = args[0]
        if key not in MdocInterpreter.lb_dict:
            raise LiftException("unknown Lb macro '%s'" % key, 2)
        return ["<citetitle>" + MdocInterpreter.lb_dict[key] + "</citetitle>"]
    def xrhook(self, args):
        "Render a .Xr cross-reference as a citerefentry, with optional volume."
        inner = "<refentrytitle>%s</refentrytitle>" % args[0]
        if len(args) >= 2:
            inner += "<manvolnum>%s</manvolnum>" % args[1]
        return ["<citerefentry>" + inner + "</citerefentry>"]
    def extractargs(self, args, stop_on_callable=0):
        "Destructively pop leading operands off args, up to end-of-line (or a callable macro)."
        grabbed = []
        while args:
            head = args[0]
            if stop_on_callable and head in MdocInterpreter.callable:
                break
            grabbed.append(args.pop(0))
            if head == '\n':
                break
        return grabbed
    def process_punct(self, args, hook, stop_on_callable):
        "Wrap required processing of punctuation around an evaluation."
        # mdoc lets opening/closing punctuation ride along with a macro's
        # arguments; detach it before evaluation and reattach it (with
        # @GLUE@ markers) afterwards.
        prepunct = []
        postpunct = []
        # Save leading punctuation
        while args and args[0] in MdocInterpreter.openers:
            prepunct.append(args.pop(0))
        # Save trailing punctuation, preserving its original order
        while args and args[-1] in MdocInterpreter.closers:
            postpunct = [args.pop()] + postpunct 
        # Consume operands up to end of line, or (optionally) the next
        # callable macro name, whichever comes first.
        operands = []
        while args:
            if stop_on_callable and args[0] in MdocInterpreter.callable:
                break
            this = args.pop(0)
            operands.append(this)
            if this == '\n':
                break
        # Evaluate, then reattach the saved punctuation.
        operands = prepunct + hook(operands) + postpunct
        result = []
        for arg in operands:
            # @GLUE@ suppresses the space that would otherwise appear when
            # the token list is later joined.
            if arg in MdocInterpreter.closers:
                result.append("@GLUE@" + arg)
            elif arg in MdocInterpreter.openers:
                result.append(arg + "@GLUE@")
            else:
                result.append(arg)
        return result
    def encloseargs(self, args, opener, closer):
        "Grab and process arguments for an enclosure macro, bracketing them with opener/closer text."
        return self.process_punct(args, lambda x: [opener] + x + [closer], 0)
    def stylehook(self, args, tag, attr, prefix=""):
        "Wrap each non-punctuation token in the given tag pair, prepending prefix."
        result = []
        if attr:
            attr = " " + attr
        # A 2-tuple tag supplies explicit start/end text (see the Op macro);
        # a plain string is expanded to <tag attr>...</tag>.
        if len(tag) == 2:
            start = tag[0] + attr
            end = tag[1]
        else:
            start = "<" + tag + attr + ">"
            end =  "</" + tag + ">"
        for arg in args:
            if arg == "|" or arg in self.openers or arg in self.closers:
                result.append(arg)
            else:
                # Fixed: prefix was accepted but never applied, so .Fl
                # (which passes "-") lost the leading dash on every option.
                result.append(start + prefix + arg + end)
        return result
    def styleargs(self, args, tag, attribute="", prefix=""):
        "Apply stylehook() to the operands after standard punctuation processing."
        return self.process_punct(args, lambda x: self.stylehook(x, tag, attribute, prefix), 1)
    def replacemacro(self, args, replacement):
        "Substitute literal replacement text for a macro, then process its operands."
        # Fixed: the second parameter was named "with", which has been a
        # reserved keyword since Python 2.6 and made this file a SyntaxError.
        # All call sites pass it positionally, so the rename is safe.
        return self.process_punct(args, lambda x: [replacement] + x, 1)
    def eval(self, args):
        "Evaluate a parsed macro token list, returning the translated string."
        # Accept either ".Xx" or "Xx" as the leading token.
        if args[0][0] == '.':
            args[0] = args[0][1:]
        # Consume arguments and macro calls until none are left
        result = []
        while args:
            # evalmacro() consumes tokens destructively from args; drop any
            # empty strings from its expansion (Python 2 filter() idiom).
            nextpart = filter(lambda x: x, self.evalmacro(args))
            if not self.spacemode and len(nextpart) > 1:
                # .Sm off: glue adjacent output tokens together.
                for ind in range(len(nextpart)):
                    nextpart.insert(2*ind+1, "@GLUE@")
            #self.source.notify("evalmacro -> %s" % nextpart)
            result += nextpart
        # Glue the results together
        result = " ".join(result)
        # hotglue and cleantag are module-level regexps defined elsewhere in
        # this file; presumably they collapse " @GLUE@ " seams and strip
        # empty tag pairs -- confirm against their definitions.
        result = hotglue.sub("", result)
        result = cleantag.sub("", result)
        return result
    def preprocess(self, text):
        "Hook run on the raw source text before translation; identity here."
        return text
    def postprocess(self, text):
        "Hook run on the translated text after processing; identity here."
        return text

class MsInterpreter:
    "Interpret ms(7) macros."
    name = "ms"
    exclusive = True
    toptag = "article"
    immutable_set = {}
    ignore_set = {
        # Ignore presentation-level-only requests from Bell Labs.
        "RP":1, "ND":1, "DA":1, "1C":1, "2C":1, "MC":1, "RS":1, "RE":1,
        "BX":1, "KS":1, "KE":1, "KF":1,
        # Also ignore the Berkeley thesis-mode extension
        "TM":1, "CT":1, "XS":1, "XE":1, "XA":1, "PX":1, "AM":1,
        "EH":1, "OH":1, "EF":1, "OF":1,
        # These are not documented in the ms reference, but
        # they occur in ms papers, probably as relics from mm.
        "MH":1, "CS":1, "D3":1
        }
    complain_set = {}
    parabreak_set = {"blank":1,"PP":1, "LP":1, "XP":1, "IP":1,}
    sectionbreak_set = {"NH":1, "SH":1, "SC":1,}
    translations = (
        # The Bell Labs prefix diacriticals
        (r"\*'", "&acute;"),
        (r"\*`", "&grave;"),
        (r"\*:", "&uml;"),
        (r"\*^", "&circ;"),
        (r"\*~", "&tilde;"),
        (r"\*C", "&caron;"),
        (r"\*,", "&cedil;"),
        # Berkeley extensions
        #(r"\**(_", "&mdash;"),	# Input text was "\e\(**\^\u_\d" in original
        (r"\**(Q", "&ldquo;"),
        (r"\**(U", "&rdquo;"),
        # Berkeley postscript diacriticals
        (r"\**('", "&grave;"),
        (r"\**(`", "&acute;"),
        (r"\**(^", "&circ;"),
        (r"\**(,", "&cedil;"),
        (r"\**(?", "&iquest;"),
        (r"\**(!", "&iexcl;"),
        (r"\**(v", "&caron;"),
        (r"\**(_", "&macr;"),
        # \**. wants to be underdot, but there is no defined entity for this
        (r"\**/",  "&oslash;"),
        (r"\**o",  "&Aring;"),
        # "\**(3t wants to be a yogh, but there is no defined entity for this
        (r"\**(Th", "&THORN;"),
        (r"\**(th", "&thorn;"),
        (r"\**(D-", "&ETH;"),
        (r"\**(d-", "&eth;"),
        # \**q wants to be "hooked o" but I do not know what that is
        (r"\**(ae", "&aelig;"),
        (r"\**(Ae", "&AElig;"),
        (r"\**(oe", "&oelig;"),
        (r"\**(Oe", "&Oelig;"),
        )
    def __init__(self, source, verbose=0):
        self.source = source
        self.verbose = verbose
        self.break_trap = None
        # Current font/size state, used to map .B/.I/.R/.SM/.LG/.NL into
        # troff font escapes.
        self.font = "R"
        self.pointsize = 0
        self.fmt = "R"
        # Front-matter accumulators: title, authors, affiliations, abstract.
        self.TL = None
        self.AU = None
        self.AI = []
        self.AB = None
        self.flushed = False    # True once the article header was generated
    def interpret(self, tokens, caller):
        "Translate one ms request; return 1 if handled, 0 otherwise."
        command = tokens[0][1:]
        args = tokens[1:]
        if command in ("B", "I", "R", "UL", "SM", "LG", "NL"):
            # Get our remap attribute in sync with other macro sets.
            # (Was 'command=="U"', a comparison with no effect.)
            if command == "UL":
                command = "U"
            # Could be a change along either axis
            newpointsize = self.pointsize
            newfont = self.font
            if command == "NL":
                newpointsize = 0
            elif command == "LG":
                newpointsize += 1
            elif command == "SM":
                newpointsize += -1
            else:
                newfont = command
            # If no actual change (as with two successive .NLs), we're done.
            if self.font == newfont and self.pointsize == newpointsize:
                return 1
            if newpointsize == 0:
                fmt = newfont
            else:
                fmt = newfont + repr(newpointsize)
            if self.fmt == "R":
                if not args:
                    self.source.emit(r"\f%s" % fmt)
                else:
                    self.source.emit(r"\f%s%s\fP" % (fmt, args[0]))
            elif fmt == "R":
                if not args:
                    self.source.emit(r"\fP")
                else:
                    self.source.emit(r"\fP%s\f%s" % (args[0], self.fmt))
            # Note: a transition between two non-R formats emits nothing.
            if not args:
                self.font = newfont
                self.pointsize = newpointsize
                self.fmt = fmt
            return 1
        # Undocumented Bell Labs-isms begin here
        elif command == "UX":
            self.source.pushline("<productname>Unix</productname>")
            return 1
        elif command == "UC":
            self.source.pushline("<productname>%s</productname>" % args[0])
            return 1
        # Commands for front matter
        elif command == "TL":
            self.source.declare_body_start()
            self.TL = gather_lines(self.source)
            return 1
        elif command == "OK":	# Undocumented -- probably some Bell Labs thing
            gather_lines(self.source)
            return 1
        elif command == "AU":
            self.AU = gather_lines(self.source)
            return 1
        elif command == "AI":
            # Affiliation lines are interpreted through a diversion into AI.
            rawlines = gather_lines(self.source)
            self.source.diversion = self.AI
            self.source.interpret_block(rawlines)
            self.source.diversion = self.source.output
            return 1
        elif command == "AB":
            # Collect abstract lines up to the matching .AE.
            self.AB = []
            while self.source.lines:
                line = self.source.popline()
                tokens = lineparse(line)
                if tokens and tokens[0][1:3] == "AE":
                    break
                if not (is_command(line) and self.source.ignorable(line)):
                    self.AB.append(line)
            return 1
        # Here's where we analyze the front matter and generate the header
        if not self.flushed:
            self.source.preamble = False
            self.flushed = True
            # Always defined, so the loop below can't hit a NameError when
            # there are affiliations but no .AU lines.
            digested = []
            # If there's only one line of authors, try to break it up by
            # looking for " and ".  There are a couple of historical examples
            # of this, notably in the EQN docs.
            if self.AU:
                if len(self.AU) == 1:
                    # (Was 'trial > 1', comparing a list to an int.)
                    trial = self.AU[0].split(" and ")
                    if len(trial) > 1:
                        self.AU = trial
                    else:
                        # We'll also try splitting on commas
                        trial = self.AU[0].split(", ")
                        if len(trial) > 1:
                            self.AU = trial
                # Now we have one author per line.  Try to analyze each name.
                for name in self.AU:
                    author = Author(name)
                    if self.AI:
                        author.affiliate(" ".join(self.AI))
                    digested.append(author)
            # OK, we've got enough info to generate the header
            if self.TL or self.AU or self.AI or self.AB:
                self.source.end_paragraph(label="mm header")
                self.source.emit("<articleinfo>")
                if self.TL:
                    self.source.emit("<title>")
                    caller.interpret_block(self.TL)
                    self.source.emit("</title>")
                if self.AU or self.AI:
                    for author in digested:
                        self.source.emit(repr(author))
                if self.AB:
                    self.source.emit("<abstract>")
                    self.source.need_paragraph()
                    caller.interpret_block(self.AB)
                    self.source.end_paragraph(label="AB")
                    self.source.emit("</abstract>")
                self.source.emit("</articleinfo>")
        if command in ("blank","PP","LP","XP") or (command == "IP" and len(tokens) == 1):
            self.source.paragraph()
        elif command in ("NH", "SH"):
            title = self.source.popline()
            try:
                newdepth = int(tokens[1])
            except (IndexError, ValueError):
                newdepth = 1
            self.source.push_section(newdepth, title)
        elif command == "IP":
            # If no tag is specified, treat as ordinary paragraph.
            self.source.end_paragraph(label="IP")
            if tokens[1] in ip_tag_mapping:
                self.source.pushline(quoteargs(tokens))
                gather_itemizedlist(".IP", self.source,
                                    ip_tag_mapping[tokens[1]])
            else:
                self.source.pushline(quoteargs(tokens))
                gather_variablelist(".IP", self.source)
        elif command == "QP":
            # Quoted paragraph: absorb text lines until the next request.
            self.source.begin_block("blockquote", remap="QP")
            self.source.emit("<para>")
            self.source.need_paragraph()
            while self.source.lines:
                line = self.source.popline()
                if is_command(line):
                    self.source.pushline(line)
                    break
                self.source.emit(line)
            self.source.end_block("blockquote", remap="QE")
        elif command == "B1":
            # Handled after the front-matter flush (the original emitted the
            # sidebar before the header, then fell through to return 0).
            self.source.emit(r"<sidebar>")
        elif command == "B2":
            self.source.emit(r"</sidebar>")
        elif command == "DS":
            self.source.begin_block("literallayout", remap='DS', nofill=1)
        elif command == "DE":
            self.source.end_block("literallayout", remap='DE')
        elif command == "FS":
            self.source.begin_block("footnote", remap='FS')
        elif command == "FE":
            self.source.end_block("footnote", remap='FE')
        elif command == "QS":
            self.source.begin_block("blockquote", remap='QS')
        elif command == "QE":
            self.source.end_block("blockquote", remap='QE')
        # Undocumented Bell Labs-isms begin here
        elif command == "SC":
            self.source.push_section(1, args[0])
        elif command == "P1":
            self.source.begin_block("programlisting", remap='P1')
        elif command == "P2":
            self.source.end_block("programlisting", remap='P2')
        else:
            return 0
        return 1
    def preprocess(self, text):
        "Hook called on raw source text before translation; a no-op for ms."
        return text
    def postprocess(self, text):
        "Hook called on translated text after processing; a no-op for ms."
        return text

class MeInterpreter:
    "Interpret me macros."
    name = "me"
    exclusive = True
    toptag = "article"
    immutable_set = {}
    ignore_set = {"1c":1,"2c":1,"bc":1,"bl":1,"ef":1,"eh":1,"ep":1,"fo":1,
                  "he":1,"hx":1,"m1":1,"m2":1,"m3":1,"m4":1,"n1":1,"n2":1,
                  "of":1,"oh":1,"tp":1,"xl":1,"xp":1,"sk":1,"(z":1,")z":1,
                  "sz":1,"(l":1,")l":1,
                  }
    complain_set = {"ba":1,"bx":1,"ix":1,"(b":1,")b":1,"(c":1,")c":1,"pa":1,
                    "sx":1,"uh":1,".$p":1,".$c":1,".$f":1,".$h":1,".$s":1,
                    "+c":1,"(x":1,")x":1,
                    }
    parabreak_set = {"blank":1,"lp":1,"pp":1,"ip":1,"np":1,}
    sectionbreak_set = {"sh":1,}
    translations = (
        (r"\*-", "&ndash;"),	# Not quite right, supposed to be 3/4 dash
        (r"\*:", "&uml;"),
        (r"\*<", "<subscript>"),
        (r"\*>", "</subscript>"),
        (r"\*{", "<superscript>"),
        (r"\*}", "</superscript>"),
        (r"\*('", "&acute;"),
        (r"\*(`", "&grave;"),
        (r"\('", "&acute;"),
        (r"\(`", "&grave;"),
        (r"\*^", "&circ;"),
        (r"\*,", "&cedil;"),
        (r"\*~", "&tilde;"),
        (r"\*(qe", "&exist;"),
        (r"\*(qa", "&forall;"),
        (r"\(lq", "&ldquo;"),
        (r"\(rq", "&rdquo;"),
      )
    def __init__(self, source, verbose=0):
        self.source = source
        self.verbose = verbose
        self.delay = []             # lines diverted by .(d, flushed by .pd
        self.in_abstract = False    # between '.++ AB' and the next '.++'
        self.source.preamble = False
    def interpret(self, tokens, caller):
        "Translate one me request; return 1 if handled, 0 otherwise."
        cmd = tokens[0][1:]
        args = tokens[1:]
        if cmd in ("b", "bi", "i", "r", "rb", "sm", "u"):
            # Font-change macros take a word plus an optional trailer.
            # (The original referenced an undefined 'command' name here,
            # and its 'len(args) <= 2' test dropped the trailer when
            # exactly two arguments were given.)
            if len(args) < 2:
                trailer = ""
            else:
                trailer = args[1]
            self.source.pushline(self.source.direct_highlight(cmd.upper(), [args[0]], trailer))
        elif cmd == "q":
            if len(args) < 2:
                trailer = ""
            else:
                trailer = args[1]
            self.source.pushline("<quote>%s</quote>%s" % (args[0], trailer))
        elif cmd in ("blank", "lp", "pp"):
            self.source.declare_body_start()
            self.source.paragraph()
        elif cmd == "ip":
            self.source.pushline(quoteargs(tokens))
            gather_variablelist(".ip", self.source)
        elif cmd == "bp":
            self.source.pushline(quoteargs(tokens))
            gather_itemizedlist(".bp", self.source, "bullet")
        elif cmd == "np":
            self.source.pushline(quoteargs(tokens))
            gather_orderedlist(".np", self.source)
        elif cmd == "(q":
            self.source.begin_block("blockquote", remap='(q')
        elif cmd == ")q":
            self.source.end_block("blockquote", remap=')q')
        elif cmd == "(f":
            # (remap labels were '(q'/')q' copy-paste leftovers)
            self.source.begin_block("footnote", remap='(f')
        elif cmd == ")f":
            self.source.end_block("footnote", remap=')f')
        elif cmd == "(d":
            # Divert output into the delay buffer until .)d
            self.source.diversion = self.delay
        elif cmd == ")d":
            # (Was 'self.output', an attribute this class never sets.)
            self.source.diversion = self.source.output
        elif cmd == "pd":
            # Flush delayed text into the real output stream.
            self.source.output += self.delay
            self.delay = []
        elif cmd == "sh":
            self.source.push_section(int(tokens[1]), tokens[2])
        elif cmd == "++":
            # Section-type declaration; we only care about the abstract.
            # (Was 'self.emit', an attribute this class does not have.)
            if tokens[1] == "AB":
                self.in_abstract = True
                self.source.emit("<abstract>")
            elif self.in_abstract:
                self.in_abstract = False
                self.source.emit("</abstract>")
        else:
            return 0
        return 1
    def preprocess(self, text):
        "Hook called on raw source text before translation; a no-op for me."
        return text
    def postprocess(self, text):
        "Hook called on translated text after processing; a no-op for me."
        return text

class MmInterpreter:
    "Interpret mm(7) macros."
    name = "mm"
    exclusive = True
    toptag = "article"
    immutable_set = {"B":1, "I":1, "R":1,
                     "BI":1, "BR":1, "IB":1, "IR":1, "RB":1, "RI":1,
                     "AE":1, "AF":1, "AL":1, "RL":1, "APP":1, "APPSK":1,
                     "AS":1, "AT":1, "AU":1, "B1":1, "B2":1, "BE":1,
                     "BL":1, "ML":1, "BS":1, "BVL":1, "VL":1, "DE":1, "DL":1,
                     "DS":1, "FE":1, "FS":1, "H":1, "HU":1, "IA":1, "IE":1,
                     "IND":1, "LB":1, "LC":1, "LE":1, "LI":1, "P":1,
                     "RF":1, "SM":1, "TL":1, "VERBOFF":1, "VERBON":1,
                     "WA":1, "WE":1, }
    ignore_set = {")E":1, "1C":1, "2C":1, "AST":1, "AV":1, "AVL":1,
                  "COVER":1, "COVEND":1, "EF":1, "EH":1, "EDP":1,
                  "EPIC":1, "FC":1, "FD":1, "HC":1, "HM":1,
                  "GETR":1, "GETST":1, "HM":1,
                  "INITI":1, "INITR":1, "INDP":1, "ISODATE":1,
                  "MT":1, "NS":1, "ND":1, "OF":1, "OH":1, "OP":1,
                  "PGFORM":1, "PGNH":1, "PE":1, "PF":1, "PH":1,
                  "RP":1, "S":1, "SA":1, "SP":1,
                  "SG":1, "SK":1, "TAB":1, "TB":1, "TC":1, "VM":1, "WC":1}
    complain_set = {"EC":1, "EX":1, "FG":1,
                    "GETHN":1, "GETPN":1, "GETR":1, "GETST":1,
                    "LT":1, "LD":1, "LO":1,
                    "MOVE":1, "MULB":1, "MULN":1, "MULE":1, "NCOL":1,
                    "nP":1, "PIC":1, "RD":1, "RS":1, "SETR":1, }
    parabreak_set = {}
    sectionbreak_set = {}
    translations = (
        (r"\*F", ""),	# Assumes that footnote marks are adjacent to footnotes
      )
    reductions = {}
    # Specific to this interpreter: map mm list-mark codes to DocBook
    # orderedlist numeration values.
    markdict = {"1":"arabic",
                "A":"upperalpha",
                "a":"loweralpha",
                "I":"upperroman",
                "i":"lowerroman"}
    def __init__(self, source, verbose=0):
        self.source = source
        self.verbose = verbose
        self.break_trap = None
        self.liststack = []     # pending list close tags, innermost last
        self.listcount = []     # items emitted so far in each open list
        # State collected by macros
        self.author_firm = None
        self.author_title = None
        self.author_info = None
    def end_list(self):
        "Close the innermost open list, ending its last item if any."
        if self.listcount[-1]:
            self.source.end_paragraph(label="end_list")
            self.source.emit("</listitem>")
            if self.liststack[-1] == "</variablelist>":
                self.source.emit("</varlistentry>")
        self.source.emit(self.liststack.pop())
        self.listcount.pop()
    def fold_highlights(self, cmd, args):
        "Translate a highlight macro, or return None if cmd isn't one."
        # We need this to be a separate entry point for TP tag processing.
        if cmd in ("B", "I", "R"):
            return self.source.alternating_highlight(cmd + "P", args)
        elif cmd in ("BI", "BR", "IB", "IR", "RB", "RI"):
            return self.source.alternating_highlight(cmd, args)
        else:
            return None
    def interpret(self, tokens, caller):
        "Translate one mm request; return 1 if handled, 0 otherwise."
        cmd = tokens[0][1:]
        args = tokens[1:]
        # Highlighting
        highlighted = self.fold_highlights(cmd, args)
        if highlighted:
            self.source.emit(highlighted)
        # Ordinary formatting commands.
        elif cmd == "AE":
            self.source.end_paragraph(label="AE")
            self.source.emit("</abstract>")
            self.source.need_paragraph()
        elif cmd == "AF":
            self.author_firm = " ".join(args)	# Not yet used
        elif cmd == "AL" or cmd == "RL":
            enumeration = 'arabic'
            spacing = 'normal'
            if args:
                spec = MmInterpreter.markdict.get(args[0])
                if not spec:
                    self.source.error("unknown enumeration type %s in AL" % args[0])
                else:
                    enumeration = spec
                if len(args) >= 3:
                    spacing = 'compact'
            # (The spacing value was emitted as a bare word, producing
            # invalid markup; it belongs in the spacing attribute.)
            self.source.emit("<orderedlist numeration='%s' spacing='%s'>" % (enumeration, spacing))
            self.liststack.append("</orderedlist>")
            self.listcount.append(0)
        elif cmd == "APP" or cmd == "APPSK":
            name = args[0]
            # APPSK takes an extra skip argument before the title text;
            # the title is all remaining arguments (the original indexed a
            # single argument and then space-joined its characters).
            text = args[1 + (cmd == "APPSK"):]
            # NOTE(review): was 'self.troff', an attribute this class never
            # sets; presumably the troff interpreter hangs off the source
            # object -- confirm against TroffTranslator.
            self.source.troff.strings["Apptxt"] = " ".join(text)
            self.source.emit("<appendix><title>%s</title>" % name)
        elif cmd == "AS":
            self.source.emit("<abstract>")
            self.source.need_paragraph()
        elif cmd == "AT":
            self.author_title = args	# Not yet used
        elif cmd == "AU":
            self.author_info = args	# Not yet used
        elif cmd == "B1":
            self.source.emit(r"<sidebar>")
        elif cmd == "B2":
            self.source.emit(r"</sidebar>")
        elif cmd == "BE":
            self.source.paragraph("End of BS/BE block")
        elif cmd == "BL" or cmd == "ML":
            if len(args) == 2:
                spacing = 'compact'
            else:
                spacing = 'normal'
            self.source.emit("<itemizedlist spacing='%s' mark='bullet'>" % spacing)
            self.liststack.append("</itemizedlist>")
            self.listcount.append(0)
        elif cmd == "BS":
            self.source.paragraph("FIX-ME: BS/BE block may need to be moved")
        elif cmd == "BVL" or cmd == "VL":
            self.source.emit("<variablelist>")
            self.liststack.append("</variablelist>")
            self.listcount.append(0)
        elif cmd == "DE":
            self.source.emit("</screen>")
        elif cmd == "DL":
            if len(args) == 2:
                spacing = 'compact'
            else:
                spacing = 'normal'
            self.source.emit("<itemizedlist spacing='%s' mark='dash'>" % spacing)
            self.liststack.append("</itemizedlist>")
            # Keep listcount in step with liststack so end_list can't
            # underflow (the original omitted this append).
            self.listcount.append(0)
        elif cmd == "DS" or cmd == "DF":
            self.source.emit("<screen>")
        elif cmd == "FE":
            self.source.end_paragraph(label="FE")
            self.source.pushline(r"</footnote>")
        elif cmd == "FS":
            self.source.pushline(r"<footnote>")
            self.source.need_paragraph()
        elif cmd == "H":
            # A new numbered heading terminates all open lists.
            # (end_list pops liststack, so don't iterate over it.)
            while self.liststack:
                self.end_list()
            level = int(args[0])
            heading_text = heading_suffix = ""
            if len(args) > 1:
                heading_text = args[1]
                if len(args) > 2:
                    # (Was args[1], silently dropping the suffix argument.)
                    heading_suffix = args[2]
            self.source.push_section(level, heading_text + heading_suffix)
        elif cmd == "HU":
            heading_text = args[0]
            while self.liststack:
                self.end_list()
            self.source.push_section(self.source.sectiondepth, heading_text, makeid=0)
        # We can ignore H[XYZ] as they are user-defined exits
        elif cmd == "IA":
            self.source.emit("<!-- Start IA address spec: " + repr(args))
        elif cmd == "IE":
            self.source.emit("End IE address spec. -->")
        elif cmd == "IND":
            self.source.pushline(self.source.index(map(deemphasize, args)))
        elif cmd == "LB":
            type = int(args[3])
            mark = "1"
            if len(args) > 4:
                mark = args[4]
            if type == 0:
                # Not strictly correct -- what LB really wants us to do
                # is generate a mark from the mark argument.
                # (The original applied '% spacing' to a format with no
                # placeholder, and never appended to listcount.)
                self.source.emit("<itemizedlist mark='bullet'>")
                self.liststack.append("</itemizedlist>")
                self.listcount.append(0)
            else:
                spec = MmInterpreter.markdict.get(mark)
                if not spec:
                    self.source.error("unknown enumeration type %s in LB"%mark)
                    enumeration = 'arabic'
                else:
                    enumeration = spec
                self.source.emit("<orderedlist numeration='%s'>" % enumeration)
                self.liststack.append("</orderedlist>")
                self.listcount.append(0)
        elif cmd == "LC":
            while self.liststack:
                self.end_list()
        elif cmd == "LE":
            self.end_list()
        elif cmd == "LI":
            mark = ""
            if len(args) > 0:
                mark = args[0]	# FIXME: process second argument
            # End previous entry
            if self.listcount[-1]:
                self.source.end_paragraph(label="LI")
                self.source.emit("</listitem>")
                if self.liststack[-1] == "</variablelist>":
                    self.source.emit("</varlistentry>")
            # Begin this entry
            if self.liststack[-1] == "</variablelist>":
                self.source.emit("<varlistentry>")
                self.source.emit("<term>%s</term>" % fontclose(mark))
            self.source.emit("<listitem>")
            self.source.need_paragraph()
            # Bump counter
            self.listcount[-1] += 1
        elif cmd == "P" or cmd == "blank":
            self.source.paragraph()
        elif cmd == "RF":
            self.source.emit("Reference end -->")
        elif cmd == "SM":
            # Pad so both format branches always have enough values;
            # '%' needs a tuple, not the raw args list.
            padded = args + ["", ""]
            if len(args) > 2:
                self.source.pushline(r"%s\fS%s\fP%s" % tuple(padded[:3]))
            else:
                self.source.pushline(r"\fS%s\fP%s" % tuple(padded[:2]))
        elif cmd == "TL":
            self.title = gather_lines(self.source)
        # We can ignore user exits, TP, TX, TY.
        elif cmd == "VERBOFF":
            self.source.emit("</screen>")
        elif cmd == "VERBON":
            self.source.emit("<screen>")
        elif cmd == "WA":
            self.source.emit("<!-- Start WA address spec: " + repr(args))
        elif cmd == "WE":
            self.source.emit("End WA address spec. -->")
        # Unknown command.
        else:
            return 0
        return 1
    def preprocess(self, text):
        "Hook called on raw source text before translation; a no-op for mm."
        return text
    def postprocess(self, text):
        "Hook called on translated text after processing; a no-op for mm."
        return text

class MwwwInterpreter:
    "Interpret mwww macros (the groff www/HTML extension set)."
    name = "mwww"
    exclusive = False
    toptag = "article"
    immutable_set = {"HX":1, "BCL":1, "BGIMG":1,
                     "URL":1, "MTO":1, "FTP":1, "IMG":1, "HTML":1,
                     "TAG":1, "HR":1,}
    ignore_set = {"HX":1, "BCL":1, "BGIMG":1,
                  "HTML":1, "HR":1, "LK":1, "NHR":1,
                  "HnS":1, "HnE":1, "DC":1, "HTL":1, }
    complain_set = {}
    parabreak_set = {}
    sectionbreak_set = {}
    translations = ()
    reductions = {}
    def __init__(self, source, verbose=0):
        self.source = source
        self.verbose = verbose
        self.break_trap = None
    def interpret(self, tokens, caller):
        "Translate one mwww request; return 1 if handled, 0 otherwise."
        cmd = tokens[0][1:]
        args = tokens[1:]
        # Pad the argument list so url/text/trailer are always present.
        while len(args) < 3:
            args.append("")
        def make_url(url, txt, after):
            return '<ulink url="%s">%s</ulink>%s' % (url, txt, after)
        # Ordinary formatting commands.
        if cmd == "URL":
            self.source.emit(make_url(args[0], args[1], args[2]))
        elif cmd == "MTO":
            # .MTO address text: the mailto: scheme belongs on the URL,
            # not prefixed to the link text as the original had it.
            self.source.emit(make_url("mailto:" + args[0], args[1], args[2]))
        elif cmd == "FTP":
            self.source.emit(make_url(args[0], args[1], args[2]))
        elif cmd == "IMG":
            file = args[0]
            ext = ""
            if '.' in file:
                # Extension after the last dot; the original slice yielded
                # everything up to and including the first dot instead.
                ext = file[file.rindex('.')+1:].upper()
            # (Was 'self.pushline', an attribute this class does not have.)
            self.source.pushline('<mediaobject>\n<imageobject><imagedata fileref="%s" format="%s"/></imageobject>\n</mediaobject>' % (file, ext))
        elif cmd == "PIMG":
            file = args[0]
            self.source.pushline('<mediaobject>\n<imageobject><imagedata fileref="%s" format="PNG"/></imageobject>\n</mediaobject>' % file)
        elif cmd == "TAG":
            # NOTE(review): the original applied % to only one of two
            # values and passed source.xml as an extra pushline argument
            # (a TypeError); presumably XML mode wants a self-closed
            # anchor -- confirm intent.
            if self.source.xml:
                closer = "/"
            else:
                closer = ""
            self.source.pushline('<anchor id="%s"%s>' % (self.source.make_id_from_title(args[0]), closer))
        elif cmd == "ULS":
            self.source.emit("<itemizedlist>")
        elif cmd == "ULE":
            self.source.emit("</itemizedlist>")
        elif cmd == "LI":
            # (Was 'self.error'; error reporting lives on the source.)
            self.source.error("LI is not yet supported, because it's not documented.")
        # Unknown command.
        else:
            return 0
        return 1
    def preprocess(self, text):
        "Hook called on raw source text before translation; a no-op for mwww."
        return text
    def postprocess(self, text):
        "Hook called on translated text after processing; a no-op for mwww."
        return text

# This is how we autodetect the right macro set:

# Trigger pattern -> interpreter class.  Two-character keys are request
# names; longer keys are literal strings searched for in the source.
interpreter_dispatch = {
    "pp": MeInterpreter,
    "Dt": MdocInterpreter,
    "Dd": MdocInterpreter,
    "Nm": MdocInterpreter,
    "AU": MsInterpreter,
    "NH": MsInterpreter,
    "TH": ManInterpreter,
    "MT": MmInterpreter,
    "COVER": MmInterpreter,
    # Extension macro sets
    "supplemental macros used in Tcl/Tk": TkManInterpreter,
    "the F register is turned on": Pod2ManInterpreter,
    # These are all of the supported Mwww tags
    # (the original listed "FTP" twice; duplicate key removed)
    "URL": MwwwInterpreter,
    "FTP": MwwwInterpreter,
    "MTO": MwwwInterpreter,
    "PIMG": MwwwInterpreter,
    "IMG": MwwwInterpreter,
    "TAG": MwwwInterpreter,
    }

# Map arguments of the troff .mso (macro source) request to the interpreter
# for the package being loaded, so explicit package loads are detected too.
mso_dispatch = {
    "e.tmac":    MeInterpreter,
    "doc.tmac":  MdocInterpreter,
    "s.tmac":    MsInterpreter,
    "an.tmac":   ManInterpreter,
    "m.tmac":    MmInterpreter,
    "www.tmac":  MwwwInterpreter,
    }

# NOTE(review): presumably the file extension a source must carry before
# these macro sets are accepted -- confirm against the consuming code.
required_extensions = {
    MeInterpreter: "me",
    MsInterpreter: "ms",
    MmInterpreter: "mm",
    }

def sponge(name, arguments, trans_data, trans_filename=None):
    """Read input sources entire and transform them in memory.

    name -- program name, used in messages and tempfile naming.
    arguments -- input filenames; empty means filter stdin to stdout.
    trans_data -- callable (name, filename, text) returning the
        transformed text, or None to leave the file untouched.
    trans_filename -- None to replace each file in place; a string used
        as a new suffix (replacing from the last occurrence of its first
        character); or a callable mapping the old name to the new one.
    """
    if not arguments:
        outdoc = trans_data(name, "stdin", sys.stdin.read())
        if outdoc:
            sys.stdout.write(outdoc)
        return
    for file in arguments:
        infp = open(file)
        indoc = infp.read()
        infp.close()
        tempfile = file + ".~%s-%d~" % (name, os.getpid())
        try:
            outfp = open(tempfile, "w")
        except (IOError, OSError):
            # open() raises IOError in Python 2; the original caught only
            # OSError and would have crashed instead of reporting.
            sys.stderr.write("%s: can't open tempfile" % name)
            return 1
        try:
            outdoc = trans_data(name, file, indoc)
        except:
            # Clean up, then re-raise; a bare raise preserves the
            # original traceback (the old three-argument raise did the
            # same thing with Python 2-only syntax).
            outfp.close()
            os.remove(tempfile)
            raise
        if outdoc is None or outdoc == indoc:
            # No translation, or nothing changed: discard the tempfile and
            # leave the original alone.  (The original removed the
            # tempfile on equal output and then tried to rename it anyway,
            # and leaked the open tempfile when outdoc was None.)
            outfp.close()
            os.remove(tempfile)
            continue
        outfp.write(outdoc)
        outfp.close()	# under Windows you can't rename an open file
        if not trans_filename:
            os.rename(tempfile, file)
        elif type(trans_filename) == type(""):
            i = file.rfind(trans_filename[0])
            if i > -1:
                file = file[:i]
            os.rename(tempfile, file + trans_filename)
        else:
            os.rename(tempfile, trans_filename(file))

if __name__ == "__main__":
    import getopt
    # Command-line options:
    #   -I PATH  colon-separated include path (for inclusion resolution)
    #   -h FILE  write the accumulated semantic hints registry to FILE
    #   -q       quiet mode; -v verbose mode
    #   -t       sets tm_enable (presumably trademark processing -- confirm)
    #   -s       emit SGML; -x emit XML (the default)
    (options, arguments) = getopt.getopt(sys.argv[1:], "I:h:qstxv")
    includepath = ["."]
    hintfile = None
    quiet = False
    tm_enable = False
    verbose = False
    errorcount = 0
    xml = True
    for (switch, val) in options:
        if switch == "-I":
            includepath = val.split(":")
        elif switch == '-h':
            hintfile = val
        elif switch == '-q':
            # NOTE: initialized as a bool, then incremented -- this works
            # because bool is an int subtype, making -q/-v counted flags.
            quiet += 1
        elif switch == '-s':
            xml = False
        elif switch == '-t':
            tm_enable = True
        elif switch == '-v':
            verbose += 1
        elif switch == '-x':
            xml = True
    # Output-file extension handed to sponge as trans_filename.
    if xml:
        ext = ".xml"
    else:
        ext = ".sgml"
    try:
        globalhints = SemanticHintsRegistry()
        lifter = DocLifter(xml, verbose, quiet, tm_enable, includepath)
        # Translate each argument file in place, writing <name>.xml/.sgml.
        sponge("doclifter", arguments,
               lambda n,f,d: lifter(n, f, d),
               ext)
    except LiftException, e:
        sys.stderr.write("doclifter: " + e.message + "\n")
        raise SystemExit, e.retval
    except IOError, e:
        sys.stderr.write("doclifter: file I/O error: %s\n" % e)
        raise SystemExit, 1
    except KeyboardInterrupt:
        sys.stderr.write("doclifter: bailing out...\n")
        raise SystemExit, 3
    if errorcount:
        # Error message will already have been emitted.
        raise SystemExit, 2

    # Persist accumulated hints if the user asked for them with -h.
    if hintfile:
        fp = open(hintfile, "w")
        fp.write(str(globalhints))
        fp.close()

# The following sets edit modes for GNU EMACS
# Local Variables:
# mode:python
# End:
