#
# xmlscan.rb
#
#   Copyright (C) Ueno Katsuhiro 2000,2001
#
# $Id: xmlscan.rb,v 1.8 2001/01/04 09:56:56 katsu Exp $
#

class XMLScanner

  # Exception raised by the default on_error handler when the scanner
  # reports a parse error.
  class ParseError < StandardError; end

  # An Array whose array-specific interface is hidden from outside callers.
  #
  # Only the methods that Array itself defines are made private.  Passing
  # +false+ to instance_methods matters: without it the list also contains
  # everything inherited from Object/Kernel, so generic methods such as
  # +dup+ would become private too and calls like <tt>stack.dup</tt> would
  # raise NoMethodError.
  class PrivateArray < Array
    private(*superclass.instance_methods(false))
  end

  class XMLSource < PrivateArray

    # Adapter that gives an arbitrary input object the small "port"
    # interface used by XMLSource: gets, lineno, path and send_port.
    #
    # The instance methods below are the fallbacks.  The constructor
    # replaces them with per-object singleton methods depending on what
    # the wrapped object can do.
    class PortWrapper

      # Fallback reader: delegate to the wrapped object.
      def gets
        @port.gets
      end

      # Fallback line number for objects that do not track one.
      def lineno
        0
      end

      # Fallback path for objects that do not have one.
      def path
        '-'
      end

      # port:: an object responding to gets, an Array of strings, or any
      #        other object (which is converted with to_s and returned
      #        as a single chunk).
      def initialize(port)
        @port = port
        if not port.respond_to?(:gets) then
          if port.is_a?(Array) then
            # Array input: hand out one element per call and count them.
            @n = -1
            def self.gets ; @port[@n += 1] ; end
            def self.lineno ; @n + 1 ; end
          else
            # Anything else: the whole string once, then nil.
            @port = port.to_s
            def self.gets ; chunk = @port ; @port = nil ; chunk ; end
          end
        end
        if port.respond_to?(:lineno) then
          def self.lineno ; @port.lineno ; end
        end
        if port.respond_to?(:path) then
          def self.path ; @port.path ; end
        end
      end

      # Forwards an arbitrary method call to the wrapped object.
      def send_port(method, *args)
        @port.send(method, *args)
      end

      # Returns +port+ itself when it already responds to every public
      # instance method of this class, otherwise a new wrapper around it.
      def self.wrap(port)
        missing = instance_methods.find { |m| not port.respond_to?(m) }
        missing ? new(port) : port
      end

    end


    # Stand-in port used when no input is attached: it is always at end
    # of input, reports line 0 and the placeholder path '-'.
    module DummyPort
      class << self
        def gets
          nil
        end

        def lineno
          0
        end

        def path
          '-'
        end
      end
    end


    # Creates an empty token buffer and attaches +port+ (which may be
    # nil) through #feed.
    def initialize(port = nil)
      super()        # explicit (): start from an empty array
      feed(port)
    end

    public

    # Attaches a new input to this source and resets the end-of-input
    # flag and the line-number bookkeeping.
    #
    # port:: input object, wrapped by PortWrapper.wrap; nil (or false)
    #        selects DummyPort.
    # Returns self.
    def feed(port)
      @port = port ? PortWrapper.wrap(port) : DummyPort
      @eof = false
      @lineno_size = -1
      @lineno_count = 0
      self
    end

    # Discards all buffered tokens and marks the source as finished.
    # Returns self.
    def abort
      clear
      @eof = true
      self
    end

    # Returns the next token, reading more input from the port when the
    # buffer is running low.
    #
    # The buffer is kept reversed, so the next token is the LAST element
    # and Array#pop (reached through +super+) removes it.  Input is split
    # at every '<' and '>'; a token starting with '<' is the inside of a
    # tag, and the one-character token '>' marks a tag end that is
    # immediately followed by another tag.
    #
    # Returns nil when the buffer is empty and the port is exhausted.
    def pop
      # Fast path: at least two tokens are buffered, or the only token is
      # a bare '>', or the port has already hit end of input.
      if at(1) or first == '>' or @eof then  # at(1) == (size > 1)
        super
      else
        begin
          src = @port.gets
          unless src then
            @eof = true
            break
          end
          # Split before '<' (or before '><') without consuming it, and at
          # '>' consuming it; -1 keeps trailing empty strings.
          a = src.split(/(?=>?<)|>/, -1)
          # If a token is still buffered and the new chunk does not begin
          # with '<' or '>', the chunk continues that token: join them.
          a[0] = super << a[0] if last and (c = a[0][0]) != ?< and c != ?>
          concat a
        end until at(1)  # at(1) == (size > 1)
        # Tokens were appended in reading order; flip them so that the
        # next token is last again.
        reverse!
        # Remember the buffer size after this pop and the popped token
        # itself; #lineno uses them to correct the port's line number.
        @lineno_size = size - 1
        @lineno_count = super
      end
    end

    # True when a next token exists and does not begin with '<', i.e.
    # the token just read was closed by '>'.  Returns nil when nothing
    # is buffered.
    def tag_end?
      upcoming = last
      upcoming and upcoming[0] != ?<
    end

    # True when a next token exists and begins with '<'.  Returns nil
    # when nothing is buffered.
    def tag_start?
      upcoming = last
      upcoming and upcoming[0] == ?<
    end

    # True once the port has reported end of input and every buffered
    # token has been consumed.
    def eof?
      @eof && empty?
    end

    # Line number to report for the current position.
    #
    # When the buffer size differs from the size recorded by the last
    # refill in #pop, the port's own line number is returned unchanged.
    # Otherwise it is reduced by a correction derived from the token that
    # refill returned.  The correction is computed lazily from that token
    # (stored in @lineno_count) and then cached as an Integer.
    def lineno
      return @port.lineno unless size == @lineno_size
      unless @lineno_count.is_a? Integer then
        @lineno_count =
          if @lineno_count then
            # Line starts in the token after stripping leading blanks,
            # minus one.
            @lineno_count.sub(/\A\s+/, '').scan(/^/).size - 1
          else
            0
          end
      end
      @port.lineno - @lineno_count
    end

    # Path reported by the current port.
    def path
      @port.path
    end

    # Invokes +method+ with +args+ on the underlying input object.  A
    # PortWrapper is asked to forward the call to the object it wraps;
    # any other port receives the call directly.
    def send_port(method, *args)
      return @port.send_port(method, *args) if @port.is_a? PortWrapper
      @port.send(method, *args)
    end

  end




  # Creates a scanner.  When +port+ is given it is attached right away
  # through #feed; otherwise the scanner starts with an empty source and
  # is not in the prolog.
  def initialize(port = nil)
    @src = XMLSource.new
    @prolog = false
    feed(port) if port
  end


  # Attaches a new input to the scanner and marks it as being at the
  # start of a document (in the prolog).  Returns self.
  def feed(port)
    @src.feed(port)
    @prolog = true
    self
  end


  # Define in_prolog? as a reader for @prolog: create a plain reader,
  # alias it under the predicate name, then remove the plain name so that
  # only in_prolog? remains.
  attr_reader :prolog
  alias in_prolog? prolog
  undef prolog


  private

  # Default event handlers.  Most of them do nothing and are meant to be
  # overridden by subclasses.

  # Reports a parse error; the default raises ParseError with a
  # "path:lineno: msg" message.
  def on_error(path, lineno, msg)
    raise ParseError, sprintf('%s:%d: %s', path, lineno, msg)
  end

  # XML declaration.  +standalone+ is true, false or nil.
  def on_xmldecl(version, encoding, standalone)
  end

  # Document type declaration; +pubid+ and +sysid+ may be nil.
  def on_doctype(root, pubid, sysid)
  end

  # Comment; +strs+ is an array of string pieces.
  def on_comment(strs)
  end

  # Processing instruction.
  def on_pi(target, pi)
  end

  # Character data.
  def on_chardata(str)
  end

  # End tag.
  def on_etag(name)
  end

  # Start tag; +attr+ maps attribute names to values.
  def on_stag(name, attr)
  end

  # Empty-element tag; +attr+ maps attribute names to values.
  def on_emptyelem(name, attr)
  end

  # General entity reference (+ref+ is the name without '&' and ';').
  # The default expands it with entityref_literal and forwards the result,
  # if any, to on_chardata.
  def on_entityref(ref)
    s = entityref_literal(ref)
    on_chardata s if s
  end

  # Character reference (+ref+ is the text between '&' and ';').  The
  # default expands it with parse_charref and forwards the result, if
  # any, to on_chardata.
  def on_charref(ref)
    s = parse_charref(ref)
    on_chardata s if s
  end

  # End of input.
  def on_eof
  end


  private

  # Reports +msg+ through on_error together with the source's current
  # path and line number.
  def parse_error(msg)
    on_error(@src.path, @src.lineno, msg)
  end


  # Replacement text of the five entities that are predefined in XML,
  # keyed by entity name.
  PredefinedEntity = {
    'lt' => '<',     'gt' => '>',   'quot' => '"',
    'apos' => '\'',  'amp' => '&',
  }

  # Converts a character code into a byte string: the code is packed as a
  # 32-bit big-endian integer and the leading zero bytes are dropped.
  def charref_literal(code)
    big_endian = [code].pack('N')
    big_endian.sub(/\A\000+/mn, '')
  end

  # Returns the replacement text of the predefined entity +ref+.  For any
  # other name a parse error is reported and nil is returned.
  def entityref_literal(ref)
    literal = PredefinedEntity[ref]
    return literal if literal
    parse_error "undefined general entity `#{ref}'"
    nil
  end


  # Expands a character reference.  +ref+ is the text between '&' and ';'
  # ("#65" or "#x41").  Returns the result of charref_literal, or nil
  # after reporting a parse error when +ref+ is malformed.
  def parse_charref(ref)
    case ref
    when /\A#(\d+)\z/
      charref_literal $1.to_i
    when /\A#x([\dA-Fa-f]+)\z/
      charref_literal $1.hex
    else
      parse_error "parse error at `&#{ref}'"
      nil
    end
  end


  # Expands entity and character references inside the attribute value
  # +val+ in place.
  #
  # key:: attribute name
  # val:: attribute value; modified destructively
  #
  # References that cannot be expanded are replaced by an empty string and
  # recorded in @unexpanded_entityrefs as [name, offset, reference], where
  # offset is the position in the expanded value.  Always returns true;
  # scan_stag stores the attribute only when the return value is true.
  def on_attribute_value(key, val)
    # Running difference between positions in the original value and in
    # the value after the replacements made so far.
    inc = 0
    val.gsub!(/&([^;\s<>]+)?;?/) { |m|
      # A well-formed reference ends with ';' and has a non-empty name.
      if m[-1] == ?; and (s = $1) then
        if s[0] == ?\# then
          rep = parse_charref(s)
        else
          rep = entityref_literal(s)
        end
        unless rep then
          @unexpanded_entityrefs = [] unless defined? @unexpanded_entityrefs
          @unexpanded_entityrefs.push [ key.dup, $~.begin(0) + inc, s ]
          rep = ''
        end
        inc += rep.size - m.size
        rep
      else
        # Malformed reference: report it and keep the text unchanged.
        parse_error "parse error at `#{m}'"
        m
      end
    }
    true
  end


  # Scans character data beginning with the token +s+ and continues with
  # following tokens until the next token starts a tag or input ends.
  #
  # Plain text is passed to on_chardata, general entity references to
  # on_entityref and character references to on_charref.  Character
  # references used to be passed to parse_charref directly, which threw
  # the expanded character away instead of delivering it.
  def scan_content(s)
    while true
      unless /&/ =~ s then
        on_chardata s
      else
        # Text before the first '&'.
        on_chardata s unless (s = $`).empty?
        # Each piece starts right after an '&'.
        $'.split(/&/, -1).each { |i|
          unless /;/ =~ i then
            # '&' without a terminating ';'
            parse_error "parse error at `&#{i.split(/\b|\s/,2)[0]}'"
            on_chardata('&' << i)
            next
          end
          e, i = $`, $'
          if /\s/ =~ e then
            # White space between '&' and ';'
            # NOTE(review): only the text after ';' is forwarded here, so
            # the malformed reference itself is dropped -- confirm that
            # this is intended.
            parse_error "parse error at `&#{$`}'"
            on_chardata('&' << i)
            next
          end
          if e[0] == ?\# then
            on_charref e
          else
            on_entityref e
          end
          on_chardata i unless i.empty?
        }
      end
      break if @src.tag_start?
      s = @src.pop
      break unless s
      # A token that does not start a tag follows a '>' that was consumed
      # by the tokenizer; put it back unless the token is that '>' itself.
      s[0,0] = '>' unless s == '>'
    end
  end


  # Scans a comment whose first token is +s+ (beginning with "<!--") and
  # passes the collected pieces to on_comment as an array of strings.
  #
  # Tokens are read until one contains "--".  The comment is well formed
  # when that "--" is followed only by white space and the token was
  # closed by '>'.  Otherwise an error is reported and tokens are consumed
  # up to the next tag end.  That recovery loop previously never advanced
  # when no tag end was pending (it looped forever) and dropped a token
  # when one was.
  def scan_comment(s)
    s[0,4] = ''   # <!--
    comm = [ s ]
    until /--/ =~ s
      s = @src.pop
      unless s then
        parse_error "unterminated comment meets EOF"
        return on_comment(comm)
      end
      # Restore the '>' consumed by the tokenizer between the tokens.
      comm.push '>' if (c = s[0]) != ?< and c != ?>
      comm.push s
    end
    if @src.tag_end? and $'.strip.empty? then
      # Drop the terminating "--" from the last piece.
      comm[-1] = $`
    else
      parse_error "comment is not terminated by `-->'"
      until @src.tag_end?
        s = @src.pop
        break unless s    # end of input
        # A token reached here always begins with '<', so no '>' has to
        # be restored in front of it.
        comm.push s
      end
    end
    on_comment comm
  end


  # Scans a processing instruction whose first token is +s+ (beginning
  # with "<?") and reports it through on_pi(target, content).
  #
  # A token that does not have a target right after "<?" is reported as a
  # parse error and passed to on_chardata unchanged.  When input ends
  # before "?>" is found, an error is reported and the text collected so
  # far is still delivered.
  def scan_pi(s)
    unless /\A<\?(\S+)(?:\s+|(?=\?\z))/ =~ s then
      parse_error "parse error at `<?'"
      on_chardata s
    else
      target, pi = $1, $'
      # The PI ends at a '?' that is immediately followed by '>'.
      until @src.tag_end? and pi[-1] == ??
        s = @src.pop
        unless s then
          parse_error "unterminated PI meets EOF"
          return on_pi(target, pi)
        end
        # Restore the '>' consumed by the tokenizer between the tokens.
        pi << '>' if (c = s[0]) != ?< and c != ?>
        pi << s
      end
      pi.chop!    # trailing '?'
      on_pi target, pi
    end
  end


  # Scans a CDATA section.  +s+ is the text following "<![CDATA[" in the
  # first token.  Tokens are collected until one ends with "]]" and is
  # closed by '>'; the joined text without the final "]]" is passed to
  # on_chardata.  When input ends first, an error is reported and the
  # text collected so far is delivered as it is.
  def scan_cdata(s)
    pieces = [ s ]
    loop do
      break if @src.tag_end? and s[-1] == ?] and s[-2] == ?]
      s = @src.pop
      if not s then
        parse_error "unterminated CDATA section meets EOF"
        return on_chardata(pieces.join)
      end
      # Restore the '>' consumed by the tokenizer between the tokens.
      lead = s[0]
      pieces.push '>' unless lead == ?< or lead == ?>
      pieces.push s
    end
    # s is also the last element of pieces, so trimming it here removes
    # the "]]" from the joined result.
    s.chop!.chop!
    on_chardata pieces.join
  end


  # Reports that the construct +t+ (for example 'start tag') was not
  # closed by '>': it ran into the end of input or into another tag.
  def unclosed_tag(t)
    return parse_error("unclosed #{t} meets EOF") if @src.eof?   # <tag[EOF]
    parse_error "unclosed #{t} meets another tag"                # <tag <tag
  end


  # Reads tokens until one of them matches +re+.
  #
  # re::  terminator pattern
  # dst:: string that receives the text found before the terminator
  # t::   name of the construct, used in the error message
  #
  # Returns the rest of the token after the terminator, or '' after
  # reporting an error when input ends first.
  def read_until(re, dst, t)
    begin
      s = @src.pop
      unless s then
        parse_error "unterminated #{t} meets EOF"
        return ''
      end
      # Restore the '>' consumed by the tokenizer between the tokens.
      dst << '>' if (c = s[0]) != ?< and c != ?>
      v, s = s.split(re, 2)
      # Splitting an empty token yields no pieces at all; there is then
      # nothing to append (appending nil would raise TypeError).
      dst << v if v
    end until s
    s
  end


  # Scans an end tag.  +s+ is the token beginning with "</"; it is
  # modified in place.  The element name is reported through on_etag.
  # Malformed tags are reported with parse_error and, when no usable name
  # exists, the text is handed to on_chardata instead.
  def scan_etag(s)
    s[0,2] = ''  # </
    if /\s/ =~ s then
      # s1: text before the first white space, s2: text after it
      s1, s2 = $`, $'
      if s1.empty? then    # </ tag
        parse_error "parse error at `</'"
        return on_chardata('</' + s)
      elsif not s2.strip.empty? then     # </ta g
        parse_error "parse error at `</#{s1}'"
      end
      s = s1
    elsif s.empty? then
      if @src.tag_end? then
        parse_error "found an empty end tag `</>'"
      else
        parse_error "parse error at `</'"
        return on_chardata('</')
      end
    end
    unclosed_tag 'end tag' unless @src.tag_end?
    on_etag s
  end


  # Scans a start tag or empty-element tag.  +s+ is the token beginning
  # with '<'.  The element name and an attribute hash are reported through
  # on_stag, or through on_emptyelem when the tag ends with '/'.
  #
  # Every attribute value is passed to on_attribute_value first and is
  # stored only when that method returns true.  An attribute value that
  # contains '>' spans several tokens; the missing tokens are read here.
  #
  # Fixed: the end-of-input message inside an attribute value referred to
  # an undefined variable, and an empty token inside an attribute value
  # made the value concatenation fail with TypeError.
  def scan_stag(s)
    attr = {}
    method = :on_stag
    unless /(?=[\/\s])/ =~ s then
      # No white space and no '/': the token is only '<' plus the name.
      name = s
      name[0,1] = ''
      if name.empty? then   # << or <>
        if @src.tag_end? then
          parse_error "found an empty start tag `<>'"
        else
          parse_error "parse error at `<'"
          return on_chardata('<' + s)
        end
      end
    else
      name = $`
      s = $'
      name[0,1] = ''
      if name.empty? then   # < tag
        parse_error "parse error at `<'"
        return on_chardata('<' + s)
      end
      # Each pass scans the attributes in s.  A pass is repeated with the
      # remaining text when an attribute value had to be continued from
      # following tokens.
      begin
        complete = true
        # key/val: an attribute, e: the '/' of an empty-element tag,
        # err: anything else.
        s.scan(/\s+(?:([^\s\/=]+)\s*=\s*('[^']*'?|"[^"]*"?)|\z)|\s*(\/\z)|\s*(.[^='"\s]*)/m
               ) { |key,val,e,err|
          if key then
            qmark = val.slice!(0,1)
            if val[-1] == qmark[0] then
              val.chop!
            else
              # The closing quote is missing, so the value contains '>'.
              # Read tokens until the quote is found.
              complete = false
              re = /#{qmark}/
              begin
                s = @src.pop
                if not s then
                  parse_error "unterminated attribute value meets EOF"
                  complete = true
                  break
                elsif (c = s[0]) == ?< then
                  parse_error "`<' is found in attribute value"
                elsif c != ?> then
                  val << '>'
                end
                v, s = s.split(re, 2)
                # An empty token yields no pieces; nothing to append then.
                val << v if v
              end until s
              # The unterminated value reached the end of the string being
              # scanned, so this is its last match.
            end
            if on_attribute_value(key, val) then
              parse_error "doubled attribute `#{key}'" if attr.key? key
              attr[key] = val
            end
          elsif e then
            method = :on_emptyelem
          elsif err then
            parse_error "parse error at `#{err.split(/\b|\s/,2)[0]}'"
          end
        }
      end until complete
    end
    unclosed_tag 'start tag' unless @src.tag_end?
    send method, name, attr
  end


  # Called with the text following '[' in a DOCTYPE declaration.  The
  # internal subset is not handled here: an error is reported and +s+ is
  # ignored.
  def parse_internal_dtd(s)
    parse_error("internal DTD subset is not supported")
  end


  # Matches the part of a document type declaration that follows
  # "<!DOCTYPE ": $1 is the root element name, $2 the quoted public
  # identifier and $3 the quoted system identifier (its closing quote may
  # be missing when the identifier contains '>').
  #
  # The external identifier is optional, so "<!DOCTYPE root>" and
  # "<!DOCTYPE root [...]>" match with $2 and $3 set to nil.  It used to
  # be mandatory, which rejected both forms.
  DOCTYPEPattern =
    /\A([^\s\["']+)(?:\s+(?:SYSTEM|PUBLIC\s+("[^"]*"|'[^"']*'))\s+("[^"]*"?|'[^']*'?))?\s*/

  # Scans a document type declaration.  +s+ is the text of the token
  # after "<!DOCTYPE" and the white space following it.  The root element
  # name and the unquoted public and system identifiers are reported
  # through on_doctype.
  def scan_doctype(s)
    unless DOCTYPEPattern =~ s then
      parse_error "parse error in DOCTYPE"
      return
    end
    # The last value is the text remaining after the match.
    root, pubid, sysid, s = $1, $2, $3, $'
    if pubid then
      # Strip the surrounding quotes.
      pubid.chop!
      pubid[0,1] = ''
    end
    if sysid then
      c = sysid.slice!(0,1)
      if c[0] == sysid[-1] then
        sysid.chop!
      else
        # The closing quote is missing: the identifier contains '>' and
        # continues in following tokens.
        s = read_until(/#{c}\s*/, sysid, 'DOCTYPE')
      end
    end
    if s[0] == ?[ then
      s[0,1] = ''
      parse_internal_dtd s
    elsif not s.empty? then
      parse_error "parse error at `#{s.split(/\b|\s/,2)[0]}'"
    end
    unclosed_tag 'DOCTYPE' unless @src.tag_end?
    on_doctype root, pubid, sysid
  end


  # Handles a token beginning with "<!" that is neither a comment nor a
  # CDATA section: an error is reported and the token is delivered as
  # character data.
  def scan_bang_tag(s)
    parse_error("parse error at `<!'")
    on_chardata(s)
  end


  # Dispatches one token to the matching scanner according to its first
  # characters:
  #
  #   ">"            skipped; the following token is scanned instead
  #   "</"           scan_etag
  #   "<!--"         scan_comment
  #   "<![CDATA["    scan_cdata (receives only the text after the marker)
  #   "<!"           scan_bang_tag
  #   "<?"           scan_pi
  #   "<"            scan_stag
  #   anything else  scan_content
  def scan_text(s)
    lead = s[0]
    if lead == ?> then
      scan_text @src.pop
    elsif lead != ?< then
      scan_content s
    else
      kind = s[1]
      if kind == ?? then
        scan_pi s
      elsif kind == ?/ then
        scan_etag s
      elsif kind != ?! then
        scan_stag s
      elsif s[2] == ?- and s[3] == ?- then
        scan_comment s
      elsif /\A<!\[CDATA\[/ =~ s then
        scan_cdata $'
      else
        scan_bang_tag s
      end
    end
  end


  # Patterns for the token of an XML declaration and of a text
  # declaration (both without the closing '>').  The captures are the
  # quoted values, including their quotes:
  #
  #   XMLDeclPattern::  $1 version, $2 encoding (optional),
  #                     $3 standalone (optional, only after encoding)
  #   TextDeclPattern:: $1 version (optional), $2 encoding
  #
  # The block only serves to keep the helper strings local.
  XMLDeclPattern, TextDeclPattern = instance_eval {
    version = '\\s+version\\s*=\\s*("[^"\']+"|\'[^"\']+\')'
    encoding = '\\s+encoding\\s*=\\s*("[^"\']+"|\'[^"\']+\')'
    standalone = '\\s+standalone\\s*=\\s*("[^"]+"|\'[^\']+\')'
    [ /\A<\?xml#{version}(?:#{encoding}(?:#{standalone})?)?\s*\?\z/,
      /\A<\?xml(?:#{version})?#{encoding}\s*\?\z/
    ]
  }

  # Scans the document prolog: an optional XML declaration followed by any
  # number of comments, processing instructions, white space and DOCTYPE
  # declarations.  Scanning stops at the first token that cannot belong to
  # the prolog; @prolog is cleared and that token, if any, is handed to
  # scan_text.
  def scan_prolog
    s = @src.pop
    if s and /\A<\?xml\b/ =~ s then
      unless XMLDeclPattern =~ s then
        # s is left unchanged, so the loop below treats it as an ordinary
        # processing instruction.
        parse_error 'parse error in XML declaration'
      else
        version, encoding, standalone = $1, $2, $3
        # Strip the surrounding quotes from each value.
        version.chop!
        version[0,1] = ''
        if encoding then
          encoding.chop!
          encoding[0,1] = ''
          encoding.downcase!
        end
        if standalone then
          standalone.chop!
          standalone[0,1] = ''
          # Convert 'yes'/'no' to true/false; anything else is an error
          # and is reported as nil.
          if standalone == 'yes' then
            standalone = true
          elsif standalone == 'no' then
            standalone = false
          else
            parse_error 'invalid standalone document declaration'
            standalone = nil
          end
        end
        on_xmldecl version, encoding, standalone
        unclosed_tag 'XML declaration' unless @src.tag_end?
        s = @src.pop
      end
    end
    while s
      if s[0] == ?< then
        if (c = s[1]) == ?! then
          if s[2] == ?- and s[3] == ?- then
            scan_comment s
          elsif /\A<!DOCTYPE\s+/ =~ s then
            scan_doctype $'
          else
            break
          end
        elsif c == ?? then
          scan_pi s
        else
          break
        end
      elsif s.strip.empty? and @src.tag_start? then
        # White space between two markup items.
        on_chardata s
      else
        break
      end
      s = @src.pop
    end
    @prolog = false
    s and scan_text(s)
  end


  public

  # Performs one scanning step and returns the result of the handler that
  # was run:
  #
  # * in the prolog, the whole prolog is scanned;
  # * at end of input, on_eof is called and the source is detached;
  # * otherwise the next token is scanned, or nil is returned when there
  #   is none.
  def step
    return scan_prolog if @prolog
    if @src.eof? then
      ret = on_eof
      @src.feed nil
      return ret
    end
    s = @src.pop
    s ? scan_text(s) : nil
  end


  # Scans the whole input.  When +src+ is given it is attached first with
  # #feed.  The prolog is scanned if it has not been yet, then every
  # remaining token, and finally on_eof is called.  Returns self.
  def parse(src = nil)
    feed src if src
    scan_prolog if @prolog
    loop do
      s = @src.pop
      break unless s
      scan_text s
    end
    on_eof
    self
  end




  # Mix-in that makes a scanner tolerant of errors: parse errors are never
  # passed to on_error.  They are only printed to standard error when
  # $VERBOSE is set.
  module Loose

    # Path and line number come from the scanner's source; the scanner
    # itself has no such methods.
    def parse_error(msg)
      STDERR.printf "parse error:%s:%d: %s\n", @src.path, @src.lineno, msg if $VERBOSE
    end
    private :parse_error

  end



  # Mix-in that defers error reporting.  on_error only records the
  # message; the oldest recorded message is raised as ParseError before
  # the next token is scanned and after step or parse has finished.
  module Recoverable

    # Starts with an empty error list for every new input.
    def feed(*args)
      @errors = []
      super
    end

    private

    # Records the error instead of raising it.
    def on_error(path, lineno, msg)
      @errors.push sprintf('%s:%d: %s', path, lineno, msg)
    end

    # True when at least one recorded error is waiting to be raised.
    # (This used to return the opposite.)
    def error?
      not @errors.empty?
    end

    def scan_text(s)
      raise ParseError, @errors.shift unless @errors.empty?
      super
    end

    public

    def step
      ret = super
      raise ParseError, @errors.shift unless @errors.empty?
      ret
    end

    def parse(*args)
      ret = super
      raise ParseError, @errors.shift unless @errors.empty?
      ret
    end

  end



  ## for external general parsed entities

  # Mix-in for scanning external parsed entities: the prolog consists of
  # an optional text declaration only.
  module ExternalEntity

    # Scans an optional text declaration and reports it through
    # on_xmldecl(version, encoding, nil); version is nil when the
    # declaration has none.  Both values are reported without their
    # quotes.  (The version used to keep its quotes, unlike the one
    # reported for an XML declaration.)
    #
    # Returns the result of on_xmldecl for a text declaration; otherwise
    # the first token, if any, is handed to scan_text.
    def scan_prolog
      s = @src.pop
      if s and /\A<\?xml\b/ =~ s then
        unless TextDeclPattern =~ s then
          parse_error 'parse error in text declaration'
        else
          version, encoding = $1, $2
          if version then
            version.chop!
            version[0,1] = ''
          end
          if encoding then
            encoding.chop!
            encoding[0,1] = ''
            encoding.downcase!
          end
          ret = on_xmldecl(version, encoding, nil)
          unclosed_tag 'text declaration' unless @src.tag_end?
          @prolog = false
          return ret
        end
      end
      @prolog = false
      s and scan_text(s)
    end

  end

end



# XMLScanner combined with the Loose mix-in.
class LooseXMLScanner < XMLScanner
  include Loose
end

# XMLScanner combined with the Recoverable mix-in.
class RecoverableXMLScanner < XMLScanner
  include Recoverable
end




# Scanner that additionally tracks element nesting.  It reports elements
# through on_start_element / on_end_element and collected character data
# through on_text, and reports mismatched end tags, a second root
# element, character data outside the root element and elements left
# open at end of input.
class WellFormedXMLScanner < XMLScanner

  # Stack of the names of the currently open elements.  Once the root
  # element has been closed, the stack holds a single nil as a marker.
  class ElementStack < PrivateArray

    # True when no element is open (before the root element or after it).
    def no_element?
      first.nil?       # empty? or first.nil?
    end

    # True once a root element has been started.
    def root_found?
      not empty?
    end

    # Pushes +name+.  Returns a true value on success.  When the root
    # element has already been closed, +name+ replaces the marker and nil
    # is returned.
    def push_element(name)
      if first or empty? then
        push name
      else
        pop
        push name
        nil
      end
    end

    # Pops +name+ when it is the innermost open element and returns self;
    # returns nil without changing anything otherwise.
    def pop_element(name)
      if name == last then
        pop
        push nil if empty?    # the root element has been closed
        self
      else
        nil
      end
    end

    alias current last
    public :current

    # Yields the open element names, innermost first.
    def each
      reverse_each { |i| yield i if i }
    end

  end



  # Resets the element stack and the standalone flag, then attaches the
  # new input.
  def feed(*args)
    # self.class rather than a fixed constant, so that a subclass can
    # supply its own ElementStack.  (Object#type, used here before, no
    # longer exists in Ruby.)
    @elemstack = self.class::ElementStack.new
    @standalone = false
    super
  end


  private

  # Handlers for subclasses to override.

  def on_start_element(name, attr)
  end

  def on_end_element(name)
  end

  def on_text(str)
  end


  # Text used for an element name in error messages.
  def name_in_errmsg(name)
    name
  end


  def on_xmldecl(version, encoding, standalone)
    @standalone = standalone
  end

  def on_doctype(root, pubid, sysid)
    @standalone = false if @standalone.nil? and (pubid or sysid)
  end

  # Collects everything reported through on_chardata during one run of
  # content and delivers it with a single on_text call.
  def scan_content(s)
    @__xml_text__ = ''
    super
    on_text @__xml_text__
  end

  # NOTE(review): the buffer is only created by scan_content above, so
  # character data arriving on another path appears to be appended to a
  # buffer that is never delivered -- confirm.
  def on_chardata(str)
    unless @elemstack.no_element? then
      @__xml_text__ << str
    else
      unless str.strip.empty? then
        parse_error "character data are found out of root element"
      end
    end
  end

  # def on_entityref(ref)
  # def on_charref(ref)

  def on_stag(name, attr)
    if @elemstack.push_element name then
      on_start_element name, attr
    else
      parse_error "another root element `#{name_in_errmsg(name)}'"
      @src.abort
    end
  end

  def on_etag(name)
    unless @elemstack.pop_element name then
      parse_error "element type `#{name_in_errmsg(name)}' is not matched"
    else
      on_end_element name
    end
  end

  def on_emptyelem(name, attr)
    if @elemstack.push_element name then
      on_start_element name, attr
      on_end_element name
      @elemstack.pop_element name
    else
      parse_error "another root element `#{name_in_errmsg(name)}'"
      @src.abort
    end
  end

  # Reports a missing root element, or closes every element that is
  # still open.
  def on_eof
    if not @elemstack.root_found? then
      parse_error "no root element was found"
    elsif not @elemstack.no_element? then
      # Iterate over a copy because the stack is modified in the loop.
      @elemstack.dup.each { |name|
        parse_error "unclosed element `#{name_in_errmsg(name)}' meets EOF"
        on_end_element name
        @elemstack.pop_element name
      }
    end
  end


  # An undeclared entity is reported as an error (through super) only
  # when @standalone is true.
  def entityref_literal(ref)
    PredefinedEntity[ref] or (@standalone and super)
  end

  # Replaces every white space character (a CR LF pair counts as one) by
  # a single space before the references are expanded.
  def on_attribute_value(key, val)
    val.gsub!(/\r\n|\s/, ' ')
    super
  end

end




# Well-formedness scanner with XML namespace support.  Element names are
# passed to the element handlers as [namespace, prefix, localpart] arrays
# and attributes as a hash of hashes keyed by namespace and then by local
# name.
class XMLScannerWithNamespace < WellFormedXMLScanner

  # Prefixes that are bound without a declaration.
  PredefinedNamespace = {
    'xml' => 'http://www.w3.org/XML/1998/namespace',
  }

  # Element stack that also keeps the namespace bindings in effect.
  #
  # Besides element names (three-element arrays) the stack holds log
  # entries [prefix, previous_uri] for every namespace declaration.  A
  # log entry has no third element, which is how the two kinds are told
  # apart.
  class ElementStack < superclass::ElementStack

    def initialize
      super
      @namespace = {}
    end

    attr_reader :namespace

    # URI bound to the default namespace, or nil.
    def default_namespace
      @namespace[:default]
    end

    # URI bound to the prefix +name+, or nil.
    def get_namespace(name)
      @namespace[name] or PredefinedNamespace[name]
    end

    # Binds +name+ to +uri+ (an empty +uri+ removes the binding) and logs
    # the previous binding on the stack.
    def set_namespace(name, uri)
      push [ name, @namespace[name] ]
      if uri.empty? then
        @namespace.delete name
      else
        @namespace[name] = uri
      end
    end

    # Pops the element +name+ and restores the bindings from the log
    # entries found directly beneath it.
    def pop_element(name)
      if name == last then
        pop
        while log = last and not log[2]
          pop
          @namespace[log[0]] = log[1]
        end
        # Nothing is left on the stack: mark the root element as closed.
        push nil unless log
        self
      else
        nil
      end
    end

    # Yields the open element names, innermost first, skipping the log
    # entries.
    def each
      reverse_each { |i| yield i if i[2] }
    end

  end



  private

  # Splits a qualified name and returns [namespace, prefix, localpart].
  # A name without a colon gets +default+ as its namespace and a nil
  # prefix.
  def expand_qualified_name(name, default = nil)
    unless /:/ =~ name then
      [ default, nil, name ]
    else
      prefix, localpart = $`, $'
      if localpart.empty? then
        parse_error "parse error at `:'"
        return [ nil, nil, name ]
      elsif /:/ =~ localpart then
        parse_error "localpart `#{localpart}' includes a colon"
      end
      unless namespace = @elemstack.get_namespace(prefix) then
        parse_error "undeclared namespace `#{prefix}'"
        namespace = nil
      end
      [ namespace, prefix, localpart ]
    end
  end

  # Returns a new hash of the form { namespace => { localname => value } }.
  def expand_attr_namespace(attr)
    dst = {}
    attr.each { |key,val|
      namespace, prefix, name = expand_qualified_name(key)
      h = dst[namespace]
      dst[namespace] = h = {} unless h
      h[name] = val
    }
    dst
  end

  # Alternative form: rewrites the keys of +attr+ in place to
  # "namespace localname" (with an empty namespace when there is none).
  def expand_attr_namespace_2(attr)
    attr.keys.each { |key|
      namespace, prefix, name = expand_qualified_name(key)
      if namespace then
        k = namespace + ' ' + name
      else
        k = ' ' + name
      end
      attr[k] = attr.delete(key)
    }
    attr
  end

  # Formats an expanded name as "prefix:localpart", or as the local part
  # alone when it has no prefix.
  def name_in_errmsg(name)
    if name[1] then
      "#{name[1]}:#{name[2]}"
    else
      name[2]
    end
  end

  def on_stag(name, attr)
    super(expand_qualified_name(name, @elemstack.default_namespace),
          expand_attr_namespace(attr))
  end

  def on_emptyelem(name, attr)
    super(expand_qualified_name(name, @elemstack.default_namespace),
          expand_attr_namespace(attr))
  end

  def on_etag(name)
    super expand_qualified_name(name, @elemstack.default_namespace)
  end

  def on_pi(target, pi)
    parse_error "PI target must not include `:'" if /:/ =~ target
  end

  # Handles namespace declarations.  Returns false for xmlns and xmlns:*
  # attributes and true for every other attribute.
  def on_attribute_value(key, val)
    super
    # f becomes true for a prefixed declaration (xmlns:prefix).
    f = nil
    if key == 'xmlns' or f = (key[0,6] == 'xmlns:') then
      if f then
        name = key[6..-1]
        if name.empty? then
          parse_error "parse error at `:'"
        elsif /:/ =~ name then
          parse_error "namespace name `#{name}' includes a colon"
        elsif name[0,3].downcase == 'xml' then
          parse_error "prefix `#{name}' is reserved"
        elsif /\s/ =~ val then
          parse_error "invalid namespace `#{val}'"
        elsif val.empty? then
          parse_error "null namespace is declared as `#{name}'"
        else
          @elemstack.set_namespace name, val
        end
      else
        @elemstack.set_namespace :default, val
      end
      false
    else
      true
    end
  end

end


=begin

supported by XMLScanner

Well-Formedness Constraint: Unique Att Spec
No attribute name may appear more than once in the same start-tag or
empty-element tag.

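Well-Formedness Constraint: No < in Attribute Values
The replacement text of any entity referred to directly or indirectly in
an attribute value must not contain a <.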


supported by WellFormedXMLScanner

Well-Formedness Constraint: Element Type Match
The Name in an element's end-tag must match the element type in the
start-tag.

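Well-Formedness Constraint: Entity Declared
In a document without any DTD, a document with only an internal DTD subset
which contains no parameter entity references, or a document with
"standalone='yes'", the Name given in the entity reference must match
that in an entity declaration that does not occur within the external
subset or a parameter entity, except that well-formed documents need not
declare any of the following entities: amp, lt, gt, apos, quot. The
declaration of a general entity must precede any reference to it which
appears in a default value in an attribute-list declaration. Note that if
entities are declared in the external subset or in parameter entities, a
non-validating processor is not obligated to read and process their
declarations; for such documents, the rule that an entity must be declared
is a well-formedness constraint only if standalone='yes'.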


supported by XMLScanner with xmldtd.rb

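Well-Formedness Constraint: PEs in Internal Subset
In the internal DTD subset, parameter-entity references can occur only
where markup declarations can occur, not within markup declarations.
(This does not apply to references that occur in external parameter
entities or to the external subset.)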

Well-Formedness Constraint: In DTD
Parameter-entity references may only appear in the DTD.


supported by XMLParsedEntity

Well-Formedness Constraint: No Recursion
A parsed entity must not contain a recursive reference to itself, either
directly or indirectly.




Well-Formedness Constraint: No External Entity References
Attribute values cannot contain direct or indirect entity references to
external entities.

Well-Formedness Constraint: Legal Character
Characters referred to using character references must match the
production for Char.

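Well-Formedness Constraint: Parsed Entity
An entity reference must not contain the name of an unparsed entity.
Unparsed entities may be referred to only in attribute values declared to
be of type ENTITY or ENTITIES.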

=end






## for internal general parsed entities

# Scanner for the replacement text of an internal parsed entity.
class XMLParsedEntity < WellFormedXMLScanner

  # name:: stored in @name (not used anywhere else in this class)
  # src::  the input to scan, passed on to the superclass
  def initialize(name, src)
    @name = name
    super src
  end

  #undef feed

  private

end




## for external general parsed entities

# Scanner for external parsed entities: the prolog consists of an
# optional text declaration only.
class ExtXMLScanner < XMLScanner

  # Scans an optional text declaration and reports it through
  # on_xmldecl(version, encoding, nil); version is nil when the
  # declaration has none.  Both values are reported without their quotes.
  # (The version used to keep its quotes, unlike the one reported for an
  # XML declaration.)
  #
  # Returns the result of on_xmldecl for a text declaration; otherwise
  # the first token, if any, is handed to scan_text.
  def scan_prolog
    s = @src.pop
    if s and /\A<\?xml\b/ =~ s then
      unless TextDeclPattern =~ s then
        parse_error 'parse error in text declaration'
      else
        version, encoding = $1, $2
        if version then
          version.chop!
          version[0,1] = ''
        end
        if encoding then
          encoding.chop!
          encoding[0,1] = ''
          encoding.downcase!
        end
        ret = on_xmldecl(version, encoding, nil)
        unclosed_tag 'text declaration' unless @src.tag_end?
        @prolog = false
        return ret
      end
    end
    @prolog = false
    s and scan_text(s)
  end

end





# Self-test driver, run only when this file is executed directly.
#
#   ruby xmlscan.rb [file...]       scan the input and print the time taken
#   ruby xmlscan.rb - [file...]     also print every token via xmltoken
#   ruby xmlscan.rb -- file         as above, piping the output to
#                                   "diff -u file -"
if __FILE__ == $0 then
  #class TestScanner < XMLScanner
  #class TestScanner < WellFormedXMLScanner
  class TestScanner < XMLScannerWithNamespace
    # Print errors instead of raising them.
    def on_error(path, lineno, msg)
      STDERR.printf "%s:%d: %s\n", path, lineno, msg
    end
  end
  STDOUT.sync = STDERR.sync = true

  if /\A--?\z/ === ARGV[0] then
    if ARGV.shift == '--' and ARGV.size == 1 then
      # NOTE(review): the file name is interpolated into a shell command
      # line without quoting.
      p = IO.popen("diff -u #{ARGV[0]} -", 'w')
      STDOUT.reopen p
      # Make hashes remember their pairs in insertion order (as a plain
      # list of pairs) for the rest of this run.
      class Hash
        def []=(k,v)
          (@a ||= []).push [ k, v ]
        end
        def each(&b)
          @a.each(&b) if defined? @a
        end
      end
    end
    class TestScanner
      # Mark this file as already loaded before requiring xmltoken.
      $".push 'xmlscan.rb'
      require 'xmltoken'
      # Defines on_<name> handlers that call the inherited handler and
      # then print the corresponding Tokenizer object.
      def self.def_handler(*name)
        name.each { |i|
          eval %{
            def on_#{i.downcase}(*a)
              super
              print Tokenizer::#{i}.new(*a).to_s
            end
          }
        }
      end
      def_handler 'CharData', 'Comment', 'PI', 'XMLDecl', 'Doctype'
      def_handler 'ETag', 'STag', 'EmptyElem'
    end
  end

  src = ARGF.read
  scan = TestScanner.new
  # Process.times replaces Time.times, which no longer exists in Ruby.
  t1 = Process.times.utime
  scan.parse(src)
  t2 = Process.times.utime
  STDERR.printf "%2.3f sec\n", t2 - t1
end
