#
# rubyrefmparser.rb
#
# Copyright (c) 2002,2003 Minero Aoki <aamine@loveruby.net>
#
# This program is free software.
# You can distribute/modify this program under the terms of
# the GNU Lesser General Public License version 2 or later.
#

require 'refe/lineinput'
require 'refe/rdutils'


module ReFe

  class RubyReferenceManualParser

    # Regexp source fragments interpolated into the CLASS_BEGIN patterns.
    # `const` is a single constant name: an uppercase letter followed by at
    # least one more word character.  `constpath` is one such name followed
    # by zero or more `::Name` segments (`\::` is just an escaped `::`).
    const = '[A-Z]\w+'
    constpath = '[A-Z]\w+(?:\::[A-Z]\w+)*'
    # Table of [pattern, fixed_class_name] pairs used by #parse to recognise
    # a heading line that starts a class/module section.  When the second
    # element is nil the class name is taken from capture group 1 of the
    # pattern; otherwise the fixed name is used as the class name.
    # #parse tries the pairs in order and stops at the first usable match.
    #
    # NOTE(review): the non-ASCII text in the last three patterns looks
    # mis-decoded (the file was presumably EUC-JP, matching the /e option).
    # They were presumably Japanese heading titles, roughly:
    #   - "class methods of the Struct class" ('構造体クラスのクラスメソッド')
    #   - "built-in functions" ('組み込み関数')
    #   - another "built-in ..." title whose trailing characters were lost
    # Confirm against an upstream copy before relying on them.
    CLASS_BEGIN = [
      [/\A= (#{constpath})\s*$/n,                               nil],
      [/\A={2,3} (#{const}(?:\::#{const})+)\s/n,                nil],
      [/\A={1,3} (?:[Cc]lass|[Mm]odule)\s+(#{constpath})/n,     nil],
      [/\A={1,3} (#{constpath})\s+(?:[Cc]lass|[Mm]odule)/n,     nil],
      [/\A={1,3} \(\(:(#{constpath}):\)\)/n,                    nil],
      [/\A=== ¤Υ饹Υ饹᥽å/e,                  'Struct::XXX'],
      [/\A= Ȥ?ߴؿ/e,                                   'Kernel'],
      [/\A= Ȥ?/e,                                   'Kernel']
    ]

    # Heading words that look like constant names but are not classes.
    # #parse skips a CLASS_BEGIN match whose class name is exactly one of
    # these titles.
    reject_titles = %w[
      BeOS Cygwin DJGPP Example GNU Mac MinGW
      Miscellaneous OS2 Unix VMS Win32 WindowsCE
      Summary
    ]
    rejected_alternation = reject_titles.join('|')
    # Anchored on both ends so only a whole-name match is rejected.
    CLASS_REJECT = /\A(?:#{rejected_alternation})\z/

    # Heading patterns that make #parse direct the following entries to the
    # current class's singleton ("Class.") method table.  #parse checks
    # these before INSTANCE_METHODS_BEGIN.
    #
    # NOTE(review): the non-ASCII titles look mis-decoded (presumably
    # EUC-JP, matching the /e option).  They were presumably, in order:
    # "class methods" ('クラスメソッド'), "module functions" ('モジュール関数'),
    # "module attributes" ('モジュール属性') and "module methods"
    # ('モジュールメソッド') -- confirm against an upstream copy.
    SINGLETON_METHODS_BEGIN = [
      /\A={2,3} 饹᥽å/e,
      /\A={2,3} ⥸塼ؿ/e,
      /\A={2,3} ⥸塼°/e,
      /\A={2,3} ⥸塼᥽å/e,
      /\A={2,4} Module Functions?/i,
      /\A={2,4} Class Methods?/i,
      /\A={2,4} Singleton Methods?/i
    ]

    # Heading patterns that make #parse direct the following entries to the
    # current class's instance ("Class#") method table.
    #
    # NOTE(review): the non-ASCII titles look mis-decoded (presumably
    # EUC-JP, matching the /e option).  They were presumably "methods"
    # ('メソッド') and "private methods" ('プライベートメソッド') -- confirm
    # against an upstream copy.
    INSTANCE_METHODS_BEGIN = [
      /\A={2,3} ᥽å/e,
      /\A={2,3} ץ饤١ȥ᥽å/e,
      /\A={2,4} Instance Methods?/i,
      /\A={2,4} Methods?/i
    ]

    # Scans a reference-manual document line by line and collects class
    # descriptions and method/constant entries.
    #
    # f:: input source; it is wrapped in LineInput.new, so it must be
    #     whatever LineInput accepts (presumably an IO-like object).
    #
    # Returns a two-element array [classes, methods]:
    #   classes:: {class name => description text}
    #   methods:: {class name + '.' or '#' => {entry name => description}}
    #
    # May raise (via register_entry_to) when an entry name cannot be
    # extracted or when aliases of one entry disagree.
    def parse( f )
      classes = {}   # {class => description}
      methods = {}   # {class[#.] => {method => description}}
      current_class = nil
      s_table = nil   # singleton-method ('.') table of the current class
      m_table = nil   # instance-method ('#') table of the current class
      tbl = nil       # table that context-dependent entries currently go to
      off = false     # true while inside a section whose entries are ignored

      f = LineInput.new(f)
      while line = f.gets
        case line
        when /\A=+\s/
          # Heading line: decide which table subsequent entries belong to.
          case line
          when *SINGLETON_METHODS_BEGIN
            tbl = s_table
            off = false
          when *INSTANCE_METHODS_BEGIN
            tbl = m_table
            off = false
          # NOTE(review): the non-ASCII text in the next two patterns looks
          # mis-decoded; presumably the "sprintf format" and "pack template
          # string" page titles.  Each whole page is stored as an entry of
          # the pseudo class 'man'.
          when /\A= sprintfեޥå/
            (methods['man.'] ||= {})['sprintf'] = read_page(f, line)
            classes['man'] = ''
          when /\A= packƥץ졼ʸ/
            (methods['man.'] ||= {})['pack'] = read_page(f, line)
            classes['man'] = ''
          else
            # Any other heading disables entry collection until a class
            # heading or a method-section heading re-enables it.
            off = true
            if /\A= / === line
              # A top-level heading ends the current class entirely.
              current_class = nil
              s_table = m_table = tbl = nil
            end
            CLASS_BEGIN.each do |re, static_mname|
              m = re.match(line) or next
              c = (static_mname || m[1])
              next if CLASS_REJECT === c

              current_class = c
              buf = ''
              # Collect the class description; presumably until_match yields
              # lines up to (not including) the next line starting with
              # '=' or '-' -- see LineInput.
              f.until_match(/\A[\=\-]/) do |line|
                buf << line
              end
              # ||= keeps the first description seen for a class.
              classes[current_class] ||= RDUtils.untag(buf.strip)
              s_table = (methods[current_class + '.'] ||= {})
              m_table = (methods[current_class + '#'] ||= {})
              # Entries default to the instance-method table.
              tbl = m_table
              off = false
              break
            end
          end

        when /\A(?:---|:) (?>[A-Z\d_:]+)\s/   # constants
          # All-uppercase names are stored in the singleton table,
          # regardless of the current section and of `off`.
          next unless s_table
          register_entry_to s_table, line, f

        when /\A(?:---|:)\s/   # method
          if /\A--- ([\w:]+[\.\#])/ === line   # context independent entry
            # The entry names its own class ("Foo.bar" / "Foo#bar"), so the
            # table is chosen from that prefix instead of the current state.
            spec = $1
            tmp = (methods[spec] ||= {})
            register_entry_to tmp, line, f
          else
            next unless tbl
            next if off
            register_entry_to tbl, line, f
          end
        end
      end

      return classes, methods
    end

    # Reads a whole page starting at +first_line+ and returns its text
    # passed through RDUtils.untag.
    #
    # f::          line source providing #until_match; reading stops at the
    #              pattern /\A= / (presumably the next top-level heading).
    # first_line:: the heading line already consumed by the caller.
    #
    # Lines starting with "### " are left out of the page.
    def read_page( f, first_line )
      page = ''
      page << first_line
      f.until_match(/\A= /) do |text|
        next if text =~ /\A\#\#\# /
        page << text
      end
      RDUtils.untag(page.strip)
    end

    # Reads one entry (its signature lines plus description) from +f+ and
    # stores it in +table+ under every name the entry declares.
    #
    # If one of the names is already present, the new text is appended to
    # the existing description string, and all names end up sharing that
    # same string object.
    def register_entry_to( table, first_line, f )
      names, entry_text = read_entry(first_line, f)
      existing = find_same_method(table, names)
      shared = existing ? (existing << entry_text) : entry_text
      names.each {|n| table[n] = shared }
    end

    # Reads a single entry beginning at +first_line+.
    #
    # Returns [names, text]: the entry names extracted from the first line
    # and from each immediately following signature line, and the untagged
    # entry text (signature lines plus description) terminated by a blank
    # line.
    def read_entry( first_line, f )
      text = ''
      text << first_line
      names = [get_method_name(first_line, f)]

      # further signature lines directly below the first one are aliases
      f.while_match(/\A(?:---|:)/) do |signature|
        text << signature
        names << get_method_name(signature, f)
      end

      # everything up to the next entry or heading is the description
      f.until_match(/\A(?:---|:|=)/) do |body_line|
        text << body_line
      end

      description = RDUtils.untag(text).strip + "\n\n"
      [names, description]
    end

    # Looks up the description already registered in +table+ for any of
    # +mnames+.
    #
    # Returns that description, or nil when none of the names is known.
    # Raises RuntimeError when the names map to differing descriptions.
    def find_same_method( table, mnames )
      known = mnames.map {|name| table[name] }
      known.compact!
      known.uniq!
      if known.length > 1
        raise 'fatal: inconsistent document; cannot parse'
      end
      known.first
    end

    # Returns the entry name found in +line+.
    #
    # When no name can be extracted, prints the current line number of +f+
    # and the offending line, then raises RuntimeError.
    def get_method_name( line, f )
      name = _get_method_name(line)
      return name if name

      p f.lineno
      p line
      raise 'cannot get method name'
    end

    # Extracts the method/constant name from an entry signature line
    # (a line starting with "---" or ":").
    #
    # The patterns are tried in order and the first match wins, so the
    # order below is significant.  Returns the name as a String, or nil
    # when no pattern matches.
    #
    # The bare-operator pattern ("+(m)" form) accepts "/" like the
    # "Class#op" and "self op other" patterns do, so "--- /(m)" yields "/".
    def _get_method_name( line )
      case line
      when /\A(?:---|:)\s*([\w:\.\#]+[?!]?)\s*(?:[\(\{\-]|\z)/   # name(arg), name{}, name
        remove_class_part($1)
      when /\A---\s*[\w:]+[\.\#]([+\-<>=~*^&|%\/]+)/         # Complex#+
        $1
      when /\A(?:---|:)\s*self\s*(==|===|=~)\s*\w+/          # self == other
        $1
      when /\A(?:---|:)\s*([\w:\.\#]+)\s*\=(?:\(|\s*\w+)?/   # name=
        remove_class_part($1) + '='
      when /\A(?:---|:)\s*\w+\[.*\]=/                        # self[key]=
        '[]='
      when /\A(?:---|:)\s*[\w\:]+\[.*\]/                     # self[key]
        '[]'
      when /\A(?:---|:)\s*self\s*([+\-<>=~*^&|%\/]+)\s*\w+/  # self + other
        $1
      when /\A(?:---|:)\s*([+\-~`])\s*\w+/                   # ~ self
        $1
      when /\A(?:---|:)\s*(?:[\w:]+[\.\#])?(\[\]=?)/         # Matrix.[](i)
        $1
      when /\A(?:---|:)\s*([+\-<>=~*^&|%\/]+)/               # +(m), /(m)
        $1
      when /\A(?:---|:)\s*([A-Z]\w+\*)/                      # HKEY_*
        $1
      else
        nil
      end
    end

    # Returns a copy of +str+ without a leading "Class." / "Class#"
    # qualifier (nested "A::B" paths included).  Strings without such a
    # prefix come back unchanged.
    def remove_class_part( str )
      class_prefix = /\A[A-Z]\w*(?:::[A-Z]\w*)*[\.\#]/
      str.sub(class_prefix) { '' }
    end
  
  end

end

# memo: '組込みクラス／モジュール／例外クラス' (built-in classes / modules / exception classes)
# memo: '添付ライブラリ' (bundled libraries)
