
#==============================================================================#
# gyokuro/parser.rb
#==============================================================================#

#==============================================================================#
# Gyokuro Class
class Gyokuro

  #============================================================================#
  # Include Modules

  include Enumerable

  #============================================================================#
  # Initialize Method

  # Builds an analyzer result for +text+.
  #
  # The argument is coerced with #to_s, stored as @text, and immediately
  # handed to the instance-level #parse; the parsed result is kept in
  # @morpheme.
  def initialize(text)
    source    = text.to_s
    @text     = source
    @morpheme = parse(source)
  end

  #============================================================================#
  # Accessor

  attr_reader :text, :morpheme

  #============================================================================#
  # Class Methods

  # Runs ChaSen over +text+ and returns whatever Chasen.sparse returns.
  #
  # Chasen.getopt is called on every invocation with '-F' followed by
  # Line::Format, plus '-j', before the text is analysed.
  # NOTE(review): the exact meaning of '-j' is defined by ChaSen, not by this
  # file -- confirm against the ChaSen manual.
  def self.parse(text)
    Chasen.getopt('-F', Line::Format, '-j')
    Chasen.sparse(text)
  end

  #============================================================================#
  # Instance Methods

  # Analyses +text+ through the class-level parse and turns every output line
  # into a morpheme object.
  #
  # Lines whose chomped content is 'EOS' are dropped; each remaining line is
  # wrapped in a Line and passed to Parts.parse. Returns an Array of the
  # objects Parts.parse produced.
  def parse(text)
    # Object#type was removed in Ruby 1.9; Object#class is the portable way to
    # reach the class-level parse.
    output = self.class.parse(text)

    # String has not been Enumerable since Ruby 1.9, so walk it line by line.
    # Anything without #each_line (e.g. an Array of lines) is iterated as-is.
    lines = output.respond_to?(:each_line) ? output.each_line : output

    lines.reject { |line|
      line.chomp == 'EOS'
    }.collect { |line|
      Parts.parse(Line.new(line))
    }
  end

  # Iterates over the parsed morphemes, forwarding the given block to
  # @morpheme.each and returning its result. This is the method the included
  # Enumerable module builds on.
  def each(&block)
    @morpheme.each(&block)
  end

  # Returns the morphemes that are kind_of? at least one of +classes+.
  #
  # This replaces Enumerable#grep for this class: it takes any number of
  # classes/modules instead of a single pattern. With no arguments the result
  # is an empty Array.
  def grep(*classes)
    @morpheme.select do |morpheme|
      classes.any? { |klass| morpheme.kind_of?(klass) }
    end
  end

  # Joins runs of consecutive morphemes that match any of +classes+.
  #
  # Walks @morpheme in order; while elements are kind_of? one of the given
  # classes their #origin strings are appended to the current run. A
  # non-matching element closes the run. Returns an Array with one
  # concatenated String per non-empty run.
  def sequencial(*classes)
    runs    = []
    current = ''

    @morpheme.each do |morpheme|
      if classes.any? { |klass| morpheme.kind_of?(klass) }
        current << morpheme.origin
      elsif !current.empty?
        # A non-matching morpheme ends the run collected so far.
        runs << current
        current = ''
      end
    end

    # Flush a run that reaches the end of the morpheme list.
    runs << current unless current.empty?

    runs
  end

  # Returns the morphemes that are kind_of? Noun, selected through #grep.
  def noun
    grep(Noun)
  end

  # Returns the morphemes that are kind_of? Prefix, selected through #grep.
  def prefix
    grep(Prefix)
  end

  # Returns the morphemes that are kind_of? Verb, selected through #grep.
  def verb
    grep(Verb)
  end

  # Returns the morphemes that are kind_of? Adjective, selected through #grep.
  def adjective
    grep(Adjective)
  end

  # Returns the morphemes that are kind_of? Adverb, selected through #grep.
  def adverb
    grep(Adverb)
  end

  # Returns the morphemes that are kind_of? Attributive, selected through
  # #grep.
  def attributive
    grep(Attributive)
  end

  # Returns the morphemes that are kind_of? Conjunction, selected through
  # #grep.
  def conjunction
    grep(Conjunction)
  end

  # Returns the morphemes that are kind_of? Particle, selected through #grep.
  def particle
    grep(Particle)
  end

  # Returns the morphemes that are kind_of? AuxiliaryVerb, selected through
  # #grep.
  def auxiliary_verb
    grep(AuxiliaryVerb)
  end

  # Returns the morphemes that are kind_of? Exclamation, selected through
  # #grep.
  def exclamation
    grep(Exclamation)
  end

  # Returns the morphemes that are kind_of? Mark, selected through #grep.
  def mark
    grep(Mark)
  end

  # Returns the morphemes that are kind_of? Other, selected through #grep.
  def other
    grep(Other)
  end

  # Returns the morphemes that are kind_of? Filler, selected through #grep.
  def filler
    grep(Filler)
  end

  # Returns the morphemes that are kind_of? NonLanguage, selected through
  # #grep.
  def non_language
    grep(NonLanguage)
  end

  # Returns the morphemes that are kind_of? Fragment, selected through #grep.
  def fragment
    grep(Fragment)
  end

  # Returns the morphemes that are kind_of? Unknown, selected through #grep.
  def unknown
    grep(Unknown)
  end

  #============================================================================#
  # Line Class
  class Line

    #==========================================================================#
    # Class Constant

    # Output format handed to ChaSen: eleven directives, each followed by a
    # tab, with a newline closing the record. #initialize splits a record on
    # tabs and reads the fields in this same order.
    Format = %w[%m %y %M %Y %h %P- %t %T- %f %F- %?U/unknown/known/].map { |directive|
      directive + "\t"
    }.join + "\n"

    #==========================================================================#
    # Initialize Method

    # Splits one tab-separated output record into its fields.
    #
    # Missing fields fall back to '' (string fields) or 0 (code fields).
    # A record whose last field is not 'known' gets part-of-speech code 0 and
    # a fixed single-element part-of-speech list. A conjugation type/form of
    # '-' is stored as nil.
    def initialize(line)
      origin, origin_yomi, basis, basis_yomi,
        pos_code, pos_names,
        type_code, type_name,
        form_code, form_name,
        status = line.chomp.split(/\t/)

      known     = (status.to_s == 'known')
      type_name = type_name.to_s
      form_name = form_name.to_s

      @origin      = origin.to_s      # %m
      @origin_yomi = origin_yomi.to_s # %y
      @basis       = basis.to_s       # %M
      @basis_yomi  = basis_yomi.to_s  # %Y

      # %h / %P- : only trusted for known words; '-' separates the levels.
      # NOTE(review): the literal below looks like a mis-encoded non-ASCII
      # string (the source was probably not UTF-8) -- confirm the intended text.
      @part_of_speech_code = known ? pos_code.to_i : 0
      @part_of_speech      = known ? pos_names.to_s.split(/-/) : ['̤θ']

      # %t / %T- and %f / %F- : a name of '-' means "none" and becomes nil.
      @conjugated_type_code = type_code.to_i
      @conjugated_type      = (type_name == '-' ? nil : type_name)
      @conjugated_form_code = form_code.to_i
      @conjugated_form      = (form_name == '-' ? nil : form_name)

      # %?U/unknown/known/ : true only when the field reads 'known'.
      @known = known
    end

    #==========================================================================#
    # Accessor

    attr_reader :origin, :origin_yomi,
                :basis, :basis_yomi,
                :part_of_speech, :part_of_speech_code,
                :conjugated_type, :conjugated_type_code,
                :conjugated_form, :conjugated_form_code,
                :known

  end # Line

  #============================================================================#
  # Parts Class
  class Parts

    #==========================================================================#
    # Initialize Method

    # Copies the morpheme attributes from +line+ (any object answering the
    # ten readers used below, e.g. a Line) into this instance.
    def initialize(line)
      @origin,               @origin_yomi     = line.origin,               line.origin_yomi
      @basis,                @basis_yomi      = line.basis,                line.basis_yomi
      @part_of_speech_code,  @part_of_speech  = line.part_of_speech_code,  line.part_of_speech
      @conjugated_type_code, @conjugated_type = line.conjugated_type_code, line.conjugated_type
      @conjugated_form_code, @conjugated_form = line.conjugated_form_code, line.conjugated_form
    end

    #==========================================================================#
    # Accessor

    attr_reader :origin, :origin_yomi,
                :basis, :basis_yomi,
                :part_of_speech, :part_of_speech_code,
                :conjugated_type, :conjugated_type_code,
                :conjugated_form, :conjugated_form_code

    #==========================================================================#
    # Class Methods

    # Builds the morpheme object for +line+.
    #
    # Looks up the first PART_OF_SPEECH entry whose first element equals the
    # line's part-of-speech code and instantiates the class stored as that
    # entry's third element; Unknown is used when nothing matches (or the
    # entry carries no class).
    # NOTE(review): PART_OF_SPEECH and Unknown are defined outside this
    # section; entries are presumably [code, name, class] triples -- confirm.
    def self.parse(line)
      wanted = line.part_of_speech_code

      _code, _name, klass = PART_OF_SPEECH.find { |code, _entry_name, _entry_klass|
        code == wanted
      }

      (klass || Unknown).new(line)
    end

    #==========================================================================#
    # Instance Methods

    # The string form of a morpheme is its origin attribute.
    def to_s
      @origin
    end

  end # Parts

end # Gyokuro

#==============================================================================#
#==============================================================================#
