#
# rmails.rb
#
#   Copyright (c) 1998-2001 Minero Aoki <aamine@loveruby.net>
#
#   This program is free software.
#   You can distribute/modify this program under the terms of
#   the GNU Lesser General Public License version 2 or later.
#


module TMail

  class Scanner_R

    Version = '0.9.10'
    Version.freeze

    #
    # regexps
    #

    atomchar  = Regexp.quote( "\#!$%&`'*+{|}~^/=?" ) + '\\-'
    tokenchar = Regexp.quote( "\#!$%&`'*+{|}~^." )   + '\\-'
    jisstr    = '|\e..[^\e]*\e..'

    ATOM = {}
    TOKEN = {}
    {
      'e' => '|(?:[\xa1-\xfe][\xa1-\xfe])+',
      's' => '|(?:[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc])+',
      'u' => '|(?:[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf])+',
      'n' => ''
    }.each do |k,v|
      ATOM[k]  = /\A[\w#{atomchar}]+#{v}#{jisstr}/n
      TOKEN[k] = /\A[\w#{tokenchar}]+#{v}#{jisstr}/n
    end

    LWSP    = /\A[\n\r\t ]+/n
    DIGIT   = /\A\d+\z/

    QSTR   = /\A[^"\\\e]+#{jisstr}/n
    CSTR   = /\A[^\)\(\\\e]+#{jisstr}/n
    DSTR   = /\A[^\]\\]+#{jisstr}/n


    RECV_TOKEN = {
      'from' => :FROM,
      'by'   => :BY,
      'via'  => :VIA,
      'with' => :WITH,
      'id'   => :ID,
      'for'  => :FOR
    }


    def initialize( str, header, comments )
      @s = StringScanner.new( str, false )

      @header = header
      @comments = comments

      @atom_mode = :atom
      @word_re = ATOM[ $KCODE[0,1].downcase ]
      @recv_mode = false

      case header
      when 'CTypeH', 'CEncodingH', 'CDispositionH'
        @atom_mode = :token
        @word_re = TOKEN[ $KCODE[0,1].downcase ]
      when 'RecvH'
        @recv_mode = true
      end

      @debug = false
    end

    attr_accessor :debug


    def scan( &block )
      if @debug then
        scan_main do |arr|
          s, v = arr
          printf "%7d %-10s %s\n",
                 @s.restsize,
                 s.respond_to?(:id2name) ? s.id2name : s.inspect,
                 v.inspect
          yield arr
        end
      else
        scan_main &block
      end
    end

    def scan_main
      until @s.empty? do
        if @s.skip LWSP then
          break if @s.empty?
        end

        if tmp = @s.scan( @word_re ) then
          case @atom_mode
          when :atom
            if DIGIT === tmp then
              yield :DIGIT, tmp
            elsif @recv_mode then
              yield RECV_TOKEN[ tmp.downcase ] || :ATOM, tmp
            else
              yield :ATOM, tmp
            end
          when :token
            yield :TOKEN, tmp
          else
            bug! 'atom mode is not atom/token'
          end

        elsif @s.skip( /\A"/ ) then
          yield :QUOTED, quoted

        elsif @s.skip( /\A\(/ ) then
          tmp = comment
          @comments.push tmp if @comments

        elsif @s.skip( /\A\[/ ) then
          yield :DOMLIT, domlit

        else
          tmp = @s.getch
          yield tmp, tmp
        end
      end

      yield false, '$'
    end


    private


    def quoted
      scan_qstr QSTR, /\A"/, 'quoted-string'
    end

    def domlit
      scan_qstr DSTR, /\A]/, 'domain-literal'
    end

    def scan_qstr( exp, term, type )
      ret = ''
      while true do
        @s.empty? and scan_error! "found unterminated #{type}"
        
        if    tmp = @s.scan( exp ) then ret << tmp
        elsif @s.skip( term )      then break
        elsif @s.skip( /\A\\/ )    then ret << @s.getch
        else
          bug! "not match in #{type}"
        end
      end

      ret
    end


    def comment
      ret = ''
      nest = 1

      while nest > 0 do
        @s.empty? and scan_error! 'found unterminated comment'

        if    tmp = @s.scan(CSTR) then ret << tmp
        elsif @s.skip( /\A\)/ )   then nest -= 1; ret << ')' unless nest == 0
        elsif @s.skip( /\A\(/ )   then nest += 1; ret << '('
        elsif @s.skip( /\A\\/ )   then ret << @s.getch
        else
          bug! 'not match in comment'
        end
      end

      ret
    end


    def scan_error!( msg )
      raise ScanError, msg
    end

  end   # class Scanner_R

end   # module TMail
