# nmzqr.rb - libraries for Query structure and Result structure
# $Id: nmzqr.rb,v 1.4 2000/12/03 16:21:27 ryu Exp $
#
# nmzqr.rb is copyrighted free software by OHSHIMA Ryunosuke.
# You can use/redistribute/modify it under the terms of Namazu or Ruby.

require 'nmzqr.tab'

class Namazu
  # Vocabulary for using a Hash as a score result: a table keyed by
  # document id whose values are two-element [score, time] arrays.
  module ScoreResultHash
    # Stores [score, time] under +docid+, replacing any earlier entry.
    # +time+ defaults to nil.  Returns the stored array.
    def add(docid, score, time = nil)
      store(docid, [score, time])
    end

    # Looks up the entry stored under +docid+ (plain Hash#[] lookup).
    def get_scoreresultitem(docid)
      self[docid]
    end

    alias_method :scoreresultitem, :get_scoreresultitem

    # All document ids held in the table.
    def docids()
      keys
    end

    # Yields each document id in turn.
    def each_docid()
      each_key { |id| yield id }
    end

    # All stored entries, without their document ids.
    def scoreresultitems()
      values
    end

    # Yields each stored entry in turn.
    def each_scoreresultitem()
      each_value { |item| yield item }
    end
  end # ScoreResultHash

  ScoreResult = Hash
end

# Mix the score-result helpers into every Hash, so any plain Hash can be
# used where a Namazu::ScoreResult is expected.
Hash.send(:include, Namazu::ScoreResultHash)

class Namazu
  # Named accessors for a score result item, which is an Array laid out
  # as [score, time].
  module ScoreResultItemArray
    # The score, kept in slot 0.
    def score()
      at(0)
    end

    # Overwrites the score kept in slot 0.
    def score=(score)
      self[0] = score
    end

    # The time, kept in slot 1.
    def time()
      at(1)
    end
  end # ScoreResultItemArray

  ScoreResultItem = Array
end

# Mix the [score, time] accessors into every Array, so any plain Array can
# be used where a Namazu::ScoreResultItem is expected.
Array.send(:include, Namazu::ScoreResultItemArray)

class Namazu
  # Base class of every query node.  Subclasses override #query; this class
  # also supplies #make_phrase, which reads @index (set by subclasses).
  class QueryInterface
    # Evaluates the query.  The base implementation ignores both flags and
    # returns an empty ResultInterface.
    #
    # ResultInterface#initialize takes no arguments, so none are passed
    # (passing the flags raised ArgumentError).
    public
    def query(lazyflag = false, idfflag = false)
      ResultInterface.new()
    end

    # Synonym for #query.  Written as a real method rather than `alias`
    # so that it dispatches to a subclass's overriding #query; an alias
    # would stay bound to the base implementation above.
    public
    def search(lazyflag = false, idfflag = false)
      query(lazyflag, idfflag)
    end

    # Splits +word+ into a sequence of words that are all present in the
    # word index of @index.
    #
    # Returns nil for an empty string, [word] when the whole word is
    # indexed, otherwise the first split found by trying ever shorter
    # prefixes (longest first) and recursively splitting the remainder,
    # or nil when no such split exists.
    private
    def make_phrase(word)
      nmzword = @index.word()
      nmzword.open()
      if word.empty?()
        nmzword.close()
        return nil
      end
      if nmzword.get_wordid(word)
        nmzword.close()
        return [word]
      end

      # The remainder is sliced straight out of +word+ for every candidate
      # prefix.  The previous code accumulated it back-to-front and then
      # reversed the accumulator in place, which scrambled it for every
      # candidate tried after a failed one.
      (word.length() - 1).downto(1) do |len|
        head = word[0, len]
        next unless nmzword.get_wordid(head)
        # Closed around the recursive call, which opens @index.word() itself.
        nmzword.close()
        phrase = make_phrase(word[len .. -1])
        return phrase.unshift(head) if phrase
        nmzword.open()
      end
      nmzword.close()
      nil
    end
  end # QueryInterface

  # Query for a single search term.
  class Query < QueryInterface
    # index - index object the term is looked up in
    # word  - the search term
    def initialize(index, word)
      @index, @word = index, word
    end

    # Evaluates the term.  When make_phrase splits it into two or more
    # indexed words, each part becomes a Result (lazyflag true, idfflag
    # false) and the parts are combined in a ResultPhrase that receives the
    # caller's flags.  Otherwise the term is looked up as is in one Result.
    def query(lazyflag = false, idfflag = false)
      parts = make_phrase(@word)
      unless parts && parts.length() >= 2
        return Result.new(@index, @word, lazyflag, idfflag)
      end

      parts.collect!() { |part| Result.new(@index, part, true, false) }
      ResultPhrase.new(@index, parts, lazyflag, idfflag)
    end
  end # Query

  # Query for an explicit phrase given as a list of words.
  class QueryPhrase < QueryInterface
    # index - index object the words are looked up in
    # words - the words of the phrase, in order
    def initialize(index, words)
      @index, @words = index, words
    end

    # Evaluates the phrase.  Every word is first run through make_phrase
    # (falling back to the word itself when no split is found).  A word that
    # splits into two or more parts becomes a nested ResultPhrase of Results;
    # any other word becomes a single Result.  All of these are built with
    # lazyflag true and idfflag false; only the enclosing ResultPhrase
    # receives the caller's flags.
    def query(lazyflag = false, idfflag = false)
      # Pass 1: resolve every word before any result object is created.
      pieces = @words.collect() { |word| make_phrase(word) || [word] }

      # Pass 2: turn each resolved word into its result object.
      pieces.collect!() do |parts|
        if parts.length() >= 2
          parts.collect!() { |part| Result.new(@index, part, true, false) }
          ResultPhrase.new(@index, parts, true, false)
        else
          Result.new(@index, parts[0], true, false)
        end
      end

      ResultPhrase.new(@index, pieces, lazyflag, idfflag)
    end
  end # QueryPhrase

  # Query that selects index words by regular expression.
  class QueryRegexp < QueryInterface
    # index        - index object to search
    # regexpstring - pattern source, compiled with Regexp.new
    def initialize(index, regexpstring)
      @index = index
      @regexp = Regexp.new(regexpstring)
    end

    # Asks the word index for every (word, wordid) pair matching the
    # pattern, wraps each pair in a ResultWordid (caller's lazyflag, idfflag
    # false) and combines them in a ResultRegexp that receives both of the
    # caller's flags.
    def query(lazyflag = false, idfflag = false)
      dictionary = @index.word()
      dictionary.open()
      matches = dictionary.match_words(@regexp)
      dictionary.close()

      matches.collect!() do |word, wordid|
        ResultWordid.new(@index, word, wordid, lazyflag, false)
      end
      ResultRegexp.new(@index, @regexp, matches, lazyflag, idfflag)
    end
  end # QueryRegexp

  # Wildcard query.  The match string is translated into a regular
  # expression and then evaluated by QueryRegexp#query:
  #
  #   *text*  - words containing text
  #   *text   - words ending in text
  #   text*   - words starting with text
  #   text    - exactly text (no wildcard given)
  class QueryMatch < QueryRegexp
    # index       - index object to search
    # matchstring - the wildcard expression described above
    #
    # The text between the wildcards is escaped with Regexp.quote, so it is
    # matched literally: inputs such as "**" or "*+x" no longer raise
    # RegexpError, and characters like "." no longer act as regexp
    # operators.  A string without any wildcard used to reach
    # Regexp.new(nil) and raise TypeError; it is now an exact match.
    def initialize(index, matchstring)
      @index = index
      regexpstring =
        if /^\*(.+)\*$/ =~ matchstring
          Regexp.quote($1)
        elsif /^\*(.+)$/ =~ matchstring
          Regexp.quote($1) + '$'
        elsif /^(.+)\*$/ =~ matchstring
          '^' + Regexp.quote($1)
        else
          '^' + Regexp.quote(matchstring) + '$'
        end
      @regexp = Regexp.new(regexpstring)
    end
  end # QueryMatch

  # Common base of the operator queries (QueryAnd, QueryOr, QueryNot):
  # it only remembers the operand queries for the subclasses' #query.
  class QueryOperator < QueryInterface
    # queries - the operand query objects, kept in @queries
    def initialize(queries)
      @queries = queries
    end
  end # QueryOperator

  # Conjunction of the operand queries.
  class QueryAnd < QueryOperator
    # Evaluates every operand with lazyflag and idfflag both forced to true
    # and combines the results in a ResultAnd, which receives the caller's
    # lazyflag.  The idfflag argument is accepted but not consulted.
    def query(lazyflag = false, idfflag = false)
      operands = @queries.collect() { |operand| operand.query(true, true) }
      ResultAnd.new(operands, lazyflag)
    end
  end # QueryAnd

  # Disjunction of the operand queries.
  class QueryOr < QueryOperator
    # Evaluates every operand with the caller's lazyflag and idfflag forced
    # to true, and combines the results in a ResultOr, which also receives
    # the caller's lazyflag.  The idfflag argument is accepted but not
    # consulted.
    def query(lazyflag = false, idfflag = false)
      operands = @queries.collect() { |operand| operand.query(lazyflag, true) }
      ResultOr.new(operands, lazyflag)
    end
  end # QueryOr

  # Difference of the operand queries (first operand minus the others).
  class QueryNot < QueryOperator
    # Evaluates every operand with lazyflag forced to true and the caller's
    # idfflag, and combines the results in a ResultNot, which receives the
    # caller's lazyflag.
    def query(lazyflag = false, idfflag = false)
      operands = @queries.collect() { |operand| operand.query(true, idfflag) }
      ResultNot.new(operands, lazyflag)
    end
  end # QueryNot

  # Base class of every search result.  It holds a score result table in
  # @scoreresult (document id => score result item) and offers the
  # read-only views shared by all result kinds.
  class ResultInterface
    attr_reader(:scoreresult)

    # Starts out with an empty score result table.
    def initialize()
      @scoreresult = Hash.new()
    end

    # The searched word; the base class has none and answers ''.
    def word()
      ''
    end

    # Document ids present in the score result table.
    def docids()
      @scoreresult.docids()
    end

    # Number of entries in the score result table.
    def hitnum()
      @scoreresult.length()
    end

    # Renders the result as "[<word>: <hitnum>]".
    def to_s()
      text = '['.dup()
      text << word()
      text << ': '
      text << hitnum().to_s()
      text << ']'
    end
  end # ResultInterface

  # Result of looking up one word in the index.
  class Result < ResultInterface
    # index    - index object; must provide word(), score() and docnum()
    # word     - the word to look up
    # lazyflag - when true the weight is only remembered in @idf and is
    #            applied later by #get_scoreresultitem; when false it is
    #            applied to every score immediately
    # idfflag  - when true a weight log2(docnum / hits) is computed
    #
    # The weight is computed in floating point and only when there is at
    # least one hit.  The former integer division raised ZeroDivisionError
    # for a word without hits and truncated the ratio otherwise.
    def initialize(index, word, lazyflag = false, idfflag = true)
      @word = word
      nmzword = index.word()
      nmzscore = index.score()
      nmzword.open()
      nmzscore.open()
      @scoreresult = nmzscore.get_scores_by_wordid(nmzword.get_wordid(word))
      nmzword.close()
      nmzscore.close()
      @idf = nil
      if idfflag
        hits = @scoreresult.length()
        if hits > 0
          idf = Math.log(index.docnum().to_f() / hits) / Math.log(2)
          if lazyflag
            @idf = idf
          else
            @scoreresult.each_value() do |scoreresultitem|
              scoreresultitem[0] = scoreresultitem[0] * idf + 1
            end
          end
        end
      end
    end
    attr_reader(:word)

    # Returns the score result item for +docid+, or nil when the document
    # is not a hit.  When a lazy weight is pending (@idf set) it is applied
    # to the stored item in place on every call, so each document should be
    # fetched only once.
    public
    def get_scoreresultitem(docid)
      scoreresultitem = @scoreresult[docid]
      if (scoreresultitem and @idf)
        scoreresultitem[0] = scoreresultitem[0] * @idf + 1
      end
      scoreresultitem
    end
  end # Result

  # Result for a word whose word id is already known, so the id lookup is
  # skipped.  Everything else behaves as in Result.
  class ResultWordid < Result
    # index    - index object; must provide word(), score() and docnum()
    # word     - the word, kept for display
    # wordid   - id passed directly to get_scores_by_wordid
    # lazyflag - when true the weight is only remembered in @idf; when
    #            false it is applied to every score immediately
    # idfflag  - when true a weight log2(docnum / hits) is computed
    #
    # The weight is computed in floating point and only when there is at
    # least one hit.  The former integer division raised ZeroDivisionError
    # for an id without hits and truncated the ratio otherwise.
    def initialize(index, word, wordid, lazyflag = false, idfflag = true)
      @word = word
      # NOTE(review): the word file is opened and closed but never read
      # here; kept in case open/close has effects not visible in this file.
      nmzword = index.word()
      nmzscore = index.score()
      nmzword.open()
      nmzscore.open()
      @scoreresult = nmzscore.get_scores_by_wordid(wordid)
      nmzword.close()
      nmzscore.close()
      @idf = nil
      if idfflag
        hits = @scoreresult.length()
        if hits > 0
          idf = Math.log(index.docnum().to_f() / hits) / Math.log(2)
          if lazyflag
            @idf = idf
          else
            @scoreresult.each_value() do |scoreresultitem|
              scoreresultitem[0] = scoreresultitem[0] * idf + 1
            end
          end
        end
      end
    end
  end # ResultWordid

  # Common base of results that combine other results, which subclasses
  # keep in @results.
  class ResultOperator < ResultInterface
    attr_reader(:results)

    # The operands' words concatenated without a separator.
    def word()
      words = @results.collect() { |result| result.word() }
      words.join('')
    end

    # Renders as "[<operand> <operand> ... :: <hitnum>]".
    def to_s()
      operands = @results.collect() { |result| result.to_s() }
      text = '['.dup()
      text << operands.join(' ')
      text << ' :: '
      text << hitnum().to_s()
      text << ']'
    end
  end # ResultOperator

  # Result of a phrase search: the documents that contain every operand
  # and that the phrase index also reports for the concatenated words.
  class ResultPhrase < ResultOperator
    # index    - index object; must provide phrase() and docnum()
    # results  - operand results, one per word (or nested phrase)
    # lazyflag - when true only the set of documents is computed here and
    #            scores are produced on demand by #get_scoreresultitem;
    #            when false each score becomes the mean of the operands'
    #            scores immediately
    # idfflag  - when true a weight log2(docnum / hits) is computed and
    #            either remembered (lazy) or applied at once
    #
    # The weight is computed in floating point and only when there is at
    # least one hit.  The former integer division raised ZeroDivisionError
    # whenever the phrase matched nothing and truncated the ratio otherwise.
    def initialize(index, results, lazyflag = false, idfflag = true)
      @results = results
      @scoreresult = nil

      if results.empty?()
        @scoreresult = {}
      else
        # Start from a copy of the smallest operand table; the copy is
        # shallow, so the score result items are shared with that operand.
        scoreresults = results.collect() do |result| result.scoreresult() end
        scoreresults.sort!() do |a, b| a.length() <=> b.length() end
        scoreresult = scoreresults.shift().dup()

        if lazyflag
          # Keep only the documents present in every other operand.
          scoreresults.each() do |scoreresulttmp|
            scoreresult.delete_if() do |docid, |
              not scoreresulttmp[docid]
            end
          end
        else
          # Same ordering as above, applied to the result objects, so that
          # resulttmp is the operand the table was copied from.
          resultstmp = results.dup()
          resultstmp.sort!() do |a, b|
            a.scoreresult().length() <=> b.scoreresult().length()
          end
          resulttmp = resultstmp.shift()
          scoreresult.each_key() do |docid|
            scoreresult[docid] = resulttmp.get_scoreresultitem(docid)
          end
          len = results.length()
          # Sum the operands' scores per document; a document missing from
          # any operand is dropped, the others get the mean score.
          scoreresult.delete_if() do |docid, scoreresultitem|
            score = scoreresultitem[0]
            resultstmp.each() do |other|
              scoreresultitemtmp = other.get_scoreresultitem(docid)
              unless scoreresultitemtmp
                score = nil
                break
              end
              score += scoreresultitemtmp[0]
            end
            scoreresultitem[0] = score.to_f() / len if score
            !score
          end
        end

        # Finally keep only the documents the phrase index lists for the
        # concatenated words.
        phrasestring = results.collect() do |result| result.word() end . join('')
        phrasedocids = {}
        nmzphrase = index.phrase()
        nmzphrase.open()
        nmzphrase.get_docids(phrasestring).each() do |docid|
          phrasedocids[docid] = true
        end
        nmzphrase.close()
        scoreresult.delete_if() do |docid, |
          not phrasedocids[docid]
        end
        @scoreresult = scoreresult
      end

      @idf = nil
      if idfflag
        hits = @scoreresult.length()
        if hits > 0
          idf = Math.log(index.docnum().to_f() / hits) / Math.log(2)
          if lazyflag
            @idf = idf
          else
            @scoreresult.each_value() do |scoreresultitem|
              scoreresultitem[0] = scoreresultitem[0] * idf + 1
            end
          end
        end
      end
    end

    # Returns the score result item for +docid+, or nil when the document
    # is not a hit.  The score is the mean of the operands' scores, weighted
    # by @idf (plus 1) when a lazy weight is pending.  The item returned by
    # the first operand is updated in place and returned.
    public
    def get_scoreresultitem(docid)
      scoreresultitem = nil
      if @scoreresult[docid]
        results = @results
        scoreresultitem = results[0].get_scoreresultitem(docid)
        score = scoreresultitem[0]
        for i in (1 ... results.length())
          score += results[i].get_scoreresultitem(docid)[0]
        end
        if @idf
          scoreresultitem[0] = score.to_f() * @idf / results.length() + 1
        else
          scoreresultitem[0] = score.to_f() / results.length()
        end
      end
      scoreresultitem
    end

    # Renders as "{<operand> <operand> ... :: <hitnum>}".
    public
    def to_s()
      '{' << @results.collect() do |result| result.to_s() end . join(' ') << ' :: ' << hitnum().to_s() << '}'
    end
  end # ResultPhrase

  # Result of a regular-expression search: the union of the results of all
  # index words that matched, keeping the highest score per document.
  class ResultRegexp < ResultOperator
    # index    - index object; must provide docnum()
    # regexp   - the pattern; its inspect string is reported as the word
    # results  - one result per matching index word
    # lazyflag - when true the tables are merged without comparing scores
    #            and the maximum is determined on demand by
    #            #get_scoreresultitem; when false the maximum score per
    #            document is selected immediately
    # idfflag  - when true a weight log2(docnum / hits) is computed and
    #            either remembered (lazy) or applied at once
    #
    # The weight is computed in floating point and only when there is at
    # least one hit.  The former integer division raised ZeroDivisionError
    # when the pattern matched nothing and truncated the ratio otherwise.
    def initialize(index, regexp, results, lazyflag = false, idfflag = true)
      @word = regexp.inspect()
      @results = results
      @scoreresult = nil

      if results.empty?()
        @scoreresult = {}
      else
        scoreresults = results.collect() do |result| result.scoreresult() end
        # Lazy merging starts from the largest table, eager merging from
        # the smallest.
        if lazyflag
          scoreresults.sort!() do |a, b| b.length() <=> a.length() end
        else
          scoreresults.sort!() do |a, b| a.length() <=> b.length() end
        end
        scoreresult = scoreresults.shift().dup()

        if lazyflag
          # Add the documents not seen yet; existing entries are kept.
          scoreresults.each() do |scoreresulttmp|
            scoreresulttmp.each() do |docid, scoreresultitem|
              unless scoreresult[docid]
                scoreresult[docid] = scoreresultitem
              end
            end
          end
        else
          # Add new documents and replace entries that a later table beats.
          scoreresults.each() do |scoreresulttmp|
            scoreresulttmp.each() do |docid, scoreresultitem|
              scoreresultitemmax = scoreresult[docid]
              if ((not scoreresultitemmax) or
                  (scoreresultitemmax[0] < scoreresultitem[0]))
                scoreresult[docid] = scoreresultitem
              end
            end
          end
        end
        @scoreresult = scoreresult
      end

      @idf = nil
      if idfflag
        hits = @scoreresult.length()
        if hits > 0
          idf = Math.log(index.docnum().to_f() / hits) / Math.log(2)
          if lazyflag
            @idf = idf
          else
            @scoreresult.each_value() do |scoreresultitem|
              scoreresultitem[0] = scoreresultitem[0] * idf + 1
            end
          end
        end
      end
    end

    attr_reader(:word)

    # Returns the operands' highest-scoring item for +docid+, or nil when
    # the document is not a hit.  When a lazy weight is pending (@idf set)
    # it is applied to that item in place before it is returned.
    public
    def get_scoreresultitem(docid)
      scoreresultitemmax = nil
      if @scoreresult[docid]
        scoreresultitem = nil
        for result in @results
          scoreresultitem = result.scoreresult()[docid]
          next unless scoreresultitem
          if ((not scoreresultitemmax) or
              (scoreresultitemmax[0] < scoreresultitem[0]))
            scoreresultitemmax = scoreresultitem
          end
        end
        if @idf
          scoreresultitemmax[0] = scoreresultitemmax[0] * @idf + 1
        end
      end
      scoreresultitemmax
    end

    # Renders as "{<pattern> :: <hitnum>}".
    public
    def to_s()
      '{' << word() << ' :: ' << hitnum().to_s() << '}'
    end
  end # ResultRegexp

  # Result of an AND: the documents present in every operand, scored with
  # the sum of the operands' scores.
  class ResultAnd < ResultOperator
    # results  - operand results
    # lazyflag - when true only the set of documents is computed here and
    #            the sums are produced on demand by #get_scoreresultitem;
    #            when false the sums are computed immediately
    def initialize(results, lazyflag = false)
      @results = results
      @scoreresult = nil

      if results.empty?()
        @scoreresult = {}
        return
      end

      # Work on a shallow copy of the smallest operand table.
      tables = results.collect() { |result| result.scoreresult() }
      tables.sort!() { |a, b| a.length() <=> b.length() }
      merged = tables.shift().dup()

      if lazyflag
        # Drop every document that some other operand lacks.
        tables.each() do |table|
          merged.delete_if() { |docid, _item| not table[docid] }
        end
      else
        # Same ordering applied to the result objects, so that +smallest+
        # is the operand the table was copied from.
        ordered = results.dup()
        ordered.sort!() do |a, b|
          a.scoreresult().length() <=> b.scoreresult().length()
        end
        smallest = ordered.shift()
        merged.each_key() do |docid|
          merged[docid] = smallest.get_scoreresultitem(docid)
        end

        # Add up the remaining operands' scores; a document missing from
        # any of them is removed.
        merged.delete_if() do |docid, item|
          total = item[0]
          ordered.each() do |other|
            found = other.get_scoreresultitem(docid)
            if found
              total += found[0]
            else
              total = nil
              break
            end
          end
          item[0] = total if total
          !total
        end
      end
      @scoreresult = merged
    end

    # Returns the score result item for +docid+ carrying the sum of all
    # operands' scores, or nil when the document is not a hit.  The item
    # returned by the first operand is updated in place and returned.
    def get_scoreresultitem(docid)
      return nil unless @scoreresult[docid]

      item = @results[0].get_scoreresultitem(docid)
      total = item[0]
      @results[1 .. -1].each() do |result|
        total += result.get_scoreresultitem(docid)[0]
      end
      item[0] = total
      item
    end

    # Renders as "{<operand> & <operand> ... :: <hitnum>}".
    def to_s()
      operands = @results.collect() { |result| result.to_s() }
      text = '{'.dup()
      text << operands.join(' & ')
      text << ' :: '
      text << hitnum().to_s()
      text << '}'
    end
  end # ResultAnd

  # Result of an OR: the documents present in any operand, scored with the
  # sum of the scores of the operands that contain them.
  class ResultOr < ResultOperator
    # results  - operand results
    # lazyflag - when true the tables are merged without adding scores and
    #            the sums are produced on demand by #get_scoreresultitem;
    #            when false the sums are computed immediately
    def initialize(results, lazyflag = false)
      @results = results
      @scoreresult = nil

      if results.empty?()
        @scoreresult = {}
      else
        # Work on a shallow copy of the largest operand table.
        scoreresults = results.collect() do |result| result.scoreresult() end
        scoreresults.sort!() do |a, b| b.length() <=> a.length() end
        scoreresult = scoreresults.shift().dup()

        if lazyflag
          # Add the documents not seen yet; existing entries are kept.
          scoreresults.each() do |scoreresulttmp|
            scoreresulttmp.each() do |docid, scoreresultitem|
              unless scoreresult[docid]
                scoreresult[docid] = scoreresultitem
              end
            end
          end
        else
          # Add new documents and accumulate the score of known ones.
          scoreresults.each() do |scoreresulttmp|
            scoreresulttmp.each() do |docid, scoreresultitemtmp|
              scoreresultitem = scoreresult[docid]
              if (not scoreresultitem)
                scoreresult[docid] = scoreresultitemtmp
              else
                scoreresultitem[0] += scoreresultitemtmp[0]
              end
            end
          end
        end
        @scoreresult = scoreresult
      end
    end

    # Returns the score result item for +docid+ carrying the sum of the
    # scores of every operand that contains the document, or nil when the
    # document is not a hit.
    #
    # Each operand is asked through its own get_scoreresultitem, as
    # ResultAnd and ResultNot do, so that a pending lazy weight or a nested
    # operator's combined score is included.  Reading the operands' raw
    # tables skipped both.  The first item found is updated in place and
    # returned.
    public
    def get_scoreresultitem(docid)
      scoreresultitem = nil
      if @scoreresult[docid]
        @results.each() do |result|
          scoreresultitemtmp = result.get_scoreresultitem(docid)
          next unless scoreresultitemtmp
          if scoreresultitem
            scoreresultitem[0] += scoreresultitemtmp[0]
          else
            scoreresultitem = scoreresultitemtmp
          end
        end
      end
      scoreresultitem
    end

    # Renders as "{<operand> | <operand> ... :: <hitnum>}".
    public
    def to_s()
      '{' << @results.collect() do |result| result.to_s() end . join(' | ') << ' :: ' << hitnum().to_s() << '}'
    end
  end # ResultOr

  # Result of a NOT: the documents of the first operand that appear in
  # none of the other operands.
  class ResultNot < ResultOperator
    # results  - operand results; the first is the base, the rest are
    #            subtracted from it
    # lazyflag - when false every remaining entry is replaced by what the
    #            base operand's get_scoreresultitem returns; when true the
    #            entries copied from the base table are left as they are
    def initialize(results, lazyflag = false)
      @results = results
      @scoreresult = nil

      if results.empty?()
        @scoreresult = {}
        return
      end

      base = results[0]
      # Shallow copy: the items stay shared with the base operand.
      remaining = base.scoreresult().dup()
      results[1 .. -1].each() do |excluded|
        table = excluded.scoreresult()
        remaining.delete_if() { |docid, _item| table[docid] }
      end

      unless lazyflag
        remaining.each_key() do |docid|
          remaining[docid] = base.get_scoreresultitem(docid)
        end
      end
      @scoreresult = remaining
    end

    # Returns the base operand's score result item for +docid+, or nil
    # when the document is not a hit of this result.
    def get_scoreresultitem(docid)
      @scoreresult[docid] ? @results[0].get_scoreresultitem(docid) : nil
    end

    # Renders as "{<operand> - <operand> ... :: <hitnum>}".
    def to_s()
      operands = @results.collect() { |result| result.to_s() }
      text = '{'.dup()
      text << operands.join(' - ')
      text << ' :: '
      text << hitnum().to_s()
      text << '}'
    end
  end # ResultNot
end # Namazu
