/*
 *  MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
 * 
 *  $Id: dictionary_maker.cpp,v 1.5 2003/03/27 11:48:08 taku-ku Exp $;
 * 
 *  Copyright (C) 2001-2002  Taku Kudo <taku-ku@is.aist-nara.ac.jp>
 *  All rights reserved.
 * 
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 * 
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 * 
 *  You should have received a copy of the GNU Library General Public
 *  License along with this library; if not, write to the
 *  Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 *  Boston, MA 02111-1307, USA.
 */

#include "dictionary_maker.h"

#include <stdexcept>

namespace MeCab {

  // Queue one rule string for the next build().
  // The rule goes into ruleSet, which build() walks as a
  // std::set <std::string>, so adding the same rule twice has no
  // further effect.
  void DictionaryMaker::add (const std::string &rule)
  {
    ruleSet.insert (rule);
  }
   
  // Resolve an entry to a single rule id by matching it against the rule
  // table directly, without consulting dic2IdHash.
  //
  // csv: the entry, already split into fields.
  // Returns the last id that getIdList() stores for csv.
  // Throws std::runtime_error when getIdList() stores no id at all;
  // reading the last element of an empty vector would be undefined.
  unsigned int DictionaryMaker::getDicIdNoCache (Csv &csv)
  {
    std::vector <unsigned int> result;
    getIdList (csv, result);
    if (result.empty ())
      throw std::runtime_error ("DictionaryMaker::getDicIdNoCache(): no rule matches the entry");
    return result.back ();
  }

  // Collect the ids of every rule in ruleList that is compatible with csv.
  //
  // csv:    fields to match; field i is compared with field i of each rule.
  // result: overwritten with the surviving rule ids, in ascending order.
  // parent: selects which side's "*" acts as a wildcard:
  //           0 -> a "*" field in csv accepts any rule field,
  //           1 -> a "*" field in a rule accepts any csv field;
  //         for any other value only equal fields match.
  // Returns the number of ids stored in result.
  unsigned int DictionaryMaker::getIdList (Csv &csv,
                                           std::vector <unsigned int> &result,
                                           int parent)
  {
    // Initialization: every rule id starts out as a candidate.
    std::vector <unsigned int> alive (ruleList.size ());
    for (unsigned int id = 0; id < ruleList.size (); ++id)
      alive[id] = id;

    // Narrow the candidates one field at a time, compacting in place.
    for (unsigned int field = 0; field < csv.size (); ++field) {
      unsigned int kept = 0;
      for (unsigned int n = 0; n < alive.size (); ++n) {
        const unsigned int id = alive[n];
        bool match = (parent == 0 && csv[field] == "*");
        if (! match && parent == 1) match = (ruleList[id][field] == "*");
        if (! match) match = (ruleList[id][field] == csv[field]);
        if (match) alive[kept++] = id;
      }
      alive.resize (kept);
    }

    result.assign (alive.begin (), alive.end ());
    return result.size ();
  }

  // Empty every container this file maintains for the maker: the queued
  // rules, the split rule table with its per-rule id lists, the
  // lexicalization marks, the rule-to-id map and the getDicId() cache.
  void DictionaryMaker::clear ()
  {
    // lookup tables
    rule2IdHash.clear ();
    dic2IdHash.clear ();
    isLexcalized.clear ();

    // rule storage
    idList.clear ();
    ruleList.clear ();
    ruleSet.clear ();
  }

  // Compile the rules queued by add() into the lookup tables.
  //
  // Each rule in ruleSet receives the next id in iteration order; the id
  // is recorded in rule2IdHash and the rule is split into ruleList[id].
  // A rule whose last field is not "*" marks that field's value in
  // isLexcalized. ruleSet is emptied afterwards, and idList[i] is then
  // filled by getIdList (ruleList[i], idList[i], 0) for every rule i.
  void DictionaryMaker::build ()
  {
    unsigned int size = 0;
    for (std::set <std::string>::iterator str = ruleSet.begin (); str != ruleSet.end (); ++str) {
      ruleList.resize (size + 1);
      rule2IdHash[*str] = size;
      const unsigned int s = ruleList[size].split (*str);
      // s is used as the field count; when it is 0 there is no last field
      // and s-1 would wrap around to a huge unsigned index.
      if (s > 0 && ruleList[size][s-1] != "*") isLexcalized[ruleList[size][s-1]] = 1;
      size++;
    }
    ruleSet.clear();

    idList.resize (ruleList.size());
    for (unsigned int i = 0; i < ruleList.size (); i++)
      getIdList (ruleList[i], idList[i], 0);
  }

  // Map a dictionary entry (one delimited rule string) to a rule id.
  //
  // The entry is split into fields and its last field is looked up in
  // isLexcalized:
  //   - marked as lexicalized: the id is searched from scratch with
  //     getDicIdNoCache() on every call;
  //   - otherwise: the id is cached in dic2IdHash under the leading part
  //     of the rule string (presumably everything before the separator
  //     of the last field -- confirm against Csv::getFieldPos).
  // Cached values are stored as id+1 so that 0 can stand for "empty".
  unsigned int DictionaryMaker::getDicId (const std::string &rule)
  {
    Csv csv;
    csv.split(rule);

    // With no fields there is no last field to inspect, and
    // csv.size()-1 would wrap around; resolve such an entry uncached.
    if (csv.size() == 0) return getDicIdNoCache (csv);

    std::string &lex = csv [csv.size()-1];

    // count() comes first so that a lexeme that was never marked is not
    // inserted into isLexcalized merely by being looked up.
    if (isLexcalized.count (lex) != 0 && isLexcalized[lex]) {
      // lexicalized: search from scratch
      return getDicIdNoCache (csv);
    }

    // not lexicalized: go through the cache
    std::string pos = rule.substr (0, csv.getFieldPos (csv.size()-1).first-1);
    unsigned int r;
    if ((r = dic2IdHash[pos]) != 0) return r-1; // 0 is treated as empty
    r = getDicIdNoCache (csv);
    dic2IdHash[pos] = r+1;
    return r;
  }

  // Return the id stored for rule in rule2IdHash (filled by build()).
  // NOTE(review): the lookup goes through operator[]; if rule2IdHash is a
  // std::map-like container, an unknown rule is inserted and yields 0,
  // which build() also hands out as the first rule id -- confirm.
  unsigned int DictionaryMaker::getRuleId (const std::string &rule)
  {
    const unsigned int id = rule2IdHash[rule];
    return id;
  }

  // Return a reference to the id list that build() stored in idList for
  // rule, located through the rule's id in rule2IdHash.
  // NOTE(review): no range check is made on the id; a rule that build()
  // never saw presumably resolves to index 0 -- confirm before relying
  // on this for unknown rules.
  std::vector <unsigned int> &DictionaryMaker::getRuleIdList (const std::string &rule)
  {
    const unsigned int id = rule2IdHash[rule];
    return idList[id];
  }
}
