/*
 MeCab -- Yet Another Part-of-Speech and Morphological Analyzer

 $Id: tokenizer.h,v 1.14 2003/03/14 05:46:41 taku-ku Exp $;

 Copyright (C) 2001-2002  Taku Kudo <taku-ku@aist-nara.ac.jp>
 All rights reserved.

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Library General Public
 License as published by the Free Software Foundation; either
 version 2 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Library General Public License for more details.

 You should have received a copy of the GNU Library General Public
 License along with this library; if not, write to the
 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.
*/
#ifndef _MECAB_TOKENIZER_H
#define _MECAB_TOKENIZER_H

#include "mecab.h"
#include "freelist.h"
#include "dictionary.h"
#include "param.h"
#include "common.h"

namespace MeCab {

  // Abstract base class for tokenizers.
  //
  // Derived classes must implement getCharClass (), preprocess (),
  // postprocess () and lookup ().  This class supplies node allocation
  // (getNewNode ()), a default connection-cost lookup (getCost ()) and the
  // character-class scanning helpers (skipCharClass ()).
  class Tokenizer
  {
  private:
    FreeList<Node> nodeFreeList;  // pool that getNewNode () allocates from
    unsigned int id;              // value stored in the next node's `id` field

  protected:
    // NOTE(review): presumably the mapping that backs `matrix` -- confirm
    // against the implementation of open ().
    Mmap <unsigned short> cmmap;
    unsigned short *matrix;       // cost table indexed by getCost ()
    unsigned int size1;           // not referenced in this header
    unsigned int size2;           // inner stride used by getCost ()
    unsigned int size3;           // outer stride used by getCost ()

    Token bosToken, bosToken2, eosToken, unkToken;
    Node  *bosNode, *bosNode2, *eosNode;
    char*  unkFeature;
    char*  bosFeature;
    char*  eosFeature;
    Dictionary dic;
    std::string _what;

    // Advances from `begin` while the position is not `end` and
    // getCharClass () reports `char_class`, stepping by the length that
    // getCharClass () stores in `next`.  Returns the position where the
    // scan stopped.
    //
    // `fail` receives the result of every getCharClass () call made, so on
    // return it holds the class that ended the scan (or `char_class` itself
    // when `end` was reached).  When begin == end no call is made and both
    // `fail` and `next` are left untouched.
    //
    // NOTE(review): the loop tests `p != end`, so it relies on
    // getCharClass () never stepping past `end` -- confirm in the
    // implementations.
    inline char *skipCharClass (const char *begin, const char *end,
                                int char_class, int &fail, unsigned int &next) const
    {
      // `register` was removed from the language in C++17; a plain local is
      // equivalent.
      char *p = const_cast<char *>(begin);
      while (p != end && (fail = getCharClass (p, end, next)) == char_class)
        p += next;
      return p;
    }

    // Same scan as the five-argument overload, for callers that need
    // neither the terminating class nor the last step length.
    inline char *skipCharClass (const char *begin, const char *end, int char_class) const
    {
      unsigned int next;  // written by getCharClass () before it is read
      char *p = const_cast<char *>(begin);
      while (p != end && getCharClass (p, end, next) == char_class)
        p += next;
      return p;
    }

    // Implemented by derived classes.  As used by skipCharClass (): returns
    // the class of the character at the first argument (the second argument
    // is the end of the buffer) and stores in the third argument how far to
    // advance to reach the next character.
    virtual int getCharClass (const char *, const char *, unsigned int &) const = 0;

  public:

    // non-virtual
    // Allocates a node from the free list and assigns it the current id,
    // then increments the id counter.
    Node *getNewNode ()
    {
      Node *node = nodeFreeList.alloc ();
      node->id = id++;
      return node;
    }

    Node *getBOSNode ();
    Node *getEOSNode ();

    // pure-virtual functions
    virtual void preprocess  (const char *, const char *) = 0;
    virtual void postprocess (const char *, const char *) = 0;
    virtual Node *lookup     (const char *, const char *) = 0;

    // virtual functions: shared implementation, but can be overridden

    // Returns the matrix entry selected by the three nodes plus the cost of
    // rNode's token.  The matrix index is
    //   size3 * (size2 * lNode2->rcAttr2 + lNode->rcAttr1) + rNode->lcAttr
    // where each attribute is read from the node's token.
    virtual unsigned int getCost (const Node *lNode2, const Node *lNode, const Node *rNode)
    {
      return matrix [size3 * (size2 * lNode2->token->rcAttr2
                              + lNode->token->rcAttr1)
                     + rNode->token->lcAttr]
             + rNode->token->cost;
    }

    virtual bool open (Param &);
    virtual bool close ();
    virtual void clear ();

    Tokenizer (Param &);
    Tokenizer ();

    // Virtual dispatch is not applied to derived overrides while the base
    // destructor runs, so this has always invoked Tokenizer::close (); the
    // qualified call just makes that explicit.
    virtual ~Tokenizer () { Tokenizer::close (); }
  };
}
#endif
