/*
 MeCab -- Yet Another Part-of-Speech and Morphological Analyzer

 $Id: tagger.cpp,v 1.31 2003/07/03 11:35:31 taku-ku Exp $;

 Copyright (C) 2001-2002  Taku Kudo <taku-ku@is.aist-nara.ac.jp>
 All rights reserved.

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Library General Public
 License as published by the Free Software Foundation; either
 version 2 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Library General Public License for more details.

 You should have received a copy of the GNU Library General Public
 License along with this library; if not, write to the
 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.
*/
#include "viterbi.h"
#include "japanese_tokenizer.h"
#include "common.h"
#include "mutex.h"
#include "param.h"
#include "mecab.h"
#include "stringbuffer.h"
#include "writer.h"
#include "nbest_generator.h"
#include <stdexcept>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

namespace MeCab
{
  // Table of command-line options handed to Param::open (), Param::help ()
  // and Param::version () by the Tagger methods below.
  //
  // NOTE(review): the Option type is declared elsewhere; judging from the
  // entries, the fields appear to be { long name, short name, default value
  // (0 = none), argument placeholder for the help text (0 = no argument),
  // description } -- confirm against the Option declaration.
  static const Option long_options[] = 
  {
    { "rcfile",             'r', 0, "FILE",  "use FILE as resource file" },
    { "dicdir",             'd', 0, "DIR",   "set DIR as dicdir"                        },
    { "build-all-lattice",  'a', 0, 0,       "build all lattice in result (default no)" },
    { "output-format-type", 'O', 0, "TYPE",  "set output format type (wakati,none,...)" },
    { "node-format",        'F', 0, "STR",   "use STR as the user-defined node format"  },
    { "bos-format",         'B', 0, "STR",   "use STR as the user-defined bos format"   },
    { "eos-format",         'E', 0, "STR",   "use STR as the user-defined eos format"   },
    { "input-buffer-size",  'b', 0, "INT",   "set input buffer size (default 8192)"     },
    { "nbest",              'N', "1", "INT", "output N best results  (default 1)"     },
    { "output",             'o', 0, "FILE",  "set the output file name"                 },
    { "version",            'v', 0, 0,       "show the version and exit."               },
    { "help",               'h', 0, 0,       "show this help and exit."                 },
    // all-zero entry; presumably the end-of-table sentinel -- TODO confirm
    { 0, 0, 0, 0 }
  };


  // Decide which resource (rc) file should be loaded.
  //
  // Candidates are tried in this order and the first usable one is returned:
  //   1. the "rcfile" profile entry of param, when it is not empty;
  //   2. $HOME/.mecabrc, when that file can be opened (HAVE_GETENV only);
  //   3. the value of the MECABRC environment variable (HAVE_GETENV only);
  //   4. the "mecabrc" string value stored under
  //      HKEY_CURRENT_USER\software\mecab (native Windows only);
  //   5. the compiled-in MECAB_DEFAULT_RC.
  static std::string getDefaultRc (Param &param)
  {
    std::string rcfile = param.getProfileString ("rcfile");
    if (! rcfile.empty()) return rcfile;

#ifdef HAVE_GETENV
    const char *homedir = getenv ("HOME");
    if (homedir) {
      std::string s = MeCab::createFileName (std::string(homedir), ".mecabrc");
      std::ifstream ifs(s.c_str());
      if (ifs) return s;
    }

    const char *rcenv = getenv ("MECABRC");
    if (rcenv) return std::string(rcenv);
#endif

#if defined  (_WIN32) && ! defined (__CYGWIN__)
    HKEY hKey;

    // The key is only read, so ask for KEY_READ: KEY_ALL_ACCESS can be
    // refused for restricted accounts.  Nothing below may touch hKey unless
    // the open succeeded.
    if (RegOpenKeyEx (HKEY_CURRENT_USER, "software\\mecab", 0, KEY_READ, &hKey) == ERROR_SUCCESS) {
      char v[1024];
      DWORD vt = REG_NONE;
      DWORD size = sizeof (v) - 1;  // keep one byte for the terminator added below
      const LONG status = RegQueryValueEx (hKey, "mecabrc", 0, &vt, (BYTE *)v, &size);
      RegCloseKey (hKey);

      if (status == ERROR_SUCCESS && vt == REG_SZ) {
        // registry strings are not guaranteed to be stored NUL-terminated
        v[size] = '\0';
        // an empty value is treated like a missing one
        if (v[0] != '\0') return std::string (v);
      }
    }
#endif

    return std::string (MECAB_DEFAULT_RC);
  }

  // Work out the path of the dictionary resource file ("dicrc").
  //
  // The "dicdir" profile entry (or "." when it is empty) has every
  // "$(rcpath)" placeholder replaced by rcpath after removeFileName () has
  // been applied to it; the result is written back to the "dicdir" profile
  // entry and the path of "dicrc" inside that directory is returned.
  static std::string getDicRc (Param &param, std::string rcpath)
  {
    std::string dir = param.getProfileString ("dicdir");
    if (dir.empty()) {
      dir = ".";  // fall back to the current directory
    }

    removeFileName (rcpath);
    replaceString (dir, "$(rcpath)", rcpath);

    // publish the expanded directory so later readers of "dicdir" see it
    param.setProfile ("dicdir", dir.c_str(), true);

    return createFileName (dir, "dicrc");
  }

// Member-initializer list shared by all Tagger constructors: every listed
// pointer member starts out as 0 and build_all_lattice as 0 (false).
#define TAGGER_INITILIZE  tokenizer(0), viterbi (0), mutex (0), ostrs(0), writer(0), nbest(0), build_all_lattice(0)

// Error text used when option parsing or resource loading fails: the message
// reported by param.what (), COPYRIGHT and a hint to try '--help'.  The
// expansion refers to a variable named `param`, which must be in scope
// wherever the macro is used.
#define TAGGER_ERROR  (std::string (param.what ()) + "\n\n" + COPYRIGHT + "\ntry '--help' for more information.\n")

  // Creates a Tagger with no resources loaded; one of the open () overloads
  // has to succeed before anything can be parsed.
  Tagger::Tagger (): TAGGER_INITILIZE {}

  // Creates a Tagger and opens it from a command-line style argument vector.
  // Throws std::runtime_error carrying what () when open () fails.
  Tagger::Tagger (int argc, char **argv): TAGGER_INITILIZE
  {
    const bool opened = open (argc, argv);
    if (! opened) {
      throw std::runtime_error (_what);
    }
  }

  // Creates a Tagger and opens it from a single option string.
  // Throws std::runtime_error carrying what () when open () fails.
  Tagger::Tagger (const char *arg): TAGGER_INITILIZE
  {
    const bool opened = open (arg);
    if (! opened) {
      throw std::runtime_error (_what);
    }
  }

  // Releases everything the Tagger still owns by delegating to close ().
  Tagger::~Tagger()
  {
    close ();
  }
   
  const char *Tagger::what () 
  {
    return _what.c_str();
  }

  // Opens the Tagger from a command-line style argument vector.
  // Returns false, with the reason available through what (), when the
  // arguments cannot be parsed or the resources cannot be loaded.
  bool Tagger::open (int argc, char **argv)
  {
    Param param;

    if (param.open (argc, argv, long_options)) {
      return open (param);
    }

    // option parsing failed: report what Param complained about
    _what = TAGGER_ERROR;
    return false;
  }

  // Opens the Tagger from a single option string.
  // Returns false, with the reason available through what (), when the
  // string cannot be parsed or the resources cannot be loaded.
  bool Tagger::open (const char *arg)
  {
    Param param;

    if (param.open (arg, long_options)) {
      return open (param);
    }

    // option parsing failed: report what Param complained about
    _what = TAGGER_ERROR;
    return false;
  }
   
  // Opens the Tagger from already parsed parameters.
  //
  // Any previously opened state is closed first.  The rc file chosen by
  // getDefaultRc () and the dicrc file chosen by getDicRc () are loaded into
  // param, then the tokenizer, Viterbi analyzer, writer and internal output
  // buffer are created.
  //
  // Returns true on success.  On any failure the Tagger is closed again,
  // what () is set to "Tagger::open(): " followed by the reason, and false is
  // returned.  A "help" or "version" request is reported the same way: the
  // help / version text becomes the message and false is returned.
  bool Tagger::open (Param &param)
  {
    try {

      close ();

      if (param.getProfileInt ("help"))
        throw std::runtime_error (param.help (long_options));

      if (param.getProfileInt ("version"))
        throw std::runtime_error (param.version (long_options));

      std::string rcfile = getDefaultRc (param);
      if (! param.load (rcfile.c_str())) throw std::runtime_error (TAGGER_ERROR);

      std::string dicrcfile = getDicRc (param, rcfile);
      if (! param.load (dicrcfile.c_str())) throw std::runtime_error (TAGGER_ERROR);

      build_all_lattice = (param.getProfileInt ("build-all-lattice") != 0);

      tokenizer = new JapaneseTokenizer (param);
      viterbi   = new Viterbi           (param, tokenizer);
      writer    = new Writer            (param);
      ostrs     = new StringBuffer      ();

      return true;
    }

    // Fully qualified so the handler does not depend on a using-directive
    // leaking in from some header; const because only what () is called.
    catch (const std::exception &e) {
      close ();  // release whatever was created before the failure
      _what = std::string ("Tagger::open(): ") + e.what ();
      return false;
    }
  }

  // Command-line driver: parses the options in argv, opens the Tagger and
  // analyzes the input line by line.
  //
  // Input comes from the files named by the remaining (non-option) arguments,
  // or from standard input when there are none.  Results go to the file named
  // by the "output" option, or to standard output when it is not set.  With
  // "nbest" >= 2 the lattice is built in full and parseNBest () is used for
  // every line; otherwise parse () is used.
  //
  // Returns EXIT_SUCCESS on success (including after printing the help or
  // version text).  On any error "FATAL: <reason>" is written to standard
  // error and EXIT_FAILURE is returned.
  int Tagger::parse (int argc, char **argv)
  {
    try {

      Param param;

      if (! param.open (argc, argv, long_options))
        throw std::runtime_error (TAGGER_ERROR);

      if (param.getProfileInt ("help")) {
        std::cout << param.help (long_options);
        return EXIT_SUCCESS;
      }

      if (param.getProfileInt ("version")) {
        std::cout << param.version (long_options);
        return EXIT_SUCCESS;
      }

      // deliberately not called `nbest`, which would hide the member
      const int nbestNum = param.getProfileInt ("nbest");
      if (nbestNum <= 0 || nbestNum > NBEST_MAX)
        throw std::runtime_error ("invalid N value");

      if (nbestNum >= 2) param.setProfile ("build-all-lattice", "1", true);

      if (! open (param)) throw std::runtime_error (_what);

      // The output file is a local object, so it is closed on every exit
      // path, including the exceptions thrown below.
      std::ofstream outputFile;
      std::ostream *ofs = &std::cout;
      std::string outputFileName = param.getProfileString ("output");

      if (! outputFileName.empty()) {
        outputFile.open (outputFileName.c_str());
        if (! outputFile) throw std::runtime_error (outputFileName + ": no such file or directory");
        ofs = &outputFile;
      }

      const std::vector <std::string>& rest = param.getRestArg ();

      unsigned int ibufsize = _min (MAX_INPUT_BUFFER_SIZE,
                                    _max (param.getProfileInt ("input-buffer-size"), MIN_INPUT_BUFFER_SIZE));

      // Owned by a vector so the buffer is released even when an exception
      // is thrown; never empty, so that &ibuf[0] is always valid.
      std::vector <char> ibuf (ibufsize ? ibufsize : 1);
      char *line = &ibuf[0];

      // No file arguments means a single pass over standard input.
      const bool useStdin = rest.empty ();
      const std::vector <std::string>::size_type ninputs = useStdin ? 1 : rest.size ();

      for (std::vector <std::string>::size_type i = 0; i < ninputs; ++i) {
        std::ifstream ifs;
        std::istream *is = &std::cin;

        if (! useStdin) {
          ifs.open (rest[i].c_str ());
          if (! ifs) throw std::runtime_error (rest[i] + ": no such file or directory");
          is = &ifs;
        }

        while (is->getline (line, ibufsize)) {
          const char *result = (nbestNum >= 2) ? parseNBest (nbestNum, line) : parse (line);
          // a null result must never be streamed; report why parsing failed
          if (! result) throw std::runtime_error (_what);
          *ofs << result << std::flush;
        }

        // getline () also stops when a line does not fit into the buffer
        // (failbit without eofbit) or when reading fails.  Report that
        // instead of silently dropping the rest of the input.
        if (is->bad ())
          throw std::runtime_error ("read error on input");
        if (! is->eof ())
          throw std::runtime_error ("input line is longer than the input buffer; "
                                    "use the --input-buffer-size option");
      }

      return EXIT_SUCCESS;
    }

    catch (const std::exception &e) {
      std::cerr << "FATAL: " << e.what () << std::endl;
      return EXIT_FAILURE;
    }
  }

  // Releases every object the Tagger owns and returns it to the unopened
  // state: each owned pointer is deleted and reset to 0, and
  // build_all_lattice is cleared.  Safe to call repeatedly because deleting
  // a null pointer is a no-op.  Always returns true.
  //
  // NOTE(review): open () constructs viterbi with the tokenizer pointer, yet
  // tokenizer is deleted first here -- confirm Viterbi's destructor does not
  // touch the tokenizer.
  bool Tagger::close ()
  {
    delete tokenizer;   tokenizer = 0;
    delete viterbi;     viterbi   = 0;
    delete mutex;       mutex     = 0;   // created lazily by lock ()
    delete ostrs;       ostrs     = 0;
    delete writer;      writer    = 0;
    delete nbest;       nbest     = 0;   // created lazily by parseNBestInit ()
    build_all_lattice = false;
    return true;
  }

  // Locks the Tagger's mutex, creating the Mutex on first use, and returns
  // whatever Mutex::lock () reports.
  //
  // NOTE(review): the lazy creation itself is not guarded, so the very first
  // lock () call should presumably not be made from several threads at once.
  bool Tagger::lock ()
  {
     if (mutex == 0) {
       mutex = new Mutex;
     }
     return mutex->lock ();
  }
   
  // Unlocks the Tagger's mutex.  Returns false when no mutex exists yet
  // (lock () has not been called since the last close ()); otherwise returns
  // whatever Mutex::unlock () reports.
  bool Tagger::unlock ()
  {
     return mutex ? mutex->unlock () : false;
  }

  const char *Tagger::parse (const char *str, unsigned int len) 
  {
    Node *n = parseToNode (str, len);
    if (!n) return 0;
    ostrs->clear ();
    writer->write (*ostrs, str, n);
    *ostrs << '\0';
    return ostrs->str ();
  }

  // Analyzes str and writes the formatted, NUL-terminated result into the
  // caller-supplied buffer.
  //
  //   str, len  - input text; a len of 0 makes parseToNode () use strlen (str)
  //   out, len2 - output buffer and its capacity
  //
  // Returns the result string held in the caller's buffer, or 0 when the
  // analysis fails or the output does not fit; what () then explains why.
  const char *Tagger::parse (const char *str, unsigned int len, char *out, unsigned int len2)
  {
    Node *n = parseToNode (str, len);
    if (!n) return 0;

    // The buffer is bounded by its own capacity (len2), not by the length
    // of the input.
    StringBuffer os (out, len2);
    writer->write (os, str, n);
    os << '\0';

    if (! os.str ()) {
      _what = "Tagger::parse (): output buffer overflow" ;
      return 0;
    }

    // hand back the caller's buffer, as parseNBest (..., out, len2) does
    return os.str ();
  }

  // Runs the Viterbi analysis over str and returns the node handed back by
  // Viterbi::analyze ().
  //
  //   str - input text; must not be NULL
  //   len - number of bytes to analyze; 0 means strlen (str)
  //
  // Returns 0 and sets what () when str is NULL, when the Tagger has not been
  // opened successfully, or when the analysis itself fails.
  Node *Tagger::parseToNode (const char *str, unsigned int len) 
  {
    if (!str) {
       _what = "Tagger::parseToNode (): NULL pointer is given";
       return 0;
    }

    // A default-constructed Tagger, or one whose open () failed, has no
    // analyzer; report that instead of dereferencing a null pointer.
    if (!viterbi) {
       _what = "Tagger::parseToNode (): Tagger is not opened";
       return 0;
    }

    Node *bosNode = viterbi->analyze (str, len ? len : strlen (str));
    if (! bosNode) {
      _what = std::string("Tagger::parseToNode (): ") + viterbi->what ();
      return 0;
    }

    return bosNode;
  }

  bool Tagger::parseNBestInit (const char *str, unsigned int len)   
  {
    if (! build_all_lattice) {
       _what = "Tagger::parseNBestInit (): use -a option to obtain N-Best results";
       return 0;
    }
    Node *n = parseToNode (str, len);
    begin = str;
    if (! nbest) nbest = new NBestGenerator ();
    if (! n) return false;
    nbest->set (n);
    return true;
  }

  // Returns the next result produced by the N-best generator.
  // Returns 0, with what () set, when no generator exists yet or when the
  // generator has nothing more to return.
  Node* Tagger::nextNode ()
  {
    if (nbest) {
      Node *node = nbest->next ();
      if (node) {
        return node;
      }

      _what = "Tagger::nextNode (): no more results";
      return 0;
    }

    _what = "Tagger::nextNode (): call parseNBestInit first";
    return 0;
  }

  const char* Tagger::next ()
  {
    Node *n = nextNode ();
    if (! n) return 0;
    ostrs->clear ();
    writer->write (*ostrs, (const char *)begin, n);
    *ostrs << '\0';
    return ostrs->str ();
  }

  // Formats the next N-best result into the caller-supplied buffer.
  //
  //   out, len2 - output buffer and its capacity
  //
  // Returns the NUL-terminated result held in the caller's buffer, or 0 when
  // nextNode () has nothing to offer or the output does not fit; what () then
  // explains why.
  const char* Tagger::next (char *out, unsigned int len2)
  {
    Node *n = nextNode ();
    if (! n) return 0;

    // Write into the caller's buffer; the internal buffer is left untouched.
    StringBuffer os (out, len2);
    writer->write (os, (const char *)begin, n);
    os << '\0';

    if (! os.str ()) {
      _what = "Tagger::next (): output buffer overflow" ;
      return 0;
    }

    // hand back the caller's buffer, as parseNBest (..., out, len2) does
    return os.str ();
  }


  const char* Tagger::parseNBest (unsigned int N, const char* str, unsigned int len)
  {
    if (N == 1) return parse (str, len);

    if (! parseNBestInit (str, len)) return 0;
    ostrs->clear ();

    for (unsigned int i = 0; i < N; ++i) {
      Node *n = nextNode ();
      if (! n) break;
      writer->write (*ostrs, str, n);
    }

    *ostrs << '\0';
    return ostrs->str ();
  }
   
  const char* Tagger::parseNBest (unsigned int N, const char* str, unsigned int len, 
				  char *out, unsigned int len2)
  {
    if (N == 1) return parse (str, len, out, len2);

    if (! parseNBestInit (str, len)) return 0;
    StringBuffer os (out, len2);

    for (unsigned int i = 0; i < N; ++i) {
      Node *n = nextNode ();
      if (! n) break;
      writer->write (os, str, n);
    }
    os << '\0';
     
    if (! os.str ()) {
      _what = "Tagger::parseNBest (): output buffer overflow" ;
      return 0;
    }
     
    return os.str ();
  }   
}
