/*
  MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
 
  $Id: tokenizer.cpp,v 1.3 2004/08/06 18:05:14 taku-ku Exp $;

  Copyright (C) 2001-2004 Taku Kudo <taku-ku@is.aist-nara.ac.jp>
  This is free software with ABSOLUTELY NO WARRANTY.
  
  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.
  
  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.
  
  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/  
#include "tokenizer.h"

namespace MeCab {

#define TOKENIZER_INITILIZE  nodeFreeList (NODE_FREELIST_SIZE), id (0), \
                             unkFeature (0), bosFeature (0), eosFeature (0) 

  Tokenizer::Tokenizer (Param &param):  TOKENIZER_INITILIZE
  {
    if (! open (param)) throw std::runtime_error (_what);
  }
     
  Tokenizer::Tokenizer (): TOKENIZER_INITILIZE {}

  void Tokenizer::clear () { id = 0; nodeFreeList.free(); }

  Node *Tokenizer::getBOSNode ()
  {
    bosNode2 = getNewNode();
    memset (bosNode2, 0, sizeof (Node));       
    bosNode  = getNewNode();
    memset (bosNode,  0, sizeof (Node));
    bosNode->prev     = bosNode2;
    bosNode2->next    = bosNode;       
    bosNode->surface  = bosNode2->surface = 0;
    bosNode->feature  = bosFeature;
    bosNode->length   = bosNode2->length  = 0;
    bosNode->token    = &bosToken;
    bosNode2->token   = &bosToken2;
    bosNode2->feature = bosFeature;
    return bosNode;
  }

  Node *Tokenizer::getEOSNode ()
  {
    eosNode = getNewNode ();  
    memset (eosNode, 0, sizeof (Node));
    eosNode->surface = 0;
    eosNode->length  = 0;
    eosNode->feature = eosFeature;
    eosNode->token   = &eosToken;
    eosNode->id = id++; 
    return eosNode;
  }

  bool Tokenizer::open (Param &param)
  {
    try {
      close ();

      // open dic
      std::string prefix = param.getProfileString ("dicdir");
      if (! dic.open (prefix.c_str())) throw std::runtime_error (dic.what ());

      // open info
      Mmap<Token> ommap;
      std::string othersFile = createFileName (prefix, OTHERS_FILE);
      if (! ommap.open (othersFile.c_str ())) throw std::runtime_error (ommap.what ());

      if (ommap.size () < 3) throw std::runtime_error ("file size is invalid");

      memset (&bosToken2, 0,         sizeof (Token));	
      memcpy (&bosToken,  &ommap[0], sizeof (Token));
      memcpy (&eosToken,  &ommap[1], sizeof (Token));
      memcpy (&unkToken,  &ommap[2], sizeof (Token));
	 
      ommap.close ();

      unkToken.cost = param.getProfileInt ("unk-pos-cost");
      unkFeature    = mystrdup (param.getProfileString ("unk-feature").c_str());
      bosFeature    = mystrdup (param.getProfileString ("bos-feature").c_str());
      eosFeature    = mystrdup (param.getProfileString ("eos-feature").c_str());

      return true;
    }

    catch (exception &e) {
      close ();
      _what = std::string ("Tokenizer::open(): ") + e.what ();
      return false;
    }
  }

  bool Tokenizer::close () 
  {
    delete [] unkFeature; unkFeature = 0;
    delete [] bosFeature; bosFeature = 0;
    delete [] eosFeature; eosFeature = 0;
    return dic.close ();
  }
}
