#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (C) 2009 The Tegaki project contributors
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# Contributors to this file:
# - Mathieu Blondel

import sys
import os
from optparse import OptionParser

from tegaki.character import CharacterCollection
from tegaki.trainer import Trainer, TrainerError
from tegaki.recognizer import Recognizer

from tegakitools.tomoe import tomoe_dict_to_character_collection

VERSION = '0.3'

class TegakiBuildError(Exception):
    """Error reported to the user when a model build cannot proceed.

    The script entry point catches this exception, prints its message
    followed by the usage help, and exits with a non-zero status.
    """

class TegakiBuild(object):
    """Command-line driver that trains a model or lists the trainers.

    The character samples are gathered from the character directories,
    character collection files and Tomoe dictionaries given as options,
    then handed to the trainer selected on the command line.
    """

    def __init__(self, options, args):
        """Store the parsed command-line options and positional arguments.

        options -- object providing the ``directories``, ``charcols``,
                   ``tomoe`` and ``list`` attributes
        args    -- positional arguments; unless ``options.list`` is set,
                   ``args[0]`` is the trainer name and ``args[1]`` the
                   path to the .meta file
        """
        self._directories = options.directories
        self._charcols = options.charcols
        self._tomoe = options.tomoe
        self._list = options.list
        # Always define these so the attributes exist in --list mode too.
        self._trainer = None
        self._meta = None
        if not self._list:
            self._trainer = args[0]
            self._meta = args[1]

    def run(self):
        """List the available trainers or train a model, as requested."""
        if self._list:
            self._list_trainers()
        else:
            self._train()

    def _list_trainers(self):
        """Print the name of each available trainer, one per line."""
        avail_trainers = Trainer.get_available_trainers()
        print("\n".join(["- " + key for key in avail_trainers.keys()]))

    @staticmethod
    def _model_path(meta_path):
        """Return the path of the .model file matching a .meta file path.

        Only a trailing ".meta" extension is swapped for ".model"; a
        ".meta" occurring elsewhere in the path (e.g. in a directory
        name) is left alone.  For any other path ".model" is appended, so
        the result never equals the meta file path itself.
        """
        suffix = ".meta"
        if meta_path.endswith(suffix):
            return meta_path[:-len(suffix)] + ".model"
        return meta_path + ".model"

    def _train(self):
        """Collect every character sample provided and train the model.

        Raises TegakiBuildError if the meta file cannot be read, if no
        character sample was found, or if the trainer is not available.
        """
        charcol = CharacterCollection()

        # read meta file
        try:
            meta = Recognizer.read_meta_file(self._meta)
        except IOError as e:
            raise TegakiBuildError(str(e))

        # add the directories provided
        for directory in self._directories:
            charcol += CharacterCollection.from_character_directory(directory)

        # add the character collections provided
        for charcol_path in self._charcols:
            _charcol = CharacterCollection()
            # the compression scheme is deduced from the file extension
            _charcol.read(charcol_path,
                          gzip=charcol_path.endswith(".gz"),
                          bz2=charcol_path.endswith(".bz2"))
            charcol += _charcol

        # add tomoe dictionaries provided
        for tomoe in self._tomoe:
            charcol += tomoe_dict_to_character_collection(tomoe)

        if len(charcol.get_all_characters()) == 0:
            raise TegakiBuildError("No character samples to train!")

        trainer = self._get_trainer()

        # the model is written next to the meta file
        trainer.train(charcol, meta, self._model_path(self._meta))

        print("Training done!")

    def _get_trainer(self):
        """Return a new instance of the trainer named on the command line.

        Raises TegakiBuildError, listing the valid names, if the requested
        trainer is not among the available ones.
        """
        avail_trainers = Trainer.get_available_trainers()

        if self._trainer not in avail_trainers:
            err = "Not an available trainer!\n"
            err += "Available ones include: %s" % \
                ", ".join(avail_trainers.keys())
            raise TegakiBuildError(err)

        return avail_trainers[self._trainer]()

# Usage text shown by --help and after a reported error.
usage = """usage: %prog [options] trainer meta-file

trainer          a trainer available on the system
meta-file        path to a model .meta file"""

parser = OptionParser(usage=usage, version="%prog " + VERSION,
                      description="Train a model")

# The three sample-source options may each be given several times; every
# occurrence is appended to the corresponding list.
parser.add_option("-d", "--directory",
                  action="append", type="string", dest="directories",
                  default=[],
                  help="directory containing individual XML character files")
parser.add_option("-c", "--charcol",
                  action="append", type="string", dest="charcols",
                  default=[],
                  help="character collection XML files")
parser.add_option("-t", "--tomoe-dict",
                  action="append", type="string", dest="tomoe",
                  default=[],
                  help="Tomoe XML dictionary files")
parser.add_option("-l", "--list",
                  action="store_true", dest="list", default=False,
                  help="List available trainers")

(options, args) = parser.parse_args()

try:
    # The two positional arguments are only required when training.
    if not options.list and len(args) < 2:
        raise TegakiBuildError("Needs a trainer and meta-file!")

    TegakiBuild(options, args).run()
except TegakiBuildError as e:
    # Report the problem, remind the user of the usage, and signal failure.
    sys.stderr.write(str(e) + "\n\n")
    parser.print_help()
    sys.exit(1)
