#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (C) 2009 The Tegaki project contributors
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# Contributors to this file:
# - Mathieu Blondel

import sys
import os
from optparse import OptionParser

from tegaki.character import CharacterCollection

from tegakitools.tomoe import tomoe_dict_to_character_collection
from tegakitools.kuchibue import kuchibue_to_character_collection

# Version string appended to the program name in the --version output.
VERSION = '0.3'

class TegakiConvertError(Exception):
    """Conversion error reported to the user by the command-line entry point.

    The script catches this exception, writes its message to stderr, prints
    the usage help and exits with status 1.
    """

class TegakiConvert(object):
    """Merge several handwriting data sources into one character collection.

    The sources (character directories, character collection files, Tomoe
    dictionaries and Kuchibue databases) are all added to a single
    CharacterCollection, which is then optionally filtered and finally
    written to a file or to standard output.
    """

    def __init__(self, options, args):
        """Store the parsed command-line settings.

        options -- object exposing the attributes directories, charcols,
                   tomoe, kuchibue, include, exclude (iterables of paths)
                   and max_samples (int or None).
        args    -- positional arguments; the first one, if any, is the
                   output path. Extra arguments are ignored.
        """
        self._directories = options.directories
        self._charcols = options.charcols
        self._tomoe = options.tomoe
        self._kuchibue = options.kuchibue
        self._include = options.include
        self._exclude = options.exclude
        self._max_samples = options.max_samples

        # The output path is optional; without it run() writes to stdout.
        # An explicit emptiness test replaces the former bare "except:",
        # which would also have hidden unrelated errors.
        if args:
            self._output_path = args[0]
        else:
            self._output_path = None

    @staticmethod
    def _compression_flags(path):
        """Return the (gzip, bz2) booleans deduced from the suffix of path."""
        return path.endswith(".gz"), path.endswith(".bz2")

    @staticmethod
    def _read_files(paths):
        """Return the concatenated contents of the files listed in paths.

        Each file is closed even if reading it fails. An empty list of
        paths yields an empty string.
        """
        chunks = []
        for path in paths:
            f = open(path)
            try:
                chunks.append(f.read())
            finally:
                f.close()
        return "".join(chunks)

    def run(self):
        """Build the merged collection, filter it and write the result.

        Errors raised while opening or reading the include/exclude files,
        or by the underlying collection readers and writers, propagate to
        the caller unchanged.
        """
        charcol = CharacterCollection()

        # add the directories provided
        for directory in self._directories:
            charcol += CharacterCollection.from_character_directory(directory)

        # add the character collections provided
        for charcol_path in self._charcols:
            use_gzip, use_bz2 = self._compression_flags(charcol_path)
            _charcol = CharacterCollection()
            _charcol.read(charcol_path, gzip=use_gzip, bz2=use_bz2)
            charcol += _charcol

        # add tomoe dictionaries provided
        for tomoe in self._tomoe:
            charcol += tomoe_dict_to_character_collection(tomoe)

        # add the kuchibue databases provided
        for kuchibue in self._kuchibue:
            charcol += kuchibue_to_character_collection(kuchibue)

        # characters to include: the filter is only applied when the
        # include files actually contain some text
        include_text = self._read_files(self._include)
        if include_text:
            charcol.include_characters_from_text(include_text)

        # characters to exclude: same rule as for the include files
        exclude_text = self._read_files(self._exclude)
        if exclude_text:
            charcol.exclude_characters_from_text(exclude_text)

        # max samples (a value of 0 or None leaves the samples untouched)
        if self._max_samples:
            charcol.remove_samples(keep_at_most=self._max_samples)

        # output
        if not self._output_path:
            # outputs to stdout if no output path was specified;
            # sys.stdout.write is used instead of the print statement so
            # that this code parses under both Python 2 and Python 3
            sys.stdout.write(charcol.to_xml() + "\n")
        else:
            use_gzip, use_bz2 = self._compression_flags(self._output_path)
            charcol.write(self._output_path, gzip=use_gzip, bz2=use_bz2)

def _build_option_parser():
    """Create the OptionParser describing the command-line interface."""
    option_parser = OptionParser(usage="usage: %prog [options] [output-path]",
                                 version="%prog " + VERSION)

    # Options that may be given several times; each occurrence appends a
    # string to the list stored under the given destination name.
    # Columns: short flag, long flag, destination, help text.
    repeatable_options = (
        ("-d", "--directory", "directories",
         "Directory containing individual XML character files"),
        ("-c", "--charcol", "charcols",
         "character collection XML files"),
        ("-t", "--tomoe-dict", "tomoe",
         "Tomoe XML dictionary files"),
        ("-k", "--kuchibue", "kuchibue",
         "Kuchibue unipen database"),
        ("-i", "--include", "include",
         "File containing characters to include"),
        ("-e", "--exclude", "exclude",
         "File containing characters to exclude"),
    )
    for short_flag, long_flag, destination, help_text in repeatable_options:
        # "default=[]" is evaluated on every iteration, so each option
        # gets its own list object.
        option_parser.add_option(short_flag, long_flag,
                                 action="append", type="string",
                                 dest=destination, default=[],
                                 help=help_text)

    option_parser.add_option("-m", "--max-samples",
                             type="int", dest="max_samples",
                             help="Maximum number of samples per character")
    return option_parser

parser = _build_option_parser()

options, args = parser.parse_args()

try:
    TegakiConvert(options, args).run()
except TegakiConvertError:
    # Fetch the active exception through sys.exc_info() rather than binding
    # it in the except clause; this form is accepted by every Python version.
    error = sys.exc_info()[1]
    sys.stderr.write(str(error) + "\n\n")
    parser.print_help()
    sys.exit(1)
