/*
 * This file is part of the Ubuntu TV Media Scanner
 * Copyright (C) 2012-2013 Canonical Ltd.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Contact: Jim Hodapp <jim.hodapp@canonical.com>
 * Authored by: Mathias Hasselmann <mathias@openismus.com>
 */
#include "mediascanner/mediaindex.h"

// Lucene++
#include <Lucene.h>

#include <BooleanQuery.h>
#include <Collector.h>
#include <Document.h>
#include <Explanation.h>
#include <Fieldable.h>
#include <FSDirectory.h>
#include <IndexReader.h>
#include <IndexSearcher.h>
#include <MultiReader.h>
#include <Query.h>
#include <QueryParser.h>
#include <ScoreDoc.h>
#include <StandardAnalyzer.h>
#include <StringUtils.h>
#include <Term.h>
#include <TermDocs.h>
#include <TopDocs.h>

// Boost C++
#include <boost/bind.hpp>
#include <boost/foreach.hpp>
#include <boost/filesystem.hpp>
#include <boost/locale/format.hpp>
#include <boost/numeric/conversion/bounds.hpp>

// GLib
#include <glib.h>

// Standard Library
#include <algorithm>
#include <map>
#include <set>
#include <string>
#include <vector>

// Media Scanner
#include "mediascanner/filter.h"
#include "mediascanner/locale.h"
#include "mediascanner/logging.h"
#include "mediascanner/mediaroot.h"
#include "mediascanner/mediautils.h"
#include "mediascanner/glibutils.h"
#include "mediascanner/propertyschema.h"
#include "mediascanner/utilities.h"

namespace mediascanner {

// Boost C++
using boost::locale::format;

// Lucene++
using Lucene::LuceneException;
using Lucene::newLucene;

// Limit value standing for "no limit": the largest value an int32_t can hold.
// VisitAll() substitutes it for negative limits.
const int32_t MediaIndex::kUnlimited =
        boost::numeric::bounds<int32_t>::highest();
// Lucene compatibility version handed to the standard analyzer and to the
// query parser when the index gets opened.
static const Lucene::LuceneVersion::Version kLuceneVersion =
        Lucene::LuceneVersion::LUCENE_30;

// Media index parameters
//
// Values of the "format" parameter in the "global" group which Open()
// understands: the first one makes it call OpenIndex(), the second one
// makes it call OpenMetaIndex().
const char MediaIndex::kMediaIndexFormat[] =
    "Ubuntu Media Scanner Media Index 1.0";
const char MediaIndex::kMetaIndexFormat[] =
    "Ubuntu Media Scanner Meta Index 1.0";

// Name of a media index parameter. Instances can only be created explicitly
// from a character array, such as a string literal, so that arbitrary
// strings cannot be passed where a parameter name is expected.
class MediaIndex::ParamId : public std::string {
public:
    // The array length is deduced at compile time. Its last element, the
    // terminating NUL of a string literal, is not copied.
    template<size_t kArrayLength>
    explicit ParamId(const char (&id)[kArrayLength])
        : std::string(&id[0], kArrayLength - 1) {
    }
};

// Names of the parameters this file reads from the params key file:
// "format" is read from the "global" group by Open(), "segments" from a
// media root's group by OpenChildIndex(), and "relative-path" from the
// "media:" groups by OpenMetaIndex().
const MediaIndex::ParamId MediaIndex::kParamFormat("format");
const MediaIndex::ParamId MediaIndex::kParamSegments("segments");
const MediaIndex::ParamId MediaIndex::kParamRelativePath("relative-path");

// Context specific logging domains
//
// Each domain is attached to the matching global logging domain. kExplain
// is attached to kDebug instead, therefore it must be defined after kDebug.
static const logging::Domain kWarning("warning/index", logging::warning());
static const logging::Domain kInfo("info/index", logging::info());
static const logging::Domain kTrace("trace/index", logging::trace());
static const logging::Domain kDebug("debug/index", logging::debug());
static const logging::Domain kExplain("debug/index/explain", &kDebug);

// Private state of a MediaIndex. The object also registers itself as
// listener at the media root manager, if there is one, to learn about added
// and removed media roots.
class MediaIndex::Private : public MediaRootManager::Listener {
public:
    // Creates the state for the media index |q| with an empty set of
    // parameters, and starts listening to |root_manager| if it is set.
    Private(MediaIndex *q, MediaRootManagerPtr root_manager)
        : q(q)
        , params_(take(g_key_file_new()))
        , params_timestamp_(0)
        , params_changed_(false)
        , root_manager_(root_manager) {
        if (not root_manager_)
            return;

        root_manager_->add_listener(this);
    }

    // Stops listening to the media root manager again.
    ~Private() {
        if (not root_manager_)
            return;

        root_manager_->remove_listener(this);
    }

    // Installs a new index reader. The cached searcher is dropped, since it
    // was created for the previous reader.
    void set_index_reader(Lucene::IndexReaderPtr reader) {
        searcher_.reset();
        index_reader_ = reader;
    }

    // Returns the current index reader, which is null while no index is open.
    Lucene::IndexReaderPtr index_reader() const {
        return index_reader_;
    }

    // Installs an explicit searcher.
    void set_searcher(Lucene::IndexSearcherPtr searcher) {
        searcher_ = searcher;
    }

    // Returns the searcher for the current index reader, creating it on
    // first use. Without index reader this returns a null pointer.
    Lucene::IndexSearcherPtr searcher() {
        if (searcher_ || not index_reader_)
            return searcher_;

        searcher_ = newLucene<Lucene::IndexSearcher>(index_reader_);
        return searcher_;
    }

    // Returns the location of the params file below the index path.
    FileSystemPath params_path() {
        const FileSystemPath result = path_ / "mediaindex";
        return result;
    }

    // Returns the modification time of the params file, or zero if the file
    // system reports an error for it.
    std::time_t params_last_write_time() {
        try {
            return boost::filesystem::last_write_time(params_path());
        } catch(const boost::system::system_error &) {
            return 0;
        }
    }

    // Loads the params file. Reports an error and returns false on failure.
    bool ReadParams();

    // Replaces the index reader by one that covers all known media readers.
    void rebuild_index_reader();

    // MediaRootManager::Listener notifications.
    void OnMediaRootAdded(const MediaRoot &root);
    void OnMediaRootRemoved(const MediaRoot &root);

    // Stores |error_message|, keeping a still pending message in parentheses.
    void report_error(const boost::locale::format &error_message);

    // The media index this private state belongs to.
    MediaIndex *const q;

    // Path of the open index; empty while the index is closed.
    FileSystemPath path_;
    // Message of the last error; consumed by MediaIndex::error_message().
    std::string error_message_;
    RefreshPolicyPtr refresh_policy_;

    // Both get created by MediaIndex::Open() and reset by MediaIndex::Close().
    Lucene::StandardAnalyzerPtr analyzer_;
    Lucene::QueryParserPtr query_parser_;

    // Parameters of the index, the modification time of the params file at
    // the time they got read or flushed, and whether there are changes that
    // have not been flushed yet.
    Wrapper<GKeyFile> params_;
    std::time_t params_timestamp_;
    bool params_changed_;

    MediaRootManagerPtr root_manager_;

    // Multi-index readers. Actually should be a separate class.
    typedef std::map<MediaRoot, Lucene::IndexReaderPtr> MediaReaderMap;
    MediaReaderMap media_readers_;

private:
    // Only reachable via the accessors above, which keep them consistent.
    Lucene::IndexReaderPtr index_reader_;
    Lucene::IndexSearcherPtr searcher_;
};

// Creates a media index that is not open yet. The private state starts
// listening to |root_manager| if it is set, and the default refresh policy
// gets installed.
MediaIndex::MediaIndex(MediaRootManagerPtr root_manager)
    : d(new Private(this, root_manager)) {
    const RefreshPolicyPtr initial_policy = RefreshPolicy::default_policy();
    set_refresh_policy(initial_policy);
}

// Destroys the private state. Its destructor unregisters the listener from
// the media root manager. Note that Close() is not called here.
MediaIndex::~MediaIndex() {
    delete d;
}

// Makes |error_message| the current error message. If an earlier message is
// still pending, it is kept by appending it in parentheses, so that the
// most recent error comes first, followed by its possible cause.
void MediaIndex::Private::report_error(const format &error_message) {
    const std::string pending_message = error_message_;
    error_message_ = error_message.str();

    if (pending_message.empty())
        return;

    // Three extra characters: the space and the two parentheses.
    error_message_.reserve(error_message_.length() +
                           pending_message.length() + 3);
    error_message_ += " (" + pending_message + ')';
}

// Records |error_message| as the most recent error by forwarding it to the
// private implementation.
void MediaIndex::report_error(const format &error_message) {
    d->report_error(error_message);
}

// Returns the error message of the last failed operation. The stored
// message is cleared by this call, so a second call returns an empty string
// unless another error got reported in between.
std::string MediaIndex::error_message() {
    // FIXME(M5): Really Clear error message after returning it?
    std::string result;
    result.swap(d->error_message_);
    return result;
}

// Returns the default path of the underlying Lucene index: the folder
// "mediascanner" within the user's cache directory as reported by GLib.
// The path is computed once, on first use.
FileSystemPath MediaIndex::default_path() {
    static const FileSystemPath default_path =
            FileSystemPath(g_get_user_cache_dir()) / "mediascanner";
    return default_path;
}

// Returns the file system path of the underlying Lucene index. Open()
// stores the path; Close() and failing Open() calls clear it again.
FileSystemPath MediaIndex::path() const {
    return d->path_;
}

// Returns true if the index got opened, which is the case while an index
// reader is installed.
bool MediaIndex::is_open() const {
    // Convert explicitly instead of relying on an implicit pointer-to-bool
    // conversion in the return statement.
    return d->index_reader() ? true : false;
}

bool MediaIndex::is_current() const {
    if (d->index_reader()) {
        if (not d->index_reader()->isCurrent()
                || d->params_last_write_time() > d->params_timestamp_)
            return false;
    }

    return true;
}

// Installs |policy| as the new refresh policy. An index that is not up to
// date anymore gets reopened first. The result of Reopen() is ignored.
void MediaIndex::set_refresh_policy(RefreshPolicyPtr policy) {
    const bool outdated = not is_current();

    if (outdated) {
        kTrace("Changing refresh policy while the index is not up "
               "to date anymore. Reopening the index.");
        Reopen();
    }

    d->refresh_policy_ = policy;
}

// Returns the refresh policy which the reading methods notify, via
// OnBeginReading(), before they access the index.
RefreshPolicyPtr MediaIndex::refresh_policy() const {
    return d->refresh_policy_;
}

// Returns the media root manager that was passed to the constructor. This
// can be a null pointer.
MediaRootManagerPtr MediaIndex::root_manager() const {
    return d->root_manager_;
}

// Returns the Lucene term analyzer. It gets created by Open() and reset by
// Close(), so it is null while the index is closed.
Lucene::AnalyzerPtr MediaIndex::analyzer() const {
    return d->analyzer_;
}

// Builds a Lucene term for the given URL. The term refers to the index
// field of the URL property, schema::kUrl.
Lucene::TermPtr MediaIndex::MakeLookupTerm(const std::wstring &url) {
    const Lucene::TermPtr lookup_term =
            newLucene<Lucene::Term>(schema::kUrl.field_name(), url);
    return lookup_term;
}

// Loads the params of the media index from the params file. On failure an
// error is reported, false is returned, and the remembered timestamp of the
// params file stays untouched.
bool MediaIndex::Private::ReadParams() {
    // Grab timestamp before actually reading the file to easily avoid
    // missing updates if the file gets updated between those two operations.
    const std::time_t timestamp_before_read = params_last_write_time();

    Wrapper<GError> error;
    const std::string filename = params_path().string();
    const bool loaded = g_key_file_load_from_file(params_.get(),
                                                  filename.c_str(),
                                                  G_KEY_FILE_NONE,
                                                  error.out_param());

    if (loaded) {
        // Now that the params have been read we also can update the
        // timestamp.
        params_timestamp_ = timestamp_before_read;
        return true;
    }

    const std::string message = to_string(error);
    report_error(format("Cannot open media index params at \"{1}\": {2}")
                 % path_ % message);
    return false;
}

// Loads the params of this media index from the params file. Returns false
// if that fails; the reason is then available via error_message().
bool MediaIndex::ReadParams() {
    return d->ReadParams();
}

// Writes modified params back via FlushParams(Wrapper<GKeyFile>). This is a
// no-op returning true if no param changed since the last flush. On success
// the params are marked unchanged and the timestamp of the params file is
// remembered. The index must be open.
bool MediaIndex::FlushParams() {
    BOOST_ASSERT(is_open());

    if (not d->params_changed_)
        return true;

    if (not FlushParams(d->params_))
        return false;

    d->params_timestamp_ = d->params_last_write_time();
    d->params_changed_ = false;
    return true;
}

// Stores the given params. This implementation never stores anything: it
// reports that the index is read-only and returns false.
bool MediaIndex::FlushParams(Wrapper<GKeyFile> /*params*/) {
    // Keep the path in a local variable, like all other format arguments in
    // this file.
    const FileSystemPath index_location = path().parent_path();
    report_error(format("Cannot modify read-only index at \"{1}\".")
                 % index_location);
    return false;
}

std::string MediaIndex::get_param(const std::string &group,
                                  const ParamId &key) const {
    return take_string(g_key_file_get_string(d->params_.get(),
                                             group.c_str(), key.c_str(),
                                             null_ptr));
}

void MediaIndex::set_param(const std::string &group,
                           const ParamId &key,
                           const std::string &value) {
    g_key_file_set_string(d->params_.get(), group.c_str(),
                          key.c_str(), value.c_str());

    d->params_changed_ = true;
}

// Returns the path of the params file, which is the file "mediaindex"
// below the index path.
FileSystemPath MediaIndex::params_path() const {
    return d->params_path();
}

// Opens this media index.
//
// This method returns true on success. It fails when Lucene cannot
// open the requested index. It also fails when index was already open.
//
// On success this method resets the last error message.
bool MediaIndex::Open(const FileSystemPath &path) {
    d->error_message_.clear();

    // Open default path if nothing passed.
    if (path.empty())
        return Open(default_path());

    // FIXME(M4): Find better strategy for non-existing media index
    if (is_open()) {
        report_error(format("Reader already open, "
                            "call Close() before re-using"));
        return false;
    }

    kInfo("Opening media index index at \"{1}\".") % path;
    d->path_= path;

    if (not ReadParams()) {
        d->path_.clear();
        return false;
    }

    try {
        d->analyzer_ = newLucene<Lucene::StandardAnalyzer>
                (kLuceneVersion, Lucene::HashSet<std::wstring>());
    } catch(const LuceneException &ex) {
        const std::string message = FromUnicode(ex.getError());
        report_error(format("Cannot create standard analyzer: {1}") % message);
        d->path_.clear();
        return false;
    }

    const std::string media_index_format = get_param("global", kParamFormat);
    kDebug("Media index format: {1}") % media_index_format;

    if (media_index_format == kMediaIndexFormat) {
        d->set_index_reader(OpenIndex());
    } else if (media_index_format == kMetaIndexFormat) {
        d->set_index_reader(OpenMetaIndex());
    } else {
        report_error(format("Cannot open media index at \"{1}\": "
                            "Unsupported media index format")
                     % path);
        d->path_.clear();
        return false;
    }

    if (not d->index_reader()) {
        d->path_.clear();
        return false;
    }

    try {
        d->query_parser_ = newLucene<Lucene::QueryParser>
                (kLuceneVersion, L"title", d->analyzer_);
    } catch(const LuceneException &ex) {
        const std::string message = FromUnicode(ex.getError());
        report_error(format("Cannot create query parser: {1}") % message);
        d->path_.clear();
        return false;
    }

    if (not FlushParams()) {
        d->path_.clear();
        return false;
    }

    return true;
}

// Creates a reader that spans all |children|. The second constructor
// argument of Lucene's MultiReader is its "closeSubReaders" flag. Passing
// false means that closing the multi reader only releases its references
// to the children instead of closing them.
static Lucene::IndexReaderPtr newMultiReader
                (const Lucene::Collection<Lucene::IndexReaderPtr> &children) {
    const bool kCloseSubReaders = false;
    const Lucene::IndexReaderPtr multi_reader =
            newLucene<Lucene::MultiReader>(children, kCloseSubReaders);
    return multi_reader;
}

void MediaIndex::Private::rebuild_index_reader() {
    Lucene::Collection<Lucene::IndexReaderPtr> children =
            Lucene::Collection<Lucene::IndexReaderPtr>::newInstance();

    BOOST_FOREACH (const MediaReaderMap::value_type &p, media_readers_) {
        children.add(p.second);
    }

    set_index_reader(newMultiReader(children));
}

// Called when a media root got added. If the index is open and the root is
// valid and not known yet, the child index of that root gets opened and the
// index reader is rebuilt to include it. Failures are only logged as
// warnings.
void MediaIndex::Private::OnMediaRootAdded(const MediaRoot &root) {
    const std::string root_path = root.path();

    if (not root.is_valid()) {
        const std::string error_message = root.error_message();
        kWarning("Ignoring addition of invalid media root \"{1}\": {2}")
                % root_path % error_message;
        return;
    }

    // Nothing to do while the index is closed.
    if (not q->is_open())
        return;

    // Nothing to do either if the root has a reader already.
    if (media_readers_.count(root) > 0)
        return;

    kDebug("New media root \"{1}\" added.") % root_path;
    const Lucene::IndexReaderPtr reader = q->OpenChildIndex(root);
    const bool usable = reader && q->FlushParams();

    if (usable) {
        media_readers_.insert(std::make_pair(root, reader));
        rebuild_index_reader();
        return;
    }

    // Call report error to merge possible cause.
    report_error(format("Ignoring media root \"{1}\" "
                        "since no index could be created")
                 % root_path);
    const std::string error_message = q->error_message();
    kWarning("{1}.") % error_message;
}

// Adds the index of the media root |root| to this media index, by running
// the same code as for media roots announced by the media root manager.
void MediaIndex::AddMediaRoot(const MediaRoot &root) {
    d->OnMediaRootAdded(root);
}

void MediaIndex::Private::OnMediaRootRemoved(const MediaRoot &root) {
    const MediaReaderMap::iterator it = media_readers_.find(root);

    if (it != media_readers_.end()) {
        it->second->close();
        media_readers_.erase(it);
        rebuild_index_reader();
    }
}

// Removes the index of the media root |root| from this media index, by
// running the same code as for media roots the media root manager reports
// as removed.
void MediaIndex::RemoveMediaRoot(const MediaRoot &root) {
    d->OnMediaRootRemoved(root);
}

// Opens the plain Lucene index stored in the folder "index" below the
// index path. Returns a null pointer, after reporting an error, on failure.
Lucene::IndexReaderPtr MediaIndex::OpenIndex() {
    const FileSystemPath index_path = path() / "index";
    return OpenIndexReader(index_path);
}

// Opens a Lucene index reader for the index directory at |path|. Lucene
// exceptions are turned into a reported error; a null pointer is returned
// in that case.
Lucene::IndexReaderPtr MediaIndex::OpenIndexReader(const FileSystemPath &path) {
    // Stays null if any of the Lucene calls below throws.
    Lucene::IndexReaderPtr reader;

    try {
        const Lucene::FSDirectoryPtr directory =
                Lucene::FSDirectory::open(ToUnicode(path.string()));
        reader = Lucene::IndexReader::open(directory);
    } catch(const LuceneException &ex) {
        const std::string message = FromUnicode(ex.getError());
        report_error
                (format("Cannot open media index at \"{1}\": {2}")
                 % path % message);
    }

    return reader;
}

// Opens a meta index: a multi reader that spans the child indexes of all
// media roots. Without media root manager the multi reader has no children.
//
// Media roots are collected from two sources: first from the "media:"
// groups of the params, which get registered at the media root manager
// again, then from the media roots the manager reports. Each media root
// gets at most one reader, even if both sources list it; otherwise its
// documents would show up twice in the multi reader. Roots that cannot be
// opened are skipped with a warning.
Lucene::IndexReaderPtr MediaIndex::OpenMetaIndex() {
    Lucene::Collection<Lucene::IndexReaderPtr> children =
            Lucene::Collection<Lucene::IndexReaderPtr>::newInstance();

    if (d->root_manager_) {
        // Media roots that got a reader during this call already.
        std::set<MediaRoot> opened_roots;

        // Check previously known media roots and try to restore them.
        const Wrapper<char *> group_ids =
                take(g_key_file_get_groups(d->params_.get(), null_ptr));

        for (const char *const *group = group_ids.get(); *group; ++group) {
            if (not g_str_has_prefix(*group, "media:"))
                continue;

            const std::string path = get_param(*group, kParamRelativePath);

            if (path.empty())
                continue;

            kTrace("Restoring media root for \"{1}\"") % path;
            const MediaRoot root = d->root_manager_->
                    AddRelativeRoot(path);

            if (not root.is_valid()) {
                // Call report error to merge possible cause.
                std::string error = root.error_message();
                report_error(format("Ignoring invalid media root "
                                    "\"{1}\": {2}")
                             % path % error);
                error = error_message();
                kWarning("{1}.") % error;
                continue;
            }

            // Several groups might describe the very same media root.
            if (opened_roots.count(root) > 0)
                continue;

            const Lucene::IndexReaderPtr reader = OpenChildIndex(root);

            if (not reader) {
                // Call report error to merge possible cause.
                const std::string root_path = root.path();
                report_error(format("Ignoring media root \"{1}\" "
                                    "since no index could be created")
                             % root_path);
                const std::string error = error_message();
                kWarning("{1}.") % error;
                continue;
            }

            opened_roots.insert(root);
            d->media_readers_.insert(std::make_pair(root, reader));
            children.add(reader);
        }

        // Create sub indexes for each known media root.
        BOOST_FOREACH (const MediaRoot &root, d->root_manager_->media_roots()) {
            // Skip the media roots which got restored above.
            if (opened_roots.count(root) > 0)
                continue;

            const Lucene::IndexReaderPtr reader = OpenChildIndex(root);

            if (not reader) {
                // Call report error to merge possible cause.
                const std::string root_path = root.path();
                report_error(format("Ignoring media root \"{1}\" "
                                    "since no index could be created")
                             % root_path);
                const std::string error = error_message();
                kWarning("{1}.") % error;
                continue;
            }

            opened_roots.insert(root);
            d->media_readers_.insert(std::make_pair(root, reader));
            children.add(reader);
        }
    }

    return newMultiReader(children);
}

// Opens the child index of the media root |root|. The folder of that index
// is taken from the "segments" param in the group of the media root. If
// that param is empty, or if the index cannot be opened, an error is
// reported and a null pointer is returned.
Lucene::IndexReaderPtr MediaIndex::OpenChildIndex(const MediaRoot &root) {
    const std::string dirname = get_param(root.group_id(), kParamSegments);

    if (not dirname.empty())
        return OpenIndexReader(path() / dirname);

    const std::string root_path = root.path();
    d->report_error(format("No media index available for \"{1}\"")
                    % root_path);
    return Lucene::IndexReaderPtr();
}

// Closes this media index.
void MediaIndex::Close() {
    if (d->index_reader())
        d->index_reader()->close();

    d->set_index_reader(Lucene::IndexReaderPtr());
    d->analyzer_.reset();
    d->query_parser_.reset();
    d->path_.clear();
}

bool MediaIndex::Reopen() {
    if (d->index_reader())
        d->set_index_reader(d->index_reader()->reopen());

    return true;
}

// Returns the set of properties stored in |document|. Fields for which no
// property is found are ignored. The result is empty for a null document.
Property::Set MediaIndex::GetFields(Lucene::DocumentPtr document) {
    Property::Set fields;

    if (document) {
        // Collect distinct field names, so that we properly report
        // multi-value properties.
        // FIXME(M4): Find a way to avoid building this set.
        // Actually Lucene::FieldSelector should be the solution.
        BOOST_FOREACH (const Lucene::FieldablePtr field,
                       document->getFields()) {
            const Property &property =
                    Property::FromFieldName(field->name());

            if (property)
                fields.insert(property);
        }
    }

    return fields;
}

// Extracts the values of all properties stored in |document|. The set of
// properties is determined with GetFields().
MediaInfo MediaIndex::ExtractProperties(Lucene::DocumentPtr document) {
    return ExtractProperties(document, GetFields(document));
}

// Extracts the values of the properties listed in |fields| from |document|.
// Null properties within |fields| are skipped. The result is empty for a
// null document.
MediaInfo MediaIndex::ExtractProperties(Lucene::DocumentPtr document,
                                        const Property::Set &fields) {
    MediaInfo metadata;

    if (document) {
        BOOST_FOREACH (const Property &property, fields) {
            if (not property)
                continue;

            // All fields of that name, as a property can have many values.
            const Lucene::Collection<Lucene::FieldablePtr> fieldables =
                    document->getFieldables(property.field_name());
            const Property::Value value =
                    property.TransformFields(fieldables);
            // FIXME(M3): RESTORE RELATED KEYS!!!
            metadata.add_single(std::make_pair(property, value));
        }
    }

    return metadata;
}

// Finds the first document whose URL field matches |url|. Returns a null
// pointer if there is no such document.
// NOTE(review): the index reader is used without checking that the index
// is open.
Lucene::DocumentPtr MediaIndex::FindDocument(const std::wstring &url) const {
    const Lucene::TermPtr lookup_term = MakeLookupTerm(url);
    const Lucene::TermDocsPtr matches =
            d->index_reader()->termDocs(lookup_term);

    if (not matches->next())
        return Lucene::DocumentPtr();

    return d->index_reader()->document(matches->doc());
}

bool MediaIndex::Exists(const std::wstring &url) {
    d->refresh_policy_->OnBeginReading(this);
    return d->index_reader()->termDocs(MakeLookupTerm(url))->next();
}

// Returns all properties stored for |url|. The result is empty if the index
// has no document for that URL. The refresh policy is notified first.
MediaInfo MediaIndex::Lookup(const std::wstring &url) {
    d->refresh_policy_->OnBeginReading(this);

    const Lucene::DocumentPtr document = FindDocument(url);
    return ExtractProperties(document);
}

// FIXME(M4): Use Lucene::FieldSelector
//
// Returns the properties listed in |fields| for |url|. The result is empty
// if the index has no document for that URL. The refresh policy is notified
// first.
MediaInfo MediaIndex::Lookup(const std::wstring &url,
                             const Property::Set &fields) {
    d->refresh_policy_->OnBeginReading(this);

    const Lucene::DocumentPtr document = FindDocument(url);
    return ExtractProperties(document, fields);
}

// FIXME(M4): Permit selection of keys. Use Lucene::FieldSelector for that.
//
// Calls |visit_item| for the documents of the index. The first |offset|
// documents are skipped, and at most |limit| documents are processed; a
// negative |limit| means no limit. The second argument passed to
// |visit_item| is the number of remaining items, counting the current one.
void MediaIndex::VisitAll(const ItemVistor &visit_item,
                          int32_t limit, int32_t offset) {
    if (limit < 0)
        limit = kUnlimited;

    d->refresh_policy_->OnBeginReading(this);

    // Never announce more items than the index has documents.
    int32_t remaining_items = std::min(limit, d->index_reader()->numDocs());
    const Lucene::TermDocsPtr matches = d->index_reader()->termDocs();

    while (remaining_items > 0 && matches->next()) {
        if (offset > 0) {
            // Skipped documents do not count against the limit.
            --offset;
        } else {
            const Lucene::DocumentPtr document =
                    d->index_reader()->document(matches->doc());

            if (document)
                visit_item(ExtractProperties(document), remaining_items);

            --remaining_items;
        }
    }
}

// Lucene collector that passes the properties of each matching document to
// an item visitor. The first |offset| matches are skipped. After that at
// most |limit| matches are processed; a negative |limit| means no limit.
class MediaIndex::Collector : public Lucene::Collector {
public:
    Collector(const ItemVistor &visit_item, Lucene::SearcherPtr searcher,
              Lucene::QueryPtr query, int32_t offset, int32_t limit)
        : visit_item_(visit_item)
        , doc_base_(0)
        , searcher_(searcher)
        , query_(query)
        , offset_(offset)
        , limit_(limit) {
    }

    // Scores are not needed.
    void setScorer(Lucene::ScorerPtr) {
    }

    // Called for each match. |doc| is relative to the reader passed to the
    // most recent setNextReader() call.
    void collect(int32_t doc) {
        if (limit_ == 0)
            return;

        if (offset_ > 0) {
            --offset_;
            return;
        }

        // The searcher expects a document id relative to the entire index,
        // therefore the base of the current reader must be added.
        if (kExplain.enabled())
            kExplain(searcher_->explain(query_, doc_base_ + doc)->toString());

        Lucene::DocumentPtr document = reader_->document(doc);

        if (document)
            visit_item_(ExtractProperties(document), limit_);

        // Negative limits stand for "unlimited" and are never counted down.
        if (limit_ > 0)
            --limit_;
    }

    // Remembers the reader the next collect() calls refer to, and the
    // document id base of that reader within the entire index.
    void setNextReader(Lucene::IndexReaderPtr reader, int32_t doc_base) {
        reader_ = reader;
        doc_base_ = doc_base;
    }

    // The order in which matches get reported does not matter.
    bool acceptsDocsOutOfOrder() {
        return true;
    }

private:
    ItemVistor visit_item_;
    Lucene::IndexReaderPtr reader_;
    int32_t doc_base_;
    Lucene::SearcherPtr searcher_;
    Lucene::QueryPtr query_;
    int32_t offset_;
    int32_t limit_;
};

// Returns a name for |occur| to be used in log messages. Values that match
// none of the known enumerators are printed as "unknown(<number>)".
static std::wstring to_wstring(Lucene::BooleanClause::Occur occur) {
    const wchar_t *known_name = 0;

    switch (occur) {
    case Lucene::BooleanClause::MUST:
        known_name = L"must";
        break;
    case Lucene::BooleanClause::SHOULD:
        known_name = L"should";
        break;
    case Lucene::BooleanClause::MUST_NOT:
        known_name = L"must-not";
        break;
    }

    if (known_name)
        return known_name;

    std::wostringstream oss;
    oss << L"unknown(" << static_cast<unsigned>(occur) << ")";
    return oss.str();
}

// FIXME(M4): Permit selection of keys. Use Lucene::FieldSelector that.
bool MediaIndex::Query(const ItemVistor &visit_item, const Filter &filter,
                       int32_t limit, int32_t offset) {
    std::wstring error;

    const Lucene::QueryPtr query = filter.BuildQuery(d->query_parser_, &error);

    if (not query) {
        if (error.empty()) {
            VisitAll(visit_item, limit, offset);
            return true;
        }

        const std::string error_message = FromUnicode(error);
        report_error(format("Cannot build Lucene query from filter: {1}")
                     % error_message);
        return false;
    }

    d->refresh_policy_->OnBeginReading(this);

    if (kDebug.enabled()) {
        const std::wstring query_class = query->getClassName();
        const std::wstring query_string = query->toString();
        kDebug(L"{1}(\"{2}\"), limit={3}, offset={4}")
                % query_class % query_string % limit % offset;
    }

    if (kExplain.enabled()) {
        const Lucene::WeightPtr weight = query->weight(d->searcher());

        const std::wstring weight_class = weight->getClassName();
        const double weight_value = weight->getValue();
        const Lucene::ScorerPtr scorer = weight->scorer(
            d->index_reader(), false, false);
        kExplain(L"weight={1}({2}), scorer={3}")
                % weight_class % weight_value % scorer;

        Lucene::BooleanQueryPtr boolean_query =
                boost::dynamic_pointer_cast<Lucene::BooleanQuery>(query);

        if (boolean_query) {
            BOOST_FOREACH (Lucene::BooleanClausePtr clause,
                           boolean_query->getClauses()) {
                const std::wstring occur = to_wstring(clause->getOccur());
                const std::wstring query_class =
                    clause->getQuery()->getClassName();
                const std::wstring query_string =
                    clause->getQuery()->toString();
                const Lucene::WeightPtr weight =
                        clause->getQuery()->weight(d->searcher());
                const std::wstring weight_class = weight->getClassName();
                const double weight_value = weight->getValue();
                const Lucene::ScorerPtr scorer =
                    weight->scorer(d->index_reader(), false, false);

                kExplain(L"occur={1}, query={2}(\"{3}\"), "
                         "weight={4}({5}), scorer={6}")
                        % occur
                        % query_class % query_string
                        % weight_class % weight_value
                        % scorer;
            }
        }
    }

    const Lucene::CollectorPtr media_collector =
            newLucene<Collector>(visit_item, d->searcher(),
                                 query, offset, limit);
    d->searcher()->search(query, media_collector);

    return true;
}

} // namespace mediascanner
