//   Copyright Maarten L. Hekkelman, Radboud University 2012.
//  Distributed under the Boost Software License, Version 1.0.
//     (See accompanying file LICENSE_1_0.txt or copy at
//           http://www.boost.org/LICENSE_1_0.txt)

#include "M6Lib.h"

#include <iostream>
#include <set>

#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>
#include <boost/current_function.hpp>
#include <boost/date_time/posix_time/posix_time_types.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/thread.hpp>
#include <boost/foreach.hpp>
#define foreach BOOST_FOREACH
#include <boost/format.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/filter/zlib.hpp>
#include <boost/iostreams/device/back_inserter.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/algorithm/string.hpp>

#include "M6Config.h"
#include "M6Error.h"
#include "M6BlastCache.h"

using namespace std;

namespace fs = boost::filesystem;
namespace io = boost::iostreams;
namespace ba = boost::algorithm;

// Upper bound on the number of entries kept in the result cache list;
// CacheResult evicts entries once the list grows beyond this size.
const uint32 kMaxCachedEntryResults = 100;
// Suffixes of the per-job files kept in the cache directory (compressed
// result, job description, error message). Iterated over when the files
// belonging to a job have to be removed.
const char* kBlastFileExtensions[] = { ".xml.bz2", ".job", ".err" };

// --------------------------------------------------------------------

// Returns true when this job was created with exactly the given search
// parameters and with a report limit that is at least inReportLimit.
bool M6BlastJob::IsJobFor(const string& inDatabank, const string& inQuery, const string& inProgram,
	const string& inMatrix, uint32 inWordSize, double inExpect, bool inLowComplexityFilter,
	bool inGapped, int32 inGapOpen, int32 inGapExtend, uint32 inReportLimit) const
{
	// what is being searched, and where
	if (db != inDatabank or query != inQuery)
		return false;

	// the algorithm and its scoring setup
	if (program != inProgram or matrix != inMatrix)
		return false;

	if (wordsize != inWordSize or expect != inExpect)
		return false;

	if (filter != inLowComplexityFilter or gapped != inGapped)
		return false;

	if (gapOpen != inGapOpen or gapExtend != inGapExtend)
		return false;

	// a job that reported more hits than requested now is still usable
	return reportLimit >= inReportLimit;
}

bool M6BlastJob::IsStillValid(const vector<fs::path>& inFiles) const
{
	vector<M6BlastDbInfo> fileInfo;
	
	foreach (const fs::path& file, inFiles)
	{
		M6BlastDbInfo info = { file.string(), boost::posix_time::from_time_t(fs::last_write_time(file)) };
		fileInfo.push_back(info);
	}

	return files == fileInfo;
}

// --------------------------------------------------------------------

// Gives access to the single, process wide blast cache. The object is a
// function local static, so it is constructed on the first call.
M6BlastCache& M6BlastCache::Instance()
{
	static M6BlastCache sTheCache;

	return sTheCache;
}

// Sets up the cache: locates (and if needed creates) the cache directory,
// reloads the job descriptions that are stored there and starts the worker
// thread. Throws when no blast directory is configured.
M6BlastCache::M6BlastCache()
{
	string s = M6Config::GetDirectory("blast");
	if (s.empty())
		THROW(("Missing blastdir configuration"));

	mCacheDir = fs::path(s);
	if (not fs::exists(mCacheDir))
		fs::create_directories(mCacheDir);	// also creates missing parent directories

	// read cached entries in result cache
	fs::directory_iterator end;
	for (fs::directory_iterator iter(mCacheDir); iter != end; ++iter)
	{
		const fs::path jobFile = iter->path();

		if (jobFile.extension().string() != ".job" or jobFile.filename().string().length() <= 4)
			continue;

		try
		{
			fs::ifstream file(jobFile);
			if (not file.is_open())
				continue;

			CacheEntry e;

			e.id = jobFile.filename().stem().string();
			e.hitCount = 0;
			e.bestScore = -1;	// negative means: not read from the result file yet

			zeep::xml::document doc(file);
			doc.deserialize("blastjob", e.job);

			// the files that accompany the job description tell what happened to it
			if (fs::exists(mCacheDir / (e.id + ".err")))
				e.status = bj_Error;
			else if (fs::exists(mCacheDir / (e.id + ".xml.bz2")))
				e.status = bj_Finished;
			else
				e.status = bj_Queued;

			mResultCache.push_back(e);
		}
		catch (exception& ex)
		{
			// A single damaged job file should not make the entire cache
			// unusable, so report it and carry on with the next one.
			cerr << "Skipping unreadable blast job file " << jobFile.string()
				 << ": " << ex.what() << endl;
		}
	}

	// finally start the worker thread
	mStopWorkingFlag = false;
	mWorkerThread = boost::thread([this](){ this->Work(); });
}

// Asks the worker thread to stop and waits until it has finished.
M6BlastCache::~M6BlastCache()
{
	mStopWorkingFlag = true;

	if (not mWorkerThread.joinable())
		return;

	// interrupt first so the worker leaves whatever it is blocked in
	mWorkerThread.interrupt();
	mWorkerThread.join();
}

// Reports on the job with id inJobID. The returned tuple contains:
//   0: the job status, bj_Unknown when the job is not in the cache
//   1: an error message, only filled in for failed jobs
//   2: the number of hits, only filled in for finished jobs
//   3: the expect value of the first HSP of the first hit (0 when there are
//      no hits), only filled in for finished jobs
// An entry that is found is moved to the front of the cache list.
tr1::tuple<M6BlastJobStatus,string,uint32,double> M6BlastCache::JobStatus(const string& inJobID)
{
	boost::mutex::scoped_lock lock(mCacheMutex);

	tr1::tuple<M6BlastJobStatus,string,uint32,double> result;

	get<0>(result) = bj_Unknown;

	auto i = find_if(mResultCache.begin(), mResultCache.end(), [&inJobID](CacheEntry& e) -> bool
				{ return e.id == inJobID; });
	
	if (i != mResultCache.end())
	{
		get<0>(result) = i->status;
		
		try
		{
			if (i->status == bj_Finished)
			{
				// a negative score means the summary was not loaded from disk yet
				if (i->bestScore < 0)
				{
					M6BlastResultPtr jobResult = JobResult(inJobID);
					
					if (jobResult)
					{
						i->hitCount = static_cast<uint32>(jobResult->mHits.size());
						if (not jobResult->mHits.empty() and not jobResult->mHits.front().mHsps.empty())
							i->bestScore = jobResult->mHits.front().mHsps.front().mExpect;
						else
							i->bestScore = 0;	// same as CacheResult; avoids re-reading the file on every call
					}
				}
				
				get<2>(result) = i->hitCount;
				get<3>(result) = i->bestScore;
			}
			else if (i->status == bj_Error)
			{
				fs::path errPath(mCacheDir / (inJobID + ".err"));
				if (fs::exists(errPath))
				{
					// only the first line of the error file is reported
					fs::ifstream file(errPath);
					getline(file, get<1>(result));
				}
				else
					get<1>(result) = "missing error message";
			}
		}
		catch (exception& ex)
		{
			get<0>(result) = bj_Error;
			get<1>(result) = ex.what();
		}

		// move the entry to the front of the list
		if (i != mResultCache.begin())
			mResultCache.splice(mResultCache.begin(), mResultCache, i);
	}
	
	return result;
}

// Loads the stored result of job inJobID from its bzip2 compressed XML file
// in the cache directory. Throws an M6Exception when that file cannot be
// opened.
M6BlastResultPtr M6BlastCache::JobResult(const string& inJobID)
{
	// The file is declared before the filtering stream on purpose: the
	// filtering stream refers to the file, so it must be destroyed first.
	fs::ifstream file(mCacheDir / (inJobID + ".xml.bz2"), ios::binary);
	if (not file.is_open())
		throw M6Exception("missing blast result file");

	io::filtering_stream<io::input> in;
	in.push(io::bzip2_decompressor());
	in.push(file);
	
	M6BlastResultPtr result(new M6Blast::Result);

	zeep::xml::document doc(in);
	doc.deserialize("blast-result", const_cast<M6Blast::Result&>(*result));

	return result;
}

void M6BlastCache::CacheResult(const string& inJobID, M6BlastResultPtr inResult)
{
	boost::mutex::scoped_lock lock(mCacheMutex);
	
	auto i = find_if(mResultCache.begin(), mResultCache.end(), [&inJobID](CacheEntry& e) -> bool
				{ return e.id == inJobID; });
	
	if (i != mResultCache.end())
	{
		i->status = bj_Finished;

		i->hitCount = static_cast<uint32>(inResult->mHits.size());
		if (not inResult->mHits.empty() and not inResult->mHits.front().mHsps.empty())
			i->bestScore = inResult->mHits.front().mHsps.front().mExpect;
		else
			i->bestScore = 0;
		
		zeep::xml::document doc;
		doc.serialize("blast-result", *inResult);
	
		fs::ofstream file(mCacheDir / (inJobID + ".xml.bz2"), ios_base::out|ios_base::trunc|ios_base::binary);
		io::filtering_stream<io::output> out;

		if (not file.is_open())
			throw runtime_error("could not create output file");
	
		out.push(io::bzip2_compressor());
		out.push(file);

		out << doc;
		
		auto j = i;
		advance(j, 1);
		if (i != mResultCache.begin())
			mResultCache.splice(mResultCache.begin(), mResultCache, i, j);
	}

	// do some housekeeping
	while (mResultCache.size() > kMaxCachedEntryResults)
	{
		foreach (const char* ext, kBlastFileExtensions)
		{
			boost::system::error_code ec;
			fs::remove(mCacheDir / (mResultCache.back().id + ext), ec);
		}
		
		mResultCache.pop_back();
	}
}

void M6BlastCache::FastaFilesForDatabank(const string& inDatabank, vector<fs::path>& outFiles)
{
	vector<string> dbs;
	ba::split(dbs, inDatabank, ba::is_any_of(";"));
	
	sort(dbs.begin(), dbs.end());
	
	foreach (string& db, dbs)
	{
		fs::path dbdir = M6Config::GetDbDirectory(db);
		
		if (not fs::exists(dbdir / "fasta"))
			THROW(("Databank '%s' does not contain a fasta file", db.c_str()));
		
		outFiles.push_back(dbdir / "fasta");
	}
	
	if (outFiles.empty())
		THROW(("Databank '%s' does not contain a fasta file", inDatabank.c_str()));
}

// Submits a blast job and returns its id. When the cache already contains a
// job for the same parameters (see M6BlastJob::IsJobFor) the id of that job
// is returned; if that job has finished or failed but the fasta files have
// changed since, it is queued again. Otherwise a new job is created, stored
// in the cache directory and queued.
// Throws when inReportLimit exceeds 1000 or when a databank has no fasta file.
string M6BlastCache::Submit(const string& inDatabank, const string& inQuery,
	const string& inProgram, const string& inMatrix, uint32 inWordSize,
	double inExpect, bool inLowComplexityFilter,
	bool inGapped, int32 inGapOpen, int32 inGapExtend,
	uint32 inReportLimit)
{
	if (inReportLimit > 1000)
		THROW(("Report limit exceeds maximum of 1000 hits"));

	vector<fs::path> files;
	FastaFilesForDatabank(inDatabank, files);

	// records path and current last write time of the fasta files in a job
	auto recordFiles = [&files](M6BlastJob& ioJob)
	{
		ioJob.files.clear();
		foreach (fs::path& file, files)
		{
			M6BlastDbInfo info = { file.string(), boost::posix_time::from_time_t(fs::last_write_time(file)) };
			ioJob.files.push_back(info);
		}
	};

	string result;
	boost::mutex::scoped_lock lock(mCacheMutex);
	
	// see if the job is already done before
	foreach (CacheEntry& e, mResultCache)
	{
		if (not e.job.IsJobFor(inDatabank, inQuery, inProgram, inMatrix, inWordSize, inExpect,
				inLowComplexityFilter, inGapped, inGapOpen, inGapExtend, inReportLimit))
			continue;

		result = e.id;

		if ((e.status == bj_Finished or e.status == bj_Error) and not e.job.IsStillValid(files))
		{
			// Clean up stale files first, before any state is changed.
			// Removing a file that does not exist is not an error.
			fs::remove(mCacheDir / (e.id + ".xml.bz2"));
			fs::remove(mCacheDir / (e.id + ".err"));

			// Take over the new timestamps. Without this the job would be
			// considered stale, and run again, on every following submit.
			recordFiles(e.job);

			e.hitCount = 0;
			e.bestScore = -1;
			e.status = bj_Queued;

			StoreJob(e.id, e.job);	// need to store the job again, since the timestamps changed

			mWorkCondition.notify_one();
		}

		break;
	}
	
	if (result.empty()) // new job, add to the queue
	{
		static boost::uuids::random_generator gen;

		CacheEntry e;

		e.id = boost::lexical_cast<string>(gen());
		e.status = bj_Queued;
		e.hitCount = 0;
		e.bestScore = -1;	// same initial values as used for entries read from disk

		e.job.db = inDatabank;
		e.job.query = inQuery;
		e.job.program = inProgram;
		e.job.matrix = inMatrix;
		e.job.wordsize = inWordSize;
		e.job.expect = inExpect;
		e.job.filter = inLowComplexityFilter;
		e.job.gapped = inGapped;
		e.job.gapOpen = inGapOpen;
		e.job.gapExtend = inGapExtend;
		e.job.reportLimit = inReportLimit;
		
		recordFiles(e.job);
	
		StoreJob(e.id, e.job);
		
		mResultCache.push_back(e);
		mWorkCondition.notify_one();

		result = e.id;
	}

	return result;
}

void M6BlastCache::StoreJob(const string& inJobID, const M6BlastJob& inJob)
{
	// store the job in the cache directory
	zeep::xml::document doc;
	doc.serialize("blastjob", inJob);
		
	fs::ofstream file(mCacheDir / (inJobID + ".job"), ios_base::out|ios_base::trunc);
	file << doc;
}

void M6BlastCache::Work()
{
	using namespace boost::posix_time;

	boost::mutex::scoped_lock lock(mWorkMutex);

	while (not mStopWorkingFlag)
	{
		try
		{
			string next;
			
			{
				// fetch the first queued entry
				boost::mutex::scoped_lock lock2(mCacheMutex);
				foreach (CacheEntry& e, mResultCache)
				{
					if (e.status != bj_Queued)
						continue;
					next = e.id;
					break;
				}
			}
			
			if (next.empty())
				mWorkCondition.wait(lock);
			else
				ExecuteJob(next);
		}
		catch (boost::thread_interrupted&)
		{
		}
		catch (exception& e)
		{
			cerr << e.what() << endl;
			boost::this_thread::sleep(seconds(5));
		}
		catch (...)
		{
			cerr << "unknown exception" << endl;
			boost::this_thread::sleep(seconds(5));
		}
	}
}

void M6BlastCache::ExecuteJob(const string& inJobID)
{
	try
	{
		SetJobStatus(inJobID, bj_Running);
		
		M6BlastJob job;
	
		fs::ifstream file(mCacheDir / (inJobID + ".job"));
		if (not file.is_open())
		{
			SetJobStatus(inJobID, bj_Error);
			return;
		}
		
		{
			zeep::xml::document doc(file);
			doc.deserialize("blastjob", job);
		}
		
		vector<fs::path> files;
		transform(job.files.begin(), job.files.end(), back_inserter(files),
			[](M6BlastDbInfo& dbi) { return dbi.path; });
		
		M6BlastResultPtr result(M6Blast::Search(files, job.query, job.program,
			job.matrix, job.wordsize, job.expect, job.filter, job.gapped,
			job.gapOpen, job.gapExtend, job.reportLimit));
		
		CacheResult(inJobID, result);
	}
	catch (exception& e)
	{
		SetJobStatus(inJobID, bj_Error);

		fs::ofstream file(mCacheDir / (inJobID + ".err"));
		if (file.is_open())	// silenty ignore errors.... what else?
			file << e.what();
		
		throw;
	}
}

void M6BlastCache::SetJobStatus(const string inJobId, M6BlastJobStatus inStatus)
{
	boost::mutex::scoped_lock lock(mCacheMutex);
	
	foreach (CacheEntry& e, mResultCache)
	{
		if (e.id != inJobId)
			continue;
		
		e.status = inStatus;
		break;
	}
}

// Placeholder, purging has not been implemented yet: the call has no effect,
// whatever the value of inDeleteFiles.
void M6BlastCache::Purge(bool inDeleteFiles)
{
	if (not inDeleteFiles)
		return;

	// TODO implement
}

// Returns a description (id, databank, query length and status as text) of
// every entry in the cache, in the order of the cache list.
M6BlastJobDescList M6BlastCache::GetJobList()
{
	boost::mutex::scoped_lock lock(mCacheMutex);

	M6BlastJobDescList jobs;

	for (auto entry = mResultCache.begin(); entry != mResultCache.end(); ++entry)
	{
		M6BlastJobDesc desc;

		desc.id = entry->id;
		desc.db = entry->job.db;
		desc.queryLength = static_cast<uint32>(entry->job.query.length());

		// the status text is left untouched for a value that is not listed here
		if (entry->status == bj_Unknown)
			desc.status = "unknown";
		else if (entry->status == bj_Error)
			desc.status = "error";
		else if (entry->status == bj_Queued)
			desc.status = "queued";
		else if (entry->status == bj_Running)
			desc.status = "running";
		else if (entry->status == bj_Finished)
			desc.status = "finished";

		jobs.push_back(desc);
	}

	return jobs;
}

void M6BlastCache::DeleteJob(const string& inJobID)
{
	try
	{
		(void)boost::lexical_cast<boost::uuids::uuid>(inJobID);
	}
	catch (boost::bad_lexical_cast&)
	{
		THROW(("Invalid job id"));
	}

	boost::mutex::scoped_lock lockdb(mCacheMutex);
	
	auto c = find_if(mResultCache.begin(), mResultCache.end(),
		[&inJobID](CacheEntry& e) -> bool { return e.id == inJobID; });
	
	if (c != mResultCache.end())
		mResultCache.erase(c);

	if (mWorkerThread.joinable())
		mWorkerThread.interrupt();
	
	foreach (const char* ext, kBlastFileExtensions)
	{
		boost::system::error_code ec;
		fs::remove(mCacheDir / (inJobID + ext), ec);
	}
	
	mWorkCondition.notify_one();
}
