/*
  This file is part of CDO. CDO is a collection of Operators to
  manipulate and analyse Climate model Data.

  Copyright (C) 2003-2019 Uwe Schulzweida, <uwe.schulzweida AT mpimet.mpg.de>
  See COPYING file for copying and redistribution conditions.

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
*/

/*
   This module contains the following operators:

      Runstat    runrange        Running range
      Runstat    runmin          Running minimum
      Runstat    runmax          Running maximum
      Runstat    runsum          Running sum
      Runstat    runmean         Running mean
      Runstat    runavg          Running average
      Runstat    runvar          Running variance
      Runstat    runvar1         Running variance [Normalize by (n-1)]
      Runstat    runstd          Running standard deviation
      Runstat    runstd1         Running standard deviation [Normalize by (n-1)]
*/

#include <cdi.h>

#include <utility>

#include "functs.h"
#include "process_int.h"
#include "param_conversion.h"

#include "datetime.h"

static void
addOperators(void)
{
  // clang-format off
  cdoOperatorAdd("runrange", func_range, 0, nullptr);
  cdoOperatorAdd("runmin",   func_min,   0, nullptr);
  cdoOperatorAdd("runmax",   func_max,   0, nullptr);
  cdoOperatorAdd("runsum",   func_sum,   0, nullptr);
  cdoOperatorAdd("runmean",  func_mean,  0, nullptr);
  cdoOperatorAdd("runavg",   func_avg,   0, nullptr);
  cdoOperatorAdd("runvar",   func_var,   0, nullptr);
  cdoOperatorAdd("runvar1",  func_var1,  0, nullptr);
  cdoOperatorAdd("runstd",   func_std,   0, nullptr);
  cdoOperatorAdd("runstd1",  func_std1,  0, nullptr);
  // clang-format on
}

void *
Runstat(void *process)
{
  TimeStat timestat_date = TimeStat::MEAN;
  int varID, levelID;
  bool runstat_nomiss = false;

  cdoInitialize(process);

  char *envstr = getenv("RUNSTAT_NOMISS");
  if (envstr)
    {
      char *endptr;
      const int envval = (int) strtol(envstr, &endptr, 10);
      if (envval == 1) runstat_nomiss = true;
    }

  addOperators();

  const int operatorID = cdoOperatorID();
  int operfunc = cdoOperatorF1(operatorID);

  const bool lrange = operfunc == func_range;
  const bool lmean = operfunc == func_mean || operfunc == func_avg;
  const bool lstd = operfunc == func_std || operfunc == func_std1;
  const bool lvarstd = operfunc == func_std || operfunc == func_var || operfunc == func_std1 || operfunc == func_var1;
  const int divisor = operfunc == func_std1 || operfunc == func_var1;
  const bool lvars2 = lvarstd || lrange;

  operatorInputArg("number of timesteps");
  operatorCheckArgc(1);
  int ndates = parameter2int(operatorArgv()[0]);

  const auto streamID1 = cdoOpenRead(0);

  const int vlistID1 = cdoStreamInqVlist(streamID1);
  const int vlistID2 = vlistDuplicate(vlistID1);

  const int taxisID1 = vlistInqTaxis(vlistID1);
  const int taxisID2 = taxisDuplicate(taxisID1);
  taxisWithBounds(taxisID2);
  vlistDefTaxis(vlistID2, taxisID2);
  /*  Number of timestep will be reduced compared to the input
   *  error handling in case of not enough timesteps is done per record */
  int nsteps = vlistNtsteps(vlistID1);
  if (nsteps != -1)
    {
      nsteps -= ndates - 1;
      if (nsteps > 0) vlistDefNtsteps(vlistID2, nsteps);
    }

  const auto streamID2 = cdoOpenWrite(1);
  cdoDefVlist(streamID2, vlistID2);

  const int maxrecs = vlistNrecs(vlistID1);
  std::vector<RecordInfo> recList(maxrecs);

  DateTimeList dtlist;
  dtlist.setStat(timestat_date);
  dtlist.setCalendar(taxisInqCalendar(taxisID1));

  FieldVector3D vars1(ndates + 1), vars2, samp1;
  if (!runstat_nomiss) samp1.resize(ndates + 1);
  if (lvars2) vars2.resize(ndates + 1);

  for (int its = 0; its < ndates; its++)
    {
      fieldsFromVlist(vlistID1, vars1[its], FIELD_VEC);
      if (!runstat_nomiss) fieldsFromVlist(vlistID1, samp1[its], FIELD_VEC);
      if (lvars2) fieldsFromVlist(vlistID1, vars2[its], FIELD_VEC);
    }

  const auto gridsizemax = vlistGridsizeMax(vlistID1);
  std::vector<bool> imask(gridsizemax);

  int tsID = 0;
  for (tsID = 0; tsID < ndates; tsID++)
    {
      int nrecs = cdoStreamInqTimestep(streamID1, tsID);
      if (nrecs == 0) cdoAbort("File has less then %d timesteps!", ndates);

      dtlist.taxisInqTimestep(taxisID1, tsID);

      for (int recID = 0; recID < nrecs; recID++)
        {
          cdoInqRecord(streamID1, &varID, &levelID);

          if (tsID == 0)
            {
              recList[recID].varID = varID;
              recList[recID].levelID = levelID;
              recList[recID].lconst = vlistInqVarTimetype(vlistID1, varID) == TIME_CONSTANT;
            }

          Field &rvars1 = vars1[tsID][varID][levelID];

          size_t fieldsize = rvars1.size;

          cdoReadRecord(streamID1, rvars1.vec.data(), &rvars1.nmiss);
          if (lrange)
            {
              vars2[tsID][varID][levelID].nmiss = rvars1.nmiss;
              vars2[tsID][varID][levelID].vec = rvars1.vec;
            }

          if (runstat_nomiss && rvars1.nmiss) cdoAbort("Missing values supported was swichted off by env. RUNSTAT_NOMISS!");

          if (!runstat_nomiss)
            {
              const double missval = rvars1.missval;

              for (size_t i = 0; i < fieldsize; i++) imask[i] = !DBL_IS_EQUAL(rvars1.vec[i], missval);
              for (size_t i = 0; i < fieldsize; i++) samp1[tsID][varID][levelID].vec[i] = (double) imask[i];

#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tsID, imask, samp1, varID, levelID, fieldsize)
#endif
              for (int inp = 0; inp < tsID; inp++)
                {
                  auto &samp = samp1[inp][varID][levelID].vec;
                  for (size_t i = 0; i < fieldsize; i++)
                    if (imask[i]) samp[i]++;
                }
            }

          if (lvarstd)
            {
              vfarmoq(vars2[tsID][varID][levelID], vars1[tsID][varID][levelID]);
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tsID, vars1, vars2, varID, levelID, rvars1)
#endif
              for (int inp = 0; inp < tsID; inp++)
                {
                  vfarsumq(vars2[inp][varID][levelID], rvars1);
                  vfarsum(vars1[inp][varID][levelID], rvars1);
                }
            }
          else if (lrange)
            {
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tsID, vars1, vars2, varID, levelID, rvars1)
#endif
              for (int inp = 0; inp < tsID; inp++)
                {
                  vfarmin(vars2[inp][varID][levelID], rvars1);
                  vfarmax(vars1[inp][varID][levelID], rvars1);
                }
            }
          else
            {
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(tsID, vars1, varID, levelID, rvars1, operfunc)
#endif
              for (int inp = 0; inp < tsID; inp++)
                {
                  vfarfun(vars1[inp][varID][levelID], rvars1, operfunc);
                }
            }
        }
    }

  int otsID = 0;
  while (true)
    {
      for (int recID = 0; recID < maxrecs; recID++)
        {
          if (recList[recID].lconst) continue;

          const int varID = recList[recID].varID;
          const int levelID = recList[recID].levelID;
          Field &rvars1 = vars1[0][varID][levelID];
          const int nsets = ndates;

          if (lmean)
            {
              if (!runstat_nomiss)
                vfardiv(rvars1, samp1[0][varID][levelID]);
              else
                vfarcdiv(rvars1, (double) nsets);
            }
          else if (lvarstd)
            {
              Field &rvars2 = vars2[0][varID][levelID];
              if (!runstat_nomiss)
                {
                  if (lstd)
                    vfarstd(rvars1, rvars2, samp1[0][varID][levelID], divisor);
                  else
                    vfarvar(rvars1, rvars2, samp1[0][varID][levelID], divisor);
                }
              else
                {
                  if (lstd)
                    vfarcstd(rvars1, rvars2, nsets, divisor);
                  else
                    vfarcvar(rvars1, rvars2, nsets, divisor);
                }
            }
          else if (lrange)
            {
              Field &rvars2 = vars2[0][varID][levelID];
              vfarsub(rvars1, rvars2);
            }
        }

      dtlist.statTaxisDefTimestep(taxisID2, ndates);
      cdoDefTimestep(streamID2, otsID);

      for (int recID = 0; recID < maxrecs; recID++)
        {
          if (otsID && recList[recID].lconst) continue;

          const int varID = recList[recID].varID;
          const int levelID = recList[recID].levelID;
          Field &rvars1 = vars1[0][varID][levelID];

          cdoDefRecord(streamID2, varID, levelID);
          cdoWriteRecord(streamID2, rvars1.vec.data(), rvars1.nmiss);
        }

      otsID++;

      dtlist.shift();

      vars1[ndates] = vars1[0];
      if (!runstat_nomiss) samp1[ndates] = samp1[0];
      if (lvars2) vars2[ndates] = vars2[0];

      for (int inp = 0; inp < ndates; inp++)
        {
          vars1[inp] = vars1[inp + 1];
          if (!runstat_nomiss) samp1[inp] = samp1[inp + 1];
          if (lvars2) vars2[inp] = vars2[inp + 1];
        }

      const int nrecs = cdoStreamInqTimestep(streamID1, tsID);
      if (nrecs == 0) break;

      dtlist.taxisInqTimestep(taxisID1, ndates - 1);

      for (int recID = 0; recID < nrecs; recID++)
        {
          cdoInqRecord(streamID1, &varID, &levelID);

          Field &rvars1 = vars1[ndates - 1][varID][levelID];

          size_t fieldsize = rvars1.size;

          cdoReadRecord(streamID1, rvars1.vec.data(), &rvars1.nmiss);
          if (lrange)
            {
              vars2[ndates - 1][varID][levelID].nmiss = rvars1.nmiss;
              for (size_t i = 0; i < fieldsize; i++) vars2[ndates - 1][varID][levelID].vec[i] = rvars1.vec[i];
            }

          if (runstat_nomiss && rvars1.nmiss) cdoAbort("Missing values supported swichted off!");

          if (!runstat_nomiss)
            {
              const double missval = rvars1.missval;

              for (size_t i = 0; i < fieldsize; i++) imask[i] = !DBL_IS_EQUAL(rvars1.vec[i], missval);
              for (size_t i = 0; i < fieldsize; i++) samp1[ndates - 1][varID][levelID].vec[i] = (double) imask[i];

#ifdef _OPENMP
#pragma omp parallel for default(none) shared(ndates, fieldsize, imask, samp1, varID, levelID)
#endif
              for (int inp = 0; inp < ndates - 1; inp++)
                {
                  auto &samp = samp1[inp][varID][levelID].vec;
                  for (size_t i = 0; i < fieldsize; i++)
                    if (imask[i]) samp[i]++;
                }
            }

          if (lvarstd)
            {
              vfarmoq(vars2[ndates - 1][varID][levelID], vars1[ndates - 1][varID][levelID]);
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(ndates, vars1, vars2, varID, levelID, rvars1)
#endif
              for (int inp = 0; inp < ndates - 1; inp++)
                {
                  vfarsumq(vars2[inp][varID][levelID], rvars1);
                  vfarsum(vars1[inp][varID][levelID], rvars1);
                }
            }
          else if (lrange)
            {
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(ndates, vars1, vars2, varID, levelID, rvars1)
#endif
              for (int inp = 0; inp < ndates - 1; inp++)
                {
                  vfarmin(vars2[inp][varID][levelID], rvars1);
                  vfarmax(vars1[inp][varID][levelID], rvars1);
                }
            }
          else
            {
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(ndates, vars1, varID, levelID, rvars1, operfunc)
#endif
              for (int inp = 0; inp < ndates - 1; inp++)
                {
                  vfarfun(vars1[inp][varID][levelID], rvars1, operfunc);
                }
            }
        }

      tsID++;
    }

  cdoStreamClose(streamID2);
  cdoStreamClose(streamID1);

  cdoFinish();

  return nullptr;
}
