/*
  This file is part of CDO. CDO is a collection of Operators to
  manipulate and analyse Climate model Data.

  Copyright (C) 2003-2020 Uwe Schulzweida, <uwe.schulzweida AT mpimet.mpg.de>
  See COPYING file for copying and redistribution conditions.

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
*/

/*
   This module contains the following operators:

      Runstat    runrange        Running range
      Runstat    runmin          Running minimum
      Runstat    runmax          Running maximum
      Runstat    runsum          Running sum
      Runstat    runmean         Running mean
      Runstat    runavg          Running average
      Runstat    runvar          Running variance
      Runstat    runvar1         Running variance [Normalize by (n-1)]
      Runstat    runstd          Running standard deviation
      Runstat    runstd1         Running standard deviation [Normalize by (n-1)]
*/

#include <cdi.h>

#include "functs.h"
#include "process_int.h"
#include "param_conversion.h"
#include "datetime.h"

// Registers all running-statistics operators of this module.
// Each entry pairs the operator name with the func_* id that Runstat()
// later compares against the value returned by cdoOperatorF1().
static void
addOperators(void)
{
  struct OperatorEntry
  {
    const char *name;
    int func;
  };

  // clang-format off
  const OperatorEntry entries[] = {
    { "runrange", func_range },
    { "runmin",   func_min   },
    { "runmax",   func_max   },
    { "runsum",   func_sum   },
    { "runmean",  func_mean  },
    { "runavg",   func_avg   },
    { "runvar",   func_var   },
    { "runvar1",  func_var1  },
    { "runstd",   func_std   },
    { "runstd1",  func_std1  },
  };
  // clang-format on

  // Registration order is the same as the table order.
  for (const auto &entry : entries) cdoOperatorAdd(entry.name, entry.func, 0, nullptr);
}

void *
Runstat(void *process)
{
  const TimeStat timestat_date = TimeStat::MEAN;
  bool runstat_nomiss = false;

  cdoInitialize(process);

  auto envstr = getenv("RUNSTAT_NOMISS");
  if (envstr)
    {
      char *endptr;
      const auto envval = (int) strtol(envstr, &endptr, 10);
      if (envval == 1) runstat_nomiss = true;
    }

  addOperators();

  const auto operatorID = cdoOperatorID();
  auto operfunc = cdoOperatorF1(operatorID); // used in omp loop

  const auto lrange = (operfunc == func_range);
  const auto lmean = (operfunc == func_mean || operfunc == func_avg);
  const auto lstd = (operfunc == func_std || operfunc == func_std1);
  const auto lvarstd = (lstd || operfunc == func_var || operfunc == func_var1);
  const auto lvars2 = (lvarstd || lrange);
  const int divisor = (operfunc == func_std1 || operfunc == func_var1);

  auto vfarstdvar_func = lstd ? vfarstd : vfarvar;
  auto vfarcstdvar_func = lstd ? vfarcstd : vfarcvar;

  operatorInputArg("number of timesteps");
  operatorCheckArgc(1);
  auto ndates = parameter2int(cdoOperatorArgv(0));

  const auto streamID1 = cdoOpenRead(0);

  const auto vlistID1 = cdoStreamInqVlist(streamID1);
  const auto vlistID2 = vlistDuplicate(vlistID1);

  const auto taxisID1 = vlistInqTaxis(vlistID1);
  const auto taxisID2 = taxisDuplicate(taxisID1);
  taxisWithBounds(taxisID2);
  vlistDefTaxis(vlistID2, taxisID2);
  // Number of timestep will be reduced compared to the input error handling in case of not enough timesteps is done per record
  auto nsteps = vlistNtsteps(vlistID1);
  if (nsteps != -1)
    {
      nsteps -= ndates - 1;
      if (nsteps > 0) vlistDefNtsteps(vlistID2, nsteps);
    }

  const auto streamID2 = cdoOpenWrite(1);
  cdoDefVlist(streamID2, vlistID2);

  const auto maxrecs = vlistNrecs(vlistID1);
  std::vector<RecordInfo> recList(maxrecs);

  DateTimeList dtlist;
  dtlist.setStat(timestat_date);
  dtlist.setCalendar(taxisInqCalendar(taxisID1));

  VarList varList;
  varListInit(varList, vlistID1);

  int VARS_MEMTYPE = 0;
  if ((operfunc == func_min) || (operfunc == func_max)) VARS_MEMTYPE = FIELD_NAT;

  FieldVector3D vars1(ndates + 1), vars2, samp1;
  if (!runstat_nomiss) samp1.resize(ndates + 1);
  if (lvars2) vars2.resize(ndates + 1);

  for (int its = 0; its < ndates; its++)
    {
      if (!runstat_nomiss) fieldsFromVlist(vlistID1, samp1[its], FIELD_VEC);
      fieldsFromVlist(vlistID1, vars1[its], FIELD_VEC | VARS_MEMTYPE);
      if (lvars2) fieldsFromVlist(vlistID1, vars2[its], FIELD_VEC);
    }

  const auto gridsizemax = vlistGridsizeMax(vlistID1);
  std::vector<bool> imask(gridsizemax);

  int tsID = 0;
  int otsID = 0;
  int numSteps = 0;
  while (true)
    {
    FILL_FIRST_NDATES:
      const auto nrecs = cdoStreamInqTimestep(streamID1, tsID);
      if (nrecs == 0)
        {
          if (tsID < ndates)
            cdoAbort("File has less then %d timesteps!", ndates);
          else
            break;
        }

      numSteps = (tsID < ndates) ? tsID : ndates - 1;

      dtlist.taxisInqTimestep(taxisID1, numSteps);

      for (int recID = 0; recID < nrecs; recID++)
        {
          int varID, levelID;
          cdoInqRecord(streamID1, &varID, &levelID);

          if (tsID == 0)
            {
              recList[recID].varID = varID;
              recList[recID].levelID = levelID;
              recList[recID].lconst = (varList[varID].timetype == TIME_CONSTANT);
            }

          auto &rvars1 = vars1[numSteps][varID][levelID];

          auto fieldsize = rvars1.size; // used in omp loop

          cdoReadRecord(streamID1, rvars1);
          if (lrange)
            {
              vars2[numSteps][varID][levelID].nmiss = rvars1.nmiss;
              vars2[numSteps][varID][levelID].vec_d = rvars1.vec_d;
            }

          if (runstat_nomiss && rvars1.nmiss) cdoAbort("Missing values supported was swichted off by env. RUNSTAT_NOMISS!");

          if (!runstat_nomiss)
            {
              const auto missval = rvars1.missval;

              if (rvars1.memType == MemType::Float)
                for (size_t i = 0; i < fieldsize; i++) imask[i] = !DBL_IS_EQUAL(rvars1.vec_f[i], missval);
              else
                for (size_t i = 0; i < fieldsize; i++) imask[i] = !DBL_IS_EQUAL(rvars1.vec_d[i], missval);

              for (size_t i = 0; i < fieldsize; i++) samp1[numSteps][varID][levelID].vec_d[i] = (double) imask[i];

#ifdef _OPENMP
#pragma omp parallel for default(none) shared(numSteps, imask, samp1, varID, levelID, fieldsize)
#endif
              for (int inp = 0; inp < numSteps; inp++)
                {
                  auto &samp = samp1[inp][varID][levelID].vec_d;
                  for (size_t i = 0; i < fieldsize; i++)
                    if (imask[i]) samp[i]++;
                }
            }

          if (lvarstd)
            {
              vfarmoq(vars2[numSteps][varID][levelID], vars1[numSteps][varID][levelID]);
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(numSteps, vars1, vars2, varID, levelID, rvars1)
#endif
              for (int inp = 0; inp < numSteps; inp++)
                {
                  vfarsumsumq(vars1[inp][varID][levelID], vars2[inp][varID][levelID], rvars1);
                }
            }
          else if (lrange)
            {
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(numSteps, vars1, vars2, varID, levelID, rvars1)
#endif
              for (int inp = 0; inp < numSteps; inp++)
                {
                  vfarmaxmin(vars1[inp][varID][levelID], vars2[inp][varID][levelID], rvars1);
                }
            }
          else
            {
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(numSteps, vars1, varID, levelID, rvars1, operfunc)
#endif
              for (int inp = 0; inp < numSteps; inp++)
                {
                  vfarfun(vars1[inp][varID][levelID], rvars1, operfunc);
                }
            }
        }

      tsID++; // don't move this line

      if (tsID < ndates) goto FILL_FIRST_NDATES;

      for (int recID = 0; recID < maxrecs; recID++)
        {
          if (recList[recID].lconst) continue;

          const auto varID = recList[recID].varID;
          const auto levelID = recList[recID].levelID;
          const auto &rsamp1 = samp1[0][varID][levelID];
          auto &rvars1 = vars1[0][varID][levelID];
          const auto nsets = ndates;

          if (lmean)
            {
              if (!runstat_nomiss)
                vfardiv(rvars1, rsamp1);
              else
                vfarcdiv(rvars1, (double) nsets);
            }
          else if (lvarstd)
            {
              if (!runstat_nomiss)
                vfarstdvar_func(rvars1, vars2[0][varID][levelID], rsamp1, divisor);
              else
                vfarcstdvar_func(rvars1, vars2[0][varID][levelID], nsets, divisor);
            }
          else if (lrange)
            {
              vfarsub(rvars1, vars2[0][varID][levelID]);
            }
        }

      dtlist.statTaxisDefTimestep(taxisID2, ndates);
      cdoDefTimestep(streamID2, otsID);

      for (int recID = 0; recID < maxrecs; recID++)
        {
          if (otsID && recList[recID].lconst) continue;

          const auto varID = recList[recID].varID;
          const auto levelID = recList[recID].levelID;
          auto &rvars1 = vars1[0][varID][levelID];

          cdoDefRecord(streamID2, varID, levelID);
          cdoWriteRecord(streamID2, rvars1);
        }

      otsID++;

      dtlist.shift();

      vars1[ndates] = vars1[0];
      if (!runstat_nomiss) samp1[ndates] = samp1[0];
      if (lvars2) vars2[ndates] = vars2[0];

      for (int inp = 0; inp < ndates; inp++)
        {
          vars1[inp] = vars1[inp + 1];
          if (!runstat_nomiss) samp1[inp] = samp1[inp + 1];
          if (lvars2) vars2[inp] = vars2[inp + 1];
        }
    }

  cdoStreamClose(streamID2);
  cdoStreamClose(streamID1);

  cdoFinish();

  return nullptr;
}
