/*
  This file is part of CDO. CDO is a collection of Operators to
  manipulate and analyse Climate model Data.

  Copyright (C) 2003-2021 Uwe Schulzweida, <uwe.schulzweida AT mpimet.mpg.de>
  See COPYING file for copying and redistribution conditions.

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
*/

#include "process_int.h"
#include "cdo_wtime.h"
#include <mpim_grid.h>
#include "cdo_options.h"
#include "remap.h"
#include "remap_store_link.h"
#include "progress.h"
#include "cimdOmp.h"

// bicubic interpolation

// Fill the 4x4 bicubic weight table for local coordinates (iw, jw).
//
// weights[n][k]: n selects one of the four source corners, k selects the weight column.
// bicubicRemap() in this file applies column 0 to the source value and columns 1, 2, 3 to
// grad_lat, grad_lon and grad_latlon respectively.
// Every entry is a product of one cubic polynomial in jw and one in iw.
static void
bicubicSetWeights(double iw, double jw, double (&weights)[4][4])
{
  // The three cubic polynomials used along each local coordinate t.
  const auto cubicEnd = [](double t) { return t * t * (t - 1.0); };            // t^2 (t - 1)
  const auto cubicStart = [](double t) { return t * (t - 1.0) * (t - 1.0); };  // t (t - 1)^2
  const auto cubicBlend = [](double t) { return t * t * (3.0 - 2.0 * t); };    // t^2 (3 - 2t)

  const double iEnd = cubicEnd(iw);
  const double iStart = cubicStart(iw);
  const double iBlend = cubicBlend(iw);
  const double jEnd = cubicEnd(jw);
  const double jStart = cubicStart(jw);
  const double jBlend = cubicBlend(jw);

  // Per-corner factors; index n is the corner number used in weights[n][*].
  const double iValue[4] = { 1.0 - iBlend, iBlend, iBlend, 1.0 - iBlend };
  const double jValue[4] = { 1.0 - jBlend, 1.0 - jBlend, jBlend, jBlend };
  const double iSlope[4] = { iStart, iEnd, iEnd, iStart };
  const double jSlope[4] = { jStart, jStart, jEnd, jEnd };

  for (unsigned n = 0; n < 4; ++n)
    {
      auto &row = weights[n];
      row[0] = jValue[n] * iValue[n];
      row[1] = jValue[n] * iSlope[n];
      row[2] = jSlope[n] * iValue[n];
      row[3] = jSlope[n] * iSlope[n];
    }
}

// Forward declaration; the definition is not in the visible part of this file.
// The distance-weighted fallback paths below call it with the source mask, the four
// source addresses and src_lats, and only proceed when the returned value is > 0.
// NOTE(review): src_lats is passed by non-const reference, so the call may modify it - confirm in the definition.
int num_src_points(const Varray<short> &mask, const size_t (&src_add)[4], double (&src_lats)[4]);

// Replace the bicubic weights by a normalized average over the four source points.
//
// Column 0 of each row receives |src_lats[n]| divided by the sum of all four absolute
// values; columns 1..3 are set to zero.
// The callers in this file only invoke this after num_src_points() returned > 0.
// No check is made here for a zero sum.
static void
renormalizeWeights(const double (&src_lats)[4], double (&weights)[4][4])
{
  double magnitude[4];
  double total = 0.0;  // normalization factor
  for (unsigned n = 0; n < 4; ++n)
    {
      magnitude[n] = std::fabs(src_lats[n]);
      total += magnitude[n];
    }

  for (unsigned n = 0; n < 4; ++n)
    {
      auto &row = weights[n];
      row[0] = magnitude[n] / total;
      row[1] = 0.;
      row[2] = 0.;
      row[3] = 0.;
    }
}

// Report that the bicubic interpolation fell back to a distance-weighted average.
// The warning is issued on the first call, and on every call while Options::cdoVerbose is set.
static void
bicubicWarning()
{
  static bool alreadyWarned = false;

  // Stay silent once the warning has been issued, unless running verbosely.
  if (!Options::cdoVerbose && alreadyWarned) return;

  alreadyWarned = true;
  cdo_warning("Bicubic interpolation failed for some grid points - used a distance-weighted average instead!");
}

/*
  -----------------------------------------------------------------------

  This routine computes the weights for a bicubic interpolation.

  For each target cell whose mask entry is set, the four surrounding source
  points are looked up with remap_search_square(). If the search result is
  positive, all four points are unmasked and remap_find_weights() yields
  local coordinates (iw, jw), the 4x4 bicubic weights are stored for the
  cell. If the search result is negative (or was set to -1 because
  remap_find_weights() failed) and num_src_points() returns > 0, weights
  proportional to |src_lats[n]| are stored instead.
  After the loop the per-cell links are transferred into rv.

  Calls cdo_abort() if the source grid rank is not 2.

  -----------------------------------------------------------------------
*/
void
remap_bicubic_weights(RemapSearch &rsearch, RemapVars &rv)
{
  auto src_grid = rsearch.srcGrid;
  auto tgt_grid = rsearch.tgtGrid;

  if (Options::cdoVerbose) cdo_print("Called %s()", __func__);

  if (src_grid->rank != 2) cdo_abort("Can't do bicubic interpolation when source grid rank != 2");

  // Wall-clock start; only taken (and only reported at the end) in verbose mode
  auto start = Options::cdoVerbose ? cdo_get_wtime() : 0.0;

  progress::init();

  // Compute mappings from source to target grid

  auto tgt_grid_size = tgt_grid->size;

  // One link record per target cell, filled inside the loop and converted to rv afterwards
  std::vector<WeightLinks4> weightLinks(tgt_grid_size);
  weight_links_4_alloc(tgt_grid_size, weightLinks);

  // Number of processed cells, kept as a double so that findex / tgt_grid_size is a fraction
  auto findex = 0.0;

  // Loop over destination grid
  // All variables used inside the parallel region must appear in the shared() list because of default(none)

#ifdef _OPENMP
#pragma omp parallel for default(none) shared(findex, rsearch, weightLinks, tgt_grid_size, src_grid, tgt_grid, rv)
#endif
  for (size_t tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add)
    {
      // Count this iteration; only thread 0 forwards the fraction to the progress display.
      // NOTE(review): the increment is atomic, but the read in findex / tgt_grid_size below is a plain read.
#ifdef _OPENMP
#pragma omp atomic
#endif
      findex++;
      if (cdo_omp_get_thread_num() == 0) progress::update(0, 1, findex / tgt_grid_size);

      // Start with no links; masked target cells keep this state
      weightLinks[tgt_cell_add].nlinks = 0;

      if (!tgt_grid->mask[tgt_cell_add]) continue;

      const auto llpoint = remapgrid_get_lonlat(tgt_grid, tgt_cell_add);

      double src_lats[4];    //  latitudes  of four bilinear corners
      double src_lons[4];    //  longitudes of four bilinear corners
      double weights[4][4];  //  bicubic weights for four corners
      size_t src_add[4];     //  address for the four source points

      // Find nearest square of grid points on source grid
      // search_result is used as a tri-state below: > 0 found, < 0 use the fallback, 0 store nothing
      auto search_result = remap_search_square(rsearch, llpoint, src_add, src_lats, src_lons);

      // Check to see if points are mask points
      // A single masked corner sets search_result to 0, which skips both the bicubic and the fallback branch
      if (search_result > 0)
        {
          for (unsigned n = 0; n < 4; ++n)
            if (!src_grid->mask[src_add[n]]) search_result = 0;
        }

      // If point found, find local iw,jw coordinates for weights
      if (search_result > 0)
        {
          // Set before remap_find_weights() is tried; it is not reset if that call fails
          tgt_grid->cell_frac[tgt_cell_add] = 1.0;

          double iw = 0.0, jw = 0.0;  // current guess for bilinear coordinate
          if (remap_find_weights(llpoint, src_lons, src_lats, &iw, &jw))
            {
              // Successfully found iw,jw - compute weights
              bicubicSetWeights(iw, jw, weights);
              store_weightlinks_bicubic(src_add, weights, tgt_cell_add, weightLinks);
            }
          else
            {
              // Warn and route this cell into the fallback branch below
              bicubicWarning();
              search_result = -1;
            }
        }

      // Search for bicubic failed - use a distance-weighted average instead
      // (this is typically near the pole) Distance was stored in src_lats!
      // NOTE(review): when this branch is reached because remap_find_weights() failed, src_lats was filled
      // by a successful search - presumably it then still holds latitudes rather than distances; confirm.
      if (search_result < 0)
        {
          if (num_src_points(src_grid->mask, src_add, src_lats) > 0)
            {
              tgt_grid->cell_frac[tgt_cell_add] = 1.0;
              // Column 0 becomes |src_lats[n]| / sum, the gradient columns become zero
              renormalizeWeights(src_lats, weights);
              store_weightlinks_bicubic(src_add, weights, tgt_cell_add, weightLinks);
            }
        }
    }

  // Report completion to the progress display
  progress::update(0, 1, 1);

  // Transfer the per-cell links into the remap variables
  weight_links_4_to_remap_links(tgt_grid_size, weightLinks, rv);

  if (Options::cdoVerbose) cdo_print("%s: %.2f seconds", __func__, cdo_get_wtime() - start);
}  // remap_bicubic_weights

/*
  -----------------------------------------------------------------------

  This routine computes and applies the weights for a bicubic interpolation.

  -----------------------------------------------------------------------
*/

// Evaluate the bicubic interpolant for one target point.
//
// For each of the four source addresses, the source value and the three gradient fields
// (grad_lat, grad_lon, grad_latlon) are multiplied by columns 0..3 of the matching weight row.
// The sum is accumulated in a double and converted to T on return.
template <typename T>
static T
bicubicRemap(const Varray<T> &src_array, const double (&weights)[4][4], const size_t (&src_add)[4], const RemapGradients &gradients)
{
  double accum = 0.0;
  for (unsigned corner = 0; corner < 4; ++corner)
    {
      const auto cell = src_add[corner];
      const auto &w = weights[corner];
      accum += src_array[cell] * w[0] + gradients.grad_lat[cell] * w[1] + gradients.grad_lon[cell] * w[2]
               + gradients.grad_latlon[cell] * w[3];
    }

  return accum;
}

// Bicubic remapping of src_array onto the target grid, writing the result to tgt_array.
//
// A source mask is derived from the data (entries equal to missval are masked) and the
// gradients of the source field are computed with remap_gradients(). Every target cell is
// first set to missval. For target cells whose mask entry is set, the four surrounding source
// points are searched; if they are all unmasked and remap_find_weights() succeeds, the bicubic
// weights are applied directly. If the search result is negative (or was set to -1 because
// remap_find_weights() failed) and num_src_points() returns > 0, weights proportional to
// |src_lats[n]| are applied instead. Cells that match neither case keep missval.
//
// Calls cdo_abort() if the source grid rank is not 2.
template <typename T>
static void
remap_bicubic(RemapSearch &rsearch, const Varray<T> &src_array, Varray<T> &tgt_array, T missval)
{
  auto src_grid = rsearch.srcGrid;
  auto tgt_grid = rsearch.tgtGrid;

  if (Options::cdoVerbose) cdo_print("Called %s()", __func__);

  if (src_grid->rank != 2) cdo_abort("Can't do bicubic interpolation when source grid rank != 2");

  // Wall-clock start; only taken (and only reported at the end) in verbose mode
  auto start = Options::cdoVerbose ? cdo_get_wtime() : 0.0;

  progress::init();

  auto tgt_grid_size = tgt_grid->size;
  auto src_grid_size = src_grid->size;

  // Data-driven source mask: non-zero where the source value differs from missval
  Varray<short> src_grid_mask(src_grid_size);
#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(static) shared(src_grid_size, src_array, src_grid_mask, missval)
#endif
  for (size_t i = 0; i < src_grid_size; ++i) src_grid_mask[i] = !DBL_IS_EQUAL(src_array[i], missval);

  // Compute mappings from source to target grid

  // Gradient fields of the source data, consumed by bicubicRemap()
  RemapGradients gradients(src_grid->size);
  remap_gradients(*src_grid, src_grid_mask, src_array, gradients);

  // Number of processed cells, kept as a double so that findex / tgt_grid_size is a fraction
  auto findex = 0.0;

  // Loop over destination grid
  // All variables used inside the parallel region must appear in the shared() list because of default(none)

#ifdef _OPENMP
#pragma omp parallel for default(none) \
    shared(findex, rsearch, tgt_grid_size, src_grid, tgt_grid, src_array, tgt_array, missval, src_grid_mask, gradients)
#endif
  for (size_t tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add)
    {
      // Count this iteration; only thread 0 forwards the fraction to the progress display.
      // NOTE(review): the increment is atomic, but the read in findex / tgt_grid_size below is a plain read.
#ifdef _OPENMP
#pragma omp atomic
#endif
      findex++;
      if (cdo_omp_get_thread_num() == 0) progress::update(0, 1, findex / tgt_grid_size);

      // Default result; overwritten only if one of the interpolation branches below succeeds
      tgt_array[tgt_cell_add] = missval;

      if (!tgt_grid->mask[tgt_cell_add]) continue;

      const auto llpoint = remapgrid_get_lonlat(tgt_grid, tgt_cell_add);

      double src_lats[4];    //  latitudes  of four bilinear corners
      double src_lons[4];    //  longitudes of four bilinear corners
      double weights[4][4];  //  bicubic weights for four corners
      size_t src_add[4];     //  address for the four source points

      // Find nearest square of grid points on source grid
      // search_result is used as a tri-state below: > 0 found, < 0 use the fallback, 0 leave missval
      auto search_result = remap_search_square(rsearch, llpoint, src_add, src_lats, src_lons);

      // Check to see if points are mask points
      // Uses the data-driven mask built above; a single masked corner sets search_result to 0
      if (search_result > 0)
        {
          for (unsigned n = 0; n < 4; ++n)
            if (!src_grid_mask[src_add[n]]) search_result = 0;
        }

      // If point found, find local iw,jw coordinates for weights
      if (search_result > 0)
        {
          // Set before remap_find_weights() is tried; it is not reset if that call fails
          tgt_grid->cell_frac[tgt_cell_add] = 1.0;

          double iw = 0.0, jw = 0.0;  // current guess for bilinear coordinate
          if (remap_find_weights(llpoint, src_lons, src_lats, &iw, &jw))
            {
              // Successfully found iw,jw - compute weights
              bicubicSetWeights(iw, jw, weights);
              // NOTE(review): presumably reorders src_add and weights together - confirm in sort_weights_bicubic()
              sort_weights_bicubic(src_add, weights);
              tgt_array[tgt_cell_add] = bicubicRemap(src_array, weights, src_add, gradients);
            }
          else
            {
              // Warn and route this cell into the fallback branch below
              bicubicWarning();
              search_result = -1;
            }
        }

      // Search for bicubic failed - use a distance-weighted average instead
      // (this is typically near the pole) Distance was stored in src_lats!
      // NOTE(review): when this branch is reached because remap_find_weights() failed, src_lats was filled
      // by a successful search - presumably it then still holds latitudes rather than distances; confirm.
      if (search_result < 0)
        {
          if (num_src_points(src_grid_mask, src_add, src_lats) > 0)
            {
              tgt_grid->cell_frac[tgt_cell_add] = 1.0;
              // Column 0 becomes |src_lats[n]| / sum, the gradient columns become zero
              renormalizeWeights(src_lats, weights);
              sort_weights_bicubic(src_add, weights);
              tgt_array[tgt_cell_add] = bicubicRemap(src_array, weights, src_add, gradients);
            }
        }
    }

  // Report completion to the progress display
  progress::update(0, 1, 1);

  if (Options::cdoVerbose) cdo_print("%s: %.2f seconds", __func__, cdo_get_wtime() - start);
}  // remap_bicubic

// Field-level entry point: dispatch to the typed bicubic remapping according to the
// memory type of field1. Float fields use vec_f with the missing value narrowed to float;
// every other memory type uses vec_d with the missing value passed unchanged.
void
remap_bicubic(RemapSearch &rsearch, const Field &field1, Field &field2)
{
  const bool isSinglePrecision = (field1.memType == MemType::Float);

  if (isSinglePrecision)
    {
      const auto missvalFloat = static_cast<float>(field1.missval);
      remap_bicubic(rsearch, field1.vec_f, field2.vec_f, missvalFloat);
    }
  else
    {
      remap_bicubic(rsearch, field1.vec_d, field2.vec_d, field1.missval);
    }
}
