/*
  This file is part of CDO. CDO is a collection of Operators to
  manipulate and analyse Climate model Data.

  Copyright (C) 2003-2019 Uwe Schulzweida, <uwe.schulzweida AT mpimet.mpg.de>
  See COPYING file for copying and redistribution conditions.

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
*/


#include "dmemory.h"
#include "process_int.h"
#include "cdo_wtime.h"
#include "remap.h"
#include "remap_store_link.h"
#include "cdo_options.h"
#include "progress.h"
#include "cimdOmp.h"
#include <mpim_grid.h>

extern "C"
{
#include "lib/yac/clipping.h"
#include "lib/yac/area.h"
}

struct search_t
{
  enum yac_edge_type *src_edge_type;
  size_t srch_corners;
  size_t max_srch_cells;
  std::vector<double> partial_areas;
  std::vector<double> partial_weights;
  std::vector<grid_cell> src_grid_cells;
  std::vector<grid_cell> overlap_buffer;
};

static void
cellSearchRealloc(size_t num_srch_cells, search_t &search)
{
  if (num_srch_cells > search.max_srch_cells)
    {
      search.partial_areas.resize(num_srch_cells);
      search.partial_weights.resize(num_srch_cells);
      search.overlap_buffer.resize(num_srch_cells);
      search.src_grid_cells.resize(num_srch_cells);

      for (size_t n = search.max_srch_cells; n < num_srch_cells; ++n)
        {
          search.overlap_buffer[n].array_size = 0;
          search.overlap_buffer[n].num_corners = 0;
          search.overlap_buffer[n].edge_type = nullptr;
          search.overlap_buffer[n].coordinates_x = nullptr;
          search.overlap_buffer[n].coordinates_y = nullptr;
          search.overlap_buffer[n].coordinates_xyz = nullptr;

          search.src_grid_cells[n].array_size = search.srch_corners;
          search.src_grid_cells[n].num_corners = search.srch_corners;
          search.src_grid_cells[n].edge_type = search.src_edge_type;
          search.src_grid_cells[n].coordinates_x = new double[search.srch_corners];
          search.src_grid_cells[n].coordinates_y = new double[search.srch_corners];
          search.src_grid_cells[n].coordinates_xyz = new double[search.srch_corners][3];
        }

      search.max_srch_cells = num_srch_cells;
    }
}

static void
cellSearchFree(search_t &search)
{
  const auto max_srch_cells = search.max_srch_cells;
  for (size_t n = 0; n < max_srch_cells; n++)
    {
      if (search.overlap_buffer[n].array_size > 0)
        {
          Free(search.overlap_buffer[n].coordinates_x);
          Free(search.overlap_buffer[n].coordinates_y);
          if (search.overlap_buffer[n].coordinates_xyz) Free(search.overlap_buffer[n].coordinates_xyz);
          if (search.overlap_buffer[n].edge_type) Free(search.overlap_buffer[n].edge_type);
        }

      delete[] search.src_grid_cells[n].coordinates_x;
      delete[] search.src_grid_cells[n].coordinates_y;
      delete[] search.src_grid_cells[n].coordinates_xyz;
    }

  vectorFree(search.partial_areas);
  vectorFree(search.partial_weights);
  vectorFree(search.overlap_buffer);
  vectorFree(search.src_grid_cells);
}

static void
boundbox_from_corners1r(size_t ic, size_t nc, const double *restrict corner_lon, const double *restrict corner_lat,
                        float *restrict bound_box)
{
  const size_t inc = ic * nc;

  float clat = corner_lat[inc];
  float clon = corner_lon[inc];

  bound_box[0] = clat;
  bound_box[1] = clat;
  bound_box[2] = clon;
  bound_box[3] = clon;

  for (size_t j = 1; j < nc; ++j)
    {
      clat = corner_lat[inc + j];
      clon = corner_lon[inc + j];

      if (clat < bound_box[0]) bound_box[0] = clat;
      if (clat > bound_box[1]) bound_box[1] = clat;
      if (clon < bound_box[2]) bound_box[2] = clon;
      if (clon > bound_box[3]) bound_box[3] = clon;
    }

  if (fabsf(bound_box[3] - bound_box[2]) > PI_f)
    {
      bound_box[2] = 0;
      bound_box[3] = PI2_f;
    }
  /*
  if ( fabsf(bound_box[3] - bound_box[2]) > PI_f )
    {
      if ( bound_box[3] > bound_box[2] && (bound_box[3]-PI2_f) < 0.0f )
        {
          float tmp = bound_box[2];
          bound_box[2] = bound_box[3] - PI2_f;
          bound_box[3] = tmp;
        }
    }
  */
}

static inline double
gridcell_area(grid_cell &cell)
{
  return yac_huiliers_area(cell);
}

static void
cdo_compute_overlap_areas(size_t N, search_t &search, grid_cell &target_cell)
{
  auto overlap_cells = search.overlap_buffer.data();

  // Do the clipping and get the cell for the overlapping area
  yac_cell_clipping(N, search.src_grid_cells.data(), target_cell, overlap_cells);

  // Get the partial areas for the overlapping regions
  for (size_t n = 0; n < N; n++) search.partial_areas[n] = gridcell_area(overlap_cells[n]);

#ifdef VERBOSE
  for (size_t n = 0; n < N; n++) printf("overlap area : %lf\n", search.partial_areas[n]);
#endif
}

static double constexpr tol = 1.0e-12;

enum cell_type
{
  UNDEF_CELL,
  LON_LAT_CELL,
  LAT_CELL,
  GREAT_CIRCLE_CELL,
  MIXED_CELL
};

static void
cdo_compute_concave_overlap_areas(size_t N, search_t &search, const grid_cell &target_cell, double target_node_x,
                                  double target_node_y)
{
  auto &partial_areas = search.partial_areas;
  auto overlap_cells = search.overlap_buffer.data();
  auto source_cell = search.src_grid_cells.data();

  double coordinates_x[3] = { -1, -1, -1 };
  double coordinates_y[3] = { -1, -1, -1 };
  double coordinates_xyz[3][3] = { { -1, -1, -1 }, { -1, -1, -1 }, { -1, -1, -1 } };
  enum yac_edge_type edge_types[3] = { GREAT_CIRCLE, GREAT_CIRCLE, GREAT_CIRCLE };

  grid_cell target_partial_cell;
  target_partial_cell.array_size = 3;
  target_partial_cell.coordinates_x = coordinates_x;
  target_partial_cell.coordinates_y = coordinates_y;
  target_partial_cell.coordinates_xyz = coordinates_xyz;
  target_partial_cell.edge_type = edge_types;
  target_partial_cell.num_corners = 3;

  // Do the clipping and get the cell for the overlapping area

  for (size_t n = 0; n < N; n++) partial_areas[n] = 0.0;

  // common node point to all partial target cells
  target_partial_cell.coordinates_x[0] = target_node_x;
  target_partial_cell.coordinates_y[0] = target_node_y;

  gcLLtoXYZ(target_node_x, target_node_y, target_partial_cell.coordinates_xyz[0]);

  for (unsigned num_corners = 0; num_corners < target_cell.num_corners; ++num_corners)
    {
      const unsigned corner_a = num_corners;
      const unsigned corner_b = (num_corners + 1) % target_cell.num_corners;

      // skip clipping and area calculation for degenerated triangles
      //
      // If this is not sufficient, instead we can try something like:
      //
      //     point_list target_list
      //     init_point_list(&target_list);
      //     generate_point_list(&target_list, target_cell);
      //     grid_cell temp_target_cell;
      //     generate_overlap_cell(target_list, temp_target_cell);
      //     free_point_list(&target_list);
      //
      // and use temp_target_cell for triangulation.
      //
      // Compared to the if statement below the alternative seems to be quite costly.

      if (((std::fabs(target_cell.coordinates_xyz[corner_a][0] - target_cell.coordinates_xyz[corner_b][0]) < tol)
           && (std::fabs(target_cell.coordinates_xyz[corner_a][1] - target_cell.coordinates_xyz[corner_b][1]) < tol)
           && (std::fabs(target_cell.coordinates_xyz[corner_a][2] - target_cell.coordinates_xyz[corner_b][2]) < tol))
          || ((std::fabs(target_cell.coordinates_xyz[corner_a][0] - target_partial_cell.coordinates_xyz[0][0]) < tol)
              && (std::fabs(target_cell.coordinates_xyz[corner_a][1] - target_partial_cell.coordinates_xyz[0][1]) < tol)
              && (std::fabs(target_cell.coordinates_xyz[corner_a][2] - target_partial_cell.coordinates_xyz[0][2]) < tol))
          || ((std::fabs(target_cell.coordinates_xyz[corner_b][0] - target_partial_cell.coordinates_xyz[0][0]) < tol)
              && (std::fabs(target_cell.coordinates_xyz[corner_b][1] - target_partial_cell.coordinates_xyz[0][1]) < tol)
              && (std::fabs(target_cell.coordinates_xyz[corner_b][2] - target_partial_cell.coordinates_xyz[0][2]) < tol)))
        continue;

      target_partial_cell.coordinates_x[1] = target_cell.coordinates_x[corner_a];
      target_partial_cell.coordinates_y[1] = target_cell.coordinates_y[corner_a];
      target_partial_cell.coordinates_x[2] = target_cell.coordinates_x[corner_b];
      target_partial_cell.coordinates_y[2] = target_cell.coordinates_y[corner_b];

      target_partial_cell.coordinates_xyz[1][0] = target_cell.coordinates_xyz[corner_a][0];
      target_partial_cell.coordinates_xyz[1][1] = target_cell.coordinates_xyz[corner_a][1];
      target_partial_cell.coordinates_xyz[1][2] = target_cell.coordinates_xyz[corner_a][2];
      target_partial_cell.coordinates_xyz[2][0] = target_cell.coordinates_xyz[corner_b][0];
      target_partial_cell.coordinates_xyz[2][1] = target_cell.coordinates_xyz[corner_b][1];
      target_partial_cell.coordinates_xyz[2][2] = target_cell.coordinates_xyz[corner_b][2];

      yac_cell_clipping(N, source_cell, target_partial_cell, overlap_cells);

      // Get the partial areas for the overlapping regions as sum over the partial target cells.
      for (size_t n = 0; n < N; n++)
        {
          partial_areas[n] += gridcell_area(overlap_cells[n]);
          // we cannot use pole_area because it is rather inaccurate for great
          // circle edges that are nearly circles of longitude
          // partial_areas[n] = pole_area (overlap_cells[n]);
        }
    }

#ifdef VERBOSE
  for (size_t n = 0; n < N; n++) printf("overlap area %zu: %lf\n", n, partial_areas[n]);
#endif
}

//#endif

static int
get_lonlat_circle_index(RemapGrid *remap_grid)
{
  int lonlat_circle_index = -1;

  if (remap_grid->num_cell_corners == 4)
    {
      if (remap_grid->type == RemapGridType::Reg2D)
        {
          lonlat_circle_index = 1;
        }
      else
        {
          const auto cell_corner_lon = remap_grid->cell_corner_lon;
          const auto cell_corner_lat = remap_grid->cell_corner_lat;
          const auto gridsize = remap_grid->size;
          size_t num_i = 0, num_eq0 = 0, num_eq1 = 0;

          long iadd = gridsize / 30 - 1;
          if (iadd == 0) iadd++;

          for (size_t i = 0; i < gridsize; i += iadd)
            {
              num_i++;

              if (IS_EQUAL(cell_corner_lon[i * 4 + 1], cell_corner_lon[i * 4 + 2])
                  && IS_EQUAL(cell_corner_lon[i * 4 + 3], cell_corner_lon[i * 4 + 0])
                  && IS_EQUAL(cell_corner_lat[i * 4 + 0], cell_corner_lat[i * 4 + 1])
                  && IS_EQUAL(cell_corner_lat[i * 4 + 2], cell_corner_lat[i * 4 + 3]))
                {
                  num_eq1++;
                }
              else if (IS_EQUAL(cell_corner_lon[i * 4 + 0], cell_corner_lon[i * 4 + 1])
                       && IS_EQUAL(cell_corner_lon[i * 4 + 2], cell_corner_lon[i * 4 + 3])
                       && IS_EQUAL(cell_corner_lat[i * 4 + 1], cell_corner_lat[i * 4 + 2])
                       && IS_EQUAL(cell_corner_lat[i * 4 + 3], cell_corner_lat[i * 4 + 0]))
                {
                  num_eq0++;
                }
            }

          if (num_i == num_eq1) lonlat_circle_index = 1;
          if (num_i == num_eq0) lonlat_circle_index = 0;
        }
    }

  // printf("lonlat_circle_index %d\n", lonlat_circle_index);

  return lonlat_circle_index;
}

static void
remapNormalizeWeights(RemapGrid *tgt_grid, RemapVars &rv)
{
  // Include centroids in weights and normalize using destination area if requested
  auto num_links = rv.num_links;
  auto num_wts = rv.num_wts;

  if (rv.normOpt == NormOpt::DESTAREA)
    {
      const double *cell_area = tgt_grid->cell_area.data();
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(num_wts, num_links, rv, cell_area)
#endif
      for (size_t n = 0; n < num_links; ++n)
        {
          const auto i = rv.tgt_cell_add[n];  // current linear address for target grid cell
          const double norm_factor = IS_NOT_EQUAL(cell_area[i], 0) ? 1. / cell_area[i] : 0.;
          rv.wts[n * num_wts] *= norm_factor;
        }
    }
  else if (rv.normOpt == NormOpt::FRACAREA)
    {
      const double *cell_frac = tgt_grid->cell_frac.data();
#ifdef _OPENMP
#pragma omp parallel for default(none) shared(num_wts, num_links, rv, cell_frac)
#endif
      for (size_t n = 0; n < num_links; ++n)
        {
          const auto i = rv.tgt_cell_add[n];  // current linear address for target grid cell
          const double norm_factor = IS_NOT_EQUAL(cell_frac[i], 0) ? 1. / cell_frac[i] : 0.;
          rv.wts[n * num_wts] *= norm_factor;
        }
    }
  else if (rv.normOpt == NormOpt::NONE)
    {
    }
}

static void
remapNormalizeWeights(NormOpt normOpt, double cell_area, double cell_frac, size_t num_weights, double *weights)
{
  if (normOpt == NormOpt::DESTAREA)
    {
      const double norm_factor = IS_NOT_EQUAL(cell_area, 0) ? 1. / cell_area : 0.;
      for (size_t n = 0; n < num_weights; ++n) weights[n] *= norm_factor;
    }
  else if (normOpt == NormOpt::FRACAREA)
    {
      const double norm_factor = IS_NOT_EQUAL(cell_frac, 0) ? 1. / cell_frac : 0.;
      for (size_t n = 0; n < num_weights; ++n) weights[n] *= norm_factor;
    }
  else if (normOpt == NormOpt::NONE)
    {
    }
}

static void
setCellCoordinatesYac(RemapGridType remapGridType, const size_t cellAddr, const size_t numCellCorners, RemapGrid *remap_grid,
                      grid_cell &yac_grid_cell)
{
  auto x = yac_grid_cell.coordinates_x;
  auto y = yac_grid_cell.coordinates_y;

  if (remapGridType == RemapGridType::Reg2D)
    {
      const auto nx = remap_grid->dims[0];
      const auto iy = cellAddr / nx;
      const auto ix = cellAddr - iy * nx;
      const auto reg2d_corner_lon = remap_grid->reg2d_corner_lon;
      const auto reg2d_corner_lat = remap_grid->reg2d_corner_lat;
      x[0] = reg2d_corner_lon[ix];
      y[0] = reg2d_corner_lat[iy];
      x[1] = reg2d_corner_lon[ix + 1];
      y[1] = reg2d_corner_lat[iy];
      x[2] = reg2d_corner_lon[ix + 1];
      y[2] = reg2d_corner_lat[iy + 1];
      x[3] = reg2d_corner_lon[ix];
      y[3] = reg2d_corner_lat[iy + 1];
    }
  else
    {
      const auto cell_corner_lon = remap_grid->cell_corner_lon;
      const auto cell_corner_lat = remap_grid->cell_corner_lat;
      for (size_t ic = 0; ic < numCellCorners; ++ic)
        {
          x[ic] = cell_corner_lon[cellAddr * numCellCorners + ic];
          y[ic] = cell_corner_lat[cellAddr * numCellCorners + ic];
        }
    }

  double(*restrict xyz)[3] = yac_grid_cell.coordinates_xyz;
  for (size_t ic = 0; ic < numCellCorners; ++ic) gcLLtoXYZ(x[ic], y[ic], xyz[ic]);
}

static void
setCoordinatesYac(const size_t numCells, RemapGridType remapGridType, const std::vector<size_t> &cellIndices,
                  const size_t numCellCorners, RemapGrid *remap_grid, std::vector<grid_cell> &yac_grid_cell)
{
  for (size_t n = 0; n < numCells; ++n)
    {
      setCellCoordinatesYac(remapGridType, cellIndices[n], numCellCorners, remap_grid, yac_grid_cell[n]);
    }
}

static void
reg2d_bound_box(RemapGrid *remap_grid, double *grid_bound_box)
{
  const auto nx = remap_grid->dims[0];
  const auto ny = remap_grid->dims[1];
  const auto reg2d_corner_lon = remap_grid->reg2d_corner_lon;
  const auto reg2d_corner_lat = remap_grid->reg2d_corner_lat;

  grid_bound_box[0] = reg2d_corner_lat[0];
  grid_bound_box[1] = reg2d_corner_lat[ny];
  if (grid_bound_box[0] > grid_bound_box[1])
    {
      grid_bound_box[0] = reg2d_corner_lat[ny];
      grid_bound_box[1] = reg2d_corner_lat[0];
    }
  grid_bound_box[2] = reg2d_corner_lon[0];
  grid_bound_box[3] = reg2d_corner_lon[nx];
}

static void
greatCircleTypeInit(std::vector<enum yac_edge_type> &great_circle_type, size_t src_num_cell_corners, size_t tgt_num_cell_corners)
{
  auto max_num_cell_corners = src_num_cell_corners;
  if (tgt_num_cell_corners > max_num_cell_corners) max_num_cell_corners = tgt_num_cell_corners;

  great_circle_type.resize(max_num_cell_corners);
  for (size_t i = 0; i < max_num_cell_corners; ++i) great_circle_type[i] = GREAT_CIRCLE;
}

static void
scaleCellFrac(size_t numCells, std::vector<double> &cellFrac, const std::vector<double> &cellArea)
{
  for (size_t n = 0; n < numCells; ++n)
    if (IS_NOT_EQUAL(cellArea[n], 0)) cellFrac[n] /= cellArea[n];
}

static void
yacGridCellInit(grid_cell &cell, size_t numCellCorners, enum yac_edge_type *edgeType)
{
  cell.array_size = numCellCorners;
  cell.num_corners = numCellCorners;
  cell.edge_type = edgeType;
  cell.coordinates_x = new double[numCellCorners];
  cell.coordinates_y = new double[numCellCorners];
  cell.coordinates_xyz = new double[numCellCorners][3];
}

static void
yacGridCellFree(grid_cell &cell)
{
  delete[] cell.coordinates_x;
  delete[] cell.coordinates_y;
  delete[] cell.coordinates_xyz;
}

void
remapConservWeights(RemapSearch &rsearch, RemapVars &rv)
{
  auto src_grid = rsearch.srcGrid;
  auto tgt_grid = rsearch.tgtGrid;

  bool lcheck = true;

  // Variables necessary if segment manages to hit pole
  auto srcGridType = src_grid->type;
  auto tgtGridType = tgt_grid->type;

  if (Options::cdoVerbose) cdoPrint("Called %s()", __func__);

  Progress::init();

  double start = Options::cdoVerbose ? cdo_get_wtime() : 0;

  auto src_grid_size = src_grid->size;
  auto tgt_grid_size = tgt_grid->size;

  auto src_num_cell_corners = src_grid->num_cell_corners;
  auto tgt_num_cell_corners = tgt_grid->num_cell_corners;

  enum yac_edge_type lonlat_circle_type[] = { LON_CIRCLE, LAT_CIRCLE, LON_CIRCLE, LAT_CIRCLE, LON_CIRCLE };
  std::vector<enum yac_edge_type> great_circle_type;
  greatCircleTypeInit(great_circle_type, src_num_cell_corners, tgt_num_cell_corners);

  auto src_edge_type = great_circle_type.data();
  auto tgt_edge_type = great_circle_type.data();

  enum cell_type target_cell_type = UNDEF_CELL;

  if (src_num_cell_corners == 4)
    {
      const auto lonlat_circle_index = get_lonlat_circle_index(src_grid);
      if (lonlat_circle_index >= 0) src_edge_type = &lonlat_circle_type[lonlat_circle_index];
    }

  if (tgt_num_cell_corners == 4)
    {
      const auto lonlat_circle_index = get_lonlat_circle_index(tgt_grid);
      if (lonlat_circle_index >= 0)
        {
          target_cell_type = LON_LAT_CELL;
          tgt_edge_type = &lonlat_circle_type[lonlat_circle_index];
        }
    }

  if (!(tgt_num_cell_corners < 4 || target_cell_type == LON_LAT_CELL))
    {
      if (tgt_grid->cell_center_lon == nullptr || tgt_grid->cell_center_lat == nullptr)
        cdoAbort("Internal problem (%s): missing target point coordinates!", __func__);
    }

  std::vector<grid_cell> tgt_grid_cell(Threading::ompNumThreads);
  for (int i = 0; i < Threading::ompNumThreads; ++i) yacGridCellInit(tgt_grid_cell[i], tgt_num_cell_corners, tgt_edge_type);

  std::vector<search_t> search(Threading::ompNumThreads);
  for (int i = 0; i < Threading::ompNumThreads; ++i)
    {
      search[i].srch_corners = src_num_cell_corners;
      search[i].src_edge_type = src_edge_type;
      search[i].max_srch_cells = 0;
    }

  std::vector<std::vector<size_t>> srch_add(Threading::ompNumThreads);
  for (int i = 0; i < Threading::ompNumThreads; ++i) srch_add[i].resize(src_grid_size);

  auto srch_corners = src_num_cell_corners;  // num of corners of srch cells

  double src_grid_bound_box[4];
  if (srcGridType == RemapGridType::Reg2D) reg2d_bound_box(src_grid, src_grid_bound_box);

  std::vector<WeightLinks> weightLinks(tgt_grid_size);

  double findex = 0;

  size_t num_srch_cells_stat[3];
  num_srch_cells_stat[0] = 0;
  num_srch_cells_stat[1] = 100000;
  num_srch_cells_stat[2] = 0;

  extern CellSearchMethod cellSearchMethod;
  bool useCellsearch = cellSearchMethod == CellSearchMethod::spherepart || srcGridType == RemapGridType::Reg2D;

  // Loop over destination grid

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none)                                                                    \
    shared(findex, rsearch, srcGridType, tgtGridType, src_grid_bound_box, rv, Options::cdoVerbose, tgt_num_cell_corners,    \
           target_cell_type, weightLinks, srch_corners, src_grid, tgt_grid, tgt_grid_size, src_grid_size, search, srch_add, \
           tgt_grid_cell, num_srch_cells_stat, useCellsearch)
#endif
  for (size_t tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add)
    {
      double partial_weight;
      size_t src_cell_add;  // current linear address for source grid cell
      size_t num_weights, num_weights_old;
      const int ompthID = cdo_omp_get_thread_num();

#ifdef _OPENMP
#pragma omp atomic
#endif
      findex++;
      if (ompthID == 0) Progress::update(0, 1, findex / tgt_grid_size);

      weightLinks[tgt_cell_add].nlinks = 0;

      if (!tgt_grid->mask[tgt_cell_add]) continue;

      setCellCoordinatesYac(tgtGridType, tgt_cell_add, tgt_num_cell_corners, tgt_grid, tgt_grid_cell[ompthID]);

      // Get search cells
      size_t num_srch_cells;
      if (useCellsearch)
        {
          // num_srch_cells = remapSearchCells(rsearch, target_cell_type == LON_LAT_CELL, tgt_grid_cell[ompthID],
          // srch_add[ompthID].data());
          num_srch_cells
              = remapSearchCells(rsearch, tgtGridType == RemapGridType::Reg2D, tgt_grid_cell[ompthID], srch_add[ompthID].data());
        }
      else
        {
          float tgt_cell_bound_box_r[4];
          boundbox_from_corners1r(tgt_cell_add, tgt_num_cell_corners, tgt_grid->cell_corner_lon, tgt_grid->cell_corner_lat,
                                  tgt_cell_bound_box_r);

          num_srch_cells
              = get_srch_cells(tgt_cell_add, rsearch.tgtBins, rsearch.srcBins, tgt_cell_bound_box_r, srch_add[ompthID].data());
        }

      if (1 && Options::cdoVerbose)
        {
          num_srch_cells_stat[0] += num_srch_cells;
          if (num_srch_cells < num_srch_cells_stat[1]) num_srch_cells_stat[1] = num_srch_cells;
          if (num_srch_cells > num_srch_cells_stat[2]) num_srch_cells_stat[2] = num_srch_cells;
        }

      if (0 && Options::cdoVerbose) printf("tgt_cell_add %zu  num_srch_cells %zu\n", tgt_cell_add, num_srch_cells);

      if (num_srch_cells == 0) continue;

      // Create search arrays

      cellSearchRealloc(num_srch_cells, search[ompthID]);

      auto partial_areas = &search[ompthID].partial_areas[0];
      auto partial_weights = &search[ompthID].partial_weights[0];

      setCoordinatesYac(num_srch_cells, srcGridType, srch_add[ompthID], srch_corners, src_grid, search[ompthID].src_grid_cells);

      if (tgt_num_cell_corners < 4 || target_cell_type == LON_LAT_CELL)
        {
          cdo_compute_overlap_areas(num_srch_cells, search[ompthID], tgt_grid_cell[ompthID]);
        }
      else
        {
          cdo_compute_concave_overlap_areas(num_srch_cells, search[ompthID], tgt_grid_cell[ompthID],
                                            tgt_grid->cell_center_lon[tgt_cell_add], tgt_grid->cell_center_lat[tgt_cell_add]);
        }

      num_weights = 0;
      for (size_t n = 0; n < num_srch_cells; ++n)
        {
          if (partial_areas[n] > 0)
            {
              partial_areas[num_weights] = partial_areas[n];
              srch_add[ompthID][num_weights] = srch_add[ompthID][n];
              num_weights++;
            }
        }

      const auto tgt_area = gridcell_area(tgt_grid_cell[ompthID]);

      for (size_t n = 0; n < num_weights; ++n) partial_weights[n] = partial_areas[n] / tgt_area;

      if (rv.normOpt == NormOpt::FRACAREA) yac_correct_weights(num_weights, partial_weights);

      for (size_t n = 0; n < num_weights; ++n) partial_weights[n] *= tgt_area;

      tgt_grid->cell_area[tgt_cell_add] = tgt_area;

      num_weights_old = num_weights;
      num_weights = 0;
      for (size_t n = 0; n < num_weights_old; ++n)
        {
          src_cell_add = srch_add[ompthID][n];
          if (partial_weights[n] <= 0.) src_cell_add = src_grid_size;
          if (src_cell_add != src_grid_size)
            {
              partial_weights[num_weights] = partial_weights[n];
              srch_add[ompthID][num_weights] = src_cell_add;
              num_weights++;
            }
        }

      for (size_t n = 0; n < num_weights; ++n)
        {
          partial_weight = partial_weights[n];
          src_cell_add = srch_add[ompthID][n];

#ifdef _OPENMP
#pragma omp atomic
#endif
          src_grid->cell_area[src_cell_add] += partial_weight;
        }

      num_weights_old = num_weights;
      num_weights = 0;
      for (size_t n = 0; n < num_weights_old; ++n)
        {
          src_cell_add = srch_add[ompthID][n];

          /*
            Store the appropriate addresses and weights.
            Also add contributions to cell areas.
            The source grid mask is the master mask.
          */
          if (src_grid->mask[src_cell_add])
            {
              partial_weights[num_weights] = partial_weights[n];
              srch_add[ompthID][num_weights] = src_cell_add;
              num_weights++;
            }
        }

      for (size_t n = 0; n < num_weights; ++n)
        {
          partial_weight = partial_weights[n];
          src_cell_add = srch_add[ompthID][n];

#ifdef _OPENMP
#pragma omp atomic
#endif
          src_grid->cell_frac[src_cell_add] += partial_weight;

          tgt_grid->cell_frac[tgt_cell_add] += partial_weight;
        }

      storeWeightLinks(1, num_weights, srch_add[ompthID].data(), partial_weights, tgt_cell_add, weightLinks);
    }

  Progress::update(0, 1, 1);

  if (1 && Options::cdoVerbose)
    {
      cdoPrint("Num search cells min,mean,max :  %zu  %3.1f  %zu", num_srch_cells_stat[1],
               num_srch_cells_stat[0] / (double) tgt_grid_size, num_srch_cells_stat[2]);
    }

  // Finished with all cells: deallocate search arrays

  for (int ompthID = 0; ompthID < Threading::ompNumThreads; ++ompthID)
    {
      cellSearchFree(search[ompthID]);
      yacGridCellFree(tgt_grid_cell[ompthID]);
    }

  weightLinksToRemapLinks(1, tgt_grid_size, weightLinks, rv);

  // Normalize weights using destination area if requested
  remapNormalizeWeights(tgt_grid, rv);

  if (Options::cdoVerbose) cdoPrint("Total number of links = %zu", rv.num_links);

  scaleCellFrac(src_grid_size, src_grid->cell_frac, src_grid->cell_area);
  scaleCellFrac(tgt_grid_size, tgt_grid->cell_frac, tgt_grid->cell_area);

  // Perform some error checking on final weights
  if (lcheck)
    {
      remapCheckArea(src_grid_size, &src_grid->cell_area[0], "Source");
      remapCheckArea(tgt_grid_size, &tgt_grid->cell_area[0], "Target");

      remapVarsCheckWeights(rv);
    }

  if (Options::cdoVerbose) cdoPrint("Cells search: %.2f seconds", cdo_get_wtime() - start);
}  // remapConservWeights


static void
conservRemap(double *restrict tgt_point, const double *restrict src_array, size_t num_weights, const double *wgts, const size_t *src_add)
{
  *tgt_point = 0.;
  for (unsigned n = 0; n < num_weights; ++n) *tgt_point += src_array[src_add[n]]*wgts[n];
}

void
remapConserv(NormOpt normOpt, RemapSearch &rsearch, const double *restrict src_array, double *restrict tgt_array, double missval)
{
  auto src_grid = rsearch.srcGrid;
  auto tgt_grid = rsearch.tgtGrid;

  bool lcheck = true;

  // Variables necessary if segment manages to hit pole
  auto srcGridType = src_grid->type;
  auto tgtGridType = tgt_grid->type;

  if (Options::cdoVerbose) cdoPrint("Called %s()", __func__);

  Progress::init();

  double start = Options::cdoVerbose ? cdo_get_wtime() : 0;

  auto src_grid_size = src_grid->size;
  auto tgt_grid_size = tgt_grid->size;

  std::vector<bool> src_grid_mask(src_grid_size);
#ifdef _OPENMP
#pragma omp parallel for default(none) schedule(static) shared(src_grid_size, src_array, src_grid_mask, missval)
#endif
  for (size_t i = 0; i < src_grid_size; ++i)
    src_grid_mask[i] = !DBL_IS_EQUAL(src_array[i], missval);

  auto src_num_cell_corners = src_grid->num_cell_corners;
  auto tgt_num_cell_corners = tgt_grid->num_cell_corners;

  enum yac_edge_type lonlat_circle_type[] = { LON_CIRCLE, LAT_CIRCLE, LON_CIRCLE, LAT_CIRCLE, LON_CIRCLE };
  std::vector<enum yac_edge_type> great_circle_type;
  greatCircleTypeInit(great_circle_type, src_num_cell_corners, tgt_num_cell_corners);

  auto src_edge_type = great_circle_type.data();
  auto tgt_edge_type = great_circle_type.data();

  enum cell_type target_cell_type = UNDEF_CELL;

  if (src_num_cell_corners == 4)
    {
      const auto lonlat_circle_index = get_lonlat_circle_index(src_grid);
      if (lonlat_circle_index >= 0) src_edge_type = &lonlat_circle_type[lonlat_circle_index];
    }

  if (tgt_num_cell_corners == 4)
    {
      const auto lonlat_circle_index = get_lonlat_circle_index(tgt_grid);
      if (lonlat_circle_index >= 0)
        {
          target_cell_type = LON_LAT_CELL;
          tgt_edge_type = &lonlat_circle_type[lonlat_circle_index];
        }
    }

  if (!(tgt_num_cell_corners < 4 || target_cell_type == LON_LAT_CELL))
    {
      if (tgt_grid->cell_center_lon == nullptr || tgt_grid->cell_center_lat == nullptr)
        cdoAbort("Internal problem (%s): missing target point coordinates!", __func__);
    }

  std::vector<grid_cell> tgt_grid_cell(Threading::ompNumThreads);
  for (int i = 0; i < Threading::ompNumThreads; ++i) yacGridCellInit(tgt_grid_cell[i], tgt_num_cell_corners, tgt_edge_type);

  std::vector<search_t> search(Threading::ompNumThreads);
  for (int i = 0; i < Threading::ompNumThreads; ++i)
    {
      search[i].srch_corners = src_num_cell_corners;
      search[i].src_edge_type = src_edge_type;
      search[i].max_srch_cells = 0;
    }

  std::vector<std::vector<size_t>> srch_add(Threading::ompNumThreads);
  for (int i = 0; i < Threading::ompNumThreads; ++i) srch_add[i].resize(src_grid_size);

  size_t srch_corners = src_num_cell_corners;  // num of corners of srch cells

  double src_grid_bound_box[4];
  if (srcGridType == RemapGridType::Reg2D) reg2d_bound_box(src_grid, src_grid_bound_box);

  double findex = 0;

  size_t num_srch_cells_stat[3];
  num_srch_cells_stat[0] = 0;
  num_srch_cells_stat[1] = 100000;
  num_srch_cells_stat[2] = 0;

  extern CellSearchMethod cellSearchMethod;
  bool useCellsearch = cellSearchMethod == CellSearchMethod::spherepart || srcGridType == RemapGridType::Reg2D;

  // Loop over destination grid

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) default(none)                                                                    \
    shared(findex, rsearch, srcGridType, tgtGridType, src_grid_bound_box, Options::cdoVerbose, tgt_num_cell_corners,    \
           target_cell_type, srch_corners, src_grid, tgt_grid, tgt_grid_size, src_grid_size, search, srch_add, \
           tgt_grid_cell, num_srch_cells_stat, useCellsearch, src_array, tgt_array, missval, normOpt, src_grid_mask)
#endif
  for (size_t tgt_cell_add = 0; tgt_cell_add < tgt_grid_size; ++tgt_cell_add)
    {
      size_t src_cell_add;  // current linear address for source grid cell
      size_t num_weights, num_weights_old;
      const int ompthID = cdo_omp_get_thread_num();

#ifdef _OPENMP
#pragma omp atomic
#endif
      findex++;
      if (ompthID == 0) Progress::update(0, 1, findex / tgt_grid_size);

      tgt_array[tgt_cell_add] = missval;

      if (!tgt_grid->mask[tgt_cell_add]) continue;

      setCellCoordinatesYac(tgtGridType, tgt_cell_add, tgt_num_cell_corners, tgt_grid, tgt_grid_cell[ompthID]);

      // Get search cells
      size_t num_srch_cells;
      if (useCellsearch)
        {
          // num_srch_cells = remapSearchCells(rsearch, target_cell_type == LON_LAT_CELL, tgt_grid_cell[ompthID],
          // srch_add[ompthID].data());
          num_srch_cells
              = remapSearchCells(rsearch, tgtGridType == RemapGridType::Reg2D, tgt_grid_cell[ompthID], srch_add[ompthID].data());
        }
      else
        {
          float tgt_cell_bound_box_r[4];
          boundbox_from_corners1r(tgt_cell_add, tgt_num_cell_corners, tgt_grid->cell_corner_lon, tgt_grid->cell_corner_lat,
                                  tgt_cell_bound_box_r);

          num_srch_cells
              = get_srch_cells(tgt_cell_add, rsearch.tgtBins, rsearch.srcBins, tgt_cell_bound_box_r, srch_add[ompthID].data());
        }

      if (1 && Options::cdoVerbose)
        {
          num_srch_cells_stat[0] += num_srch_cells;
          if (num_srch_cells < num_srch_cells_stat[1]) num_srch_cells_stat[1] = num_srch_cells;
          if (num_srch_cells > num_srch_cells_stat[2]) num_srch_cells_stat[2] = num_srch_cells;
        }

      if (0 && Options::cdoVerbose) printf("tgt_cell_add %zu  num_srch_cells %zu\n", tgt_cell_add, num_srch_cells);

      if (num_srch_cells == 0) continue;

      // Create search arrays

      cellSearchRealloc(num_srch_cells, search[ompthID]);

      auto partial_areas = &search[ompthID].partial_areas[0];
      auto partial_weights = &search[ompthID].partial_weights[0];

      setCoordinatesYac(num_srch_cells, srcGridType, srch_add[ompthID], srch_corners, src_grid, search[ompthID].src_grid_cells);

      if (tgt_num_cell_corners < 4 || target_cell_type == LON_LAT_CELL)
        {
          cdo_compute_overlap_areas(num_srch_cells, search[ompthID], tgt_grid_cell[ompthID]);
        }
      else
        {
          cdo_compute_concave_overlap_areas(num_srch_cells, search[ompthID], tgt_grid_cell[ompthID],
                                            tgt_grid->cell_center_lon[tgt_cell_add], tgt_grid->cell_center_lat[tgt_cell_add]);
        }

      num_weights = 0;
      for (size_t n = 0; n < num_srch_cells; ++n)
        {
          if (partial_areas[n] > 0)
            {
              partial_areas[num_weights] = partial_areas[n];
              srch_add[ompthID][num_weights] = srch_add[ompthID][n];
              num_weights++;
            }
        }

      const auto tgt_area = gridcell_area(tgt_grid_cell[ompthID]);

      for (size_t n = 0; n < num_weights; ++n) partial_weights[n] = partial_areas[n] / tgt_area;

      if (normOpt == NormOpt::FRACAREA) yac_correct_weights(num_weights, partial_weights);

      for (size_t n = 0; n < num_weights; ++n) partial_weights[n] *= tgt_area;

      tgt_grid->cell_area[tgt_cell_add] = tgt_area;

      num_weights_old = num_weights;
      num_weights = 0;
      for (size_t n = 0; n < num_weights_old; ++n)
        {
          src_cell_add = srch_add[ompthID][n];
          if (partial_weights[n] <= 0.) src_cell_add = src_grid_size;
          if (src_cell_add != src_grid_size)
            {
              partial_weights[num_weights] = partial_weights[n];
              srch_add[ompthID][num_weights] = src_cell_add;
              num_weights++;
            }
        }

      num_weights_old = num_weights;
      num_weights = 0;
      for (size_t n = 0; n < num_weights_old; ++n)
        {
          src_cell_add = srch_add[ompthID][n];

          /*
            Store the appropriate addresses and weights.
            Also add contributions to cell areas.
            The source grid mask is the master mask.
          */
          if (src_grid_mask[src_cell_add])
            {
              partial_weights[num_weights] = partial_weights[n];
              srch_add[ompthID][num_weights] = src_cell_add;
              num_weights++;
            }
        }

      double cell_frac = 0;
      for (size_t n = 0; n < num_weights; ++n) cell_frac += partial_weights[n];

      tgt_grid->cell_frac[tgt_cell_add] = cell_frac;

      if (num_weights)
        {
          sort_add_and_wgts(num_weights, srch_add[ompthID].data(), partial_weights);
          // Normalize weights using destination area if requested
          remapNormalizeWeights(normOpt, tgt_area, cell_frac, num_weights, partial_weights);
          conservRemap(&tgt_array[tgt_cell_add], src_array, num_weights, partial_weights, srch_add[ompthID].data());
        }
    }

  Progress::update(0, 1, 1);

  if (1 && Options::cdoVerbose)
    {
      cdoPrint("Num search cells min,mean,max :  %zu  %3.1f  %zu", num_srch_cells_stat[1],
               num_srch_cells_stat[0] / (double) tgt_grid_size, num_srch_cells_stat[2]);
    }

  // Finished with all cells: deallocate search arrays

  for (int ompthID = 0; ompthID < Threading::ompNumThreads; ++ompthID)
    {
      cellSearchFree(search[ompthID]);
      yacGridCellFree(tgt_grid_cell[ompthID]);
    }

  scaleCellFrac(tgt_grid_size, tgt_grid->cell_frac, tgt_grid->cell_area);

  // Perform some error checking on final weights
  if (lcheck)
    {
      remapCheckArea(tgt_grid_size, &tgt_grid->cell_area[0], "Target");
    }

  if (Options::cdoVerbose) cdoPrint("Cells search: %.2f seconds", cdo_get_wtime() - start);
}  // remapConserv
