/* $Id: cpl_fft_body.h,v 1.10 2012/03/12 12:48:10 llundin Exp $
 *
 * This file is part of the ESO cpl package
 * Copyright (C) 2012 European Southern Observatory
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#define CPL_FFTW_ADD(a) CPL_CONCAT2X(CPL_FFTW, a)
#define CPL_TYPE_ADD(a) CPL_CONCAT2X(a, CPL_TYPE)
#define CPL_TYPE_ADD_CONST(a) CPL_CONCAT3X(a, CPL_TYPE, const)
#define CPL_TYPE_ADD_COMPLEX(a) CPL_CONCAT2X(a, CPL_TYPE_C)
#define CPL_TYPE_ADD_COMPLEX_CONST(a) CPL_CONCAT3X(a, CPL_TYPE_C, const)

/*----------------------------------------------------------------------------*/
/**
  @internal
  @brief  Perform a FFT operation on an image of a specific type
  @param  self  Pre-allocated output image of the given type
  @param  other Input image
  @param  mode CPL_FFT_FORWARD or CPL_FFT_BACKWARD, optionally CPL_FFT_NOSCALE
  @return CPL_ERROR_NONE or the corresponding #_cpl_error_code_
  @see cpl_fft_image()
  @note The precision for both images must be either double or float
  @note The FFTW allocation and the FFTW plans are used without a NULL check

  Forward mode:
  - complex input: a complex-to-complex 2D transform from other to self.
  - non-complex input: a real-to-complex 2D transform. FFTW produces only
    nxin/2 + 1 complex columns per row. When self has exactly that many
    columns the result is written directly into self, otherwise it is computed
    into a temporary buffer and copied row by row into the left part of self.

  Backward mode (the input must be complex):
  - complex output: a complex-to-complex 2D transform from other to self.
  - non-complex output: a complex-to-real 2D transform. The left nxout/2 + 1
    columns of other are first copied into a temporary buffer, since the
    transform is planned with FFTW_DESTROY_INPUT.
  - Unless CPL_FFT_NOSCALE is set, self is afterwards divided by the number
    of transformed pixels, nxout * nyin.

  The forward transform is never scaled by this function.

  Error codes set by this function:
  - CPL_ERROR_UNSUPPORTED_MODE if the C complex type and the FFTW complex type
    differ in size
  - CPL_ERROR_TYPE_MISMATCH if (forward) the complex pixel buffer of self
    is not available, or (backward) other is not of a complex type
  - CPL_ERROR_ILLEGAL_INPUT if mode holds neither direction flag, or holds
    any flag besides the chosen direction and CPL_FFT_NOSCALE

 */
/*----------------------------------------------------------------------------*/
static cpl_error_code CPL_TYPE_ADD(cpl_fft_image)(cpl_image       * self,
                                                  const cpl_image * other,
                                                  cpl_fft_mode      mode)
{
    const cpl_type typin  = cpl_image_get_type(other);
    const cpl_type typout = cpl_image_get_type(self);
    const int nxin  = (int)cpl_image_get_size_x(other);
    const int nyin  = (int)cpl_image_get_size_y(other);
    const int nxout = (int)cpl_image_get_size_x(self);
    /* Number of complex columns in the half-size buffer of the R2C/C2R
       transforms. It is derived from the width of the image on the real side
       of the transform: the input when going forward, the output when going
       backward. Not used by the complex-to-complex transforms. */
    const int nxh   = ((mode & CPL_FFT_FORWARD) ? nxin : nxout) / 2 + 1;

    cpl_error_code error = CPL_ERROR_NONE;

    /* The CPL pixel buffers are passed to FFTW via pointer casts, so the
       two complex types must at least agree in size */
    /* FIXME: This should be verified during configure and replaced by
       an assert() */
    cpl_ensure_code(sizeof(CPL_TYPE complex) == sizeof(CPL_FFTW_TYPE),
                    CPL_ERROR_UNSUPPORTED_MODE);

    /* In the code below every FFTW call except the execute call is placed in
       the named critical section cpl_fft_fftw: per the FFTW documentation
       only the execution of a plan is thread-safe */

    if (mode & CPL_FFT_FORWARD) {
        CPL_FFTW_ADD(plan) pforw;
        CPL_FFTW_TYPE    * out_b = (CPL_FFTW_TYPE*)
            CPL_TYPE_ADD_COMPLEX(cpl_image_get_data)(self);
        /* Destination of the R2C transform: either out_b itself or a
           temporary half-size buffer. Stays NULL for the C2C transform. */
        CPL_FFTW_TYPE    * out_bh  = NULL;

        cpl_ensure_code(out_b != NULL, CPL_ERROR_TYPE_MISMATCH);
        /* Make sure mode contains only the supported flags */
        cpl_ensure_code(!(mode & ~(CPL_FFT_FORWARD | CPL_FFT_NOSCALE)),
                        CPL_ERROR_ILLEGAL_INPUT);

        if (typin & CPL_TYPE_COMPLEX) {
            const CPL_FFTW_TYPE * in_b = (const CPL_FFTW_TYPE *)
                CPL_TYPE_ADD_COMPLEX_CONST(cpl_image_get_data)(other);
            /* An in-place transform cannot preserve its input */
            const int flags = in_b == out_b ? FFTW_ESTIMATE : FFTW_ESTIMATE |
                FFTW_PRESERVE_INPUT;

#ifdef _OPENMP
#pragma omp critical(cpl_fft_fftw)
#endif
            pforw = CPL_FFTW_ADD(plan_dft_2d)(nyin, nxin, (CPL_FFTW_TYPE *)in_b,
                                              out_b, FFTW_FORWARD, flags);
        } else {
            const CPL_TYPE * in_b =
                CPL_TYPE_ADD_CONST(cpl_image_get_data)(other);

            /* For the real-to-complex transform, only the left half of
               the result is computed. The size of the output image may
               either match that, in which case the output image buffer can
               be used directly - or it matches the input buffer, in which
               case the data has to be repacked afterwards */


#ifdef _OPENMP
#pragma omp critical(cpl_fft_fftw)
#endif
            {
                /* NOTE(review): the result of the FFTW allocation is not
                   checked for NULL before it is handed to the planner */
                out_bh = nxout == nxh ? out_b :
                    CPL_FFTW_ADD(malloc)(nxh * sizeof(*out_bh) * nyin);

                /* The const is cast away for the FFTW prototype, the plan
                   is created with FFTW_PRESERVE_INPUT */
                pforw = CPL_FFTW_ADD(plan_dft_r2c_2d)(nyin, nxin,
                                                      (CPL_TYPE *)in_b, out_bh,
                                                      FFTW_ESTIMATE |
                                                      FFTW_PRESERVE_INPUT);
            }
        }
        CPL_FFTW_ADD(execute)(pforw);

        if (!(typin & CPL_TYPE_COMPLEX) && out_bh != out_b) {
            /* Need to repack the transformed half */
            const CPL_FFTW_TYPE * out_bhj = out_bh;
            CPL_FFTW_TYPE       * out_bj  = out_b;
            int                   j;

            /* Copy the nxh transformed columns of each row into the left
               part of the matching output row. The destination advances by
               the row length of the output image itself, so the rows stay
               aligned with the buffer actually written to. */
            for (j = 0; j < nyin; j++, out_bhj += nxh, out_bj += nxout) {
                (void)memcpy(out_bj, out_bhj, nxh * sizeof(*out_bj));
            }
        }

#ifdef _OPENMP
#pragma omp critical(cpl_fft_fftw)
#endif
        {
            CPL_FFTW_ADD(destroy_plan)(pforw);
            /* Release only a temporary buffer, never the image buffer.
               For the C2C transform out_bh is still NULL here. */
            if (out_bh != out_b) CPL_FFTW_ADD(free)(out_bh);
        }
    } else if (mode & CPL_FFT_BACKWARD) {

        CPL_FFTW_ADD(plan) pback;
        const CPL_FFTW_TYPE * in_b  = (const CPL_FFTW_TYPE *)
            CPL_TYPE_ADD_COMPLEX_CONST(cpl_image_get_data)(other);
        /* FFTW modifies the input array in the C2R transform,
           so in this case a temporary input buffer is needed */
        CPL_FFTW_TYPE * in_bh;

        /* Make sure mode contains only the supported flags */
        cpl_ensure_code(!(mode & ~(CPL_FFT_BACKWARD | CPL_FFT_NOSCALE)),
                        CPL_ERROR_ILLEGAL_INPUT);
        cpl_ensure_code(typin & CPL_TYPE_COMPLEX,  CPL_ERROR_TYPE_MISMATCH);

        if (typout & CPL_TYPE_COMPLEX) {
            CPL_FFTW_TYPE * out_b = (CPL_FFTW_TYPE *)
                CPL_TYPE_ADD_COMPLEX(cpl_image_get_data)(self);
            /* An in-place transform cannot preserve its input */
            const int flags = in_b == out_b ? FFTW_ESTIMATE : FFTW_ESTIMATE |
                FFTW_PRESERVE_INPUT;

            in_bh = NULL; /* Not needed for C2C */

#ifdef _OPENMP
#pragma omp critical(cpl_fft_fftw)
#endif
            pback = CPL_FFTW_ADD(plan_dft_2d)(nyin, nxin, (CPL_FFTW_TYPE *)in_b,
                                              out_b, FFTW_BACKWARD, flags);

        } else {
            CPL_TYPE            * out_b =
                CPL_TYPE_ADD(cpl_image_get_data)(self);

#ifdef _OPENMP
#pragma omp critical(cpl_fft_fftw)
#endif
            {
                /* NOTE(review): the result of the FFTW allocation is not
                   checked for NULL before it is used */
                in_bh = CPL_FFTW_ADD(malloc)(nxh * sizeof(*in_bh) * nyin);
                /* The logical size of the transform is that of the real
                   output: nxout columns and nyin rows */
                pback = CPL_FFTW_ADD(plan_dft_c2r_2d)(nyin, nxout, in_bh, out_b,
                                                      FFTW_ESTIMATE |
                                                      FFTW_DESTROY_INPUT);
            }

            /* The temporary buffer is filled after the plan has been created
               with it, and before the plan is executed */
            if (nxin == nxh) {
                /* For the complex-to-real transform, only the left half of
                   the input is transformed. The input matches that. */

                (void)memcpy(in_bh, in_b, nxh * sizeof(*in_bh) * nyin);

            } else {
                /* For the complex-to-real transform, only the left half of
                   the input is transformed. It needs to be repacked first */
                const CPL_FFTW_TYPE * in_bj  = in_b;
                CPL_FFTW_TYPE       * in_bhj = in_bh;
                int                   j;

                for (j = 0; j < nyin; j++, in_bhj += nxh, in_bj += nxin) {
                    (void)memcpy(in_bhj, in_bj, nxh * sizeof(*in_bhj));
                }
            }

        }

        CPL_FFTW_ADD(execute)(pback);

#ifdef _OPENMP
#pragma omp critical(cpl_fft_fftw)
#endif
        {
            CPL_FFTW_ADD(destroy_plan)(pback);
            /* in_bh is NULL for the C2C transform */
            CPL_FFTW_ADD(free)(in_bh);
        }

        if (!(mode & CPL_FFT_NOSCALE)) {
            /* Form the pixel count in double precision: multiplying the two
               int dimensions first would overflow (undefined behaviour) for
               images of more than INT_MAX pixels */
            const double npix = (double)nxout * (double)nyin;

            error = cpl_image_divide_scalar(self, npix);
        }
    } else {
        /* Neither direction was requested */
        error = CPL_ERROR_ILLEGAL_INPUT;
    }

    return cpl_error_set_(error); /* Set or propagate error, if any */
}

/* Remove the type-dependent helper macros defined at the top of this file.
   NOTE(review): CPL_FFTW_ADD, which is also defined at the top of this file,
   is not undefined here - confirm that this is intended. */
#undef CPL_TYPE_ADD
#undef CPL_TYPE_ADD_CONST
#undef CPL_TYPE_ADD_COMPLEX
#undef CPL_TYPE_ADD_COMPLEX_CONST
