/*
    Theseus - maximum likelihood superpositioning of macromolecular structures

    Copyright (C) 2004-2008 Douglas L. Theobald

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the:

    Free Software Foundation, Inc.,
    59 Temple Place, Suite 330,
    Boston, MA  02111-1307  USA

    -/_|:|_|_\-
*/

#ifndef COORDS_SEEN
#define COORDS_SEEN

#include <stdio.h>
#include "DLTmath.h"
#include "PDBCoords.h"


/* Algorithm holds the options and bookkeeping for one run: a copy of the
   command line and the input file list, the option flags and numeric
   settings, and the running iteration counters of MultiPose().
   A CoordsArray refers to one of these through its ->algo member.
   Members without a comment are not described anywhere in this header;
   check their call sites before relying on them.
*/
typedef struct Algorithm_
{
    char            cmdline[1024]; /* copy of the command line */
    int             argc; /* presumably main()'s argc - TODO confirm */
    char          **argv; /* presumably main()'s argv - TODO confirm */
    char          **infiles; /* an array of the input files listed on the command line */
    int             filenum; /* number of input files */
    char            rootname[FILENAME_MAX];
    int             method; /* superposition algorithm: Kabsch, Kearsley, Horn, or SVD derivative */
    int             weight; /* weighting method */
    int             verbose;  /* lots of output */
    double          precision; /* requested relative precision to converge to */
    int             iterations; /* max # of iterations allowed for outer loop of MultiPose() */
    int             rounds; /* running counter of rounds of the outer loop of MultiPose() */
    int             innerrounds; /* running counter of rounds of the inner loop of MultiPose() */
    double          milliseconds; /* how long the calculation took */
    int             print_weight; /* flag to print weights */
    int             print_trans; /* flag to print translations */
    int             write_file; /* flag to write output pdb file, default = 1 = yes */
    int             atoms; /* flag for atom types to include in superposition, CA, CB, backbone, P, etc. */
    int             reflection;
    char           *selection; /* character array holding user input for residues/alignment columns to include */
    char           *atomslxn; /* character array holding user input for atom types to include */
    int             revsel; /* reverse the sense of the residues to select in ->selection above (i.e. exclude them) */
    int             embedave; /* flag to initialize the algorithm with an embedded average (distance geometry) */
    int             jackknife; /* flag to do jackknifing, experimental */
    int             writestats; /* flag to write out stat files */
    int             FragDist;
    int             random;
    int             pca; /* flag to do Principal Components Analysis on covariance matrix */
    int             fullpca;
    int             cormat; /* flag to do PCA with correlation matrix instead of covariance matrix */
    int             tenberge;
    int             morph;
    int             stats; /* calculate moment stats */
    double          constant; /* minimum variance allowed */
    int             info; /* just calculate stats for given pdb file and quit */
    int             princaxes; /* flag to align final superposition with principal axes of mean structure */
    int             nullrun;
    int             binary; /* flag to read and write binary structure files */
    int             modelpca; /* presumably flag for PCA over models (cf. CoordsArray ->modpcamat) - TODO confirm */
    double          raxes[3];
    int             mbias; /* flag to calculate bias-corrected mean */
    int             notrans; /* presumably flag to skip translations - TODO confirm */
    int             norot; /* presumably flag to skip rotations - TODO confirm */
    int             alignment; /* flag for superimposing based on a sequence alignment */
    int             dimweight; /* flag to do dimensional (axial) weighting */
    int             covweight; /* flag to do atomic, row-wise covariance matrix weighting */
    int             varweight; /* flag to do variance weighting (i.e., a diagonal covariance matrix) */
    int             leastsquares; /* flag to do classical least squares, all variances equal, no covars */
    int             LedoitWolf; /* flag to use Ledoit-Wolf covariance shrinkage estimator */
    int             hierarch; /* flag to use hierarchical variances, e.g. inverse gamma distributed vars */
    int             fmodel; /* read only first or all models in a pdb file */
    int             noave; /* don't calculate an average structure */
    int             noinnerloop; /* don't iterate the inner loop */
    int             htrans; /* flag to calculate hierarchical translations */
    int             fasta; /* flag to write out FASTA sequence files for each PDB model read in */
    int             olve; /* Olve Peersen's pet requests */
    int             abort;
    int             seed; /* random number seed, can be specified by user */
    int             mixture;
    int             threads; /* flag to run with pthreads */
    double          minc;
    int             printlogL;
    int             bfact;
    int             convlele; /* flag to convert Lele's formatted files */
    double          param[2]; /* random generation of structures: params for inverse gamma */
    double          radii[3]; /* random generation of structures: radii of gyration for generating mean forms */
    int             ssm;
    int             lele5;
    int             bayes;
    int             ipmat;
    int             commandeur; /* Commandeur algorithm for missing atom translations */
    int             missing;
    int             scale; /* calculate scale factors for each structure */
    int             instfile; /* print out PDB files in each internal round of the MultiPose algorithm */
} Algorithm;


/* Statistics collects the summary statistics of a superposition: RMSD
   variants, likelihood and information criteria, moment statistics of the
   residuals, chi^2 fits for the hierarchical models, and convergence and
   precision diagnostics.
   A CoordsArray refers to one of these through its ->stats member.
*/
typedef struct Statistics_
{
    double          stddev; /* combined standard deviation of all atomic positions */
    double          starting_paRMSD, starting_pawRMSD; /* some stats for initial superposition */
    double          starting_mlRMSD, starting_ave_wRMSD_from_mean;
    double          starting_stddev, starting_logL;
    double          ave_paRMSD; /* average pairwise RMSD */
    double          ave_pawRMSD; /* average weighted pairwise RMSD */
    double          RMSD_from_mean; /* average RMSD from the mean structure */
    double          wRMSD_from_mean; /* average weighted RMSD from the mean structure */
    double          mlRMSD; /* max lik RMSD, actually a sigma */
    double          anova_RMSD, anova_AIC, anova_logL;
    double          ave_ref_wRMSD_from_mean, refl_RMSD;
    double          KSp, Fp, signp, wilcoxonp, dw; /* some frequentist stats */
    double          logL, AIC, BIC, nparams, ndata, chi2; /* likelihood statistics */

    double          skewness[4]; /* for x, y, z residuals and total */
    double          kurtosis[4]; /* presumably indexed like skewness[] (x, y, z, total) - TODO confirm */
    double          SES, SEK;

    double          ledoit1, ledoit2; /* Ledoit-Wolf covar estimator params */
    double          condition_num; /* condition number of the covariance matrix */
    int             median; /* index of structure closest to mean */
    double          trace_inv_sigma;
    double          wtnorm; /* normalization factor for atomic row-wise weight matrix */
    double          hierarch_p1, hierarch_p2; /* parameters of the PDF for hierarchical variances */
    double          htrans_ave, htrans_var; /* parameters of Gaussian for hierarchical translations */
    double          hierarch_chi2;/* chi^2 value for fit of hierarchical variances */
    double          hierarch_chi2_P; /* P-value of hierarch_chi2 */
    double          htrans_chi2; /* chi^2 value for fit of hierarchical translations to Gaussian */
    double          htrans_chi2_P; /* P-value of htrans_chi2 */
    double          omnibus_chi2; /* overall chi^2, including hierarchical stuff and overall fit */
    double          omnibus_chi2_P; /* P-value of omnibus_chi2 */
    double          precision; /* actual precision to which the algorithm converged */
    double          fperr; /* empirically determined floating point error of superposition */
    double          minvar; /* empirically determined theoretical minimum variance */
    double          lsvar; /* least-squares variances, homoscedastic */
} Statistics;


/* Coords is for holding working sets of coordinates: one structure's
   per-atom data (residue labels, x/y/z, occupancy, B-factor), its current
   rotation and translation, per-structure RMSD figures, and a set of
   scratch matrices and vectors.
   All pointer members are storage owned elsewhere in the program; this
   header does not show how they are allocated or freed.
*/
typedef struct Coords_
{
    char            filename[FILENAME_MAX];
    int             model; /* model number, not really used */
    int             vlen;  /* number of coordinates */
    int             aalen; /* number of real residues, no gaps, used for CA alignments */

    char          **resName; /* residue name */
    char           *chainID; /* chain ID */
    int            *resSeq;  /* residue number */

    double         *x, *y, *z; /* x,y,z atomic coordinates */
    double         *o;         /* occupancy */
    double         *b;         /* B-factor */

    double         *prvar;      /* prior variances */

    double         *residual_x, *residual_y, *residual_z;
    double         *covx, *covy, *covz; /* covariance matrix weighted x,y,z coords */

    double        **matrix;      /* 3x3 rotation matrix */
    double        **last_matrix; /* temp 3x3 rotation matrix */
    double        **jackmat;     /* 3x3 temp rotation matrix to accumulate JK rotations */

    double          radgyr;     /* radius of gyration */
    double        **innerprod;  /* vlen x vlen inner product matrix */
    double        **innerprod2; /* 3 x 3 inner product matrix */

    double          center[3];       /* weighted centroid of coordinates */
    double          last_center[3];  /* temp centroid of coordinates */
    double          translation[3];  /* translation vector, based on weighted center */
    double          transsum[3];
    double          jktranslation[3];
    double          RMSD_from_mean;  /* rmsd from the mean structure */
    double          wRMSD_from_mean; /* weighted rmsd from mean structure */
    double          ref_wRMSD_from_mean;
    double          evals[4]; /* quaternion evals (residual sums) */
    double        **evecs;    /* 4x4 quaternion evecs (rotation vectors) */

    double        **tmpmat1, **tmpmat2;  /* a bunch of scratch matrices and vectors to be passed around */
    double        **tmpmatKK1;           /* must be careful that these aren't doubly accessed by subroutines */
    double        **tmpmatKK2;
    double         *tmpvecK;
    double        **tmpmat3K, **tmpmatK3a, **tmpmatK3b;
    double        **tmpmat3a, **tmpmat3b, **tmpmat3c, **tmpmat3d; /* 3x3 scratch matrices */
    double          tmpvec3a[3];

    double          bfact_c;
    double          scale;

    /* not to be accessed - for space only
       (presumably the backing storage for the resName strings - TODO confirm) */
    char           *resName_space;
} Coords;


/* CoordsArray is an array of Coords, plus a bunch of stuff necessary to
   do the ML superposition for this family of Coords: the average and
   target Coords, per-atom weights and variances, the covariance and
   weight matrices, PCA results, the Algorithm options and Statistics
   results, and scratch space.
   Note that the struct tag is Coords_Array (with an underscore), while
   the typedef name is CoordsArray.
*/
typedef struct Coords_Array
{
    char            outfile_name[FILENAME_MAX];
    int             vlen;       /* number of coordinates */
    int             cnum;       /* number of Coords in array */
    char           *anchorf_name;
    char           *mapfile_name;
    char           *msafile_name;
    struct PDB_Coords_Array *pdbA;       /* associated PDBCoordsArray */
    struct Coords_Array    *scratchA;   /* associated scratch array of Coords */

    Coords        **coords;     /* pointer to an array of pointers to Coords */
    Coords         *avecoords;  /* average Coords of all in CoordsArray */
    Coords         *tcoords;    /* target Coords */
    Coords         *jkcoords;   /* average bootstrapped Coords for SuperJack() */

    double         *w;          /* diagonal atomic weights */
    double         *var;        /* atomic variances */
    int            *df;         /* degrees of freedom for variances, used for incomplete data alignments */
    double         *S2;         /* theoretical NMR order parameters */

    Algorithm      *algo;       /* options and counters for this run */
    Statistics     *stats;      /* statistics for this superposition */

    double         *residuals; /* 3 x vlen x cnum vector of normalized residuals */

    double        **Var_matrix; /* the variances of the distances in distmat */
    double        **Dij_matrix; /* average distance matrix for the CoordsArray */
    Matrix3D       *distmat;
    double        **CovMat;     /* the atomic, row-wise covariance matrix */
    double        **WtMat;      /* normalized inverse of the CovMat */
    double        **FullCovMat;
    double        **MVCovMat;   /* a 3x3 matrix */
    double        **SCovMat;    /* a cnum x cnum matrix */

    double        **AxCovMat;   /* the axial (dimensional), column-wise covariance matrix */
    double        **AxWtMat;    /* normalized and constrained inverse of the AxCovMat */
    double          axesvar[3]; /* eigenvalues of AxCovMat */
    double          axesw[3];   /* inverse of the eigenvalues of AxCovMat */

    double        **pcamat;     /* vlen x vlen sized matrix for principal component eigenvectors */
    double         *pcavals;    /* PCA eigenvalues */
    double        **modpcamat;  /* cnum x cnum sized matrix for model principal component eigenvectors */
    double         *modpcavals; /* model PCA eigenvalues */

    double        **tmpmat1, **tmpmat2;  /* a bunch of scratch matrices and vectors to be passed around */
    double        **tmpmatKK1;            /* must be careful that these aren't doubly accessed by subroutines */
    double        **tmpmatKK2;
    double         *tmpvecK;
    double        **tmpmat3K, **tmpmatK3a, **tmpmatK3b;
    double        **tmpmat3a, **tmpmat3b, **tmpmat3c, **tmpmat3d; /* 3x3 scratch matrices */
    double          tmpvec3a[3];
} CoordsArray;

#endif
