#!/bin/sh
# Release number of this script.
version="2.0.9"
# Copyright and license notice, kept as one multi-line string.
license='Copyright (C) 1997, 2001, 2006, 2007, 2009 Dimitar Ivanov

License: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.'
#set -vx
################################################################################
#
# mintegrate - integrates 1-d numerical data using Awk
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
################################################################################
# Default settings; the command-line options may override each of them.
xc=1                # column holding the x values
yc=1                # column holding f(x); by default the first column is summed
dx=1.0              # x step (delta) used for the open-interval integral
x0=00               # lower bound of the x range; "00" marks "no range given"
x1=00               # upper bound of the x range; same marker as x0
pr_digits="%.6g"    # printf format used to print the result
################################################################################

### Get options function
#
# Normalise a command line against a getopts-style option string.
#
# Arguments: $1     option specification, e.g. "ab:" (b takes a value)
#            $2...  the command-line words to parse
# Output:    one line on stdout holding the recognised options with their
#            values, then "--", then the remaining operands, all separated by
#            single blanks. Callers re-split this line into words, so a value
#            that contains blanks does not survive as a single word.
#            An invalid option is reported as "-h" (short help).
# Globals:   sets opts, getopt and options (plus Option in the built-in branch).
#
getoptions() {
opts=$1
shift
# "command -v" is specified by POSIX, which(1) is not. Anything that is not
# an absolute path (nothing found, an alias, a function) selects the
# built-in branch below.
getopt=`command -v getopt 2>/dev/null`
case $getopt in
/*)   # external getopt(1); "$@" keeps the word boundaries the caller passed,
      # the same way getopts does in the other branch
      options=`"$getopt" "$opts" "$@"` || options="-h"
      ;;
*)    # built-in function
   options=
   while getopts "$opts" Option
   do
         # Quoted on purpose: a bare ? is a file name pattern and would be
         # replaced by any single-character file name in the current directory.
      [ "$Option" != '?' ] || Option=h
      options="$options -$Option $OPTARG"
   done
   shift $((OPTIND - 1))
   options="$options -- $*"
   ;;
esac
# Re-split the collected string and join the words with single blanks. This is
# done in a subshell with pathname expansion switched off, so that a value such
# as "*" is printed as given and the caller's shell options stay untouched.
# printf is used because echo may treat a leading word as one of its own flags.
( set -f; set -- $options; printf '%s\n' "$*" )
}

# To create a man page try:
# mintegrate --help |sed -ne "s/Usage:/[SYNOPSIS]\n/;s/mintegrate/\nmintegrate/g;/SYNOPSIS/,/Options:/p" |grep -v Options: > /tmp/synopsis.txt ; help2man -N -n "`mintegrate -h |grep ^mintegrate |cut -f2- -d:`" mintegrate -I /tmp/synopsis.txt |sed 's/ \\fB/ /g' |man -l -

### Print usage
# Horizontal rule of 80 dashes printed above and below the usage headline.
# It is built from 80 zeros that tr turns into dashes. The zeros come from a
# numeric conversion (%d): the 0 flag is only defined for numeric conversions,
# and with %s several printf implementations pad with blanks instead.
separator=`printf '%080d' 0 |tr 0 -`
#
# Write the usage text to stdout.
#
# Arguments: $1  headline, printed between two $separator lines
#            $2  marker inserted between each option and its description
# Globals read: separator, progname, xc, yc, pr_digits
#
# The text is an unquoted here-document, so the variables above and the two
# arguments are expanded when the function runs.
_print_usage_() {
  cat << USAGE_TEXT
$separator
$1
$separator

Usage: $progname [OPTION]... [FILE]

Options:
    -a         $2 compute mean value (arithmetic average) and standard deviation
    -d <float> $2 compute integral on open x-data interval with the specified dx
    -c         $2 compute integral on closed x-data interval;
                 In this case dx specified by the '-d' flag is ignored - data
                 are supposed to be from an irregular x-grid, dx is computed
                 separately for every x-interval, and the integral is computed
                 by the trapezoidal rule.
    -x <int>   $2 x-data column (default is $xc). If 0, the x-range is an index;
    -y <int>   $2 y-data column, where y=f(x) (default is $yc)
    -r x_0:x_1 $2 x-data range to consider
    -s         $2 print out accumulated y_i sums: x_i versus accumulated f(x_i);
                 In the case of a closed integral you have to specify also the
                 x-data resolution dx (see '-d' above).
    -S         $2 compute the accumulated y_i-sums and add it to the output
    -p <str>   $2 print format of the result ("$pr_digits" is default)
    -t <str>   $2 output text in front of the result (invalid with '-s' or '-S');
                 A blank can be printed by using a double underscore character
                 '__'.
    -V         $2 print version number
    --version  $2 output version and license message
    --help|-H  $2 display help
    -h         $2 display short help (options summary)


If none of the options '-a', '-d', or '-c' is used, then the sum of the
provided data will be computed. Empty lines or lines starting with '#' are
skipped.

This program is perfectly suitable as a basic tool for initial data analysis
and will meet the expected accuracy of a numerical solution for the most
demanding computer users and professionals. Yet be aware that, although the
computations are carried with double floating precision, the computational
techniques used for evaluating an integral or a standard deviation are
analytically low-order approximations, and thus not intended to be used for
numerical computations in engineering or mathematical sciences for cases
where an ultimate numerical precision is a must. For deeper understanding of
the topic see http://de.wikipedia.org/wiki/Numerical_Recipes.

USAGE_TEXT
}

### Print version and license
#
# Writes "<progname> <version>" on the first line, followed by the license
# notice. Globals read: progname, version, license.
#
_print_version_() {
  printf '%s %s\n%s\n' "$progname" "$version" "$license"
}

################################################################################
#
# MAIN
#
# Name the script was invoked under, without the directory part; it appears in
# the usage and version texts. Parameter expansion is used instead of an
# unquoted "basename $0" so that an invocation path containing blanks is not
# split into several words.
progname=${0##*/}

# The long options (and -H) are only recognised as the first argument and are
# handled here, before the short options are parsed.
case $1 in
     --help|-H) separator=""   # full help is printed without the dashed rules
                _print_usage_ "$progname is a program to compute averages, sums or integrals of 1-d data in situations where ultimate numerical precision is not needed." " "
                exit
     ;;
     --version) _print_version_
                exit
     ;;
esac
#
# Find proper awk flavour
#
# A candidate qualifies when it accepts a "-v name=value" assignment and runs
# an empty action without printing anything. The loop has no break, so the
# last candidate that qualifies is the one kept in AWK. AWK is not reset
# first, so a value inherited from the environment remains when no candidate
# qualifies.
#
# stderr is parked on descriptor 3 and closed while probing, then restored.
# Descriptor 3 is closed again afterwards so that it is not inherited by the
# commands started later.
exec 3>&2 2>&-
for a in gawk nawk awk
do
  [ "`echo |$a -v a=a '{}' 2>&1`" = "" ] && AWK=$a
done
exec 2>&3 3>&-
if [ x"$AWK" = x ]; then
  # Diagnostics belong on stderr, not in the result stream.
  echo "Error: can't find 'awk' supporting assignments" >&2
  exit 1
fi

# Short options understood by the script, in getopts syntax.
gops="Vhd:x:r:y:sSt:cap:"
# Replace the positional parameters by the normalised list that getoptions
# prints ("-opt value ... -- operands"). The list is re-split into words here,
# so pathname expansion is switched off meanwhile: otherwise an option value
# such as "*" would be replaced by the names of matching files.
set -f
set -- `getoptions $gops "$@"`
set +f

while [ $# -gt 0 ]
do
   case $1 in
   -h ) # Short help: keep only the headline, the rules, the "Usage:" and
        # "Options:" lines and the lines that start an option description.
        _print_usage_ "$progname ${version}: evaluate average/sum/integral of 1-d numerical data" "-" \
              |grep -E "^($progname|Usage:|Options:| *-)"
        exit 0
        ;;
   -d ) dx=$2
        shift
        ;;
   -c ) closed=1
        ;;
   -a ) dx=0
        average=1
        ;;
   -y ) yc=$2
        shift
        ;;
   -x ) xc=$2
        shift
        ;;
   -r ) # Range given as x_0:x_1
        x0=`echo "$2" |cut -f1 -d:`
        x1=`echo "$2" |cut -f2 -d:`
        shift
        ;;
   -p ) pr_digits=$2
        shift
        ;;
   -s ) accu_sum=1
        ;;
   -S ) accu_sum=2
        ;;
   -t ) # Every double underscore stands for one blank
        text=`echo "$2" |sed "s/__/ /g"`
        shift
        ;;
   -V ) echo $version
        exit
        ;;
   -- ) # End of options; what follows is the input file, if any
        shift
        break ;;
   esac
   shift
done

################################################################################
#
# COMPUTATION
#
# Feed the input file (or stdin when no file was named) to awk, which computes
# the sum, the open or closed integral, or the mean and standard deviation.
# All settings are handed over as quoted -v assignments, so that they are not
# subject to word splitting or pathname expansion.
#
cat ${1:+"$1"} |$AWK -v dx="$dx" -v yc="$yc" -v xc="$xc" -v x0="$x0" -v x1="$x1" \
             -v text="$text" -v pr_digits="$pr_digits" -v accu_sum="$accu_sum" \
             -v closed="$closed" -v average="$average" \
'BEGIN \
{
  sum=0; sum1=0; sum2=0; I=0; i=0; two_values_read_flag=0;

     # "00" is the marker for a range bound that was not given. Without a
     # range every data line is considered.
  if( x0 != "00" && x1 != "00" ) range_defined=1;
  else                           consider_data=1;
     # Format used for the result when no print format was handed over
  OFMT="%.15g"
     # Print format of the result
  if( pr_digits ) pd = pr_digits;
  else            pd = OFMT;
}
   #
   # Main loop: skip empty lines or comments
   #
$0 !~ /^ *#/ && $0 !~ /^ *$/\
{
     # In case x-data column = 0, then index is used
  if( xc ) { x_i = $xc;    }
  else     { I++; x_i = I; }
  y_i = $yc;

  if( range_defined && x_i <= x1 && x_i >= x0 ) consider_data=1;

     # If x-data are in the specified range
  if( consider_data ) {
      i++;
         # Integral / Sum
      sum  += y_i;

         # sum2 is used for computing the standard deviation
      if( average ) sum2 += y_i * y_i;

         # Integral over closed interval: irregular grid sampling allowed
      if( closed ) {

             # From the second considered point on, add twice the area of the
             # trapezoid between the previous and the current point
          if( two_values_read_flag ) {
              dx_i = x_i - x_o;
              if( dx_i < 0.0 ) dx_i = -dx_i;
              sum1 += dx_i * (y_i + y_o);
          } else {
              two_values_read_flag++;
          }
          x_o  = x_i;
          y_o = y_i;
      }

         # Accumulated values
      if( accu_sum ) {
          if( accu_sum < 2 && xc != 0 ) printf( "%s%s", $xc, FS ); # x
          else if( I )                  printf( "%s%s",   I, FS ); # index
          else                          printf( "%s%s",  $0, FS ); # line
          if( closed )                  printf( pd"\n", sum1 * 0.5 );
          else                          printf( pd"\n", sum * dx );
      }
  }
     # With a range, every line has to qualify anew
  if( range_defined ) consider_data=0;
}

END \
{
     # The accumulated values were already printed line by line
  if( accu_sum ) exit 0;

  if( text ) printf( "%s", text );

  if( average ) {
      if( i > 1 ) {
          dx=1/i;
             # Variance = [Sum(y_i^2) - N*(y_i_mean)^2 ]/(N-1)
             # This formula magnifies roundoff errors significantly, but
             # it is a one-pass algorithm.
          sum2 = ( sum2 - dx * sum*sum ) / ( i - 1 );
             # Roundoff can leave a tiny negative value for (nearly) constant
             # data; clamp it so that sqrt() does not return NaN.
          if( sum2 < 0 ) sum2 = 0;
          sum2 = sqrt( sum2 );
          sum  = sum * dx;
      } else {
          sum2 = 0;
      }
         # Mean value and standard deviation
      printf( pd"%s+/-%s"pd"\n", sum, FS, FS, sum2 );
  } else {
         # Integral (closed) - trapezoidal rule
      if( closed ) printf( pd"\n", sum1 * 0.5 );
         # Integral (open) / Sum
      else         printf( pd"\n", sum * dx );
  }
}'

exit 0
