#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: andrius $
#$Date: 2017-05-30 14:51:03 +0300 (An, 30 geg. 2017) $ 
#$Revision: 5376 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.3/scripts/cif_cell_contents $
#------------------------------------------------------------------------------
#*
#* Calculate cell contents from atom coordinates and symmetry information.
#*
#* USAGE:
#*    $0 --options input1.cif input*.cif
#**

use strict;
use warnings;
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Data::CellContents qw( cif_cell_contents );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw( process_warnings
                          process_errors
                          process_parser_messages );
use COD::ToolsVersion;

my $use_parser = 'c';

my $die_on_error_level = {
    ERROR   => 1,
    WARNING => 0,
    NOTE    => 0
};

my $Z_value;
my $use_attached_hydrogens = 0;
my $assume_full_occupancies = 0;
my $die_on_errors   = 1;
my $die_on_warnings = 0;
my $die_on_notes    = 0;
my $print_datablock_name = 0;

#* OPTIONS:
#*   -c, --continue-on-errors
#*                     Keep processing subsequent data blocks even if one data
#*                     block had fatal processing errors.
#*
#*   -c-, --die-on-errors, --dont-continue-on-errors,
#*   --do-not-continue-on-errors, --no-continue-on-errors
#*                     Terminate script immediately if some data block could
#*                     not be processed (default).
#*
#*   --continue-on-warnings
#*                     Do not terminate script if warnings are raised (default)
#*
#*   --die-on-warnings
#*                     Terminate script immediately if warnings are raised
#*
#*   --continue-on-notes
#*                     Do not terminate script if notes are raised (default)
#*
#*   --die-on-notes
#*                     Terminate script immediately if notes are raised
#*
#*   -Z, --cell-formula-units-Z  1
#*                     Specify number of formula sum units in the unit cell.
#*                     Default: taken from the input CIF if present, otherwise
#*                     assumed to be 1.
#*
#*   --use-attached-hydrogens
#*                     Include number of implicit hydrogens, specified using
#*                     _atom_site_attached_hydrogens tag, into the formula sum.
#*
#*   --dont-use-attached-hydrogens,
#*   --no-use-attached-hydrogens,
#*   --ignore-attached-hydrogens
#*                     Ignore number of implicit hydrogens, specified using
#*                     _atom_site_attached_hydrogens tag, in calculation of
#*                     the formula sum (default).
#*
#*   --assume-full-occupancies
#*                     Assume that all atoms have full (1.0) occupancies.
#*
#*   --dont-assume-full-occupancies,
#*   --no-assume-full-occupancies,
#*   --use-original-occupancies
#*                     Use original atom site occupancies, as given in CIF file
#*                     (default).
#*
#*   --print-datablock-name
#*                     Print data block name, tab-separated, before each formula.
#*
#*   --dont-print-datablock-name,
#*   --do-not-print-datablock-name,
#*   --no-print-datablock-name
#*                     Do not print data block name (default).
#*
#*   --use-perl-parser
#*                     Use Perl parser to parse CIF files.
#*   --use-c-parser
#*                     Use C parser to parse CIF files (default).
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    "-c,--continue-on-errors"       => sub { $die_on_errors = 0 },
    "-c-,--dont-continue-on-errors" => sub { $die_on_errors = 1 },
    "--die-on-errors"               => sub { $die_on_errors = 1 },
    "--do-not-continue-on-errors"   => sub { $die_on_errors = 1 },
    "--no-continue-on-errors"       => sub { $die_on_errors = 1 },

    "--continue-on-warnings" => sub { $die_on_warnings = 0 },
    "--die-on-warnings"      => sub { $die_on_warnings = 1 },

    "--continue-on-notes"    => sub { $die_on_notes = 0 },
    "--die-on-notes"         => sub { $die_on_notes = 1 },

    "-Z,--cell-formula-units-Z" => \$Z_value,

    "--use-attached-hydrogens" => sub { $use_attached_hydrogens = 1 },
    "--dont-use-attached-hydrogens" => sub { $use_attached_hydrogens = 0 },
    "--no-use-attached-hydrogens" => sub { $use_attached_hydrogens = 0 },
    "--ignore-attached-hydrogens" => sub { $use_attached_hydrogens = 0 },

    "--assume-full-occupancies" => sub { $assume_full_occupancies = 1 },
    "--dont-assume-full-occupancies" => sub { $assume_full_occupancies = 0 },
    "--no-assume-full-occupancies" => sub { $assume_full_occupancies = 0 },
    "--use-original-occupancies" => sub { $assume_full_occupancies = 0 },

    "--print-datablock-name" => sub{ $print_datablock_name = 1 },
    "--no-print-datablock-name" => sub{ $print_datablock_name = 0 },
    "--dont-print-datablock-name" => sub{ $print_datablock_name = 0 },
    "--do-not-print-datablock-name" => sub{ $print_datablock_name = 0 },

    "--use-perl-parser" => sub { $use_parser = "perl" },
    "--use-c-parser"    => sub { $use_parser = "c" },
    "--options"         => sub { options; exit },
    "--help,--usage"    => sub { usage; exit },
    '--version'         => sub { print 'cod-tools version ',
                                 $COD::ToolsVersion::Version, "\n";
                                 exit }
);

my $Id = '$Id: cif_cell_contents 5376 2017-05-30 11:51:03Z andrius $';

@ARGV = ( "-" ) unless @ARGV;

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

for my $filename (@ARGV) {
    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );
    next if ( $err_count > 0 );

    canonicalize_all_names( $data );

    for my $dataset (@$data) {

        my $dataname = 'data_' . $dataset->{name};
        my $values   = $dataset->{values};

        local $SIG{__WARN__} = sub { process_warnings( {
                                       'message'  => @_,
                                       'program'  => $0,
                                       'filename' => $filename,
                                       'add_pos'  => $dataname
                                     }, {
                                       'WARNING' => $die_on_warnings,
                                       'NOTE'    => $die_on_notes
                                     } ) };

        my $formula;
        eval {
            if ( defined $Z_value &&
                 exists $values->{'_cell_formula_units_Z'} ) {
                my $file_Z = $values->{_cell_formula_units_Z}[0];
                    if( $Z_value != $file_Z ) {
                    warn "WARNING, overriding _cell_formula_units_Z ($file_Z) " .
                         "with command-line value $Z_value" . "\n";
                }
            }

            $formula = cif_cell_contents ( $dataset, $Z_value,
                                           $use_attached_hydrogens,
                                           $assume_full_occupancies );
        };
        if ( $@ ) {
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname
            }, $die_on_errors )
        };

        if( defined $formula ) {
            if( $print_datablock_name ) {
                print $dataset->{name}, "\t";
            }
            print $formula, "\n";
        }
    }
}
