#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2019-11-18 11:08:55 +0200 (Mon, 18 Nov 2019) $ 
#$Revision: 7429 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.7/scripts/cif_create_AMCSD_pressure_temp_tags $
#------------------------------------------------------------------------------
#*
#* Parse a CIF file, determine pressure and temperature tag values from
#* the values of other tags. Designed for CIFs from AMCSD.
#*
#* USAGE:
#*    $0 --options input1.cif input*.cif
#**

use strict;
use warnings;
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::CIF::Tags::DictTags;
use COD::CIF::Tags::COD;
use COD::CIF::Tags::Print qw( print_cif );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw(process_warnings
                         process_errors
                         process_parser_messages);
use COD::ToolsVersion;

my $use_parser = 'c';
my $keep_tag_order = 0;

#* OPTIONS:
#*   --keep-tag-order
#*                     Keep the original tag order in CIF file (default).
#*   --sort-tags
#*                     Reorder tags in CIF file according to COD.
#*
#*   --use-perl-parser
#*                     Use Perl parser to parse CIF files.
#*   --use-c-parser
#*                     Use C parser to parse CIF files (default).
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    "--keep-tag-order"  => sub { $keep_tag_order = 1; },
    "--sort-tags"       => sub { $keep_tag_order = 0; },
    "--use-perl-parser" => sub { $use_parser = "perl" },
    "--use-c-parser"    => sub { $use_parser = "c" },
    "--options"         => sub { options; exit },
    "--help,--usage"    => sub { usage; exit },
    '--version'         => sub { print 'cod-tools version ',
                                 $COD::ToolsVersion::Version, "\n";
                                 exit }
);

my $die_on_error_level = {
    ERROR   => 0,
    WARNING => 0,
    NOTE    => 0
};

my @dictionary_tags = ( @COD::CIF::Tags::DictTags::tag_list,
                        @COD::CIF::Tags::COD::tag_list );
my %dictionary_tags = map { $_, $_ } @dictionary_tags;

sub get_pressure_unit_coefficient($);
sub get_temperature_unit_shift($);

@ARGV = ("-") unless @ARGV;

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

for my $filename (@ARGV) {
    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, { 'ERROR'   => 1,
                                          'WARNING' => 0,
                                          'NOTE'    => 0 } );

    canonicalize_all_names( $data );

    for my $dataset (@$data) {

        my $values = $dataset->{values};
        my $dataname = 'data_' . $dataset->{name} if exists $dataset->{name};

        local $SIG{__WARN__} = sub { process_warnings( {
                                       'message'       => @_,
                                       'program'       => $0,
                                       'filename'      => $filename,
                                       'add_pos'       => $dataname
                                     }, $die_on_error_level ) };

        eval {
            my $temperature_is_processing = 0;

            for my $tag ( @{$dataset->{tags}} ) {
                if( $values->{$tag}[0] =~
                    /((?:\w+\s+)?(?:anneal\w*|quench\w*|prepare\w*)(?:\s+\w+)?)\s+
                     (P(?:ressure)?|T(?:emperature)?)\s*=\s*([-+0-9.E]+)\s*
                     ((?:deg(?:ree(?:s)?)?\s+)?[\w]+)/xi
                    ) {
                    warn "WARNING, structure marked as '$1', not taking "
                        . ($1 eq "P" ? "pressure" : "temperature") . " value\n";
                    $temperature_is_processing = 1;
                }

                my $number =
                    '(?:[-+]?' .
                    '(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)' .
                    '(?:[eE][-+]?[0-9]+)?)';
                my $number_word =
                    '(?:one|two|three|four|five|six|seven|eight|nine|ten)';
                my $number_or_word = "(?:$number|$number_word\\s)";
                my $temp_units =
                    '(?-i:(?:[KC]/h(?:our)?|[KC]\s+hr?\^-1\^|' .
                    'K|[Cc]|oC|\(C|[Dd]eg(?:rees?)?\s*C))';
                my $pressure_units = '(?-i:GPa|MPa|kPa|Pa|kbar|bar)';
                my $time_units =
                    '(?:min|months?|m|seconds?|sec|s|hours?|hrs?|h|days?|d|' .
                    'years?|y|weeks?|[KC]/h(?:our)?|[KC]\s+hr?\^-1\^)';
                my $initial_words =
                    '(?:initially|after)';
                my $processing_words =
                    '(?:un)?' .
                    '(?:cool|rate|quench|anneal|heat[ie]|heat-treat' .
                    '|synthe[sz]ized|prepare|isoth?ermally\s+order)';
                my $synthesis_words =
                    '(?:prepar|synthe[sz]iz)';

                for my $units (( $temp_units, $pressure_units )) {
                    my $value;
                    for my $tag_value ( split m/(?<![0-9\s]min)\.\s/,
                                              $values->{$tag}[0] ) {
                        while( $tag_value =~
                           /((?:
                              (?:^|\s)(?:$initial_words\s+)?$processing_words
                              .*?\s+$number_or_word+\s*$units
                              (?:[^0-9]*?\s$number_or_word\s*$time_units)?
                              (?:.*?(?:quench|anneal|heat|synthe[sz]ized)\w*$)?)
                              |
                              (?:(?:^|\s)$number_or_word\s*$units
                               (?:.*\s$number_or_word\s*$time_units)?
                               (?:.*(?:quench|anneal)\w*))$
                              |(?:P(?:ressure)?\s*=\s*$number\s*$units\s+
                                  in\s+(?:air|.*\scell)
                                  (?:\s+after\s+decompression)?
                               )
                              )(?:\W|$)/imsxg
                        ) {
                            my $newval = $1;
                            $newval =~ s/(?:$initial_words\s+)?
                                  ($processing_words)(?:ed|i?ng)
                                  (.+\1)
                                  /$1/ixsm;
                            $newval =~ s/($synthesis_words(?:ed|i?ng)\s+
                                  (?:by|in))
                                  .+($processing_words)
                                  /$2/ixsm;
                            $value = defined $value ?
                                  $value . "; " . $newval : $newval;
                        }
                    }
                    if( defined $value ) {
                        my $key;
                        if( $units ne $pressure_units ) {
                            $key  = "_exptl_crystal_thermal_history";
                        } else {
                            $key  = "_exptl_crystal_pressure_history";
                        }
                        $value =~ s/\n/ /g;
                        $value =~ s/\s+/ /g;
                        $value =~ s/^\s+|\s+$//g;
                        if( $value =~ /^(P(?:ressure)?\s*=\s*)($number)(\s.*)$/ ) {
                            my $prefix = $1;
                            my $numeric_value = $2;
                            my $suffix = $3;
                            $value =
                                $prefix . sprintf( "%g", $numeric_value ) . $suffix;
                        }
                        if( !exists $values->{$key} ) {
                            $values->{$key}[0] = $value;
                        }
                    }
                }
            }
            for my $tag ( @{$dataset->{tags}} ) {
                if( $values->{$tag}[0] =~
                    /P(?:ressure)?\s*=\s*([-+0-9.eE]+)\s*([\w]+)/ ) {
                    my $pressure = $1;
                    my $units = $2;
                    my $coefficient;
                    eval {
                        $coefficient = get_pressure_unit_coefficient( $units );
                    };
                    if( $@ ) {
                        warn $@;
                        next;
                    }
                    my $pressure_kPa =
                        sprintf( "%7.5g", $pressure * $coefficient );
                    $pressure_kPa =~ s/\s//g;
                    if( !exists $values->{_diffrn_ambient_pressure} ) {
                        $values->{_diffrn_ambient_pressure}[0] = $pressure_kPa;
                        warn "NOTE, P = $pressure, $units, $coefficient\n";
                    } else {
                        if( $values->{_diffrn_ambient_pressure}[0] !=
                            $pressure_kPa ) {
                            warn "WARNING, pressure given in '$tag' value, "
                               . "interpreted as $pressure $units "
                               . ($units eq "kPa" ? "" : "($pressure_kPa kPa) ")
                               . "is not equal to the value in the "
                               . "_diffrn_ambient_pressure tag ("
                               . $values->{_diffrn_ambient_pressure}[0] . " GPa)\n";
                        }
                    }
                }
                if( !$temperature_is_processing &&
                    $values->{$tag}[0] =~
                      /T(?:emperature)?\s*=\s*([-+0-9.eE]+)\s*
                      ((?:deg(?:ree(?:s)?)?\s+)?[\w]+)/x ) {
                    my $temperature = $1;
                    my $units = $2;
                    my $shift;
                    eval {
                        $shift = get_temperature_unit_shift( $units );
                    };
                    if( $@ ) {
                        warn $@;
                        next;
                    };
                    my $temp_K = $temperature + $shift;
                    if( !exists $values->{_diffrn_ambient_temperature} ) {
                        $values->{_diffrn_ambient_temperature}[0] = $temp_K;
                        warn "NOTE, T = $temperature, $units, $shift\n";
                    } else {
                        if( abs($values->{_diffrn_ambient_temperature}[0] - $temp_K)
                            >= 0.001 ) {
                            warn "WARNING, temperature given in '$tag' value, "
                               . "interpreted as $temperature $units "
                               . ($units eq "K" ? "" : "($temp_K K) ")
                               . "is not equal to the value in the "
                               . "_diffrn_ambient_temperature tag ("
                               . $values->{_diffrn_ambient_temperature}[0] . " K)\n";
                        }
                    }
                }
            }
        };
        if ($@) {
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname
            }, $die_on_error_level->{'ERROR'} )
        };

        print_cif( $dataset, {
            exclude_misspelled_tags => 0,
            preserve_loop_order => 1,
            fold_long_fields => 0,
            dictionary_tags => \%dictionary_tags,
            dictionary_tag_list => \@dictionary_tags,
            keep_tag_order => $keep_tag_order,
        } );
    }
}

sub get_pressure_unit_coefficient($)
{
    my ($units) = @_;

    for ($units) {
        return 1.0E+6 * 1E-1 if /^kbar$/;
        return 1.0E+6 * 1E-1 if /^Kbar$/;
        return 1.0E+6 * 1E-1 if /^kb$/;
        return 1.0E+6 * 1E-4 if /^atm$/;
        return 1.0E+6 * 1E-4 if /^bar$/;
        return 1.0E+6 * 1E-6 if /^kPa$/;
        return 1.0E+6 * 1E-3 if /^MPa$/;
        return 1.0E+6 * 1    if /^GPa$/;
        return 1.0E+6 * 1    if /^Gpa$/;
    }
    die "ERROR, unknown pressure units '$units'\n";
}

sub get_temperature_unit_shift($)
{
    my ($units) = @_;
    for ($units) {
        return 0 if /^K$/;
        return 273.15 if /^C$/;
        return 273.15 if /^deg C$/;
        return 273.15 if /^degree C$/;
        return 273.15 if /^degrees C$/;
    }
    die "ERROR, unknown temperature units '$units'\n";
}
