#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: andrius $
#$Date: 2019-01-15 16:14:18 +0200 (Tue, 15 Jan 2019) $ 
#$Revision: 6641 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.6/scripts/cif_hkl_check $
#------------------------------------------------------------------------------
#*
#* Check the correspondence between CIF and diffraction data files, taking
#* data block names, cell parameters, author lists and symmetry group names
#* into account.
#*
#* USAGE:
#*    $0 --options input.cif input.hkl input2.cif input2.hkl input*.{cif,hkl}
#**

use strict;
use warnings;
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Data qw( get_source_data_block_name );
use COD::CIF::Data::Diff qw( comm );
use COD::CIF::Data::Check qw( check_pdcif_relations );
use COD::CIF::Data::CODFlags qw( has_hkl
                                 has_powder_diffraction_intensities
                                 has_twin_hkl
                                 @hkl_tags
                                 @powder_diffraction_intensity_tags );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::Precision qw( cmp_cif_numbers );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw( process_parser_messages report_message );
use COD::ToolsVersion;

my $use_parser = "c";

my $die_on_error_level = {
    ERROR   => 1,
    WARNING => 0,
    NOTE    => 0
};

#* OPTIONS:
#*   --use-perl-parser
#*                     Use Perl parser for CIF parsing.
#*   --use-c-parser
#*                     Use Perl & C parser for CIF parsing (default).
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    "--use-perl-parser" => sub { $use_parser = "perl" },
    "--use-c-parser"    => sub { $use_parser = "c" },
    "--options"         => sub { options; exit },
    "--help,--usage"    => sub { usage; exit },
    '--version'       => sub { print 'cod-tools version ',
                               $COD::ToolsVersion::Version, "\n";
                               exit }
);

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

PAIR:
while( @ARGV > 0 ) {
    my ( $cif_file, $hkl_file );
    $cif_file = shift( @ARGV );
    if( @ARGV > 0 ) {
        $hkl_file = shift( @ARGV );
    } else {
        report_message( {
            'program'   => $0,
            'filename'  => $cif_file,
            'err_level' => 'ERROR',
            'message'   => 'missing diffraction data file'
        }, 1 );
    };

    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my $files = [ { 'filename' => $cif_file },
                  { 'filename' => $hkl_file } ];

    for my $file ( @$files ) {
        my ( $data, $error_count, $messages ) =
            parse_cif( $file->{filename}, $options );

        process_parser_messages( $messages, $die_on_error_level );
        next PAIR if $error_count > 0;

        if( !@$data || !defined $data->[0] ||
            !defined $data->[0]{name} ) {
            report_message( {
                'program'   => $0,
                'filename'  => $file->{filename},
                'err_level' => 'WARNING',
                'message'   => 'file seems to be empty'
            }, $die_on_error_level->{'WARNING'} );
            next PAIR;
        }
        canonicalize_all_names( $data );
        $file->{data} = $data;
    }

    my $cif_data = $files->[0]{data};
    my $hkl_data = $files->[1]{data};

    if( @$hkl_data > 1 ) {
        report_message( {
            'program'   => $0,
            'filename'  => $hkl_file,
            'err_level' => 'WARNING',
            'message'   => 'file contains more than one data block',
        }, $die_on_error_level->{'WARNING'} );
        next;
    }

    my $hkl_dataset = $hkl_data->[0];
    my $hkl_values = $hkl_dataset->{values};
    if( !has_hkl( $hkl_dataset ) &&
        !has_powder_diffraction_intensities( $hkl_dataset ) &&
        !has_twin_hkl( $hkl_dataset ) ) {
        report_message( {
            'program'   => $0,
            'filename'  => $hkl_file,
            'err_level' => 'WARNING',
            'message'   => 'cannot confirm that the file contains Fobs ' .
                           'or powder diffraction data -- not all data ' .
                           'items from list (' .
                           join( ', ', map { "'$_'" } @hkl_tags ) .
                           ') and no data items from list (' .
                           join( ', ', map { "'$_'" }
                                 @powder_diffraction_intensity_tags ) .
                           ') or \'_twin_refln_[]\' category are present'
        }, $die_on_error_level->{'WARNING'} );
        next PAIR;
    }

    my $hkl_dataname = get_source_data_block_name( $hkl_dataset );
    my $cif_index;
    my @cif_datanames;
    for( my $i = 0; $i < @$cif_data; $i++ ) {
        my $cif_dataname = get_source_data_block_name( $cif_data->[$i] );
        push( @cif_datanames, $cif_dataname );
        if( $cif_dataname eq $hkl_dataname ) {
            $cif_index = $i;
            last;
        }
    }

    if( !defined $cif_index ) {
        report_message( {
            'program'   => $0,
            'filename'  => $cif_file,
            'err_level' => 'WARNING',
            'message'   => 'cannot confirm relation between any data blocks '
                         . 'in CIF and diffraction data files -- there are '
                         . 'no data blocks with the same name in both files '
                         . '(' . join( ", ", map { "'$_'" } @cif_datanames )
                         . " (CIF data block(s)) and '$hkl_dataname' "
                         . '(diffraction data data block))',
        }, $die_on_error_level->{'WARNING'} );
        next PAIR;
    }

    my $comm = comm( $cif_data->[$cif_index], $hkl_dataset,
        {
            'compare_only' =>
                [
                    '_cell_length_a',
                    '_cell_length_b',
                    '_cell_length_c',
                    '_cell_angle_alpha',
                    '_cell_angle_beta',
                    '_cell_angle_gamma',
                    '_publ_author_name',
                    '_symmetry_space_group_name_Hall',
                    '_symmetry_space_group_name_H-M',
                ],
            'comparators'  =>
                {
                    '_cell_length_a' => \&cmp_cif_numbers,
                    '_cell_length_b' => \&cmp_cif_numbers,
                    '_cell_length_c' => \&cmp_cif_numbers,
                    '_cell_angle_alpha' => \&cmp_cif_numbers,
                    '_cell_angle_beta'  => \&cmp_cif_numbers,
                    '_cell_angle_gamma' => \&cmp_cif_numbers,
                    '_publ_author_name' => \&compare_lc_strings,
                }
        } );

    foreach my $line ( @$comm ) {
        next if !defined $line->[0] || !defined $line->[2];
        my $reason;
        if( $line->[0] eq '_publ_author_name' ) {
            $reason = "publication author lists differ";
        } else {
            $reason = "values of tag '$line->[0]' differ";
        }
        $reason .= " ('" . join( ', ', map { "'$_'" }
              @{$cif_data->[$cif_index]{values}{$line->[0]}} ) .
              "' (CIF) and '" . join( ', ', map { "'$_'" }
              @{$hkl_values->{$line->[0]}} ) .
              "' (diffraction data))";
        report_message( {
            'program'   => $0,
            'filename'  => $cif_file,
            'add_pos'   => defined $hkl_dataname ? 'data_' . $hkl_dataname
                                                 : $hkl_dataname,
            'err_level' => 'WARNING',
            'message'   => 'cannot confirm relation between data blocks '
                         . "named '$hkl_dataname' from supplied CIF and "
                         . "diffraction data files -- $reason"
        }, $die_on_error_level->{'WARNING'} );
        next PAIR;
    }

    my $pdcif_messages = check_pdcif_relations( [ @$cif_data, @$hkl_data ] );
    foreach my $message ( @$pdcif_messages ) {
        my $err_level = 'NOTE';
        if( $message =~ s/^([A-Z]+), // ) {
            $err_level = $1;
        }
        report_message( {
            'program'   => $0,
            'filename'  => $cif_file,
            'add_pos'   => defined $hkl_dataname ? 'data_' . $hkl_dataname
                                                 : $hkl_dataname,
            'err_level' => $err_level,
            'message'   => $message
        }, $die_on_error_level->{$err_level} );
    }
    next if @$pdcif_messages;

    report_message( {
        'program'   => $0,
        'filename'  => $cif_file,
        'err_level' => 'NOTE',
        'message'   => 'OK',
    }, $die_on_error_level->{'NOTE'} );
}

sub compare_lc_strings
{
    my( $a, $b ) = @_;
    return lc( $a ) cmp lc( $b );
}
