#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2020-07-20 16:43:00 +0300 (Mon, 20 Jul 2020) $ 
#$Revision: 8230 $
#$URL: svn+ssh://www.crystallography.net/home/coder/svn-repositories/cod-tools/tags/v3.0.1/scripts/cif2xyz $
#------------------------------------------------------------------------------
#*
#* Read CIF files and print atom coordinates in XYZ format.
#*
#* USAGE:
#*    $0 --options input.cif
#**

use strict;
use warnings;
use COD::CIF::Parser qw( parse_cif );
use COD::AtomProperties;
use COD::CIF::Data qw( get_cell );
use COD::Fractional qw( symop_ortho_from_fract );
use COD::Spacegroups::Symop::Algebra qw( symop_vector_mul );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw( process_warnings
                          process_errors
                          process_parser_messages
                          report_message );
use COD::ToolsVersion;

my $die_on_error_level = {
    ERROR   => 1,
    WARNING => 0,
    NOTE    => 0
};

my $print_radii = 0;
my $add_xyz_header = 1;
my $use_parser = "c"; # Used CIF parser

#* OPTIONS:
#*   --print-radii
#*                     Append covalent radii to the atom coordinates.
#*   --no-print-radii, --dont-print-radii, --xyz-only
#*                     Print only Cartesian XYZ coordinates for input atoms
#*                     (default).
#*
#*   --add-xyz-header
#*                     Add the total number of atoms on the first line of
#*                     the output, followed by an empty line (default).
#*
#*   --do-not-add-xyz-header,
#*   --dont-add-xyz-header,
#*   --no-add-xyz-header
#*                     Do not add the total number of atoms on the first line.
#*
#*   --use-perl-parser
#*   --use-c-parser
#*                     Specify parser to parse CIF files (default: C parser).
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    "--print-radii"                       => sub { $print_radii = 1 },
    "--no-print-radii,--dont-print-radii" => sub { $print_radii = 0 },
    "--xyz-only"                          => sub { $print_radii = 0 },

    "--add-xyz-header" => sub { $add_xyz_header = 1 },
    "--do-not-add-xyz-header,--dont-add-xyz-header,--no-add-xyz-header" =>
        sub { $add_xyz_header = 0 },

    "--use-perl-parser" => sub { $use_parser = "perl" },
    "--use-c-parser"    => sub { $use_parser = "c" },

    "--options"      => sub { options; exit },
    "--help,--usage" => sub { usage; exit },
    '--version'      => sub { print 'cod-tools version ',
                              $COD::ToolsVersion::Version, "\n";
                              exit }
);

@ARGV = ("-") unless @ARGV;

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

for my $filename (@ARGV) {

    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );

    if( !@{$data} || !defined $data->[0] || !defined $data->[0]{name} ) {
        report_message( {
           'program'   => $0,
           'filename'  => $filename,
           'err_level' => 'WARNING',
           'message'   => 'file seems to be empty'
        }, $die_on_error_level->{'WARNING'} );
        next;
    }

    for my $dataset (@$data) {
        my $dataname = 'data_' . $dataset->{'name'};

        local $SIG{__WARN__} = sub {
            process_warnings( {
                'message'       => @_,
                'program'       => $0,
                'filename'      => $filename,
                'add_pos'       => $dataname
            }, $die_on_error_level )
        };

        eval {
            local $, = ' ';
            local $\ = "\n";

            my $values = $dataset->{values};

            my @cell = get_cell( $values );

            my $f2o = symop_ortho_from_fract(@cell);

            my $atom_site_tag;
            if( exists $values->{_atom_site_label} ) {
                $atom_site_tag = '_atom_site_label';
            } elsif( exists $values->{_atom_site_type_symbol} ) {
                $atom_site_tag = '_atom_site_type_symbol';
            } else {
                die 'ERROR, neither \'_atom_site_label\' nor ' .
                    "'_atom_site_type_symbol' data items were found\n";
            }

            my $atom_labels = $values->{$atom_site_tag};

            if( $add_xyz_header ) {
                print scalar @$atom_labels;
                print '';
            }

            for (my $i = 0; $i < @$atom_labels; $i++) {
                my $atom_type;
                if( exists $values->{_atom_site_type_symbol} &&
                    defined $values->{_atom_site_type_symbol}[$i] &&
                    $values->{_atom_site_type_symbol}[$i] ne '?' ) {
                    $atom_type = $values->{_atom_site_type_symbol}[$i];
                    if( $atom_type =~ m/^([A-Za-z]{1,2})/ ) {
                        $atom_type = ucfirst( lc( $1 ));
                    }
                } else {
                    if( $values->{_atom_site_label}[$i] =~
                        m/^([A-Za-z]{1,2})/ ) {
                        $atom_type = ucfirst( lc( $1 ));
                    } else {
                        die 'ERROR, could not determine atom type for '
                          . "atom '$values->{_atom_site_label}[$i]'\n";
                    }
                }

                if( $print_radii && !exists $COD::AtomProperties::atoms{$atom_type} ) {
                    die "ERROR, unknown atom type '$atom_type'\n";
                }
                my $covalent_radius =
                    $COD::AtomProperties::atoms{$atom_type}->{covalent_radius};

                my @atom_xyz = map { s/\([0-9]+\)$//; $_ }
                               map { $values->{"_atom_site_fract_$_"}[$i] }
                                   qw( x y z );
                my $coordinates_ortho = symop_vector_mul( $f2o, \@atom_xyz );

                print $atom_type,
                      @{$coordinates_ortho},
                      ($print_radii ? $covalent_radius : ());
            }
        };
        if ($@) {
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname
            }, $die_on_error_level->{'ERROR'} );
        }
    }
}
