#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2019-02-28 11:13:12 +0200 (Thu, 28 Feb 2019) $ 
#$Revision: 6708 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.7/scripts/cif2xyz $
#------------------------------------------------------------------------------
#*
#* Read CIF files and print atom coordinates in XYZ format.
#*
#* USAGE:
#*    $0 --options input.cif
#**

use strict;
use warnings;
use COD::CIF::Parser qw( parse_cif );
use COD::AtomProperties;
use COD::CIF::Data qw( get_cell );
use COD::Fractional qw( symop_ortho_from_fract );
use COD::Spacegroups::Symop::Algebra qw( symop_vector_mul );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw( process_warnings
                          process_errors
                          process_parser_messages
                          report_message );
use COD::ToolsVersion;

my $die_on_error_level = {
    ERROR   => 1,
    WARNING => 0,
    NOTE    => 0
};

my $print_radii = 0;
my $add_xyz_header = 0; # Clearly incorrect historic default, to be
                        # changed either in bugfix or a major release
my $use_parser = "c"; # Used CIF parser

#* OPTIONS:
#*   --print-radii
#*                     Append covalent radii to the atom coordinates.
#*   --no-print-radii, --dont-print-radii, --xyz-only
#*                     Print only Cartesian XYZ coordinates for input atoms
#*                     (default).
#*
#*   --add-xyz-header
#*                     Add the total number of atoms on the first line of
#*                     the output, followed by an empty line.
#*
#*   --do-not-add-xyz-header,
#*   --dont-add-xyz-header,
#*   --no-add-xyz-header
#*                     Do not add the total number of atoms on the first line
#*                     (default).
#*
#*   --use-perl-parser
#*   --use-c-parser
#*                     Specify parser to parse CIF files. C parser is default.
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    "--print-radii" => sub{ $print_radii = 1 },
    "--no-print-radii,--dont-print-radii" => sub{ $print_radii = 0 },
    "--xyz-only" => sub{ $print_radii = 0 },

    "--add-xyz-header" => sub { $add_xyz_header = 1 },
    "--do-not-add-xyz-header,--dont-add-xyz-header,--no-add-xyz-header" =>
        sub { $add_xyz_header => 0 },

    "--use-perl-parser" => sub{ $use_parser = "perl" },
    "--use-c-parser"    => sub{ $use_parser = "c" },

    "--options"      => sub { options; exit },
    "--help,--usage" => sub { usage; exit },
    '--version'      => sub { print 'cod-tools version ',
                              $COD::ToolsVersion::Version, "\n";
                              exit }
);

@ARGV = ("-") unless @ARGV;

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

for my $filename (@ARGV) {

    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );

    if( !@{$data} || !defined $data->[0] || !defined $data->[0]{name} ) {
        report_message( {
           'program'   => $0,
           'filename'  => $filename,
           'err_level' => 'WARNING',
           'message'   => 'file seems to be empty'
        }, $die_on_error_level->{'WARNING'} );
        next;
    }

    for my $dataset (@$data) {
        my $dataname = 'data_' . $dataset->{'name'};

        local $SIG{__WARN__} = sub {
            process_warnings( {
                'message'       => @_,
                'program'       => $0,
                'filename'      => $filename,
                'add_pos'       => $dataname
            }, $die_on_error_level )
        };

        eval {
            my $values = $dataset->{values};

            my @cell = get_cell( $values );

            my $f2o = symop_ortho_from_fract(@cell);
            # perform fractional to orthogonal conversion here

            my $atom_site_tag;

            if( exists $values->{"_atom_site_label"} ) {
                $atom_site_tag = "_atom_site_label";
            } elsif( exists $values->{"_atom_site_type_symbol"} ) {
                $atom_site_tag = "_atom_site_type_symbol";
                warn 'WARNING, _atom_site_label tag was not found -- '
                   . 'a serial number will be appended to '
                   . "_atom_site_type_symbol to make atom labels\n";
            } else {
                die 'ERROR, neither _atom_site_label nor '
                  . "_atom_site_type_symbol tags were found\n";
            }

            my $atom_labels = $values->{$atom_site_tag};

            if( $add_xyz_header ) {
                print scalar @{$atom_labels}, "\n\n";
            }

            for (my $i = 0; $i < @{$atom_labels}; $i++) {
                my $covalent_radius;
                if( $print_radii ) {
                    my $atom_type;
                    if( exists $values->{_atom_site_type_symbol} &&
                        defined $values->{_atom_site_type_symbol}[$i] &&
                        $values->{_atom_site_type_symbol}[$i] ne '?' ) {
                        $atom_type = $values->{_atom_site_type_symbol}[$i];
                        if( $atom_type =~ m/^([A-Za-z]{1,2})/ ) {
                            $atom_type = ucfirst( lc( $1 ));
                        }
                    } else {
                        if( $values->{_atom_site_label}[$i] =~
                            m/^([A-Za-z]{1,2})/ ) {
                            $atom_type = ucfirst( lc( $1 ));
                        } else {
                            die 'ERROR, could not determine atom type for '
                              . "atom '$values->{_atom_site_label}[$i]'\n";
                        }
                    }
                    if( !exists $COD::AtomProperties::atoms{$atom_type} ) {
                        die "ERROR, unknown atom type '$atom_type'\n";
                    }
                    $covalent_radius =
                        $COD::AtomProperties::atoms{$atom_type}->{covalent_radius};
                }

                my @atom_xyz;
                for my $cif_fract ( "_atom_site_fract_x",
                                    "_atom_site_fract_y",
                                    "_atom_site_fract_z",)
                {
                    push( @atom_xyz, $values->{$cif_fract}[$i] );
                    $atom_xyz[-1] =~ s/\([0-9]+\)$//;
                }
                my $coordinates_ortho = symop_vector_mul( $f2o, \@atom_xyz );

                print $coordinates_ortho->[0], ' ',
                      $coordinates_ortho->[1], ' ',
                      $coordinates_ortho->[2], ' ',
                      ($print_radii ? "$covalent_radius " : ""),
                      '# ', $atom_labels->[$i], "\n";
            }
        };
        if ($@) {
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname
            }, $die_on_error_level->{'ERROR'} );
        }
    }
}
