#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: saulius $
#$Date: 2022-04-06 10:12:29 +0300 (Wed, 06 Apr 2022) $ 
#$Revision: 9257 $
#$URL: svn+ssh://www.crystallography.net/home/coder/svn-repositories/cod-tools/tags/v3.6.0/scripts/codxyz2fract $
#------------------------------------------------------------------------------
#*
#* Take an XYZ format [1] molecular file and convert orthogonal coordinates
#* to fractional ones. Unit cell constants can be given as command line option
#* parameters.
#*
#* [1] https://en.wikipedia.org/wiki/XYZ_file_format
#*
#* USAGE:
#*    $0 --options inputs*.xyz
#*    $0 --options < inp.xyz
#**

use strict;
use warnings;

use COD::Fractional qw( symop_fract_from_ortho fract_from_ortho );
use COD::XYZ qw( 
    unit_cell_from_vectors
    matrix3x3_invert
    cells_are_equal
);

use COD::ErrorHandler qw( process_errors process_warnings );
use COD::SOptions qw( getOptions get_value );
use COD::SUsage qw( usage options );
use COD::ToolsVersion qw( get_version_string );

# Print atom records from the @{$atoms} array to STDOUT in the XYZ
# format:
sub print_atoms($$$$)
{
    my ($atoms, $comment, $cell, $float_format) = @_;

    print int(@{$atoms}), "\n";
    print $comment;
    if( $cell ) {
        local $, = ' ';
        print ' CELL:', map {sprintf($float_format,$_)} @{$cell};
    }
    print "\n";

    for my $atom (@{$atoms}) {
        printf "%-8s $float_format $float_format $float_format\n", @{$atom};
    }

    return;
}

# The '$unit_cell_is_given' flag signals if information that provides
# unit cell parameters (--cell or --lattice) is given on the command
# line:
my $unit_cell_is_given = 0;

my $cell;
my $lattice;
my $float_format = '%21.14e';

my @cell;
my @lattice;

my $f4o;

my $die_on_error_level = {
    ERROR   => 1,
    WARNING => 0,
    NOTE    => 0
};

#** OPTIONS:
#**   -c, --cell "10 10 10 90 90 90"
#**                     Specify unit cell for conversion.
#**
#**   -l, --lattice "0.1 0 0  0 0.2 0  0 0 0.15"
#**                     Specify unit cell vectors in ortho frame for conversion.
#**
#**   -f, --float-format "%21.14e"
#**                     Specify format for floating point output.
#**                     For Perl, a usual "printf" format can be given.
#**   -H, --human-readable
#**                     Use format "%12.6f" for better human readability.
#**   -M, --machine-readable
#**                     Use format "%21.14e" to maintain precision. Default.
#**
#
#           For Ada, floating point format consists of three numbers:
#           the integer part length, the fraction part length and the exponent length.
#           Specifying exponent part as 0 outputs no exponent at all (as with C '%f' format).
# 
#**   --help, --usage
#**                     Output a short usage message (this message) and exit.
#**   --version
#**                     Output version information and exit.
#**
@ARGV = getOptions(
    '-c,--cell' => sub {
        $cell = get_value();
        $cell =~ s/,/ /g;
        @cell = split( ' ', $cell );
        $f4o = symop_fract_from_ortho( @cell );
        @lattice = @{$f4o};
        $unit_cell_is_given = 1;
    },

    '-l,--lattice' => sub {
        $lattice = get_value();
        $lattice =~ s/,/ /;
        my @m = split( ' ', $lattice );
        @lattice = (
            [ $m[0], $m[3], $m[6] ],
            [ $m[1], $m[4], $m[7] ],
            [ $m[2], $m[5], $m[8] ],
            );
        @cell = unit_cell_from_vectors( \@lattice );
        $f4o = matrix3x3_invert( \@lattice );
        $unit_cell_is_given = 1;
    },

    '-f,--float-format' => \$float_format,
    '-H,--human-readable' => sub {
        $float_format = '%12.6f';
    },
    '-M,--machine-readable' => sub {
        $float_format = '%21.14e';
    },
    '--options'         => sub { options; exit },
    '--help,--usage'    => sub { usage; exit },
    '--version'         => sub { print get_version_string(), "\n"; exit }
);

while(<>) {

    local $SIG{__WARN__} = sub {
        process_warnings( {
            'message'       => @_,
            'program'       => $0,
            'filename'      => $ARGV,
        }, $die_on_error_level )
    };

    my $N = $_;
    my $comment = <>;

    chomp($comment);

    my @atoms;
    for my $i (0..$N-1) {
        push( @atoms, [split(' ', <>)] );
    }

    my $unit_cell_is_known = $unit_cell_is_given;

    eval {
        if( $comment =~ s/\s*LATTICE:(.*)$// ) {
            my @cell_vectors = split( ' ', $1 );
            my @file_lattice = (
                [ $cell_vectors[0], $cell_vectors[3], $cell_vectors[6] ],
                [ $cell_vectors[1], $cell_vectors[4], $cell_vectors[7] ],
                [ $cell_vectors[2], $cell_vectors[5], $cell_vectors[8] ],
                );
            my $file_f4o = matrix3x3_invert( \@file_lattice );
            my @file_cell = unit_cell_from_vectors( \@file_lattice );
            if( $unit_cell_is_given ) {
                if( !cells_are_equal( \@cell, \@file_cell ) ) {
                    warn 'unit cell derived from the lattice given in the ' .
                         'file differs from the one provided on the command ' .
                         'line -- unit cell from the command line will be ' .
                         'used' . ".\n";
                }
            } else {
                @cell = @file_cell;
                @lattice = @file_lattice;
                $f4o = $file_f4o;
                $unit_cell_is_known = 1;
            }
        }

        if( !$unit_cell_is_known ) {
            die 'unable to process the file -- lattice vectors not known' . 
                "\n";
        }

        for my $a (@atoms) {
            my ($x, $y, $z);
            # The script named '*-direct' (e.g. codxyz2fract-direct) calls
            # the "fract_from_ortho()" function that converts coordinates
            # directly, without first generating a conversion matrix. This
            # method might be more convenient in some cases (shorter code)
            # but in general it should be slower and is provided here only
            # for testing. Since the results from both direct conversion
            # and the conversion that uses the intermediate matrix MUST be
            # the same, there is no option to switch the behaviour, but
            # the behaviour is changed if the script is called under a
            # special name, e.g. via a symlink in the test directory:
            if( $0 =~ /-direct$/ ) {
                ($x, $y, $z) = fract_from_ortho( \@cell, @{$a}[1..3] );
            } else {
                $x = $f4o->[0][0] * $a->[1] + $f4o->[0][1] * $a->[2] + $f4o->[0][2] * $a->[3];
                $y = $f4o->[1][0] * $a->[1] + $f4o->[1][1] * $a->[2] + $f4o->[1][2] * $a->[3];
                $z = $f4o->[2][0] * $a->[1] + $f4o->[2][1] * $a->[2] + $f4o->[2][2] * $a->[3];
            }
            ($a->[1],$a->[2],$a->[3]) = ($x,$y,$z);
        }

        print_atoms( \@atoms, $comment, \@cell, $float_format );
    };
    if ($@) {
        my $additional_position = length($comment) > 20 ?
                                  substr($comment, 0, 20) . '...' :
                                  $comment;
        process_errors( {
          'message'  => $@,
          'program'  => $0,
          'filename' => $ARGV,
          'add_pos'  => $additional_position
        }, $die_on_error_level->{'ERROR'} );
    }
}
