#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2020-07-20 16:43:00 +0300 (Mon, 20 Jul 2020) $
#$Revision: 8230 $
#$URL: svn+ssh://www.crystallography.net/home/coder/svn-repositories/cod-tools/tags/v3.0.1/scripts/cif_find_symmetry $
#------------------------------------------------------------------------------
#*
#* Find the symmetry of the atoms in the unit cell of a CIF crystal
#* structure. In order to obtain correct results most of the time, all
#* atoms of the unit cell should be present in the input. This can be
#* achieved by using cif_p1 or similar programs.
#*
#* USAGE:
#*    $0 --options input1.cif input*.cif
#**

use strict;
use warnings;
use File::Basename qw( basename );
use COD::Algebra::Vector qw( matrix_vector_mul
                             vector_add
                             vector_modulo_1
                             vector_sub );
use COD::AtomProperties;
use COD::Cell qw( vectors2cell );
use COD::CIF::Data qw( get_cell );
use COD::CIF::Data::AtomList qw( atom_array_from_cif );
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::CIF::Tags::Manage qw( exclude_tag rename_tag set_loop_tag set_tag );
use COD::CIF::Tags::Print qw( print_cif );
use COD::ErrorHandler qw( process_parser_messages
                          process_warnings
                          report_message );
use COD::Fractional qw( ortho_from_fract );
use COD::Serialise qw( serialiseRef );
use COD::Spacegroups::Lookup::COD qw( @table @extra_settings );
use COD::Spacegroups::Names qw( @names );
use COD::Spacegroups::Symop::Algebra qw( flush_zeros_in_symop
                                         symop_det
                                         symop_mul
                                         symop_transpose );
use COD::Spacegroups::Symop::Parse qw( string_from_symop_reduced );
use COD::SOptions qw( getOptions get_value );
use COD::SPGLib qw( get_sym_dataset );
use COD::SUsage qw( usage options );
use COD::ToolsVersion;
use COD::UserMessage qw( error );

my $use_parser = 'c';
my $findsym_method = 'spglib';
my $symprec = 1e-5;
my $zero_threshold = 1e-10;
my $dump_symmetry_dataset = 0;
my $invert_CoB = 1;

if( $COD::SPGLib::spglib_version =~ /^0\./ ||
    $COD::SPGLib::spglib_version =~ /^1\.[1234567]\./ ||
    $COD::SPGLib::spglib_version eq '1.8' ) {
    # Prior to spglib version 1.8.1, spglib-provided change-of-basis
    # matrix was used to obtain cell constants and atomic positions.
    # Starting with 1.8.1 the change-of-basis matrix has to be inverted
    # before obtaining cell constants and atomic positions.
    $invert_CoB = 0;
}

# Format for floating point numbers:
my $fformat = "%.12g";

my $die_on_error_level = {
    ERROR   => 0,
    WARNING => 0,
    NOTE    => 0,
};

my %tags_to_move = (
    '_symmetry_Int_Tables_number'     => '_cod_original_sg_number',
    '_symmetry_space_group_name_H-M'  => '_cod_original_sg_symbol_H-M',
    '_symmetry_space_group_name_Hall' => '_cod_original_sg_symbol_Hall',
    '_space_group_IT_number'          => '_cod_original_sg_number',
    '_space_group_name_H-M_alt'       => '_cod_original_sg_symbol_H-M',
    '_space_group_name_Hall'          => '_cod_original_sg_symbol_Hall',
);

my @cell_tags = qw(
    _cell_length_a
    _cell_length_b
    _cell_length_c
    _cell_angle_alpha
    _cell_angle_beta
    _cell_angle_gamma
);

#* OPTIONS:
#*   --precision  1e-5,
#*   --symmetry-precision  1e-5
#*                     Tolerance of distance between atomic positions and between
#*                     lengths of lattice vectors to be tolerated in the symmetry
#*                     finding (default: 1e-5).
#*
#*   --use-spglib
#*                     Use spglib for symmetry search. Currently, spglib is
#*                     the only implemented method.
#*
#*   -F, --float-format "%.12g"
#*                     Specify format to print out floating point numbers.
#*
#*   --dump-symmetry-dataset
#*                     Suppress regular output and dump symmetry data structure,
#*                     as returned from spglib.
#*
#*   --dont-dump-symmetry-dataset,
#*   --no-dump-symmetry-dataset
#*                     Produce regular output.
#*
#*   --use-perl-parser
#*                     Use Perl parser for CIF parsing.
#*   --use-c-parser
#*                     Use Perl & C parser for CIF parsing (default).
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**

@ARGV = getOptions(
    '--precision,--symmetry-precision' => \$symprec,

    '--use-spglib' => sub { $findsym_method = 'spglib' },

    "-F,--float-format" => \$fformat,

    '--dump-symmetry-dataset'      => sub { $dump_symmetry_dataset = 1 },
    '--dont-dump-symmetry-dataset' => sub { $dump_symmetry_dataset = 0 },
    '--no-dump-symmetry-dataset'   => sub { $dump_symmetry_dataset = 0 },

    '--use-perl-parser' => sub { $use_parser = 'perl' },
    '--use-c-parser'    => sub { $use_parser = 'c' },

    '--options'      => sub { options; exit },
    '--help,--usage' => sub { usage; exit },
    '--version'      => sub { print 'cod-tools version ',
                              $COD::ToolsVersion::Version, "\n";
                              exit }
);

@ARGV = ( '-' ) unless @ARGV;

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

for my $filename (@ARGV) {
    my $options = { parser   => $use_parser,
                    no_print => 1 };

    my( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );

    if( !@{$data} || !defined $data->[0] || !defined $data->[0]{name} ) {
        report_message( {
           'program'   => $0,
           'filename'  => $filename,
           'err_level' => 'WARNING',
           'message'   => 'file seems to be empty'
        }, 0 );
        next;
    }

    canonicalize_all_names( $data );

    for my $dataset (@$data) {

        my $dataname = 'data_' . $dataset->{'name'};

        local $SIG{__WARN__} = sub {
            process_warnings( {
                'message'  => @_,
                'program'  => $0,
                'filename' => $filename,
                'add_pos'  => $dataname
            }, $die_on_error_level )
        };

        eval {
            if( $findsym_method eq 'spglib' ) {
                find_symmetry_spglib( $dataset,
                                      $symprec,
                                      $invert_CoB,
                                      $dump_symmetry_dataset );
            } else {
                die 'ERROR, unknown symmetry search algorithm: ' .
                    "'$findsym_method'\n";
            }

            if( !$dump_symmetry_dataset ) {
                print_cif( $dataset, {
                    preserve_loop_order => 1,
                    keep_tag_order => 1
                } );
            }
        };
        if( $@ ) {
            $@ =~ s/^ERROR, //;
            error( {
                'program'  => $0,
                'filename' => $filename,
                'add_pos'  => $dataname,
                'message'  => $@
            } );
        }
    }
}

sub find_symmetry_spglib
{
    my( $dataset, $symprec, $invert_CoB, $dump_symmetry_dataset ) = @_;

    $symprec = 1e-5 if !defined $symprec;

    my $values = $dataset->{values};

    my @cell = get_cell( $values );
    my $lattice = symop_transpose( [
        [ ortho_from_fract( \@cell, 1, 0, 0 ) ],
        [ ortho_from_fract( \@cell, 0, 1, 0 ) ],
        [ ortho_from_fract( \@cell, 0, 0, 1 ) ]
    ] );

    my $atoms = atom_array_from_cif( $dataset, {} );

    my @positions = map { $_->{coordinates_fract} } @$atoms;
    my @types = map { $COD::AtomProperties::atoms{$_->{chemical_type}}->{atomic_number} }
                    @$atoms;

    my $sym = get_sym_dataset( $lattice,
                               \@positions,
                               \@types,
                               $symprec );

    if( !$dump_symmetry_dataset ) {

        my $CoB = $sym->{transform_matrix};

        if( $invert_CoB ) {
            $CoB = mat3_inv( $CoB );
        }

        my $lattice_now = symop_mul( $lattice, $CoB );

        my $a = [ $lattice_now->[0][0],
                  $lattice_now->[1][0],
                  $lattice_now->[2][0] ];

        my $b = [ $lattice_now->[0][1],
                  $lattice_now->[1][1],
                  $lattice_now->[2][1] ];

        my $c = [ $lattice_now->[0][2],
                  $lattice_now->[1][2],
                  $lattice_now->[2][2] ];

        my @cell_now = vectors2cell( $a, $b, $c );

        for( my $i = 0; $i < @cell_tags; $i++ ) {
            set_tag( $dataset, $cell_tags[$i],
                     sprintf( $fformat, $cell_now[$i] ) );
        }

        for my $tag (sort keys %tags_to_move) {
            next if !exists $values->{$tag};
            # next if exists $values->{$tags_to_move{$tag}};
            rename_tag( $dataset, $tag, $tags_to_move{$tag} );
        }

        my( $spacegroup_info ) = grep { $_->{hall} eq $sym->{hall} }
                                      (@table, @extra_settings);

        if( !$spacegroup_info ) {
            my $sgHall = $sym->{hall};
            $sgHall =~ s/ //g;
            ( $spacegroup_info ) = grep { my $key = $_->{hall};
                                          $key =~ s/ //g;
                                          $key eq $sgHall }
                                        (@table, @extra_settings);
        }
        my $choice = $sym->{choice};
        $choice =~ s/\s//g;
        if( !$spacegroup_info && $sym->{choice} ) {
            ( $spacegroup_info ) = grep { $_->{hall} eq
                                          $sym->{hall} . $choice }
                                        (@table, @extra_settings);
        }

        if( !$spacegroup_info ) {
            if( $choice ) {
                warn "neither space group symbol '$sym->{hall}', " .
                     "nor '$sym->{hall} $sym->{choice}' is recognised\n";
            } else {
                warn 'spacegroup symbol ' .
                     "'$sym->{hall}' is not recognised\n";
            }
            $spacegroup_info = {
                                    hall   => $sym->{hall},
                                    hermann_mauguin =>
                                        $sym->{international_symbol} .
                                        ($choice ? " $choice" : ''),
                                    number => $sym->{number}
                               };
        }

        set_tag( $dataset,
                 '_space_group_IT_number',
                 $spacegroup_info->{number} );
        set_tag( $dataset,
                 '_space_group_name_H-M_alt',
                 $spacegroup_info->{hermann_mauguin} );
        set_tag( $dataset,
                 '_space_group_name_Hall',
                 $spacegroup_info->{hall} );

        for my $tag (qw( _space_group_symop_operation_xyz
                         _symmetry_equiv_pos_as_xyz )) {
            next if !exists $values->{$tag};
            if( exists $dataset->{inloop}{$tag} ) {
                my $loop_id = $dataset->{inloop}{$tag};
                my @tags_to_remove = @{$dataset->{loops}[$loop_id]};
                for my $loop_tag (@tags_to_remove) {
                    exclude_tag( $dataset, $loop_tag );
                }
            } else {
                exclude_tag( $dataset, $tag );
            }
        }

        set_loop_tag( $dataset, '_space_group_symop_id', undef,
                      [ 1..@{$sym->{symops}} ] );
        set_loop_tag( $dataset, '_space_group_symop_operation_xyz',
                      '_space_group_symop_id',
                      [ map { string_from_symop_reduced( $_ ) }
                        map { flush_zeros_in_symop( $_ ) }
                            @{$sym->{symops}} ] );

        my $atom_site_tag;
        for my $tag (qw( _atom_site_label
                         _atom_site_type_symbol )) {
            next if !exists $values->{$tag};
            $atom_site_tag = $tag;
            last;
        }
        if( !$atom_site_tag ) {
            die 'ERROR, neither \'_atom_site_label\' nor ' .
                '\'_atom_site_type_symbol\' tag present' . "\n";
        }

        my %atom_site_loop_tags;
        my $atom_site_loop_id = $dataset->{inloop}{$atom_site_tag};

        for my $tag (@{$dataset->{loops}[$atom_site_loop_id]}) {
            $atom_site_loop_tags{$tag} = $values->{$tag};
        }

        my %atom_site_loop_tags_now;
        my %seen_atoms;

        for( my $i = 0; $i < @{$values->{$atom_site_tag}}; $i++ ) {
            next if exists $seen_atoms{$sym->{equivalent_atoms}[$i]};
            for my $tag (keys %atom_site_loop_tags) {
                if( !exists $atom_site_loop_tags_now{$tag} ) {
                    $atom_site_loop_tags_now{$tag} = []
                }
                push( @{$atom_site_loop_tags_now{$tag}},
                      $atom_site_loop_tags{$tag}->[$i] );
            }
            $seen_atoms{$sym->{equivalent_atoms}[$i]} = 1;
        }

        for my $tag (keys %atom_site_loop_tags_now) {
            set_loop_tag( $dataset, $tag, $atom_site_tag,
                          $atom_site_loop_tags_now{$tag} );
        }

        my $shift;
        if( $invert_CoB ) {
            $CoB = $sym->{transform_matrix};
            $shift = $sym->{origin_shift};
        } else {
            $CoB = mat3_inv( $CoB );
            $shift = vector_sub( [0, 0, 0], $sym->{origin_shift} );
        }

        for( my $i = 0; $i < @{$values->{$atom_site_tag}}; $i++ ) {
            my $coordinates = [ map { s/\([0-9]+\)$//; $_ }
                (
                    $values->{'_atom_site_fract_x'}[$i],
                    $values->{'_atom_site_fract_y'}[$i],
                    $values->{'_atom_site_fract_z'}[$i],
                )
            ];
            my $coordinates_now =
                vector_modulo_1( vector_add( matrix_vector_mul( $CoB,
                                                                $coordinates ),
                                             $shift ) );
                
            ( $values->{'_atom_site_fract_x'}[$i],
              $values->{'_atom_site_fract_y'}[$i],
              $values->{'_atom_site_fract_z'}[$i] ) =
                map { $_ = ( $_ > $zero_threshold ? $_ : 0 );
                     sprintf( $fformat, $_ ) }
                    @$coordinates_now;
        }
    } else {
        print "name: $dataset->{name}\n";
        serialiseRef( $sym );
    }
}

sub mat3_inv
{
    my( $m ) = @_;
    my $c = 1 / symop_det($m);
    return [[ $c * ( $m->[1][1] * $m->[2][2] - $m->[1][2] * $m->[2][1] ),
             -$c * ( $m->[0][1] * $m->[2][2] - $m->[0][2] * $m->[2][1] ),
              $c * ( $m->[0][1] * $m->[1][2] - $m->[0][2] * $m->[1][1] )],
            [-$c * ( $m->[1][0] * $m->[2][2] - $m->[1][2] * $m->[2][0] ),
              $c * ( $m->[0][0] * $m->[2][2] - $m->[0][2] * $m->[2][0] ),
             -$c * ( $m->[0][0] * $m->[1][2] - $m->[0][2] * $m->[1][0] )],
            [ $c * ( $m->[1][0] * $m->[2][1] - $m->[1][1] * $m->[2][0] ),
             -$c * ( $m->[0][0] * $m->[2][1] - $m->[0][1] * $m->[2][0] ),
              $c * ( $m->[0][0] * $m->[1][1] - $m->[0][1] * $m->[1][0] )]];
}
