#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2021-07-30 20:10:23 +0300 (Fri, 30 Jul 2021) $
#$Revision: 8841 $
#$URL: svn+ssh://www.crystallography.net/home/coder/svn-repositories/cod-tools/tags/v3.6.0/scripts/cif_automorphism $
#------------------------------------------------------------------------------
#*
#* Find automorphism orbits in CIF files.
#*
#* USAGE:
#*    $0 --options input1.cif input*.cif
#**

use strict;
use warnings;
use COD::AtomNeighbours qw( neighbour_list_from_cif
                            neighbour_list_to_graph );
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::CIF::Tags::Manage qw( contains_data_item exclude_tag set_loop_tag );
use COD::CIF::Tags::Print qw( print_cif );
use COD::ErrorHandler qw( process_errors process_parser_messages process_warnings );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( options usage );
use COD::ToolsVersion qw( get_version_string );
use Graph::Nauty qw( orbits );

my $neighbour_list_options = {};
my $print_atom_site_type_symbol = 0;

my $use_parser = 'c';
my $die_on_error_level = {
    ERROR   => 1,
    WARNING => 0,
    NOTE    => 0,
};

#* OPTIONS:
#*   --ignore-hydrogens
#*                     Do not consider hydrogen atoms in automorphism
#*                     detection.
#*   --no-ignore-hydrogens
#*                     Take hydrogen atoms into consideration when searching
#*                     for automorphisms (default).
#*
#*   --print-atom-site-type-symbol
#*                     Output assumed atom chemical types as values of
#*                     '_atom_site_type_symbol', if this data item is not
#*                     used.
#*   --no-print-atom-site-type-symbol
#*                     Do not output assumed atom chemical types as values
#*                     of '_atom_site_type_symbol' (default).
#*
#*   --use-c-parser
#*                     Use Perl & C parser for CIF parsing (default).
#*   --use-perl-parser
#*                     Use Perl parser for CIF parsing.
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    "--ignore-hydrogens" =>
        sub { $neighbour_list_options->{exclude_hydrogens} = 1 },
    "--no-ignore-hydrogens" =>
        sub { $neighbour_list_options->{exclude_hydrogens} = 0 },

    "--print-atom-site-type-symbol"    => sub { $print_atom_site_type_symbol = 1 },
    "--no-print-atom-site-type-symbol" => sub { $print_atom_site_type_symbol = 0 },

    "--use-c-parser"    => sub { $use_parser = "c" },
    "--use-perl-parser" => sub { $use_parser = "perl" },

    '--options'      => sub { options; exit },
    '--help,--usage' => sub { usage; exit },
    '--version'      => sub { print get_version_string(), "\n"; exit }
);

@ARGV = ("-") unless @ARGV;

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

for my $filename (@ARGV) {
    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );

    next if $err_count > 0;

    canonicalize_all_names( $data );

    for my $dataset (@$data) {
        my $dataname = 'data_' . $dataset->{name};

        local $SIG{__WARN__} = sub {
            process_warnings( {
                'message'  => @_,
                'program'  => $0,
                'filename' => $filename,
                'add_pos'  => $dataname
            }, $die_on_error_level )
        };

        eval {
            my $neighbour_list =
                neighbour_list_from_cif( $dataset,
                                         $neighbour_list_options );
            my $graph = neighbour_list_to_graph( $neighbour_list );
            my @orbits = orbits( $graph,
                                 sub { return $_[0]->{chemical_type} },
                                 sub { return $_[0]->{index} } );
            for my $orbit (0..$#orbits) {
                for (@{$orbits[$orbit]}) {
                    $_->{orbit} = $orbit;
                }
            }

            my %labels;
            for my $i (0..$#{$dataset->{values}{_atom_site_label}}) {
                my $label =  $dataset->{values}{_atom_site_label}[$i];
                if( exists $labels{$label} ) {
                    # TODO: warn
                    next;
                }
                $labels{$label} = $i;
            }

            if( $print_atom_site_type_symbol ) {
                if( contains_data_item( $dataset, '_atom_site_type_symbol' ) ) {
                    warn 'output assumed chemical types requested, but data ' .
                         'item \'_atom_site_type_symbol\' is already present, ' .
                         'overwriting' . "\n";
                    exclude_tag( $dataset, '_atom_site_type_symbol' );
                }

                my @atom_site_type_symbol =
                    ( 'H' ) x scalar @{$dataset->{values}{_atom_site_label}};
                for my $atom (@{$neighbour_list->{atoms}}) {
                    $atom_site_type_symbol[$labels{$atom->{site_label}}] =
                        $atom->{chemical_type};
                }

                set_loop_tag( $dataset,
                              '_atom_site_type_symbol',
                              '_atom_site_label',
                              \@atom_site_type_symbol );
            }

            my @orbit_number =
                ( '.' ) x scalar @{$dataset->{values}{_atom_site_label}};
            for my $atom (@{$neighbour_list->{atoms}}) {
                $orbit_number[$labels{$atom->{site_label}}] = $atom->{orbit};
            }
            set_loop_tag( $dataset,
                          '_[local]_cod_molecule_automorphism_orbit_number',
                          '_atom_site_label',
                          \@orbit_number );
            print_cif( $dataset,
                       {
                            preserve_loop_order => 1,
                            keep_tag_order      => 1,
                       }
                     );
        };
        if ( $@ ) {
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname
            }, $die_on_error_level )
        }
    }
}
