#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2021-04-28 20:15:10 +0300 (Wed, 28 Apr 2021) $
#$Revision: 8744 $
#$URL: svn+ssh://www.crystallography.net/home/coder/svn-repositories/cod-tools/tags/v3.2.0/scripts/cif_p1 $
#------------------------------------------------------------------------------
#*
#* Expand atoms in CIF to the P1 space group, preserving molecules
#* (i.e. outputting atoms that are bonded at bond distances from each other).
#*
#* USAGE:
#*    $0 --options input1.cif input*.cif
#**

use strict;
use warnings;
use Clone qw( clone );
use COD::Algebra qw( gcd );
use COD::AtomBricks qw( build_bricks get_atom_index get_search_span );
use COD::AtomNeighbours qw( get_max_covalent_radius );
use COD::AtomProperties;
use COD::CIF::Parser qw( parse_cif );
use COD::Spacegroups::Symop::Algebra qw( symop_is_unity
                                         symop_invert
                                         symop_mul
                                         symop_vector_mul );
use COD::Spacegroups::Symop::Parse qw( modulo_1
                                       string_from_symop
                                       symop_from_string
                                       symop_string_canonical_form );
use COD::Algebra::Vector qw( distance );
use COD::CIF::Data qw( get_cell get_symmetry_operators );
use COD::CIF::Data::AtomList qw( atom_array_from_cif
                                 atom_groups
                                 dump_atoms_as_cif );
use COD::CIF::Data::SymmetryGenerator qw( apply_shifts
                                          atoms_coincide
                                          chemical_formula_sum
                                          symops_apply_modulo1
                                          test_bond
                                          test_bump
                                          translate_atom
                                          translation
                                          trim_polymer );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::CIF::Tags::Manage qw( rename_tags );
use COD::CIF::Tags::Print qw( print_cif
                              print_single_tag_and_value
                              print_value );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw( process_warnings
                          process_errors
                          process_parser_messages
                          report_message );
use COD::ToolsVersion qw( get_version_string );

no warnings 'recursion';

# Version control identification string of this script:
my $Id = '$Id: cif_p1 8744 2021-04-28 17:15:10Z antanas $';

# The CIF parser to use, 'c' or 'perl' (see the --use-c-parser and
# --use-perl-parser options):
my $use_parser = 'c';

# Program flags that can be set from options:

my $debug;          # stays undefined unless --debug or --no-debug is given
my $verbose    = 0;
my $audit      = 1;
my $dump_atoms = 0;

# These two flags are not modified by any entry of the getOptions()
# table of this script:
my $uniquify_atoms     = 1;
my $continue_on_errors = 0;

# Do not use atoms with zero occupancies:
my $exclude_zero_occupancies = 1;

# The printf-style format used for the output coordinates:
my $format = '%8.6f';

# Put all molecules, and all disorder groups, into a single data block
# in the output:
my $use_one_output_datablock = 1;

# Put all alternative conformations into one data block:
my $merge_disorder_groups = 0;

# Simplify chemical formula and output not the whole P1 cell but only
# a minimal stoichiometric set of molecules. Should have the same result
# as 'cif_molecule --preserve-stoichiometry':
my $simplify_formula = 0;

# Extra distance added to covalent radii sum when testing if two atoms
# form a covalent bond:
my $covalent_sensitivity = 0.35;

# A fraction of covalent bond radii used to determine when atoms are
# too close and are considered a bump:
my $bump_distance_factor = 0.75;

# Detect and warn about close atom "bumps" but do not stop processing:
my $ignore_bumps = 1;

# A span, in +/- unit cells, in which polymeric molecules (repeating
# units) will be constructed:
my $max_polymer_span = 4;

# A maximum allowed count of polymer example atoms: more than this
# amount of symmetry (translational) equivalent atoms, for each AU
# atom, will not be written to the output file:
my $max_polymer_atoms = 100;

# Which message severities terminate the script:
my ( $die_on_errors, $die_on_warnings, $die_on_notes ) = ( 1, 0, 0 );

#* OPTIONS:
#*   --always-continue
#*                     Continue processing and return successful return status
#*                     even if errors are diagnosed.
#*   -c-, --always-die
#*                     Stop and return error status if errors are diagnosed.
#*
#*   -1, --one-datablock-output
#*                     Output all molecules and all alternative conformations
#*                     to a single output data block.
#*
#*   -1-, --multiple-datablocks-output
#*                     Separate each molecule and each example of an alternative
#*                     conformation into a separate data block.
#*
#*   -c, --covalent-sensitivity
#*                     Set a new covalent sensitivity value (default: 0.35).
#*
#*   --simplify-formula
#*                     Simplify chemical formula and output not the whole
#*                     P1 cell but only a minimal stoichiometric set of
#*                     molecules. Should have the same result as
#*                     'cif_molecule --preserve-stoichiometry'.
#*
#*   --dont-simplify-formula, --no-simplify-formula
#*                     Do not simplify formula, output the whole P1 cell (default).
#*
#*   --exclude-zero-occupancies
#*                     Do not use atoms with 0 occupancies in calculations
#*                     (default).
#*
#*   --dump-atoms
#*                     Dump atoms (including symmetry-equivalent) in CIF
#*                     format, for inspection with some graphics program.
#*
#*   --dont-dump-atoms, --no-dump-atoms
#*                     Do not dump atoms (default).
#*
#*   -i, --ignore-bumps
#*                     Detect and warn about close atom "bumps" but do not
#*                     stop processing (default).
#*
#*   --dont-ignore-bumps, --no-ignore-bumps
#*                     Stop processing immediately if bumps are detected.
#*
#*   --bump-distance-factor 0.75
#*                     A fraction of covalent bond radii sum used to determine
#*                     when atoms are too close and are considered a bump.
#*
#*   --max-polymer-span 4
#*                     A span, in +/- unit cells, in which polymeric molecules
#*                     (repeating units) will be constructed.
#*
#*   --max-polymer-atoms 100
#*                     A maximum allowed count of polymer example atoms: more
#*                     than this amount of symmetry (translational) equivalent
#*                     atoms, for each AU atom, will not be written to the output
#*                     file.
#*
#*                     Using --max-polymer-span=0 --max-polymer-atoms=1
#*                     essentially switches off the polymer detection.
#*
#*   --split-disorder-groups, --dont-merge-disorder-groups
#*                     Put examples of disorder group conformations into
#*                     separate data blocks (default).
#*
#*   --merge-disorder-groups, --dont-split-disorder-groups
#*                     Put all disorder groups into one data block.
#*
#*   --use-perl-parser
#*                     Use development CIF parser written in Perl.
#*   --use-c-parser
#*                     Use faster C/Yacc CIF parser (default).
#*
#*   --debug
#*                     Print some human-readable debug output.
#*   --no-debug
#*                     Suppress any debug output (default).
#*
#*   --format "%8.6f"
#*                     Use the specified format for output coordinate printout.
#*
#*   --audit
#*                     Print audit information to the generated CIF file (default).
#*   --no-audit
#*                     Do not print audit information to the generated CIF file.
#*
#*   --continue-on-errors
#*                     Do not terminate script if errors such as unrecognised
#*                     atoms are encountered; the output may be incorrect and
#*                     missing some atoms if this option is used!
#*   --die-on-errors, --dont-continue-on-errors,
#*   --no-continue-on-errors
#*                     Terminate script immediately if errors are raised (default).
#*   --continue-on-warnings
#*                     Do not terminate script if warnings are raised (default).
#*   --die-on-warnings
#*                     Terminate script immediately if warnings are raised.
#*   --continue-on-notes
#*                     Do not terminate script if notes are raised (default).
#*   --die-on-notes
#*                     Terminate script immediately if notes are raised.
#*
#*   --verbose
#*                     Print warning messages in long format.
#*   --no-verbose
#*                     Print warning messages in concise format (default).
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
# Process the command line. Each key of the table is a comma-separated
# list of aliases of one option; the value is either a reference to the
# scalar that receives the option's argument or a subroutine that is run
# when the option is seen. The list returned by getOptions() replaces
# @ARGV and is later iterated over as the list of input file names.
@ARGV = getOptions(
    "-1,--one-datablock-output" => sub { $use_one_output_datablock = 1; },
    "-1-,--multiple-datablocks-output" =>
        sub { $use_one_output_datablock = 0; },

    "-c,--covalent-sensitivity" => \$covalent_sensitivity,

    "-i,--ignore-bumps"   => sub{ $ignore_bumps = 1 },
    "--no-ignore-bumps"   => sub{ $ignore_bumps = 0 },
    "--dont-ignore-bumps" => sub{ $ignore_bumps = 0 },

    "--simplify-formula"  => sub{ $simplify_formula = 1 },
    "--no-simplify-formula"     => sub{ $simplify_formula = 0 },
    "--dont-simplify-formula"   => sub{ $simplify_formula = 0 },
    "--do-not-simplify-formula" => sub{ $simplify_formula = 0 },

    "--format" => \$format,

    "--dump-atoms"      => sub{ $dump_atoms = 1 },
    "--dont-dump-atoms" => sub{ $dump_atoms = 0 },
    "--no-dump-atoms"   => sub{ $dump_atoms = 0 },

    "--exclude-zero-occupancies"    => sub { $exclude_zero_occupancies = 1; },
    "--no-exclude-zero-occupancies" => sub { $exclude_zero_occupancies = 0; },
    "--dont-exclude-zero-occupancies" => sub { $exclude_zero_occupancies = 0; },

    "--bump-distance-factor" => \$bump_distance_factor,

    "--max-polymer-span" => \$max_polymer_span,
    "--max-polymer-atoms" => \$max_polymer_atoms ,

    "--debug"    => sub { $debug = 1 },
    "--no-debug" => sub { $debug = 0 },

    # The alias lists below are built by string concatenation, so every
    # continued fragment except the last one must end with a comma;
    # otherwise two aliases fuse into a single, unusable option name.
    "--split-disorder-groups,--dont-merge-disorder-groups," .
    "--do-not-merge-disorder-groups,--no-merge-disorder-groups"
        => sub { $merge_disorder_groups = 0 },
    "--merge-disorder-groups,--dont-split-disorder-groups," .
    "--do-not-split-disorder-groups,--no-split-disorder-groups"
        => sub { $merge_disorder_groups = 1 },

    "--always-continue"                 => sub { $die_on_errors   = 0;
                                                 $die_on_warnings = 0;
                                                 $die_on_notes    = 0 },
    "-c-,--always-die"                  => sub { $die_on_errors   = 1;
                                                 $die_on_warnings = 1;
                                                 $die_on_notes    = 1 },

    "--continue-on-errors"          => sub { $die_on_errors = 0 },
    "--dont-continue-on-errors"     => sub { $die_on_errors = 1 },
    "--die-on-errors"               => sub { $die_on_errors = 1 },
    "--no-continue-on-errors"       => sub { $die_on_errors = 1 },

    "--continue-on-warnings" => sub { $die_on_warnings = 0 },
    "--die-on-warnings"      => sub { $die_on_warnings = 1 },

    "--continue-on-notes"    => sub { $die_on_notes = 0 },
    "--die-on-notes"         => sub { $die_on_notes = 1 },

    "--use-perl-parser"  => sub{ $use_parser = "perl" },
    "--use-c-parser"     => sub{ $use_parser = "c" },

    "--audit"            => sub { $audit = 1; },
    "--no-audit"         => sub { $audit = 0; },

    "--verbose"          => sub { $verbose = 1; },
    "--no-verbose"       => sub { $verbose = 0; },

    '--options'      => sub { options; exit },
    '--help,--usage' => sub { usage; exit },
    '--version'      => sub { print get_version_string(), "\n"; exit }
);

# The "atom" hash structure used by this script:

# NOTE: the block below is never executed (its condition is the constant 0).
# It exists only as a syntax-checked example of the layout of an "atom"
# hash; the values are illustrative.
if( 0 ) {
    my $atom = {
        site_label    => "C1",
        # presumably the site label with a symmetry operator number
        # appended -- TODO confirm against symops_apply_modulo1():
        cell_label    => "C1_2",
        # presumably the cell label with a translation identifier
        # appended -- TODO confirm:
        name          => "C1_2_436",
        chemical_type => "C",
        coordinates_fract => [ 1.5, 0.22, 1.0 ], # Fractional atom coordinates
        coordinates_ortho => [ 150., 21.7, 50 ], # Orthogonal coordinates
        f2o        => [
            # Orthogonalisation matrix used to convert fractional
            # coordinates to orthogonal (Cartesian) ones:
            [ 100,  0,  0 ],
            [   0, 50,  0 ],
            [   0,  0, 30 ]
        ],
        occupancy => 1.0,
        multiplicity => 8,
        # presumably the disorder assembly and group of the atom; "." is
        # shown as the alternative value -- TODO confirm:
        assembly => "A", # "."
        group    => "1", # "."
    }
}

# Function prototypes to be checked:

sub symops_apply_to_atom_modulo1($$);
sub symops_apply_to_atoms_modulo1($$);
sub get_molecules($$$);
sub find_molecules($$$$$$);
sub find_molecule($$$$$$$$$);

# When no file names are left on the command line, process the single
# pseudo-file "-" (presumably the standard input -- see parse_cif()):
push( @ARGV, '-' ) if !@ARGV;

# Both output streams are written as UTF-8:
for my $stream ( \*STDOUT, \*STDERR ) {
    binmode( $stream, ':encoding(UTF-8)' );
}

# Message severity => "terminate the script" flag, as selected by the
# command line options; passed to the parser message handler:
my $die_on_error_level = {
    NOTE    => $die_on_notes,
    WARNING => $die_on_warnings,
    ERROR   => $die_on_errors
};

# Options passed to atom_array_from_cif() for every data block; the
# 'symop_list' entry is added per data block in the main loop:
my $cif_atom_list_options = {
    atom_properties              => \%COD::AtomProperties::atoms,
    allow_unknown_chemical_types => 1,
    continue_on_errors           => $continue_on_errors,
    modulo_1                     => 1,
    uniquify_atom_names          => 1,
    uniquify_atoms               => $uniquify_atoms
};

# Main loop: parse every input file and, for each of its data blocks,
# generate the symmetry-equivalent atoms, group them into molecules and
# print the result as one or more CIF data blocks.
for my $filename (@ARGV) {

    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );

    if( !@$data || !defined $data->[0] || !defined $data->[0]{name} ) {
        report_message( {
           'program'   => $0,
           'filename'  => $filename,
           'err_level' => 'WARNING',
           'message'   => 'file seems to be empty'
        }, $die_on_warnings );
        next;
    }

    canonicalize_all_names( $data );

    for my $datablock (@$data) {

        my $dataname = 'data_' . $datablock->{name};
        my $dataset_dataname = $datablock->{name};

        # Route every warning raised while this data block is processed
        # through the common handler, tagged with the file and data block
        # names. Perl passes the warning text as the first argument.
        local $SIG{__WARN__} = sub {
            process_warnings( {
                'message'  => $_[0],
                'program'  => $0,
                'filename' => $filename,
                'add_pos'  => $dataname
            }, {
                'WARNING' => $die_on_warnings,
                'NOTE'    => $die_on_notes
            } )
        };

        # Errors raised for one data block are caught here and reported
        # after the eval, so that they are attributed to this data block.
        eval {
            my $symop_strings = get_symmetry_operators( $datablock );

            # Parsed symmetry operators used to generate the unit cell
            # contents:
            my @symops = map { symop_from_string($_) } @{$symop_strings};

            # A separately parsed operator list, with a lookup table from
            # the canonical operator string to its index, is handed to
            # atom_array_from_cif() through the options hash:
            my $symop_list = {
                symops    => [ map { symop_from_string($_) }
                                   @{$symop_strings} ],
                symop_ids => {}
            };
            for my $i (0 .. $#{$symop_strings}) {
                my $canonical_symop =
                    symop_string_canonical_form( $symop_strings->[$i] );
                $symop_list->{symop_ids}{$canonical_symop} = $i;
            }

            $cif_atom_list_options->{symop_list} = $symop_list;

            # Build an atom array from the CIF data structure:
            my $atoms = atom_array_from_cif( $datablock, $cif_atom_list_options );

            my $unit_cell_atoms =
                symops_apply_to_atoms_modulo1( \@symops, $atoms );

            ## use COD::Serialise qw( serialiseRef );
            ## serialiseRef( \@unit_cell_atoms );

            if( $dump_atoms ) {
                dump_atoms_as_cif( 1, $unit_cell_atoms,
                                   [ get_cell( $datablock->{values}) ]
                                 );
                # Nothing more to do for this data block. 'return' leaves
                # the eval block cleanly; a 'next' here would exit the
                # eval through a loop control and trigger Perl's
                # "Exiting eval via next" warning on every data block.
                return;
            }

            my $molecules = get_molecules( $covalent_sensitivity,
                                           $unit_cell_atoms,
                                           \%COD::AtomProperties::atoms );

            # Largest molecules first:
            @$molecules = sort {@{$b->{atoms}} <=> @{$a->{atoms}}}
            @$molecules;

            # Find molecular Z value: molecules consisting of the same set
            # of site labels are counted as copies of one moiety, and Z is
            # the greatest common divisor of the copy counts.
            my %moieties;
            for my $moiety (@$molecules) {
                my $moiety_key =
                    join( "\0", sort map {$_->{site_label}}
                          @{$moiety->{atoms}} );
                push( @{$moieties{$moiety_key}}, $moiety );
            }

            my $Z = gcd( map { scalar @$_ } values %moieties );

            if( $simplify_formula ) {
                # Simplify molecular formula: keep only 1/Z of the copies
                # of every moiety.
                if( $Z > 1 ) {
                    my @stoichiometric_molecules = ();
                    # The keys are sorted so that the output order does
                    # not depend on Perl's randomised hash ordering.
                    for my $molecule_key (sort keys %moieties) {
                        my $moiety_copies = $moieties{$molecule_key};
                        my $N = scalar @{$moiety_copies};
                        push( @stoichiometric_molecules,
                              @{$moiety_copies}[ 0 .. $N/$Z - 1 ] );
                    }
                    $molecules = \@stoichiometric_molecules;
                }
                $Z = 1;
            }

            if( $use_one_output_datablock ) {
                # Merge all molecules into a single pseudo-molecule:
                my @all_atoms = map { @{$_->{atoms}} } @$molecules;
                $molecules = [{
                    atoms =>
                        \@all_atoms,
                    chemical_formula_sum =>
                        chemical_formula_sum( \@all_atoms, $Z ),
                          }];
            }

            # Split init atoms into assemblies and groups, if requested
            if( !$merge_disorder_groups ) {
                my @split_molecules;
                for my $molecule (@$molecules) {
                    my $disorder_groups = atom_groups( $molecule->{atoms} );
                    for my $disorder_representative (@$disorder_groups) {
                        push( @split_molecules,
                              {
                                  atoms =>
                                      $disorder_representative,
                                  chemical_formula_sum =>
                                      chemical_formula_sum
                                      ( $disorder_representative, $Z ),
                              }
                            );
                    }
                }
                $molecules = \@split_molecules;
            }

            # Print molecules to the CIF output. No molecule number is
            # passed (undef) when everything is merged into a single
            # output data block.
            my $molecule_id = 0;
            foreach my $molecule (@$molecules) {
                my $id = ( $use_one_output_datablock &&
                           $merge_disorder_groups )
                             ? undef : $molecule_id;

                ## if( $output_geom_bond ) {
                ##     $molecule->{bonds} = atom_bonds( $molecule->{atoms},
                ##                                      \%COD::AtomProperties::atoms,
                ##                                      $covalent_sensitivity );
                ## }

                print_molecule( $id, $audit, $molecule, $Id,
                                $datablock, $dataset_dataname, $filename,
                                $symop_strings, $Z );

                $molecule_id++;
            }
        }; # eval block end
        if ( $@ ) {
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname
            }, $die_on_errors )
        };
    }
}

# End of main program:
exit 0;

#===============================================================#
# Generate symmetry equivalents of an atom, exclude duplicates
# on special positions

sub symops_apply_to_atom_modulo1($$)
{
    # Accepts
    #     atom          - an atom hash
    #     sym_operators - a reference to an array of symmetry operators
    # Returns
    #     a list of the symmetry equivalents of the atom from which atoms
    #     coinciding with an earlier equivalent have been removed.
    my ( $atom, $sym_operators ) = @_;

    my( $sym_atoms, $multiplicity, $multiplicity_ratio ) =
        symops_apply_modulo1( $atom, $sym_operators,
                              { append_symop_to_label => 1 } );

    # With a multiplicity ratio of 1 the generated atoms are returned
    # without any coincidence test:
    return @$sym_atoms if $multiplicity_ratio == 1;

    # Compare every pair of generated atoms once and remember the name of
    # the later atom of each coinciding pair:
    my %is_duplicate;
    my $last_index = $#$sym_atoms;
    for my $first_index (0 .. $last_index - 1) {
        my $first = $sym_atoms->[$first_index];
        for my $second (@{$sym_atoms}[ $first_index + 1 .. $last_index ]) {
            next unless atoms_coincide( $first, $second, $first->{f2o} );
            $is_duplicate{$second->{name}} = 1;
        }
    }

    return grep { !defined $is_duplicate{$_->{name}} } @$sym_atoms;
}

#===============================================================#
# Generate symmetry equivalents of all atoms from a list, exclude
# duplicates on special positions

sub symops_apply_to_atoms_modulo1($$)
{
    # Accepts
    #     sym_operators - a reference to an array of symmetry operators
    #     atoms         - a reference to an array of atom hashes
    # Returns
    #     a reference to an array with the concatenated results of
    #     symops_apply_to_atom_modulo1() for every input atom, in the
    #     order of the input atoms.
    my ( $sym_operators, $atoms ) = @_;

    my @sym_atoms =
        map { symops_apply_to_atom_modulo1( $_, $sym_operators ) }
            @{$atoms};

    return \@sym_atoms;
}

#==============================================================================#
# This is the main function where other functions such as find_molecules are
# called.
# Accepts
#     covalent_sensitivity - a threshold for covalent sensitivity
#     atom_list            - an array of atom hashes from which
#                            molecules will be formed;
#     atom_properties      - a hash with atom properties, e.g. as provided
#                            by %COD::AtomProperties::atoms
#
# Returns
#     unique_molecules     - an array of hashes
#                     %molecule = (
#                         atoms=>[\%atom_info1, \%atom_info2], #covalent bond
#                         chemical_formula_sum=>"C6 H6",
#                                 );

sub get_molecules($$$)
{
    my ( $covalent_sensitivity, $atom_list, $atom_properties ) = @_;

    # No atom list at all: silently return no molecules.
    return [] if !defined $atom_list;

    # An empty atom list is reported to the user:
    if( !@$atom_list ) {
        warn "WARNING, no atoms suitable for processing were found -- maybe "
           . "all occupancies were unknown, zero, or all atom types were "
           . "unrecognised\n";
        return [];
    }

    my $max_covalent_radius = get_max_covalent_radius( $atom_properties );

    my $symmetric_atoms = apply_shifts( $atom_list );

    # The brick size is twice the largest covalent radius plus the
    # covalent sensitivity:
    my $brick_size = $max_covalent_radius * 2 + $covalent_sensitivity;
    my $bricks = build_bricks( $symmetric_atoms, $brick_size );

    # Finds molecules; the search is seeded with a copy of the atom list.
    my %seen_molecules;
    my @generated_molecules =
        find_molecules( $covalent_sensitivity,
                        $atom_properties,
                        $symmetric_atoms,
                        [ @$atom_list ],
                        $bricks,
                        \%seen_molecules );

    # Calculates chemical formula sum
    for my $molecule (@generated_molecules) {
        my $formula = chemical_formula_sum( $molecule->{atoms} );
        $molecule->{chemical_formula_sum} = $formula;
    }

    return \@generated_molecules;
}

#===============================================================#
# Finds all possible molecules in the CIF file. If two atoms are connected via
# then the algorithm states that there is no bond between these two atoms.

# The algorithm:
# 1. Takes an initial atom and tests whether it has already been found in
#    another molecule
# 2. If not, then begins to search for another molecule:
# 2.1  Does modulo_1 for the initial atom
# 2.2  Finds a translation from the initial atom to atom_modulo_1
# 2.3  Searches for all neighbors of atom_modulo_1
# 2.4  For each neighbor of atom_modulo_1 does 2.1 -- 2.4
# 2.5  Translates atom_modulo_1 and all its neighbors according to the
#       translation vector. atom_modulo_1 now becomes the initial atom;
#       the others -- accordingly
# 3. Stops and repeats step 1 until there are no initial atoms left.


# Accepts
#     covalent_sensitivity - a threshold for covalent sensitivity
#     atom_properties(
#           H => {
#                     name => Hydrogen, #(chemical_type)
#                     period => 1,
#                     group => 1,
#                     block => s,
#                     atomic_number => "1",
#                     atomic_weight => 1.008,
#                     covalent_radius => 0.23,
#                     vdw_radius => 1.09,
#                     valency => [1],
#                     },
#          );
# symmetric_atoms and initial_atoms are arrays of
#                                 $atom_info = {
#                                             name=>"C1_2",
#                                             site_label=>"C1",
#                                             chemical_type=>"C",
#                                             coordinates_fract=>[1.0, 1.0,1.0],
#                                             coordinates_ortho=>[1.0, 1.0,1.0],
#                                             unity_matrix_applied=>1
#                                             }
# Returns an array of
# %molecule = (
#               atoms => [
#                   \%atom1_info, \%atom2_info, \%atom3_info, \%atom4_info
#               ],
#               bonds => [
#                   [ \%atom1_info, \%atom2_info ],
#                   [ \%atom1_info, \%atom3_info ],
#                   [ \%atom4_info, \%atom3_info ],
#               ] # covalent bond description
#               chemical_formula_sum => "C6 H6",
#             );

sub find_molecules($$$$$$)
{
    # The last argument (seen_molecules) is accepted for interface
    # compatibility but is not used in this subroutine.
    my ( $covalent_sensitivity, $atom_properties, $symmetric_atoms,
         $initial_atoms, $bricks, $seen_molecules ) = @_;

    # Bookkeeping shared by all find_molecule() calls of this run:
    my ( %used_atoms, %used_originals, %used_uc_atoms, %checked_pairs );

    my @unique_molecules;
    my $nbumps = 0;

    for my $initial_atom (@$initial_atoms) {
        # Skip atoms whose cell label has already been registered in
        # %used_originals by an earlier find_molecule() call:
        next if exists $used_originals{$initial_atom->{cell_label}};

        print STDERR ">>>> starting new molecule\n" if $debug;

        ## if( ! $expand_to_p1 &&
        ##     $initial_atom->{cell_label} ne $initial_atom->{site_label} ) {
        ##     print STDERR
        ##         ">>>> site: $initial_atom->{site_label}, " .
        ##         "cell: $initial_atom->{cell_label}\n";
        ## }

        my( $molecule_atoms, $mol_nbumps ) =
            find_molecule( $covalent_sensitivity,
                           $atom_properties,
                           $symmetric_atoms,
                           \%used_atoms,
                           \%used_originals,
                           \%used_uc_atoms,
                           \%checked_pairs,
                           $initial_atom, $bricks );

        my @atoms_of_molecule = @{$molecule_atoms};
        $nbumps += $mol_nbumps;

        if( !@atoms_of_molecule ) {
            warn "WARNING, found molecule with no atoms -- strange...\n";
            next;
        }

        # The formula is left empty here; get_molecules() fills it in.
        push( @unique_molecules,
              {
                  atoms => \@atoms_of_molecule,
                  chemical_formula_sum => "",
              } );
    }

    # In the concise (non-verbose) mode only a summary of the detected
    # bumps is reported:
    if( $nbumps > 0 && !$verbose ) {
        warn "WARNING, $nbumps pair(s) of atoms are too close to each "
           . "other and are considered as bumps\n";
    }

    return @unique_molecules;
}

# ============================================================================ #

sub find_molecule($$$$$$$$$)
{
    my $covalent_sensitivity = shift(@_);
    my $atom_properties      = shift(@_);
    my $symmetric_atoms      = shift(@_);
    my $used_atoms           = shift(@_);
    my $used_originals       = shift(@_);
    my $used_uc_atoms        = shift(@_);
    my $checked_pairs        = shift(@_);
    my $current_atom         = shift(@_);
    my $bricks               = shift(@_);

    my @current_coords_fract_modulo_1 =
        map { modulo_1($_) } @{$current_atom->{coordinates_fract}};

    my $atom_in_unit_cell_coords_ortho =
        symop_vector_mul( $current_atom->{f2o}, \@current_coords_fract_modulo_1 );

    my $current_translation = translation( $current_atom->{coordinates_fract},
                                           \@current_coords_fract_modulo_1 );

    my @neighbors;

    do {
        no warnings;
        if( exists $used_atoms->
            {$current_atom->{site_label}}
            {$current_atom->{symop_id}}
            {$current_atom->{translation_id}} ) {
            print STDERR "<<<< atom labeled '$current_atom->{name}' " .
                "is already in some molecule, returning\n"
                if $debug;
            return ( \@neighbors, 0 );
        }

        $used_atoms->{$current_atom->{site_label}}
            {$current_atom->{symop_id}}
            {$current_atom->{translation_id}} = $current_atom;
    }; # end no warnings

    $used_originals->{$current_atom->{cell_label}} =
        $current_atom->{cell_label};

    do {
        no warnings;
        if( exists $used_uc_atoms->
            {$current_atom->{site_label}}
            {$current_atom->{symop_id}} ) {
            my $used_uc_atom = $used_uc_atoms->
                    {$current_atom->{site_label}}
                    {$current_atom->{symop_id}};
            print STDERR ">>> !!!! detected a used unit cell " .
                "label $current_atom->{name}/$current_atom->{symop_id}/" .
                "$current_atom->{translation_id} (${used_uc_atom}-th time)\n"
            if $debug;

            $current_atom->{is_polymer} = 1;

            if( $used_uc_atoms->
                {$current_atom->{site_label}}
                {$current_atom->{symop_id}} > $max_polymer_atoms ) {
                my $message = "the maximum number of polymer atom " .
                    "repetitions $max_polymer_atoms was hit for the " .
                    "atom $current_atom->{site_label} " .
                    "($current_atom->{symop_id})\n" .
                    "To get around this limit, please increase " .
                    "--max-polymer-atoms, to say, " .
                    "--max-polymer-atoms=" . (2 * $max_polymer_atoms) . " " .
                    "or decrease --max-polymer-span (e.g. " .
                    "--max-polymer-span=" . int($max_polymer_span/2) . ", " .
                    "but expect increased computation times and " .
                    "memory consumption)";
                if( $continue_on_errors ) {
                    warn "WARNING, $message\n";
                    return ( [], 0 );
                } else {
                    die "ERROR, $message\n";
                }
            }

            if( abs($current_atom->{translation}[0]) > $max_polymer_span ||
                abs($current_atom->{translation}[1]) > $max_polymer_span ||
                abs($current_atom->{translation}[2]) > $max_polymer_span ) {
                return ( [], 0 );
            }
        }

        $used_uc_atoms->
            {$current_atom->{site_label}}
            {$current_atom->{symop_id}} ++;
    }; # end no warnings

    print STDERR
        ">>> considering atom $current_atom->{name} " .
        "(@{$atom_in_unit_cell_coords_ortho}) " .
        "($current_atom->{cell_label}/" .
        "$current_atom->{symop_id}/$current_atom->{translation_id})\n"
        if $debug;

    push( @neighbors, $current_atom );

    my ($i_init, $j_init, $k_init) =
        get_atom_index( $bricks, @{$atom_in_unit_cell_coords_ortho});

    my ( $min_i, $max_i, $min_j, $max_j, $min_k, $max_k );

    eval {
        ( $min_i, $max_i, $min_j, $max_j, $min_k, $max_k ) =
            get_search_span( $bricks, $i_init, $j_init, $k_init );
    };
    if( $@ ) {
        use COD::Serialise qw( serialiseRef );
        serialiseRef( $atom_in_unit_cell_coords_ortho );
        serialiseRef( [ $i_init, $j_init, $k_init ] );
        serialiseRef( $bricks );
        die( $@ );
    }

    if( $debug ) {
        local $" = ", ";
        print STDERR
            ">>> now scanning its distinct neighbours " .
            "around @{$atom_in_unit_cell_coords_ortho}:\n";
    };

    my $nbumps = 0;

    ## foreach my $sym_atom (@$symmetric_atoms)
    for my $i ($min_i .. $max_i) {
    for my $j ($min_j .. $max_j) {
    for my $k ($min_k .. $max_k) {
        for my $sym_atom ( @{$bricks->{atoms}[$i][$j][$k]} ) {
            my $sym_atom_coords_ortho = $sym_atom->{coordinates_ortho};
            my $new_label = $current_atom->{name};
            my $sym_label = $sym_atom->{name};

            if( $current_atom->{name} eq $sym_atom->{name} ) {
                # We have found the same atom, no need to add bond or
                # neighbour
                next;
            }

            my $dist = distance( $atom_in_unit_cell_coords_ortho,
                                 $sym_atom_coords_ortho );

            do {
                local $" = ' ';
                print STDERR ">>> checking neighbour $sym_label " .
                    "(@{$sym_atom_coords_ortho}), " .
                    "d = $dist\n";
            } if $debug;

            my $is_bump = test_bump( $atom_properties,
                                     $current_atom->{chemical_type},
                                     $sym_atom->{chemical_type},
                                     $current_atom->{site_label},
                                     $sym_atom->{site_label},
                                     $dist, $bump_distance_factor );

            if( $is_bump &&
                ( ( !defined $current_atom->{group} &&
                    !defined $current_atom->{assembly} &&
                    !defined $sym_atom->{group} &&
                    !defined $sym_atom->{assembly} ) ||
                  ( $current_atom->{group} eq $sym_atom->{group} &&
                    $current_atom->{assembly} eq $sym_atom->{assembly} ) ||
                  ( $current_atom->{assembly} eq "." &&
                    $current_atom->{group} eq "." ) ||
                  ( $sym_atom->{assembly} eq "." &&
                    $sym_atom->{group} eq "." ) )) {
                if( not exists $checked_pairs->{$sym_label}{$new_label} ) {
                    my $message =  "atoms \"$current_atom->{name}\" and " .
                        "\"$sym_atom->{name}\" are too close " .
                        "(distance = " .
                        sprintf( "%6.4f", $dist ) .
                        ") and are considered a bump";
                    if( $ignore_bumps ) {
                        if( $verbose ) {
                            warn "WARNING, $message\n";
                        } else {
                            $nbumps++;
                        }
                    } else {
                        die "ERROR, $message -- aborting calculations\n";
                    }
                }
            }

            $checked_pairs->{$sym_label}{$new_label} = 1;
            $checked_pairs->{$new_label}{$sym_label} = 1;

            my $is_bond = test_bond($atom_properties,
                                    $current_atom->{chemical_type},
                                    $sym_atom->{chemical_type},
                                    $dist,
                                    $covalent_sensitivity);

            if( $is_bond &&
                ( ( !defined $current_atom->{group} &&
                    !defined $current_atom->{assembly} &&
                    !defined $sym_atom->{group} &&
                    !defined $sym_atom->{assembly} ) ||
                  ( $current_atom->{group} eq $sym_atom->{group} &&
                    $current_atom->{assembly} eq $sym_atom->{assembly} ) ||
                  ( $current_atom->{assembly} eq "." &&
                    $current_atom->{group} eq "." ) ||
                  ( $sym_atom->{assembly} eq "." &&
                    $sym_atom->{group} eq "." ) )) {
                do {
                    use COD::Serialise qw( serialiseRef );
                    local $" = ' ';
                    print STDERR ">>> found bond:\n";
                    serialiseRef( { "translation" => $current_translation,
                                    "original atom" => $current_atom,
                                    "sym atom" => $sym_atom } );
                } if $debug;

                my $back_shifted_sym_atom =
                    translate_atom( $sym_atom, $current_translation );

                do {
                    use COD::Serialise qw( serialiseRef );
                    print ">>>> back-shifted atom:\n";
                    serialiseRef( { sym_atom => $sym_atom,
                                    backshifted => $back_shifted_sym_atom } );
                } if $debug;

                my( $neighbours, $mol_nbumps ) =
                    find_molecule( $covalent_sensitivity,
                                   $atom_properties,
                                   $symmetric_atoms,
                                   $used_atoms,
                                   $used_originals,
                                   $used_uc_atoms,
                                   $checked_pairs,
                                   $back_shifted_sym_atom,
                                   $bricks );
                push(@neighbors, @$neighbours);
                $nbumps += $mol_nbumps;
            }
        }
    }}}

    print ">>> Finished checks;\n" if $debug;

    do {
        use COD::Serialise qw( serialiseRef );
        print ">>> Before translation:";
        serialiseRef( \@neighbors );
    } if $debug;

    return ( \@neighbors, $nbumps );
}

#===============================================================#
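# Prints a molecule in the CIF format (using plain 'print' statements).

# The molecule is passed (among other arguments) as a reference to a hash
# of the following form:
# %molecule = (
#               atoms=>[\%atom_info1, \%atom_info2], #covalent bond
#               chemical_formula_sum=>"\\'C6 H6\\'",
#             );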

sub print_molecule
{
    my( $molecule_id, $audit, $molecule, $Id, $dataset, $dataset_name,
        $filename, $sym_data, $Z ) = @_;

    # Prints one output CIF data block describing a single molecule.
    #
    # Parameters:
    #   $molecule_id  -- when defined, "_molecule_<id>" is appended to the
    #                    name of the output data block;
    #   $audit        -- when numerically equal to 1, the
    #                    _audit_creation_method data item is printed;
    #   $molecule     -- reference to a hash; the 'atoms' (array of atom
    #                    hashes) and 'chemical_formula_sum' keys are read;
    #   $Id           -- string printed as the _audit_creation_method value
    #                    after the '$' signs (and the whitespace around
    #                    them) are removed;
    #   $dataset      -- reference to the source data block hash. NOTE: its
    #                    'name' entry is overwritten here, and it is also
    #                    handed to rename_tags();
    #   $dataset_name -- name of the source data block; used as the base of
    #                    the output data block name and printed as
    #                    _cod_data_source_block;
    #   $filename     -- source file name; only its basename is printed
    #                    (as _cod_data_source_file);
    #   $sym_data     -- not used by this subroutine;
    #   $Z            -- value printed as _cell_formula_units_Z.
    #
    # The file-level variables $format, $max_polymer_span and
    # $use_one_output_datablock are read as well.
    #
    # Returns nothing.

    $dataset->{name} = $dataset_name;

    if( defined $molecule_id ) {
        $dataset->{name} .= "_molecule_" . $molecule_id;
    }

    # Data items that are passed to print_cif() under their original names:
    my @data2copy = qw(
    _publ_author_name
    _publ_section_title
    _journal_issue
    _journal_name_full
    _journal_page_first
    _journal_page_last
    _journal_volume
    _journal_year

    _cell_length_a
    _cell_length_b
    _cell_length_c
    _cell_angle_alpha
    _cell_angle_beta
    _cell_angle_gamma

    _cell_measurement_pressure
    _cell_measurement.pressure
    _cell_measurement.pressure_esd
    _cell_measurement_pressure_gPa
    _cell_measurement_radiation
    _cell_measurement.radiation
    _cell_measurement.temp
    _cell_measurement_temperature
    _cell_measurement_temperature_C
    _cell_measurement.temp_esd
    _cell_measurement_wavelength
    _cell_measurement.wavelength
    _cell_measurement_wavelength_nm
    _cell_measurement_wavelength_pm

    _diffrn_ambient_environment
    _diffrn.ambient_environment
    _diffrn_ambient_pressure
    _diffrn.ambient_pressure
    _diffrn.ambient_pressure_esd
    _diffrn_ambient_pressure_gPa
    _diffrn_ambient_pressure_gt
    _diffrn.ambient_pressure_gt
    _diffrn_ambient_pressure_lt
    _diffrn.ambient_pressure_lt
    _diffrn.ambient_temp
    _diffrn.ambient_temp_details
    _diffrn_ambient_temperature
    _diffrn_ambient_temperature_C
    _diffrn_ambient_temperature_gt
    _diffrn_ambient_temperature_lt
    _diffrn.ambient_temp_esd
    _diffrn.ambient_temp_gt
    _diffrn.ambient_temp_lt

    _diffrn_radiation_collimation
    _diffrn_radiation.collimation
    _diffrn_radiation_detector
    _diffrn_radiation_detector_dtime
    _diffrn_radiation.diffrn_id
    _diffrn_radiation.div_x_source
    _diffrn_radiation.div_x_y_source
    _diffrn_radiation.div_y_source
    _diffrn_radiation_filter_edge
    _diffrn_radiation.filter_edge
    _diffrn_radiation_filter_edge_nm
    _diffrn_radiation_filter_edge_pm
    _diffrn_radiation_inhomogeneity
    _diffrn_radiation.inhomogeneity
    _diffrn_radiation_monochromator
    _diffrn_radiation.monochromator
    _diffrn_radiation_polarisn_norm
    _diffrn_radiation.polarisn_norm
    _diffrn_radiation_polarisn_ratio
    _diffrn_radiation.polarisn_ratio
    _diffrn_radiation.polarizn_source_norm
    _diffrn_radiation.polarizn_source_ratio
    _diffrn_radiation_probe
    _diffrn_radiation.probe
    _diffrn_radiation_source
    _diffrn_radiation_type
    _diffrn_radiation.type
    _diffrn_radiation_wavelength
    _diffrn_radiation_wavelength_id
    _diffrn_radiation_wavelength.id
    _diffrn_radiation.wavelength_id
    _diffrn_radiation_wavelength_nm
    _diffrn_radiation_wavelength_pm
    _diffrn_radiation_wavelength.wavelength
    _diffrn_radiation_wavelength_wt
    _diffrn_radiation_wavelength.wt
    _diffrn_radiation_xray_symbol
    _diffrn_radiation.xray_symbol

    _diffrn_reflns_theta_full
    _diffrn_reflns_resolution_full
    _diffrn_reflns_theta_max
    _diffrn_reflns_resolution_max
    _reflns_d_resolution_high
    _reflns.d_resolution_high
    _reflns_d_resolution_high_nm
    _reflns_d_resolution_high_pm
    _reflns_d_resolution_low
    _reflns.d_resolution_low
    _reflns_d_resolution_low_nm
    _reflns_d_resolution_low_pm
    _diffrn_reflns_limit_h_max
    _diffrn_reflns.limit_h_max
    _diffrn_reflns_limit_h_min
    _diffrn_reflns.limit_h_min
    _diffrn_reflns_limit_k_max
    _diffrn_reflns.limit_k_max
    _diffrn_reflns_limit_k_min
    _diffrn_reflns.limit_k_min
    _diffrn_reflns_limit_l_max
    _diffrn_reflns.limit_l_max
    _diffrn_reflns_limit_l_min
    _diffrn_reflns.limit_l_min

    _cod_duplicate_entry
    _[local]_cod_duplicate_entry
);

    # Data items that are handed to rename_tags() together with the
    # '_[local]_cod_src' prefix:
    my @data2rename = qw(
    _chemical_formula_analytical
    _chemical_formula.analytical
    _chemical_formula.entry_id
    _chemical_formula_iupac
    _chemical_formula.iupac
    _chemical_formula_moiety
    _chemical_formula.moiety
    _chemical_formula_structural
    _chemical_formula.structural
    _chemical_formula_sum
    _chemical_formula.sum
    _pd_proc_ls_prof_R_factor
    _pd_proc_ls_prof_wR_factor
    _refine_hist.R_factor_all
    _refine_hist.R_factor_obs
    _refine_hist.R_factor_R_free
    _refine_hist.R_factor_R_work
    _refine_ls_class_R_factor_all
    _refine_ls_class.R_factor_all
    _refine_ls_class_R_factor_gt
    _refine_ls_class.R_factor_gt
    _refine_ls_class_wR_factor_all
    _refine_ls_class.wR_factor_all
    _refine_ls_R_factor_all
    _refine.ls_R_factor_all
    _refine_ls_R_factor_gt
    _refine.ls_R_factor_gt
    _refine_ls_R_factor_obs
    _refine.ls_R_factor_obs
    _refine.ls_R_factor_R_free
    _refine.ls_R_factor_R_free_error
    _refine.ls_R_factor_R_free_error_details
    _refine.ls_R_factor_R_work
    _refine_ls_shell.R_factor_all
    _refine_ls_shell.R_factor_obs
    _refine_ls_shell.R_factor_R_free
    _refine_ls_shell.R_factor_R_free_error
    _refine_ls_shell.R_factor_R_work
    _refine_ls_shell.wR_factor_all
    _refine_ls_shell.wR_factor_obs
    _refine_ls_shell.wR_factor_R_free
    _refine_ls_shell.wR_factor_R_work
    _refine_ls_wR_factor_all
    _refine.ls_wR_factor_all
    _refine_ls_wR_factor_gt
    _refine_ls_wR_factor_obs
    _refine.ls_wR_factor_obs
    _refine_ls_wR_factor_ref
    _refine.ls_wR_factor_R_free
    _refine.ls_wR_factor_R_work
    _reflns_class_R_factor_all
    _reflns_class.R_factor_all
    _reflns_class_R_factor_gt
    _reflns_class.R_factor_gt
    _reflns_class_wR_factor_all
    _reflns_class.wR_factor_all
);

    my %data2copy = map { $_ => $_ } @data2copy;

    # Only the presence of these source loops matters below: it decides
    # whether the corresponding output columns are printed.
    my $atom_site_type_symbol = $dataset->{values}{_atom_site_type_symbol};
    my $atom_site_occupancy   = $dataset->{values}{_atom_site_occupancy};
    my $atom_site_U_iso_or_equiv =
        $dataset->{values}{_atom_site_U_iso_or_equiv};

    # NOTE(review): rename_tags() is defined elsewhere in this file;
    # presumably it renames the listed data items of $dataset by prepending
    # the prefix and returns the renamed tags as a hash -- confirm.
    my $src_tag_prefix = '_[local]_cod_src';
    my %renamed_tags = rename_tags( $dataset,
                                    \@data2rename,
                                    $src_tag_prefix );
    my @renamed_tag_list = map { $src_tag_prefix . $_ } @data2rename;

    my %known_tags = ( %data2copy, %renamed_tags );
    my @known_tags = ( @data2copy, @renamed_tag_list );

    # A clone is printed so that print_cif() cannot modify $dataset.
    print_cif( clone( $dataset ),
                        {
                            dictionary_tags => \%known_tags,
                            dictionary_tag_list => \@known_tags,
                            exclude_misspelled_tags => 1,
                        } );

    # Generated data items, printed in a fixed order:
    if( $audit == 1 ) {
        my $id_value = $Id;
        $id_value =~ s/\s*\$\s*//g;
        print_single_tag_and_value( '_audit_creation_method', $id_value );
    }
    print_single_tag_and_value( '_chemical_formula_sum',
                                $molecule->{chemical_formula_sum} );
    {
        use File::Basename;
        print_single_tag_and_value( '_cod_data_source_file',
                                    basename( $filename ) );
    }
    print_single_tag_and_value( '_cod_data_source_block', $dataset_name );
    print_single_tag_and_value( '_cell_formula_units_Z', $Z );

    # Optional refinement flag columns: the atom hash key holding the value,
    # the CIF data name of the column and a flag telling whether at least
    # one atom carries a value other than '.' (filled in below).
    my @refinement_flag_columns = (
        { key  => 'refinement_flags',
          tag  => '_atom_site_refinement_flags',
          used => 0 },
        { key  => 'refinement_flags_position',
          tag  => '_atom_site_refinement_flags_posn',
          used => 0 },
        { key  => 'refinement_flags_adp',
          tag  => '_atom_site_refinement_flags_adp',
          used => 0 },
        { key  => 'refinement_flags_occupancy',
          tag  => '_atom_site_refinement_flags_occupancy',
          used => 0 },
    );

    # Scan all atoms of the molecule (before any polymer trimming) to find
    # out which optional columns and data items have to be printed.
    my $has_disorder = 0;
    my $is_polymer   = 0;
    my $has_attached_hydrogens = 0;
    my $has_site_symops = 0;
    foreach my $atom ( @{$molecule->{atoms}} ) {
        if( $atom->{group} ne '.' || $atom->{assembly} ne '.' ) {
            $has_disorder = 1;
        }
        if( exists $atom->{is_polymer} && $atom->{is_polymer} == 1 ) {
            $is_polymer = 1;
        }
        if( $atom->{attached_hydrogens} ) {
            $has_attached_hydrogens = 1;
        }
        if( exists $atom->{site_symops} && @{$atom->{site_symops}} > 0 ) {
            $has_site_symops = 1;
        }
        foreach my $column ( @refinement_flag_columns ) {
            my $key = $column->{key};
            if( exists $atom->{$key} && $atom->{$key} ne '.' ) {
                $column->{used} = 1;
            }
        }
    }

    # Disorder columns of the _atom_site_ loop are printed only when all
    # molecules go into a single output data block.
    my $print_disorder_columns = $use_one_output_datablock && $has_disorder;

    my $trimmed_atoms;

    if( $is_polymer ) {
        $trimmed_atoms = trim_polymer( $molecule->{atoms}, $max_polymer_span );
    } else {
        $trimmed_atoms = $molecule->{atoms};
    }

    # All loops below list the atoms in the same order: shorter names first,
    # names of equal length in string order. Sort once and reuse the result.
    my @sorted_atoms = sort {
        length($a->{name}) <=> length($b->{name}) ||
            $a->{name} cmp $b->{name}
        } @{$trimmed_atoms};

    print_single_tag_and_value( '_space_group_name_H-M_alt', 'P 1' );
    print "loop_ _symmetry_equiv_pos_as_xyz 'x, y, z'\n";

    # Header of the _atom_site_ loop:
    print "loop_\n";
    print "_atom_site_label\n";
    print "_atom_site_type_symbol\n"
        if defined $atom_site_type_symbol;
    print "_atom_site_fract_x\n";
    print "_atom_site_fract_y\n";
    print "_atom_site_fract_z\n";
    print "_atom_site_U_iso_or_equiv\n"
        if defined $atom_site_U_iso_or_equiv;
    print "_atom_site_occupancy\n"
        if defined $atom_site_occupancy;
    foreach my $column ( @refinement_flag_columns ) {
        print "$column->{tag}\n" if $column->{used};
    }
    if( $print_disorder_columns ) {
        print "_atom_site_disorder_assembly\n";
        print "_atom_site_disorder_group\n";
    }
    print "_atom_site_attached_hydrogens\n"
        if $has_attached_hydrogens;

    # Body of the _atom_site_ loop; the columns must follow the header order.
    my $print_format = join( " ", ($format)x3 );
    foreach my $atom ( @sorted_atoms ) {
        print_value( $atom->{name} );
        print " ";
        if( defined $atom_site_type_symbol ) {
            if( defined $atom->{atom_site_type_symbol} ) {
                print_value( $atom->{atom_site_type_symbol} );
                print " ";
            } else {
                print "? ";
            }
        }
        printf $print_format,
               $atom->{coordinates_fract}[0],
               $atom->{coordinates_fract}[1],
               $atom->{coordinates_fract}[2];
        if( defined $atom_site_U_iso_or_equiv ) {
            print " ";
            print_value( $atom->{atom_site_U_iso_or_equiv} );
        }
        if( defined $atom_site_occupancy ) {
            print " ";
            print_value( $atom->{atom_site_occupancy} );
        }
        foreach my $column ( @refinement_flag_columns ) {
            next if !$column->{used};
            my $key = $column->{key};
            print " ";
            # Atoms without the flag get the CIF 'inapplicable' value:
            print_value( (exists $atom->{$key} ? $atom->{$key} : '.') );
        }
        if( $print_disorder_columns ) {
            print " ";
            print_value( $atom->{assembly} );
            print " ";
            print_value( $atom->{group} );
        }
        if( $has_attached_hydrogens ) {
            print " ";
            print_value( $atom->{attached_hydrogens} );
        }
        print "\n";
    }

    if( $is_polymer ) {
        print_single_tag_and_value( '_cod_molecule_is_polymer', 'yes' );
    }

    # Header of the _cod_molecule_atom_ loop:
    print "loop_\n";
    print "_cod_molecule_atom_label\n";
    print "_cod_molecule_atom_orig_label\n";
    print "_cod_molecule_atom_symmetry\n";
    print "_cod_molecule_atom_symop_id\n";
    print "_cod_molecule_atom_symop_xyz\n";
    print "_cod_molecule_atom_transl_id\n";
    print "_cod_molecule_atom_transl_x\n";
    print "_cod_molecule_atom_transl_y\n";
    print "_cod_molecule_atom_transl_z\n";
    print "_cod_molecule_atom_mult\n";
    print "_cod_molecule_atom_mult_ratio\n";
    if( $has_disorder ) {
        print "_cod_molecule_atom_assembly\n";
        print "_cod_molecule_atom_group\n";
    }

    # Body of the _cod_molecule_atom_ loop:
    foreach my $atom ( @sorted_atoms ) {
        print_value( $atom->{name} );
        print " ";
        print_value( $atom->{site_label} );
        print " ";
        # The _cod_molecule_atom_symmetry value is composed of the symmetry
        # operator id and the translation id joined by an underscore:
        print_value( $atom->{symop_id} );
        print "_";
        print_value( $atom->{translation_id} );
        print " ";
        print_value( $atom->{symop_id} );
        print " ";
        print_value( string_from_symop( $atom->{symop} ) );
        print " ";
        print_value( $atom->{translation_id} );
        print " ";
        foreach my $axis ( 0 .. 2 ) {
            print_value( $atom->{translation}[$axis] );
            print " ";
        }
        print_value( $atom->{multiplicity} );
        print " ";
        print_value( $atom->{multiplicity_ratio} );
        if( $has_disorder ) {
            print " ";
            print_value( $atom->{assembly} );
            print " ";
            print_value( $atom->{group} );
        }
        print "\n";
    }

    # One row per (atom, site symmetry operator) pair:
    if( $has_site_symops ) {
        print "loop_\n";
        print "_cod_molecule_transform_label\n";
        print "_cod_molecule_transform_symop\n";

        foreach my $atom ( @sorted_atoms ) {
            foreach my $symop ( @{$atom->{"site_symops"}} ) {
                print_value( $atom->{name} );
                print " ";
                print_value( symop_string_canonical_form(
                                string_from_symop( $symop ) ) );
                print "\n";
            }
        }
    }

    ## if( $output_geom_bond ) {
    ##     if( exists $molecule->{bonds} ) {
    ##         print "loop_\n";
    ##         print "_geom_bond_atom_site_label_1\n";
    ##         print "_geom_bond_atom_site_label_2\n";
    ##         print "_geom_bond_distance\n";
    ##         print "_geom_bond_valence\n";
    ##         for my $bond (@{$molecule->{bonds}}) {
    ##             printf "%s %s %8.5f %d\n",
    ##             $bond->{atom1}{name}, $bond->{atom2}{name},
    ##             $bond->{distance},
    ##             $bond->{order};
    ##         }
    ##     } else {
    ##         warning( $0, $filename, $dataset_name,
    ##                  "bond data necessary to compute _geom_bond_ data " .
    ##                  "items was not calculated, undef" );
    ##     }
    ## }

    return;
}
