#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Revision: 7416 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.6/scripts/cif2json $
#$Date: 2019-11-15 19:15:35 +0200 (Fri, 15 Nov 2019) $
#$Id: cif2json 7416 2019-11-15 17:15:35Z antanas $
#------------------------------------------------------------------------------
#*
#* Parse CIF file(s) and print out the parsed data blocks in JSON format.
#*
#* USAGE:
#*    $0 [options] input.cif [input2.cif ...]
#**

use strict;
use COD::CIF::JSON qw( cif2json );
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::UserMessage qw( error );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ToolsVersion;

# Command line state: parser backend, extra parser options collected from
# the command line, and the two output formatting switches.
my $use_parser = 'c';
my %options;
my $strict = 0;
my $canonical = 0;

# Builds an option handler that stores $value under $key in %options.
my $set_option = sub {
    my ( $key, $value ) = @_;
    return sub { $options{$key} = $value };
};

@ARGV = getOptions(

#* OPTIONS:
#*   --fix-errors
#*                     Try to fix syntax errors in the input CIF files that can
#*                     be corrected unambiguously.
#*
#*   --dont-fix-errors, --no-fix-errors
#*                     Do not try to fix syntax errors in input CIF files (default).
#*
#*   --strict
#*                     Strictly adhere to JSON syntax, i.e., make a top level
#*                     list container for CIF data blocks and put comma (",")
#*                     separators between consecutive CIF data blocks.
#*   --no-strict, --relaxed, --stream, --concatenate
#*                     Print stream of JSON objects, where an object stands for
#*                     a CIF data block. Such output does not include comma (",")
#*                     separators between top level objects, thus streaming
#*                     parsers have to be used (default mode).
#*
#*   --canonical
#*                     Print JSON objects with key-value pairs sorted in a
#*                     canonical way. The usage of this option is discouraged
#*                     due to the added overhead.
#*   --no-canonical
#*                     Print JSON object with key-value pairs sorted in the
#*                     order Perl stores them. Due to this, the outputs might
#*                     differ between different runs of the script with
#*                     identical parameters (default).
#*
#*   --use-c-parser
#*                     Use Perl & C parser for CIF parsing (default).
#*   --use-perl-parser
#*                     Use Perl parser for CIF parsing.
#*
#*   --help,--usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**

    # Parser backend selection.
    '--use-perl-parser' => sub { $use_parser = 'perl' },
    '--use-c-parser'    => sub { $use_parser = 'c' },

    # Output layout: a single JSON list versus a stream of JSON objects.
    '--strict' => sub { $strict = 1 },
    '--no-strict,--relaxed,--stream,--concatenate' => sub { $strict = 0 },

    # Key ordering of the produced JSON objects.
    '--canonical'    => sub { $canonical = 1 },
    '--no-canonical' => sub { $canonical = 0 },

    # Switches that only record a flag in %options.
    '--fix-errors'      => $set_option->( 'fix_errors', 1 ),
    '--no-fix-errors'   => $set_option->( 'fix_errors', 0 ),
    '--dont-fix-errors' => $set_option->( 'fix_errors', 0 ),

    '--do-not-unprefix-text' => $set_option->( 'do_not_unprefix_text', 1 ),
    '--do-not-unfold-text'   => $set_option->( 'do_not_unfold_text', 1 ),
    '--fix-duplicate-tags-with-same-values' =>
        $set_option->( 'fix_duplicate_tags_with_same_values', 1 ),
    '--fix-duplicate-tags-with-empty-values' =>
        $set_option->( 'fix_duplicate_tags_with_empty_values', 1 ),
    '--fix-data-header'     => $set_option->( 'fix_data_header', 1 ),
    '--fix-datablock-names' => $set_option->( 'fix_datablock_names', 1 ),
    '--fix-string-quotes'   => $set_option->( 'fix_string_quotes', 1 ),
    '--fix-missing-closing-double-quote' =>
        $set_option->( 'fix_missing_closing_double_quote', 1 ),
    '--fix-missing-closing-single-quote' =>
        $set_option->( 'fix_missing_closing_single_quote', 1 ),
    '--fix-ctrl-z' => $set_option->( 'fix_ctrl_z', 1 ),
    '--allow-uqstring-brackets' =>
        $set_option->( 'allow_uqstring_brackets', 1 ),

    # Informational options: print the requested text and stop.
    '--help,--usage' => sub { usage; exit },
    '--version'      => sub {
        print 'cod-tools version ', $COD::ToolsVersion::Version, "\n";
        exit;
    }
);

# Main part: parse every input file and print its data blocks as JSON.
#
# When no file names are given, the conventional "-" name is used instead
# (presumably interpreted by the parser as standard input -- confirm).
@ARGV = ( "-" ) unless @ARGV;

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

my $json_options = {
    'canonical' => $canonical
};

# In strict mode all data blocks are wrapped into one top level JSON list;
# $first tracks whether a "," separator is needed before the next block.
print '[' if $strict;
my $first = 1;

for my $filename (@ARGV) {

    my $options = { 'parser' => $use_parser, 'no_print' => 1 };

    # Hand the options collected from the command line (--fix-errors,
    # --fix-data-header, etc.) over to the parser; previously %options was
    # filled in but never used, so these switches had no effect. The fixed
    # 'parser' and 'no_print' settings take precedence over %options.
    my $parser_options = { %options, %{$options} };

    my ( $data, $err_count, $messages ) =
        parse_cif( $filename, $parser_options );

    # Parser messages are reported regardless of the parsing outcome.
    print STDERR $_ foreach ( @{$messages} );

    if ( $err_count > 0 ) {
        # Files with parsing errors are reported and skipped; the remaining
        # files are still processed.
        error( $0, $filename, undef, "$err_count error(s) "
             . "encountered while parsing the file", undef );
        next;
    }

    # Called with the same arguments as before the parser option fix.
    canonicalize_all_names( $data, $options );

    for my $datablock ( @{$data} ) {
        print ',' if $strict && !$first;
        $first = 0;
        print cif2json( $datablock, $json_options );
    }
}

print ']' if $strict;
