#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: antanas $
#$Date: 2017-10-24 15:04:52 +0300 (Tue, 24 Oct 2017) $ 
#$Revision: 5642 $
#$URL: svn://www.crystallography.net/cod-tools/tags/v2.6/scripts/cif_tcod_tree $
#------------------------------------------------------------------------------
#*
#* Prepares a calculation tree from descriptions in TCOD CIF file.
#*
#* USAGE:
#*    $0 --options input1.cif input2.cif
#**

use strict;
use warnings;
use Digest::MD5 qw( md5_hex );
use Digest::SHA qw( sha1_hex );
use File::Basename qw( dirname );
use File::Path qw( make_path );
use COD::CIF::Parser qw( parse_cif );
use COD::CIF::Data qw( get_content_encodings );
use COD::CIF::Tags::CanonicalNames qw( canonicalize_all_names );
use COD::CIF::Tags::Manage qw( has_inapplicable_value
                               has_special_value );
use COD::Escape qw( decode_textfield );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ErrorHandler qw( process_warnings
                          process_errors
                          process_parser_messages );
use COD::ToolsVersion;

my $die_on_error_level = {
    ERROR   => 1,
    WARNING => 0,
    NOTE    => 0
};

my $use_parser = 'c';
my $output_dir = '.';
my $dry_run = 0;
my $list = 0;

my $extract_outputs = 1;

#* OPTIONS:
#*   -o, --output-dir out/
#*                     Base path of the created tree.
#*
#*   --no-outputs
#*                     Do not extract files of role 'output'.
#*
#*   --dry-run
#*                     Do not create any folders and files, just extract
#*                     the files and compare their checksums.
#*
#*   -l, --list
#*                     Print the list of created folders and files.
#*
#*   --use-perl-parser
#*                     Use Perl parser for CIF parsing.
#*
#*   --use-c-parser
#*                     Use Perl & C parser for CIF parsing (default).
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    "-o,--output-dir" => \$output_dir,

    "--no-outputs" => sub{ $extract_outputs = 0 },

    "--dry-run" => sub{ $dry_run = 1 },

    "-l,--list" => sub{ $list = 1 },

    "--use-perl-parser" => sub { $use_parser = "perl" },
    "--use-c-parser"    => sub { $use_parser = "c" },
    "--options"         => sub { options; exit },
    "--help,--usage"    => sub { usage; exit },
    '--version'         => sub { print 'cod-tools version ',
                                 $COD::ToolsVersion::Version, "\n";
                                 exit }
);

@ARGV = ( "-" ) unless @ARGV;

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

my %created_dirs = ('.' => 1);

for my $filename (@ARGV) {

    my $options = { 'parser' => $use_parser, 'no_print' => 1 };
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $options );
    process_parser_messages( $messages, $die_on_error_level );
    next if ( $err_count > 0 );

    canonicalize_all_names( $data );

    for my $datablock (@$data) {
        my $values   = $datablock->{values};
        my $dataname = 'data_' . $datablock->{'name'};

        local $SIG{__WARN__} = sub {
            process_warnings( {
                'message'  => @_,
                'program'  => $0,
                'add_pos'  => $dataname,
                'filename' => $filename
            }, $die_on_error_level )
        };

        eval { {
            my $encodings = get_content_encodings( $datablock );

            for my $i (0..$#{$values->{_tcod_file_id}}) {
                my $path     = $values->{_tcod_file_name}[$i];
                my $dirname  = dirname( $values->{_tcod_file_name}[$i] );
                my $contents = $values->{_tcod_file_contents}[$i];
                my $role     = $values->{_tcod_file_role}[$i];
                my $md5sum   = $values->{_tcod_file_md5sum}[$i];
                my $sha1sum  = $values->{_tcod_file_sha1sum}[$i];
                my $uri      = $values->{_tcod_file_URI}[$i];
                my $encoding;
                if( exists $values->{_tcod_file_content_encoding} &&
                    !has_inapplicable_value( $datablock,
                                             '_tcod_file_content_encoding',
                                             $i ) ) {
                    $encoding = $values->{_tcod_file_content_encoding}[$i];
                }
                if( !exists $created_dirs{$dirname} ) {
                    if( !$dry_run ) {
                        die "ERROR, cannot create path $output_dir/$dirname -- "
                          . 'path already exists' . "\n" if -e "$output_dir/$dirname";
                        make_path( "$output_dir/$dirname" );
                    }
                    print "$output_dir/$dirname\n" if $list;
                    $created_dirs{$dirname} = 1;
                }
                next if $path =~ /\/$/; # Skipping folders
                if( !defined $contents ||
                    has_special_value( $datablock, '_tcod_file_contents', $i ) ) {
                    # Contents are not provided
                    if( defined $uri &&
                        !has_special_value( $datablock, '_tcod_file_URI', $i ) ) {
                        # If the URI is given instead of contents, it has to be
                        # fetched
                        use WWW::Curl::Easy;
                        my $curl = WWW::Curl::Easy->new();
                        $curl->setopt( CURLOPT_URL, $uri );
                        $contents = '';
                        open( my $curlhandle, '>', \$contents );
                        $curl->setopt( CURLOPT_WRITEDATA, $curlhandle );
                        if( $curl->perform ) {
                            warn "WARNING, unable to fetch URI '$uri': "
                               . $curl->errbuf . "\n";
                        }
                    } else {
                        warn "WARNING, contents for file '$path' are not "
                           . 'provided -- skipping' . "\n";
                        next
                    }
                }

                eval {
                    if( !$encoding || !$encodings ||
                        !exists $encodings->{$encoding} ) {
                        if( $encoding && $encodings &&
                            !exists $encodings->{$encoding} ) {
                            warn "WARNING, content encoding stack '$encoding' "
                               . "is not described -- trying to guess\n";
                        }
                        # Perform a default decoding, try to guess the encoding
                        # layer type from the encoding ID
                        $contents = decode_textfield( $contents, $encoding );
                    } else {
                        for my $layer (reverse @{$encodings->{$encoding}}) {
                            $contents = decode_textfield( $contents, $layer );
                        }
                    }
                };
                if( $@ ) {
                    $@ =~ s/^ERROR,\s*//;
                    warn 'WARNING, could not decode contents for file '
                        . "'$path' -- $@; will not decode contents\n";
                    $contents = $values->{_tcod_file_contents}[$i];
                }

                if( $md5sum ) {
                    if( md5_hex($contents) ne $md5sum ) {
                        warn "WARNING, MD5 checksums of the original '$path' "
                           . 'and decoded files are different' . "\n";
                    }
                }
                if( $sha1sum ) {
                    if( sha1_hex($contents) ne $sha1sum ) {
                        warn "WARNING, SHA1 checksums of the original '$path' "
                           . 'and decoded files are different' . "\n";
                    }
                }

				if( !$extract_outputs && $role && $role eq 'output' ) {
					next;
				}

                if( !$dry_run ) {
                    open my $out, '>', "$output_dir/$path" or die 'ERROR, '
                        . 'could not open file ' . "'$output_dir/$path'"
                        . ' for writing -- ' . lcfirst($!) . "\n";
                    print $out $contents;
                    close $out or die 'ERROR, '
                        . 'could not close file ' . "'$output_dir/$path'"
                        . ' after writing -- ' . lcfirst($!) . "\n";
                    if( $role && $role eq 'script' ) {
                        chmod( 0775, "$output_dir/$path" );
                    }
                }
                print "$output_dir/$path\n" if $list;
            }

            local $\ = "\n";
            print "$output_dir/main.sh" if $list;
            if( !$dry_run ) {
                my $main_exec_path = "$output_dir/main.sh";
                open( my $main_fh, '>', $main_exec_path ) or die 'ERROR, '
                    . "could not open file '$main_exec_path' -- "
                    . lcfirst($!);
                print $main_fh "#!/bin/bash\n";

                if( exists $values->{_audit_creation_method} &&
                    $values->{_audit_creation_method}[0] =~ /^aiida/i &&
                    exists $created_dirs{aiida} ) {
                    print $main_fh 'verdi import aiida';
                }

                for my $i (0..$#{$values->{_tcod_computation_step}}) {
                    my $env = $values->{_tcod_computation_environment}[$i];
                    my $command = $values->{_tcod_computation_command}[$i];
                    print $main_fh '(';
                    if( $env ) {
                        foreach ( split m/\n/, $env ) {
                            print $main_fh '    ' . $_;
                        };
                        print $main_fh '';
                    }
                    print $main_fh '    ' . $command;
                    print $main_fh ')';
                }
                close $main_fh or die 'ERROR, '
                    . "could not close file '$main_exec_path' -- "
                    . lcfirst($!);
                chmod( 0775, $main_exec_path );
            }
        }};
        if ($@) {
            process_errors( {
                'program'       => $0,
                'message'       => $@,
                'filename'      => $filename,
                'add_pos'       => $dataname
            }, $die_on_error_level->{'ERROR'} );
        }
    }
}
