#!/bin/bash
# Smoke test: run gffread over GFF files shipped by other installed packages.

# errexit is enabled here rather than on the shebang line so that it is also
# in effect when the script is started as "bash <script>".
set -e

pkg=gffread

# Use the scratch directory provided by the test runner: ADTTMP is the legacy
# name, AUTOPKGTEST_TMP the current one.  When neither is set (manual run),
# create a private directory and remove it again when the script exits.
ADTTMP=${ADTTMP:-${AUTOPKGTEST_TMP:-}}
if [ -z "${ADTTMP}" ] ; then
        ADTTMP=$(mktemp -d "/tmp/${pkg}-test.XXXXXX")
        trap 'rm -rf -- "${ADTTMP}"' EXIT
fi
cd "${ADTTMP}"

# Packages whose GFF files are fed to gffread.  This list mirrors the one in
# the control file and was produced with:
# apt-file search --package-only --regexp '\.gff.?$' | grep -v -E "bedops|cct|ugene|gbrowse-data"

PACKAGES="augustus-doc, bedtools-test, cnvkit, emboss-data, emboss-test, gbrowse, genometools-common, gff2aplot, gff2ps, htslib-test, libbio-graphics-perl, optimir, proteinortho, python3-gffutils, python3-pybedtools, r-bioc-genomicfeatures, r-bioc-gviz, r-bioc-rhtslib, r-bioc-rtracklayer, seqan-apps, spades, trinityrnaseq-examples"

# Turn the comma separated list into the array pkgs, one element per package:
# commas become blanks, then read splits the line on blanks.
IFS=' ' read -r -a pkgs <<< "${PACKAGES//,/ }"

# ugene is non-free

# r-bioc-gviz (this entry was headed "bedops", but the file below is part of the Gviz R library):
# gffread /usr/lib/R/site-library/Gviz/extdata/test.gff2
# Error parsing strand (?) from GFF line:
# chr1	Canada	exon	3000	3902	.	?	2	ID=exon00003;score=4;Name=foo

# r-bioc-rtracklayer
# /usr/lib/R/site-library/rtracklayer/tests/genes.gff3
# Error parsing strand (?) from GFF line:
# chr12	rtracklayer	CDS	90796	91263	.	?	.	Parent=4644;

# cct (and similar for cct-examples)
# /usr/share/cct/lib/assign_cogs/sample_output/sample_1.gff
# Warning: invalid start coordinate at line:
# seqname	source	feature	start	end	score	strand	frame

# emboss-test
# /usr/share/EMBOSS/test/data/featexample2.gff3
# Warning: unrecognized parented feature without ID found before its parent:
# ctg123  .       TF_binding_site 1000    1012    .       +       .       Parent=gene00001
# gffread /usr/share/EMBOSS/test/data/gmod-quantitative.gff3
# Warning: invalid start coordinate at line:
# ctg123  affy    microarray_oligo                        1       100     281     .       .       Name=Expt1

# gbrowse
# gffread /usr/share/gbrowse/htdocs/tutorial/data_files/volvox.gff3
# Warning: unrecognized parented feature without ID found before its parent:
# ctgA    example clone_start     1000    1500    .       +       .       Parent=b101.2
# Warning: unrecognized parented feature without ID found before its parent:
# ctgA    example clone_end       19500   20000   .       -       .       Parent=b101.2
# gffread /usr/share/gbrowse/htdocs/tutorial/data_files/volvox_all.gff3
# Warning: unrecognized parented feature without ID found before its parent:
# ctgA    example clone_start     1000    1500    .       +       .       Parent=b101.2
# Warning: unrecognized parented feature without ID found before its parent:
# ctgA    example clone_end       19500   20000   .       -       .       Parent=b101.2
# gffread /usr/share/gbrowse/htdocs/tutorial/data_files/volvox_bacs.gff3
# Warning: unrecognized parented feature without ID found before its parent:
# ctgA    example clone_start     1000    1500    .       +       .       Parent=b101.2
# Warning: unrecognized parented feature without ID found before its parent:
# ctgA    example clone_end       19500   20000   .       -       .       Parent=b101.2

# gbrowse-data
# gffread /var/lib/gbrowse/databases/gbrowse_syn/rice/rice.gff3
# Error: cannot open input file /var/lib/gbrowse/databases/gbrowse_syn/rice/rice.gff3!
# ls -l /var/lib/gbrowse/databases/gbrowse_syn/rice/rice.gff3
# -rwxrwx--- 1 root www-data 235531 Jan 15  2017 /var/lib/gbrowse/databases/gbrowse_syn/rice/rice.gff3
# (it is not world readable)

# python3-gffutils
# gffread /usr/lib/python3/dist-packages/gffutils/test/data/F3-unique-3.v2.gff
# Warning: invalid start coordinate at line:
# ##hdr seqname   source  feature start   end     score   strand  frame   [attributes]    [comments]
# gffread /usr/lib/python3/dist-packages/gffutils/test/data/mouse_extra_comma.gff3
# Warning: cannot parse value of GFF attribute "ID=" at line:
# chr17   RefSeq  protein 6806527 6812289 .       +       .       ID=;Parent=XM_001475631.1
# gffread /usr/lib/python3/dist-packages/gffutils/test/data/dmel-all-no-analysis-r5.49_50k_lines.gff
# Error: discarding overlapping duplicate orthologous_region feature (11218-15711) with ID=ortho:5391
# gffread /usr/lib/python3/dist-packages/gffutils/test/data/glimmer_nokeyval.gff3
# Error: discarding overlapping duplicate mRNA feature (3-62) with ID=GL0000006
# gffread /usr/lib/python3/dist-packages/gffutils/test/data/unsanitized.gff
# Error: invalid feature coordinates (end<start!) at line:
# 3R      FlyBase gene    1000    500     .       +       .       ID=FBgnFAKE1;


# r-bioc-genomicfeatures
# gffread /usr/lib/R/site-library/GenomicFeatures/extdata/GFF3_files/TheCanonicalGene_v2.gff3
# Warning: unrecognized parented feature without ID found before its parent:
# ctg123  .       TF_binding_site 1000    1012    .       +       .       Parent=gene00001

# seqan-apps
# gffread /usr/share/doc/seqan-apps/insegt/example/annoOutput.gff
# Warning: invalid start coordinate at line:
# test_chromosome Annotation_Count        region  .       .       3       +       .       ID=ENSG00000146556;459136.822773;

# optimir
# gffread /usr/lib/python3/dist-packages/optimir/resources/coordinates/hsa_miRCarta_v1.1.gff3
# Error: discarding overlapping duplicate miRNA feature (177339349-177339370) with ID=m-3914

# python3-pybedtools
# gffread /usr/lib/python3/dist-packages/pybedtools/test/data/c.gff
# Error: discarding overlapping duplicate mRNA feature (496-576) with ID=AT1G01010.mRNA

# Extended regular expression (alternatives joined with "|") of the files that
# are not passed to gffread.  It is assembled one path per line for legibility.
SKIPS="/usr/share/doc/augustus/tutorial/results/augustus.abinitio.gff"
SKIPS+="|/usr/share/doc/augustus/tutorial/results/augustus.hints.gff"
SKIPS+="|/usr/lib/R/site-library/Gviz/extdata/test.gff2"
SKIPS+="|/usr/lib/R/site-library/rtracklayer/tests/genes.gff3"
SKIPS+="|/usr/share/EMBOSS/test/data/featexample2.gff3"
SKIPS+="|/usr/share/EMBOSS/test/data/gmod-quantitative.gff3"
SKIPS+="|/usr/share/gbrowse/htdocs/tutorial/data_files/volvox.gff3"
SKIPS+="|/usr/share/gbrowse/htdocs/tutorial/data_files/volvox_all.gff3"
SKIPS+="|/usr/share/gbrowse/htdocs/tutorial/data_files/volvox_bacs.gff3"
SKIPS+="|/var/lib/gbrowse/databases/ideograms/human_cytobands.gff"
SKIPS+="|/var/lib/gbrowse/databases/ideograms/mouse_cytobands.gff"
SKIPS+="|/var/lib/gbrowse/databases/ideograms/rat_cytobands.gff"
SKIPS+="|/var/lib/gbrowse/databases/yeast_chr1\+2/yeast_chr1\+2.gff3"
SKIPS+="|/usr/lib/python3/dist-packages/gffutils/test/data/F3-unique-3.v2.gff"
SKIPS+="|/usr/lib/python3/dist-packages/gffutils/test/data/mouse_extra_comma.gff3"
SKIPS+="|/usr/lib/R/site-library/GenomicFeatures/extdata/GFF3_files/TheCanonicalGene_v2.gff3"
SKIPS+="|/usr/share/doc/seqan-apps/insegt/example/annoOutput.gff"
SKIPS+="|/usr/lib/python3/dist-packages/optimir/resources/coordinates/hsa_miRCarta_v1.1.gff3"
SKIPS+="|/usr/lib/python3/dist-packages/gffutils/test/data/dmel-all-no-analysis-r5.49_50k_lines.gff"
SKIPS+="|/usr/lib/python3/dist-packages/gffutils/test/data/glimmer_nokeyval.gff3"
SKIPS+="|/usr/lib/python3/dist-packages/gffutils/test/data/unsanitized.gff"
SKIPS+="|/usr/lib/python3/dist-packages/pybedtools/test/data/c.gff"
SKIPS+="|/usr/lib/R/site-library/rtracklayer/tests/v2.gff"

# Run gffread on every GFF file shipped by one installed package.
# Globals:   SKIPS (read) - extended regex of paths that must not be tested
# Arguments: $1 - name of the binary package to inspect with "dpkg -L"
# Outputs:   "<path>: is okay." on stdout for each file gffread accepts,
#            "<path>: FAILED." on stdout plus a diagnostic on stderr otherwise
# Returns:   0 if every tested file passed (or there was nothing to test),
#            1 if the package could not be listed, grep failed, or gffread
#            rejected at least one file
check_package_gffs() {
   local package=$1
   local listing candidates gff
   local status=0 rc=0
   local -a gffs=()

   if ! listing=$(dpkg -L "${package}"); then
      printf 'cannot list the files of package %s\n' "${package}" >&2
      return 1
   fi

   # grep exits with 1 when nothing is selected.  A package whose GFF files
   # are all skipped is not an error, so only statuses above 1 are fatal.
   candidates=$(printf '%s\n' "${listing}" \
      | grep -E '\.gff.?$' \
      | grep -v -E -e "${SKIPS}") || status=$?
   if [ "${status}" -gt 1 ]; then
      printf 'cannot filter the file list of package %s\n' "${package}" >&2
      return 1
   fi
   if [ -z "${candidates}" ]; then
      return 0
   fi

   # One array element per line keeps paths intact even if they contain blanks.
   mapfile -t gffs <<< "${candidates}"
   for gff in "${gffs[@]}"
   do
      printf '%s: ' "${gff}"
      # Test the status explicitly: in "cmd && echo" a failing cmd is ignored
      # by errexit and the failure would go unnoticed.
      if gffread "${gff}" > /dev/null; then
         echo is okay.
      else
         echo FAILED.
         printf 'gffread failed on %s\n' "${gff}" >&2
         rc=1
      fi
   done
   return "${rc}"
}

# Test all packages before reporting, so that one run lists every failure.
failed=0
for package in "${pkgs[@]}"
do
   check_package_gffs "${package}" || failed=1
done
if [ "${failed}" -ne 0 ]; then
   exit 1
fi
