# coding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search
   metadata harvest, extract authors and their writings from document set

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: metadata harvest, builds an html index of authors and their works

=end
module HARVEST_authors
  require "#{SiSU_lib}/author_format"
  @@the_idx_authors=[]
  # Entry point of the authors harvest: collects the header paragraphs of each
  # .sst/.ssm source file, passes them to Harvest, builds the author index and
  # has Output_index write it out as html.
  class Songsheet
    # Author index handed to Index, which adds to this same object. It is
    # declared on this class because a class variable assigned in the enclosing
    # module body is not visible from a nested class (reading it raises
    # NameError), and it is a Hash because Index keys it by author name.
    @@the_idx_authors={}
    # Paragraphs starting a header field that the harvest uses.
    HARVESTED_HEADER=/^@(?:title|subtitle|author|creator|date|original_publication):(?:\s|$)/m
    # Any other header field; skipped without ending the scan.
    OTHER_HEADER=/^@\S+?:(?:\s|$)/m
    # Blank or %-comment paragraphs; skipped without ending the scan.
    BLANK_OR_COMMENT=/^(?:\s*\n|%+ )/
    # opt:: command options object; #files (source file names) and #cmd
    #       (command flags) are read here.
    def initialize(opt)
      @opt=opt
      @file_list=opt.files
      @env=SiSU_Env::Info_env.new
    end
    # Runs the harvest over every .sst/.ssm file in the file list, writes the
    # html index and prints the location(s) of the result. Names that are not
    # .sst/.ssm are reported on stdout and skipped. Returns nil.
    def songsheet
      files=[]
      @file_list.each do |f|
        if f =~/.+?\.ss[tm]$/
          files << f[/(.+?\.ss[tm])$/,1]
        else
          print "not .sst or .ssm ? << #{f} >> "
        end
      end
      idx_array=[]
      files.each do |filename|
        idx_array=HARVEST_authors::Harvest.new(header_paragraphs(filename),filename,idx_array).extract_harvest
      end
      the_idx=HARVEST_authors::Index.new(idx_array,@@the_idx_authors).construct_book_author_index
      #HARVEST_authors::Output_index.new(the_idx).screen_print.cycle
      HARVEST_authors::Output_index.new(@opt,the_idx).html_print.html_songsheet
      puts "file://#{@env.path.output_md_harvest}/harvest_authors.html"
      puts "file://#{@env.path.pwd}/harvest_authors.html" if @opt.cmd.inspect =~/-M/
    end
    private
    # Returns the paragraphs (blank-line separated chunks, separator included)
    # of +filename+ that carry a harvested header field. Other header fields,
    # blank paragraphs and %-comments are skipped; scanning stops at the first
    # paragraph matching none of the patterns.
    # NOTE(review): ^ matches at every line start, so the trailing blank line of
    # a paragraph appears to satisfy BLANK_OR_COMMENT and the break may rarely
    # fire - confirm whether that is intended.
    def header_paragraphs(filename)
      paragraphs=[]
      File.open(filename,'r') do |file|
        file.each_line("\n\n") do |para|
          if para =~ HARVESTED_HEADER
            paragraphs << para
          elsif para !~ OTHER_HEADER && para !~ BLANK_OR_COMMENT
            break
          end
        end
      end
      paragraphs
    end
  end
  # Extracts title, subtitle, author, date and original publication from the
  # header paragraphs of one source file and appends an index entry for it.
  class Harvest
    # Header patterns; capture 1 holds the field value.
    RGX={
      :author => /^@(?:author|creator):(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m,
      :title => /^@title:\s+(.+)/,
      # /m lets .+? reach a :subtitle: sub-field on a later line of an @title:
      # paragraph; the capture itself stops at the end of its own line so it
      # does not swallow the rest of the paragraph.
      :subtitle => /^(?:@subtitle:\s+|@title:.+?:subtitle:[ ]+)([^\n]+)/m,
      :date => /^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m,
      :orig_pub => /^@original_publication:\s+(.+)/
    }
    # data::      array of header paragraphs (strings) of one file
    # filename::  name of the source file (.sst/.ssm, optionally ~lang suffixed)
    # idx_array:: entries collected so far; the new entry is appended to it
    def initialize(data,filename,idx_array)
      @data,@filename,@idx_array=data,filename,idx_array
    end
    # Scans the paragraphs and, when both a title and an author are found,
    # appends a Hash with the keys :filename, :file, :orig_pub, :date, :title,
    # :author and :page to idx_array. Returns idx_array (flattened in place).
    def extract_harvest
      data,filename,idx_array=@data,@filename,@idx_array
      @orig_pub=@title=@subtitle=@fulltitle=@author_format=@date=nil
      @authors=[]
      data.each do |para|
        @orig_pub=field(:orig_pub,para) || @orig_pub
        @title=field(:title,para) || @title
        @subtitle=field(:subtitle,para) || @subtitle
        @author_format=field(:author,para) || @author_format
        @date=field(:date,para) || @date
        # stop once every field is known (the author value lives in
        # @author_format)
        break if @title && @subtitle && @author_format && @date && @orig_pub
      end
      # interpolation keeps a subtitle without a title from raising; such a
      # file yields no entry below
      @fulltitle=@subtitle ? "#{@title} - #{@subtitle}" : @title
      if @title && @author_format #and @orig_pub (publication details)
        creator=FORMAT::Author.new(@author_format.strip).author_details
        @authors,@authorship=creator[:authors],creator[:authorship]
        # a "~xx" language code before the extension selects a per-language
        # manifest page and is dropped from the file base name
        lang_match=/~([a-z]{2,3})\.ss[mt]$/.match(filename)
        if lang_match
          lang=".#{lang_match[1]}"
          file=filename.sub(/~[a-z]{2,3}\.ss[mt]$/,'')
        else
          lang=''
          file=filename.sub(/\.ss[mt]$/,'')
        end
        page="sisu_manifest#{lang}.html"
        idx_array << { :filename => filename, :file => file, :orig_pub => @orig_pub, :date => @date, :title => @fulltitle, :author => creator, :page => page }
      end
      idx_array.flatten!
      idx_array
    end
    private
    # Value captured by the pattern registered under +key+, or nil when the
    # paragraph does not match.
    def field(key,para)
      found=RGX[key].match(para)
      found && found[1]
    end
  end
  # Groups harvested document entries under each of their authors.
  class Index
    # idx_array:: document entries (hashes) as produced by the harvest
    # the_idx::   author index to add to; it is also stored in the class
    #             variable and modified in place
    def initialize(idx_array,the_idx)
      @idx_array=idx_array
      @the_idx=the_idx
      @@the_idx_authors=the_idx
    end
    # Returns txt with its first character capitalized.
    def capital(txt)
      initial=txt[0].chr
      remainder=txt[1,txt.length]
      initial.capitalize + remainder
    end
    # Files every entry under each name listed in its
    # [:author][:last_first_format_a] (names are stripped in place), creating
    # the author's slot on first sight. Returns the author index.
    def construct_book_author_index
      @idx_array.each do |entry|
        entry[:author][:last_first_format_a].each do |name|
          name.strip!
          @@the_idx_authors[name]={:md => []} if @@the_idx_authors[name].nil?
          slot=@@the_idx_authors[name][:md]
          slot << {
            :filename => entry[:filename],
            :file => entry[:file],
            :author => entry[:author],
            :title => entry[:title],
            :date => entry[:date],
            :page => entry[:page]
          }
        end
      end
      @the_idx=@@the_idx_authors
    end
  end
  # Writes the author index as html: always to harvest_authors.html in the
  # harvest output directory and, when the command flags contain -M, also as a
  # maintenance copy (with links to the source files) in the working directory.
  # Usage: Output_index.new(opt,the_idx).html_print.html_songsheet
  class Output_index
    # opt::     command options object; #cmd (command flags) is read
    # the_idx:: author index, a Hash of author name => {:md => [entries]}
    def initialize(opt,the_idx)
      @opt,@the_idx=opt,the_idx
      @env=SiSU_Env::Info_env.new
      @rc=SiSU_Env::Get_init.instance.yamlrc
      # '9' is only the starting value of @letter (it sorts before 'A'); the
      # remaining letters are consumed one by one as section headings
      @alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
      @letter=@alph.shift
      @vz=SiSU_Env::Get_init.instance.skin
    end
    # Opens the output file and, with -M, the maintenance copy.
    def html_file_open
      @output={}
      @output[:html]=File.new("#{@env.path.output_md_harvest}/harvest_authors.html",'w')
      @output[:html_mnt]= if @opt.cmd.inspect =~/-M/
        File.new("#{@env.path.pwd}/harvest_authors.html",'w')
      else nil
      end
    end
    # Closes whatever html_file_open opened.
    def html_file_close
      @output[:html].close
      @output[:html_mnt].close if maintenance?
    end
    # Selects html output; returns self so an html_* method can be chained.
    def html_print
      self
    end
    # Writes the complete page: head, A-Z navigation, author listing, tail.
    # The files are closed even when writing raises. Returns nil.
    def html_songsheet
      html_file_open
      begin
        html_head
        html_alph
        html_body
        html_tail
      ensure
        html_file_close
      end
      nil
    end
    # Returns the page head as a string; type 'maintenance' selects the
    # stylesheet path used by the maintenance copy.
    def html_head_adjust(type='')
      css_path=if type !~/maintenance/
        '../_sisu/css/harvest.css'
      else 'harvest.css'
      end
      sv=SiSU_Env::Info_version.instance.get_version
      <<WOK
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>SiSU Metadata Harvest - Authors</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="dc.title" content= "SiSU metadata harvest, Authors - SiSU information Structuring Universe, Structured information Serialised Units" />
<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" />
<meta name="generator" content="#{sv[:project]} #{sv[:version]} of #{sv[:date_stamp]} (n*x and Ruby!)" />
<link rel="generator" href="http://www.jus.uio.no/sisu/SiSU" />
<link rel="stylesheet" href="#{css_path}" type="text/css" />
<link rel="shortcut icon" href="../_sisu/image/rb7.ico" />
</head>
<body bgcolor="#ffffff" text="#000000" link="#003090" lang="en" xml:lang="en">
<a name="top" id="top"></a>
<a name="up" id="up"></a>
<a name="start" id="start"></a>
<h1>SiSU Metadata Harvest - Authors</h1>
<p>[<a href="../index.html">&nbsp;HOME&nbsp;</a>] also see <a href="harvest_topics.html">SiSU Metadata Harvest - Topics</a></p>
<hr />
WOK
    end
    # Writes the page head to each open output.
    def html_head
      @output[:html_mnt] << html_head_adjust('maintenance') if maintenance?
      @output[:html] << html_head_adjust
    end
    # Writes the A-Z navigation line. The conditional is parenthesised: without
    # that, << binds tighter than ?: and the regex match result rather than the
    # link is appended.
    def html_alph
      links=@alph.map do |x|
        (x =~/[0-9]/) ? '' : %{<a href="##{x}">#{x}</a>,&nbsp;}
      end
      do_html("<p>#{links.join}</p>")
    end
    # Writes the closing anchors, the credits and the end of the page. The
    # string itself is written; appending an Array to a File writes its to_s
    # form instead of the markup.
    def html_tail
      tail=<<WOK
<hr />
<a name="bottom" id="bottom"></a>
<a name="down" id="down"></a>
<a name="end" id="end"></a>
<a name="finish" id="finish"></a>
<a name="stop" id="stop"></a>
<a name="credits"></a>
#{@vz.credits_sisu}
</body>
</html>
WOK
      do_html(tail)
    end
    # Writes html to every open output.
    def do_html(html)
      @output[:html_mnt] << html if maintenance?
      @output[:html] << html
    end
    # Writes string as a paragraph of css class attrib.
    def do_string(attrib,string)
      html=%{<p class="#{attrib}">#{string}</p>}
      do_html(html)
    end
    # Emits a letter heading for every letter up to and including the first
    # character of the author name. string is a [name, data] pair and only
    # string[0] is read; attrib is unused. A name without a non-blank first
    # character emits nothing.
    def do_string_name(attrib,string)
      first=/^(\S)/.match(string[0])
      return unless first
      f=first[1]
      while @letter < f && @alph.length > 0
        @letter=@alph.shift
        do_html(%{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>})
      end
    end
    # Writes each author, in name order, followed by that author's works
    # sorted by "date title". The maintenance copy gets its own line per work,
    # with links to the source file.
    def html_body
      @the_idx.sort.each do |a|
        do_string_name('',a)
        # anchor: the part of the name before the first comma, blanks as _
        name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_')
        do_html(%{<p class="author"><a name="#{name}">#{a[0]}</a></p>})
        # each work is [sort key, html line, maintenance line (with -M only)]
        works=a[1][:md].map do |md|
          work=[ "#{md[:date]} #{md[:title]}", %{<p class="publication">#{md[:date]} <a href="../#{md[:file]}/#{md[:page]}">#{md[:title]}</a>, #{md[:author][:authors_s]}</p>} ]
          if maintenance?
            work << %{<p class="publication">[<a href="#{md[:file]}.sst">src</a>]&nbsp;&nbsp;#{md[:date]} <a href="file://#{@env.path.output}/#{md[:file]}/#{md[:page]}">#{md[:title]}</a>, #{md[:author][:authors_s]} -- [<a href="#{md[:file]}.sst">#{md[:file]}.sst</a>]</p>}
          end
          work
        end
        works.sort_by {|work| work[0]}.each do |work|
          @output[:html] << work[1]
          @output[:html_mnt] << work[2] if maintenance?
        end
      end
    end
    # Selects screen output; returns self so cycle can be chained.
    def screen_print
      self
    end
    # Prints every author followed by the tab-indented file names of their
    # works.
    def cycle
      @the_idx.sort.each do |a|
        puts a[0]
        a[1][:md].each do |md|
          puts "\t" + md[:file]
        end
      end
    end
    private
    # True when the maintenance copy is open for writing.
    def maintenance?
      @output[:html_mnt].class==File
    end
  end
end
__END__
