# encoding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: modules shared by the different db types, dbi, postgresql,
   sqlite

=end
module SiSU_DbImport
  require_relative 'db_columns'                         # db_columns.rb
  require_relative 'db_load_tuple'                      # db_load_tuple.rb
  require_relative 'db_sqltxt'                          # db_sqltxt.rb
  require_relative 'shared_html_lite'                   # shared_html_lite.rb
  require 'sqlite3'
  class Import < SiSU_DbText::Prepare
    include SiSU_Param
    include SiSU_Screen
    @@dl=nil
    @@hname=nil
    attr_accessor :tp
    # Prepares an importer for the document named by opt.fns.
    #
    # opt::        command options; this method reads opt.cmd and opt.fns
    # conn::       open database handle (sqlite3 style or DBI style)
    # file_maint:: handed on unchanged to the SiSU_DbTuple loaders
    # sql_type::   'pg' (default) or 'sqlite'
    #
    # Seeds the running row ids (@col[:lid] for doc_objects, @id_n for
    # endnotes) from the current MAX() values in the database; both fall back
    # to 0 when the lookup fails or the table is empty.
    def initialize(opt,conn,file_maint,sql_type='pg')
      @opt,@conn,@file_maint,@sql_type=opt,conn,file_maint,sql_type
      @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX
      @env=SiSU_Env::InfoEnv.new(@opt.fns)
      @dal="#{@env.processing_path.dal}"
      if @opt.fns.empty? or @opt.cmd.empty?; @fnb=''
      else
        @md=SiSU_Param::Parameters.new(@opt).get
        @fnb=@md.fnb
      end
      # NOTE(review): the pattern has no capturing group, so asking for group 1
      # evaluates to nil; left as is because the intended capture is not clear.
      @suffix=@opt.fns[/(?:.+?)(?:\.ssm\.sst|\.-?sst)/,1]
      @fnc="#{@dal}/#{@opt.fns}.content.rbm"
      @@seg,@@seg_full='',''                                  #create? consider placing field just before clean text as opposed to seg which contains seg(.html) name info seg_full would contain seg info for levels 5 & 6 where available eg seg_full may be 7.3 (level 5) and 7.3.1 (level 6) where seg  is 7
      @col=Hash.new('')
      @col[:ocn]=''
      @counter={}
      @db=SiSU_Env::InfoDb.new
      if @sql_type=='sqlite'
        # the connection is recognised as an sqlite3 handle by comparing the
        # first ten characters of its inspect string with a known sqlite3 handle
        @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1])
      end
      # runs a single-value MAX() query; nil when the query raises
      max_id=lambda do |sql|
        begin
          if @driver_sqlite3
            @conn.execute( sql ).join.to_i
          else
            @conn.execute( sql ) { |x| x.fetch_all.flatten[0] }
          end
        rescue
          puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/
          nil
        end
      end
      lid=max_id.call('SELECT MAX(lid) FROM doc_objects')
      @col[:lid]=(lid.nil? or lid.to_s.empty?) ? 0 : lid
      # the fallback is decided on the endnote id itself (it used to test
      # @col[:lid], which left @id_n nil after a failed lookup)
      nid=max_id.call('SELECT MAX(nid) FROM endnotes')
      @id_n=(nid.nil? or nid.to_s.empty?) ? 0 : nid
      @col[:lv1]=@col[:lv2]=@col[:lv3]=@col[:lv4]=@col[:lv5]=@col[:lv6]=0
      @db=SiSU_Env::InfoDb.new
      @pdf_fn=SiSU_Env::FileOp.new(@md).base_filename
      @@dl ||=SiSU_Env::InfoEnv.new.digest.length
    end
    # Imports the current document into the database, unless a row with the
    # same source filename and language is already in metadata_and_text, in
    # which case a notice is printed and nothing is written.
    #
    # The SQL statements come from db_import_metadata, db_import_documents and
    # db_import_urls and are executed as one batch: inside @conn.transaction
    # for sqlite, between BEGIN/COMMIT issued through @conn.do otherwise.
    # When execution raises, the error is reported and the whole batch is
    # written to a .sql file under the processing path.
    def marshal_load
      require_relative 'dal'                            # dal.rb
      @dal_array=SiSU_DAL::Source.new(@opt).get            # dal file drawn here
      SiSU_Screen::Ansi.new(@opt.cmd,"#{@db.psql.db}::#{@opt.fns}").puts_blue if @opt.cmd =~/[vVM]/
      SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnc).puts_grey if @opt.cmd =~/v/
      # NOTE(review): filename and language are interpolated straight into the
      # query text; a quote character in either would break the statement.
      select_first_match=%{
        SELECT metadata_and_text.tid
        FROM metadata_and_text
        WHERE metadata_and_text.src_filename = '#{@opt.fns}'
        AND metadata_and_text.language_document_char = '#{@opt.lng}'
      ;}
      file_exist=if @sql_type=~/sqlite/
        @conn.get_first_value(select_first_match)
      else
        @conn.select_one(select_first_match)
      end
      if file_exist
        @db=SiSU_Env::InfoDb.new
        puts "\n#{@cX.grey}file #{@cX.off} #{@cX.blue}#{@opt.fns}#{@cX.off} in language code #{@cX.blue}#{@opt.lng}#{@cX.off} #{@cX.grey}already exists in database#{@cX.off} #{@cX.blue}#{@db.psql.db}#{@cX.off} #{@cX.brown}update instead?#{@cX.off}"
        return
      end
      t_d=[]                                                              # transaction_data
      t_d << db_import_metadata
      t_d << db_import_documents(@dal_array)
      t_d << db_import_urls(@dal_array,@fnc)                              #import OID on/off
      t_d=t_d.flatten
      if @opt.cmd =~/[MV]/
        puts @conn.class if defined? @conn.class
        puts @conn.driver_name if defined? @conn.driver_name
        puts @conn.driver if defined? @conn.driver
      end
      begin
        if @sql_type=~/sqlite/
          @conn.transaction do |conn|
            t_d.each { |stmt| conn.execute(stmt) }
          end
        else
          #'do' works for postgresql
          # NOTE(review): no ROLLBACK is issued when a statement fails here
          @conn.do("BEGIN")
          t_d.each { |stmt| @conn.do(stmt) }
          @conn.do("COMMIT")
        end
      rescue DBI::DatabaseError => e
        puts "Error code: #{e.err}"
        puts "Error message: #{e.errstr}"
        puts "Error SQLSTATE: #{e.state}"
        marshal_load_report_failure(t_d,$!,$@)
      rescue
        marshal_load_report_failure(t_d,$!,$@)
      end
    end
    # Reports a failed import and saves the statements that were to be run.
    #
    # t_d::       the SQL statements of the failed batch
    # error::     the exception being handled ($! at the rescue site)
    # backtrace:: its backtrace ($@ at the rescue site)
    #
    # The statements go to "<processing sql path>/<fnb>.sql"; the file is
    # opened in block form so it is flushed and closed before returning.
    def marshal_load_report_failure(t_d,error,backtrace)
      SiSU_Errors::InfoError.new(error,backtrace,@opt.cmd,@opt.fns).error do
        __LINE__.to_s + ':' + __FILE__
      end
      sqlfn="#{@env.processing_path.sql}/#{@md.fnb}.sql"
      File.open(sqlfn,'w') do |file|
        t_d.each { |stmt| file.puts stmt }
      end
      p sqlfn
      if @opt.cmd =~/M/
        p @conn.methods.sort
        puts "#{__FILE__}:#{__LINE__}"
      end
    end
    private :marshal_load_report_failure
    # Placeholder with an empty body; takes no arguments and returns nil.
    def pf_db_import_transaction_open; end
    # Placeholder with an empty body; takes no arguments and returns nil.
    def pf_db_import_transaction_close; end
    # Builds the metadata tuple for the current document.
    #
    # Reloads the document parameters into @md, collects the escaped source
    # text and a searchable plain-text version in @tp, and advances the
    # class-wide document id @@id_t to MAX(tid)+1.
    #
    # Returns whatever SiSU_DbTuple::LoadMetadata#tuple returns.
    def db_import_metadata                                                       #% import documents - populate database
      print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } if @opt.cmd =~/[vVM]/
      @tp={}
      @md=SiSU_Param::Parameters.new(@opt).get
#% sisutxt & fulltxt
      if FileTest.exist?(@md.fns)
        txt_arr=IO.readlines(@md.fns,'')                 # '' separator: read paragraph by paragraph
        src=special_character_escape(txt_arr.join("\n"))
        @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', "
        txt=clean_searchable_text(txt_arr)
        @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', "
      end
#% title
      if defined?(@md.title.full) && @md.title.full=~/\S+/                     # DublinCore 1 - title
        sql='SELECT MAX(tid) FROM metadata_and_text;'
        begin
          @@id_t ||=0
          id_t=if @driver_sqlite3
            @conn.execute( sql ).join.to_i
          else
            @conn.execute( sql ) { |x| x.fetch_all.flatten[0] }
          end
          @@id_t=id_t if id_t
        rescue
          puts "#{__FILE__} #{__LINE__}" if @opt.cmd =~/M/
        end
        @@id_t+=1 #bug related, needs to be performed once at start of file, but consider moving, as, placed here it means program will fail if document header lacks @title:
        puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@cX.off}} if @opt.cmd =~/[vVM]/
      end
      SiSU_DbDBI::Test.new(self,@opt).verify                          #% import title names, filenames (tuple)
      SiSU_DbTuple::LoadMetadata.new(@conn,@@id_t,@md,@file_maint).tuple
    end
    # Builds the SQL tuples for every object of the document and for the
    # endnotes found in each object.
    #
    # dal_array:: the document objects; each is read through obj, of, is, ln,
    #             lv, name, ocn, odv, osp, node and parent (tables and indented
    #             text additionally through indent / hang)
    #
    # Inline font-face markup is stripped from data.obj in place. Headings of
    # level 1-3, level 4 and level 5/6 and all remaining text are formatted
    # through SiSU_FormatShared::CSS_Format and queued as LoadDocuments tuples;
    # objects whose +of+ is :structure, :layout or :comment produce no document
    # tuple. Endnotes are then queued as LoadEndnotes tuples.
    #
    # Returns @tuple_array. An exception is reported through
    # SiSU_Errors::InfoError and ends the loop early; the tuples collected up
    # to that point are still returned.
    def db_import_documents(dal_array)                                     #% import documents - populate main database table, import into substantive database tables (tuple)
      @col[:tid]=@@id_t
      @en,@en_ast,@en_pls,@tuple_array=[],[],[],[]
      @col[:en_a],@col[:en_z]=nil,nil
      dal_array.each do |data|
        doc_object_strip_inline_markup(data.obj)
        @col[:seg]=@@seg
        # Every object is processed: the guard that used to wrap the code below
        # (data.of ==:para || :heading || ...) was always truthy.
        notedata=data.obj.dup                             # copy scanned for endnotes further down
        if data.is==:heading && data.ln.inspect=~/[123]/                       #% :headings
          doc_object_import_heading(data)
        elsif data.is==:heading && data.ln==4
          @@seg=data.name
          doc_object_heading_columns(data)
          @col[:seg]=@@seg
          doc_object_import_segment_heading(data,4)
        elsif data.is==:heading && data.ln==5
          doc_object_heading_columns(data)
          @@seg_full=data.name if data.name #check data.name
          @@seg ||='' #nil # watch
          @col[:seg]=@@seg
          doc_object_import_segment_heading(data,5)
        elsif data.is==:heading && data.ln==6
          doc_object_heading_columns(data)
          @@seg_full=data.name if data.name #check data.name
          @@seg ||='' #nil # watch
          @col[:seg]=@@seg
          doc_object_import_segment_heading(data,6)
        elsif data.of==:structure || data.of==:layout || data.of==:comment    #% :structure :layout :comment
          #added watch
        else                                                                   #% regular text
          doc_object_import_text(data)
        end
                                                                               #% import into database endnotes tables
        doc_object_import_endnotes(notedata,data,'endnotes',Mx[:en_a_o],Mx[:en_a_c])
        doc_object_import_endnotes(notedata,data,'endnotes_asterisk',"#{Mx[:en_b_o]}\\*",Mx[:en_b_c])
        doc_object_import_endnotes(notedata,data,'endnotes_plus',"#{Mx[:en_b_o]}\\+",Mx[:en_b_c])
      end
      @tuple_array
    rescue
      SiSU_Errors::InfoError.new($!,$@,@opt.cmd,@opt.fns).error
      @tuple_array
    end
    # Removes the inline font-face markers (keeping the enclosed text), tidies
    # the Mx[:gl_o]/Mx[:gl_c] bullet marker and drops Mx[:tag_o]..Mx[:tag_c]
    # tags; +obj+ is modified in place.
    def doc_object_strip_inline_markup(obj)
      [:bold,:italics,:underscore,:superscript,:subscript,:insert,:cite,:strike,:monospace].each do |face|
        obj.gsub!(/#{Mx[:"fa_#{face}_o"]}(.+?)#{Mx[:"fa_#{face}_c"]}/,'\1')
      end
      obj.gsub!(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ')
      obj.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
    end
    # Copies the per-object columns shared by all heading levels into @col.
    def doc_object_heading_columns(data)
      @col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'',''
    end
    # Recomputes @hname (current segment name, falling back to the last
    # non-empty one kept in @@hname), @env and @base_url.
    def doc_object_segment_url
      @hname=if @col[:seg] && !@col[:seg].to_s.empty?
        @@hname=@col[:seg].to_s
      else @@hname
      end
      @env=SiSU_Env::InfoEnv.new(@md.fns)
      @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
    end
    # Stores the escaped +html+ as @col[:body] and a markup-free, searchable
    # version of it as @col[:plaintext].
    def doc_object_store_body(html)
      @col[:body]=special_character_escape(html)
      @col[:plaintext]=clean_searchable_text(strip_markup(@col[:body].dup))
    end
    # Records first and last entry of the endnote numbers collected by
    # endnotes(..).extract_any for each of the three endnote kinds.
    def doc_object_note_ranges
      @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
      @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
      @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
    end
    # Queues a LoadDocuments tuple built from the current state of @col.
    def doc_object_queue_tuple
      @tuple_array << SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint).tuple
    end
    # Headings of level 1-3.
    def doc_object_import_heading(data)
      @col[:lev]=data.ln
      doc_object_heading_columns(data)
      @col[:lid]+=1
      endnotes(data.obj).extract_any
      doc_object_store_body(SiSU_FormatShared::CSS_Format.new(@md,data).lev4_minus)
      doc_object_note_ranges
      doc_object_queue_tuple
      # to_s: a Regexp in +when+ only ever matches a String, so a numeric
      # level would otherwise never be counted
      case @col[:lev].to_s
      when /1/; @col[:lv1]+=1
      when /2/; @col[:lv2]+=1
      when /3/; @col[:lv3]+=1
      end
      @col[:lev]=@col[:plaintext]=@col[:body]=''
    end
    # Headings of level 4, 5 and 6 (+level+ is that number); the caller has
    # already filled the heading columns and @col[:seg].
    def doc_object_import_segment_heading(data,level)
      @col[:"lv#{level}"]+=1
      @col[:lid]+=1
      @col[:lev]=level
      doc_object_segment_url
      endnotes(data.obj).extract_any
      doc_object_store_body(SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus)
      doc_object_note_ranges
      doc_object_queue_tuple
      @col[:lev]=@col[:plaintext]=@col[:body]=''
    end
    # Everything that is neither a level 1-6 heading nor structure / layout /
    # comment.
    def doc_object_import_text(data)
      @col[:lid]+=1
      @col[:ocn],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.ocn,data.odv,data.osp,data.of,data.is,'',data.parent,'',''
      doc_object_segment_url
      txt=endnotes(data.obj).extract_any
      if @sql_type=~/pg/ &&
      txt.size > (SiSU_DbColumns::ColumnSize.new.document_clean - 1)             # examine pg build & remove limitation
        # NOTE(review): only logged; the body stored below is still built from
        # +data+ and is not shortened.
        puts "\n\nTOO LARGE (TXT - see error log)\n\n"
        File.open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
          error.puts("\n#{@opt.fns}\nTEXT BODY\n#{txt.size} object #{@col[:ocn]} -> #{txt.slice(0..500)}")
        end
      end
      doc_object_note_ranges
      doc_object_store_body(doc_object_text_html(data))
      doc_object_queue_tuple
      @en,@en_ast,@en_pls=[],[],[]
      @col[:en_a]=@col[:en_z]=nil
      @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]=''
    end
    # Picks the CSS_Format rendering for a text object: table, code block,
    # indented, hanging indent or normal paragraph.
    def doc_object_text_html(data)
      if data.is==:table
        SiSU_FormatShared::CSS_Format.new(@md,data).html_table
      elsif data.is==:code
        SiSU_FormatShared::CSS_Format.new(@md,data).code
      elsif defined?(data.indent) && defined?(data.hang) &&
      data.indent =~/[1-9]/ && data.indent == data.hang
        SiSU_FormatShared::CSS_Format.new(@md,data).indent(data.indent)
      elsif defined?(data.indent) && defined?(data.hang) &&
      data.hang =~/[0-9]/ && data.indent != data.hang
        SiSU_FormatShared::CSS_Format.new(@md,data).hang_indent(data.hang,data.indent)
      else
        SiSU_FormatShared::CSS_Format.new(@md,data).norm
      end
    end
    # Queues one LoadEndnotes tuple per numbered endnote of one kind.
    #
    # notedata:: text of the object, still containing the endnote markers
    # data::     the object itself (handed to CSS_Format)
    # type::     'endnotes', 'endnotes_asterisk' or 'endnotes_plus'
    # opener::   regexp source opening a note of this kind
    # closer::   regexp source closing it
    def doc_object_import_endnotes(notedata,data,type,opener,closer)
      notedata.scan(/#{opener}.+?#{closer}/).each do |inf|
        note=inf.match(/#{opener}(\d+)(.+?)#{closer}/)
        next unless note                                  # notes without a number are skipped
        nr,txt=note[1],note[2].strip
        @id_n+=1
        txt=special_character_escape(txt)
        body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt)
        txt=strip_markup(txt)
        if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1)
          puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
          File.open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
            error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
          end
          txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
        end
        en={
          type: type,
          id:      @id_n,
          lid:     @col[:lid],
          nr:      nr,
          txt:     txt,
          body:    body,
          ocn:     @col[:ocn],
          ocnd:    @col[:ocnd],
          ocns:    @col[:ocns],
          id_t:    @@id_t,
          hash:    0
        }
        @tuple_array << SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint).tuple
      end
    end
    private :doc_object_strip_inline_markup, :doc_object_heading_columns,
      :doc_object_segment_url, :doc_object_store_body, :doc_object_note_ranges,
      :doc_object_queue_tuple, :doc_object_import_heading,
      :doc_object_import_segment_heading, :doc_object_import_text,
      :doc_object_text_html, :doc_object_import_endnotes
    # Selects +txt+ as the text the endnote helpers below work on and returns
    # self, so calls read endnotes(txt).extract_any, endnotes(txt).clean_text.
    #
    # The helpers are ordinary methods of the class; they were previously
    # nested inside this method and so were redefined on every call.
    def endnotes(txt)
      @txt=txt
      self
    end
    # When the selected text holds at least one numbered endnote: refreshes
    # the note range in @col, appends the note numbers found to @en / @en_ast /
    # @en_pls and replaces the notes in the text by <sup> references.
    # Returns the (possibly rewritten) text.
    def extract_any
      if @txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
        range
        @en << standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
        @en_ast << asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
        @en_pls << plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
        clean_text
      end
      @txt
    end
    # Numbers of the regular endnotes as returned by String#scan (one
    # single-element array per note), or nil when the text has none.
    def standard
      (@txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) \
      ? @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/)
      : nil
    end
    # Same as #standard, for endnotes marked with an asterisk.
    def asterisk
      (@txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) \
      ? @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/)
      : nil
    end
    # Same as #standard, for endnotes marked with a plus sign.
    def plus
      (@txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) \
      ? @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/)
      : nil
    end
    # Replaces each endnote in the selected text by its number in <sup> tags;
    # with +base_url+ the number becomes a link to "<base_url>#_<number>".
    # Stores and returns the rewritten text.
    def clean_text(base_url=nil)
      @txt=if base_url
        @txt.gsub(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,%{<sup><a href="#{base_url}#_\\1" name="-\\1">\\1</a></sup>}).
          gsub(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,%{<sup><a href="#{base_url}#_\\1" name="-\\1">\\1</a></sup>}).
          gsub(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,%{<sup><a href="#{base_url}#_\\1" name="-\\1">\\1</a></sup>})
      else
        @txt.gsub(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,'<sup>\1</sup>').
          gsub(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,'<sup>\1</sup>').
          gsub(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,'<sup>\1</sup>')
      end
      @txt
    end
    # Resets @col[:en_a] / @col[:en_z] and looks for the first and last
    # endnote number in the selected text. Returns @col.
    #
    # NOTE(review): the per-word pattern requires whitespace and a doubled
    # closing marker inside a token produced by scan(/\S+/), so as written it
    # cannot match and both columns stay nil; kept unchanged pending a decision
    # on the intended behaviour.
    def range
      @col[:en_a]=@col[:en_z]=nil
      if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}|#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/
        @txt.scan(/\S+/).each do |w|
          if w[/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/]                                                # not tested since change 2003w31
            @col[:en_a]=$1 unless @col[:en_a]
            @col[:en_z]=$1 if @col[:en_a]
          end
        end
      end
      @col
    end
    # Builds the tuple that records where the generated outputs of the
    # document can be found.
    #
    # dbi_unit:: not used by this method
    # content::  stored in @fnc
    #
    # For every output kind listed below whose file exists in its output
    # directory, the column name goes into +f+ and the quoted url into +u+;
    # both hashes are handed to SiSU_DbTuple::LoadUrls. (LaTeX outputs were
    # already disabled and are not listed.)
    #
    # Returns the LoadUrls tuple, or nil after an exception has been reported
    # through SiSU_Errors::InfoError.
    def db_import_urls(dbi_unit,content)                                           #% import documents OID - populate database
      @fnc=content
      @env=SiSU_Env::InfoEnv.new(@opt.fns)
      f,u={},{}
      # nil is tested first: calling empty? on nil would raise
      p 'file output path error' if @fnb.nil? or @fnb.empty? #remove
      # key in f/u, column name, output_path kind, @pdf_fn name (pdf only)
      [
        [:txt,         'plaintext',   :txt,              nil],
        [:html_toc,    'html_toc',    :html_seg,         nil],
        [:html_doc,    'html_doc',    :html_scroll,      nil],
        [:xhtml,       'xhtml',       :xhtml,            nil],
        [:xml_sax,     'xml_sax',     :xml_sax,          nil],
        [:xml_dom,     'xml_dom',     :xml_dom,          nil],
        [:epub,        'epub',        :epub,             nil],
        [:odf,         'odf',         :odt,              nil],
        [:pdf_p,       'pdf_p',       :pdf,              :pdf_p_a4],
        [:pdf_l,       'pdf_l',       :pdf,              :pdf_l_a4],
        [:concordance, 'concordance', :html_concordance, nil],
        [:digest,      'digest',      :digest,           nil],
        [:manifest,    'manifest',    :manifest,         nil], #revisit, was to be text, this is html
        [:markup,      'markup',      :src,              nil],
        [:sisupod,     'sisupod',     :sisupod,          nil]
      ].each do |key,column,kind,pdf_name|
        # paths and names are looked up afresh for each use, as before
        filename=lambda do
          pdf_name ? @pdf_fn.public_send(pdf_name) : @md.file.base_filename.public_send(kind)
        end
        next unless FileTest.file?("#{@md.file.output_path.public_send(kind).dir}/#{filename.call}")
        f[key],u[key]="#{column},", "'#{@md.file.output_path.public_send(kind).url}/#{filename.call}',"
      end
      SiSU_DbTuple::LoadUrls.new(@conn,f,u,@@id_t,@opt,@file_maint).tuple
    rescue
      SiSU_Errors::InfoError.new($!,$@,@opt.cmd,@opt.fns).error
      nil
    end
  end
end
__END__
