# encoding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007, 2008, 2009, 2010, 2011, 2012, 2013 Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.sisudoc.org/sisu/en/SiSU/download.html>

 * Git
   <http://sources.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
   <http://sources.sisudoc.org/?p=code/sisu.git;a=blob;f=lib/sisu/v4/dal_numbering.rb;hb=HEAD>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: system environment, resource control and configuration details

=end
module SiSU_DAL_Numbering
  class Numbering
    attr_accessor :obj,:osp,:ocn,:lv,:name,:index,:comment
    # Keeps references to md and data (used by the numbering passes of this
    # class) and resets the per-object attributes to nil.
    def initialize(md,data)
      @md=md
      @data=data
      @comment=nil
      @index=nil
      @name=nil
      @lv=nil
      @ocn=nil
      @type=nil
      @obj=nil
    end
    # Runs the numbering passes over @data in a fixed order and returns
    # [data, tags_map, ocn_html_seg_map].
    # The intermediate result is compacted before each of the four middle
    # passes; set_heading_top is only applied while @md.set_heading_top is unset.
    def numbering_song
      stages=[:auto_number_heading_ie_title,:ocn,:xml,:minor_numbering]
      numbered=stages.inject(number_plaintext_para(@data)) do |objects,stage|
        send(stage,objects.compact)
      end
      numbered,tags_map,ocn_html_seg_map=name_para_seg_filename(numbered)
      numbered=set_heading_top(numbered) unless @md.set_heading_top
      [numbered,tags_map,ocn_html_seg_map]
    end
    # Normalises whitespace in the text of each document object.
    #
    # * Objects that carry dob.ocn_ and whose dob.of is not :block, :comment or
    #   :layout have their lines joined with single spaces (the idea being that
    #   blocks such as tables should retain their breaks).
    # * Every object then has leading whitespace stripped from each line and a
    #   trailing whitespace character at a line end replaced by a newline.
    #
    # Objects whose obj is an Array are passed through untouched; previously
    # only the second step was guarded, so an Array obj on a numbered non-block
    # object raised NoMethodError in the first step.
    #
    # Sets @tuned_file and returns it (flattened).
    def number_plaintext_para(data)
      @tuned_file=[]
      data.each do |dob|
        unless dob.obj.is_a?(Array)
          if ![:block,:comment,:layout].include?(dob.of) \
          && dob.ocn_ #and dob.obj !~ /#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ #FIX
            dob.obj=dob.obj.gsub(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks
          end
          dob.obj=dob.obj.gsub(/^\s+/,'').
            gsub(/\s$/,"\n")
        end
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end
    # Prefixes a sub-heading's text with its computed title number.
    #
    # dob      - heading object; obj (text) and name are read and may be rewritten
    # num      - heading level number, compared against @md.toc_lev_limit
    # title_no - number string to prefix, e.g. "2.1"
    #
    # Headings whose text already contains something like "1." or
    # "chapter 3" (article/section/clause likewise) are left alone.
    # When dob.name is an empty string it is set to title_no.
    # When @md.toc_lev_limit is set and smaller than num the line is further
    # prefixed with '!_ ' (bold line).
    #
    # Fix: if title_no contained no digit and the name matched none of the
    # patterns, the text used to be overwritten with nil; it is now kept as is.
    #
    # Returns dob.
    def number_sub_heading(dob,num,title_no)
      unless dob.obj =~/\d+\.|(?:chapter|article|section|clause)\s+\d+/i #name selection arbitrary, fix
        dob.obj=case dob.name
        when /-/, /^#/, /^[a-z_\.]+/
          dob.obj.gsub(/^/,"#{title_no} ")
        else
          dob.name=title_no if dob.name=~/^$/ #where title contains title number
          if title_no =~/\d+/ #main, where title number is to be provided
            dob.obj.gsub(/^/,"#{title_no} ")
          else
            dob.obj #nothing usable to prefix, keep the heading text
          end
        end
        if @md.toc_lev_limit \
        && @md.toc_lev_limit < num
          dob.obj=dob.obj.gsub(/^/,'!_ ') #bold line, watch
        end
      end
      dob
    end
    # Returns a copy of heading_tag made suitable for use as a tag:
    # runs of spaces become '_', quote characters are dropped, '/' becomes '-',
    # then the open/close markers Mx[:fa_<face>_o] / Mx[:fa_<face>_c] for each
    # listed face and finally Mx[:gl_bullet] are removed.
    def heading_tag_clean(heading_tag)
      cleaned=heading_tag.gsub(/[ ]+/,'_')
      cleaned=cleaned.gsub(/["']/,'')
      cleaned=cleaned.gsub(/[\/]/,'-')
      faces=%w(bold italics underscore cite insert strike superscript subscript hilite)
      faces.each do |face|
        marker_open=Mx[:"fa_#{face}_o"]
        marker_close=Mx[:"fa_#{face}_c"]
        cleaned=cleaned.gsub(/#{marker_open}|#{marker_close}/,'')
      end
      cleaned.gsub(/#{Mx[:gl_bullet]}/,'')
    end
    # Auto-numbers headings and derives heading names and tags from them.
    #
    # Every object in data is first replaced by the result of
    # SiSU_DAL_DocumentStructureExtract::Structure.new(@md,dob).structure_markup,
    # then one of four branches applies:
    #   1. heading with autonum_ while @md.make.num_top is not an empty string:
    #      '#' placeholders in level '1' headings get a running chapter count;
    #      headings at level num_top are numbered "N", the next two levels
    #      "N.M" and "N.M.K" (via number_sub_heading), and each gets an
    #      "h<number>" tag;
    #   2. heading with autonum_ while @md.markup matches /num_extract/:
    #      a leading number in the heading text becomes the (missing) name;
    #   3. any other object that has a name: a non-numeric name is added to tags;
    #   4. otherwise nothing is changed.
    # Tags are de-duplicated. Sets @tuned_file and returns it (flattened).
    # Side effect: appends the numbers it hands out to @md.seg_names.
    def auto_number_heading_ie_title(data)                                             #also does some segment naming
      @tuned_file=[]
      # num_top: integer value of @md.make.num_top, or nil when it is unset/empty
      if defined? @md.make.num_top \
      and @md.make.num_top \
      and @md.make.num_top !~/^$/
        input||=@md.make.num_top
      end
      num_top=(input ? input.to_i : nil)
      # NOTE(review): t_no4, no4 and t_not are assigned but never read in this method
      t_no1=t_no2=t_no3=t_no4=0
      if num_top
        no1=num_top; no2=(num_top + 1); no3=(num_top + 2);  no4=(num_top + 3)
      end
      t_not=0
      chapter_number_counter=0
      data=data.compact
      data.each do |dob| #@md.seg_names << [additions to segment names]
        title_no=nil
        dob=SiSU_DAL_DocumentStructureExtract::Structure.new(@md,dob).structure_markup #must happen earlier, node info etc. require
        # NOTE(review): nil !~ /^$/ is true, so this branch is also entered when
        # num_top is nil (no1..no3 are then nil) - confirm that is intended
        if dob.is ==:heading \
        && dob.autonum_ \
        and defined? @md.make.num_top \
        and @md.make.num_top !~/^$/
          # replace a leading "# " or a trailing/standalone "#" with the chapter count
          if  dob.lv=='1' \
          and dob.obj =~/^#\s|\s#(?:\s|$)/
            chapter_number_counter +=1
            dob.obj=dob.obj.gsub(/^#\s/,"#{chapter_number_counter} ").
              gsub(/#([:,]?\s|[.]?$)/,"#{chapter_number_counter}\\1")
          end
          # NOTE(review): @subnumber always ends up 0 here and is not read in this file
          if dob.ln==no1
            @subnumber=1
            @subnumber=0 if dob.ln==no1
          end
          if dob.ln.to_s =~/^[1-6]/ \
          and not dob.toc_ \
          and dob.obj !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ # <-- fix
            # top numbered level: bump the first counter, reset the lower ones
            if dob.ln==no1
              t_no1+=1; t_no2=0; t_no3=0
              title_no="#{t_no1}"
              if @md.seg_names.is_a?(Array) \
              and not @md.seg_names.include?(title_no)
                if dob.ln==no1
                  dob.name="#{title_no}" if not dob.name
                  dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/  #check whether will work across file types with stop signs
                  tag=dob.obj.gsub(/(Article|Clause|Section|Chapter)\s+/,"\\1_#{title_no}").downcase
                  tag=heading_tag_clean(tag)
                  dob.tags=[tag,dob.tags].flatten if tag !~/^\d+$/ #check whether will work across file types with stop signs
                  dob.obj=(dob.obj =~/(Article|Clause|Section)\s+/) \
                  ? (dob.obj.gsub(/(Article|Clause|Section)\s+/,"\\1 #{title_no} "))
                  : (dob.obj.gsub(/^/,"#{title_no}. ")) #fix stop later
                end
                # NOTE(review): nested inside `dob.ln==no1`, so this condition cannot hold
                if dob.ln !=no1 \
                and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
                  dob.name ="#{title_no}" if not dob.name
                  dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
                  dob.obj=dob.obj.gsub(/^/,"#{title_no}. ")
                end
                @md.seg_names << title_no
              end
              # NOTE(review): also nested inside `dob.ln==no1`, so it cannot hold either
              if dob.ln!=no1 \
              and dob.name!~/^[a-z_\.]+$/ \
              and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on
                dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
                dob.obj=dob.obj.gsub(/^/i,"#{title_no}. ")
              end
            end
            if dob.ln==no1         #watch because here you change dob.name
              dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
            end
            if dob.ln==no2         #watch because here you change dob.name
              t_no2+=1; t_no3=0
              title_no="#{t_no1}.#{t_no2}"
              dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
              dob=number_sub_heading(dob,no2,title_no)
            end
            if dob.ln==no3         #watch because here you change dob.name
              t_no3+=1
              title_no="#{t_no1}.#{t_no2}.#{t_no3}"
              dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
              dob=number_sub_heading(dob,no3,title_no)
            end
          elsif dob.ln.to_s =~/^[1-6]/ \
          and dob.name =~ /^[\w-]+-/ # endnotes, watch2005# endnotes, watch2005
            dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/  #check whether will work across file types with stop signs
            # NOTE(review): gsub is non-destructive and its result is discarded, so
            # dob.name is left unchanged here - possibly an assignment was intended
            dob.name.gsub(/^([a-z_\.]+)-$/,'\1')
          end
        elsif dob.is ==:heading \
        and dob.autonum_ \
        and @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4
          #here lies a bug, as is nil when run from -Dv --update, FIX
          if (dob.name.nil? or dob.name.empty?) \
          and dob.ln.to_s =~/^[1-9]/ \
          and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
            dob.name=$1
            dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
          end
          # NOTE(review): empty conditional, has no effect
          if @md.toc_lev_limit
          end
        elsif defined? dob.name \
        and  dob.name
          dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
        end
        dob.tags=dob.tags.uniq if defined? dob.tags
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end
    # Delegates to SiSU_DAL_DocumentStructureExtract::OCN#ocn, stores the
    # result in @tuned_file and returns it.
    def ocn(data)                                                                      #and auto segment numbering increment
      numberer=SiSU_DAL_DocumentStructureExtract::OCN.new(@md,data)
      @tuned_file=numberer.ocn
    end
    # Delegates to SiSU_DAL_DocumentStructureExtract::XML#dom, stores the
    # result in @tuned_file and returns it.
    def xml(data)
      builder=SiSU_DAL_DocumentStructureExtract::XML.new(@md,data)
      @tuned_file=builder.dom
    end
    # Expands the in-paragraph list markers ("baby numbering").
    #
    # For objects whose dob.of is :heading, :heading_insert, :para or :block:
    # * a heading whose ln starts with 1-9 resets both counters;
    # * a paragraph starting with "# " or "#1" becomes "<n>. ...", where "#1"
    #   restarts the count at 1 (a line that is only "#" plus whitespace,
    #   optionally followed by "~#", is left alone); the letter counter is reset;
    # * a paragraph starting with "_# " becomes "<letter>. ..." (a, b, c ...)
    #   and gets indent '1'.
    # Sets @tuned_file and returns it (flattened).
    def minor_numbering(data)                                                          #and auto segment numbering increment
      eligible=[:heading,:heading_insert,:para,:block]
      alphabet=('a'..'z').to_a
      item_no=0
      sub_no=0
      @tuned_file=[]
      data.each do |dob|
        if eligible.include?(dob.of)
          if dob.is ==:heading && dob.ln.to_s =~/^[1-9]/
            item_no=0
            sub_no=0
          elsif dob.is ==:para
            if dob.obj =~/^#[ 1]/ && dob.obj !~/^#\s+(?:~#)?$/
              sub_no=0
              item_no=(dob.obj =~/^#1/) ? 1 : item_no+1
              dob.obj=dob.obj.gsub(/^#[ 1]/,"#{item_no}. ")
            end
            if dob.obj =~/^_# /
              dob.obj=dob.obj.gsub(/^_# /,"#{alphabet[sub_no]}. ")
              dob.indent='1'
              sub_no+=1
            end
          end
        end
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end
    # Assigns the remaining segment names to level 4-6 headings and builds the
    # lookup tables that map tags and object numbers to segments.
    #
    # Returns [tuned_file, tags, ocn_html_seg] where
    #   tuned_file   - the objects of data, with a layout break object placed in
    #                  front of headings selected by @md.pagenew/pagebreak/pageline
    #   tags         - Hash: "<ocn>" => { segname: } and <tag> => { ocn:, segname: }
    #   ocn_html_seg - Array indexed by dob.ocn holding { seg:, level: }
    # Side effects: appends to @md.seg_names, may set @md.set_heading_seg,
    # may set dob.name / dob.tags, prints warnings.
    def name_para_seg_filename(data)                                                   #segment naming, remaining
      # paragraph name/numbering rules
      # manual naming overrides, manual naming may be
      #   alpha-numeric characters mixed,
      #   numeric only (a number), if
      #     all segments have been named,
      #     the numbers used are over 1000 or
      #     it is  not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented)
      #       [for now a warning is printed for such documents on use of maintenance or very-verbose flag]
      # auto-naming takes the form of giving numbers to segments
      # the rules for which are as follows
      #   if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.)
      #   otherwise the level 4 segment number from the embedded document structure info is used
      #   if there is none a sequential number is designated, preceded by an underscore
      # NOTE(review): @unique_auto_name and @counter are set but not read in this method
      @tuned_file,@unique_auto_name=[],[]
      tags={}
      art_filename_auto=1
      @counter=1
      if not @md.seg_autoname_safe and @md.opt.cmd =~/[MV]/
        puts 'manual segment names, numbers used as names, risk warning (segmented html)'
      end
      ocn_html_seg=[]
      data.each do |dob|
        # step 1: make sure level 4-6 headings are named, registering names in @md.seg_names
        if dob.is==:heading \
        && dob.ln \
        and dob.ln.to_s =~/^[456]/
          if dob.ln==4 \
          and not dob.name \
          and not @md.set_heading_seg
            @md.set_heading_seg=true
          end
          if dob.name !~/^\S+/ \
          and dob.obj =~/^\s*(?:\S+\s+)?([\d.,:-]+)/m      #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name
            possible_seg_name=$1
            possible_seg_name=possible_seg_name.gsub(/(?:[:,-]|\W)/,'.').
              gsub(/\.$/,'')
            if @md.seg_names.is_a?(Array) \
            and not @md.seg_names.include?(possible_seg_name)
              dob.name=possible_seg_name
              dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
              @md.seg_names << possible_seg_name
            else puts 'warn, there may be a conflicting numbering scheme' if @md.opt.cmd =~/[VM]/
            end
          end
          if dob.ln==4 \
          and dob.name                                     #extract segment name from embedded document structure info
            if @md.seg_names.is_a?(Array) \
            and not @md.seg_names.include?(dob.name)
              dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
              @md.seg_names << dob.name
            end
          end
          if dob.ln==4 \
          and not dob.name                                 #if still no segment name, provide a numerical one
            pf='_'                                         #pg='' #may use e.g. '' or '~' or '_'
            segn_auto="#{pf}#{art_filename_auto.to_s}"
            if @md.seg_names.is_a?(Array) \
            and not @md.seg_names.include?(segn_auto)
             dob.name=segn_auto
             dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
              @md.seg_names << segn_auto
            else puts 'segment name (numbering) error'
            end
            art_filename_auto+=1
          end
          if dob.ln==4 \
          and not dob.name #should not occur
            puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}"
          end
        end
        # step 2: remember the name of the current level 4 heading as @seg
        if (dob.is ==:heading \
        || dob.is ==:heading_insert) \
        && dob.ln==4
          @seg=dob.name
        end
        # step 3: append the object; for a heading whose level digit occurs in the
        # inspect string of @md.pagenew, @md.pagebreak or @md.pageline (checked in
        # that order) a layout break object is appended in front of it
        # NOTE(review): para_result is assigned but not read afterwards
        @tuned_file << if dob.is==:heading \
        && (@md.pagenew || @md.pagebreak || @md.pageline)
          m=dob.ln.to_s
          dob_tmp=[]
          if @md.pagenew.inspect =~/#{m}/
            dob_tmp << SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new]) << dob
          elsif @md.pagebreak.inspect =~/#{m}/
            dob_tmp << SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page]) << dob
          elsif @md.pageline.inspect =~/#{m}/
            dob_tmp << SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line]) << dob
          end
          para_result=unless dob_tmp.length > 0; dob
          else                                   dob_tmp
          end
        else dob
        end
        # step 4: for objects that have an ocn, record segment name and level by ocn
        if defined? dob.ocn \
        and dob.ocn
          @segname=((dob.is==:heading || dob.is==:heading_insert) && dob.ln==4 && (defined? dob.name)) \
          ? (dob.name)
          : @segname
          tags["#{dob.ocn}"]={ segname: @segname }
          ocn_html_seg[dob.ocn]=if (dob.is==:heading || dob.is==:heading_insert)
            # NOTE(review): dob.ln is compared with the integer 4 above; if it is an
            # Integer this =~ is not a regexp match (Object#=~ returned nil before
            # Ruby 3.2 and no longer exists from 3.2 on), whereas the loop after this
            # one tests ocn[:level].to_s - confirm whether dob.ln.to_s was intended
            # NOTE(review): x is assigned but not read
            x=if dob.ln =~/[1-3]/
              { seg: nil, level: dob.ln }
            else #elsif dob.ln =~/[4-6]/
              { seg: @seg, level: dob.ln }
            end
          else
            { seg: @seg, level: nil }
          end
        end
        dob.tags=dob.tags.uniq if defined? dob.tags
        # step 5: every tag of the object points at its ocn and current segment name
        if defined? dob.tags \
        and dob.tags.length > 0
          #@segname=((dob.is=='heading'|| dob.is=='heading_insert') && dob.ln==4 && (defined? dob.name)) \
          #? (dob.name) \
          #: @segname
          dob.tags.each do |x|
            tags[x]={ ocn: dob.ocn.to_s, segname: @segname }
          end
        end
        dob
      end
      # entries for levels 1-3 take the seg of a level 4 entry found among the next
      # four ocn slots; there is no break, so the last such entry in the window wins
      # NOTE(review): ocn_seg is assigned but not read
      ocn_html_seg.each_with_index do |ocn,i|
        if ocn \
        and ocn[:level].to_s=~/[1-3]/
          ocn_seg=nil
          (1..4).each do |x|
            if ocn_html_seg[i+x] and ocn_html_seg[i+x][:level]==4
              ocn[:seg]=ocn_html_seg[i+x][:seg]
            end
          end
        end
      end
      if @md.seg_names.length > 0
        @md.set_heading_seg=true
      end
      tuned_file=@tuned_file.flatten
      [tuned_file,tags,ocn_html_seg]
    end
    # Manufactures a top level heading when @md.set_heading_top is not set.
    #
    # In front of the first element of data that is neither a metadata line
    # (Rx[:meta] or "@name:" followed by whitespace) nor blank, the pair
    # ['1', title] is inserted - provided @md.title and @md.creator are present -
    # where title is @md.title.main or '[no title provided]'. @lv and @obj are
    # set to those two values and @md.set_heading_top becomes true.
    # Returns the flattened list (also kept in @tuned_file), or nil when
    # @md.set_heading_top was already set.
    def set_heading_top(data)                                                          #% make sure no false positives
      return if @md.set_heading_top
      puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/
      @tuned_file=[]
      data.each do |t_o|
        first_content=!@md.set_heading_top \
          && t_o !~/^(?:#{Rx[:meta]}|@\S+:)\s/m \
          && t_o !~/\A\s*\Z/m
        if first_content
          @md.set_heading_top=true
          if defined?(@md.title) \
          && @md.title \
          && defined?(@md.title.full) \
          && defined?(@md.creator) \
          && @md.creator
            main_title=@md.title.main
            @lv='1'
            @obj=main_title ? main_title : '[no title provided]'
            @tuned_file << [@lv,@obj]
          end
        end
        @tuned_file << t_o
      end
      @tuned_file=@tuned_file.flatten
    end
    # Manufactures a segment level entry when @md.set_heading_seg is not set.
    #
    # The first object whose ln does not start with 1, 2 or 3, whose text is
    # not blank and which is not a :layout object is rewritten in place
    # (ln=4, name='seg', obj=title) and the three values [4,'seg',title] are
    # inserted in front of it; title is @md.title.main or '[segment]'.
    # NOTE(review): this overwrites the text of that object - confirm intended.
    # Returns the flattened list (also kept in @tuned_file), or nil when
    # @md.set_heading_seg was already set.
    def set_heading_seg(data)                                                          #% make sure no false positives
      return if @md.set_heading_seg
      puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/
      @tuned_file=[]
      data.each do |dob|
        first_content=!@md.set_heading_seg \
          && defined?(dob.ln) \
          && dob.ln.to_s !~/^[123]/m \
          && dob.obj !~/\A\s*\Z/m \
          && dob.is !=:layout
        if first_content
          @md.set_heading_seg=true
          seg_title=@md.title.main ? @md.title.main : '[segment]'
          dob.ln=4
          dob.name='seg'
          dob.obj=seg_title
          @tuned_file << [4,'seg',seg_title]
        end
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end
    # Manufactures a title header when @md.set_header_title is not set.
    #
    # In front of the first element of data that neither starts a line with
    # one or two '%' followed by whitespace nor is blank, a title line built
    # from Mx[:meta_o], Mx[:meta_c] and @md.heading_seg_first is inserted;
    # @md.title.main is set to @md.heading_seg_first and @md.set_header_title
    # becomes true.
    # Returns the flattened list (also kept in @tuned_file), or nil when
    # @md.set_header_title was already set.
    def set_header_title(data)                                                         #% make sure no false positives
      return if @md.set_header_title
      puts "\t no document title provided, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/
      @tuned_file=[]
      data.each do |t_o|
        first_content=!@md.set_header_title \
          && t_o !~/^%{1,2}\s/m \
          && t_o !~/\A\s*\Z/m
        if first_content
          @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
          @md.title.main=@md.heading_seg_first
          @md.set_header_title=true
        end
        @tuned_file << t_o
      end
      @tuned_file=@tuned_file.flatten
    end
  end
end
__END__
