-- Registers the metadata of this script (version, author, license, ...) in the global
-- modules table under the key 'mtx-chars'; the table is created first when it is absent.
if not modules then modules = { } end modules ['mtx-chars'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- obsolete: --stix                convert stix table to math table

-- Text handed to the application logger as help information: one line per
-- command line switch that is still supported.
local helpinfo = [[
--xtx                 generate xetx-*.tex (used by xetex)
--pdf                 generate pdfr-def.tex (used by pdftex)
--entities            generate entities table
]]

-- The application object provides the reporter and the help screen used below.
local application = logs.application({
    name     = "mtx-chars",
    banner   = "MkII Character Table Generators 0.10",
    helpinfo = helpinfo,
})

local report = application.report

-- Library functions that the generators use, localized once for the whole file.
local format   = string.format
local gmatch   = string.gmatch
local upper    = string.upper
local lower    = string.lower
local tonumber = tonumber
local concat   = table.concat
local utfchar  = utf.char

-- All entry points of this script live in the (global) scripts.chars namespace.
if not scripts       then scripts       = { } end
if not scripts.chars then scripts.chars = { } end

--~ local banner = [[
--~ -- filename : char-mth.lua
--~ -- comment  : companion to char-mth.tex (in ConTeXt)
--~ -- author   : Hans Hagen, PRAGMA-ADE, Hasselt NL
--~ -- license  : see context related readme files
--~ -- comment  : generated from data file downloaded from STIX website
--~
--~ if not versions   then versions   = { } end versions['char-mth'] = 1.001
--~ if not characters then characters = { } end
--~ ]]
--~
--~ function scripts.chars.stixtomkiv(inname,outname)
--~     if inname == "" then
--~         report("aquiring math data, invalid datafilename")
--~     end
--~     local f = io.open(inname)
--~     if not f then
--~         report("aquiring math data, invalid datafile")
--~     else
--~         report("aquiring math data, processing %s",inname)
--~         if not outname or outname == "" then
--~             outname = "char-mth.lua"
--~         end
--~         local classes = {
--~             N = "normal",
--~             A = "alphabetic",
--~             D = "diacritic",
--~             P = "punctuation",
--~             B = "binary",
--~             R = "relation",
--~             L = "large",
--~             O = "opening",
--~             C = "closing",
--~             F = "fence"
--~         }
--~         local valid, done = false, { }
--~         local g = io.open(outname,'w')
--~         g:write(banner)
--~         g:write(format("\ncharacters.math = {\n"))
--~         for l in f:lines() do
--~             if not valid then
--~                 valid = l:find("AMS/TeX name")
--~             end
--~             if valid then
--~                 local unicode = l:sub(2,6)
--~                 if unicode:sub(1,1) ~= " " and unicode ~= "" and not done[unicode] then
--~                     local mathclass, adobename, texname = l:sub(57,57) or "", l:sub(13,36) or "", l:sub(84,109) or ""
--~                     texname, adobename = texname:gsub("[\\ ]",""), adobename:gsub("[\\ ]","")
--~                     local t = { }
--~                     if mathclass ~= "" then t[#t+1] = format("mathclass='%s'", classes[mathclass] or "unknown") end
--~                     if adobename ~= "" then t[#t+1] = format("adobename='%s'", adobename                      ) end
--~                     if texname   ~= "" then t[#t+1] = format("texname='%s'"  , texname                        ) end
--~                     if #t > 0 then
--~                         g:write(format("\t[0x%s] = { %s },\n",unicode, concat(t,", ")))
--~                     end
--~                     done[unicode] = true
--~                 end
--~             end
--~         end
--~         if not valid then
--~             g:write("\t-- The data file is corrupt, invalid or maybe the format has changed.\n")
--~             report("aquiring math data, problems with data table")
--~         else
--~             report("aquiring math data, table saved in %s",outname)
--~         end
--~         g:write("}\n")
--~         g:close()
--~         f:close()
--~     end
--~ end

-- Stub that replaces the retired STIX conversion (see the commented code above):
-- both arguments are accepted but ignored, and only a notice is reported.
scripts.chars.stixtomkiv = function(inname,outname)
    local notice = "we no longer use this options but use our own tables instead"
    report(notice)
end

-- Header of the generated pdfr-def.tex: a block of TeX comment lines that is written
-- before the glyph mappings by scripts.chars.makepdfr.
local banner_pdf_1 = [[
% filename : pdfr-def.tex
% comment  : generated by mtxrun --script chars --pdf
% author   : Hans Hagen, PRAGMA-ADE, Hasselt NL
% copyright: PRAGMA ADE / ConTeXt Development Team
% license  : see context related readme files
%
]]

-- Trailer of the generated pdfr-def.tex: written after the glyph mappings, it ends the
-- file with \endinput.
local banner_pdf_2 = [[
%
\endinput
]]

-- Generates pdfr-def.tex in the current directory. The file gets the pdf banner, one
-- \pdfglyphtounicode{<adobename>}{<hex slot>} line for every entry of characters.data
-- that has an adobename (in sorted key order), and the closing banner.
--
-- Takes no arguments and returns nothing. Problems (char-def.lua cannot be located, it
-- defines no character data, the output file cannot be opened) are reported instead of
-- being skipped silently.
function scripts.chars.makepdfr()
    local chartable = resolvers.findfile("char-def.lua") or ""
    if chartable == "" then
        report("unable to locate 'char-def.lua'")
        return
    end
    -- running the file is what makes the global characters table available
    dofile(chartable)
    local cd = characters and characters.data
    if not cd then
        report("no character data found in '%s'",chartable)
        return
    end
    local name = "pdfr-def.tex"
    local f = io.open(name,'w')
    if not f then
        report("unable to open '%s' for writing",name)
        return
    end
    report("writing '%s'",name)
    f:write(banner_pdf_1)
    local sd = table.sortedkeys(cd)
    for i=1,#sd do
        local slot = sd[i]
        local char = cd[slot]
        if char.adobename then
            -- %04X needs a number: when an entry lacks unicodeslot we fall back to its
            -- key, which scripts.chars.makeencoutf also treats as the code point
            f:write(format("\\pdfglyphtounicode{%s}{%04X}%%\n",char.adobename,char.unicodeslot or slot))
        end
    end
    f:write(banner_pdf_2)
    f:close()
end

-- Common header of every generated xetx-*.tex file. This one is passed through format
-- with the file name as argument, which is why the TeX comment signs are doubled (%%)
-- and why there is a %s in the first line.
local banner_utf_module = [[
%% filename : %s
%% comment  : generated by mtxrun --script chars --xtx
%% author   : Hans Hagen, PRAGMA-ADE, Hasselt NL
%% copyright: PRAGMA ADE / ConTeXt Development Team
%% license  : see context related readme files
]]

-- Section banner of xetx-utf.tex; written as-is (not passed through format).
local banner_utf_mappings = [[

% lc/uc/catcode mappings

]]

-- Fixed \setXTXcharcodes lines that are appended to xetx-utf.tex after the generated ones.
local banner_utf_patch = [[

% patch needed for turkish

\setXTXcharcodes "201C "201C "201C
\setXTXcharcodes "201D "201D "201D

% patch needed for french

\setXTXcharcodes "2019 "2019 "2019

]]

-- Section banner of xetx-chr.tex; written as-is.
-- NOTE(review): escapes are not interpreted in a long bracket string, so the generated
-- comment line contains two backslashes before 'char' -- confirm whether one was meant.
local banner_utf_names = [[

% named characters mapped onto utf (\\char is needed for accents)

]]

-- Section banner of xetx-cls.tex; written as-is.
local banner_utf_classes = [[

% some character classes for xetex; seems to be rather hard coded, these numbers
% and also a mix of several classes; here we do linebreaks

]]

-- Trailer that closes every generated xetx-*.tex file.
local banner_utf_finish = [[

\endinput
]]

-- The linebreak classes that end up in xetx-cls.tex. The code in this file only looks at
-- the keys (as class names and for membership tests); the numbers are never read here.
local xtxclasses = {
    id =   1,
    ex =   3,
    is =   3,
    cm = 256,
    op =   2,
    ns =   3,
    cl =   3,
}

-- Opens one of the generated xetx-*.tex files and writes the common header (with the
-- file name filled in) followed by the given section banner. Returns the file handle,
-- or nil (after reporting it) when the file cannot be opened for writing.
local function open_xtx_file(name,banner)
    local f = io.open(name,'w')
    if not f then
        report("unable to open '%s' for writing",name)
        return nil
    end
    report("writing '%s'",name)
    f:write(format(banner_utf_module,name))
    f:write(banner)
    return f
end

-- Appends the closing \endinput banner and closes the handle.
local function close_xtx_file(f)
    f:write(banner_utf_finish)
    f:close()
end

-- Writes xetx-utf.tex: a \setXTXcharcodes line for each letter (category ll, lu or lt)
-- up to 0xFFFF, a \dofastrecurse line for each 'lo' entry that has a range, and the
-- fixed patches.
local function write_xtx_charcodes(data,list)
    local f = open_xtx_file("xetx-utf.tex",banner_utf_mappings)
    if not f then
        return
    end
    for i=1,#list do
        local code = list[i]
        if code <= 0xFFFF then
            local chr = data[code]
            local cc = chr.category
            if cc == 'll' or cc == 'lu' or cc == 'lt' then
                -- a letter without case mapping maps onto itself; as before, the
                -- default is stored in the loaded character table
                if not chr.lccode then chr.lccode = code end
                if not chr.uccode then chr.uccode = code end
                f:write(format('\\setXTXcharcodes "%05X "%05X "%05X %% %s\n',code,chr.lccode,chr.uccode,chr.description))
            end
        end
    end
    f:write("\n")
    for i=1,#list do
        local code = list[i]
        local chr = data[code]
        if chr and chr.range and chr.category == 'lo' then
            f:write(format('\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharcodes\\recurselevel\\recurselevel\\recurselevel}\n',code,chr.range))
        end
    end
    f:write(banner_utf_patch)
    close_xtx_file(f)
end

-- Writes xetx-chr.tex: a \def for every entry above 0x5B and up to 0xFFFF that has a
-- contextname. The names are padded to the longest one so that the definitions line up.
local function write_xtx_names(data,list)
    local f = open_xtx_file("xetx-chr.tex",banner_utf_names)
    if not f then
        return
    end
    local length = 0
    for i=1,#list do
        local code = list[i]
        if code > 0x5B and code <= 0xFFFF then
            local chr = data[code]
            local name = chr and chr.contextname
            if name and #name > length then
                length = #name
            end
        end
    end
    local template = "\\def\\%-".. length .. "s{\\char\"%05X } %% %s: %s\n"
    for i=1,#list do
        local code = list[i]
        if code > 0x5B and code <= 0xFFFF then
            local chr = data[code]
            if chr and chr.contextname then
                f:write(format(template, chr.contextname, code, chr.description, utfchar(code)))
            end
        end
    end
    close_xtx_file(f)
end

-- Writes xetx-cls.tex: the class definitions, the class assignments per character (runs
-- of the same class are collapsed into one \dofastrecurse) and the assignments for
-- entries that carry a range.
local function write_xtx_classes(data,list)
    local f = open_xtx_file("xetx-cls.tex",banner_utf_classes)
    if not f then
        return
    end
    -- the traversal order of a hash is unspecified, so we sort the class names in order
    -- to get the same file on every run
    local classes = { }
    for name in next, xtxclasses do
        classes[#classes+1] = name
    end
    table.sort(classes)
    for i=1,#classes do
        f:write(format("\\defineXTXcharinjectionclass[lb:%s]\n",classes[i]))
    end
    f:write("\n")
    -- the pending run: first and last code plus their class (all nil when there is none)
    local first, last, class = nil, nil, nil
    local function flush()
        if first then
            if first == last then
                f:write(format('\\dosetXTXcharacterclass{"%05X}{lb:%s}\n',first,class))
            else
                f:write(format('\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharacterclass\\fastrecursecounter{lb:%s}}\n',first,last,class))
            end
        end
        first, last, class = nil, nil, nil
    end
    -- NOTE(review): a run is extended as long as successive entries of the sorted list
    -- have the same class; codes that are absent from the table and lie in between end
    -- up inside the emitted range (unchanged behaviour).
    for i=1,#list do
        local code = list[i]
        local chr  = data[code]
        local clb  = chr and chr.linebreak
        if not (clb and xtxclasses[clb]) then
            flush()
        elseif clb == class then
            -- class is only set together with first, so there is a pending run
            last = code
        else
            flush()
            first, last, class = code, code, clb
        end
    end
    flush()
    f:write("\n")
    for i=1,#list do
        local code = list[i]
        local chr = data[code]
        if chr and chr.range then
            local lbc = chr.linebreak
            if xtxclasses[lbc] then
                f:write(format('\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharacterclass\\fastrecursecounter{lb:%s}}\n',code,chr.range,lbc))
            end
        end
    end
    close_xtx_file(f)
end

-- Generates xetx-utf.tex, xetx-chr.tex and xetx-cls.tex in the current directory from
-- the character data that char-def.lua defines.
--
-- Takes no arguments and returns nothing. The three files are independent: when one of
-- them cannot be opened this is reported and the others are still written. A missing
-- char-def.lua or missing character data is reported as well.
function scripts.chars.makeencoutf()
    local chartable = resolvers.findfile("char-def.lua") or ""
    if chartable == "" then
        report("unable to locate 'char-def.lua'")
        return
    end
    -- running the file is what makes the global characters table available
    dofile(chartable)
    local data = characters and characters.data
    if not data then
        report("no character data found in '%s'",chartable)
        return
    end
    local list = table.sortedkeys(data)
    write_xtx_charcodes(data,list)
    write_xtx_names(data,list)
    write_xtx_classes(data,list)
end

-- The entity definition files that scripts.chars.xmlentities processes, in this order.
-- Only the base name of each url is used there: the file itself is looked up with
-- resolvers.findfile, so it has to be present locally.
local entityfiles = {
    "http://www.w3.org/2003/entities/2007/w3centities-f.ent",
    "http://www.w3.org/2003/entities/2007/htmlmathml-f.ent",
}

-- Collects the <!ENTITY name "value"> definitions from the files listed in entityfiles
-- and saves them as the source of a Lua table in xmlentities.tmp (current directory).
-- Each line has the form ["name"] = "utf string", -- U+XXXXX ... where the comment
-- lists the code points that make up the string.
--
-- Takes no arguments and returns nothing. When a name occurs more than once the first
-- definition wins. An entity file that cannot be found or loaded is reported and
-- skipped, as is an entity whose value has an unparsable character reference.
function scripts.chars.xmlentities()
    -- these are not derived from the entity value but mapped directly onto their
    -- character (the former code formatted undefined values here, which fails)
    local predefined = { lt = 0x3C, gt = 0x3E, amp = 0x26 }
    local done = { }
    local entities = { "local entities = utilities.storage.allocate {" }
    for i=1,#entityfiles do
        local b = file.basename(url.hashed(entityfiles[i]).path)
        local n = resolvers.findfile(b) or ""
        local data = n ~= "" and io.loaddata(n) or nil
        if not data then
            report("unable to load entity file '%s'",b)
        else
            for name, value in gmatch(data,'<!ENTITY +(%S+) +"(.-)" *>') do
                if not done[name] then
                    done[name] = true
                    local str, hex
                    local code = predefined[name]
                    if name == "newline" then
                        -- let's forget about that one
                    elseif code then
                        str, hex = utfchar(code), format("U+%05X",code)
                    else
                        -- the value is a sequence of &#...; (decimal) and &#x...;
                        -- (hexadecimal) references that we concatenate
                        for t, c in gmatch(value,"&#([x]*)([^;]+);") do
                            local codepoint
                            if t == "x" then
                                codepoint = tonumber(c,16)
                            else
                                codepoint = tonumber(c)
                            end
                            if not codepoint then
                                -- a partial string would be wrong, so we drop the entity
                                report("skipping entity '%s', invalid character reference '%s'",name,c)
                                str, hex = nil, nil
                                break
                            elseif str then
                                str, hex = str .. utfchar(codepoint), format("%s %05X",hex,codepoint)
                            else
                                str, hex = utfchar(codepoint), format("U+%05X",codepoint)
                            end
                        end
                    end
                    if str and hex then
                        entities[#entities+1] = format('    ["%s"] = %q, -- %s',name,str,hex)
                    end
                end
            end
        end
    end
    entities[#entities+1] = "}"
    io.savedata("xmlentities.tmp",concat(entities,"\n"))
end

-- Command line dispatch: the switches are tested in the order given here and only the
-- first one that is set gets handled; without any of them the help screen is shown.
do
    local actions = {
        { "stix", function()
            -- the (obsolete) conversion takes its file names from the command line
            local inname  = environment.files[1] or ""
            local outname = environment.files[2] or ""
            scripts.chars.stixtomkiv(inname,outname)
        end },
        { "entities", function() scripts.chars.xmlentities() end },
        { "xtx",      function() scripts.chars.makeencoutf() end },
        { "pdf",      function() scripts.chars.makepdfr()    end },
    }
    local handled = false
    for i=1,#actions do
        local action = actions[i]
        if environment.argument(action[1]) then
            action[2]()
            handled = true
            break
        end
    end
    if not handled then
        application.help()
    end
end

-- local http  = require("socket.http")
-- local ltn12 = require("ltn12")
--
-- local t = { }
-- local status, message = http.request {
--     url = f,
--     sink = ltn12.sink.table(t)
-- }
--
-- local template = [[
-- <?xml version='1.0' ?>
--
-- <!DOCTYPE dummy [
--
-- %s
--
-- ]>
--
-- <dummy>This is just a placeholder.</dummy>
-- ]]
--
-- local e = string.format(template,io.loaddata(n))
-- local x = xml.convert(e, { utfize_entities = true } )
-- local entities = x.entities
