#!/usr/bin/perl

# code generator to help with writing Tcl extensions
# Copyright 2006 Ian Jackson
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301, USA.


# Input format is line-based, ws-significant, offside rule (some kind
#  of, anyway).
#
#  Type TYPE:       C-TYPE-DECLARATOR
#     Defines TYPE as a type (for arguments and return values)
#     which corresponds to the C type specified.  C-TYPE-DECLARATOR
#     must contain one `@' where the identifier would go.
#     The type may contain allocated memory, etc., in which case
#     `Init' and `Fini' must be used.
#
#     TYPE may be either TYPENAME or TYPENAME(ARGS) - in this case,
#     ARGS should be C argument declarations as for in a function
#     prototype, of extra arguments for the application-supplied
#     parser/returner functions.  Each time a TYPE is used elsewhere,
#     the ARGS should be the actual arguments to pass, and will be
#     textually copied into the calls to the parser/returner
#     functions.
#
#     `Type' causes declarations in the .h file of these functions:
#        int cht_pat_TYPENAME(Tcl_Interp*, Tcl_Obj *obj, C-TYPE *val, ARGS);
#        Tcl_Obj *cht_ret_TYPENAME(Tcl_Interp*, C-TYPE val, ARGS);
#
#     cht_pat_... must attempt to parse obj into the appropriate type.
#     val will already have been initialised with `Init' statements if
#     relevant.  Whether cht_pat_... fails or succeeds it may allocate
#     memory into the object and must leave the object valid (for
#     `Fini').
#
#     cht_ret_... must convert the value back to a new Tcl_Obj.  It may
#     not fail.
#
#  Init TYPENAME    C-STATEMENTS
#     Provides some statements which are used to initialise a variable
#     of type TYPENAME.  C-STATEMENTS should contain one or more `@',
#     which will be replaced by the actual variable name.  The
#     variable will have been declared with the C declarator specified
#     with `Type'.  C-STATEMENTS may not fail or longjmp, and they may
#     not allocate memory or other resources.  If no `Init' is
#     supplied then there is no invariant (so no `Fini' may be
#     supplied either, and the type is `flat' - no memory, external
#     refs, etc.)
#
#  Fini TYPENAME    C-STATEMENTS
#     Provides some statements (like `Init') which are used to free a
#     variable of type TYPENAME.  The variable will already have been
#     initialised with the `Init' statements, and may have been
#     modified since by application per-type or per-command code.  Its
#     invariant will be satisfied before C-STATEMENTS.  Afterwards the
#     invariant may or may not be satisfied, but it may not have any
#     memory or other resources allocated.  C-STATEMENTS may not fail
#     or longjmp.
#
#  H-Include    C-INCLUDE-SPECIFIER
#     Arranges for generated .h files to #include the specified
#     file.  C-INCLUDE-SPECIFIER should include the <..> or "..".
#
#  Table [*]TABLENAME C-ENTRY-TYPE
#     Starts a table of commands or subcommands.  The generated .h
#     will contain a definition of C-ENTRY-TYPE containing
#         const char *name;
#         Tcl_ObjCmdProc *func;
#     and the generated .c will contain
#         const C-ENTRY-TYPE C-ARRAY-NAME[];
#     where C-ARRAY-NAME is TABLENAME, with `_entries' appended
#     and `cht_' prepended.  The entries are indented one level (one
#     or more spaces) and look like this:
#        ENTRYNAME [ C-EXTRA-ENTRY-VALUES ]
#            FORMALARGNAME   TYPE
#            ...
#          [ =>  RESULT-TYPE ]
#     This will cause the declaration of
#        int cht_do_TABLENAME_ENTRYNAME(ClientData cd, Tcl_Interp *ip,
#                                   FORMAL-ARGUMENTS, RESULT-C-TYPE*);
#     which is the procedure which the application must supply to
#     implement the function.  If the `=> RESULT-TYPE' is omitted, so
#     is the result argument to the function.  Each argument to the
#     function is of the C type corresponding to the specified type.
#     TYPE may be `...', in which case the C function will be passed
#     two args (int objc, Tcl_Obj *const *objv) for the remaining
#     arguments.
#
#     The cht_do_... function should not eat any memory associated with
#     the arguments.  The result buffer (if any) will be initialised
#     using the `Init' and should on success contain the relevant
#     result.  On failure it should leave the result unmodified (or at
#     least, not in need of freeing).
#
#     As an alternative, the arguments can be replaced with just
#            dispatch(TYPE-ARGS-FOR-ENUM)
#     which is a shorthand for
#            subcmd   enum(TYPE-ARGS-FOR-ENUM)
#            args     ...
#     and also generates and uses a standard dispatch function.
#
#     There will be an entry in C-ARRAY-NAME for every table entry.
#     The name will be ENTRYNAME, and the func will be a function
#     suitable for use as a Tcl command procedure, which parses the
#     arguments, processes the command, and sets any result, as
#     applicable.
#
#     `*' should be used if the table name is not useful for error
#     messages.  It suppresses `TABLENAME ' from the front of the
#     autogenerated argument parsing error strings.
#
#  EntryExtra C-ENTRY-TYPE
#     Introduces a section of additional C code which will be inserted
#     into the definition of C-ENTRY-TYPE by `Table'.  The C
#     code, which follows on several indented lines, should be
#     structure member definitions.
#
#     When EntryExtra is used, in the corresponding Table, each
#     ENTRYNAME should be followed on the same line by whitespace and
#     EXTRA-VALUES; the EXTRA-VALUES are used as initialisers for the
#     additional structure elements.
#
#  NoEntryDefine C-ENTRY-TYPE
#     Prevents the definition of C-ENTRY-TYPE by Table.
#     The C type must be defined elsewhere.
#
#  Also expected are these functions:
#    void cht_setstringresult(Tcl_Interp*, const char*);
#        sets the Tcl result from the supplied string
#    int cht_pat_enum(Tcl_Interp*, Tcl_Obj*, const void **c_e_t_array,
#                 const void *c_e_t_return, size_t c_e_t_sz, const char *what);
#        scans a table of C-ENTRY-TYPEs looking for the
#        string matching the string supplied by the script
#        (as a Tcl_Obj).  On error sets the result, using
#        what (a noun phrase describing the type of thing).
#        Assumes (unportably!) that the name and func members
#        are in the same places no matter what the rest of
#        the struct contains.
#  and the two predefined types `int' (C `int') and `obj' (Tcl_Obj*,
#  unmodified.)  The corresponding definitions are in tcmdiflib.c.

use IO;
use Data::Dumper;

parse('builtins','DATA');

while (@ARGV) {
    $_= shift @ARGV;
    if (m/^\-p([-_0-9a-z]+)$/) {
	$prefix= $1;
	$prefix =~ y/-/_/;
    } elsif (m/^\-w(c|h)$/) {
	$write= $1;
    } elsif (m/^\-o(.+)$/) {
	$output= $1;
    } elsif (m/^\-/) {
	die "unknown option $_\n";
    } else {
	if (!defined $prefix) { $prefix= $_;  $prefix =~ s/\.[^.]+$//; }
	$x= new IO::File $_,'r' or die "$_: $!\n";
	parse($_,$x);
    }
}

die "must say -w<something>\n" if !defined $write;

sub zilch () {
    undef $c_table;
    undef $c_entryextra;
    undef $c_of;
}

sub enumargs ($) {
    my ($a) = @_;
    $a =~ m:/(.*),: or die "invalid enum type \`$a'\n";
    my ($a_tab, $ee_type, $estr) = ($`,$1,$');
    if ($ee_type !~ m/^[^_]/) {
	$ee_type= $a_tab.$ee_type;
	$a_tab= lc($a_tab).'_entries';
    }
    return ($a_tab, $ee_type, $estr);
}

sub parse ($$) {
    my ($wh,$f) = @_;
    while (defined($_= $f->getline)) {
	chomp; s/\s+$//;
	next if m/^\s*\#/;
	next if !m/\S/;
	while (s/\t/ ' 'x(8 - (length $`) % 8) /e) { }

	s/^\s*//;
	$this_indent= length $&;
	while (@i && $this_indent < $i[0]) {
	    shift @i;
	}
	if ($this_indent && (!@i || $this_indent > $i[0])) {
	    unshift @i, $this_indent;
	}

	if (@i==0 && m/^Table\s+(\*?)(\w+)\s+(\w+)$/) {
	    zilch();
	    $c_table= $2;
	    $table_x{$c_table}{T}= $1;
	    $table_x{$c_table}{C}= $3;
	    $entrytype_x{$3}= '' unless exists $entrytype_x{$3};
	} elsif (@i==0 && m/^Untabled$/) {
	    zilch();
	    $c_table= '';
	} elsif (@i==0 && m/^(C|H)\-Include\s+(\S.*)$/) {
	    o(lc $1, 30, "#include $2\n");
	} elsif (@i==0 && m/^EntryExtra\s+(\w+)$/) {
	    zilch();
	    $c_entryextra= $1;
	} elsif (@i==0 && m/^NoEntryDefine\s+(\w+)$/) {
	    zilch();
	    $entrytype_x{$1}= " ";
	} elsif (@i>=1 && defined $c_entryextra) {
	    $entrytype_x{$c_entryextra} .= "  $_\n";
	} elsif (@i==1 && m/^[a-z].*$/ && defined $c_table) {
	    if (m/^[-_0-9A-Za-z]+$/) {
		$c_entry= $_;
	    } elsif (m/^([-_0-9A-Za-z]+)\s+(\S.*)$/) {
		$c_entry= $1;
		$tables{$c_table}{$c_entry}{I} .= ", $2";
	    } else {
		badsyntax($wh,$.,"bad entry");
	    }
	    $tables{$c_table}{$c_entry}{A} = [ ];
	} elsif (@i==2 && m/^\.\.\.\s+(\w+)$/ && defined $c_entry) {
	    $tables{$c_table}{$c_entry}{V}= $1;
	} elsif (@i==2 && m:^dispatch\(((.*)/(.*)\,.*)\)$: && defined $c_entry) {
	    my $enumargs= $1;
	    my $subcmdtype= $2.$3;
	    $tables{$c_table}{$c_entry}{D}= $subcmdtype;
	    $tables{$c_table}{$c_entry}{V}= 'obj';
	    push @{ $tables{$c_table}{$c_entry}{A} },
	        { N => 'subcmd', T => 'enum', A => $enumargs, O => '' };
	} elsif (@i==2 && m/^(\??)([a-z]\w*)\s*(\S.*)/
		 && defined $c_entry) {
	    ($opt, $var, $type) = ($1,$2,$3);
	    ($type, $xtypeargs) = split_type_args($type);
	    push @{ $tables{$c_table}{$c_entry}{A} },
	        { N => $var, T => $type, A => $xtypeargs, O => ($opt eq '?') };
	} elsif (@i==2 && m/^\=\>\s*(\S.*)$/ && defined $c_entry) {
	    ($type, $xtypeargs) = split_type_args($1);
	    $tables{$c_table}{$c_entry}{R}= $type;
	    $tables{$c_table}{$c_entry}{X}= $xtypeargs;
	} elsif (@i==0 && m/^Type\s+([^\:]+)\:\s+(\S.*)$/) {
	    ($typename,$ctype)= ($1,$2);
	    $ctype .= ' @' unless $ctype =~ m/\@/;
	    ($typename,$xtypeargs) = split_type_args($typename);
	    $types{$typename}= { C => $ctype, X => $xtypeargs };
	} elsif (@i==0 && s/^Init\s+(\w+)\s+(\S.*)//) {
	    $type_init{$1}= $2;
	} elsif (@i==0 && s/^Fini\s+(\w+)\s+(\S.*)//) {
	    $type_fini{$1}= $2;
	} else {
	    badsyntax($wh,$., sprintf
		      "bad directive (indent level %d)", scalar @i);
	}
    }
    $f->error and die $!;
    $f->close;
}

#print Dumper(\%tables),"\n";
#print Dumper(\%types),"\n";

foreach $t (sort keys %types) {
    $type= $types{$t};
    $c= $type->{C};
    $xta= $type->{X};
    $decl= "int cht_pat_$t(Tcl_Interp *ip, Tcl_Obj *obj, ";
    $decl .= subst_in_decl('*val', $c, "type $t");
    $decl .= ", $xta",  if length $xta;
    $decl .= ");\n";
    o('h',160, $decl);

    $decl= "Tcl_Obj *cht_ret_$t(Tcl_Interp *ip, ".subst_in_decl('val',$c);
    $decl .= ", $xta" if length $xta;
    $decl .= ");\n";
    o('h',170, $decl);
}

foreach $c_entrytype (sort keys %entrytype_x) {
    next if $entrytype_x{$c_entrytype} =~ m/^\s$/;
    o('h', 20, "typedef struct $c_entrytype $c_entrytype;\n");
    o('h', 100,
      "struct $c_entrytype {\n".
      "  const char *name;\n".
      "  Tcl_ObjCmdProc *func;\n".
      $entrytype_x{$c_entrytype}.
      "};\n\n");
}

foreach $c_table (sort keys %tables) {
    $r_table= $tables{$c_table};
    $x_table= $table_x{$c_table};
    $op_tab= '';

    foreach $c_entry (sort keys %$r_table) {
	$c_entry_c= $c_entry; $c_entry_c =~ y/-/_/;
	$r_entry= $r_table->{$c_entry};
	$pa_decl= "int pa_${c_table}_${c_entry_c}(ClientData cd,".
	    " Tcl_Interp *ip, int objc, Tcl_Obj *const *objv)";
	$pa_func= "cht_do_${c_table}_${c_entry_c}";
	if (exists $r_entry->{D}) {
	    $pa_func= "cht_dispatch_$r_entry->{D}";
	}
	$do_decl= "int $pa_func(";
	@do_al= ('ClientData cd', 'Tcl_Interp *ip');
	@do_aa= qw(cd ip);
	$pa_init= '';
	$pa_argc= "  objc--; objv++;\n";
	$pa_vars= "  int rc;\n";
	$pa_body= '';
	$pa_rslt= '';
	$pa_free= '';
	$pa_fini= '';
	$any_mand= 0;
	$any_optl= 0;
	$any_eerr= 0;
	$any_eargc= 0;
	$pa_hint= '';
	$pa_hint .= "$c_table " if length $c_table &&
	    !length $table_x{$c_table}{T};
	$pa_hint.= $c_entry;
	foreach $arg (@{ $r_entry->{A} }) {
	    $n= $arg->{N};
	    $t= $arg->{T};
	    $a= $arg->{A};
	    push @do_al, make_decl($n, $t, $arg->{A},
				   "table $c_table entry $c_entry arg $n");
	    $pa_vars .= make_decl_init("a_$n", $t, $a, \$pa_init, "pa_vars");
	    if ($arg->{O}) {
		$pa_hint .= " ?$n?";
		if ($any_mand) {
		    $any_mand= 0;
		    $any_eerr= 1;
		}
		$pa_body .= "  if (!objc--) goto end_optional;\n";
		$any_optl= 1;
	    } else {
		$pa_hint .= " $n";
		$pa_body .= "  if (!objc--) goto wrong_count_args;\n";
		$any_mand++;
		$any_eargc= 1;
		die if $any_optl;
	    }
	    $paarg= "&a_$n";
	    $pafin= '';
	    if ($t eq 'enum') {
		$pa_vars .= "  const void *v_$n= 0;\n";
		$paarg= "&v_$n";
		$pafin= "\n  a_$n= v_$n; ";
		($a_tab, $ee_type, $estr) = enumargs($a);
		$a = "cht_$a_tab, sizeof($ee_type), $estr";
		o('h', 210, "extern const $ee_type cht_$a_tab".'[]'.";\n");
	    }
	    if (exists $type_fini{$t}) {
		$pa_fini .= '  '.subst_in("a_$n", $type_fini{$t})."\n";
	    }
	    $pa_body .= "  rc= cht_pat_$t(ip, *objv++, $paarg";
	    $pa_body .= ", ".$a if length $a;
	    $pa_body .= ");$pafin if (rc) goto rc_err;\n";
	    push @do_aa, "a_$n";
	}
	if (exists $r_entry->{V}) {
	    $pa_hint .= " ...";
	    $va= $r_entry->{V};
	    push @do_al, subst_in_decl("${va}c", 'int @');
	    push @do_al, subst_in_decl("${va}v", 'Tcl_Obj *const *@');
	    push @do_aa, "objc+1", "objv-1";
	} else {
	    if (!$any_optl) {
		$pa_body .= "  if (objc) goto wrong_count_args;\n";
		$any_eargc= 1;
	    }
	}
	if ($any_optl) {
	    $pa_body .= "end_optional:\n";
	}
	if (exists $r_entry->{R}) {
	    $t= $r_entry->{R};
	    $xta= $r_entry->{X};
	    push @do_al, make_decl("*result", $t, "cht_do_al result");
	    $pa_vars .= make_decl_init("result", $t, $xta, \$pa_init,
				       "pa_vars result");
	    push @do_aa, "&result";
	    $pa_rslt .= "  Tcl_SetObjResult(ip, cht_ret_$t(ip, result";
	    $pa_rslt .= ", $xta" if length $xta;
	    $pa_rslt .= "));\n";
	}
	$pa_body .= "\n";
	$pa_body .= "  rc= $pa_func(";
	$pa_body .= join ', ', @do_aa;
	$pa_body .= ");\n";
	$pa_body .= "  if (rc) goto rc_err;\n";

	$pa_rslt .= "  rc= TCL_OK;\n\n";
	$pa_rslt .= "rc_err:\n";
	
	$pa_fini .= "  return rc;\n";
	if ($any_eargc) {
	    $pa_fini .= "\nwrong_count_args:\n";
	    $pa_fini .= "  e=\"wrong # args: should be \\\"$pa_hint\\\"\";\n";
	    $pa_fini .= "  goto e_err;";
	    $any_eerr= 1;
	}
	if ($any_eerr) {
	    $pa_vars .= "  const char *e;\n";
	    $pa_fini .= "\n";
	    $pa_fini .= "e_err:\n";
	    $pa_fini .= "  cht_setstringresult(ip,e);\n";
	    $pa_fini .= "  rc= TCL_ERROR; goto rc_err;\n";
	}
	$pa_vars .= "\n";
	$pa_init .= "\n" if length $pa_init;
	$pa_fini .= "}\n\n";

	if (length $c_table) {
	    $static= 'static ';
	} else {
	    $static= '';
	    o('h',90, "$pa_decl;\n");
	}
	o('c',100,
	  $static.$pa_decl." {\n".
	  $pa_vars.
	  $pa_init.
	  $pa_argc.
	  $pa_body.
	  $pa_rslt.
          $pa_free.
	  $pa_fini);
	$do_decl .= join ', ', @do_al;
	$do_decl .= ")";

	if (exists $r_entry->{D}) {
	    my $subcmdtype= $r_entry->{D};
	    if (!exists $dispatch_done{$subcmdtype}) {
		my $di_body='';
		$di_body .= "static $do_decl {\n";
		$di_body .= "  return subcmd->func(0,ip,objc,objv);\n";
		$di_body .= "}\n";
		o('c',50, $di_body) or die $!;
	    }
	} else {
	    o('h',100, $do_decl.";\n") or die $!;
	}

	$op_tab .= sprintf("  { %-20s %-40s%s },\n",
			   "\"$c_entry\",",
			   "pa_${c_table}_${c_entry_c}",
			   $r_entry->{I});
    }
    if (length $c_table) {
	$decl= "const $x_table->{C} cht_${c_table}_entries[]";
	o('h', 500, "extern $decl;\n");
	o('c', 100,
	  "$decl = {\n".
	  $op_tab.
	  "  { 0 }\n".
	  "};\n\n");
    }
}

o(c, 0, "#include \"$prefix.h\"\n");

o(h, 0,
  "#ifndef INCLUDED_\U${prefix}_H\n".
  "#define INCLUDED_\U${prefix}_H\n\n");

o(h, 999,
  "#endif /*INCLUDED_\U${prefix}_H*/\n");

if (defined $output) {
    $oh= new IO::File "$output.tmp", 'w' or die "$output.tmp: $!\n";
} else {
    $oh= 'STDOUT';
}

print $oh "/* AUTOGENERATED - DO NOT EDIT */\n" or die $!;
foreach $pr (sort keys %{ $o{$write} }) {
    print $oh "\n" or die $!;
    print $oh $o{$write}{$pr} or die $!;
}

die if $oh->error;
die $! unless $oh->close;

if (defined $output) {
    rename "$output.tmp", $output or die $!;
}

sub o ($$) {
    my ($wh,$pr,$s) = @_;
    $o{$wh}{sprintf "%010d", $pr} .= $s;
}

sub split_type_args ($) {
    my ($type) = @_;
    my ($xtypeargs);
    if ($type =~ m/^\w+$/) {
	$xtypeargs='';
    } elsif ($type =~ m/^(\w+)\((.+)\)$/) {
	$type= $1;
	$xtypeargs= $2;
    } else {
	badsyntax($wh,$.,"bad type name/args \`$type'\n");
    }
    return ($type,$xtypeargs);
}

sub make_decl_init ($$$$$) {
    my ($n, $t, $a, $initcode, $why) = @_;
    my ($o,$init);
    $o= make_decl($n,$t,$a,"$why _init");
    if (exists $type_init{$t}) {
	$init= $type_init{$t};
	$$initcode .= "  ".subst_in("$n", $init)."\n"
	    if length $init;
    } else {
	$o .= ' =0';
    }
    return "  ".$o.";\n";
}

sub make_decl ($$$$) {
    my ($n, $t, $ta, $why) = @_;
    my ($type);
    if ($t eq 'enum') {
	($a_tab, $ee_type, $estr) = enumargs($ta);
	$c= "const $ee_type* @";
    } else { 
	defined $types{$t} or die "unknown type $t ($why)\n";
	$c= $types{$t}{C};
    }
    return subst_in_decl($n,$c);
}

sub subst_in_decl ($$$) {
    my ($val, $pat, $why) = @_;
    local ($_) = subst_in($val, $pat, $why);
    s/ *(\**) *$/$1/;
    return $_;
}
    
sub subst_in ($$$) {
    my ($val, $pat, $why) = @_;
    $pat =~ m/\@/ or die "$pat for $val in $why ?";
    $pat =~ s/\@/$val/g;
    return $pat;
}

sub badsyntax ($$$) {
    die "$_[0]:$_[1]: $_[2]\n";
}

__DATA__
Type int:	int
Type obj:	Tcl_Obj *@
