#! /bin/sh

# Have make provide the Unihan.txt data file; every later step reads it,
# so give up right away when it cannot be produced.
make Unihan.txt || {
	echo Could not acquire Unicode data file Unihan.txt
	exit 1
}

# An optional leading -c, -j or -k flag names the punctuation table
# (keymaps0/punctuation.Chinese, .Japanese or .Korean) that is later copied
# into the generated header.  A recognised flag is consumed so that the
# remaining arguments are found at $1 and $2.
punct=
if [ "x$1" = x-c ]
then	punct=Chinese
elif [ "x$1" = x-j ]
then	punct=Japanese
elif [ "x$1" = x-k ]
then	punct=Korean
fi
if [ -n "$punct" ]
then	shift
fi

# Positional arguments: the Unihan field name without its leading "k"
# (the extraction below looks for k$name entries), and a shortcut that is
# handed on unchanged to mkkentry.
name=$1
shortcut=$2
# Version string: take the first line of Unihan.txt that mentions "version"
# and drop everything up to and including the last colon and the blanks
# that follow it.
version=$(sed -n '/version/ {
	s,.*: *,,
	p
	q
}' Unihan.txt)

# insert priority marks before Unicode values:
#	4E00..9FFF; CJK Unified Ideographs
#	3400..4DBF; CJK Unified Ideographs Extension A
#	20000..2A6DF; CJK Unified Ideographs Extension B
#	2E80..2EFF; CJK Radicals Supplement
#	F900..FAFF; CJK Compatibility Ideographs
#	2F800..2FA1F; CJK Compatibility Ideographs Supplement
#	3300..33FF; CJK Compatibility
#	FE30..FE4F; CJK Compatibility Forms
#	3000..303F; CJK Symbols and Punctuation
#	3200..32FF; Enclosed CJK Letters and Months
priouni () {
    # Filter: put a two-digit priority and a blank in front of the first
    # "U+" code point on each input line.  Rules are tried top to bottom and
    # the first one that substitutes wins, because the bare "t" after each
    # rule jumps to the end of the sed script once a substitution was made.
    # Matching is by leading hex digits (after optional zero padding), so
    # each rule covers every code point sharing that prefix.  A "U+" that
    # fits no listed prefix gets priority 99; lines without "U+" pass
    # through unchanged.
    sed 's,U+0*4[EeFf],01 &,
t
s,U+0*[5-9],01 &,
t
s,U+0*3[4-9A-Fa-f],02 &,
t
s,U+0*4[0-9A-Da-d],02 &,
t
s,U+2[0-9Aa],03 &,
t
s,U+0*2[Ee][89A-Fa-f],11 &,
t
s,U+0*[Ff][9Aa],21 &,
t
s,U+2[Ff][89Aa],22 &,
t
s,U+0*33,23 &,
t
s,U+0*[Ff][Ee][34],31 &,
t
s,U+0*30[0-3],41 &,
t
s,U+0*32,42 &,
t
s,U+,99 &,'
}

# Write keymaps/$name.c: a small C program that, when run, prints the body
# of the keymap table.  The program consists of a fixed prologue (helper
# functions and the opening of main), one addmap() call per mapping
# extracted from Unihan.txt, and a fixed epilogue.
(
# Run all text tools in this subshell with byte semantics and byte-wise
# collation.  LC_ALL takes precedence over LC_CTYPE and LANG, and the order
# produced by sort depends on LC_COLLATE, which LC_CTYPE alone does not set.
LC_ALL=C
export LC_ALL

# A literal tab for the sed scripts below (Unihan.txt is tab-separated);
# spelled out so the patterns do not depend on invisible characters.
tab=$(printf '\t')

# Prologue.  The delimiter is quoted, so the C text is copied verbatim.
# The program uses standard headers and prototype-style definitions so that
# it builds with compilers that reject implicit declarations and old-style
# parameter lists, and so that the integer literals passed to addmap() are
# converted to unsigned long.
cat <<\/eoc
#include <stdio.h>
#include <string.h>

/* key of the table entry currently being written; "" before the first */
static const char * keys = "";

/* write the low 8 bits of byte to stdout */
static void
putbyte (unsigned long byte)
{
	putchar ((int) byte);
}

/* write unichar to stdout as UTF-8, including the 5- and 6-byte forms;
   values of 0x80000000 and above produce no output */
static void
printutf8 (unsigned long unichar)
{
	if (unichar < 0x80) {
		putbyte (unichar);
	} else if (unichar < 0x800) {
		putbyte (0xC0 | (unichar >> 6));
		putbyte (0x80 | (unichar & 0x3F));
	} else if (unichar < 0x10000) {
		putbyte (0xE0 | (unichar >> 12));
		putbyte (0x80 | ((unichar >> 6) & 0x3F));
		putbyte (0x80 | (unichar & 0x3F));
	} else if (unichar < 0x200000) {
		putbyte (0xF0 | (unichar >> 18));
		putbyte (0x80 | ((unichar >> 12) & 0x3F));
		putbyte (0x80 | ((unichar >> 6) & 0x3F));
		putbyte (0x80 | (unichar & 0x3F));
	} else if (unichar < 0x4000000) {
		putbyte (0xF8 | (unichar >> 24));
		putbyte (0x80 | ((unichar >> 18) & 0x3F));
		putbyte (0x80 | ((unichar >> 12) & 0x3F));
		putbyte (0x80 | ((unichar >> 6) & 0x3F));
		putbyte (0x80 | (unichar & 0x3F));
	} else if (unichar < 0x80000000) {
		putbyte (0xFC | (unichar >> 30));
		putbyte (0x80 | ((unichar >> 24) & 0x3F));
		putbyte (0x80 | ((unichar >> 18) & 0x3F));
		putbyte (0x80 | ((unichar >> 12) & 0x3F));
		putbyte (0x80 | ((unichar >> 6) & 0x3F));
		putbyte (0x80 | (unichar & 0x3F));
	}
}

/* Append character ch to the entry for key k.  A key different from the
   previous call closes the open entry (if any) and opens a new one; the
   same key adds a blank-separated alternative.  The final call with k ""
   only closes the last entry. */
static void
addmap (const char * k, unsigned long ch)
{
	if (strcmp (k, keys) != 0) {
		if (* keys != '\0') {
			printf ("\"},\n");
		}
		if (* k != '\0') {
			printf ("\t{\"%s\", \"", k);
		}
	} else {
		printf (" ");
	}
	if (* k != '\0') {
		printutf8 (ch);
	}
	keys = k;
}

int
main (void) {
/eoc

# Extract the mappings from the Unihan data:
#  1. keep only the k$name lines and swap them to "value<TAB>codepoint"
#  2. apply the companion sed script that sits next to this script
#  3. fold ASCII upper case to lower case, and U-umlaut likewise
#  4. pad 4-digit code points to 5 digits and restore the "U+" prefix
#  5. add priorities and sort, so that equal keys become adjacent
#  6. turn each line into an addmap() call, dropping the priority
sed	-e "s/^U+\([^${tab}]*\)${tab}k${name}${tab}\([^${tab}]*\)\$/\2${tab}\1/" \
	-e t -e d Unihan.txt |
sed -f "$0.sed" |
tr 'A-Z' 'a-z' |
sed -e "s,Ü,ü,g" -e "s,${tab}\(....\)\$,${tab}0\1," -e "s,${tab},${tab}U+," |
priouni |
sort |
sed	-e "s/\(.*\)${tab}.. U+\(.*\)/${tab}addmap (\"\1\", 0x\2);/"

# Epilogue: close the last table entry and end main.
cat <<\/eoc
	addmap ("", 0);
	return 0;
}
/eoc
) > "keymaps/$name.c"

# Compile the generated program and run it to produce keymaps/$name.h,
# keeping any previous header as keymaps/$name.h.sav.  Then register the
# table through ./mkkentry and remove the intermediate .c and .exe files.
# If compilation fails, the script reports it on stderr and exits non-zero
# so that callers can notice; the .c file is left in place in that case.
if cc -o "keymaps/$name.exe" "keymaps/$name.c"
then	if [ -f "keymaps/$name.h" ]
	then	echo "saving previous keyboard mapping file to keymaps/$name.h.sav"
		mv -i "keymaps/$name.h" "keymaps/$name.h.sav"
	fi
	(
	# C comment header describing where the table came from
	echo "/***************************************************"
	echo "	mined keyboard mapping table"
	echo "	* generated with mkkbmap"
	echo "	  from Unihan.txt (version $version), k$name entries"
	if [ -n "$punct" ]
	then	echo "	* supplemented with punctuation mappings"
	fi
	echo "*/"
	echo "struct keymap keymap_$name [] = {"
	# optional punctuation entries go in front of the generated ones
	if [ -n "$punct" ]
	then	cat "keymaps0/punctuation.$punct"
		echo
	fi
	"keymaps/$name.exe"
	echo "	{NIL_PTR}"
	echo "};"
	) > "keymaps/$name.h"

	# pass the shortcut only when one was given, as before
	./mkkentry "$name" ${shortcut:+"$shortcut"}

	rm -f "keymaps/$name.c" "keymaps/$name.exe"
else	echo "Could not compile keymaps/$name.c" >&2
	exit 1
fi
