#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#include "scim-bridge-string.h"

/* Messages handed to scim_bridge_exception_set_message () when a conversion
 * in this file fails. */
static const char ERROR_MESSAGE_INVALID_UTF8[] = "Invalid utf8 string";
static const char ERROR_MESSAGE_INVALID_UCS4[] = "Invalid ucs4 string";
static const char ERROR_MESSAGE_NO_ENOUGH_BUFFER[] = "No enough buffer";

/*
 * Decode the single UTF-8 sequence that starts at bytes[0].
 *
 * `avail` is the number of bytes that precede the terminating NUL and must be
 * at least 1.  On success the code point is stored in *wc and the length of
 * the sequence (1..6) is returned.  0 is returned, and *wc is left untouched,
 * when the sequence is malformed, truncated, or overlong (encoded with more
 * bytes than the code point requires).
 *
 * The legacy 5- and 6-byte forms are accepted so that every value
 * scim_bridge_string_wcstombs () can emit is decodable again.
 */
static size_t scim_bridge_string_decode_utf8 (const unsigned char *bytes, size_t avail, ucs4_t *wc)
{
    /* Smallest code point that legitimately needs a sequence of each length;
     * anything below it is an overlong encoding. */
    static const unsigned long min_value[] = {
        0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000
    };

    const unsigned char lead = bytes[0];
    unsigned long value;
    size_t count;
    size_t k;

    if (lead < 0x80) {
        *wc = lead;
        return 1;
    } else if (lead < 0xc2) {
        /* Stray continuation byte, or a lead byte (0xc0/0xc1) that could
         * only start an overlong 2-byte sequence. */
        return 0;
    } else if (lead < 0xe0) {
        count = 2;
        value = lead & 0x1f;
    } else if (lead < 0xf0) {
        count = 3;
        value = lead & 0x0f;
    } else if (lead < 0xf8) {
        count = 4;
        value = lead & 0x07;
    } else if (lead < 0xfc) {
        count = 5;
        value = lead & 0x03;
    } else if (lead < 0xfe) {
        count = 6;
        value = lead & 0x01;
    } else {
        /* 0xfe and 0xff never appear in UTF-8. */
        return 0;
    }

    /* The whole sequence must lie before the terminating NUL. */
    if (avail < count) {
        return 0;
    }

    for (k = 1; k < count; ++k) {
        const unsigned char cont = bytes[k];

        /* Continuation bytes have the form 10xxxxxx. */
        if ((cont & 0xc0) != 0x80) {
            return 0;
        }
        value = (value << 6) | (unsigned long) (cont & 0x3f);
    }

    if (value < min_value[count]) {
        return 0;
    }

    *wc = (ucs4_t) value;
    return count;
}


/*
 * Convert the NUL-terminated UTF-8 string `str` into a NUL-terminated UCS-4
 * string stored in `wstr`.
 *
 * except        receives the error code and message on failure.
 * wstr          output buffer; must have room for max_wstr_len + 1 elements
 *               (the characters plus the terminator).
 * str           NUL-terminated UTF-8 input.
 * max_wstr_len  maximum number of characters to produce, terminator excluded.
 * wstr_len      output: number of characters written, terminator excluded;
 *               set to 0 on failure.
 *
 * Returns 0 on success.  Returns -1 on failure, in which case `wstr` is set
 * to the empty string and `except` carries either EILSEQ (invalid UTF-8) or
 * ENOBUFS (the string needs more than max_wstr_len characters).
 */
int scim_bridge_string_mbstowcs (ScimBridgeException *except, ucs4_t *wstr, const char *str, const size_t max_wstr_len, size_t * wstr_len)
{
    const unsigned char *bytes = (const unsigned char *) str;
    const size_t str_len = strlen (str);
    size_t str_index = 0;
    size_t i;

    /* Slot i may hold a character or the terminator, hence `<=`. */
    for (i = 0; i <= max_wstr_len; ++i) {
        size_t consumed;

        if (str_index >= str_len) {
            /* All input consumed: terminate the output and report success. */
            wstr[i] = L'\0';
            *wstr_len = i;
            return 0;
        }

        consumed = scim_bridge_string_decode_utf8 (bytes + str_index, str_len - str_index, &wstr[i]);
        if (consumed == 0) {
            scim_bridge_exception_set_errno (except, EILSEQ);
            scim_bridge_exception_set_message (except, ERROR_MESSAGE_INVALID_UTF8);

            wstr[0] = L'\0';
            *wstr_len = 0;
            return -1;
        }
        str_index += consumed;
    }

    /* Every output slot is used and there is still input left. */
    scim_bridge_exception_set_errno (except, ENOBUFS);
    scim_bridge_exception_set_message (except, ERROR_MESSAGE_NO_ENOUGH_BUFFER);

    wstr[0] = L'\0';
    *wstr_len = 0;
    return -1;
}


/* Imported from scim-utility.cpp */
/*
 * Convert the NUL-terminated UCS-4 string `wstr` into a NUL-terminated UTF-8
 * string stored in `str`.
 *
 * except       receives the error code and message on failure.
 * str          output buffer; must have room for max_str_len + 1 bytes
 *              (the encoded bytes plus the terminator).
 * wstr         NUL-terminated UCS-4 input.
 * max_str_len  maximum number of bytes to produce, terminator excluded.
 * str_len      output: number of bytes written, terminator excluded;
 *              set to 0 on failure.
 *
 * Code points up to 0x7fffffff are encoded, using the legacy 5- and 6-byte
 * forms above 0x1fffff.
 *
 * Returns 0 on success.  Returns -1 on failure, in which case `str` is set to
 * the empty string and `except` carries either EILSEQ (a code point above
 * 0x7fffffff) or ENOBUFS (the result does not fit in max_str_len bytes).
 */
int scim_bridge_string_wcstombs (ScimBridgeException *except, char *str, const ucs4_t *wstr, const size_t max_str_len, size_t *str_len)
{
    const size_t wstr_len = scim_bridge_string_wstrlen (wstr);
    size_t str_index = 0;
    size_t i;

    /* The bound is inclusive so the terminator is encoded (as one 0 byte)
     * by the same code path as every other character. */
    for (i = 0; i <= wstr_len; ++i) {
        size_t count;
        ucs4_t wc = wstr[i];

        if (wc < 0x80) {
            count = 1;
        } else if (wc < 0x800) {
            count = 2;
        } else if (wc < 0x10000) {
            count = 3;
        } else if (wc < 0x200000) {
            count = 4;
        } else if (wc < 0x4000000) {
            count = 5;
        } else if (wc <= 0x7fffffff) {
            count = 6;
        } else {
            /* No preceding call sets errno here, so report an explicit code. */
            scim_bridge_exception_set_errno (except, EILSEQ);
            scim_bridge_exception_set_message (except, ERROR_MESSAGE_INVALID_UCS4);

            str[0] = '\0';
            *str_len = 0;
            return -1;
        }

        /* The buffer holds max_str_len bytes plus one for the terminator. */
        if (str_index + count > max_str_len + 1) {
            scim_bridge_exception_set_errno (except, ENOBUFS);
            scim_bridge_exception_set_message (except, ERROR_MESSAGE_NO_ENOUGH_BUFFER);

            str[0] = '\0';
            *str_len = 0;
            return -1;
        }

        /* Emit continuation bytes from last to first.  After each 6-bit shift
         * one marker bit is OR-ed in; by the time case 1 is reached those
         * bits have accumulated into the lead-byte prefix (0xc0, 0xe0, ...). */
        switch (count) {
            case 6:
                str[str_index + 5] = (char) (0x80 | (wc & 0x3f));
                wc = wc >> 6;
                wc |= 0x4000000;
                /* fall through */
            case 5:
                str[str_index + 4] = (char) (0x80 | (wc & 0x3f));
                wc = wc >> 6;
                wc |= 0x200000;
                /* fall through */
            case 4:
                str[str_index + 3] = (char) (0x80 | (wc & 0x3f));
                wc = wc >> 6;
                wc |= 0x10000;
                /* fall through */
            case 3:
                str[str_index + 2] = (char) (0x80 | (wc & 0x3f));
                wc = wc >> 6;
                wc |= 0x800;
                /* fall through */
            case 2:
                str[str_index + 1] = (char) (0x80 | (wc & 0x3f));
                wc = wc >> 6;
                wc |= 0xc0;
                /* fall through */
            case 1:
                str[str_index + 0] = (char) wc;
                break;
            default:
                break;
        }
        str_index += count;
    }

    /* str_index counts the terminator byte as well; exclude it. */
    *str_len = str_index - 1;
    return 0;
}


/*
 * Count the characters in the UCS-4 string `wstr` that precede its
 * terminating L'\0'.  The terminator itself is not counted.
 */
size_t scim_bridge_string_wstrlen (const ucs4_t *wstr)
{
    const ucs4_t *cursor = wstr;

    while (*cursor != L'\0') {
        ++cursor;
    }

    return (size_t) (cursor - wstr);
}


/*
 * Return an upper bound on the number of bytes that
 * scim_bridge_string_wcstombs () can produce for `wstr`.  The terminating NUL
 * is not counted.
 *
 * The encoder emits up to 6 bytes per character (code points up to
 * 0x7fffffff), so the bound has to be 6 bytes per character; a smaller factor
 * makes conversions of such strings fail with a "no buffer" error.
 */
size_t scim_bridge_string_strbuflen (const ucs4_t *wstr)
{
    return scim_bridge_string_wstrlen (wstr) * 6;
}


/*
 * Return an upper bound on the number of UCS-4 characters needed to hold
 * `str`.  The terminator is not counted.
 *
 * Every decoded character consumes at least one byte of input, so the byte
 * length of `str` is always sufficient.
 */
size_t scim_bridge_string_wstrbuflen (const char *str)
{
    const size_t byte_count = strlen (str);

    return byte_count;
}
