/*
 * pymigemo.c - C/Migemo wrapper for Python
 * Copyright(C) 2005-2009, Atzm WATANABE <atzm@atzm.org>
 *
 * $Id$
 */

#include <Python.h>
#include <structmember.h>
#include <migemo.h>
#include <string.h>

#define PYMIGEMO_VERSION "0.2"

/*
 * Local definition of `struct _migemo`, provided so that members of a
 * `migemo` handle can be dereferenced from this wrapper.  The code visible
 * in this file only reads `charset`.
 * NOTE(review): presumably this has to mirror the member layout used inside
 * the linked C/Migemo library -- confirm against the library sources before
 * reordering or retyping any member.
 */
struct _migemo {
    int   enable;
    void *mtree;
    int   charset;   /* mapped to a Python codec name by get_encoding() */
    void *roma2hira;
    void *hira2kata;
    void *han2zen;
    void *zen2han;
    void *rx;
    void *addword;
    void *char2int;
};

/* Instance layout of the Python-level Migemo object. */
typedef struct {
    PyObject_HEAD
    migemo *migemo_obj; /* wrapped C/Migemo handle; NULL until Migemo_init() stores one */
} Migemo;

/*
 * tp_dealloc slot: close the wrapped C/Migemo handle when one is present,
 * then give the instance memory back through the type's tp_free.
 */
static void
Migemo_dealloc(Migemo *self)
{
    migemo *handle = self->migemo_obj;

    if (handle != NULL) {
        migemo_close(handle);
    }

    self->ob_type->tp_free((PyObject *)self);
}

/*
 * tp_new slot: allocate an instance through the type's tp_alloc and start it
 * with no C/Migemo handle attached.  `args` and `kwds` are not inspected.
 *
 * Returns the new instance, or NULL when allocation fails.
 */
static PyObject *
Migemo_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    Migemo *instance = (Migemo *)type->tp_alloc(type, 0);

    if (instance == NULL) {
        return NULL;
    }

    instance->migemo_obj = NULL;
    return (PyObject *)instance;
}

/*
 * tp_init slot: Migemo(dictionary)
 *
 *   dictionary: path handed to migemo_open() (parsed with the "s" format)
 *
 * Opens a new C/Migemo handle and stores it on the instance.  When the
 * instance already holds a handle (repeated __init__ call), the old handle
 * is closed only after the new one has been opened successfully, so a
 * failed re-initialisation never leaves a dangling pointer behind.
 *
 * Returns 0 on success; returns -1 with a Python exception set on failure.
 */
static int
Migemo_init(Migemo *self, PyObject *args, PyObject *kwds)
{
    migemo *migemo_obj;
    char   *dictionary = NULL;

    static char *kwlist[] = {"dictionary", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", kwlist, &dictionary)) {
        return -1;
    }

    migemo_obj = migemo_open(dictionary);

    if (!migemo_obj) {
        /* Returning -1 without an exception would surface as a SystemError. */
        PyErr_Format(PyExc_RuntimeError,
                     "migemo_open() failed for dictionary '%s'", dictionary);
        return -1;
    }

    if (self->migemo_obj) {
        migemo_close(self->migemo_obj);
    }

    self->migemo_obj = migemo_obj;

    return 0;
}

/*
 * Copy the Python codec name that corresponds to a C/Migemo charset code
 * into `encoding`, a caller-supplied buffer of `size` bytes.
 *
 *   1 -> "cp932",  2 -> "euc_jp",  3 -> "utf_8",  anything else -> "ascii"
 *
 * Returns 1 when the name plus its terminating NUL fits in the buffer.
 * Returns 0, leaving the buffer untouched, when it does not.
 */
static int
get_encoding(char *encoding, size_t size, int charset)
{
    static const char *const codec_names[] = {
        "ascii", "cp932", "euc_jp", "utf_8"
    };
    const char *name;
    size_t      length;

    if (charset >= 1 && charset <= 3) {
        name = codec_names[charset];
    }
    else {
        name = codec_names[0];
    }

    length = strlen(name);

    if (length >= size) {
        return 0;
    }

    /* length + 1 also copies the terminating NUL */
    memcpy(encoding, name, length + 1);
    return 1;
}

/*
 * Migemo.get_encoding()
 *
 * Returns the codec name selected by the handle's `charset` member as a
 * Python str.  Returns NULL with a RuntimeError set when the instance has no
 * C/Migemo handle or the codec name does not fit the local buffer.
 */
static PyObject *
Migemo_get_encoding(Migemo *self)
{
    /* 7 bytes: the longest name get_encoding() produces is "euc_jp" + NUL */
    char encoding[7];

    /* The handle is NULL when __init__ never ran (e.g. Migemo.__new__). */
    if (!self->migemo_obj) {
        PyErr_SetString(PyExc_RuntimeError, "migemo object is not initialized");
        return NULL;
    }

    if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) {
        PyErr_SetString(PyExc_RuntimeError, "encoding name does not fit in buffer");
        return NULL;
    }

    return PyString_FromString(encoding);
}

/*
 * Migemo.query(query)
 *
 *   query: str, or unicode (encoded with the dictionary's codec before it is
 *          handed to migemo_query())
 *
 * Returns the regex produced by migemo_query() as a new unicode object,
 * decoded with the same codec.  Returns NULL with an exception set when:
 *   - the instance has no C/Migemo handle (RuntimeError),
 *   - `query` is neither str nor unicode (TypeError),
 *   - encoding the query or decoding the regex fails (codec error),
 *   - migemo_query() yields no regex (RuntimeError).
 */
static PyObject *
Migemo_query(Migemo *self, PyObject *args, PyObject *kwds)
{
    PyObject      *result = NULL, *query_obj = NULL, *query_str = NULL;
    /* 7 bytes: the longest name get_encoding() produces is "euc_jp" + NUL */
    char          *query, encoding[7];
    unsigned char *regex;

    static char *kwlist[] = {"query", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &query_obj)) {
        return NULL;
    }

    /* The handle is NULL when __init__ never ran (e.g. Migemo.__new__). */
    if (!self->migemo_obj) {
        PyErr_SetString(PyExc_RuntimeError, "migemo object is not initialized");
        return NULL;
    }

    if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) {
        PyErr_SetString(PyExc_RuntimeError, "encoding name does not fit in buffer");
        return NULL;
    }

    if (PyUnicode_Check(query_obj)) {
        query_str = PyUnicode_AsEncodedString(query_obj, encoding, "strict");

        if (!query_str) {
            /* the codec has already set the exception */
            return NULL;
        }

        query = PyString_AS_STRING(query_str);
    }
    else if (PyString_Check(query_obj)) {
        query = PyString_AS_STRING(query_obj);
    }
    else {
        PyErr_SetString(PyExc_TypeError, "query must be str or unicode");
        return NULL;
    }

    regex = migemo_query(self->migemo_obj, query);

    if (regex) {
        /* Decode before releasing: `regex` is owned by the migemo handle. */
        result = PyUnicode_Decode((const char *)regex,
                                  (Py_ssize_t)strlen((const char *)regex),
                                  encoding, "strict");
        migemo_release(self->migemo_obj, regex);
    }
    else {
        PyErr_SetString(PyExc_RuntimeError, "migemo_query() returned no result");
    }

    /* `query` may point into query_str, so drop it only after the call. */
    Py_XDECREF(query_str);

    return result;
}

/*
 * Migemo.set_operator(index, op)
 *
 *   index: int passed through to migemo_set_operator()
 *   op:    operator string (parsed with the "s" format)
 *
 * Returns the int result of migemo_set_operator() as a Python int, or NULL
 * with an exception set when argument parsing or int creation fails.
 */
static PyObject *
Migemo_set_operator(Migemo *self, PyObject *args, PyObject *kwds)
{
    char *op = NULL;
    int   index;

    static char *kwlist[] = {"index", "op", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "is", kwlist, &index, &op)) {
        return NULL;
    }

    /* A successful "s" parse always yields a non-NULL `op`. */
    return PyInt_FromLong((long)migemo_set_operator(self->migemo_obj, index, op));
}

/*
 * Migemo.get_operator(index)
 *
 *   index: int passed through to migemo_get_operator()
 *
 * Returns the operator string as a Python str.  When migemo_get_operator()
 * returns NULL, a ValueError is raised instead of returning an
 * uninitialised pointer.
 */
static PyObject *
Migemo_get_operator(Migemo *self, PyObject *args, PyObject *kwds)
{
    const unsigned char *op;
    int                  index;

    static char *kwlist[] = {"index", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "i", kwlist, &index)) {
        return NULL;
    }

    op = migemo_get_operator(self->migemo_obj, index);

    if (!op) {
        PyErr_Format(PyExc_ValueError,
                     "no operator available for index %d", index);
        return NULL;
    }

    return PyString_FromString((const char *)op);
}

/*
 * Migemo.load(dict_id, dict_file)
 *
 *   dict_id:   int passed through to migemo_load()
 *   dict_file: dictionary path (parsed with the "s" format)
 *
 * Returns the int result of migemo_load() as a Python int, or NULL with an
 * exception set when argument parsing or int creation fails.
 */
static PyObject *
Migemo_load(Migemo *self, PyObject *args, PyObject *kwds)
{
    char *dict_file = NULL;
    int   dict_id;

    static char *kwlist[] = {"dict_id", "dict_file", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "is", kwlist, &dict_id, &dict_file)) {
        return NULL;
    }

    /* A successful "s" parse always yields a non-NULL `dict_file`. */
    return PyInt_FromLong((long)migemo_load(self->migemo_obj, dict_id, dict_file));
}

/*
 * Migemo.is_enable()
 *
 * Returns the result of migemo_is_enable() for the wrapped handle as a
 * Python int.
 */
static PyObject *
Migemo_is_enable(Migemo *self)
{
    long enabled = (long)migemo_is_enable(self->migemo_obj);

    return PyInt_FromLong(enabled);
}

/*
 * Method table of the Migemo type.
 *
 * Handlers that take (self, args, kwds) are registered with
 * METH_VARARGS | METH_KEYWORDS, the documented flag combination for
 * PyCFunctionWithKeywords; the argument-less handlers use METH_NOARGS.
 */
static PyMethodDef Migemo_methods[] = {
    {"query", (PyCFunction)Migemo_query, METH_VARARGS | METH_KEYWORDS,
     "return regex from romaji string\n\
\n\
def query(query)\n\
  query: romaji string (str or unicode)\n\
\n\
  returns: regex string as Unicode object"},
    {"set_operator", (PyCFunction)Migemo_set_operator, METH_VARARGS | METH_KEYWORDS,
     "set operator string as the meta character of regex\n\
\n\
def set_operator(index, op):\n\
  index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\
          OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\
  op: operator string (str)\n\
\n\
  returns: boolean value"},
    {"get_operator", (PyCFunction)Migemo_get_operator, METH_VARARGS | METH_KEYWORDS,
     "get operator string as the meta character of regex\n\
\n\
def get_operator(index)\n\
  index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\
          OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\
\n\
  returns: operator string (str)"},
    {"load", (PyCFunction)Migemo_load, METH_VARARGS | METH_KEYWORDS,
     "add dictionary to Migemo object\n\
\n\
def load(dict_id, dict_file)\n\
  dict_id: (DICTID_HAN2ZEN|DICTID_HIRA2KATA|DICTID_MIGEMO|\n\
            DICTID_ROMA2HIRA|DICTID_ZEN2HAN)\n\
  dict_file: path to dictionary file (str)\n\
\n\
  returns: boolean value"},
    {"is_enable", (PyCFunction)Migemo_is_enable, METH_NOARGS,
     "check internal migemo_dict\n\
\n\
def is_enable()\n\
  returns: boolean value"},
    {"get_encoding", (PyCFunction)Migemo_get_encoding, METH_NOARGS,
     "get dictionary encoding\n\
\n\
def get_encoding()\n\
  returns: encoding string (str)"},
    {NULL} /* Sentinel */
};

/* Member table of the Migemo type: no members are exposed, only the terminator. */
static PyMemberDef Migemo_members[] = {
    {NULL, 0, 0, 0, NULL} /* Sentinel */
};

/*
 * Type object for migemo.Migemo.
 *
 * The initializer is positional, so every slot must stay in this exact
 * order.  Slots in use: tp_dealloc (Migemo_dealloc), tp_methods
 * (Migemo_methods), tp_members (Migemo_members), tp_init (Migemo_init) and
 * tp_new (Migemo_new).  Py_TPFLAGS_BASETYPE allows the type to be
 * subclassed from Python.
 */
static PyTypeObject MigemoType = {
    PyObject_HEAD_INIT(NULL)
    0,                          /*ob_size*/
    "migemo.Migemo",            /*tp_name*/
    sizeof(Migemo),             /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)Migemo_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
    "Migemo wrapper object",    /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    Migemo_methods,             /* tp_methods */
    Migemo_members,             /* tp_members */
    0,                          /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)Migemo_init,      /* tp_init */
    0,                          /* tp_alloc */
    Migemo_new,                 /* tp_new */
};

/* Module-level function table: the module defines no functions, only the terminator. */
static PyMethodDef module_methods[] = {
    {NULL, NULL, 0, NULL} /* Sentinel */
};

#ifndef PyMODINIT_FUNC
#define PyMODINIT_FUNC void
#endif
PyMODINIT_FUNC
initmigemo(void) 
{
    PyObject* m;

    if (PyType_Ready(&MigemoType) < 0)
        return;

    m = Py_InitModule3("migemo", module_methods, "C/Migemo wrapper");

    Py_INCREF(&MigemoType);
    PyModule_AddObject(m, "Migemo", (PyObject *)&MigemoType);
    PyModule_AddObject(m, "PYMIGEMO_VERSION", Py_BuildValue("s", PYMIGEMO_VERSION));

    PyModule_AddObject(m, "MIGEMO_VERSION", Py_BuildValue("s", MIGEMO_VERSION));

    PyModule_AddObject(m, "DICTID_INVALID", Py_BuildValue("i", MIGEMO_DICTID_INVALID));
    PyModule_AddObject(m, "DICTID_MIGEMO", Py_BuildValue("i", MIGEMO_DICTID_MIGEMO));
    PyModule_AddObject(m, "DICTID_ROMA2HIRA", Py_BuildValue("i", MIGEMO_DICTID_ROMA2HIRA));
    PyModule_AddObject(m, "DICTID_HIRA2KATA", Py_BuildValue("i", MIGEMO_DICTID_HIRA2KATA));
    PyModule_AddObject(m, "DICTID_HAN2ZEN", Py_BuildValue("i", MIGEMO_DICTID_HAN2ZEN));
    PyModule_AddObject(m, "DICTID_ZEN2HAN", Py_BuildValue("i", MIGEMO_DICTID_ZEN2HAN));

    PyModule_AddObject(m, "OPINDEX_OR", Py_BuildValue("i", MIGEMO_OPINDEX_OR));
    PyModule_AddObject(m, "OPINDEX_NEST_IN", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_IN));
    PyModule_AddObject(m, "OPINDEX_NEST_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_OUT));
    PyModule_AddObject(m, "OPINDEX_SELECT_IN", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_IN));
    PyModule_AddObject(m, "OPINDEX_SELECT_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_OUT));
    PyModule_AddObject(m, "OPINDEX_NEWLINE", Py_BuildValue("i", MIGEMO_OPINDEX_NEWLINE));
}
