/* * pymigemo.c - C/Migemo Python binding * Copyright(C) 2005-2009, Atzm WATANABE * * $Id$ */ #include #include #include #include #include #include #include #include #include #include #define PYMIGEMO_VERSION "0.3" /* for dereference migemo object members */ struct _migemo { int enable; void *mtree; int charset; void *roma2hira; void *hira2kata; void *han2zen; void *zen2han; void *rx; void *addword; void *char2int; }; typedef struct { PyObject_HEAD migemo *migemo_obj; } Migemo; static bool get_encoding(char *encoding, size_t size, int charset) { char *enc; switch(charset) { case 1: enc = "cp932"; break; case 2: enc = "euc_jp"; break; case 3: enc = "utf_8"; break; default: enc = "ascii"; } if (strlen(enc) < size) { strcpy(encoding, enc); return true; } return false; } static int isloadable(const char *path) { struct stat st; int ret = 0; int fd = open(path, O_RDONLY); if (fd < 0) { return errno; } if (fstat(fd, &st) < 0) { ret = errno; goto isloadable_end; } if (S_ISDIR(st.st_mode)) { ret = EISDIR; goto isloadable_end; } isloadable_end: if (close(fd) < 0) { ret = errno; } return ret; } static void Migemo_dealloc(Migemo *self) { if (self->migemo_obj) { migemo_close(self->migemo_obj); } self->ob_type->tp_free((PyObject *)self); } static PyObject * Migemo_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { Migemo *self; self = (Migemo *)type->tp_alloc(type, 0); if (self != NULL) { self->migemo_obj = NULL; } return (PyObject *)self; } static int Migemo_init(Migemo *self, PyObject *args, PyObject *kwds) { migemo *migemo_obj; char *dictionary; static char *kwlist[] = {"dictionary", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", kwlist, &dictionary)) { return -1; } if (dictionary) { int ret = isloadable(dictionary); if (ret != 0) { PyErr_SetString(PyExc_ValueError, strerror(ret)); return -1; } if (self->migemo_obj) { migemo_close(self->migemo_obj); } migemo_obj = migemo_open(dictionary); if (migemo_obj) { self->migemo_obj = migemo_obj; } else { PyErr_SetString(PyExc_AssertionError, "migemo_open() failed"); return -1; } } return 0; } static PyObject * Migemo_get_encoding(Migemo *self) { char encoding[7]; if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) { PyErr_SetString(PyExc_AssertionError, "get_encoding() failed"); return NULL; } return PyString_FromString(encoding); } static PyObject * Migemo_query(Migemo *self, PyObject *args, PyObject *kwds) { PyObject *result, *pyquery, *pyrestr; char *query, encoding[7]; unsigned char *regex; static char *kwlist[] = {"query", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &pyquery)) { return NULL; } if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) { PyErr_SetString(PyExc_AssertionError, "get_encoding() failed"); return NULL; } if (PyUnicode_Check(pyquery)) { PyObject *q = PyUnicode_AsEncodedString(pyquery, encoding, "strict"); if (q == NULL) { return NULL; } query = PyString_AS_STRING(q); Py_DECREF(q); } else if (PyString_Check(pyquery)) { query = PyString_AS_STRING(pyquery); } else { PyErr_SetString(PyExc_ValueError, "argument must be string"); return NULL; } if (query == NULL) { return NULL; } regex = migemo_query(self->migemo_obj, query); if (regex == NULL) { PyErr_SetString(PyExc_AssertionError, "migemo_query() failed"); return NULL; } pyrestr = PyString_FromString(regex); migemo_release(self->migemo_obj, regex); if (pyrestr == NULL) { return NULL; } result = PyUnicode_FromEncodedObject(pyrestr, encoding, "strict"); Py_DECREF(pyrestr); return result; } static PyObject * Migemo_set_operator(Migemo *self, PyObject *args, PyObject *kwds) { PyObject *result = NULL; char *op; int index; static char *kwlist[] = {"index", "op", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "is", kwlist, &index, &op)) { return NULL; } if (op) { result = PyBool_FromLong((long)migemo_set_operator(self->migemo_obj, index, op)); } return result; } static PyObject * Migemo_get_operator(Migemo *self, PyObject *args, PyObject *kwds) { PyObject *result = NULL; const unsigned char *op; int index; static char *kwlist[] = {"index", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "i", kwlist, &index)) { return NULL; } if (op = migemo_get_operator(self->migemo_obj, index)) { result = PyString_FromString(op); } else { PyErr_SetString(PyExc_ValueError, "invalid opindex"); } return result; } static PyObject * Migemo_load(Migemo *self, PyObject *args, PyObject *kwds) { PyObject *result = NULL; char *dict_file; int dict_id; static char *kwlist[] = {"dict_id", "dict_file", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "is", kwlist, &dict_id, &dict_file)) { return NULL; } if (dict_file) { int ret = isloadable(dict_file); if (ret != 0) { PyErr_SetString(PyExc_ValueError, strerror(ret)); return NULL; } result = PyInt_FromLong((long)migemo_load(self->migemo_obj, dict_id, dict_file)); } return result; } static PyObject * Migemo_is_enable(Migemo *self) { return PyBool_FromLong((long)migemo_is_enable(self->migemo_obj)); } static PyMethodDef Migemo_methods[] = { {"query", (PyCFunction)Migemo_query, METH_KEYWORDS, "return regex from romaji string\n\ \n\ def query(query)\n\ query: romaji string (str or unicode)\n\ \n\ returns: regex string as Unicode object"}, {"set_operator", (PyCFunction)Migemo_set_operator, METH_KEYWORDS, "set operator string as the meta character of regex\n\ \n\ def set_operator(index, op):\n\ index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\ OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\ op: operator string (str)\n\ \n\ returns: boolean value"}, {"get_operator", (PyCFunction)Migemo_get_operator, METH_KEYWORDS, "get operator string as the meta character of regex\n\ \n\ def get_operator(index)\n\ index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\ OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\ \n\ returns: operator string (str)"}, {"load", (PyCFunction)Migemo_load, METH_KEYWORDS, "add dictionary to Migemo object\n\ \n\ def load(dict_id, dict_file)\n\ dict_id: (DICTID_HAN2ZEN|DICTID_HIRA2KATA|DICTID_MIGEMO|\n\ DICTID_ROMA2HIRA|DICTID_ZEN2HAN)\n\ dict_file: path to dictionary file (str)\n\ \n\ returns: ID of loaded dictionary"}, {"is_enable", (PyCFunction)Migemo_is_enable, METH_NOARGS, "check internal migemo_dict\n\ \n\ def is_enable()\n\ returns: boolean value"}, {"get_encoding", (PyCFunction)Migemo_get_encoding, METH_NOARGS, "get dictionary encoding\n\ \n\ def get_encoding()\n\ returns: encoding string (str)"}, {NULL} /* Sentinel */ }; static PyMemberDef Migemo_members[] = { {NULL} /* Sentinel */ }; static PyTypeObject MigemoType = { PyObject_HEAD_INIT(NULL) 0, /*ob_size*/ "migemo.Migemo", /*tp_name*/ sizeof(Migemo), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)Migemo_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash */ 0, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ "", /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ Migemo_methods, /* tp_methods */ Migemo_members, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ (initproc)Migemo_init, /* tp_init */ 0, /* tp_alloc */ Migemo_new, /* tp_new */ }; static PyMethodDef module_methods[] = { {NULL} /* Sentinel */ }; #ifndef PyMODINIT_FUNC #define PyMODINIT_FUNC void #endif PyMODINIT_FUNC initmigemo(void) { PyObject* m; if (PyType_Ready(&MigemoType) < 0) return; m = Py_InitModule3("migemo", module_methods, "C/Migemo Python binding"); Py_INCREF(&MigemoType); PyModule_AddObject(m, "Migemo", (PyObject *)&MigemoType); PyModule_AddObject(m, "PYMIGEMO_VERSION", Py_BuildValue("s", PYMIGEMO_VERSION)); PyModule_AddObject(m, "MIGEMO_VERSION", Py_BuildValue("s", MIGEMO_VERSION)); PyModule_AddObject(m, "DICTID_INVALID", Py_BuildValue("i", MIGEMO_DICTID_INVALID)); PyModule_AddObject(m, "DICTID_MIGEMO", Py_BuildValue("i", MIGEMO_DICTID_MIGEMO)); PyModule_AddObject(m, "DICTID_ROMA2HIRA", Py_BuildValue("i", MIGEMO_DICTID_ROMA2HIRA)); PyModule_AddObject(m, "DICTID_HIRA2KATA", Py_BuildValue("i", MIGEMO_DICTID_HIRA2KATA)); PyModule_AddObject(m, "DICTID_HAN2ZEN", Py_BuildValue("i", MIGEMO_DICTID_HAN2ZEN)); PyModule_AddObject(m, "DICTID_ZEN2HAN", Py_BuildValue("i", MIGEMO_DICTID_ZEN2HAN)); PyModule_AddObject(m, "OPINDEX_OR", Py_BuildValue("i", MIGEMO_OPINDEX_OR)); PyModule_AddObject(m, "OPINDEX_NEST_IN", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_IN)); PyModule_AddObject(m, "OPINDEX_NEST_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_OUT)); PyModule_AddObject(m, "OPINDEX_SELECT_IN", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_IN)); PyModule_AddObject(m, "OPINDEX_SELECT_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_OUT)); PyModule_AddObject(m, "OPINDEX_NEWLINE", Py_BuildValue("i", MIGEMO_OPINDEX_NEWLINE)); }