[21] | 1 | /* |
---|
| 2 | * pymigemo.c - C/Migemo wrapper for Python |
---|
[30] | 3 | * Copyright(C) 2005-2009, Atzm WATANABE <atzm@atzm.org> |
---|
[21] | 4 | * |
---|
| 5 | * $Id$ |
---|
| 6 | */ |
---|
| 7 | |
---|
| 8 | #include <Python.h> |
---|
[30] | 9 | #include <structmember.h> |
---|
| 10 | #include <migemo.h> |
---|
[34] | 11 | #include <stdbool.h> |
---|
[30] | 12 | #include <string.h> |
---|
[21] | 13 | |
---|
[34] | 14 | #define PYMIGEMO_VERSION "0.3" |
---|
[21] | 15 | |
---|
/*
 * Local definition of the struct behind the `migemo` handle type, declared
 * here so this file can read handle members directly (only `charset` is
 * actually accessed in this file).
 *
 * NOTE(review): member order and types presumably have to match the
 * C/Migemo library this module is linked against -- confirm when the
 * library is upgraded.
 */
struct _migemo {
    int   enable;
    void *mtree;
    int   charset;    /* translated to a codec name by get_encoding() */
    void *roma2hira;
    void *hira2kata;
    void *han2zen;
    void *zen2han;
    void *rx;
    void *addword;
    void *char2int;
};
---|
| 29 | |
---|
| 30 | typedef struct { |
---|
[30] | 31 | PyObject_HEAD |
---|
| 32 | migemo *migemo_obj; |
---|
[21] | 33 | } Migemo; |
---|
| 34 | |
---|
/*
 * Copy the codec name that corresponds to a charset id into `encoding`.
 *
 *   1 -> "cp932", 2 -> "euc_jp", 3 -> "utf_8", any other value -> "ascii"
 *
 * encoding: destination buffer
 * size:     capacity of `encoding` in bytes, terminating NUL included
 * charset:  charset id (read from the migemo handle by the callers)
 *
 * Returns true when the name and its NUL fit and were copied; returns false
 * and leaves `encoding` untouched when the buffer is too small.
 */
static bool
get_encoding(char *encoding, size_t size, int charset)
{
    static const char *const names[] = {"ascii", "cp932", "euc_jp", "utf_8"};
    const char *name = names[0];
    size_t len;

    if (charset >= 1 && charset <= 3) {
        name = names[charset];
    }

    len = strlen(name);
    if (len >= size) {
        return false;
    }

    memcpy(encoding, name, len + 1);  /* +1 copies the terminating NUL */
    return true;
}
---|
| 61 | |
---|
[21] | 62 | static void |
---|
| 63 | Migemo_dealloc(Migemo *self) |
---|
| 64 | { |
---|
[30] | 65 | if (self->migemo_obj) { |
---|
| 66 | migemo_close(self->migemo_obj); |
---|
| 67 | } |
---|
[21] | 68 | |
---|
[30] | 69 | self->ob_type->tp_free((PyObject *)self); |
---|
[21] | 70 | } |
---|
| 71 | |
---|
| 72 | static PyObject * |
---|
| 73 | Migemo_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
---|
| 74 | { |
---|
[30] | 75 | Migemo *self; |
---|
[21] | 76 | |
---|
[30] | 77 | self = (Migemo *)type->tp_alloc(type, 0); |
---|
[21] | 78 | |
---|
[30] | 79 | if (self != NULL) { |
---|
| 80 | self->migemo_obj = NULL; |
---|
| 81 | } |
---|
| 82 | |
---|
| 83 | return (PyObject *)self; |
---|
[21] | 84 | } |
---|
| 85 | |
---|
| 86 | static int |
---|
| 87 | Migemo_init(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 88 | { |
---|
[30] | 89 | migemo *migemo_obj; |
---|
| 90 | char *dictionary; |
---|
[21] | 91 | |
---|
[30] | 92 | static char *kwlist[] = {"dictionary", NULL}; |
---|
[21] | 93 | |
---|
[30] | 94 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", kwlist, &dictionary)) { |
---|
| 95 | return -1; |
---|
| 96 | } |
---|
[21] | 97 | |
---|
[30] | 98 | if (dictionary) { |
---|
| 99 | if (self->migemo_obj) { |
---|
| 100 | migemo_close(self->migemo_obj); |
---|
| 101 | } |
---|
[21] | 102 | |
---|
[30] | 103 | migemo_obj = migemo_open(dictionary); |
---|
[21] | 104 | |
---|
[30] | 105 | if (migemo_obj) { |
---|
| 106 | self->migemo_obj = migemo_obj; |
---|
| 107 | } |
---|
| 108 | else { |
---|
[34] | 109 | PyErr_SetString(PyExc_AssertionError, "migemo_open() failed"); |
---|
[30] | 110 | return -1; |
---|
| 111 | } |
---|
| 112 | } |
---|
| 113 | |
---|
| 114 | return 0; |
---|
[21] | 115 | } |
---|
| 116 | |
---|
| 117 | static PyObject * |
---|
| 118 | Migemo_get_encoding(Migemo *self) |
---|
| 119 | { |
---|
[30] | 120 | char encoding[7]; |
---|
| 121 | |
---|
| 122 | if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) { |
---|
[34] | 123 | PyErr_SetString(PyExc_AssertionError, "get_encoding() failed"); |
---|
[30] | 124 | return NULL; |
---|
| 125 | } |
---|
| 126 | |
---|
| 127 | return PyString_FromString(encoding); |
---|
[21] | 128 | } |
---|
| 129 | |
---|
| 130 | static PyObject * |
---|
| 131 | Migemo_query(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 132 | { |
---|
[34] | 133 | PyObject *result, *pyquery, *pyrestr; |
---|
[30] | 134 | char *query, encoding[7]; |
---|
| 135 | unsigned char *regex; |
---|
[21] | 136 | |
---|
[30] | 137 | static char *kwlist[] = {"query", NULL}; |
---|
[21] | 138 | |
---|
[34] | 139 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &pyquery)) { |
---|
[30] | 140 | return NULL; |
---|
| 141 | } |
---|
[21] | 142 | |
---|
[30] | 143 | if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) { |
---|
[34] | 144 | PyErr_SetString(PyExc_AssertionError, "get_encoding() failed"); |
---|
[30] | 145 | return NULL; |
---|
| 146 | } |
---|
[21] | 147 | |
---|
[34] | 148 | if (PyUnicode_Check(pyquery)) { |
---|
| 149 | PyObject *q = PyUnicode_AsEncodedString(pyquery, encoding, "strict"); |
---|
| 150 | |
---|
| 151 | if (q == NULL) { |
---|
| 152 | return NULL; |
---|
| 153 | } |
---|
| 154 | |
---|
| 155 | query = PyString_AS_STRING(q); |
---|
| 156 | Py_DECREF(q); |
---|
[30] | 157 | } |
---|
[34] | 158 | else if (PyString_Check(pyquery)) { |
---|
| 159 | query = PyString_AS_STRING(pyquery); |
---|
[30] | 160 | } |
---|
| 161 | else { |
---|
[34] | 162 | PyErr_SetString(PyExc_ValueError, "argument must be string"); |
---|
[30] | 163 | return NULL; |
---|
| 164 | } |
---|
[34] | 165 | if (query == NULL) { |
---|
| 166 | return NULL; |
---|
| 167 | } |
---|
[21] | 168 | |
---|
[34] | 169 | regex = migemo_query(self->migemo_obj, query); |
---|
| 170 | if (regex == NULL) { |
---|
| 171 | PyErr_SetString(PyExc_AssertionError, "migemo_query() failed"); |
---|
| 172 | return NULL; |
---|
[21] | 173 | } |
---|
| 174 | |
---|
[34] | 175 | pyrestr = PyString_FromString(regex); |
---|
| 176 | migemo_release(self->migemo_obj, regex); |
---|
| 177 | if (pyrestr == NULL) { |
---|
[30] | 178 | return NULL; |
---|
| 179 | } |
---|
[21] | 180 | |
---|
[34] | 181 | result = PyUnicode_FromEncodedObject(pyrestr, encoding, "strict"); |
---|
| 182 | Py_DECREF(pyrestr); |
---|
[30] | 183 | return result; |
---|
[21] | 184 | } |
---|
| 185 | |
---|
| 186 | static PyObject * |
---|
| 187 | Migemo_set_operator(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 188 | { |
---|
[30] | 189 | PyObject *result; |
---|
| 190 | char *op; |
---|
| 191 | int index; |
---|
[21] | 192 | |
---|
[30] | 193 | static char *kwlist[] = {"index", "op", NULL}; |
---|
[21] | 194 | |
---|
[30] | 195 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "is", kwlist, &index, &op)) { |
---|
| 196 | return NULL; |
---|
| 197 | } |
---|
[21] | 198 | |
---|
[30] | 199 | if (op) { |
---|
[34] | 200 | result = PyBool_FromLong((long)migemo_set_operator(self->migemo_obj, index, op)); |
---|
[30] | 201 | } |
---|
[21] | 202 | |
---|
[30] | 203 | if (!result) { |
---|
| 204 | return NULL; |
---|
| 205 | } |
---|
| 206 | |
---|
| 207 | return result; |
---|
[21] | 208 | } |
---|
| 209 | |
---|
| 210 | static PyObject * |
---|
| 211 | Migemo_get_operator(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 212 | { |
---|
[34] | 213 | PyObject *result; |
---|
| 214 | const unsigned char *op; |
---|
| 215 | int index; |
---|
[21] | 216 | |
---|
[30] | 217 | static char *kwlist[] = {"index", NULL}; |
---|
[21] | 218 | |
---|
[30] | 219 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "i", kwlist, &index)) { |
---|
| 220 | return NULL; |
---|
| 221 | } |
---|
[21] | 222 | |
---|
[30] | 223 | if (op = migemo_get_operator(self->migemo_obj, index)) { |
---|
| 224 | result = PyString_FromString(op); |
---|
| 225 | } |
---|
[21] | 226 | |
---|
[30] | 227 | if (!result) { |
---|
| 228 | return NULL; |
---|
| 229 | } |
---|
| 230 | |
---|
| 231 | return result; |
---|
[21] | 232 | } |
---|
| 233 | |
---|
| 234 | static PyObject * |
---|
| 235 | Migemo_load(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 236 | { |
---|
[30] | 237 | PyObject *result; |
---|
| 238 | char *dict_file; |
---|
| 239 | int dict_id; |
---|
[21] | 240 | |
---|
[30] | 241 | static char *kwlist[] = {"dict_id", "dict_file", NULL}; |
---|
[21] | 242 | |
---|
[30] | 243 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "is", kwlist, &dict_id, &dict_file)) { |
---|
| 244 | return NULL; |
---|
| 245 | } |
---|
[21] | 246 | |
---|
[30] | 247 | if (dict_file) { |
---|
| 248 | result = PyInt_FromLong((long)migemo_load(self->migemo_obj, dict_id, dict_file)); |
---|
| 249 | } |
---|
[21] | 250 | |
---|
[30] | 251 | if (!result) { |
---|
| 252 | return NULL; |
---|
| 253 | } |
---|
| 254 | |
---|
| 255 | return result; |
---|
[21] | 256 | } |
---|
| 257 | |
---|
| 258 | static PyObject * |
---|
| 259 | Migemo_is_enable(Migemo *self) |
---|
| 260 | { |
---|
[34] | 261 | return PyBool_FromLong((long)migemo_is_enable(self->migemo_obj)); |
---|
[21] | 262 | } |
---|
| 263 | |
---|
| 264 | static PyMethodDef Migemo_methods[] = { |
---|
[30] | 265 | {"query", (PyCFunction)Migemo_query, METH_KEYWORDS, |
---|
| 266 | "return regex from romaji string\n\ |
---|
[21] | 267 | \n\ |
---|
| 268 | def query(query)\n\ |
---|
| 269 | query: romaji string (str or unicode)\n\ |
---|
| 270 | \n\ |
---|
| 271 | returns: regex string as Unicode object"}, |
---|
[30] | 272 | {"set_operator", (PyCFunction)Migemo_set_operator, METH_KEYWORDS, |
---|
| 273 | "set operator string as the meta character of regex\n\ |
---|
[21] | 274 | \n\ |
---|
| 275 | def set_operator(index, op):\n\ |
---|
| 276 | index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\ |
---|
| 277 | OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\ |
---|
| 278 | op: operator string (str)\n\ |
---|
| 279 | \n\ |
---|
| 280 | returns: boolean value"}, |
---|
[30] | 281 | {"get_operator", (PyCFunction)Migemo_get_operator, METH_KEYWORDS, |
---|
| 282 | "get operator string as the meta character of regex\n\ |
---|
[21] | 283 | \n\ |
---|
| 284 | def get_operator(index)\n\ |
---|
| 285 | index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\ |
---|
| 286 | OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\ |
---|
| 287 | \n\ |
---|
| 288 | returns: operator string (str)"}, |
---|
[30] | 289 | {"load", (PyCFunction)Migemo_load, METH_KEYWORDS, |
---|
| 290 | "add dictionary to Migemo object\n\ |
---|
[21] | 291 | \n\ |
---|
| 292 | def load(dict_id, dict_file)\n\ |
---|
| 293 | dict_id: (DICTID_HAN2ZEN|DICTID_HIRA2KATA|DICTID_MIGEMO|\n\ |
---|
| 294 | DICTID_ROMA2HIRA|DICTID_ZEN2HAN)\n\ |
---|
| 295 | dict_file: path to dictionary file (str)\n\ |
---|
| 296 | \n\ |
---|
| 297 | returns: boolean value"}, |
---|
[30] | 298 | {"is_enable", (PyCFunction)Migemo_is_enable, METH_NOARGS, |
---|
| 299 | "check internal migemo_dict\n\ |
---|
[21] | 300 | \n\ |
---|
| 301 | def is_enable()\n\ |
---|
| 302 | returns: boolean value"}, |
---|
[30] | 303 | {"get_encoding", (PyCFunction)Migemo_get_encoding, METH_NOARGS, |
---|
| 304 | "get dictionary encoding\n\ |
---|
[21] | 305 | \n\ |
---|
| 306 | def get_encoding()\n\ |
---|
| 307 | returns: encoding string (str)"}, |
---|
[30] | 308 | {NULL} /* Sentinel */ |
---|
[21] | 309 | }; |
---|
| 310 | |
---|
| 311 | static PyMemberDef Migemo_members[] = { |
---|
[30] | 312 | {NULL} /* Sentinel */ |
---|
[21] | 313 | }; |
---|
| 314 | |
---|
| 315 | static PyTypeObject MigemoType = { |
---|
[30] | 316 | PyObject_HEAD_INIT(NULL) |
---|
| 317 | 0, /*ob_size*/ |
---|
| 318 | "migemo.Migemo", /*tp_name*/ |
---|
| 319 | sizeof(Migemo), /*tp_basicsize*/ |
---|
| 320 | 0, /*tp_itemsize*/ |
---|
| 321 | (destructor)Migemo_dealloc, /*tp_dealloc*/ |
---|
| 322 | 0, /*tp_print*/ |
---|
| 323 | 0, /*tp_getattr*/ |
---|
| 324 | 0, /*tp_setattr*/ |
---|
| 325 | 0, /*tp_compare*/ |
---|
| 326 | 0, /*tp_repr*/ |
---|
| 327 | 0, /*tp_as_number*/ |
---|
| 328 | 0, /*tp_as_sequence*/ |
---|
| 329 | 0, /*tp_as_mapping*/ |
---|
| 330 | 0, /*tp_hash */ |
---|
| 331 | 0, /*tp_call*/ |
---|
| 332 | 0, /*tp_str*/ |
---|
| 333 | 0, /*tp_getattro*/ |
---|
| 334 | 0, /*tp_setattro*/ |
---|
| 335 | 0, /*tp_as_buffer*/ |
---|
| 336 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ |
---|
| 337 | "Migemo wrapper object", /* tp_doc */ |
---|
| 338 | 0, /* tp_traverse */ |
---|
| 339 | 0, /* tp_clear */ |
---|
| 340 | 0, /* tp_richcompare */ |
---|
| 341 | 0, /* tp_weaklistoffset */ |
---|
| 342 | 0, /* tp_iter */ |
---|
| 343 | 0, /* tp_iternext */ |
---|
| 344 | Migemo_methods, /* tp_methods */ |
---|
| 345 | Migemo_members, /* tp_members */ |
---|
| 346 | 0, /* tp_getset */ |
---|
| 347 | 0, /* tp_base */ |
---|
| 348 | 0, /* tp_dict */ |
---|
| 349 | 0, /* tp_descr_get */ |
---|
| 350 | 0, /* tp_descr_set */ |
---|
| 351 | 0, /* tp_dictoffset */ |
---|
| 352 | (initproc)Migemo_init, /* tp_init */ |
---|
| 353 | 0, /* tp_alloc */ |
---|
| 354 | Migemo_new, /* tp_new */ |
---|
[21] | 355 | }; |
---|
| 356 | |
---|
| 357 | static PyMethodDef module_methods[] = { |
---|
[30] | 358 | {NULL} /* Sentinel */ |
---|
[21] | 359 | }; |
---|
| 360 | |
---|
| 361 | #ifndef PyMODINIT_FUNC |
---|
| 362 | #define PyMODINIT_FUNC void |
---|
| 363 | #endif |
---|
| 364 | PyMODINIT_FUNC |
---|
| 365 | initmigemo(void) |
---|
| 366 | { |
---|
[30] | 367 | PyObject* m; |
---|
[21] | 368 | |
---|
[30] | 369 | if (PyType_Ready(&MigemoType) < 0) |
---|
| 370 | return; |
---|
[21] | 371 | |
---|
[30] | 372 | m = Py_InitModule3("migemo", module_methods, "C/Migemo wrapper"); |
---|
[21] | 373 | |
---|
[30] | 374 | Py_INCREF(&MigemoType); |
---|
| 375 | PyModule_AddObject(m, "Migemo", (PyObject *)&MigemoType); |
---|
| 376 | PyModule_AddObject(m, "PYMIGEMO_VERSION", Py_BuildValue("s", PYMIGEMO_VERSION)); |
---|
[21] | 377 | |
---|
[30] | 378 | PyModule_AddObject(m, "MIGEMO_VERSION", Py_BuildValue("s", MIGEMO_VERSION)); |
---|
[21] | 379 | |
---|
[30] | 380 | PyModule_AddObject(m, "DICTID_INVALID", Py_BuildValue("i", MIGEMO_DICTID_INVALID)); |
---|
| 381 | PyModule_AddObject(m, "DICTID_MIGEMO", Py_BuildValue("i", MIGEMO_DICTID_MIGEMO)); |
---|
| 382 | PyModule_AddObject(m, "DICTID_ROMA2HIRA", Py_BuildValue("i", MIGEMO_DICTID_ROMA2HIRA)); |
---|
| 383 | PyModule_AddObject(m, "DICTID_HIRA2KATA", Py_BuildValue("i", MIGEMO_DICTID_HIRA2KATA)); |
---|
| 384 | PyModule_AddObject(m, "DICTID_HAN2ZEN", Py_BuildValue("i", MIGEMO_DICTID_HAN2ZEN)); |
---|
| 385 | PyModule_AddObject(m, "DICTID_ZEN2HAN", Py_BuildValue("i", MIGEMO_DICTID_ZEN2HAN)); |
---|
[21] | 386 | |
---|
[30] | 387 | PyModule_AddObject(m, "OPINDEX_OR", Py_BuildValue("i", MIGEMO_OPINDEX_OR)); |
---|
| 388 | PyModule_AddObject(m, "OPINDEX_NEST_IN", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_IN)); |
---|
| 389 | PyModule_AddObject(m, "OPINDEX_NEST_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_OUT)); |
---|
| 390 | PyModule_AddObject(m, "OPINDEX_SELECT_IN", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_IN)); |
---|
| 391 | PyModule_AddObject(m, "OPINDEX_SELECT_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_OUT)); |
---|
| 392 | PyModule_AddObject(m, "OPINDEX_NEWLINE", Py_BuildValue("i", MIGEMO_OPINDEX_NEWLINE)); |
---|
[21] | 393 | } |
---|