[21] | 1 | /* |
---|
[37] | 2 | * pymigemo.c - C/Migemo Python binding |
---|
[38] | 3 | * Copyright(C) 2005-2010, Atzm WATANABE <atzm@atzm.org> |
---|
[21] | 4 | * |
---|
| 5 | * $Id$ |
---|
| 6 | */ |
---|
| 7 | |
---|
| 8 | #include <Python.h> |
---|
[30] | 9 | #include <structmember.h> |
---|
| 10 | #include <migemo.h> |
---|
[34] | 11 | #include <stdbool.h> |
---|
[30] | 12 | #include <string.h> |
---|
[40] | 13 | #include <stdlib.h> |
---|
[36] | 14 | #include <errno.h> |
---|
| 15 | #include <sys/types.h> |
---|
| 16 | #include <sys/stat.h> |
---|
| 17 | #include <fcntl.h> |
---|
| 18 | #include <unistd.h> |
---|
[21] | 19 | |
---|
[34] | 20 | #define PYMIGEMO_VERSION "0.3" |
---|
[21] | 21 | |
---|
/*
 * Local definition of struct _migemo so that members of a migemo handle
 * can be dereferenced from this file (only `charset` is read here).
 *
 * NOTE(review): presumably this has to mirror the member order of the
 * library's own private structure -- confirm against the C/Migemo version
 * being linked before touching the layout.
 */
struct _migemo {
    int   enable;
    void *mtree;
    int   charset;      /* consumed by get_encoding() */
    void *roma2hira;
    void *hira2kata;
    void *han2zen;
    void *zen2han;
    void *rx;
    void *addword;
    void *char2int;
};
| 35 | |
---|
| 36 | typedef struct { |
---|
[30] | 37 | PyObject_HEAD |
---|
| 38 | migemo *migemo_obj; |
---|
[21] | 39 | } Migemo; |
---|
| 40 | |
---|
/*
 * Write the Python codec name that corresponds to a migemo charset code
 * into `encoding`.
 *
 *   encoding: destination buffer (must not be NULL)
 *   size:     capacity of `encoding` in bytes, including the terminator
 *   charset:  charset code taken from the migemo handle
 *
 * Returns true on success.  Returns false, leaving the buffer untouched,
 * when `encoding` is NULL or the name plus its NUL terminator does not fit.
 */
static bool
get_encoding(char *encoding, size_t size, int charset)
{
    const char *name;
    size_t len;

    /* NOTE(review): 1/2/3 presumably match the library's internal charset
     * identifiers -- confirm against the C/Migemo sources. */
    switch (charset) {
    case 1:
        name = "cp932";
        break;
    case 2:
        name = "euc_jp";
        break;
    case 3:
        name = "utf_8";
        break;
    default:
        name = "ascii";
        break;
    }

    if (encoding == NULL) {
        return false;
    }

    len = strlen(name);
    if (len >= size) {
        return false;
    }

    memcpy(encoding, name, len + 1);    /* + 1 copies the terminator too */
    return true;
}
| 67 | |
---|
/*
 * Check that `path` can be opened for reading and is not a directory.
 *
 * Returns 0 when the path looks loadable, otherwise an errno value
 * describing the first problem encountered (EISDIR for a directory).
 */
static int
isloadable(const char *path)
{
    struct stat st;
    int ret = 0;
    int fd;

    fd = open(path, O_RDONLY);
    if (fd < 0) {
        return errno;
    }

    if (fstat(fd, &st) < 0) {
        ret = errno;
    }
    else if (S_ISDIR(st.st_mode)) {
        ret = EISDIR;
    }

    /* Report a close() failure only when nothing went wrong earlier, so
     * the more informative first error is never overwritten. */
    if (close(fd) < 0 && ret == 0) {
        ret = errno;
    }

    return ret;
}
| 94 | |
---|
[21] | 95 | static void |
---|
| 96 | Migemo_dealloc(Migemo *self) |
---|
| 97 | { |
---|
[30] | 98 | if (self->migemo_obj) { |
---|
| 99 | migemo_close(self->migemo_obj); |
---|
| 100 | } |
---|
[21] | 101 | |
---|
[30] | 102 | self->ob_type->tp_free((PyObject *)self); |
---|
[21] | 103 | } |
---|
| 104 | |
---|
| 105 | static PyObject * |
---|
| 106 | Migemo_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
---|
| 107 | { |
---|
[30] | 108 | Migemo *self; |
---|
[21] | 109 | |
---|
[30] | 110 | self = (Migemo *)type->tp_alloc(type, 0); |
---|
[21] | 111 | |
---|
[30] | 112 | if (self != NULL) { |
---|
| 113 | self->migemo_obj = NULL; |
---|
| 114 | } |
---|
| 115 | |
---|
| 116 | return (PyObject *)self; |
---|
[21] | 117 | } |
---|
| 118 | |
---|
| 119 | static int |
---|
| 120 | Migemo_init(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 121 | { |
---|
[30] | 122 | migemo *migemo_obj; |
---|
| 123 | char *dictionary; |
---|
[21] | 124 | |
---|
[30] | 125 | static char *kwlist[] = {"dictionary", NULL}; |
---|
[21] | 126 | |
---|
[30] | 127 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", kwlist, &dictionary)) { |
---|
| 128 | return -1; |
---|
| 129 | } |
---|
[21] | 130 | |
---|
[30] | 131 | if (dictionary) { |
---|
[36] | 132 | int ret = isloadable(dictionary); |
---|
| 133 | |
---|
| 134 | if (ret != 0) { |
---|
| 135 | PyErr_SetString(PyExc_ValueError, strerror(ret)); |
---|
| 136 | return -1; |
---|
| 137 | } |
---|
| 138 | |
---|
[30] | 139 | if (self->migemo_obj) { |
---|
| 140 | migemo_close(self->migemo_obj); |
---|
| 141 | } |
---|
[21] | 142 | |
---|
[30] | 143 | migemo_obj = migemo_open(dictionary); |
---|
[21] | 144 | |
---|
[30] | 145 | if (migemo_obj) { |
---|
| 146 | self->migemo_obj = migemo_obj; |
---|
| 147 | } |
---|
| 148 | else { |
---|
[34] | 149 | PyErr_SetString(PyExc_AssertionError, "migemo_open() failed"); |
---|
[30] | 150 | return -1; |
---|
| 151 | } |
---|
| 152 | } |
---|
| 153 | |
---|
| 154 | return 0; |
---|
[21] | 155 | } |
---|
| 156 | |
---|
| 157 | static PyObject * |
---|
| 158 | Migemo_get_encoding(Migemo *self) |
---|
| 159 | { |
---|
[30] | 160 | char encoding[7]; |
---|
| 161 | |
---|
| 162 | if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) { |
---|
[34] | 163 | PyErr_SetString(PyExc_AssertionError, "get_encoding() failed"); |
---|
[30] | 164 | return NULL; |
---|
| 165 | } |
---|
| 166 | |
---|
| 167 | return PyString_FromString(encoding); |
---|
[21] | 168 | } |
---|
| 169 | |
---|
| 170 | static PyObject * |
---|
| 171 | Migemo_query(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 172 | { |
---|
[34] | 173 | PyObject *result, *pyquery, *pyrestr; |
---|
[40] | 174 | char encoding[7], *query = NULL; |
---|
[30] | 175 | unsigned char *regex; |
---|
[21] | 176 | |
---|
[30] | 177 | static char *kwlist[] = {"query", NULL}; |
---|
[21] | 178 | |
---|
[34] | 179 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &pyquery)) { |
---|
[30] | 180 | return NULL; |
---|
| 181 | } |
---|
[21] | 182 | |
---|
[30] | 183 | if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) { |
---|
[34] | 184 | PyErr_SetString(PyExc_AssertionError, "get_encoding() failed"); |
---|
[30] | 185 | return NULL; |
---|
| 186 | } |
---|
[21] | 187 | |
---|
[34] | 188 | if (PyUnicode_Check(pyquery)) { |
---|
| 189 | PyObject *q = PyUnicode_AsEncodedString(pyquery, encoding, "strict"); |
---|
| 190 | |
---|
| 191 | if (q == NULL) { |
---|
| 192 | return NULL; |
---|
| 193 | } |
---|
| 194 | |
---|
[40] | 195 | query = strdup(PyString_AS_STRING(q)); |
---|
[34] | 196 | Py_DECREF(q); |
---|
[41] | 197 | |
---|
| 198 | if (query == NULL) { |
---|
| 199 | return PyErr_NoMemory(); |
---|
| 200 | } |
---|
[30] | 201 | } |
---|
[34] | 202 | else if (PyString_Check(pyquery)) { |
---|
[40] | 203 | query = strdup(PyString_AS_STRING(pyquery)); |
---|
[41] | 204 | |
---|
| 205 | if (query == NULL) { |
---|
| 206 | return PyErr_NoMemory(); |
---|
| 207 | } |
---|
[30] | 208 | } |
---|
| 209 | else { |
---|
[34] | 210 | PyErr_SetString(PyExc_ValueError, "argument must be string"); |
---|
[30] | 211 | return NULL; |
---|
| 212 | } |
---|
[21] | 213 | |
---|
[34] | 214 | regex = migemo_query(self->migemo_obj, query); |
---|
[40] | 215 | free(query); |
---|
[34] | 216 | if (regex == NULL) { |
---|
| 217 | PyErr_SetString(PyExc_AssertionError, "migemo_query() failed"); |
---|
| 218 | return NULL; |
---|
[21] | 219 | } |
---|
| 220 | |
---|
[34] | 221 | pyrestr = PyString_FromString(regex); |
---|
| 222 | migemo_release(self->migemo_obj, regex); |
---|
| 223 | if (pyrestr == NULL) { |
---|
[30] | 224 | return NULL; |
---|
| 225 | } |
---|
[21] | 226 | |
---|
[34] | 227 | result = PyUnicode_FromEncodedObject(pyrestr, encoding, "strict"); |
---|
| 228 | Py_DECREF(pyrestr); |
---|
[30] | 229 | return result; |
---|
[21] | 230 | } |
---|
| 231 | |
---|
| 232 | static PyObject * |
---|
| 233 | Migemo_set_operator(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 234 | { |
---|
[35] | 235 | PyObject *result = NULL; |
---|
[30] | 236 | char *op; |
---|
| 237 | int index; |
---|
[21] | 238 | |
---|
[30] | 239 | static char *kwlist[] = {"index", "op", NULL}; |
---|
[21] | 240 | |
---|
[30] | 241 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "is", kwlist, &index, &op)) { |
---|
| 242 | return NULL; |
---|
| 243 | } |
---|
[21] | 244 | |
---|
[30] | 245 | if (op) { |
---|
[34] | 246 | result = PyBool_FromLong((long)migemo_set_operator(self->migemo_obj, index, op)); |
---|
[30] | 247 | } |
---|
[21] | 248 | |
---|
[30] | 249 | return result; |
---|
[21] | 250 | } |
---|
| 251 | |
---|
| 252 | static PyObject * |
---|
| 253 | Migemo_get_operator(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 254 | { |
---|
[35] | 255 | PyObject *result = NULL; |
---|
[34] | 256 | const unsigned char *op; |
---|
| 257 | int index; |
---|
[21] | 258 | |
---|
[30] | 259 | static char *kwlist[] = {"index", NULL}; |
---|
[21] | 260 | |
---|
[30] | 261 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "i", kwlist, &index)) { |
---|
| 262 | return NULL; |
---|
| 263 | } |
---|
[21] | 264 | |
---|
[30] | 265 | if (op = migemo_get_operator(self->migemo_obj, index)) { |
---|
| 266 | result = PyString_FromString(op); |
---|
| 267 | } |
---|
[35] | 268 | else { |
---|
| 269 | PyErr_SetString(PyExc_ValueError, "invalid opindex"); |
---|
[30] | 270 | } |
---|
| 271 | |
---|
| 272 | return result; |
---|
[21] | 273 | } |
---|
| 274 | |
---|
| 275 | static PyObject * |
---|
| 276 | Migemo_load(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 277 | { |
---|
[35] | 278 | PyObject *result = NULL; |
---|
[30] | 279 | char *dict_file; |
---|
| 280 | int dict_id; |
---|
[21] | 281 | |
---|
[30] | 282 | static char *kwlist[] = {"dict_id", "dict_file", NULL}; |
---|
[21] | 283 | |
---|
[30] | 284 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "is", kwlist, &dict_id, &dict_file)) { |
---|
| 285 | return NULL; |
---|
| 286 | } |
---|
[21] | 287 | |
---|
[30] | 288 | if (dict_file) { |
---|
[36] | 289 | int ret = isloadable(dict_file); |
---|
| 290 | |
---|
| 291 | if (ret != 0) { |
---|
| 292 | PyErr_SetString(PyExc_ValueError, strerror(ret)); |
---|
| 293 | return NULL; |
---|
| 294 | } |
---|
| 295 | |
---|
[30] | 296 | result = PyInt_FromLong((long)migemo_load(self->migemo_obj, dict_id, dict_file)); |
---|
| 297 | } |
---|
[21] | 298 | |
---|
[30] | 299 | return result; |
---|
[21] | 300 | } |
---|
| 301 | |
---|
| 302 | static PyObject * |
---|
| 303 | Migemo_is_enable(Migemo *self) |
---|
| 304 | { |
---|
[34] | 305 | return PyBool_FromLong((long)migemo_is_enable(self->migemo_obj)); |
---|
[21] | 306 | } |
---|
| 307 | |
---|
| 308 | static PyMethodDef Migemo_methods[] = { |
---|
[30] | 309 | {"query", (PyCFunction)Migemo_query, METH_KEYWORDS, |
---|
| 310 | "return regex from romaji string\n\ |
---|
[21] | 311 | \n\ |
---|
| 312 | def query(query)\n\ |
---|
| 313 | query: romaji string (str or unicode)\n\ |
---|
| 314 | \n\ |
---|
| 315 | returns: regex string as Unicode object"}, |
---|
[30] | 316 | {"set_operator", (PyCFunction)Migemo_set_operator, METH_KEYWORDS, |
---|
| 317 | "set operator string as the meta character of regex\n\ |
---|
[21] | 318 | \n\ |
---|
| 319 | def set_operator(index, op):\n\ |
---|
| 320 | index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\ |
---|
| 321 | OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\ |
---|
| 322 | op: operator string (str)\n\ |
---|
| 323 | \n\ |
---|
| 324 | returns: boolean value"}, |
---|
[30] | 325 | {"get_operator", (PyCFunction)Migemo_get_operator, METH_KEYWORDS, |
---|
| 326 | "get operator string as the meta character of regex\n\ |
---|
[21] | 327 | \n\ |
---|
| 328 | def get_operator(index)\n\ |
---|
| 329 | index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\ |
---|
| 330 | OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\ |
---|
| 331 | \n\ |
---|
| 332 | returns: operator string (str)"}, |
---|
[30] | 333 | {"load", (PyCFunction)Migemo_load, METH_KEYWORDS, |
---|
| 334 | "add dictionary to Migemo object\n\ |
---|
[21] | 335 | \n\ |
---|
| 336 | def load(dict_id, dict_file)\n\ |
---|
| 337 | dict_id: (DICTID_HAN2ZEN|DICTID_HIRA2KATA|DICTID_MIGEMO|\n\ |
---|
| 338 | DICTID_ROMA2HIRA|DICTID_ZEN2HAN)\n\ |
---|
| 339 | dict_file: path to dictionary file (str)\n\ |
---|
| 340 | \n\ |
---|
[37] | 341 | returns: ID of loaded dictionary"}, |
---|
[30] | 342 | {"is_enable", (PyCFunction)Migemo_is_enable, METH_NOARGS, |
---|
| 343 | "check internal migemo_dict\n\ |
---|
[21] | 344 | \n\ |
---|
| 345 | def is_enable()\n\ |
---|
| 346 | returns: boolean value"}, |
---|
[30] | 347 | {"get_encoding", (PyCFunction)Migemo_get_encoding, METH_NOARGS, |
---|
| 348 | "get dictionary encoding\n\ |
---|
[21] | 349 | \n\ |
---|
| 350 | def get_encoding()\n\ |
---|
| 351 | returns: encoding string (str)"}, |
---|
[30] | 352 | {NULL} /* Sentinel */ |
---|
[21] | 353 | }; |
---|
| 354 | |
---|
| 355 | static PyMemberDef Migemo_members[] = { |
---|
[30] | 356 | {NULL} /* Sentinel */ |
---|
[21] | 357 | }; |
---|
| 358 | |
---|
| 359 | static PyTypeObject MigemoType = { |
---|
[30] | 360 | PyObject_HEAD_INIT(NULL) |
---|
| 361 | 0, /*ob_size*/ |
---|
| 362 | "migemo.Migemo", /*tp_name*/ |
---|
| 363 | sizeof(Migemo), /*tp_basicsize*/ |
---|
| 364 | 0, /*tp_itemsize*/ |
---|
| 365 | (destructor)Migemo_dealloc, /*tp_dealloc*/ |
---|
| 366 | 0, /*tp_print*/ |
---|
| 367 | 0, /*tp_getattr*/ |
---|
| 368 | 0, /*tp_setattr*/ |
---|
| 369 | 0, /*tp_compare*/ |
---|
| 370 | 0, /*tp_repr*/ |
---|
| 371 | 0, /*tp_as_number*/ |
---|
| 372 | 0, /*tp_as_sequence*/ |
---|
| 373 | 0, /*tp_as_mapping*/ |
---|
| 374 | 0, /*tp_hash */ |
---|
| 375 | 0, /*tp_call*/ |
---|
| 376 | 0, /*tp_str*/ |
---|
| 377 | 0, /*tp_getattro*/ |
---|
| 378 | 0, /*tp_setattro*/ |
---|
| 379 | 0, /*tp_as_buffer*/ |
---|
| 380 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ |
---|
[37] | 381 | "", /* tp_doc */ |
---|
[30] | 382 | 0, /* tp_traverse */ |
---|
| 383 | 0, /* tp_clear */ |
---|
| 384 | 0, /* tp_richcompare */ |
---|
| 385 | 0, /* tp_weaklistoffset */ |
---|
| 386 | 0, /* tp_iter */ |
---|
| 387 | 0, /* tp_iternext */ |
---|
| 388 | Migemo_methods, /* tp_methods */ |
---|
| 389 | Migemo_members, /* tp_members */ |
---|
| 390 | 0, /* tp_getset */ |
---|
| 391 | 0, /* tp_base */ |
---|
| 392 | 0, /* tp_dict */ |
---|
| 393 | 0, /* tp_descr_get */ |
---|
| 394 | 0, /* tp_descr_set */ |
---|
| 395 | 0, /* tp_dictoffset */ |
---|
| 396 | (initproc)Migemo_init, /* tp_init */ |
---|
| 397 | 0, /* tp_alloc */ |
---|
| 398 | Migemo_new, /* tp_new */ |
---|
[21] | 399 | }; |
---|
| 400 | |
---|
| 401 | static PyMethodDef module_methods[] = { |
---|
[30] | 402 | {NULL} /* Sentinel */ |
---|
[21] | 403 | }; |
---|
| 404 | |
---|
| 405 | #ifndef PyMODINIT_FUNC |
---|
| 406 | #define PyMODINIT_FUNC void |
---|
| 407 | #endif |
---|
| 408 | PyMODINIT_FUNC |
---|
| 409 | initmigemo(void) |
---|
| 410 | { |
---|
[30] | 411 | PyObject* m; |
---|
[21] | 412 | |
---|
[30] | 413 | if (PyType_Ready(&MigemoType) < 0) |
---|
| 414 | return; |
---|
[21] | 415 | |
---|
[37] | 416 | m = Py_InitModule3("migemo", module_methods, "C/Migemo Python binding"); |
---|
[21] | 417 | |
---|
[30] | 418 | Py_INCREF(&MigemoType); |
---|
| 419 | PyModule_AddObject(m, "Migemo", (PyObject *)&MigemoType); |
---|
| 420 | PyModule_AddObject(m, "PYMIGEMO_VERSION", Py_BuildValue("s", PYMIGEMO_VERSION)); |
---|
[21] | 421 | |
---|
[30] | 422 | PyModule_AddObject(m, "MIGEMO_VERSION", Py_BuildValue("s", MIGEMO_VERSION)); |
---|
[21] | 423 | |
---|
[30] | 424 | PyModule_AddObject(m, "DICTID_INVALID", Py_BuildValue("i", MIGEMO_DICTID_INVALID)); |
---|
| 425 | PyModule_AddObject(m, "DICTID_MIGEMO", Py_BuildValue("i", MIGEMO_DICTID_MIGEMO)); |
---|
| 426 | PyModule_AddObject(m, "DICTID_ROMA2HIRA", Py_BuildValue("i", MIGEMO_DICTID_ROMA2HIRA)); |
---|
| 427 | PyModule_AddObject(m, "DICTID_HIRA2KATA", Py_BuildValue("i", MIGEMO_DICTID_HIRA2KATA)); |
---|
| 428 | PyModule_AddObject(m, "DICTID_HAN2ZEN", Py_BuildValue("i", MIGEMO_DICTID_HAN2ZEN)); |
---|
| 429 | PyModule_AddObject(m, "DICTID_ZEN2HAN", Py_BuildValue("i", MIGEMO_DICTID_ZEN2HAN)); |
---|
[21] | 430 | |
---|
[30] | 431 | PyModule_AddObject(m, "OPINDEX_OR", Py_BuildValue("i", MIGEMO_OPINDEX_OR)); |
---|
| 432 | PyModule_AddObject(m, "OPINDEX_NEST_IN", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_IN)); |
---|
| 433 | PyModule_AddObject(m, "OPINDEX_NEST_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_OUT)); |
---|
| 434 | PyModule_AddObject(m, "OPINDEX_SELECT_IN", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_IN)); |
---|
| 435 | PyModule_AddObject(m, "OPINDEX_SELECT_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_OUT)); |
---|
| 436 | PyModule_AddObject(m, "OPINDEX_NEWLINE", Py_BuildValue("i", MIGEMO_OPINDEX_NEWLINE)); |
---|
[21] | 437 | } |
---|