[21] | 1 | /* |
---|
| 2 | * pymigemo.c - C/Migemo wrapper for Python |
---|
[30] | 3 | * Copyright(C) 2005-2009, Atzm WATANABE <atzm@atzm.org> |
---|
[21] | 4 | * |
---|
| 5 | * $Id$ |
---|
| 6 | */ |
---|
| 7 | |
---|
| 8 | #include <Python.h> |
---|
[30] | 9 | #include <structmember.h> |
---|
| 10 | #include <migemo.h> |
---|
| 11 | #include <string.h> |
---|
[21] | 12 | |
---|
[30] | 13 | #define PYMIGEMO_VERSION "0.2" |
---|
[21] | 14 | |
---|
/*
 * Local definition of the C/Migemo handle structure, declared here so that
 * this wrapper can read members of a `migemo` object directly (the code in
 * this file reads `charset`).
 *
 * NOTE(review): the member order and types presumably have to mirror the
 * structure used inside the linked C/Migemo library -- confirm against the
 * library sources whenever the library is upgraded.
 */
struct _migemo {
    int   enable;
    void *mtree;
    int   charset;      /* mapped to a codec name by get_encoding() */
    void *roma2hira;
    void *hira2kata;
    void *han2zen;
    void *zen2han;
    void *rx;
    void *addword;
    void *char2int;
};
---|
| 28 | |
---|
| 29 | typedef struct { |
---|
[30] | 30 | PyObject_HEAD |
---|
| 31 | migemo *migemo_obj; |
---|
[21] | 32 | } Migemo; |
---|
| 33 | |
---|
| 34 | static void |
---|
| 35 | Migemo_dealloc(Migemo *self) |
---|
| 36 | { |
---|
[30] | 37 | if (self->migemo_obj) { |
---|
| 38 | migemo_close(self->migemo_obj); |
---|
| 39 | } |
---|
[21] | 40 | |
---|
[30] | 41 | self->ob_type->tp_free((PyObject *)self); |
---|
[21] | 42 | } |
---|
| 43 | |
---|
| 44 | static PyObject * |
---|
| 45 | Migemo_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
---|
| 46 | { |
---|
[30] | 47 | Migemo *self; |
---|
[21] | 48 | |
---|
[30] | 49 | self = (Migemo *)type->tp_alloc(type, 0); |
---|
[21] | 50 | |
---|
[30] | 51 | if (self != NULL) { |
---|
| 52 | self->migemo_obj = NULL; |
---|
| 53 | } |
---|
| 54 | |
---|
| 55 | return (PyObject *)self; |
---|
[21] | 56 | } |
---|
| 57 | |
---|
| 58 | static int |
---|
| 59 | Migemo_init(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 60 | { |
---|
[30] | 61 | migemo *migemo_obj; |
---|
| 62 | char *dictionary; |
---|
[21] | 63 | |
---|
[30] | 64 | static char *kwlist[] = {"dictionary", NULL}; |
---|
[21] | 65 | |
---|
[30] | 66 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", kwlist, &dictionary)) { |
---|
| 67 | return -1; |
---|
| 68 | } |
---|
[21] | 69 | |
---|
[30] | 70 | if (dictionary) { |
---|
| 71 | if (self->migemo_obj) { |
---|
| 72 | migemo_close(self->migemo_obj); |
---|
| 73 | } |
---|
[21] | 74 | |
---|
[30] | 75 | migemo_obj = migemo_open(dictionary); |
---|
[21] | 76 | |
---|
[30] | 77 | if (migemo_obj) { |
---|
| 78 | self->migemo_obj = migemo_obj; |
---|
| 79 | } |
---|
| 80 | else { |
---|
| 81 | return -1; |
---|
| 82 | } |
---|
| 83 | } |
---|
| 84 | |
---|
| 85 | return 0; |
---|
[21] | 86 | } |
---|
| 87 | |
---|
/*
 * Writes the codec name for a charset identifier into `encoding`.
 *
 *   encoding: destination buffer
 *   size:     capacity of `encoding` in bytes, terminator included
 *   charset:  1 -> "cp932", 2 -> "euc_jp", 3 -> "utf_8",
 *             any other value -> "ascii"
 *
 * Returns 1 when the name (plus its NUL terminator) fits and was copied,
 * 0 when the buffer is too small; in that case `encoding` is left untouched.
 */
static int
get_encoding(char *encoding, size_t size, int charset)
{
    static const char *const codec_names[] = {
        "ascii",    /* fallback for every other charset value */
        "cp932",    /* charset == 1 */
        "euc_jp",   /* charset == 2 */
        "utf_8"     /* charset == 3 */
    };
    const char *name;
    size_t needed;

    if (charset >= 1 && charset <= 3) {
        name = codec_names[charset];
    }
    else {
        name = codec_names[0];
    }

    /* Bytes required including the terminating NUL. */
    needed = strlen(name) + 1;

    if (needed > size) {
        return 0;
    }

    memcpy(encoding, name, needed);
    return 1;
}
---|
| 114 | |
---|
| 115 | static PyObject * |
---|
| 116 | Migemo_get_encoding(Migemo *self) |
---|
| 117 | { |
---|
[30] | 118 | char encoding[7]; |
---|
| 119 | |
---|
| 120 | if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) { |
---|
| 121 | return NULL; |
---|
| 122 | } |
---|
| 123 | |
---|
| 124 | return PyString_FromString(encoding); |
---|
[21] | 125 | } |
---|
| 126 | |
---|
| 127 | static PyObject * |
---|
| 128 | Migemo_query(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 129 | { |
---|
[30] | 130 | PyObject *result, *query_obj, *query_str = NULL, *regex_strobj = NULL; |
---|
| 131 | char *query, encoding[7]; |
---|
| 132 | unsigned char *regex; |
---|
[21] | 133 | |
---|
[30] | 134 | static char *kwlist[] = {"query", NULL}; |
---|
[21] | 135 | |
---|
[30] | 136 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &query_obj)) { |
---|
| 137 | return NULL; |
---|
| 138 | } |
---|
[21] | 139 | |
---|
[30] | 140 | if (!get_encoding(encoding, sizeof(encoding), self->migemo_obj->charset)) { |
---|
| 141 | return NULL; |
---|
| 142 | } |
---|
[21] | 143 | |
---|
[30] | 144 | if (PyUnicode_Check(query_obj)) { |
---|
| 145 | query_str = PyUnicode_AsEncodedString(query_obj, encoding, "strict"); |
---|
| 146 | query = PyString_AS_STRING(query_str); |
---|
| 147 | } |
---|
| 148 | else if (PyString_Check(query_obj)) { |
---|
| 149 | query = PyString_AS_STRING(query_obj); |
---|
| 150 | } |
---|
| 151 | else { |
---|
| 152 | return NULL; |
---|
| 153 | } |
---|
[21] | 154 | |
---|
[30] | 155 | if (query) { |
---|
| 156 | regex = migemo_query(self->migemo_obj, query); |
---|
[21] | 157 | |
---|
[30] | 158 | if (regex) { |
---|
| 159 | regex_strobj = PyString_FromString(regex); |
---|
[21] | 160 | |
---|
[30] | 161 | if (regex_strobj) { |
---|
| 162 | result = PyUnicode_FromEncodedObject(regex_strobj, encoding, "strict"); |
---|
| 163 | } |
---|
[21] | 164 | |
---|
[30] | 165 | migemo_release(self->migemo_obj, regex); |
---|
| 166 | } |
---|
[21] | 167 | } |
---|
| 168 | |
---|
[30] | 169 | Py_XDECREF(regex_strobj); |
---|
| 170 | Py_XDECREF(query_str); |
---|
[21] | 171 | |
---|
[30] | 172 | if (!result) { |
---|
| 173 | return NULL; |
---|
| 174 | } |
---|
[21] | 175 | |
---|
[30] | 176 | return result; |
---|
[21] | 177 | } |
---|
| 178 | |
---|
| 179 | static PyObject * |
---|
| 180 | Migemo_set_operator(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 181 | { |
---|
[30] | 182 | PyObject *result; |
---|
| 183 | char *op; |
---|
| 184 | int index; |
---|
[21] | 185 | |
---|
[30] | 186 | static char *kwlist[] = {"index", "op", NULL}; |
---|
[21] | 187 | |
---|
[30] | 188 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "is", kwlist, &index, &op)) { |
---|
| 189 | return NULL; |
---|
| 190 | } |
---|
[21] | 191 | |
---|
[30] | 192 | if (op) { |
---|
| 193 | result = PyInt_FromLong((long)migemo_set_operator(self->migemo_obj, index, op)); |
---|
| 194 | } |
---|
[21] | 195 | |
---|
[30] | 196 | if (!result) { |
---|
| 197 | return NULL; |
---|
| 198 | } |
---|
| 199 | |
---|
| 200 | return result; |
---|
[21] | 201 | } |
---|
| 202 | |
---|
| 203 | static PyObject * |
---|
| 204 | Migemo_get_operator(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 205 | { |
---|
[30] | 206 | PyObject *result; |
---|
| 207 | unsigned char *op; |
---|
| 208 | int index; |
---|
[21] | 209 | |
---|
[30] | 210 | static char *kwlist[] = {"index", NULL}; |
---|
[21] | 211 | |
---|
[30] | 212 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "i", kwlist, &index)) { |
---|
| 213 | return NULL; |
---|
| 214 | } |
---|
[21] | 215 | |
---|
[30] | 216 | if (op = migemo_get_operator(self->migemo_obj, index)) { |
---|
| 217 | result = PyString_FromString(op); |
---|
| 218 | } |
---|
[21] | 219 | |
---|
[30] | 220 | if (!result) { |
---|
| 221 | return NULL; |
---|
| 222 | } |
---|
| 223 | |
---|
| 224 | return result; |
---|
[21] | 225 | } |
---|
| 226 | |
---|
| 227 | static PyObject * |
---|
| 228 | Migemo_load(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 229 | { |
---|
[30] | 230 | PyObject *result; |
---|
| 231 | char *dict_file; |
---|
| 232 | int dict_id; |
---|
[21] | 233 | |
---|
[30] | 234 | static char *kwlist[] = {"dict_id", "dict_file", NULL}; |
---|
[21] | 235 | |
---|
[30] | 236 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "is", kwlist, &dict_id, &dict_file)) { |
---|
| 237 | return NULL; |
---|
| 238 | } |
---|
[21] | 239 | |
---|
[30] | 240 | if (dict_file) { |
---|
| 241 | result = PyInt_FromLong((long)migemo_load(self->migemo_obj, dict_id, dict_file)); |
---|
| 242 | } |
---|
[21] | 243 | |
---|
[30] | 244 | if (!result) { |
---|
| 245 | return NULL; |
---|
| 246 | } |
---|
| 247 | |
---|
| 248 | return result; |
---|
[21] | 249 | } |
---|
| 250 | |
---|
| 251 | static PyObject * |
---|
| 252 | Migemo_is_enable(Migemo *self) |
---|
| 253 | { |
---|
[30] | 254 | return PyInt_FromLong((long)migemo_is_enable(self->migemo_obj)); |
---|
[21] | 255 | } |
---|
| 256 | |
---|
| 257 | static PyMethodDef Migemo_methods[] = { |
---|
[30] | 258 | {"query", (PyCFunction)Migemo_query, METH_KEYWORDS, |
---|
| 259 | "return regex from romaji string\n\ |
---|
[21] | 260 | \n\ |
---|
| 261 | def query(query)\n\ |
---|
| 262 | query: romaji string (str or unicode)\n\ |
---|
| 263 | \n\ |
---|
| 264 | returns: regex string as Unicode object"}, |
---|
[30] | 265 | {"set_operator", (PyCFunction)Migemo_set_operator, METH_KEYWORDS, |
---|
| 266 | "set operator string as the meta character of regex\n\ |
---|
[21] | 267 | \n\ |
---|
| 268 | def set_operator(index, op):\n\ |
---|
| 269 | index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\ |
---|
| 270 | OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\ |
---|
| 271 | op: operator string (str)\n\ |
---|
| 272 | \n\ |
---|
| 273 | returns: boolean value"}, |
---|
[30] | 274 | {"get_operator", (PyCFunction)Migemo_get_operator, METH_KEYWORDS, |
---|
| 275 | "get operator string as the meta character of regex\n\ |
---|
[21] | 276 | \n\ |
---|
| 277 | def get_operator(index)\n\ |
---|
| 278 | index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\ |
---|
| 279 | OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\ |
---|
| 280 | \n\ |
---|
| 281 | returns: operator string (str)"}, |
---|
[30] | 282 | {"load", (PyCFunction)Migemo_load, METH_KEYWORDS, |
---|
| 283 | "add dictionary to Migemo object\n\ |
---|
[21] | 284 | \n\ |
---|
| 285 | def load(dict_id, dict_file)\n\ |
---|
| 286 | dict_id: (DICTID_HAN2ZEN|DICTID_HIRA2KATA|DICTID_MIGEMO|\n\ |
---|
| 287 | DICTID_ROMA2HIRA|DICTID_ZEN2HAN)\n\ |
---|
| 288 | dict_file: path to dictionary file (str)\n\ |
---|
| 289 | \n\ |
---|
| 290 | returns: boolean value"}, |
---|
[30] | 291 | {"is_enable", (PyCFunction)Migemo_is_enable, METH_NOARGS, |
---|
| 292 | "check internal migemo_dict\n\ |
---|
[21] | 293 | \n\ |
---|
| 294 | def is_enable()\n\ |
---|
| 295 | returns: boolean value"}, |
---|
[30] | 296 | {"get_encoding", (PyCFunction)Migemo_get_encoding, METH_NOARGS, |
---|
| 297 | "get dictionary encoding\n\ |
---|
[21] | 298 | \n\ |
---|
| 299 | def get_encoding()\n\ |
---|
| 300 | returns: encoding string (str)"}, |
---|
[30] | 301 | {NULL} /* Sentinel */ |
---|
[21] | 302 | }; |
---|
| 303 | |
---|
| 304 | static PyMemberDef Migemo_members[] = { |
---|
[30] | 305 | {NULL} /* Sentinel */ |
---|
[21] | 306 | }; |
---|
| 307 | |
---|
| 308 | static PyTypeObject MigemoType = { |
---|
[30] | 309 | PyObject_HEAD_INIT(NULL) |
---|
| 310 | 0, /*ob_size*/ |
---|
| 311 | "migemo.Migemo", /*tp_name*/ |
---|
| 312 | sizeof(Migemo), /*tp_basicsize*/ |
---|
| 313 | 0, /*tp_itemsize*/ |
---|
| 314 | (destructor)Migemo_dealloc, /*tp_dealloc*/ |
---|
| 315 | 0, /*tp_print*/ |
---|
| 316 | 0, /*tp_getattr*/ |
---|
| 317 | 0, /*tp_setattr*/ |
---|
| 318 | 0, /*tp_compare*/ |
---|
| 319 | 0, /*tp_repr*/ |
---|
| 320 | 0, /*tp_as_number*/ |
---|
| 321 | 0, /*tp_as_sequence*/ |
---|
| 322 | 0, /*tp_as_mapping*/ |
---|
| 323 | 0, /*tp_hash */ |
---|
| 324 | 0, /*tp_call*/ |
---|
| 325 | 0, /*tp_str*/ |
---|
| 326 | 0, /*tp_getattro*/ |
---|
| 327 | 0, /*tp_setattro*/ |
---|
| 328 | 0, /*tp_as_buffer*/ |
---|
| 329 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ |
---|
| 330 | "Migemo wrapper object", /* tp_doc */ |
---|
| 331 | 0, /* tp_traverse */ |
---|
| 332 | 0, /* tp_clear */ |
---|
| 333 | 0, /* tp_richcompare */ |
---|
| 334 | 0, /* tp_weaklistoffset */ |
---|
| 335 | 0, /* tp_iter */ |
---|
| 336 | 0, /* tp_iternext */ |
---|
| 337 | Migemo_methods, /* tp_methods */ |
---|
| 338 | Migemo_members, /* tp_members */ |
---|
| 339 | 0, /* tp_getset */ |
---|
| 340 | 0, /* tp_base */ |
---|
| 341 | 0, /* tp_dict */ |
---|
| 342 | 0, /* tp_descr_get */ |
---|
| 343 | 0, /* tp_descr_set */ |
---|
| 344 | 0, /* tp_dictoffset */ |
---|
| 345 | (initproc)Migemo_init, /* tp_init */ |
---|
| 346 | 0, /* tp_alloc */ |
---|
| 347 | Migemo_new, /* tp_new */ |
---|
[21] | 348 | }; |
---|
| 349 | |
---|
| 350 | static PyMethodDef module_methods[] = { |
---|
[30] | 351 | {NULL} /* Sentinel */ |
---|
[21] | 352 | }; |
---|
| 353 | |
---|
| 354 | #ifndef PyMODINIT_FUNC |
---|
| 355 | #define PyMODINIT_FUNC void |
---|
| 356 | #endif |
---|
| 357 | PyMODINIT_FUNC |
---|
| 358 | initmigemo(void) |
---|
| 359 | { |
---|
[30] | 360 | PyObject* m; |
---|
[21] | 361 | |
---|
[30] | 362 | if (PyType_Ready(&MigemoType) < 0) |
---|
| 363 | return; |
---|
[21] | 364 | |
---|
[30] | 365 | m = Py_InitModule3("migemo", module_methods, "C/Migemo wrapper"); |
---|
[21] | 366 | |
---|
[30] | 367 | Py_INCREF(&MigemoType); |
---|
| 368 | PyModule_AddObject(m, "Migemo", (PyObject *)&MigemoType); |
---|
| 369 | PyModule_AddObject(m, "PYMIGEMO_VERSION", Py_BuildValue("s", PYMIGEMO_VERSION)); |
---|
[21] | 370 | |
---|
[30] | 371 | PyModule_AddObject(m, "MIGEMO_VERSION", Py_BuildValue("s", MIGEMO_VERSION)); |
---|
[21] | 372 | |
---|
[30] | 373 | PyModule_AddObject(m, "DICTID_INVALID", Py_BuildValue("i", MIGEMO_DICTID_INVALID)); |
---|
| 374 | PyModule_AddObject(m, "DICTID_MIGEMO", Py_BuildValue("i", MIGEMO_DICTID_MIGEMO)); |
---|
| 375 | PyModule_AddObject(m, "DICTID_ROMA2HIRA", Py_BuildValue("i", MIGEMO_DICTID_ROMA2HIRA)); |
---|
| 376 | PyModule_AddObject(m, "DICTID_HIRA2KATA", Py_BuildValue("i", MIGEMO_DICTID_HIRA2KATA)); |
---|
| 377 | PyModule_AddObject(m, "DICTID_HAN2ZEN", Py_BuildValue("i", MIGEMO_DICTID_HAN2ZEN)); |
---|
| 378 | PyModule_AddObject(m, "DICTID_ZEN2HAN", Py_BuildValue("i", MIGEMO_DICTID_ZEN2HAN)); |
---|
[21] | 379 | |
---|
[30] | 380 | PyModule_AddObject(m, "OPINDEX_OR", Py_BuildValue("i", MIGEMO_OPINDEX_OR)); |
---|
| 381 | PyModule_AddObject(m, "OPINDEX_NEST_IN", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_IN)); |
---|
| 382 | PyModule_AddObject(m, "OPINDEX_NEST_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_OUT)); |
---|
| 383 | PyModule_AddObject(m, "OPINDEX_SELECT_IN", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_IN)); |
---|
| 384 | PyModule_AddObject(m, "OPINDEX_SELECT_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_OUT)); |
---|
| 385 | PyModule_AddObject(m, "OPINDEX_NEWLINE", Py_BuildValue("i", MIGEMO_OPINDEX_NEWLINE)); |
---|
[21] | 386 | } |
---|