[21] | 1 | /* |
---|
| 2 | * pymigemo.c - C/Migemo wrapper for Python |
---|
[28] | 3 | * Copyright(C) 2005, Atzm WATANABE <atzm@atzm.org> |
---|
[21] | 4 | * License: BSD-style |
---|
| 5 | * |
---|
| 6 | * $Id$ |
---|
| 7 | */ |
---|
| 8 | |
---|
| 9 | #include <Python.h> |
---|
| 10 | #include "structmember.h" |
---|
| 11 | #include "migemo.h" |
---|
| 12 | |
---|
| 13 | #define PYMIGEMO_VERSION "0.1" |
---|
| 14 | |
---|
/*
 * Local definition of the migemo object's struct, so that this wrapper can
 * dereference its members (the code in this file only reads `charset`).
 * NOTE(review): presumably the member order has to stay in sync with the
 * struct used inside the C/Migemo library being linked -- confirm.
 */
struct _migemo
{
    int enable;
    void *mtree;
    int charset;        /* passed to get_encoding() to pick a codec name */
    void *roma2hira;
    void *hira2kata;
    void *han2zen;
    void *zen2han;
    void *rx;
    void *addword;
    void *char2int;
};
---|
| 29 | |
---|
| 30 | typedef struct { |
---|
[29] | 31 | PyObject_HEAD |
---|
[21] | 32 | migemo *migemo_obj; |
---|
| 33 | } Migemo; |
---|
| 34 | |
---|
| 35 | static void |
---|
| 36 | Migemo_dealloc(Migemo *self) |
---|
| 37 | { |
---|
| 38 | if (self->migemo_obj) |
---|
| 39 | migemo_close(self->migemo_obj); |
---|
| 40 | |
---|
| 41 | self->ob_type->tp_free((PyObject *)self); |
---|
| 42 | } |
---|
| 43 | |
---|
| 44 | static PyObject * |
---|
| 45 | Migemo_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
---|
| 46 | { |
---|
| 47 | Migemo *self; |
---|
| 48 | |
---|
| 49 | self = (Migemo *)type->tp_alloc(type, 0); |
---|
| 50 | if (self != NULL) |
---|
| 51 | self->migemo_obj = NULL; |
---|
| 52 | |
---|
| 53 | return (PyObject *)self; |
---|
| 54 | } |
---|
| 55 | |
---|
| 56 | static int |
---|
| 57 | Migemo_init(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 58 | { |
---|
| 59 | migemo *migemo_obj; |
---|
| 60 | char *dictionary; |
---|
| 61 | static char *kwlist[] = {"dictionary", NULL}; |
---|
| 62 | |
---|
| 63 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", kwlist, &dictionary)) |
---|
| 64 | return -1; |
---|
| 65 | |
---|
| 66 | if (dictionary) { |
---|
| 67 | if (self->migemo_obj) |
---|
| 68 | migemo_close(self->migemo_obj); |
---|
| 69 | |
---|
| 70 | migemo_obj = migemo_open(dictionary); |
---|
| 71 | |
---|
| 72 | if (migemo_obj) |
---|
| 73 | self->migemo_obj = migemo_obj; |
---|
| 74 | else |
---|
| 75 | return -1; |
---|
| 76 | } |
---|
| 77 | |
---|
| 78 | return 0; |
---|
| 79 | } |
---|
| 80 | |
---|
| 81 | const char * |
---|
| 82 | get_encoding(int charset) |
---|
| 83 | { |
---|
| 84 | const char *encoding; |
---|
| 85 | |
---|
| 86 | switch(charset) { |
---|
| 87 | case 1: |
---|
| 88 | encoding = "cp932"; |
---|
| 89 | break; |
---|
| 90 | case 2: |
---|
| 91 | encoding = "euc_jp"; |
---|
| 92 | break; |
---|
| 93 | case 3: |
---|
| 94 | encoding = "utf8"; |
---|
| 95 | break; |
---|
| 96 | default: |
---|
| 97 | encoding = Py_FileSystemDefaultEncoding; |
---|
| 98 | } |
---|
| 99 | |
---|
| 100 | return encoding; |
---|
| 101 | } |
---|
| 102 | |
---|
| 103 | static PyObject * |
---|
| 104 | Migemo_get_encoding(Migemo *self) |
---|
| 105 | { |
---|
| 106 | return Py_BuildValue("s", get_encoding(self->migemo_obj->charset)); |
---|
| 107 | } |
---|
| 108 | |
---|
| 109 | static PyObject * |
---|
| 110 | Migemo_query(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 111 | { |
---|
| 112 | PyObject *result, *query_obj; |
---|
| 113 | static char *kwlist[] = {"query", NULL}; |
---|
| 114 | |
---|
| 115 | const char *query, *encoding; |
---|
| 116 | unsigned char *regex; |
---|
| 117 | PyObject *query_str = NULL, *regex_strobj = NULL; |
---|
| 118 | |
---|
| 119 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &query_obj)) |
---|
| 120 | return NULL; |
---|
| 121 | |
---|
| 122 | encoding = get_encoding(self->migemo_obj->charset); |
---|
| 123 | |
---|
| 124 | if (PyUnicode_Check(query_obj)) { |
---|
| 125 | query_str = PyUnicode_AsEncodedString(query_obj, encoding, "strict"); |
---|
| 126 | query = PyString_AS_STRING(query_str); |
---|
| 127 | } |
---|
| 128 | else if (PyString_Check(query_obj)) |
---|
| 129 | query = PyString_AS_STRING(query_obj); |
---|
| 130 | else |
---|
| 131 | return NULL; |
---|
| 132 | |
---|
| 133 | if (query) { |
---|
| 134 | regex = migemo_query(self->migemo_obj, query); |
---|
| 135 | |
---|
| 136 | if (regex) { |
---|
| 137 | regex_strobj = PyString_FromString(regex); |
---|
| 138 | |
---|
| 139 | if (regex_strobj) |
---|
| 140 | result = PyUnicode_FromEncodedObject(regex_strobj, encoding, "strict"); |
---|
| 141 | |
---|
| 142 | migemo_release(self->migemo_obj, regex); |
---|
| 143 | } |
---|
| 144 | } |
---|
| 145 | |
---|
| 146 | Py_XDECREF(regex_strobj); |
---|
| 147 | Py_XDECREF(query_str); |
---|
| 148 | |
---|
| 149 | if (!result) |
---|
| 150 | return NULL; |
---|
| 151 | |
---|
| 152 | return result; |
---|
| 153 | } |
---|
| 154 | |
---|
| 155 | static PyObject * |
---|
| 156 | Migemo_set_operator(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 157 | { |
---|
| 158 | PyObject *result; |
---|
| 159 | static char *kwlist[] = {"index", "op", NULL}; |
---|
| 160 | |
---|
| 161 | int index; |
---|
| 162 | char *op; |
---|
| 163 | |
---|
| 164 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "is", kwlist, &index, &op)) |
---|
| 165 | return NULL; |
---|
| 166 | |
---|
| 167 | if (op) |
---|
| 168 | result = Py_BuildValue("i", migemo_set_operator(self->migemo_obj, index, op)); |
---|
| 169 | |
---|
| 170 | if (!result) |
---|
| 171 | return NULL; |
---|
| 172 | |
---|
| 173 | return result; |
---|
| 174 | } |
---|
| 175 | |
---|
| 176 | static PyObject * |
---|
| 177 | Migemo_get_operator(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 178 | { |
---|
| 179 | PyObject *result; |
---|
| 180 | static char *kwlist[] = {"index", NULL}; |
---|
| 181 | |
---|
| 182 | int index; |
---|
| 183 | const unsigned char *op; |
---|
| 184 | |
---|
| 185 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "i", kwlist, &index)) |
---|
| 186 | return NULL; |
---|
| 187 | |
---|
| 188 | op = migemo_get_operator(self->migemo_obj, index); |
---|
| 189 | result = Py_BuildValue("s", op); |
---|
| 190 | |
---|
| 191 | if (!result) |
---|
| 192 | return NULL; |
---|
| 193 | |
---|
| 194 | return result; |
---|
| 195 | } |
---|
| 196 | |
---|
| 197 | static PyObject * |
---|
| 198 | Migemo_load(Migemo *self, PyObject *args, PyObject *kwds) |
---|
| 199 | { |
---|
| 200 | PyObject *result; |
---|
| 201 | static char *kwlist[] = {"dict_id", "dict_file", NULL}; |
---|
| 202 | |
---|
| 203 | int dict_id; |
---|
| 204 | const char *dict_file; |
---|
| 205 | |
---|
| 206 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "is", kwlist, &dict_id, &dict_file)) |
---|
| 207 | return NULL; |
---|
| 208 | |
---|
| 209 | if (dict_file) |
---|
| 210 | result = Py_BuildValue("i", migemo_load(self->migemo_obj, dict_id, dict_file)); |
---|
| 211 | |
---|
| 212 | if (!result) |
---|
| 213 | return NULL; |
---|
| 214 | |
---|
| 215 | return result; |
---|
| 216 | } |
---|
| 217 | |
---|
| 218 | static PyObject * |
---|
| 219 | Migemo_is_enable(Migemo *self) |
---|
| 220 | { |
---|
| 221 | return Py_BuildValue("i", migemo_is_enable(self->migemo_obj)); |
---|
| 222 | } |
---|
| 223 | |
---|
| 224 | static PyMethodDef Migemo_methods[] = { |
---|
| 225 | {"query", (PyCFunction)Migemo_query, METH_KEYWORDS, |
---|
| 226 | "return regex from romaji string\n\ |
---|
| 227 | \n\ |
---|
| 228 | def query(query)\n\ |
---|
| 229 | query: romaji string (str or unicode)\n\ |
---|
| 230 | \n\ |
---|
| 231 | returns: regex string as Unicode object"}, |
---|
| 232 | {"set_operator", (PyCFunction)Migemo_set_operator, METH_KEYWORDS, |
---|
| 233 | "set operator string as the meta character of regex\n\ |
---|
| 234 | \n\ |
---|
| 235 | def set_operator(index, op):\n\ |
---|
| 236 | index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\ |
---|
| 237 | OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\ |
---|
| 238 | op: operator string (str)\n\ |
---|
| 239 | \n\ |
---|
| 240 | returns: boolean value"}, |
---|
| 241 | {"get_operator", (PyCFunction)Migemo_get_operator, METH_KEYWORDS, |
---|
| 242 | "get operator string as the meta character of regex\n\ |
---|
| 243 | \n\ |
---|
| 244 | def get_operator(index)\n\ |
---|
| 245 | index: (OPINDEX_NEST_IN|OPINDEX_NEST_OUT|OPINDEX_NEWLINE|\n\ |
---|
| 246 | OPINDEX_OR|OPINDEX_SELECT_IN|OPINDEX_SELECT_OUT)\n\ |
---|
| 247 | \n\ |
---|
| 248 | returns: operator string (str)"}, |
---|
| 249 | {"load", (PyCFunction)Migemo_load, METH_KEYWORDS, |
---|
| 250 | "add dictionary to Migemo object\n\ |
---|
| 251 | \n\ |
---|
| 252 | def load(dict_id, dict_file)\n\ |
---|
| 253 | dict_id: (DICTID_HAN2ZEN|DICTID_HIRA2KATA|DICTID_MIGEMO|\n\ |
---|
| 254 | DICTID_ROMA2HIRA|DICTID_ZEN2HAN)\n\ |
---|
| 255 | dict_file: path to dictionary file (str)\n\ |
---|
| 256 | \n\ |
---|
| 257 | returns: boolean value"}, |
---|
| 258 | {"is_enable", (PyCFunction)Migemo_is_enable, METH_NOARGS, |
---|
| 259 | "check internal migemo_dict\n\ |
---|
| 260 | \n\ |
---|
| 261 | def is_enable()\n\ |
---|
| 262 | returns: boolean value"}, |
---|
| 263 | {"get_encoding", (PyCFunction)Migemo_get_encoding, METH_NOARGS, |
---|
| 264 | "get dictionary encoding\n\ |
---|
| 265 | \n\ |
---|
| 266 | def get_encoding()\n\ |
---|
| 267 | returns: encoding string (str)"}, |
---|
| 268 | {NULL} /* Sentinel */ |
---|
| 269 | }; |
---|
| 270 | |
---|
| 271 | static PyMemberDef Migemo_members[] = { |
---|
| 272 | {NULL} /* Sentinel */ |
---|
| 273 | }; |
---|
| 274 | |
---|
| 275 | static PyTypeObject MigemoType = { |
---|
| 276 | PyObject_HEAD_INIT(NULL) |
---|
| 277 | 0, /*ob_size*/ |
---|
| 278 | "migemo.Migemo", /*tp_name*/ |
---|
| 279 | sizeof(Migemo), /*tp_basicsize*/ |
---|
| 280 | 0, /*tp_itemsize*/ |
---|
| 281 | (destructor)Migemo_dealloc, /*tp_dealloc*/ |
---|
| 282 | 0, /*tp_print*/ |
---|
| 283 | 0, /*tp_getattr*/ |
---|
| 284 | 0, /*tp_setattr*/ |
---|
| 285 | 0, /*tp_compare*/ |
---|
| 286 | 0, /*tp_repr*/ |
---|
| 287 | 0, /*tp_as_number*/ |
---|
| 288 | 0, /*tp_as_sequence*/ |
---|
| 289 | 0, /*tp_as_mapping*/ |
---|
| 290 | 0, /*tp_hash */ |
---|
| 291 | 0, /*tp_call*/ |
---|
| 292 | 0, /*tp_str*/ |
---|
| 293 | 0, /*tp_getattro*/ |
---|
| 294 | 0, /*tp_setattro*/ |
---|
| 295 | 0, /*tp_as_buffer*/ |
---|
| 296 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ |
---|
| 297 | "Migemo wrapper object", /* tp_doc */ |
---|
| 298 | 0, /* tp_traverse */ |
---|
| 299 | 0, /* tp_clear */ |
---|
| 300 | 0, /* tp_richcompare */ |
---|
| 301 | 0, /* tp_weaklistoffset */ |
---|
| 302 | 0, /* tp_iter */ |
---|
| 303 | 0, /* tp_iternext */ |
---|
| 304 | Migemo_methods, /* tp_methods */ |
---|
| 305 | Migemo_members, /* tp_members */ |
---|
| 306 | 0, /* tp_getset */ |
---|
| 307 | 0, /* tp_base */ |
---|
| 308 | 0, /* tp_dict */ |
---|
| 309 | 0, /* tp_descr_get */ |
---|
| 310 | 0, /* tp_descr_set */ |
---|
| 311 | 0, /* tp_dictoffset */ |
---|
| 312 | (initproc)Migemo_init, /* tp_init */ |
---|
| 313 | 0, /* tp_alloc */ |
---|
| 314 | Migemo_new, /* tp_new */ |
---|
| 315 | }; |
---|
| 316 | |
---|
| 317 | static PyMethodDef module_methods[] = { |
---|
| 318 | {NULL} /* Sentinel */ |
---|
| 319 | }; |
---|
| 320 | |
---|
| 321 | #ifndef PyMODINIT_FUNC |
---|
| 322 | #define PyMODINIT_FUNC void |
---|
| 323 | #endif |
---|
| 324 | PyMODINIT_FUNC |
---|
| 325 | initmigemo(void) |
---|
| 326 | { |
---|
| 327 | PyObject* m; |
---|
| 328 | |
---|
| 329 | if (PyType_Ready(&MigemoType) < 0) |
---|
| 330 | return; |
---|
| 331 | |
---|
| 332 | m = Py_InitModule3("migemo", module_methods, "C/Migemo wrapper"); |
---|
| 333 | |
---|
| 334 | Py_INCREF(&MigemoType); |
---|
| 335 | PyModule_AddObject(m, "Migemo", (PyObject *)&MigemoType); |
---|
| 336 | PyModule_AddObject(m, "PYMIGEMO_VERSION", Py_BuildValue("s", PYMIGEMO_VERSION)); |
---|
| 337 | |
---|
| 338 | PyModule_AddObject(m, "MIGEMO_VERSION", Py_BuildValue("s", MIGEMO_VERSION)); |
---|
| 339 | |
---|
| 340 | PyModule_AddObject(m, "DICTID_INVALID", Py_BuildValue("i", MIGEMO_DICTID_INVALID)); |
---|
| 341 | PyModule_AddObject(m, "DICTID_MIGEMO", Py_BuildValue("i", MIGEMO_DICTID_MIGEMO)); |
---|
| 342 | PyModule_AddObject(m, "DICTID_ROMA2HIRA", Py_BuildValue("i", MIGEMO_DICTID_ROMA2HIRA)); |
---|
| 343 | PyModule_AddObject(m, "DICTID_HIRA2KATA", Py_BuildValue("i", MIGEMO_DICTID_HIRA2KATA)); |
---|
| 344 | PyModule_AddObject(m, "DICTID_HAN2ZEN", Py_BuildValue("i", MIGEMO_DICTID_HAN2ZEN)); |
---|
| 345 | PyModule_AddObject(m, "DICTID_ZEN2HAN", Py_BuildValue("i", MIGEMO_DICTID_ZEN2HAN)); |
---|
| 346 | |
---|
| 347 | PyModule_AddObject(m, "OPINDEX_OR", Py_BuildValue("i", MIGEMO_OPINDEX_OR)); |
---|
| 348 | PyModule_AddObject(m, "OPINDEX_NEST_IN", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_IN)); |
---|
| 349 | PyModule_AddObject(m, "OPINDEX_NEST_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_NEST_OUT)); |
---|
| 350 | PyModule_AddObject(m, "OPINDEX_SELECT_IN", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_IN)); |
---|
| 351 | PyModule_AddObject(m, "OPINDEX_SELECT_OUT", Py_BuildValue("i", MIGEMO_OPINDEX_SELECT_OUT)); |
---|
| 352 | PyModule_AddObject(m, "OPINDEX_NEWLINE", Py_BuildValue("i", MIGEMO_OPINDEX_NEWLINE)); |
---|
| 353 | } |
---|