| 146 | |
| 147 | == Python でマルコフ連鎖 == |
| 148 | * 参考:[http://yamashita.dyndns.org/blog/enhanced-markov-chain-by-python MeCabとPythonでマルコフ連鎖を書いてみる(改)] |
| 149 | |
| 150 | {{{ |
| 151 | #!python |
| 152 | #!/usr/bin/env python |
| 153 | # -*- coding: utf-8 -*- |
| 154 | |
| 155 | import random |
| 156 | import MeCab |
| 157 | import feedparser |
| 158 | |
def wakati(text):
    """Split *text* into a list of words using MeCab.

    A MeCab tagger is created with the ``-Owakati`` option, *text* is parsed
    with it, trailing spaces and newlines are removed from the parser output,
    and the remainder is split on single spaces.

    Args:
        text: The text to segment.  (The caller in this file passes a UTF-8
            encoded byte string -- presumably what the MeCab binding in use
            expects; confirm for your installation.)

    Returns:
        A list of the space-separated tokens produced by MeCab.
    """
    tagger = MeCab.Tagger("-Owakati")
    separated = tagger.parse(text)
    # Drop the trailing separator/newline so the split yields no empty tail.
    trimmed = separated.rstrip(" \n")
    return trimmed.split(" ")
| 164 | |
def create_markov_table(wordlist, level=2):
    """Build a Markov transition table from a sequence of words.

    A sliding window of the previous *level* words is kept while walking
    *wordlist*.  Whenever every slot of the window holds a non-empty word,
    the current word is recorded as a follower of that window.

    Args:
        wordlist: Iterable of words, in order of appearance.
        level: Number of preceding words that form a key.

    Returns:
        A plain ``dict`` mapping each ``level``-tuple of words to the list of
        words observed right after it (duplicates kept, in encounter order).
    """
    table = {}
    # Start with an all-empty window; nothing is recorded until it fills up.
    window = ("",) * level

    for word in wordlist:
        # A window containing any empty word is skipped.
        if all(window):
            table.setdefault(window, []).append(word)
        window = window[1:] + (word,)

    return table
| 182 | |
def gen_sentence(markov, level=2, max_words=None):
    """Generate text by taking a random walk over a Markov table.

    A random key of *markov* is picked as the starting state.  Then, while
    the current state has followers, one follower is chosen at random,
    appended to the output, and shifted into the state.  The words of the
    starting key themselves are not part of the output.

    Args:
        markov: Dict mapping tuples of words to lists of follower words.
        level: Length of the keys in *markov*.  Only used to compute the
            default for *max_words*.
        max_words: Upper bound on the number of generated words.  When
            ``None``, the bound is the total number of recorded followers
            plus *level*, which approximates the size of the word list the
            table was built from.  (Previously the bound was read from a
            module-level ``wordlist`` global, which raised ``NameError``
            unless the file was run as a script.)

    Returns:
        The generated words concatenated without separators; ``''`` when
        *markov* is empty.
    """
    if not markov:
        # random.choice() would raise IndexError on an empty sequence.
        return ''

    if max_words is None:
        max_words = sum(len(followers) for followers in markov.values()) + level

    # list(...) keeps this working on Python 3, where dict views cannot be
    # indexed by random.choice().
    state = random.choice(list(markov))
    sentence = []

    while len(sentence) < max_words:
        followers = markov.get(state)
        if not followers:
            # Dead end: the state can never change again, so stop here
            # instead of spinning until the counter runs out.
            break
        word = random.choice(followers)
        sentence.append(word)
        state = tuple(state[1:]) + (word,)

    return ''.join(sentence)
| 198 | |
def cnet_news():
    """Fetch the CNET Japan RSS feed and return its summaries as one text.

    The feed is parsed with ``feedparser``; for every entry only the first
    line of its ``summary`` field is kept.

    Returns:
        The kept lines joined with newlines, encoded as UTF-8.
    """
    feed = feedparser.parse("http://feed.japan.cnet.com/rss/index.rdf")

    first_lines = []
    for entry in feed.entries:
        # Only the text before the first newline of each summary is used.
        first_lines.append(entry["summary"].split("\n")[0])

    joined = "\n".join(first_lines)
    return joined.encode("utf-8")
| 203 | |
if __name__ == "__main__":
    # Script entry point: fetch the news text, split it into words, build a
    # Markov table whose keys are 4 words long, and print one generated text.
    level = 4
    wordlist = wakati(cnet_news())
    markov = create_markov_table(wordlist, level)
    # The generated text is a UTF-8 byte string; decode it before printing.
    print(unicode(gen_sentence(markov, level), "utf-8"))
| 211 | }}} |