Context Navigation

Changes between Version 10 and Version 11 of Python

Timestamp: 06/16/06 19:47:14 (19 years ago)
Author: atzm
Comment: --

Legend:

: Unmodified
: Added
: Removed
: Modified

Python

-                      v10
+                      v11
     return list(set(sequence))
 }}}
+== Python でマルコフ連鎖 ==
+ * 参考：[http://yamashita.dyndns.org/blog/enhanced-markov-chain-by-python MeCabとPythonでマルコフ連鎖を書いてみる(改)]
+{{{
+#!python
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import random
+import MeCab
+import feedparser
+def wakati(text):
+    t = MeCab.Tagger("-Owakati")
+    m = t.parse(text)
+    result = m.rstrip(" \n").split(" ")
+    return result
+def create_markov_table(wordlist, level=2):
+    def check_word(words):
+        return len([w for w in words if w]) == len(words)
+    markov = {}
+    tmpword = ["" for i in range(level)]
+    for word in wordlist:
+        if check_word(tmpword):
+            key = tuple(tmpword)
+            try:
+                markov[key].append(word)
+            except KeyError:
+                markov[key] = [word]
+        tmpword = tmpword[1:] + [word]
+    return markov
+def gen_sentence(markov, level=2):
+    count = 0
+    sentence = []
+    tmpword = random.choice(markov.keys())
+    while count < len(wordlist):
+        try:
+            tmp = random.choice(markov[tmpword])
+            sentence.append(tmp)
+            tmpword = tuple(list(tmpword)[1:] + [tmp])
+        except KeyError:
+            pass
+        count += 1
+    return ''.join(sentence)
+def cnet_news():
+    f = feedparser.parse("http://feed.japan.cnet.com/rss/index.rdf")
+    text = "\n".join([s["summary"].split("\n")[0] for s in f.entries])
+    return text.encode("utf-8")
+# Script entry point: fetch news text, split it into words, build the
+# Markov table and print one generated sentence.
+if __name__ == "__main__":
+    # Number of preceding words that form each key of the Markov table.
+    level = 4
+    src = cnet_news()
+    wordlist = wakati(src)
+    markov = create_markov_table(wordlist, level)
+    sentence = gen_sentence(markov, level)
+    # Python 2 print statement: convert the generated text to unicode
+    # (treating it as UTF-8) before writing it out.
+    print unicode(sentence, "utf-8")
+}}}