Version 12 (modified by atzm, 17 years ago) (diff) |
---|
Python
メモ.主に日記からの転載.
可変長引数
# *a in a definition collects the positional arguments into a tuple.
>>> def hoge(*a):
...     print a
>>> hoge(1, 2, 3)
(1, 2, 3)
# *a at a call site unpacks a sequence into positional arguments.
>>> def hage(a, b, c):
...     print a, b, c
>>> a = (1, 2, 3)
>>> hage(*a)
1 2 3
# **a in a definition collects the keyword arguments into a dict.
>>> def hige(**a):
...     print a
>>> hige(a=1, b=2, c=3)
{'a': 1, 'c': 3, 'b': 2}
# **a at a call site unpacks a dict into keyword arguments.
>>> def huge(a, b, c):
...     print a, b, c
>>> a = {'a': 1, 'b': 2, 'c': 3}
>>> huge(**a)
1 2 3
日本語関連
文字コードとか
- 例えば euc-jp の場合
# With EUC-JP as the encoding, indexing the string yields single bytes:
# the first character is the byte pair 0xa4 0xa2.
>>> a = 'あ'
>>> ord(a[0]) == 0xa4
True
>>> ord(a[1]) == 0xa2
True
# Hex dump of every byte; [2:] drops the '0x' prefix that hex() adds.
>>> ''.join([hex(ord(c))[2:] for c in 'あいうえお'])
'a4a2a4a4a4a6a4a8a4aa'
行列の行と列を入れ替えるワンライナー
# Input: a whitespace-separated matrix with 3 rows and 4 columns.
$ cat hoge.txt
1 2 3 4
5 6 7 8
9 10 11 12
# Split every stdin line into fields, transpose the rows with zip(*rows),
# then join each resulting column back into a space-separated line.
$ python -c 'import sys; print "\n".join([" ".join(i) for i in zip(*[i.split() for i in sys.stdin])])' < hoge.txt
1 5 9
2 6 10
3 7 11
4 8 12
- キモは zip(*list)
staticmethod
# Declare a static method without decorator syntax: define the function in the
# class body, then rebind the name to staticmethod(function).
>>> class Hoge:
...     def hoge(*args):
...         print ', '.join([str(i) for i in args])
...     hoge = staticmethod(hoge)
...
# Called on the class itself; no instance is passed in, so every argument
# shows up in args exactly as given.
>>> Hoge.hoge(1, '2', 3.3, None, False, Hoge)
1, 2, 3.3, None, False, __main__.Hoge
raw_input 中に SIGALRM 出すと EOFError
import signal

# Install a handler for SIGALRM that accepts any arguments and does nothing.
signal.signal(signal.SIGALRM, lambda *a: None)
# Schedule a SIGALRM 5 seconds from now.
signal.alarm(5)
# Wait for interactive input; the accompanying notes report that once the
# alarm fires during this call it fails with EOFError.
raw_input()
- 5 秒待つと EOFError.
- 何とか回避できんもんかなぁ.
HostIP を使う
import urllib
import mimetools


class HostIP(dict):
    """Dictionary of hostip.info lookup results for a single IP address.

    Constructing an instance performs one HTTP request to the hostip.info
    "rough" API and parses the response body as RFC 822 style
    ``Name: value`` lines.  The parsed fields are stored under lower-cased
    names (e.g. ``'city'``, ``'country code'``, ``'latitude'``), and three
    extra keys are added:

    * ``'ipaddr'``     -- the address that was looked up
    * ``'url'``        -- the API URL that was requested
    * ``'googlemaps'`` -- a Google Maps URL built from latitude/longitude

    Raises:
        IOError: propagated from ``urllib.urlopen`` on network failure.
        KeyError: if the response carries no latitude/longitude field.
    """

    _URL_BASE = 'http://api.hostip.info/rough.php?position=true&ip=%s'
    # NOTE(review): the N/E suffixes are hard-coded, so this presumably only
    # renders correctly for northern/eastern coordinates -- confirm.
    _GOOGLEMAPS_BASE = 'http://maps.google.com/?q=%sN+%sE(%s)'

    def __init__(self, ipaddr):
        url = self._URL_BASE % ipaddr
        fp = urllib.urlopen(url)
        try:
            # seekable=0: the HTTP response object cannot be rewound.
            headers = mimetools.Message(fp, 0)
        finally:
            # Always release the connection, even when parsing fails.
            fp.close()

        dict.__init__(self, headers.dict)
        self['ipaddr'] = ipaddr
        self['url'] = url
        self['googlemaps'] = self._GOOGLEMAPS_BASE % (
            self['latitude'], self['longitude'], ipaddr)
- dict として使える HostIP オブジェクト.
# Look up one address and print every key/value pair the resulting
# HostIP dictionary holds (API fields plus the added ipaddr/url/googlemaps).
>>> test = HostIP('210.156.41.55')
>>> for k, v in test.items():
...     print '%s: %s' % (k, v)
...
city: Morioka
guessed: true
url: http://api.hostip.info/rough.php?position=true&ip=210.156.41.55
googlemaps: http://maps.google.com/?q=39.7N+141.15E(210.156.41.55)
latitude: 39.7
country: JAPAN
ipaddr: 210.156.41.55
country code: JP
longitude: 141.15
dict 同士のマージ
- dict.update() だと同じキーを持つ値が上書きされてしまうので,上書きせずに足し合わせたりできる関数が欲しかった.
- ただし,第一引数について破壊的,末端の value が数値以外に色々混ざってる時にどうなるかは知らん,という欠点あり.
def merge(my_dict, new_dict, mergetype='add'):
    """Recursively merge ``new_dict`` into ``my_dict`` and return ``my_dict``.

    Unlike ``dict.update()``, values stored under the same key are combined
    instead of overwritten.  ``my_dict`` is modified in place.

    Args:
        my_dict: destination mapping; mutated and returned.
        new_dict: mapping whose items are merged in; left untouched.
        mergetype: name of the binary operation used to combine two leaf
            values, without the underscores (``'add'``, ``'sub'``, ``'mul'``,
            ``'or'``, ...).  ``'add'`` means ``old + new``.

    Returns:
        ``my_dict`` itself.

    Behaviour per key of ``new_dict``:
        * key missing from ``my_dict``: the value is inserted.  A nested dict
          is inserted as a fresh plain-dict copy so that later merges into
          ``my_dict`` cannot modify ``new_dict``.
        * value is a dict: merged recursively into ``my_dict[key]``.
        * otherwise: ``my_dict[key] = my_dict[key] <op> value``.
    """
    import operator

    dunder = '__%s__' % mergetype
    # Going through the operator module applies the normal operator protocol
    # (including reflected methods), so e.g. int + float works instead of
    # yielding NotImplemented as a direct ``__add__`` call does.
    combine = getattr(operator, dunder, None)

    for key, value in new_dict.items():
        try:
            current = my_dict[key]
        except KeyError:
            if isinstance(value, dict):
                # Copy nested dicts rather than aliasing new_dict's objects.
                value = merge({}, value, mergetype)
            my_dict[key] = value
            continue

        if isinstance(value, dict):
            merge(current, value, mergetype)
        elif combine is not None:
            my_dict[key] = combine(current, value)
        else:
            # Names unknown to the operator module: call the method directly.
            my_dict[key] = getattr(current, dunder)(value)
    return my_dict
リストから重複を取り除く
- ただし元の順番は保持されない(set を経由するので,並びは set の内部順になる)
try:
    set
    frozenset
except NameError:
    # Interpreters without the set/frozenset builtins: use the sets module.
    from sets import Set as set
    from sets import ImmutableSet as frozenset


def uniq(sequence):
    """Return the distinct items of ``sequence`` as a list.

    Items must be hashable.  The result follows set iteration order, so the
    original ordering of ``sequence`` is not kept.
    """
    distinct = set(sequence)
    return list(distinct)
Python でマルコフ連鎖
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Generate a random sentence from CNET Japan headlines with a Markov chain."""

import random

import MeCab
import feedparser


def wakati(text):
    """Split ``text`` into a list of words using MeCab's wakati output mode."""
    tagger = MeCab.Tagger("-Owakati")
    parsed = tagger.parse(text)
    # Wakati output is space-separated with trailing space/newline padding.
    return parsed.rstrip(" \n").split(" ")


def create_markov_table(wordlist, level=2):
    """Build a table mapping each ``level``-word tuple to its follower words.

    Args:
        wordlist: sequence of words in their original order.
        level: number of preceding words that form one key.

    Returns:
        dict of ``tuple(words) -> list of words seen right after them``.
        Followers are kept with repetition so frequent ones are picked more
        often.
    """
    markov = {}
    window = [""] * level
    for word in wordlist:
        # Record nothing until the window holds only non-empty words.
        if all(window):
            markov.setdefault(tuple(window), []).append(word)
        window = window[1:] + [word]
    return markov


def gen_sentence(markov, level=2, max_words=None):
    """Random-walk the table from a random key and return the joined words.

    The words of the starting key itself are not part of the result.

    Args:
        markov: table produced by ``create_markov_table``.
        level: key length the table was built with; only used to derive the
            default ``max_words``.
        max_words: upper bound on the number of generated words.  Defaults to
            the number of recorded followers plus ``level``, which matches
            the length of the source word list when it has no empty words.

    Returns:
        The generated words concatenated without separators; ``''`` for an
        empty table.
    """
    if not markov:
        return ''
    if max_words is None:
        max_words = sum(len(words) for words in markov.values()) + level

    state = random.choice(list(markov))
    sentence = []
    for _ in range(max_words):
        followers = markov.get(state)
        if not followers:
            # Dead end: this key was never followed by anything, so the
            # walk cannot continue.
            break
        word = random.choice(followers)
        sentence.append(word)
        state = tuple(state[1:]) + (word,)
    return ''.join(sentence)


def cnet_news():
    """Return the first line of every CNET Japan feed summary, UTF-8 encoded."""
    feed = feedparser.parse("http://feed.japan.cnet.com/rss/index.rdf")
    first_lines = [entry["summary"].split("\n")[0] for entry in feed.entries]
    return "\n".join(first_lines).encode("utf-8")


if __name__ == "__main__":
    level = 4
    src = cnet_news()
    wordlist = wakati(src)
    markov = create_markov_table(wordlist, level)
    # Same bound the walk always used: one step per source word at most.
    sentence = gen_sentence(markov, level, max_words=len(wordlist))
    print(unicode(sentence, "utf-8"))