Version 16 (modified by atzm, 16 years ago) (diff) |
---|
Python
メモ.主に日記からの転載.
行番号を得る
Perl で言うところの __LINE__ みたいなもの. スタックフレームを参照すれば良い.
import inspect


def lineno():
    """Return the line number of the caller, like Perl's ``__LINE__``.

    The number is read from the caller's stack frame, i.e. the ``f_back``
    of the frame that is executing this function.
    """
    return inspect.currentframe().f_back.f_lineno


if __name__ == '__main__':
    # The parenthesised single-argument form prints the same text on
    # Python 2 and Python 3, unlike the bare ``print`` statement.
    print(lineno())
可変長引数
>>> # *a in a definition collects the positional arguments into a tuple
>>> def hoge(*a):
...     print a
>>> hoge(1, 2, 3)
(1, 2, 3)
>>> def hage(a, b, c):
...     print a, b, c
>>> a = (1, 2, 3)
>>> # *a in a call unpacks the tuple into positional arguments
>>> hage(*a)
1 2 3
>>> # **a in a definition collects the keyword arguments into a dict
>>> def hige(**a):
...     print a
>>> hige(a=1, b=2, c=3)
{'a': 1, 'c': 3, 'b': 2}
>>> def huge(a, b, c):
...     print a, b, c
>>> a = {'a': 1, 'b': 2, 'c': 3}
>>> # **a in a call unpacks the dict into keyword arguments
>>> huge(**a)
1 2 3
hidden read-ahead buffer
- Built-in Types -- Python v2.6.1 documentation (#file.next) より
In order to make a for loop the most efficient way of looping over the lines of a file (a very common operation), the next() method uses a hidden read-ahead buffer.
- next() を見る
- コード
import sys


def printrss():
    """Print this process's ``VmRSS`` line from ``/proc/self/status``."""
    # ``with`` closes the file even if reading or writing raises.
    with open('/proc/self/status') as status:
        for line in status:
            if line.startswith('VmRSS'):
                # The line already ends with a newline; emit it verbatim.
                sys.stdout.write(line)


def test(fname='/path/to/huge_file'):
    """Iterate over *fname* with a ``for`` loop, printing RSS per line.

    For every line read, a ``===== N =====`` banner (N counts from 1) is
    printed, followed by the current ``VmRSS`` value.
    """
    with open(fname) as f:
        # Iterating the file object itself is the behaviour under test;
        # enumerate only supplies the 1-based counter.
        for count, _line in enumerate(f, 1):
            print('===== %d =====' % count)
            printrss()


if __name__ == '__main__':
    print('test1')
    test()
- 結果
: : : ===== 1 ===== VmRSS: 2392 kB : : : ===== 11212 ===== VmRSS: 2408 kB
- ちょっと (16KB) だけ増えてる
- コード
- readline() を見る
- コード
import sys


def printrss():
    """Print this process's ``VmRSS`` line from ``/proc/self/status``."""
    # ``with`` closes the file even if reading or writing raises.
    with open('/proc/self/status') as status:
        for line in status:
            if line.startswith('VmRSS'):
                # The line already ends with a newline; emit it verbatim.
                sys.stdout.write(line)


def test(fname='/path/to/huge_file'):
    """Read *fname* line by line with ``readline()``, printing RSS per line.

    For every line read, a ``===== N =====`` banner (N counts from 1) is
    printed, followed by the current ``VmRSS`` value.
    """
    with open(fname) as f:
        # iter(callable, sentinel) keeps calling f.readline() until it
        # returns the empty string at end of file, so readline() remains
        # the call that drives the loop.
        for count, _line in enumerate(iter(f.readline, ''), 1):
            print('===== %d =====' % count)
            printrss()


if __name__ == '__main__':
    print('test1')
    test()
- 結果
: : : ===== 1 ===== VmRSS: 2396 kB : : : ===== 11212 ===== VmRSS: 2396 kB
- 増えてない
- コード
日本語関連
文字コードとか
- 例えば euc-jp の場合
>>> a = 'あ' >>> ord(a[0]) == 0xa4 True >>> ord(a[1]) == 0xa2 True >>> ''.join([hex(ord(c))[2:] for c in 'あいうえお']) 'a4a2a4a4a4a6a4a8a4aa'
- こんなこともできる
>>> 'あいうえお'.encode('hex') 'a4a2a4a4a4a6a4a8a4aa'
- 詳細は pydoc encodings
行列の行と列を入れ替えるワンライナー
$ cat hoge.txt 1 2 3 4 5 6 7 8 9 10 11 12 $ python -c 'import sys; print "\n".join([" ".join(i) for i in zip(*[i.split() for i in sys.stdin])])' < hoge.txt 1 5 9 2 6 10 3 7 11 4 8 12
- キモは zip(*list)
staticmethod
- デコレータ関数なので以下のように使う
>>> class Hoge:
...     # staticmethod used as a decorator: hoge receives only the
...     # arguments passed explicitly by the caller
...     @staticmethod
...     def hoge(*args):
...         print ', '.join([str(i) for i in args])
...
>>> Hoge.hoge(1, '2', 3.3, None, False, Hoge)
1, 2, 3.3, None, False, __main__.Hoge
- 別に以下のようにしても問題はない
>>> class Hoge:
...     def hoge(*args):
...         print ', '.join([str(i) for i in args])
...     # Same effect as the decorator form: rebind the name to the
...     # staticmethod-wrapped function
...     hoge = staticmethod(hoge)
...
>>> Hoge.hoge(1, '2', 3.3, None, False, Hoge)
1, 2, 3.3, None, False, __main__.Hoge
raw_input 中に SIGALRM 出すと EOFError
import signal


def _ignore_alarm(*_args):
    """No-op SIGALRM handler; accepts and discards any arguments."""
    return None


# Install the do-nothing handler for SIGALRM.
signal.signal(signal.SIGALRM, _ignore_alarm)
# Request SIGALRM in 5 seconds, i.e. while raw_input() below is waiting.
signal.alarm(5)
raw_input()
- 5 秒待つと EOFError.
- 何とか回避できんもんかなぁ.
HostIP を使う
import urllib
import mimetools


class HostIP(dict):
    """Dict of hostip.info lookup results for one IP address.

    The response body is parsed as RFC 822-style ``name: value`` lines and
    the resulting fields become the dict's items. Three extra keys are
    added: ``ipaddr`` (the queried address), ``url`` (the request URL) and
    ``googlemaps`` (a Google Maps link built from the ``latitude`` and
    ``longitude`` fields).

    Raises ``KeyError`` if the response has no ``latitude`` or
    ``longitude`` field.
    """

    _URL_BASE = 'http://api.hostip.info/rough.php?position=true&ip=%s'
    _GOOGLEMAPS_BASE = 'http://maps.google.com/?q=%sN+%sE(%s)'

    def __init__(self, ipaddr):
        url = self._URL_BASE % ipaddr
        fp = urllib.urlopen(url)
        try:
            # seekable=0: the response is a stream and cannot be rewound.
            headers = mimetools.Message(fp, 0)
        finally:
            # Release the connection even if parsing the response fails.
            fp.close()

        dict.__init__(self, headers.dict)
        self['ipaddr'] = ipaddr
        self['url'] = url
        self['googlemaps'] = self._GOOGLEMAPS_BASE % (self['latitude'],
                                                      self['longitude'],
                                                      ipaddr)
- dict として使える HostIP オブジェクト.
>>> test = HostIP('210.156.41.55') >>> for k, v in test.items(): ... print '%s: %s' % (k, v) ... city: Morioka guessed: true url: http://api.hostip.info/rough.php?position=true&ip=210.156.41.55 googlemaps: http://maps.google.com/?q=39.7N+141.15E(210.156.41.55) latitude: 39.7 country: JAPAN ipaddr: 210.156.41.55 country code: JP longitude: 141.15
dict 同士のマージ
- dict.update() だと同じキーを持つ値が上書きされてしまうので,上書きせずに足し合わせたりできる関数が欲しかった.
- ただし,第一引数について破壊的,末端の value が数値以外に色々混ざってる時にどうなるかは知らん,という欠点あり.
def merge(my_dict, new_dict, mergetype='add'):
    """Merge *new_dict* into *my_dict* in place, combining shared keys.

    Unlike ``dict.update()``, a key present in both dicts is not
    overwritten: the two values are combined with the operator named by
    *mergetype* (``'add'`` -> ``+``, ``'sub'`` -> ``-``, ...). Nested dicts
    are merged recursively. Keys only present in *new_dict* are inserted.

    Args:
        my_dict: Destination dict; it is modified.
        new_dict: Dict whose items are merged in; it is left untouched.
        mergetype: Operator name without the surrounding double
            underscores, e.g. ``'add'`` for ``__add__``.

    Returns:
        *my_dict* itself.

    Raises:
        TypeError: If two values cannot be combined with the operator.
    """
    import copy
    import operator

    method_name = '__%s__' % mergetype
    # Prefer the operator-module function: it applies the full binary
    # operator protocol (including reflected methods), so e.g. int + float
    # works instead of yielding NotImplemented.
    combine = getattr(operator, method_name, None)
    if not callable(combine):
        # No operator-module equivalent; call the special method directly.
        def combine(old, new):
            return getattr(old, method_name)(new)

    for key, value in new_dict.items():
        try:
            current = my_dict[key]
        except KeyError:
            # Copy nested dicts so that later merges into my_dict cannot
            # modify the dicts that belong to new_dict.
            if isinstance(value, dict):
                value = copy.deepcopy(value)
            my_dict[key] = value
            continue

        if isinstance(value, dict):
            merge(current, value, mergetype)
        else:
            my_dict[key] = combine(current, value)
    return my_dict
リストから重複を取り除く
- ただし元の順番は保持されない (set を経由するため,結果の並びは不定.ソートされるとは限らない).
try:
    set, frozenset
except NameError:
    # The built-in set types are missing: fall back to the sets module.
    from sets import Set as set, ImmutableSet as frozenset


def uniq(sequence):
    """Return a list containing each distinct element of *sequence* once.

    The elements pass through a ``set``, so the order of the result is
    whatever set iteration yields, not the input order.
    """
    distinct = set(sequence)
    return list(distinct)
Python でマルコフ連鎖
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Generate a random sentence from news feed summaries with a Markov chain."""

import random

import MeCab
import feedparser


def wakati(text):
    """Split *text* into a list of words using MeCab's ``-Owakati`` mode."""
    tagger = MeCab.Tagger("-Owakati")
    parsed = tagger.parse(text)
    # Drop trailing spaces/newlines, then split on single spaces.
    return parsed.rstrip(" \n").split(" ")


def create_markov_table(wordlist, level=2):
    """Build the transition table for a Markov chain of order *level*.

    Returns a dict mapping each tuple of *level* consecutive words to the
    list of words observed directly after it (duplicates kept, so frequent
    followers are proportionally more likely to be picked). Windows that
    contain an empty string, including the initial padding, are skipped.
    """
    markov = {}
    window = [""] * level
    for word in wordlist:
        if all(window):
            markov.setdefault(tuple(window), []).append(word)
        # Slide the window forward by one word.
        window = window[1:] + [word]
    return markov


def gen_sentence(markov, level=2, length=None):
    """Generate text by a random walk over *markov*.

    Args:
        markov: Table produced by ``create_markov_table``.
        level: Order of the chain the table was built with.
        length: Maximum number of words to emit. When omitted it defaults
            to the number of recorded transitions plus *level*, which is
            the length of the source word list when no window was skipped.

    Returns:
        The chosen words concatenated without separators; an empty string
        if *markov* is empty. The words of the starting key itself are not
        part of the result.
    """
    if not markov:
        return ''
    if length is None:
        length = sum(len(followers) for followers in markov.values()) + level

    # list() because random.choice needs an indexable sequence.
    state = random.choice(list(markov))
    sentence = []
    for _ in range(length):
        followers = markov.get(state)
        if not followers:
            # Dead end: the state can no longer change, so stop here
            # instead of idling until the counter runs out.
            break
        word = random.choice(followers)
        sentence.append(word)
        state = tuple(state[1:]) + (word,)
    return ''.join(sentence)


def cnet_news():
    """Fetch the feed; return each summary's first line, UTF-8 encoded.

    The first lines are joined with newlines before encoding.
    """
    feed = feedparser.parse("http://feed.japan.cnet.com/rss/index.rdf")
    first_lines = [entry["summary"].split("\n")[0] for entry in feed.entries]
    return "\n".join(first_lines).encode("utf-8")


if __name__ == "__main__":
    level = 4
    src = cnet_news()
    wordlist = wakati(src)
    markov = create_markov_table(wordlist, level)
    # Pass the cap explicitly so gen_sentence does not rely on a global.
    sentence = gen_sentence(markov, level, len(wordlist))
    # unicode() is Python 2 only; the script handles UTF-8 byte strings.
    print(unicode(sentence, "utf-8"))