wiki:Python

Version 12 (modified by atzm, 17 years ago) (diff)

--

Python

メモ.主に日記からの転載.

可変長引数

>>> def hoge(*a):
...     print a
>>> hoge(1, 2, 3)
(1, 2, 3)

>>> def hage(a, b, c):
...     print a, b, c
>>> a = (1, 2, 3)
>>> hage(*a)
1 2 3

>>> def hige(**a):
...     print a
>>> hige(a=1, b=2, c=3)
{'a': 1, 'c': 3, 'b': 2}

>>> def huge(a, b, c):
...     print a, b, c
>>> a = {'a': 1, 'b': 2, 'c': 3}
>>> huge(**a)
1 2 3

日本語関連

文字コードとか

  • 例えば euc-jp の場合
    >>> a = 'あ'
    >>> ord(a[0]) == 0xa4
    True
    >>> ord(a[1]) == 0xa2
    True
    
    >>> ''.join([hex(ord(c))[2:] for c in 'あいうえお'])
    'a4a2a4a4a4a6a4a8a4aa'
    

行列の行と列を入れ替えるワンライナー

$ cat hoge.txt 
1 2 3 4
5 6 7 8
9 10 11 12

$ python -c 'import sys; print "\n".join([" ".join(i) for i in zip(*[i.split() for i in sys.stdin])])' < hoge.txt
1 5 9
2 6 10
3 7 11
4 8 12
  • キモは zip(*list)

staticmethod

>>> class Hoge:
...     def hoge(*args):
...         print ', '.join([str(i) for i in args])
...     hoge = staticmethod(hoge)
...
>>> Hoge.hoge(1, '2', 3.3, None, False, Hoge)
1, 2, 3.3, None, False, __main__.Hoge

raw_input 中に SIGALRM 出すと EOFError

import signal
def _ignore_alarm(*_args):
    # No-op SIGALRM handler: accepts whatever arguments the signal machinery
    # passes and does nothing, so the alarm itself does not kill the process.
    return None


# Arm a 5-second alarm and then block on interactive input.  According to
# the notes accompanying this snippet, raw_input() fails with EOFError once
# the alarm fires while it is still waiting.
signal.signal(signal.SIGALRM, _ignore_alarm)
signal.alarm(5)
raw_input()
  • 5 秒待つと EOFError.
  • 何とか回避できんもんかなぁ.

HostIP を使う

import urllib
import mimetools

class HostIP(dict):
    """Dict of geolocation data for one IP address, fetched from hostip.info.

    The instance is populated with the fields parsed from the API response
    and then extended with three extra keys:

    * ``'ipaddr'``     -- the address that was looked up
    * ``'url'``        -- the API URL that was requested
    * ``'googlemaps'`` -- a Google Maps link built from the reported
      ``'latitude'`` and ``'longitude'``

    Constructing an instance performs a blocking HTTP request.

    Raises:
        KeyError: if the parsed response has no ``'latitude'`` or
            ``'longitude'`` entry.
    """

    # Query URL template; %s is replaced by the IP address.
    _URL_BASE = 'http://api.hostip.info/rough.php?position=true&ip=%s'
    # Map link template: latitude, longitude, label.
    # NOTE(review): the "N"/"E" suffixes are fixed, so this presumably only
    # yields a correct link for northern/eastern coordinates -- confirm.
    _GOOGLEMAPS_BASE = 'http://maps.google.com/?q=%sN+%sE(%s)'

    def __init__(self, ipaddr):
        """Look up *ipaddr* and fill the dict with the result."""
        url = self._URL_BASE % ipaddr

        fp = urllib.urlopen(url)
        try:
            # The response body is parsed as a block of "Key: value" lines,
            # i.e. the same shape as a mail/HTTP header section.
            headers = mimetools.Message(fp, 0)
        finally:
            # Always release the connection, even when parsing fails.
            fp.close()

        dict.__init__(self, headers.dict)
        self['ipaddr'] = ipaddr
        self['url'] = url
        self['googlemaps'] = self._GOOGLEMAPS_BASE % (
            self['latitude'], self['longitude'], ipaddr)
  • dict として使える HostIP オブジェクト.
>>> test = HostIP('210.156.41.55')
>>> for k, v in test.items():
...     print '%s: %s' % (k, v)
... 
city: Morioka
guessed: true
url: http://api.hostip.info/rough.php?position=true&ip=210.156.41.55
googlemaps: http://maps.google.com/?q=39.7N+141.15E(210.156.41.55)
latitude: 39.7
country: JAPAN
ipaddr: 210.156.41.55
country code: JP
longitude: 141.15

dict 同士のマージ

  • dict.update() だと同じキーを持つ値が上書きされてしまうので,上書きせずに足し合わせたりできる関数が欲しかった.
  • ただし,第一引数について破壊的,末端の value が数値以外に色々混ざってる時にどうなるかは知らん,という欠点あり.
def merge(my_dict, new_dict, mergetype='add'):
    """Recursively merge *new_dict* into *my_dict*, combining shared keys.

    Unlike ``dict.update()``, a key present in both dicts is not overwritten:
    the two values are combined with the operation named by *mergetype*
    (``'add'`` -> ``+``, ``'sub'`` -> ``-``, ``'or'`` -> ``|``, ...).  Values
    of *new_dict* that are themselves dicts are merged recursively.

    Args:
        my_dict: Destination dict.  It is modified in place.
        new_dict: Dict whose entries are merged in.  It is not modified.
        mergetype: Operation name; ``'__<mergetype>__'`` is looked up in the
            ``operator`` module first and, if absent there, as a method on
            the existing value.

    Returns:
        *my_dict* itself, after merging.

    Raises:
        TypeError: if the two values cannot be combined with the requested
            operation.
    """
    import copy
    import operator

    dunder = '__%s__' % mergetype
    # operator.__add__ & co. implement the full binary-operator protocol
    # (including reflected methods), so mixed operands such as int + float
    # produce a real result instead of the NotImplemented sentinel that a
    # direct ``value.__add__(other)`` call returns.
    combine = getattr(operator, dunder, None)

    for k, v in new_dict.items():
        if k not in my_dict:
            # Copy nested dicts so later merges into my_dict cannot reach
            # back and mutate new_dict's own sub-dicts.
            if isinstance(v, dict):
                v = copy.deepcopy(v)
            my_dict[k] = v
        elif isinstance(v, dict):
            merge(my_dict[k], v, mergetype)
        elif combine is not None:
            my_dict[k] = combine(my_dict[k], v)
        else:
            # No operator-module equivalent: fall back to calling the method
            # on the existing value, but never store NotImplemented.
            result = getattr(my_dict[k], dunder)(v)
            if result is NotImplemented:
                raise TypeError('cannot %s %r and %r for key %r'
                                % (mergetype, my_dict[k], v, k))
            my_dict[k] = result
    return my_dict

リストから重複を取り除く

  • ただし元の順番は保持されない(結果の並びは set の内部順序に依存する).
# Probe for the builtin set types; where they are missing (NameError),
# bind the equivalents from the `sets` module under the same names.
try:
    set
    frozenset
except NameError:
    from sets import Set as set
    from sets import ImmutableSet as frozenset


def uniq(sequence):
    """Return the distinct items of *sequence* as a list.

    The items pass through a set, so they must be hashable and the order
    of the returned list is not the order of the input.
    """
    distinct = set(sequence)
    return list(distinct)

Python でマルコフ連鎖

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import random
import MeCab
import feedparser

def wakati(text):
    """Tokenize *text* with a MeCab tagger created with ``-Owakati``.

    Returns:
        The list of tokens obtained by stripping trailing spaces/newlines
        from the tagger output and splitting it on single spaces.
    """
    tagger = MeCab.Tagger("-Owakati")
    parsed = tagger.parse(text)
    # Drop the trailing " \n" padding before splitting so the last token
    # is not followed by empty strings.
    return parsed.rstrip(" \n").split(" ")

def create_markov_table(wordlist, level=2):
    """Build a Markov transition table of order *level* from *wordlist*.

    Args:
        wordlist: Iterable of words, in reading order.
        level: Number of preceding words that form a table key.

    Returns:
        A dict mapping each tuple of *level* consecutive words to the list
        of words observed immediately after it (duplicates kept, in order
        of appearance).
    """
    table = {}
    # Sliding window over the last `level` words, padded with "" at start.
    window = [""] * level

    for word in wordlist:
        # Record a transition only when every slot of the window is truthy,
        # which skips the initial padding (and any empty words).
        if all(window):
            table.setdefault(tuple(window), []).append(word)
        window = window[1:] + [word]

    return table

def gen_sentence(markov, level=2, max_words=None):
    """Generate a random sentence by walking the Markov table *markov*.

    The walk starts from a randomly chosen key and repeatedly appends a
    random successor of the current state, sliding the state forward by one
    word each time.  It stops when *max_words* words were produced or when
    the current state has no recorded successor.

    Args:
        markov: Dict mapping tuples of words to lists of successor words
            (the shape returned by ``create_markov_table``).
        level: Order of the table.  Only used to derive the default length
            bound.
        max_words: Upper bound on the number of generated words.  When
            ``None`` it defaults to the total number of successors stored
            in *markov* plus *level*.

    Returns:
        The generated words concatenated without a separator; ``''`` when
        *markov* is empty.
    """
    if not markov:
        return ''

    if max_words is None:
        # Derive the bound from the table itself instead of relying on a
        # module-level word list being defined by the caller.
        max_words = sum([len(successors) for successors in markov.values()]) + level

    # list() so random.choice gets an indexable sequence on every version.
    state = random.choice(list(markov))
    sentence = []

    for _ in range(max_words):
        successors = markov.get(state)
        if not successors:
            # Dead end: no later iteration could produce another word.
            break
        word = random.choice(successors)
        sentence.append(word)
        state = tuple(state)[1:] + (word,)

    return ''.join(sentence)

def cnet_news():
    """Fetch the CNET Japan RSS feed and collect one line per entry.

    Returns:
        The first line of every entry's ``summary``, joined with newlines
        and encoded as UTF-8.
    """
    feed = feedparser.parse("http://feed.japan.cnet.com/rss/index.rdf")
    first_lines = []
    for entry in feed.entries:
        # Keep only the text before the first newline of each summary.
        first_lines.append(entry["summary"].split("\n")[0])
    return "\n".join(first_lines).encode("utf-8")

if __name__ == "__main__":
    level = 4
    src = cnet_news()
    wordlist = wakati(src)
    markov = create_markov_table(wordlist, level)
    sentence = gen_sentence(markov, level)
    print unicode(sentence, "utf-8")