#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""IRC bot that answers channel messages with a random Amazon product.

Incoming messages are run through MeCab, the proper and common nouns are
extracted, and those nouns are used as the keyword of an Amazon blended
search.  A randomly chosen hit is posted back as "content: title: url".
"""

__version__ = '$Revision$'
__author__ = 'Atzm WATANABE '
__date__ = '$Date$'
__copyright__ = 'Copyright(C) 2006 Atzm WATANABE, all rights reserved.'
__license__ = 'Python'

import re
import sys
import time
import random

import MeCab
import nkf

from ircbot import SingleServerIRCBot
from irclib import nm_to_n

import config
config.init()

import my_amazon
my_amazon.setLocale(config.get('amazon', 'locale'))
my_amazon.setLicense(config.get('amazon', 'access_key'))

# Fall back to the "sets" module on interpreters without builtin set types.
try:
    set, frozenset
except NameError:
    from sets import Set as set, ImmutableSet as frozenset


def uniq(sequence):
    """Return the distinct items of sequence as a list.

    The items pass through a set, so the original order is NOT preserved.
    """
    return list(set(sequence))


def unicoding(text):
    """Force text into a unicode object.

    unicode input is returned untouched; anything else is converted with
    nkf ('-w') and the result is decoded as UTF-8.
    """
    if isinstance(text, unicode):
        return text
    return unicode(nkf.nkf('-w', text), 'utf-8')


def ununicoding(text, encoding='iso-2022-jp'):
    """Encode text with the given encoding and return a raw str.

    Non-unicode input is first normalised through unicoding().
    Raises UnicodeError when the text cannot be represented in encoding.
    """
    return unicoding(text).encode(encoding)


def mecab_parse(text):
    """Run MeCab over text and return its proper and common nouns.

    Returns a list of unicode words without duplicates, in no particular
    order (see uniq()).
    """
    def choice_nominal(wlist):
        # Keep a word when its hyphen-separated word type contains
        # "proper noun", or both "noun" and "general".
        res = []
        for word, wtype in wlist:
            wtypes = wtype.split('-')
            if '固有名詞' in wtypes or ('名詞' in wtypes and '一般' in wtypes):
                res.append(unicoding(word))
        return res

    text = ununicoding(text, 'utf-8')
    result = []
    tag = MeCab.Tagger('-Ochasen')

    for line in tag.parse(text).split('\n'):
        if not line or line == 'EOS':
            break
        words = line.split()
        if not words:
            # Whitespace-only line: nothing to index, skip instead of crashing.
            continue
        result.append((words[0], words[-1]))  # word, word-type

    return uniq(choice_nominal(result))


class AmazonBotBase(SingleServerIRCBot):
    """Base class of the Amazon bot.

    On its own this class only does the morphological analysis of received
    messages and the noun extraction.  Subclasses should implement
    process_keyword() to send the query to Amazon.
    """

    # Messages matching these patterns switch the bot back on / mute it.
    ACTIVE_PATTERN = re.compile(unicoding(config.get('bot', 'active_pattern')))
    SILENT_PATTERN = re.compile(unicoding(config.get('bot', 'silent_pattern')))

    def __init__(self):
        _server = [(config.get('irc', 'server'), config.get('irc', 'port', 'int'))]
        _nick = config.get('bot', 'nick')

        self._prev_time = time.time()  # start of the current cool-down period
        self._silent = False           # True while the bot has been told to hush
        SingleServerIRCBot.__init__(self, _server, _nick, _nick)

    def start(self):
        """Run the bot until interrupted, then leave with the 'bye' message."""
        try:
            SingleServerIRCBot.start(self)
        except KeyboardInterrupt:
            self.die(ununicoding(config.get('bot', 'bye')))

    def on_welcome(self, c, e):
        """Join the configured channel once the server has welcomed us."""
        c.join(config.get('irc', 'channel'))
        if __debug__:
            print >> sys.stderr, 'DEBUG> Joined %s' % config.get('irc', 'channel')

    def on_nicknameinuse(self, c, e):
        """Retry with an underscore appended when the nick is taken."""
        c.nick(c.get_nickname() + '_')

    def on_privmsg(self, c, e):
        """Treat private messages exactly like channel messages."""
        return self.on_pubmsg(c, e)

    def on_pubmsg(self, c, e):
        """Answer a message with an Amazon product, at most once per 'freq'.

        Returns True when a reply was posted and False otherwise.
        """
        # Stay quiet while the cool-down that started at _prev_time is running.
        # (The comparison used to be inverted, which muted the bot for good
        # as soon as the first period had elapsed.)
        if time.time() < self._prev_time + config.get('bot', 'freq', 'int'):
            if __debug__:
                prev = time.strftime('%y/%m/%d %H:%M:%S', time.localtime(self._prev_time))
                print >> sys.stderr, 'DEBUG> Not expired: prev time is %s' % prev
            return False

        msg = unicoding(e.arguments()[0])

        self.silence(msg, c, e)
        if self._silent:
            return False

        nominals = mecab_parse(msg)
        if not nominals:
            if __debug__:
                print >> sys.stderr, "DEBUG> Couldn't find nominal words"
            return False

        title, url = self.process_keyword(' '.join(nominals))
        if not (title and url):
            return False

        content = unicoding(config.get('bot', 'content'))
        try:
            message = ununicoding(': '.join([content, title, url]))
        except UnicodeError:
            # For some reason a unicode object occasionally cannot be
            # encoded as iso-2022-jp; give up on this reply.
            return False

        c.privmsg(self._reply_target(e), message)
        self._prev_time = time.time()  # a reply went out: restart the cool-down
        return True

    def silence(self, msg, c, e):
        """Switch the silent flag according to msg and acknowledge it.

        ACTIVE_PATTERN wins over SILENT_PATTERN when both match.
        """
        ch = self._reply_target(e)
        active = self.ACTIVE_PATTERN.search(msg)
        silent = self.SILENT_PATTERN.search(msg)

        if __debug__:
            print >> sys.stderr, 'DEBUG> ACT_PATT: %s, SIL_PATT: %s' % (str(active), str(silent))

        if active:
            self._silent = False
            c.privmsg(ch, ununicoding(config.get('bot', 'thanks')))
        elif silent:
            self._silent = True
            c.privmsg(ch, ununicoding(config.get('bot', 'sorry')))

    def process_keyword(self, keyword):
        """Turn keyword into a [title, url] pair; the base class finds nothing."""
        return [None, None]

    def _reply_target(self, e):
        """Return the nick or channel a reply to event e has to be sent to."""
        if e.eventtype() == 'privmsg':
            # The target of a private message is the bot itself, so answer
            # the sender rather than talking to our own nick.
            return nm_to_n(e.source())
        return e.target()


class AmazonBot(AmazonBotBase):
    """Concrete Amazon bot.

    process_keyword() sends the query to Amazon and returns the result.
    """

    def __init__(self):
        AmazonBotBase.__init__(self)

    def get_version(self):
        """Return the version string of this bot."""
        return 'AmazonBot by %s, based on python-irclib' % __author__

    def process_keyword(self, keyword):
        """Search Amazon for keyword and pick one product at random.

        Returns [product_name, url] as unicode objects, or [None, None]
        when the search fails or yields nothing usable.
        """
        keyword = ununicoding(keyword, 'utf-8')

        if __debug__:
            print >> sys.stderr, 'DEBUG> KEYWORD: %s' % ununicoding(keyword, 'euc-jp')

        try:
            data = my_amazon.searchByBlended(keyword)
            # A lone result is not wrapped in a list; normalise it.
            if not isinstance(data.ProductLine, list):
                data.ProductLine = [data.ProductLine]
        except my_amazon.AmazonError:
            if __debug__:
                # sys.exc_info() keeps this handler free of version-specific
                # "except ..., e" / "except ... as e" syntax.
                print >> sys.stderr, 'DEBUG> Caught AmazonError: %s' % str(sys.exc_info()[1])
            return [None, None]

        if not data.ProductLine:
            return [None, None]
        product_line = random.choice(data.ProductLine)

        details = product_line.ProductInfo.Details
        # NOTE(review): presumably a lone detail is unwrapped the same way a
        # lone ProductLine is -- confirm against my_amazon.
        if not isinstance(details, list):
            details = [details]
        if not details:
            return [None, None]
        detail = random.choice(details)

        url = getattr(detail, 'URL', None)
        product_name = getattr(detail, 'ProductName', None)
        if url is None or product_name is None:
            # unicoding(None) would hand None to nkf; report "nothing found".
            return [None, None]

        return [unicoding(product_name), unicoding(url)]


if __name__ == '__main__':
    bot = AmazonBot()
    bot.start()