#!/usr/bin/env python # -*- coding: utf-8 -*- __version__ = '$Revision$' __author__ = 'Atzm WATANABE ' __date__ = '$Date$' __copyright__ = 'Copyright(C) 2006 Atzm WATANABE, all rights reserved.' __license__ = 'Python' import re import sys import time import shlex import random import getopt import MeCab import nkf from ircbot import SingleServerIRCBot from irclib import nm_to_n try: set, frozenset except NameError: from sets import Set as set, ImmutableSet as frozenset import config; config.init() import my_amazon my_amazon.setLocale(config.get('amazon', 'locale')) my_amazon.setLicense(config.get('amazon', 'access_key')) DEBUG_MSG_TO = sys.stderr def uniq(sequence): """リストから重複を取り除く (順番が狂うので注意) """ return list(set(sequence)) def unicoding(text): """text を強制的に unicode オブジェクトに変換 """ if type(text) is unicode: return text return unicode(nkf.nkf('-w', text), 'utf-8') def ununicoding(text, encoding='iso-2022-jp'): """text を指定された encoding でエンコードし,raw str に強制変換 """ if type(text) is not unicode: return unicoding(text).encode(encoding) return text.encode(encoding) def mecab_parse(text): """MeCab を使って形態素解析し,固有名詞と一般名詞だけを抽出する """ def choice_nominal(wlist): res = [] for word, wtype in wlist: wtypes = wtype.split('-') if '固有名詞' in wtypes or ('名詞' in wtypes and '一般' in wtypes): res.append(unicoding(word)) return res text = ununicoding(text, 'utf-8') result = [] tag = MeCab.Tagger('-Ochasen') for line in tag.parse(text).split('\n'): if not line or line == 'EOS': break words = line.split() result.append((words[0], words[-1])) # word, word-type result = uniq(choice_nominal(result)) return result def _debug(fmt, *args): if __debug__: timeline = time.strftime("%b %d %T", time.localtime()) try: fmt = ununicoding(fmt, 'euc-jp') args = list(args) for i in range(len(args)): if isinstance(args[i], basestring): args[i] = ununicoding(args[i], 'euc-jp') print >> DEBUG_MSG_TO, '(%s) ' % timeline, print >> DEBUG_MSG_TO, fmt % tuple(args) except: print >> DEBUG_MSG_TO, '(%s) ' % timeline, print >> DEBUG_MSG_TO, '!! debug message print failed !!' class AmazonBotBase(SingleServerIRCBot): """アマゾンボットのベースクラス 単体では,受け取ったメッセージの形態素解析と名詞抽出までしかやらない サブクラスで process_keyword を実装して Amazon へクエリを投げるべし サブクラスには onmsg_HOGEHOGE(self, conn, ev, to, args) メソッドを作ることでコマンド追加可能 コマンド書式は !HOGEHOGE arg [, arg2, ...] となる ヘルプはメソッドに docstring を書けば OK """ def __init__(self): _server = [(config.get('irc', 'server'), config.get('irc', 'port', 'int'))] _nick = config.get('bot', 'nick') self._current_lines = 0 self._prev_time = time.time() - config.get('freq', 'timeout', 'int') self._silent = False SingleServerIRCBot.__init__(self, _server, _nick, _nick) def start(self): try: SingleServerIRCBot.start(self) except KeyboardInterrupt: self.die(ununicoding(config.get('bot', 'bye'))) def on_welcome(self, c, e): c.join(config.get('irc', 'channel')) _debug('Joined %s', config.get('irc', 'channel')) def on_nicknameinuse(self, c, e): c.nick(c.get_nickname() + '_') def on_privmsg(self, c, e): return self.on_pubmsg(c, e, to=nm_to_n(e.source())) def on_pubmsg(self, c, e, to=config.get('irc', 'channel')): msg = unicoding(e.arguments()[0]) _debug('pubmsg incoming "%s", should be reply to %s', msg, to) if msg[0] == '!': try: words = shlex.split(ununicoding(msg, 'utf-8')[1:]) except: return False if not words: return False method = getattr(self, 'onmsg_%s' % words[0], lambda *arg: False) return method(c, e, to, words[1:]) # words[0] == command name # silence self.silence(msg, c, e, to) if self._silent: return False # freq_lines self._current_lines += 1 _freq_lines = config.get('freq', 'lines', 'int') if _freq_lines: if config.get('freq', 'lines_random', 'boolean'): _freq_lines = random.randint(int(_freq_lines/2)+1, _freq_lines) _debug('Line count: now %d, next: %d', self._current_lines, _freq_lines) if self._current_lines < _freq_lines: return False self._current_lines = 0 # freq _current_time = time.time() if _current_time < self._prev_time + config.get('freq', 'timeout', 'int'): cur = time.strftime('%H:%M:%S', time.localtime(_current_time)) go = time.strftime('%H:%M:%S', time.localtime( self._prev_time + config.get('freq', 'timeout', 'int'))) _debug('Not expired: now %s, be expired at: %s', cur, go) return False self._prev_time = _current_time nominals = mecab_parse(msg) if not nominals: _debug("Couldn't find nominal words") return False title, url = self.process_keyword(' '.join(nominals)) if title and url: content = unicoding(config.get('bot', 'content')) try: message = ununicoding(': '.join([content, title, url])) except UnicodeError, err: # なぜかたまに unicode オブジェクトを iso-2022-jp でエンコードできない _debug('%s', str(err)) return False c.notice(to, message) return True return False ACTIVE_PATTERN = re.compile(unicoding(config.get('bot', 'active_pattern'))) SILENT_PATTERN = re.compile(unicoding(config.get('bot', 'silent_pattern'))) def silence(self, msg, c, e, to): active = self.ACTIVE_PATTERN.search(msg) silent = self.SILENT_PATTERN.search(msg) _debug('ACT_PATT: %s, SIL_PATT: %s', str(active), str(silent)) if active: self._silent = False c.notice(to, ununicoding(config.get('bot', 'thanks'))) elif silent: self._silent = True c.notice(to, ununicoding(config.get('bot', 'sorry'))) def process_keyword(self, keyword): return [None, None] def is_silent(self): return self._silent def get_current_lines(self): return self._current_lines def get_prev_time(self): return self._prev_time class AmazonBot(AmazonBotBase): """アマゾンボットの実装クラス process_keyword メソッドで Amazon へクエリを投げて結果を返す """ _AVAIL_PRODUCT_LINES = { 'books-jp': '(和書, default)', 'books-us': '(洋書)', 'music-jp': '(ポピュラー音楽)', 'classical-jp': '(クラシック音楽)', 'dvd-jp': '(DVD)', 'vhs-jp': '(ビデオ)', 'electronics-jp': '(エレクトロニクス)', 'kitchen-jp': '(ホーム&キッチン)', 'software-jp': '(ソフトウェア)', 'videogames-jp': '(ゲーム)', 'magazines-jp': '(雑誌)', 'toys-jp': '(おもちゃ&ホビー)', } def __init__(self): AmazonBotBase.__init__(self) def get_version(self): return 'AmazonBot by %s, based on python-irclib' % __author__ def onmsg_s(self, c, e, to, args): return self.onmsg_status(c, e, to, args) def onmsg_status(self, c, e, to, args): """Syntax: !status """ _debug('in status command: %s', str(args)) c.notice(to, 'silent: %s' % self.is_silent()) c.notice(to, 'current lines: %d' % self.get_current_lines()) c.notice(to, time.strftime('previous time: %b %d %T', time.localtime(self.get_prev_time()))) return True def onmsg_isbn(self, c, e, to, args): """Syntax: !isbn """ return self.onmsg_asin(c, e, to, args) def onmsg_asin(self, c, e, to, args): """Syntax: !asin """ _debug('in asin command: %s', str(args)) try: data = my_amazon.searchByASIN(args[0]) except my_amazon.AmazonError, err: c.notice(to, ununicoding(config.get('bot', 'no_products'))) _debug('Caught AmazonError in onmsg_asin: %s', str(err)) return False except IndexError, err: c.notice(to, 'Please specify an argument.') return False return self._process_onmsg(c, e, to, data) def onmsg_k(self, c, e, to, args): return self.onmsg_keyword(c, e, to, args) def onmsg_keyword(self, c, e, to, args): """Syntax: !keyword [-h] [-t type] [, keyword2, ...] """ _debug('in keyword command: %s', str(args)) try: options, rest = getopt.getopt(args, 't:h', ['type=', 'help']) except getopt.GetoptError, err: _debug('Caught GetoptError in onmsg_keyword: %s', str(err)) return False keyword = ' '.join(rest).strip() product_line = 'books-jp' for opt, val in options: if opt in ['-t', '--type']: if val not in self._AVAIL_PRODUCT_LINES.keys(): c.notice(to, 'Type "%s" is not available.' % val) return False product_line = val break elif opt in ['-h', '--help']: _from = nm_to_n(e.source()) # ログを流してしまうのでヘルプは直接送信元へ c.notice(_from, ununicoding('Available types:')) for key, val in self._AVAIL_PRODUCT_LINES.iteritems(): time.sleep(1) # XXX: 連続投稿すると弾かれることがあるので暫定対処 c.notice(_from, ununicoding(' * %s: %s' % (key, val))) return True if not keyword: c.notice(to, 'Please specify keywords.') return False _debug('keyword="%s", product_line=%s', keyword, product_line) try: data = my_amazon.searchByKeyword(keyword, product_line=product_line) except my_amazon.AmazonError, err: c.notice(to, ununicoding(config.get('bot', 'no_products'))) _debug('Caught AmazonError in onmsg_amazon: %s', str(err)) return False return self._process_onmsg(c, e, to, data) def onmsg_h(self, c, e, to, args): return self.onmsg_help(c, e, to, args) def onmsg_help(self, c, e, to, args): """Syntax: !help """ _debug('in help command: %s', str(args)) _from = nm_to_n(e.source()) # ログを流してしまうのでヘルプは直接送信元へ c.notice(_from, self.get_version()) docs = [] for key in dir(self): val = getattr(self, key, '') _debug('key=%s, val=%s', key, str(val)) if key[:6] != 'onmsg_': continue doc = val.__doc__ if doc: doc = doc.strip() if not doc: continue time.sleep(1) # XXX: 連続投稿すると弾かれるっぽいので暫定対処 c.notice(_from, doc) return True def _process_onmsg(self, c, e, to, data): if type(data.Details) is not list: data.Details = [data.Details] detail = random.choice(data.Details) title = ununicoding(detail.ProductName) url = ununicoding(detail.URL) c.notice(to, '%(title)s: %(url)s' % locals()) return True def process_keyword(self, keyword): keyword = ununicoding(keyword, 'utf-8') _debug('KEYWORD: %s', keyword) try: data = my_amazon.searchByBlended(keyword) if type(data.ProductLine) is not type([]): data.ProductLine = [data.ProductLine] except my_amazon.AmazonError, err: _debug('Caught AmazonError: %s', str(err)) return [None, None] product_line = random.choice(data.ProductLine) detail = random.choice(product_line.ProductInfo.Details) url = unicoding(getattr(detail, 'URL', None)) product_name = unicoding(getattr(detail, 'ProductName', None)) return [product_name, url] if __name__ == '__main__': bot = AmazonBot() bot.start() print '> Bye ;)'