diff options
author | Sean B. Palmer <http://inamidst.com/sbp/> | 2008-02-21 12:06:33 +0000 |
---|---|---|
committer | Sean B. Palmer <http://inamidst.com/sbp/> | 2008-02-21 12:06:33 +0000 |
commit | 7931fab14599b739c18c8f1ebcc24b75688dbc09 (patch) | |
tree | bf4df9757f10c155e3b6f78aed48f15884ebbbe6 /modules/translate.py | |
download | bot-7931fab14599b739c18c8f1ebcc24b75688dbc09.tar.gz bot-7931fab14599b739c18c8f1ebcc24b75688dbc09.tar.bz2 bot-7931fab14599b739c18c8f1ebcc24b75688dbc09.zip |
Phenny2, now being tested on Freenode as the main phenny.
Diffstat (limited to 'modules/translate.py')
-rw-r--r-- | modules/translate.py | 102 |
1 files changed, 102 insertions, 0 deletions
#!/usr/bin/env python
# coding=utf-8
"""
translate.py - Phenny Translation Module
Copyright 2008, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.

http://inamidst.com/phenny/
"""

import re
import web

# Pulls the translated text out of the Babelfish result page.
r_translation = re.compile(r'<div style=padding:10px;>([^<]+)</div>')

# Two or more consecutive spaces, collapsed to one when tidying a translation.
r_spaces = re.compile(r' {2,}')

# Language names as reported by the guesser service -> two-letter codes.
LANGUAGE_CODES = {
    'english': 'en',
    'french': 'fr',
    'spanish': 'es',
    'portuguese': 'pt',
    'german': 'de',
    'italian': 'it',
    'korean': 'ko',
    'japanese': 'ja',
    'chinese': 'zh',
    'dutch': 'nl',
    'greek': 'el',
    'russian': 'ru',
}


def guess_language(phrase):
    """Ask the Xerox language guesser which language *phrase* is written in.

    Returns the two-letter code for a recognised language name, the
    lower-cased raw name (truncated at the first underscore) when the name
    is not in LANGUAGE_CODES, or the string 'unknown' when the response
    contains no result marker.
    """
    marker = '<listing><font size=+1>'
    uri = 'http://www.xrce.xerox.com/cgi-bin/mltt/LanguageGuesser'
    form = {'Text': phrase}
    # NOTE(review): web.post is project-local; presumably it returns the
    # response body as a string -- confirm against the web module.
    response = web.post(uri, form)
    for line in response.splitlines():
        if marker not in line:
            continue
        # Everything after the first marker on the line is the language name.
        name = line.split(marker, 1)[1].strip().lower()
        # Keep only the part before the first underscore, if any.
        name = name.split('_', 1)[0]
        return LANGUAGE_CODES.get(name, name)
    return 'unknown'


def translate(phrase, lang, target='en'):
    """Translate *phrase* from *lang* into *target* using Babelfish.

    Returns the translation with carriage returns and newlines turned into
    spaces and runs of spaces collapsed, or None when the result page does
    not contain a translation.
    """
    babelfish = 'http://world.altavista.com/tr'
    form = {
        'doit': 'done',
        'intl': '1',
        'tt': 'urltext',
        'trtext': phrase,
        'lp': lang + '_' + target,
    }

    response = web.post(babelfish, form)
    m = r_translation.search(response)
    if m is None:
        return None

    translation = m.group(1)
    translation = translation.replace('\r', ' ')
    translation = translation.replace('\n', ' ')
    # Collapse every run of spaces down to a single space.
    return r_spaces.sub(' ', translation)


# Command handler: translate a quoted phrase into English, or, when the
# phrase is already English, round-trip it through other languages for fun.
# Documented with comments rather than a docstring so that tr.__doc__ stays
# unset, exactly as before.
def tr(phenny, input):
    lang, phrase = input.groups()

    if (len(phrase) > 350) and (not phenny.admin(input.nick)):
        return phenny.reply('Phrase must be under 350 characters.')

    # An explicit two-letter <lang> prefix forces the source language, as
    # promised by tr.doc; otherwise fall back to guessing.
    language = lang or guess_language(phrase)
    # guess_language signals failure with 'unknown', not None.
    if language is None or language == 'unknown':
        return phenny.reply('Unable to guess the language, sorry.')

    if language != 'en':
        translation = translate(phrase, language)
        if translation is not None:
            return phenny.reply(u'"%s" (%s)' % (translation, language))

        error = "I think it's %s, but I can't translate that language."
        return phenny.reply(error % language.title())

    # Otherwise, it's English, so mangle it for fun
    for other in ['de', 'ja']:
        phrase = translate(phrase, 'en', other)
        if phrase is None:
            # A failed hop must not be fed back into translate().
            break
        phrase = translate(phrase, other, 'en')
        if phrase is None:
            break

    if phrase is not None:
        return phenny.reply(u'"%s" (en-unmangled)' % phrase)
    return phenny.reply("I think it's English already.")
    # @@ or 'Why but that be English, sire.'
tr.doc = ('phenny: "<phrase>"? or phenny: <lang> "<phrase>"?',
          'Translate <phrase>, optionally forcing the <lang> interpretation.')
# Same pattern text as the original ur'...' literal, spelled so that it is
# also a valid literal on interpreters without the ur prefix.
tr.rule = ('$nick', u'(?:([a-z]{2}) +)?["“](.+?)["”]\\? *$')
tr.priority = 'low'

if __name__ == '__main__':
    print(__doc__.strip())