diff options
author | Sean B. Palmer <sbp@aldebaran.local> | 2011-09-05 17:46:17 +0100 |
---|---|---|
committer | Sean B. Palmer <sbp@aldebaran.local> | 2011-09-05 17:46:17 +0100 |
commit | 2d3009ccb123bb34bf0d8b8b516a15c05240bf03 (patch) | |
tree | 28029457224da9cd91187e17f8b90cff5cb2de57 /modules/search.py | |
parent | 66edd833726d0a14400df65311999b1496b9f8cb (diff) | |
download | bot-2d3009ccb123bb34bf0d8b8b516a15c05240bf03.tar.gz bot-2d3009ccb123bb34bf0d8b8b516a15c05240bf03.tar.bz2 bot-2d3009ccb123bb34bf0d8b8b516a15c05240bf03.zip |
Attempt to patch a possible message injection hole.
Diffstat (limited to 'modules/search.py')
-rwxr-xr-x | modules/search.py | 96 |
1 files changed, 72 insertions, 24 deletions
```diff
diff --git a/modules/search.py b/modules/search.py
index c8f9a6a..bfc50bd 100755
--- a/modules/search.py
+++ b/modules/search.py
@@ -18,26 +18,26 @@ class Grab(web.urllib.URLopener):
    def http_error_default(self, url, fp, errcode, errmsg, headers):
       return web.urllib.addinfourl(fp, [headers, errcode], "http:" + url)
 
-def search(query):
+def google_ajax(query):
    """Search using AjaxSearch, and return its JSON."""
    uri = 'http://ajax.googleapis.com/ajax/services/search/web'
-   args = '?v=1.0&safe=off&q=' + web.urllib.quote(query.encode('utf-8'))
+   args = '?v=1.0&safe=off&q=' + web.urllib.quote(query)
    handler = web.urllib._urlopener
    web.urllib._urlopener = Grab()
    bytes = web.get(uri + args)
    web.urllib._urlopener = handler
    return web.json(bytes)
 
-def result(query):
-   results = search(query)
+def google_search(query):
+   results = google_ajax(query)
    try: return results['responseData']['results'][0]['unescapedUrl']
    except IndexError: return None
    except TypeError:
       print results
       return False
 
-def count(query):
-   results = search(query)
+def google_count(query):
+   results = google_ajax(query)
    if not results.has_key('responseData'): return '0'
    if not results['responseData'].has_key('cursor'): return '0'
    if not results['responseData']['cursor'].has_key('estimatedResultCount'):
@@ -56,7 +56,8 @@ def g(phenny, input):
    query = input.group(2)
    if not query:
       return phenny.reply('.g what?')
-   uri = result(query)
+   query = query.encode('utf-8')
+   uri = google_search(query)
    if uri:
       phenny.reply(uri)
       if not hasattr(phenny.bot, 'last_seen_uri'):
@@ -73,7 +74,8 @@ def gc(phenny, input):
    query = input.group(2)
    if not query:
       return phenny.reply('.gc what?')
-   num = formatnumber(count(query))
+   query = query.encode('utf-8')
+   num = formatnumber(google_count(query))
    phenny.say(query + ': ' + num)
 gc.commands = ['gc']
 gc.priority = 'high'
@@ -93,7 +95,8 @@ def gcs(phenny, input):
    results = []
    for i, query in enumerate(queries):
       query = query.strip('[]')
-      n = int((formatnumber(count(query)) or '0').replace(',', ''))
+      query = query.encode('utf-8')
+      n = int((formatnumber(google_count(query)) or '0').replace(',', ''))
       results.append((n, query))
       if i >= 2: __import__('time').sleep(0.25)
       if i >= 4: __import__('time').sleep(0.25)
@@ -105,6 +108,13 @@ gcs.commands = ['gcs', 'comp']
 
 r_bing = re.compile(r'<h3><a href="([^"]+)"')
 
+def bing_search(query, lang='en-GB'):
+   query = web.urllib.quote(query)
+   base = 'http://www.bing.com/search?mkt=%s&q=' % lang
+   bytes = web.get(base + query)
+   m = r_bing.search(bytes)
+   if m: return m.group(1)
+
 def bing(phenny, input):
    """Queries Bing for the specified input."""
    query = input.group(2)
@@ -115,12 +125,9 @@ def bing(phenny, input):
    if not query:
       return phenny.reply('.bing what?')
 
-   query = web.urllib.quote(query.encode('utf-8'))
-   base = 'http://www.bing.com/search?mkt=%s&q=' % lang
-   bytes = web.get(base + query)
-   m = r_bing.search(bytes)
-   if m:
-      uri = m.group(1)
+   query = query.encode('utf-8')
+   uri = bing_search(query, lang)
+   if uri:
       phenny.reply(uri)
       if not hasattr(phenny.bot, 'last_seen_uri'):
          phenny.bot.last_seen_uri = {}
@@ -129,24 +136,65 @@ def bing(phenny, input):
 bing.commands = ['bing']
 bing.example = '.bing swhack'
 
-r_ddg = re.compile(r'nofollow" class="[^"]+" href="(.*?)">')
+r_duck = re.compile(r'nofollow" class="[^"]+" href="(.*?)">')
 
-def ddg(phenny, input):
+def duck_search(query):
+   query = query.replace('!', '')
+   query = web.urllib.quote(query)
+   uri = 'http://duckduckgo.com/html/?q=%s&kl=uk-en' % query
+   bytes = web.get(uri)
+   m = r_duck.search(bytes)
+   if m: return web.decode(m.group(1))
+
+def duck(phenny, input):
    query = input.group(2)
    if not query: return phenny.reply('.ddg what?')
 
-   query = web.urllib.quote(query.encode('utf-8'))
-   uri = 'http://duckduckgo.com/html/?q=%s&kl=uk-en' % query
-   bytes = web.get(uri)
-   m = r_ddg.search(bytes)
-   if m:
-      uri = m.group(1)
+   query = query.encode('utf-8')
+   uri = duck_search(query)
+   if uri:
       phenny.reply(uri)
       if not hasattr(phenny.bot, 'last_seen_uri'):
          phenny.bot.last_seen_uri = {}
       phenny.bot.last_seen_uri[input.sender] = uri
    else: phenny.reply("No results found for '%s'." % query)
-ddg.commands = ['ddg']
+duck.commands = ['duck', 'ddg']
+
+def search(phenny, input):
+   if not input.group(2):
+      return phenny.reply('.search for what?')
+   query = input.group(2).encode('utf-8')
+   gu = google_search(query) or '-'
+   bu = bing_search(query) or '-'
+   du = duck_search(query) or '-'
+
+   if (gu == bu) and (bu == du):
+      result = '%s (g, b, d)' % gu
+   elif (gu == bu):
+      result = '%s (g, b), %s (d)' % (gu, du)
+   elif (bu == du):
+      result = '%s (b, d), %s (g)' % (bu, gu)
+   elif (gu == du):
+      result = '%s (g, d), %s (b)' % (gu, bu)
+   else:
+      if len(gu) > 250: gu = '(extremely long link)'
+      if len(bu) > 150: bu = '(extremely long link)'
+      if len(du) > 150: du = '(extremely long link)'
+      result = '%s (g), %s (b), %s (d)' % (gu, bu, du)
+
+   phenny.reply(result)
+search.commands = ['search']
+
+def suggest(phenny, input):
+   if not input.group(2):
+      return phenny.reply("No query term.")
+   query = input.group(2).encode('utf-8')
+   uri = 'http://websitedev.de/temp-bin/suggest.pl?q='
+   answer = web.get(uri + web.urllib.quote(query).replace('+', '%2B'))
+   if answer:
+      phenny.say(answer)
+   else: phenny.reply('Sorry, no result.')
+suggest.commands = ['suggest']
 
 if __name__ == '__main__':
    print __doc__.strip()
```