From 2ea54b9d1e382db8eeb4254dc637e7d04316d01e Mon Sep 17 00:00:00 2001
From: Harrison Healey
Date: Wed, 1 Jun 2016 16:05:36 -0400
Subject: PLT-2283 Improved trimming of punctuation from hashtags and search terms (#3178)

* Improved trimming of punctuation from hashtags and search terms

* Separated punctuation regexes used for hashtags and for search terms
---
 model/search_params.go |  8 ++++++--
 model/utils.go         |  5 ++---
 model/utils_test.go    | 14 ++++++++++++--
 3 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'model')

diff --git a/model/search_params.go b/model/search_params.go
index 250c8e1f3..1772c9028 100644
--- a/model/search_params.go
+++ b/model/search_params.go
@@ -4,9 +4,13 @@
 package model
 
 import (
+	"regexp"
 	"strings"
 )
 
+var searchTermPuncStart = regexp.MustCompile(`^[^\pL\d\s#"]+`)
+var searchTermPuncEnd = regexp.MustCompile(`[^\pL\d\s*"]+$`)
+
 type SearchParams struct {
 	Terms string
 	IsHashtag bool
@@ -91,8 +95,8 @@ func parseSearchFlags(input []string) ([]string, [][2]string) {
 
 		if !isFlag {
 			// trim off surrounding punctuation (note that we leave trailing asterisks to allow wildcards)
-			word = puncStart.ReplaceAllString(word, "")
-			word = puncEndWildcard.ReplaceAllString(word, "")
+			word = searchTermPuncStart.ReplaceAllString(word, "")
+			word = searchTermPuncEnd.ReplaceAllString(word, "")
 
 			// and remove extra pound #s
 			word = hashtagStart.ReplaceAllString(word, "#")
diff --git a/model/utils.go b/model/utils.go
index 443a34bc4..27093c096 100644
--- a/model/utils.go
+++ b/model/utils.go
@@ -315,10 +315,9 @@ func Etag(parts ...interface{}) string {
 }
 
 var validHashtag = regexp.MustCompile(`^(#[A-Za-zäöüÄÖÜß]+[A-Za-z0-9äöüÄÖÜß_\-]*[A-Za-z0-9äöüÄÖÜß])$`)
-var puncStart = regexp.MustCompile(`^[.,()&$!\?\[\]{}':;\\<>\-+=%^*|]+`)
+var puncStart = regexp.MustCompile(`^[^\pL\d\s#]+`)
 var hashtagStart = regexp.MustCompile(`^#{2,}`)
-var puncEnd = regexp.MustCompile(`[.,()&$#!\?\[\]{}':;\\<>\-+=%^*|]+$`)
-var puncEndWildcard = regexp.MustCompile(`[.,()&$#!\?\[\]{}':;\\<>\-+=%^|]+$`)
+var puncEnd = regexp.MustCompile(`[^\pL\d\s]+$`)
 
 func ParseHashtags(text string) (string, string) {
 	words := strings.Fields(text)
diff --git a/model/utils_test.go b/model/utils_test.go
index 02a08d113..5d9289a9a 100644
--- a/model/utils_test.go
+++ b/model/utils_test.go
@@ -82,7 +82,7 @@ func TestEtag(t *testing.T) {
 	}
 }
 
-var hashtags map[string]string = map[string]string{
+var hashtags = map[string]string{
 	"#test": "#test",
 	"test": "",
 	"#test123": "#test123",
@@ -101,6 +101,7 @@ var hashtags map[string]string = map[string]string{
 	"<#less_than<": "#less_than",
 	">#greater_than>": "#greater_than",
 	"-#minus-": "#minus",
+	"_#under_": "#under",
 	"+#plus+": "#plus",
 	"=#equals=": "#equals",
 	"%#pct%": "#pct",
@@ -111,12 +112,21 @@ var hashtags map[string]string = map[string]string{
 	"|#pipe|": "#pipe",
 	":#colon:": "#colon",
 	";#semi;": "#semi",
+	"#Mötley;": "#Mötley",
+	".#period.": "#period",
+	"¿#upside¿": "#upside",
+	"\"#quote\"": "#quote",
+	"/#slash/": "#slash",
+	"\\#backslash\\": "#backslash",
+	"#a": "",
+	"#1": "",
+	"foo#bar": "",
 }
 
 func TestParseHashtags(t *testing.T) {
 	for input, output := range hashtags {
 		if o, _ := ParseHashtags(input); o != output {
-			t.Fatal("expected=" + output + " actual=" + o)
+			t.Fatal("failed to parse hashtags from input=" + input + " expected=" + output + " actual=" + o)
 		}
 	}
 }
-- 
cgit v1.2.3-1-g7c22