From 8e6141152bd0978bfeb24dbfff05972f4d17fd08 Mon Sep 17 00:00:00 2001 From: Harrison Healey Date: Wed, 26 Apr 2017 11:00:38 -0400 Subject: PLT-3915/PLT-5550 Improve handling of Markdown while parsing mentions (#6091) * PLT-3915 Removed ability to mention users in code blocks * PLT-3915 Added simple check for potential code blocks before using regexes * PLT-5550 Improve splitting when parsing mentions to ignore markdown characters --- app/notification.go | 30 +++++++- app/notification_test.go | 180 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 209 insertions(+), 1 deletion(-) (limited to 'app') diff --git a/app/notification.go b/app/notification.go index 8e7e43d55..62e5e6c82 100644 --- a/app/notification.go +++ b/app/notification.go @@ -12,9 +12,11 @@ import ( "net/http" "net/url" "path/filepath" + "regexp" "sort" "strings" "time" + "unicode" l4g "github.com/alecthomas/log4go" "github.com/mattermost/platform/einterfaces" @@ -660,7 +662,12 @@ func GetExplicitMentions(message string, keywords map[string][]string) (map[stri } } - for _, word := range strings.Fields(message) { + message = removeCodeFromMessage(message) + + for _, word := range strings.FieldsFunc(message, func(c rune) bool { + // Split on whitespace (as strings.Fields normally does) or on Markdown characters + return unicode.IsSpace(c) || c == '*' || c == '~' + }) { isMention := false if word == "@here" { @@ -726,6 +733,27 @@ func GetExplicitMentions(message string, keywords map[string][]string) (map[stri return mentioned, potentialOthersMentioned, hereMentioned, channelMentioned, allMentioned } +// Matches a line containing only ``` and a potential language definition, any number of lines not containing ```, +// and then either a line containing only ``` or the end of the text +var codeBlockPattern = regexp.MustCompile("(?m)^[^\\S\n]*\\`\\`\\`.*$[\\s\\S]+?(^[^\\S\n]*\\`\\`\\`$|\\z)") + +// Matches a backquote, either some text or any number of non-empty lines, and then a final 
backquote +var inlineCodePattern = regexp.MustCompile("(?m)\\`(?:.+?|.*?\n(.*?\\S.*?\n)*.*?)\\`") + +// Strips pre-formatted text and code blocks from a Markdown string: code blocks are removed outright and inline code is replaced with a single space +func removeCodeFromMessage(message string) string { + if strings.Contains(message, "```") { + message = codeBlockPattern.ReplaceAllString(message, "") + } + + // Replace with a space to prevent cases like "user`code`name" from turning into "username" + if strings.Contains(message, "`") { + message = inlineCodePattern.ReplaceAllString(message, " ") + } + + return message +} + // Given a map of user IDs to profiles, returns a list of mention // keywords for all users in the channel. func GetMentionKeywordsInChannel(profiles map[string]*model.User) map[string][]string { diff --git a/app/notification_test.go b/app/notification_test.go index 794bb4b37..1d5c82405 100644 --- a/app/notification_test.go +++ b/app/notification_test.go @@ -39,6 +39,7 @@ func TestSendNotifications(t *testing.T) { func TestGetExplicitMentions(t *testing.T) { id1 := model.NewId() id2 := model.NewId() + id3 := model.NewId() // not mentioning anybody message := "this is a message" @@ -113,6 +114,51 @@ func TestGetExplicitMentions(t *testing.T) { if mentions, potential, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 1 || !mentions[id1] || len(potential) != 1 { t.Fatal("should've mentioned user and have a potential not in channel") } + + // words in inline code shouldn't trigger mentions + message = "`this shouldn't mention @channel at all`" + keywords = map[string][]string{} + if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 0 { + t.Fatal("@channel in inline code shouldn't cause a mention") + } + + // words in code blocks shouldn't trigger mentions + message = "```\nthis shouldn't mention @channel at all\n```" + keywords = map[string][]string{} + if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 0 { + 
t.Fatal("@channel in code block shouldn't cause a mention") + } + + // Markdown-formatted text that isn't code should trigger mentions + message = "*@aaa @bbb @ccc*" + keywords = map[string][]string{"@aaa": {id1}, "@bbb": {id2}, "@ccc": {id3}} + if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 3 || !mentions[id1] || !mentions[id2] || !mentions[id3] { + t.Fatal("should've mentioned all 3 users", mentions) + } + + message = "**@aaa @bbb @ccc**" + keywords = map[string][]string{"@aaa": {id1}, "@bbb": {id2}, "@ccc": {id3}} + if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 3 || !mentions[id1] || !mentions[id2] || !mentions[id3] { + t.Fatal("should've mentioned all 3 users") + } + + message = "~~@aaa @bbb @ccc~~" + keywords = map[string][]string{"@aaa": {id1}, "@bbb": {id2}, "@ccc": {id3}} + if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 3 || !mentions[id1] || !mentions[id2] || !mentions[id3] { + t.Fatal("should've mentioned all 3 users") + } + + message = "### @aaa" + keywords = map[string][]string{"@aaa": {id1}, "@bbb": {id2}, "@ccc": {id3}} + if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 1 || !mentions[id1] || mentions[id2] || mentions[id3] { + t.Fatal("should've only mentioned aaa") + } + + message = "> @aaa" + keywords = map[string][]string{"@aaa": {id1}, "@bbb": {id2}, "@ccc": {id3}} + if mentions, _, _, _, _ := GetExplicitMentions(message, keywords); len(mentions) != 1 || !mentions[id1] || mentions[id2] || mentions[id3] { + t.Fatal("should've only mentioned aaa") + } } func TestGetExplicitMentionsAtHere(t *testing.T) { @@ -177,6 +223,140 @@ func TestGetExplicitMentionsAtHere(t *testing.T) { } } +func TestRemoveCodeFromMessage(t *testing.T) { + input := "this is regular text" + expected := input + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect 
output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is text with\n```\na code block\n```\nin it" + expected = "this is text with\n\nin it" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is text with\n```javascript\na JS code block\n```\nin it" + expected = "this is text with\n\nin it" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is text with\n```java script?\na JS code block\n```\nin it" + expected = "this is text with\n\nin it" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is text with an empty\n```\n\n\n\n```\nin it" + expected = "this is text with an empty\n\nin it" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is text with\n```\ntwo\n```\ncode\n```\nblocks\n```\nin it" + expected = "this is text with\n\ncode\n\nin it" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is text with indented\n ```\ncode\n ```\nin it" + expected = "this is text with indented\n\nin it" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is text ending with\n```\nan unfinished code block" + expected = "this is text ending with\n" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input 
= "this is `code` in a sentence" + expected = "this is in a sentence" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is `two` things of `code` in a sentence" + expected = "this is things of in a sentence" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is `code with spaces` in a sentence" + expected = "this is in a sentence" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is `code\nacross multiple` lines" + expected = "this is lines" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is `code\non\nmany\ndifferent` lines" + expected = "this is lines" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is `\ncode on its own line\n` across multiple lines" + expected = "this is across multiple lines" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is `\n some more code \n` across multiple lines" + expected = "this is across multiple lines" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is `\ncode` on its own line" + expected = "this is on its own line" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", 
actual, expected) + } + + input = "this is `code\n` on its own line" + expected = "this is on its own line" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is *italics mixed with `code in a way that has the code` take precedence*" + expected = "this is *italics mixed with take precedence*" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is code within a wo` `rd for some reason" + expected = "this is code within a wo rd for some reason" + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is `not\n\ncode` because it has a blank line" + expected = input + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is `not\n \ncode` because it has line with only whitespace" + expected = input + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } + + input = "this is just `` two backquotes" + expected = input + if actual := removeCodeFromMessage(input); actual != expected { + t.Fatalf("received incorrect output\n\nGot:\n%v\n\nExpected:\n%v\n", actual, expected) + } +} + func TestGetMentionKeywords(t *testing.T) { Setup() // user with username or custom mentions enabled -- cgit v1.2.3-1-g7c22