PLT-1626/PLT-1424/PLT-1473/PLT-1483 Improved search highlighting (#3171)

* PLT-1626 Stopped breaking up hyphenated hashtags when highlighting search terms
* Made hashtag search highlighting case independent
* PLT-1424 Improved search highlighting when searching for CJK characters
* PLT-1473 Added search term highlighting to single-line code blocks
* PLT-1473 Added search term highlighting to code blocks that don't use syntax highlighting
* PLT-1483 Added proper highlighting of at mentions
* Fixing formatting
author: Harrison Healey <harrisonmhealey@gmail.com> 2016-05-31 10:37:59 -0400
committer: Harrison Healey <harrisonmhealey@gmail.com> 2016-05-31 10:37:59 -0400
commit: 4a326dd6ce29c7ff62e0f620e8cdca920e1f3016 (patch)
tree: 9a1af026012651f9a5e55c79c426115003843ede
parent: 397e0a3f683e324ce2d4d16c8a7f3f9346c8566b (diff)
download: chat-4a326dd6ce29c7ff62e0f620e8cdca920e1f3016.tar.gz
chat-4a326dd6ce29c7ff62e0f620e8cdca920e1f3016.tar.bz2
chat-4a326dd6ce29c7ff62e0f620e8cdca920e1f3016.zip
3 files changed, 54 insertions, 15 deletions
diff --git a/webapp/utils/markdown.jsx b/webapp/utils/markdown.jsx
index 809ecc526..7fd165134 100644
--- a/webapp/utils/markdown.jsx
+++ b/webapp/utils/markdown.jsx
@@ -43,11 +43,25 @@ class MattermostMarkdownRenderer extends marked.Renderer {
             usedLanguage = 'xml';
         }
 
-        return syntaxHightlighting.formatCode(usedLanguage, code);
+        return syntaxHightlighting.formatCode(usedLanguage, code, null, this.formattingOptions.searchTerm);
     }
 
     codespan(text) {
-        return '<span class="codespan__pre-wrap">' + super.codespan(text) + '</span>';
+        let output = text;
+
+        if (this.formattingOptions.searchTerm) {
+            const tokens = new Map();
+            output = TextFormatting.highlightSearchTerms(output, tokens, this.formattingOptions.searchTerm);
+            output = TextFormatting.replaceTokens(output, tokens);
+        }
+
+        return (
+            '<span class="codespan__pre-wrap">' +
+                '<code>' +
+                    output +
+                '</code>' +
+            '</span>'
+        );
     }
 
     br() {
diff --git a/webapp/utils/syntax_hightlighting.jsx b/webapp/utils/syntax_hightlighting.jsx
index 882afe67f..33b3e2d3d 100644
--- a/webapp/utils/syntax_hightlighting.jsx
+++ b/webapp/utils/syntax_hightlighting.jsx
@@ -123,10 +123,12 @@ hlJS.registerLanguage('yaml', hljsYaml);
 
 const HighlightedLanguages = Constants.HighlightedLanguages;
 
-export function formatCode(lang, data, filename) {
+export function formatCode(lang, data, filename, searchTerm) {
     const language = lang.toLowerCase() || '';
+
     let contents;
     let header = '';
+    let className = 'post-code';
 
     if (HighlightedLanguages[language]) {
         let name = HighlightedLanguages[language].name;
@@ -147,10 +149,13 @@ export function formatCode(lang, data, filename) {
         contents = TextFormatting.sanitizeHtml(data);
     }
 
-    let className = 'post-code';
     if (!language) {
         // wrap when no language is specified
         className += ' post-code--wrap';
+
+        const tokens = new Map();
+        contents = TextFormatting.highlightSearchTerms(contents, tokens, searchTerm);
+        contents = TextFormatting.replaceTokens(contents, tokens);
     }
 
     if (filename) {
diff --git a/webapp/utils/text_formatting.jsx b/webapp/utils/text_formatting.jsx
index 024e8e26e..7a5dc56c7 100644
--- a/webapp/utils/text_formatting.jsx
+++ b/webapp/utils/text_formatting.jsx
@@ -11,6 +11,10 @@ import UserStore from 'stores/user_store.jsx';
 import twemoji from 'twemoji';
 import * as Utils from './utils.jsx';
 
+// pattern to detect the existence of a Chinese, Japanese, or Korean character in a string
+// http://stackoverflow.com/questions/15033196/using-javascript-to-check-whether-a-string-contains-japanese-characters-includi
+const cjkPattern = /[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uff9f\u4e00-\u9faf\u3400-\u4dbf]/;
+
 // Performs formatting of user posts including highlighting mentions and search terms and converting urls, hashtags, and
 // @mentions to links by taking a user's message and returning a string of formatted html. Also takes a number of options
 // as part of the second parameter:
@@ -61,7 +65,7 @@ export function doFormatText(text, options) {
     }
 
     if (options.searchTerm) {
-        output = highlightSearchTerm(output, tokens, options.searchTerm);
+        output = highlightSearchTerms(output, tokens, options.searchTerm);
     }
 
     if (!('mentionHighlight' in options) || options.mentionHighlight) {
@@ -323,18 +327,27 @@ function parseSearchTerms(searchTerm) {
             continue;
         }
 
+        // capture at mentions differently from the server so we can highlight them with the preceding at sign
+        captured = (/^@\w+\b/).exec(termString);
+        if (captured) {
+            termString = termString.substring(captured[0].length);
+
+            terms.push(captured[0]);
+            continue;
+        }
+
         // capture any plain text up until the next quote or search flag
         captured = (/^.+?(?=\bin|\bfrom|\bchannel|"|$)/).exec(termString);
         if (captured) {
             termString = termString.substring(captured[0].length);
 
             // break the text up into words based on how the server splits them in SqlPostStore.SearchPosts and then discard empty terms
-            terms.push(...captured[0].split(/[ <>+\-\(\)~@]/).filter((term) => !!term));
+            terms.push(...captured[0].split(/[ <>+\(\)~@]/).filter((term) => !!term));
             continue;
         }
 
         // we should never reach this point since at least one of the regexes should match something in the remaining text
-        throw new Error('Infinite loop in search term parsing: ' + termString);
+        throw new Error('Infinite loop in search term parsing: "' + termString + '"');
     }
 
     // remove punctuation from each term
@@ -345,16 +358,23 @@ function parseSearchTerms(searchTerm) {
 
 function convertSearchTermToRegex(term) {
     let pattern;
-    if (term.endsWith('*')) {
-        pattern = '\\b' + escapeRegex(term.substring(0, term.length - 1));
+
+    if (cjkPattern.test(term)) {
+        // term contains Chinese, Japanese, or Korean characters so don't mark word boundaries
+        pattern = '()(' + escapeRegex(term.replace(/\*/g, '')) + ')';
+    } else if (term.endsWith('*')) {
+        pattern = '\\b()(' + escapeRegex(term.substring(0, term.length - 1)) + ')';
+    } else if (term.startsWith('@')) {
+        // needs special handling of the first boundary because a word boundary doesn't work before an @ sign
+        pattern = '(\\W|^)(' + escapeRegex(term) + ')\\b';
     } else {
-        pattern = '\\b' + escapeRegex(term) + '\\b';
+        pattern = '\\b()(' + escapeRegex(term) + ')\\b';
     }
 
     return new RegExp(pattern, 'gi');
 }
 
-function highlightSearchTerm(text, tokens, searchTerm) {
+export function highlightSearchTerms(text, tokens, searchTerm) {
     const terms = parseSearchTerms(searchTerm);
 
     if (terms.length === 0) {
@@ -363,7 +383,7 @@ function highlightSearchTerm(text, tokens, searchTerm) {
 
     let output = text;
 
-    function replaceSearchTermWithToken(word) {
+    function replaceSearchTermWithToken(match, prefix, word) {
         const index = tokens.size;
         const alias = `MM_SEARCHTERM${index}`;
 
@@ -372,14 +392,14 @@ function highlightSearchTerm(text, tokens, searchTerm) {
             originalText: word
         });
 
-        return alias;
+        return prefix + alias;
     }
 
     for (const term of terms) {
         // highlight existing tokens matching search terms
         var newTokens = new Map();
         for (const [alias, token] of tokens) {
-            if (token.originalText === term.replace(/\*$/, '')) {
+            if (token.originalText.toLowerCase() === term.replace(/\*$/, '').toLowerCase()) {
                 const index = tokens.size + newTokens.size;
                 const newAlias = `MM_SEARCHTERM${index}`;
 
@@ -403,7 +423,7 @@ function highlightSearchTerm(text, tokens, searchTerm) {
     return output;
 }
 
-function replaceTokens(text, tokens) {
+export function replaceTokens(text, tokens) {
     let output = text;
 
     // iterate backwards through the map so that we do replacement in the opposite order that we added tokens
author	Harrison Healey <harrisonmhealey@gmail.com>	2016-05-31 10:37:59 -0400
committer	Harrison Healey <harrisonmhealey@gmail.com>	2016-05-31 10:37:59 -0400
commit	4a326dd6ce29c7ff62e0f620e8cdca920e1f3016 (patch)
tree	9a1af026012651f9a5e55c79c426115003843ede
parent	397e0a3f683e324ce2d4d16c8a7f3f9346c8566b (diff)
download	chat-4a326dd6ce29c7ff62e0f620e8cdca920e1f3016.tar.gz chat-4a326dd6ce29c7ff62e0f620e8cdca920e1f3016.tar.bz2 chat-4a326dd6ce29c7ff62e0f620e8cdca920e1f3016.zip