diff options
author | Hyeseong Kim <cometkim.kr@gmail.com> | 2018-05-01 01:54:11 +0900 |
---|---|---|
committer | Christopher Speller <crspeller@gmail.com> | 2018-04-30 09:54:11 -0700 |
commit | e73f1d73143ebba9c7e80d21c45bba9b61f2611c (patch) | |
tree | e84e85d12308ae5624643ff7768f55be45c3350c /app | |
parent | 9c5815ee41f29e27774d17382d9a4bd10d208545 (diff) | |
download | chat-e73f1d73143ebba9c7e80d21c45bba9b61f2611c.tar.gz chat-e73f1d73143ebba9c7e80d21c45bba9b61f2611c.tar.bz2 chat-e73f1d73143ebba9c7e80d21c45bba9b61f2611c.zip |
MM-9072/MM-10185 Force-convert the encoding of OpenGraph metadata to UTF-8 (#8631)
* Force-convert non-UTF8 HTML to UTF8 before opengraph processing
* Split the force-encoding function
* Add benchmark Test for the forceHTMLEncodingToUTF8()
```
Running tool: /home/comet/go-v1.9.2/bin/go test -benchmem -run=^$ github.com/mattermost/mattermost-server/app -bench ^BenchmarkForceHTMLEncodingToUTF8$
[03:32:58 KST 2018/04/21] [INFO] (github.com/mattermost/mattermost-server/app.TestMain:28) -test.run used, not creating temporary containers
goos: linux
goarch: amd64
pkg: github.com/mattermost/mattermost-server/app
BenchmarkForceHTMLEncodingToUTF8/with_converting-4 100000 11201 ns/op 18704 B/op 32 allocs/op
BenchmarkForceHTMLEncodingToUTF8/without_converting-4 300000 3931 ns/op 4632 B/op 13 allocs/op
PASS
ok github.com/mattermost/mattermost-server/app 2.703s
Success: Benchmarks passed.
```
* Remove an unnecessary constraint
* Add pre-check if content-type header is already utf-8
* Move the checking for utf-8 into forceHTMLEncodingToUTF8() for testing
* Revert df3f347213faa0d023c26d201fa6531f46391086..HEAD, without Gopkg.lock
Diffstat (limited to 'app')
-rw-r--r-- | app/post.go | 16 | ||||
-rw-r--r-- | app/post_test.go | 28 |
2 files changed, 43 insertions, 1 deletions
diff --git a/app/post.go b/app/post.go index 41139da63..bc31aee44 100644 --- a/app/post.go +++ b/app/post.go @@ -9,6 +9,7 @@ import ( "encoding/hex" "encoding/json" "fmt" + "io" "net/http" "net/url" "regexp" @@ -19,6 +20,7 @@ import ( "github.com/mattermost/mattermost-server/model" "github.com/mattermost/mattermost-server/store" "github.com/mattermost/mattermost-server/utils" + "golang.org/x/net/html/charset" ) var linkWithTextRegex = regexp.MustCompile(`<([^<\|]+)\|([^>]+)>`) @@ -726,7 +728,10 @@ func (a *App) GetOpenGraphMetadata(requestURL string) *opengraph.OpenGraph { } defer consumeAndClose(res) - if err := og.ProcessHTML(res.Body); err != nil { + contentType := res.Header.Get("Content-Type") + body := forceHTMLEncodingToUTF8(res.Body, contentType) + + if err := og.ProcessHTML(body); err != nil { mlog.Error(fmt.Sprintf("GetOpenGraphMetadata processing failed for url=%v with err=%v", requestURL, err.Error())) } @@ -735,6 +740,15 @@ func (a *App) GetOpenGraphMetadata(requestURL string) *opengraph.OpenGraph { return og } +func forceHTMLEncodingToUTF8(body io.Reader, contentType string) io.Reader { + r, err := charset.NewReader(body, contentType) + if err != nil { + mlog.Error(fmt.Sprintf("forceHTMLEncodingToUTF8 failed to convert for contentType=%v with err=%v", contentType, err.Error())) + return body + } + return r +} + func makeOpenGraphURLsAbsolute(og *opengraph.OpenGraph, requestURL string) { parsedRequestURL, err := url.Parse(requestURL) if err != nil { diff --git a/app/post_test.go b/app/post_test.go index 10b957751..aefc0ea35 100644 --- a/app/post_test.go +++ b/app/post_test.go @@ -297,6 +297,34 @@ func TestImageProxy(t *testing.T) { } } +func BenchmarkForceHTMLEncodingToUTF8(b *testing.B) { + HTML := ` + <html> + <head> + <meta property="og:url" content="https://example.com/apps/mattermost"> + <meta property="og:image" content="https://images.example.com/image.png"> + </head> + </html> + ` + ContentType := "text/html; utf-8" + + b.Run("with converting", func(b *testing.B) { + for i := 0; i < b.N; i++ { + r := forceHTMLEncodingToUTF8(strings.NewReader(HTML), ContentType) + + og := opengraph.NewOpenGraph() + og.ProcessHTML(r) + } + }) + + b.Run("without converting", func(b *testing.B) { + for i := 0; i < b.N; i++ { + og := opengraph.NewOpenGraph() + og.ProcessHTML(strings.NewReader(HTML)) + } + }) +} + func TestMakeOpenGraphURLsAbsolute(t *testing.T) { for name, tc := range map[string]struct { HTML string |