From d103ed6ca97ca5a2669f6cf5fe4b3d2a9c945f26 Mon Sep 17 00:00:00 2001 From: Christopher Speller Date: Wed, 17 May 2017 16:51:25 -0400 Subject: Upgrading server dependancies (#6431) --- vendor/github.com/dimchansky/utfbom/utfbom.go | 174 ++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 vendor/github.com/dimchansky/utfbom/utfbom.go (limited to 'vendor/github.com/dimchansky/utfbom/utfbom.go') diff --git a/vendor/github.com/dimchansky/utfbom/utfbom.go b/vendor/github.com/dimchansky/utfbom/utfbom.go new file mode 100644 index 000000000..648184a12 --- /dev/null +++ b/vendor/github.com/dimchansky/utfbom/utfbom.go @@ -0,0 +1,174 @@ +// Package utfbom implements the detection of the BOM (Unicode Byte Order Mark) and removing as necessary. +// It wraps an io.Reader object, creating another object (Reader) that also implements the io.Reader +// interface but provides automatic BOM checking and removing as necessary. +package utfbom + +import ( + "errors" + "io" +) + +// Encoding is type alias for detected UTF encoding. +type Encoding int + +// Constants to identify detected UTF encodings. +const ( + // Unknown encoding, returned when no BOM was detected + Unknown Encoding = iota + + // UTF8, BOM bytes: EF BB BF + UTF8 + + // UTF-16, big-endian, BOM bytes: FE FF + UTF16BigEndian + + // UTF-16, little-endian, BOM bytes: FF FE + UTF16LittleEndian + + // UTF-32, big-endian, BOM bytes: 00 00 FE FF + UTF32BigEndian + + // UTF-32, little-endian, BOM bytes: FF FE 00 00 + UTF32LittleEndian +) + +const maxConsecutiveEmptyReads = 100 + +// Skip creates Reader which automatically detects BOM (Unicode Byte Order Mark) and removes it as necessary. +// It also returns the encoding detected by the BOM. +// If the detected encoding is not needed, you can call the SkipOnly function. +func Skip(rd io.Reader) (*Reader, Encoding) { + // Is it already a Reader? + b, ok := rd.(*Reader) + if ok { + return b, Unknown + } + + enc, left, err := detectUtf(rd) + return &Reader{ + rd: rd, + buf: left, + err: err, + }, enc +} + +// SkipOnly creates Reader which automatically detects BOM (Unicode Byte Order Mark) and removes it as necessary. +func SkipOnly(rd io.Reader) *Reader { + r, _ := Skip(rd) + return r +} + +// Reader implements automatic BOM (Unicode Byte Order Mark) checking and +// removing as necessary for an io.Reader object. +type Reader struct { + rd io.Reader // reader provided by the client + buf []byte // buffered data + err error // last error +} + +// Read is an implementation of io.Reader interface. +// The bytes are taken from the underlying Reader, but it checks for BOMs, removing them as necessary. +func (r *Reader) Read(p []byte) (n int, err error) { + if len(p) == 0 { + return 0, nil + } + + if r.buf == nil { + if r.err != nil { + return 0, r.readErr() + } + + return r.rd.Read(p) + } + + // copy as much as we can + n = copy(p, r.buf) + r.buf = nilIfEmpty(r.buf[n:]) + return n, nil +} + +func (r *Reader) readErr() error { + err := r.err + r.err = nil + return err +} + +var errNegativeRead = errors.New("utfbom: reader returned negative count from Read") + +func detectUtf(rd io.Reader) (enc Encoding, buf []byte, err error) { + buf, err = readBOM(rd) + + if len(buf) >= 4 { + if isUTF32BigEndianBOM4(buf) { + return UTF32BigEndian, nilIfEmpty(buf[4:]), err + } + if isUTF32LittleEndianBOM4(buf) { + return UTF32LittleEndian, nilIfEmpty(buf[4:]), err + } + } + + if len(buf) > 2 && isUTF8BOM3(buf) { + return UTF8, nilIfEmpty(buf[3:]), err + } + + if (err != nil && err != io.EOF) || (len(buf) < 2) { + return Unknown, nilIfEmpty(buf), err + } + + if isUTF16BigEndianBOM2(buf) { + return UTF16BigEndian, nilIfEmpty(buf[2:]), err + } + if isUTF16LittleEndianBOM2(buf) { + return UTF16LittleEndian, nilIfEmpty(buf[2:]), err + } + + return Unknown, nilIfEmpty(buf), err +} + +func readBOM(rd io.Reader) (buf []byte, err error) { + const maxBOMSize = 4 + var bom [maxBOMSize]byte // used to read BOM + + // read as many bytes as possible + for nEmpty, n := 0, 0; err == nil && len(buf) < maxBOMSize; buf = bom[:len(buf)+n] { + if n, err = rd.Read(bom[len(buf):]); n < 0 { + panic(errNegativeRead) + } + if n > 0 { + nEmpty = 0 + } else { + nEmpty++ + if nEmpty >= maxConsecutiveEmptyReads { + err = io.ErrNoProgress + } + } + } + return +} + +func isUTF32BigEndianBOM4(buf []byte) bool { + return buf[0] == 0x00 && buf[1] == 0x00 && buf[2] == 0xFE && buf[3] == 0xFF +} + +func isUTF32LittleEndianBOM4(buf []byte) bool { + return buf[0] == 0xFF && buf[1] == 0xFE && buf[2] == 0x00 && buf[3] == 0x00 +} + +func isUTF8BOM3(buf []byte) bool { + return buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF +} + +func isUTF16BigEndianBOM2(buf []byte) bool { + return buf[0] == 0xFE && buf[1] == 0xFF +} + +func isUTF16LittleEndianBOM2(buf []byte) bool { + return buf[0] == 0xFF && buf[1] == 0xFE +} + +func nilIfEmpty(buf []byte) (res []byte) { + if len(buf) > 0 { + res = buf + } + return +} -- cgit v1.2.3-1-g7c22