From cd346cac9dfb09d4a0e70c8c74e71b0d52d22907 Mon Sep 17 00:00:00 2001 From: Valery Gridnev Date: Mon, 24 Oct 2022 20:14:19 +1100 Subject: [PATCH] Attempt to fix parallel decoding (#74) * First attempt to fix parallel decoding * Update v2/encoding_test.go Co-authored-by: Albert Nigmatzianov * Update v2/encoding_test.go Co-authored-by: Albert Nigmatzianov * as per code review - remove xencodingWrapper Co-authored-by: Albert Nigmatzianov --- v2/encoding.go | 41 +++++++++----------------------------- v2/encoding_test.go | 48 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 32 deletions(-) diff --git a/v2/encoding.go b/v2/encoding.go index 65cb1e8..6c89e89 100644 --- a/v2/encoding.go +++ b/v2/encoding.go @@ -4,7 +4,7 @@ import ( "bytes" "io/ioutil" - xencoding "golang.org/x/text/encoding" + "golang.org/x/text/encoding" "golang.org/x/text/encoding/charmap" "golang.org/x/text/encoding/unicode" ) @@ -24,29 +24,6 @@ func (e Encoding) String() string { return e.Name } -// xencodingWrapper is a struct that stores decoder and encoder for -// appropriate x/text/encoding. It's used to reduce allocations -// through creating decoder and encoder only one time and storing it. -type xencodingWrapper struct { - decoder *xencoding.Decoder - encoder *xencoding.Encoder -} - -func newXEncodingWrapper(e xencoding.Encoding) xencodingWrapper { - return xencodingWrapper{ - decoder: e.NewDecoder(), - encoder: e.NewEncoder(), - } -} - -func (e *xencodingWrapper) Decoder() *xencoding.Decoder { - return e.decoder -} - -func (e *xencodingWrapper) Encoder() *xencoding.Encoder { - return e.encoder -} - // Available encodings. var ( // EncodingISO is ISO-8859-1 encoding. @@ -79,11 +56,11 @@ var ( encodings = []Encoding{EncodingISO, EncodingUTF16, EncodingUTF16BE, EncodingUTF8} - xencodingISO = newXEncodingWrapper(charmap.ISO8859_1) - xencodingUTF16BEBOM = newXEncodingWrapper(unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM)) - xencodingUTF16LEBOM = newXEncodingWrapper(unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM)) - xencodingUTF16BE = newXEncodingWrapper(unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)) - xencodingUTF8 = newXEncodingWrapper(unicode.UTF8) + xencodingISO = charmap.ISO8859_1 + xencodingUTF16BEBOM = unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM) + xencodingUTF16LEBOM = unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM) + xencodingUTF16BE = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) + xencodingUTF8 = unicode.UTF8 ) // bom is used in UTF-16 encoded Unicode with BOM. @@ -127,7 +104,7 @@ func decodeText(src []byte, from Encoding) string { } fromXEncoding := resolveXEncoding(src, from) - result, err := fromXEncoding.Decoder().Bytes(src) + result, err := fromXEncoding.NewDecoder().Bytes(src) if err != nil { return string(src) } @@ -152,7 +129,7 @@ func encodeWriteText(bw *bufWriter, src string, to Encoding) error { } toXEncoding := resolveXEncoding(nil, to) - encoded, err := toXEncoding.Encoder().String(src) + encoded, err := toXEncoding.NewEncoder().String(src) if err != nil { return err } @@ -166,7 +143,7 @@ func encodeWriteText(bw *bufWriter, src string, to Encoding) error { return nil } -func resolveXEncoding(src []byte, encoding Encoding) xencodingWrapper { +func resolveXEncoding(src []byte, encoding Encoding) encoding.Encoding { switch encoding.Key { case 0: return xencodingISO diff --git a/v2/encoding_test.go b/v2/encoding_test.go index 6954b8d..c624c2f 100644 --- a/v2/encoding_test.go +++ b/v2/encoding_test.go @@ -2,6 +2,7 @@ package id3v2 import ( "bytes" + "sync" "testing" ) @@ -25,6 +26,53 @@ func TestDecodeText(t *testing.T) { } } +// See https://github.com/bogem/id3v2/pull/74 +func TestDecodeTextParallel(t *testing.T) { + testCases := []struct { + src []byte + from Encoding + utf8 string + }{ + {[]byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}, EncodingISO, "Héllö"}, + {[]byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}, EncodingISO, "Héllö"}, + {[]byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}, EncodingISO, "Héllö"}, + {[]byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}, EncodingISO, "Héllö"}, + {[]byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}, EncodingISO, "Héllö"}, + {[]byte{0xFF, 0xFE, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6, 0x00}, EncodingUTF16, "Héllö"}, + {[]byte{0xFF, 0xFE, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6, 0x00}, EncodingUTF16, "Héllö"}, + {[]byte{0xFF, 0xFE, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6, 0x00}, EncodingUTF16, "Héllö"}, + {[]byte{0xFF, 0xFE, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6, 0x00}, EncodingUTF16, "Héllö"}, + {[]byte{0xFF, 0xFE, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6, 0x00}, EncodingUTF16, "Héllö"}, + {[]byte{0xFF, 0xFE}, EncodingUTF16, ""}, + {[]byte{0xFF, 0xFE}, EncodingUTF16, ""}, + {[]byte{0xFF, 0xFE}, EncodingUTF16, ""}, + {[]byte{0xFF, 0xFE}, EncodingUTF16, ""}, + {[]byte{0xFF, 0xFE}, EncodingUTF16, ""}, + {[]byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, EncodingUTF16BE, "Héllö"}, + {[]byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, EncodingUTF16BE, "Héllö"}, + {[]byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, EncodingUTF16BE, "Héllö"}, + {[]byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, EncodingUTF16BE, "Héllö"}, + {[]byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, EncodingUTF16BE, "Héllö"}, + } + + var wg sync.WaitGroup + + for _, tc := range testCases { + wg.Add(1) + + tc := tc + go func() { + defer wg.Done() + got := decodeText(tc.src, tc.from) + if got != tc.utf8 { + t.Errorf("Expected %q from %v encoding, got %q", tc.utf8, tc.from, got) + } + }() + } + + wg.Wait() +} + func TestEncodeWriteText(t *testing.T) { testCases := []struct { src string