Skip to content

Commit

Permalink
Attempt to fix parallel decoding (#74)
Browse files Browse the repository at this point in the history
* First attempt to fix parallel decoding

* Update v2/encoding_test.go

Co-authored-by: Albert Nigmatzianov <[email protected]>

* Update v2/encoding_test.go

Co-authored-by: Albert Nigmatzianov <[email protected]>

* as per code review - remove xencodingWrapper

Co-authored-by: Albert Nigmatzianov <[email protected]>
  • Loading branch information
drgolem and n10v authored Oct 24, 2022
1 parent 28e96ce commit cd346ca
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 32 deletions.
41 changes: 9 additions & 32 deletions v2/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"bytes"
"io/ioutil"

xencoding "golang.org/x/text/encoding"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/unicode"
)
Expand All @@ -24,29 +24,6 @@ func (e Encoding) String() string {
return e.Name
}

// xencodingWrapper is a struct that stores decoder and encoder for
// appropriate x/text/encoding. It's used to reduce allocations
// through creating decoder and encoder only one time and storing it.
type xencodingWrapper struct {
decoder *xencoding.Decoder
encoder *xencoding.Encoder
}

func newXEncodingWrapper(e xencoding.Encoding) xencodingWrapper {
return xencodingWrapper{
decoder: e.NewDecoder(),
encoder: e.NewEncoder(),
}
}

func (e *xencodingWrapper) Decoder() *xencoding.Decoder {
return e.decoder
}

func (e *xencodingWrapper) Encoder() *xencoding.Encoder {
return e.encoder
}

// Available encodings.
var (
// EncodingISO is ISO-8859-1 encoding.
Expand Down Expand Up @@ -79,11 +56,11 @@ var (

encodings = []Encoding{EncodingISO, EncodingUTF16, EncodingUTF16BE, EncodingUTF8}

xencodingISO = newXEncodingWrapper(charmap.ISO8859_1)
xencodingUTF16BEBOM = newXEncodingWrapper(unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM))
xencodingUTF16LEBOM = newXEncodingWrapper(unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM))
xencodingUTF16BE = newXEncodingWrapper(unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM))
xencodingUTF8 = newXEncodingWrapper(unicode.UTF8)
xencodingISO = charmap.ISO8859_1
xencodingUTF16BEBOM = unicode.UTF16(unicode.BigEndian, unicode.ExpectBOM)
xencodingUTF16LEBOM = unicode.UTF16(unicode.LittleEndian, unicode.ExpectBOM)
xencodingUTF16BE = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
xencodingUTF8 = unicode.UTF8
)

// bom is used in UTF-16 encoded Unicode with BOM.
Expand Down Expand Up @@ -127,7 +104,7 @@ func decodeText(src []byte, from Encoding) string {
}

fromXEncoding := resolveXEncoding(src, from)
result, err := fromXEncoding.Decoder().Bytes(src)
result, err := fromXEncoding.NewDecoder().Bytes(src)
if err != nil {
return string(src)
}
Expand All @@ -152,7 +129,7 @@ func encodeWriteText(bw *bufWriter, src string, to Encoding) error {
}

toXEncoding := resolveXEncoding(nil, to)
encoded, err := toXEncoding.Encoder().String(src)
encoded, err := toXEncoding.NewEncoder().String(src)
if err != nil {
return err
}
Expand All @@ -166,7 +143,7 @@ func encodeWriteText(bw *bufWriter, src string, to Encoding) error {
return nil
}

func resolveXEncoding(src []byte, encoding Encoding) xencodingWrapper {
func resolveXEncoding(src []byte, encoding Encoding) encoding.Encoding {
switch encoding.Key {
case 0:
return xencodingISO
Expand Down
48 changes: 48 additions & 0 deletions v2/encoding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package id3v2

import (
"bytes"
"sync"
"testing"
)

Expand All @@ -25,6 +26,53 @@ func TestDecodeText(t *testing.T) {
}
}

// See https://github.com/bogem/id3v2/pull/74
func TestDecodeTextParallel(t *testing.T) {
testCases := []struct {
src []byte
from Encoding
utf8 string
}{
{[]byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}, EncodingISO, "Héllö"},
{[]byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}, EncodingISO, "Héllö"},
{[]byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}, EncodingISO, "Héllö"},
{[]byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}, EncodingISO, "Héllö"},
{[]byte{0x48, 0xE9, 0x6C, 0x6C, 0xF6}, EncodingISO, "Héllö"},
{[]byte{0xFF, 0xFE, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6, 0x00}, EncodingUTF16, "Héllö"},
{[]byte{0xFF, 0xFE, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6, 0x00}, EncodingUTF16, "Héllö"},
{[]byte{0xFF, 0xFE, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6, 0x00}, EncodingUTF16, "Héllö"},
{[]byte{0xFF, 0xFE, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6, 0x00}, EncodingUTF16, "Héllö"},
{[]byte{0xFF, 0xFE, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6, 0x00}, EncodingUTF16, "Héllö"},
{[]byte{0xFF, 0xFE}, EncodingUTF16, ""},
{[]byte{0xFF, 0xFE}, EncodingUTF16, ""},
{[]byte{0xFF, 0xFE}, EncodingUTF16, ""},
{[]byte{0xFF, 0xFE}, EncodingUTF16, ""},
{[]byte{0xFF, 0xFE}, EncodingUTF16, ""},
{[]byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, EncodingUTF16BE, "Héllö"},
{[]byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, EncodingUTF16BE, "Héllö"},
{[]byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, EncodingUTF16BE, "Héllö"},
{[]byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, EncodingUTF16BE, "Héllö"},
{[]byte{0x00, 0x48, 0x00, 0xE9, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0xF6}, EncodingUTF16BE, "Héllö"},
}

var wg sync.WaitGroup

for _, tc := range testCases {
wg.Add(1)

tc := tc
go func() {
defer wg.Done()
got := decodeText(tc.src, tc.from)
if got != tc.utf8 {
t.Errorf("Expected %q from %v encoding, got %q", tc.utf8, tc.from, got)
}
}()
}

wg.Wait()
}

func TestEncodeWriteText(t *testing.T) {
testCases := []struct {
src string
Expand Down

0 comments on commit cd346ca

Please sign in to comment.