From d967435eddd000dc996df5c244122d27b385eacf Mon Sep 17 00:00:00 2001 From: Atsushi Watanabe Date: Wed, 17 Feb 2021 23:48:06 +0900 Subject: [PATCH] Improve unmarshal performance (#149) --- block.go | 19 ++++++----- elementtable.go | 3 +- unlacer.go | 15 ++++---- unmarshal.go | 14 ++++---- value.go | 91 +++++++++++++++++++++++++++++++++---------------- value_test.go | 33 +++++++++++++----- 6 files changed, 116 insertions(+), 59 deletions(-) diff --git a/block.go b/block.go index 0e5548f..a213422 100644 --- a/block.go +++ b/block.go @@ -67,19 +67,21 @@ func UnmarshalBlock(r io.Reader, n int64) (*Block, error) { var b Block var err error var nRead int - if b.TrackNumber, nRead, err = readVUInt(r); err != nil { + + vd := &valueDecoder{} + + if b.TrackNumber, nRead, err = vd.readVUInt(r); err != nil { return nil, err } n -= int64(nRead) - if v, err := readInt(r, 2); err == nil { + if v, err := vd.readInt(r, 2); err == nil { b.Timecode = int16(v.(int64)) } else { return nil, err } n -= 2 - var bs [1]byte - switch _, err := io.ReadFull(r, bs[:]); err { + switch _, err := r.Read(vd.bs[:]); err { case nil: case io.EOF: return nil, io.ErrUnexpectedEOF @@ -92,16 +94,17 @@ func UnmarshalBlock(r io.Reader, n int64) (*Block, error) { return nil, io.ErrUnexpectedEOF } - if bs[0]&blockFlagMaskKeyframe != 0 { + f := vd.bs[0] + if f&blockFlagMaskKeyframe != 0 { b.Keyframe = true } - if bs[0]&blockFlagMaskInvisible != 0 { + if f&blockFlagMaskInvisible != 0 { b.Invisible = true } - if bs[0]&blockFlagMaskDiscardable != 0 { + if f&blockFlagMaskDiscardable != 0 { b.Discardable = true } - b.Lacing = LacingMode((bs[0] & blockFlagMaskLacing) >> 1) + b.Lacing = LacingMode((f & blockFlagMaskLacing) >> 1) var ul Unlacer switch b.Lacing { diff --git a/elementtable.go b/elementtable.go index 03f995c..34e0aa3 100644 --- a/elementtable.go +++ b/elementtable.go @@ -272,8 +272,9 @@ func init() { } func initReverseLookupTable(revTb elementRevTable, tb elementTable) { + vd := &valueDecoder{} for k, v 
:= range tb { - e, _, err := readVUInt(bytes.NewBuffer(v.b)) + e, _, err := vd.readVUInt(bytes.NewBuffer(v.b)) if err != nil { panic(err) } diff --git a/unlacer.go b/unlacer.go index b8bb603..e793d57 100644 --- a/unlacer.go +++ b/unlacer.go @@ -54,7 +54,7 @@ func NewNoUnlacer(r io.Reader, n int64) (Unlacer, error) { func NewXiphUnlacer(r io.Reader, n int64) (Unlacer, error) { var nFrame int var b [1]byte - switch _, err := io.ReadFull(r, b[:]); err { + switch _, err := r.Read(b[:]); err { case nil: nFrame = int(b[0]) + 1 case io.EOF: @@ -70,8 +70,7 @@ func NewXiphUnlacer(r io.Reader, n int64) (Unlacer, error) { } for i := 0; i < nFrame-1; i++ { for { - var b [1]byte - switch _, err := io.ReadFull(ul.r, b[:]); err { + switch _, err := ul.r.Read(b[:]); err { case nil: case io.EOF: return nil, io.ErrUnexpectedEOF @@ -98,7 +97,7 @@ func NewXiphUnlacer(r io.Reader, n int64) (Unlacer, error) { func NewFixedUnlacer(r io.Reader, n int64) (Unlacer, error) { var nFrame int var b [1]byte - switch _, err := io.ReadFull(r, b[:]); err { + switch _, err := r.Read(b[:]); err { case nil: nFrame = int(b[0]) + 1 case io.EOF: @@ -127,7 +126,7 @@ func NewFixedUnlacer(r io.Reader, n int64) (Unlacer, error) { func NewEBMLUnlacer(r io.Reader, n int64) (Unlacer, error) { var nFrame int var b [1]byte - switch _, err := io.ReadFull(r, b[:]); err { + switch _, err := r.Read(b[:]); err { case nil: nFrame = int(b[0]) + 1 case io.EOF: @@ -137,11 +136,13 @@ func NewEBMLUnlacer(r io.Reader, n int64) (Unlacer, error) { } n-- + vd := &valueDecoder{} + ul := &unlacer{ r: r, size: make([]int, nFrame), } - un64, nRead, err := readVUInt(ul.r) + un64, nRead, err := vd.readVUInt(ul.r) if err != nil { return nil, err } @@ -151,7 +152,7 @@ func NewEBMLUnlacer(r io.Reader, n int64) (Unlacer, error) { ul.size[nFrame-1] -= int(n64) for i := 1; i < nFrame-1; i++ { - n64Diff, nRead, err := readVInt(ul.r) + n64Diff, nRead, err := vd.readVInt(ul.r) n64 += int64(n64Diff) if err != nil { return nil, err diff --git 
a/unmarshal.go b/unmarshal.go index 9dbd27d..9ac998e 100644 --- a/unmarshal.go +++ b/unmarshal.go @@ -51,9 +51,11 @@ func Unmarshal(r io.Reader, val interface{}, opts ...UnmarshalOption) error { return wrapErrorf(ErrIncompatibleType, "unmarshalling to %T", val) } + vd := &valueDecoder{} + voe := vo.Elem() for { - if _, err := readElement(r, SizeUnknown, voe, 0, 0, nil, options); err != nil { + if _, err := vd.readElement(r, SizeUnknown, voe, 0, 0, nil, options); err != nil { if err == io.EOF { return nil } @@ -62,7 +64,7 @@ func Unmarshal(r io.Reader, val interface{}, opts ...UnmarshalOption) error { } } -func readElement(r0 io.Reader, n int64, vo reflect.Value, depth int, pos uint64, parent *Element, options *UnmarshalOptions) (io.Reader, error) { +func (vd *valueDecoder) readElement(r0 io.Reader, n int64, vo reflect.Value, depth int, pos uint64, parent *Element, options *UnmarshalOptions) (io.Reader, error) { pos0 := pos var r rollbackReader if options.ignoreUnknown { @@ -105,7 +107,7 @@ func readElement(r0 io.Reader, n int64, vo reflect.Value, depth int, pos uint64, r.Reset() var headerSize uint64 - e, nb, err := readVUInt(r) + e, nb, err := vd.readVUInt(r) headerSize += uint64(nb) if err != nil { if nb == 0 && err == io.ErrUnexpectedEOF { @@ -126,7 +128,7 @@ func readElement(r0 io.Reader, n int64, vo reflect.Value, depth int, pos uint64, return nil, wrapErrorf(ErrUnknownElement, "unmarshalling element 0x%x", e) } - size, nb, err := readDataSize(r) + size, nb, err := vd.readDataSize(r) headerSize += uint64(nb) if n != SizeUnknown && pos+headerSize+size > pos0+uint64(n) { @@ -192,7 +194,7 @@ func readElement(r0 io.Reader, n int64, vo reflect.Value, depth int, pos uint64, if elem != nil { elem.Value = vn.Interface() } - r0, err := readElement(r, int64(size), vn, depth+1, pos+headerSize, elem, options) + r0, err := vd.readElement(r, int64(size), vn, depth+1, pos+headerSize, elem, options) if err != nil && err != io.EOF { return r0, err } @@ -200,7 +202,7 @@ func 
readElement(r0 io.Reader, n int64, vo reflect.Value, depth int, pos uint64, r.Set(io.MultiReader(r0, r.Get())) } default: - val, err := perTypeReader[v.t](r, size) + val, err := vd.decode(v.t, r, size) if err != nil { if options.ignoreUnknown { r.RollbackTo(1) diff --git a/value.go b/value.go index 81172e1..fe5771a 100644 --- a/value.go +++ b/value.go @@ -43,26 +43,42 @@ var ErrUnsupportedElementID = errors.New("unsupported Element ID") // ErrOutOfRange means that a value is out of range of the data type. var ErrOutOfRange = errors.New("out of range") -var perTypeReader = map[DataType]func(io.Reader, uint64) (interface{}, error){ - DataTypeInt: readInt, - DataTypeUInt: readUInt, - DataTypeDate: readDate, - DataTypeFloat: readFloat, - DataTypeBinary: readBinary, - DataTypeString: readString, - DataTypeBlock: readBlock, +// valueDecoder is a value decoder sharing an internal buffer. +// Member functions must not be called concurrently. +type valueDecoder struct { + bs [1]byte } -func readDataSize(r io.Reader) (uint64, int, error) { - v, n, err := readVUInt(r) +func (d *valueDecoder) decode(t DataType, r io.Reader, n uint64) (interface{}, error) { + switch t { + case DataTypeInt: + return d.readInt(r, n) + case DataTypeUInt: + return d.readUInt(r, n) + case DataTypeDate: + return d.readDate(r, n) + case DataTypeFloat: + return d.readFloat(r, n) + case DataTypeBinary: + return d.readBinary(r, n) + case DataTypeString: + return d.readString(r, n) + case DataTypeBlock: + return d.readBlock(r, n) + } + panic("invalid data type") +} + +func (d *valueDecoder) readDataSize(r io.Reader) (uint64, int, error) { + v, n, err := d.readVUInt(r) if v == (uint64(0xFFFFFFFFFFFFFFFF) >> uint(64-n*7)) { return SizeUnknown, n, err } return v, n, err } -func readVUInt(r io.Reader) (uint64, int, error) { - var bs [1]byte - bytesRead, err := io.ReadFull(r, bs[:]) + +func (d *valueDecoder) readVUInt(r io.Reader) (uint64, int, error) { + bytesRead, err := r.Read(d.bs[:]) switch err { case nil: case 
io.EOF: @@ -74,7 +90,7 @@ func readVUInt(r io.Reader) (uint64, int, error) { var vc int var value uint64 - b := bs[0] + b := d.bs[0] switch { case b&0x80 == 0x80: vc = 0 @@ -107,8 +123,7 @@ func readVUInt(r io.Reader) (uint64, int, error) { return value, bytesRead, nil } - var bs [1]byte - n, err := io.ReadFull(r, bs[:]) + n, err := r.Read(d.bs[:]) switch err { case nil: case io.EOF: @@ -117,12 +132,13 @@ func readVUInt(r io.Reader) (uint64, int, error) { return 0, bytesRead, err } bytesRead += n - value = value<<8 | uint64(bs[0]) + value = value<<8 | uint64(d.bs[0]) vc-- } } -func readVInt(r io.Reader) (int64, int, error) { - u, n, err := readVUInt(r) + +func (d *valueDecoder) readVInt(r io.Reader) (int64, int, error) { + u, n, err := d.readVUInt(r) if err != nil { return 0, n, err } @@ -147,7 +163,8 @@ func readVInt(r io.Reader) (int64, int, error) { } return v, n, nil } -func readBinary(r io.Reader, n uint64) (interface{}, error) { + +func (d *valueDecoder) readBinary(r io.Reader, n uint64) (interface{}, error) { bs := make([]byte, n) switch _, err := io.ReadFull(r, bs); err { @@ -159,8 +176,9 @@ func readBinary(r io.Reader, n uint64) (interface{}, error) { return []byte{}, err } } -func readString(r io.Reader, n uint64) (interface{}, error) { - bs, err := readBinary(r, n) + +func (d *valueDecoder) readString(r io.Reader, n uint64) (interface{}, error) { + bs, err := d.readBinary(r, n) if err != nil { return "", err } @@ -169,8 +187,9 @@ func readString(r io.Reader, n uint64) (interface{}, error) { ss := strings.Split(s, "\x00") return ss[0], nil } -func readInt(r io.Reader, n uint64) (interface{}, error) { - v, err := readUInt(r, n) + +func (d *valueDecoder) readInt(r io.Reader, n uint64) (interface{}, error) { + v, err := d.readUInt(r, n) if err != nil { return 0, err } @@ -183,7 +202,8 @@ func readInt(r io.Reader, n uint64) (interface{}, error) { } return int64(v64), nil } -func readUInt(r io.Reader, n uint64) (interface{}, error) { + +func (d *valueDecoder) 
readUInt(r io.Reader, n uint64) (interface{}, error) { bs := make([]byte, n) switch _, err := io.ReadFull(r, bs); err { @@ -200,14 +220,16 @@ func readUInt(r io.Reader, n uint64) (interface{}, error) { } return v, nil } -func readDate(r io.Reader, n uint64) (interface{}, error) { - i, err := readInt(r, n) + +func (d *valueDecoder) readDate(r io.Reader, n uint64) (interface{}, error) { + i, err := d.readInt(r, n) if err != nil { return time.Unix(0, 0), err } return time.Unix(DateEpochInUnixtime, i.(int64)), nil } -func readFloat(r io.Reader, n uint64) (interface{}, error) { + +func (d *valueDecoder) readFloat(r io.Reader, n uint64) (interface{}, error) { bs := make([]byte, n) switch _, err := io.ReadFull(r, bs); err { @@ -227,7 +249,8 @@ func readFloat(r io.Reader, n uint64) (interface{}, error) { return 0.0, wrapErrorf(ErrInvalidFloatSize, "reading %d bytes float", n) } } -func readBlock(r io.Reader, n uint64) (interface{}, error) { + +func (d *valueDecoder) readBlock(r io.Reader, n uint64) (interface{}, error) { b, err := UnmarshalBlock(r, int64(n)) if err != nil { return nil, err @@ -267,6 +290,7 @@ func encodeDataSize(v, n uint64) []byte { return []byte{0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} } } + func encodeElementID(v uint64) ([]byte, error) { switch { case v < 0x80: @@ -286,6 +310,7 @@ func encodeElementID(v uint64) ([]byte, error) { } return nil, ErrUnsupportedElementID } + func encodeVInt(v int64) ([]byte, error) { switch { case -0x3F <= v && v <= 0x3F: @@ -327,6 +352,7 @@ func encodeBinary(i interface{}, n uint64) ([]byte, error) { } return append(v, bytes.Repeat([]byte{0x00}, int(n)-len(v))...), nil } + func encodeString(i interface{}, n uint64) ([]byte, error) { v, ok := i.(string) if !ok { @@ -337,6 +363,7 @@ func encodeString(i interface{}, n uint64) ([]byte, error) { } return append([]byte(v), bytes.Repeat([]byte{0x00}, int(n)-len(v))...), nil } + func encodeInt(i interface{}, n uint64) ([]byte, error) { var v int64 switch v2 := i.(type) { @@ 
-355,6 +382,7 @@ func encodeInt(i interface{}, n uint64) ([]byte, error) { } return encodeUInt(uint64(v), n) } + func encodeUInt(i interface{}, n uint64) ([]byte, error) { var v uint64 switch v2 := i.(type) { @@ -390,6 +418,7 @@ func encodeUInt(i interface{}, n uint64) ([]byte, error) { return []byte{byte(v >> 56), byte(v >> 48), byte(v >> 40), byte(v >> 32), byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)}, nil } } + func encodeDate(i interface{}, n uint64) ([]byte, error) { v, ok := i.(time.Time) if !ok { @@ -398,16 +427,19 @@ func encodeDate(i interface{}, n uint64) ([]byte, error) { dtns := v.Sub(time.Unix(DateEpochInUnixtime, 0)).Nanoseconds() return encodeInt(int64(dtns), n) } + func encodeFloat32(i float32) ([]byte, error) { b := make([]byte, 4) binary.BigEndian.PutUint32(b, math.Float32bits(i)) return b, nil } + func encodeFloat64(i float64) ([]byte, error) { b := make([]byte, 8) binary.BigEndian.PutUint64(b, math.Float64bits(i)) return b, nil } + func encodeFloat(i interface{}, n uint64) ([]byte, error) { switch v := i.(type) { case float64: @@ -436,6 +468,7 @@ func encodeFloat(i interface{}, n uint64) ([]byte, error) { return []byte{}, wrapErrorf(ErrInvalidType, "writing %T as float", i) } } + func encodeBlock(i interface{}, n uint64) ([]byte, error) { v, ok := i.(Block) if !ok { diff --git a/value_test.go b/value_test.go index 08f5c4a..13bd870 100644 --- a/value_test.go +++ b/value_test.go @@ -33,9 +33,11 @@ func TestDataSize(t *testing.T) { "Indefinite": {[]byte{0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, SizeUnknown}, } + vd := &valueDecoder{} + for n, c := range testCases { t.Run("DecodeVInt "+n, func(t *testing.T) { - r, _, err := readVUInt(bytes.NewBuffer(c.b)) + r, _, err := vd.readVUInt(bytes.NewBuffer(c.b)) if err != nil { t.Fatalf("Failed to readVUInt: '%v'", err) } @@ -46,7 +48,7 @@ func TestDataSize(t *testing.T) { } for n, c := range testCases { t.Run("DecodeDataSize "+n, func(t *testing.T) { - r, _, err := 
readDataSize(bytes.NewBuffer(c.b)) + r, _, err := vd.readDataSize(bytes.NewBuffer(c.b)) if err != nil { t.Fatalf("Failed to readDataSize: '%v'", err) } @@ -77,9 +79,11 @@ func TestDataSize_Unknown(t *testing.T) { "8 bytes": {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, } + vd := &valueDecoder{} + for n, b := range testCases { t.Run("DecodeDataSize "+n, func(t *testing.T) { - r, _, err := readDataSize(bytes.NewBuffer(b)) + r, _, err := vd.readDataSize(bytes.NewBuffer(b)) if err != nil { t.Fatalf("Failed to readDataSize: '%v'", err) } @@ -110,9 +114,11 @@ func TestElementID(t *testing.T) { "7 bytes (upper bound)": {[]byte{0x03, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, 0x2000000000000 - 1}, } + vd := &valueDecoder{} + for n, c := range testCases { t.Run("Decode "+n, func(t *testing.T) { - r, _, err := readVUInt(bytes.NewBuffer(c.b)) + r, _, err := vd.readVUInt(bytes.NewBuffer(c.b)) if err != nil { t.Fatalf("Failed to readVUInt: '%v'", err) } @@ -162,9 +168,11 @@ func TestVInt(t *testing.T) { "8 bytes (upper bound)": {[]byte{0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE}, 0x7FFFFFFFFFFFFF}, } + vd := &valueDecoder{} + for n, c := range testCases { t.Run("Decode "+n, func(t *testing.T) { - r, _, err := readVInt(bytes.NewBuffer(c.b)) + r, _, err := vd.readVInt(bytes.NewBuffer(c.b)) if err != nil { t.Fatalf("Failed to readVUInt: '%v'", err) } @@ -242,9 +250,12 @@ func TestValue(t *testing.T) { "ConvertUInt32": {[]byte{0x01, 0x02, 0x03, 0x04}, DataTypeUInt, uint64(0x01020304), 0, uint32(0x01020304)}, "ConvertUInt": {[]byte{0x01, 0x02, 0x03, 0x04}, DataTypeUInt, uint64(0x01020304), 0, uint(0x01020304)}, } + + vd := &valueDecoder{} + for n, c := range testCases { t.Run("Read "+n, func(t *testing.T) { - v, err := perTypeReader[c.t](bytes.NewBuffer(c.b), uint64(len(c.b))) + v, err := vd.decode(c.t, bytes.NewBuffer(c.b), uint64(len(c.b))) if err != nil { t.Fatalf("Failed to read%s: '%v'", n, err) } @@ -375,9 +386,12 @@ func TestReadValue_WrongSize(t *testing.T) { 
ErrInvalidFloatSize, }, } + + vd := &valueDecoder{} + for n, c := range testCases { t.Run("Read "+n, func(t *testing.T) { - _, err := perTypeReader[c.t](bytes.NewReader(c.b), c.n) + _, err := vd.decode(c.t, bytes.NewReader(c.b), c.n) if !errs.Is(err, c.err) { t.Fatalf("Expected error against wrong data size of %s: %v, got: %v", n, c.err, err) } @@ -397,11 +411,14 @@ func TestReadValue_ReadUnexpectedEOF(t *testing.T) { {DataTypeDate, []byte{0x00, 0x00}}, {DataTypeFloat, []byte{0x00, 0x00, 0x00, 0x00}}, } + + vd := &valueDecoder{} + for _, c := range testCases { t.Run("Read "+c.t.String(), func(t *testing.T) { for l := 0; l < len(c.b)-1; l++ { r := bytes.NewReader(c.b[:l]) - _, err := perTypeReader[c.t](r, uint64(len(c.b))) + _, err := vd.decode(c.t, r, uint64(len(c.b))) if !errs.Is(err, io.ErrUnexpectedEOF) { t.Errorf("Expected error against short (%d bytes) %s: %v, got: %v", l, c.t.String(), io.ErrUnexpectedEOF, err)