Skip to content

Commit

Permalink
Support parsing damaged mkv (#147)
Browse files Browse the repository at this point in the history
  • Loading branch information
at-wat authored Feb 17, 2021
1 parent 1c79946 commit f769d2b
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 23 deletions.
5 changes: 5 additions & 0 deletions block.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ func UnmarshalBlock(r io.Reader, n int64) (*Block, error) {
}
n--

if n < 0 {
return nil, io.ErrUnexpectedEOF
}

if bs[0]&blockFlagMaskKeyframe != 0 {
b.Keyframe = true
}
Expand All @@ -113,6 +117,7 @@ func UnmarshalBlock(r io.Reader, n int64) (*Block, error) {
if err != nil {
return nil, err
}

for {
frame, err := ul.Read()
if err == io.EOF {
Expand Down
8 changes: 4 additions & 4 deletions matroska_official_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,10 @@ func TestMatroskaOfficial(t *testing.T) {
"DifferentEBMLHeadSizesAndCueLessSeeking": {
filename: "test6.mkv",
},
// "ExtraUnknownJunkElementsDamaged": {
// filename: "test7.mkv",
// opts: []UnmarshalOption{WithIgnoreUnknown(true)},
// },
"ExtraUnknownJunkElementsDamaged": {
filename: "test7.mkv",
opts: []UnmarshalOption{WithIgnoreUnknown(true)},
},
"AudioGap": {
filename: "test8.mkv",
},
Expand Down
79 changes: 79 additions & 0 deletions reader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright 2021 The ebml-go authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ebml

import (
"bytes"
"io"
)

// rollbackReader is an io.Reader whose wrapped source can be swapped out and
// whose recently read bytes can be pushed back so that they are read again.
type rollbackReader interface {
	io.Reader

	// Get returns the reader that is currently wrapped.
	Get() io.Reader
	// Set replaces the wrapped reader.
	Set(io.Reader)

	// Reset makes the current read position the new rollback origin.
	Reset()
	// RollbackTo rewinds the stream to the given byte offset, counted from
	// the current rollback origin. Implementations that do not record what
	// they read may panic.
	RollbackTo(int)
}

// rollbackReaderImpl is a rollbackReader that keeps a copy of every byte it
// hands out, so that part of the stream can be replayed later.
type rollbackReaderImpl struct {
	io.Reader
	// buf holds the bytes read since the last Reset or RollbackTo.
	buf []byte
}

// Get returns the reader that is currently wrapped.
func (r *rollbackReaderImpl) Get() io.Reader {
	return r.Reader
}

// Set replaces the wrapped reader. Already recorded bytes are kept.
func (r *rollbackReaderImpl) Set(src io.Reader) {
	r.Reader = src
}

// Reset forgets the recorded bytes while keeping the allocated storage for
// reuse by subsequent reads.
func (r *rollbackReaderImpl) Reset() {
	r.buf = r.buf[:0]
}

// Read reads from the wrapped reader and records whatever was returned.
func (r *rollbackReaderImpl) Read(p []byte) (int, error) {
	n, err := r.Reader.Read(p)
	got := p[:n]
	r.buf = append(r.buf, got...)
	return n, err
}

// RollbackTo arranges for the recorded bytes starting at offset to be read
// again before the wrapped reader continues. It panics if offset is outside
// the recorded range.
func (r *rollbackReaderImpl) RollbackTo(offset int) {
	replay := bytes.NewReader(r.buf[offset:])
	// Detach from the old storage: replay still aliases it, so later reads
	// must not append into the same backing array.
	r.buf = nil
	r.Reader = io.MultiReader(replay, r.Reader)
}

// rollbackReaderNop is a rollbackReader that records nothing: reads go
// straight to the wrapped reader and rolling back is not supported.
type rollbackReaderNop struct {
	io.Reader
}

// Get returns the reader that is currently wrapped.
func (r *rollbackReaderNop) Get() io.Reader {
	return r.Reader
}

// Set replaces the wrapped reader.
func (r *rollbackReaderNop) Set(src io.Reader) {
	r.Reader = src
}

// Reset does nothing because no bytes are recorded.
func (r *rollbackReaderNop) Reset() {}

// RollbackTo always panics: this reader keeps no history to roll back to.
func (r *rollbackReaderNop) RollbackTo(_ int) {
	panic("can't rollback nop rollback reader")
}
103 changes: 103 additions & 0 deletions reader_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright 2021 The ebml-go authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ebml

import (
"bytes"
"io"
"testing"
)

// TestRollbackReader checks that rollbackReaderImpl passes reads through,
// forgets recorded bytes on Reset, and replays recorded bytes from the
// requested offset after RollbackTo.
func TestRollbackReader(t *testing.T) {
	src := bytes.NewReader([]byte{0, 1, 2, 3, 4, 5, 6, 7})
	r := &rollbackReaderImpl{Reader: src}
	b := make([]byte, 3)

	// readAndCheck fills b from r and compares it with the expected bytes.
	readAndCheck := func(expected []byte) {
		t.Helper()
		n, err := io.ReadFull(r, b)
		switch {
		case err != nil:
			t.Fatal(err)
		case n != 3:
			t.Fatalf("Expected to read 3 bytes, got %d bytes", n)
		case !bytes.Equal(expected, b):
			t.Fatalf("Unexpected read result: %v", b)
		}
	}

	readAndCheck([]byte{0, 1, 2})

	// Drop the recorded bytes; reading continues from the current position.
	r.Reset()
	readAndCheck([]byte{3, 4, 5})

	// Bytes 3, 4, 5 were recorded since Reset; rolling back to offset 1
	// replays 4 and 5 before the reader continues with 6.
	r.RollbackTo(1)
	readAndCheck([]byte{4, 5, 6})
}

// TestRollbackReaderNop checks that rollbackReaderNop passes reads through,
// that Reset has no effect on the stream, and that RollbackTo panics.
func TestRollbackReaderNop(t *testing.T) {
	src := bytes.NewReader([]byte{0, 1, 2, 3, 4, 5, 6, 7})
	r := &rollbackReaderNop{Reader: src}
	b := make([]byte, 3)

	// readAndCheck performs a single Read into b and compares the result
	// with the expected bytes.
	readAndCheck := func(expected []byte) {
		t.Helper()
		n, err := r.Read(b)
		switch {
		case err != nil:
			t.Fatal(err)
		case n != 3:
			t.Fatalf("Expected to read 3 bytes, got %d bytes", n)
		case !bytes.Equal(expected, b):
			t.Fatalf("Unexpected read result: %v", b)
		}
	}

	readAndCheck([]byte{0, 1, 2})

	// Reset is a no-op, so the next read simply continues.
	r.Reset()
	readAndCheck([]byte{3, 4, 5})

	// RollbackTo must panic; the deferred check fails the test otherwise.
	defer func() {
		if recover() == nil {
			t.Error("Expected panic")
		}
	}()
	r.RollbackTo(1)
}
2 changes: 1 addition & 1 deletion unlacer.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ func NewEBMLUnlacer(r io.Reader, n int64) (Unlacer, error) {
if err != nil {
return nil, err
}
if n64 < 0 {
if n64 <= 0 {
return nil, io.ErrUnexpectedEOF
}
n -= int64(nRead)
Expand Down
54 changes: 38 additions & 16 deletions unmarshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ var ErrIndefiniteType = errors.New("marshal/unmarshal to indefinite type")
// ErrIncompatibleType means that an element is not convertible to a corresponding struct field.
var ErrIncompatibleType = errors.New("marshal/unmarshal to incompatible type")

// ErrInvalidElementSize means that an element has an inconsistent size,
// e.g. the element size is larger than its parent element size.
var ErrInvalidElementSize = errors.New("invalid element size")

// Unmarshal EBML stream.
func Unmarshal(r io.Reader, val interface{}, opts ...UnmarshalOption) error {
options := &UnmarshalOptions{}
Expand Down Expand Up @@ -60,11 +63,17 @@ func Unmarshal(r io.Reader, val interface{}, opts ...UnmarshalOption) error {
}

func readElement(r0 io.Reader, n int64, vo reflect.Value, depth int, pos uint64, parent *Element, options *UnmarshalOptions) (io.Reader, error) {
var r io.Reader
pos0 := pos
var r rollbackReader
if options.ignoreUnknown {
r = &rollbackReaderImpl{}
} else {
r = &rollbackReaderNop{}
}
if n != SizeUnknown {
r = io.LimitReader(r0, n)
r.Set(io.LimitReader(r0, n))
} else {
r = r0
r.Set(r0)
}

var mapOut bool
Expand Down Expand Up @@ -93,35 +102,43 @@ func readElement(r0 io.Reader, n int64, vo reflect.Value, depth int, pos uint64,
}

for {
r.Reset()

var headerSize uint64
e, nb, err := readVUInt(r)
headerSize += uint64(nb)
if err != nil {
if nb == 0 && err == io.ErrUnexpectedEOF {
return nil, io.EOF
}
if !options.ignoreUnknown {
return nil, err
if options.ignoreUnknown {
return nil, nil
}
return nil, nil
return nil, err
}
v, ok := revTable[uint32(e)]
if !ok {
if !options.ignoreUnknown {
return nil, wrapErrorf(ErrUnknownElement, "unmarshalling element 0x%x", e)
if options.ignoreUnknown {
r.RollbackTo(1)
pos++
continue
}
remain, _ := encodeElementID(e)
r = io.MultiReader(
bytes.NewReader(remain[1:]),
r,
)
pos++
continue
return nil, wrapErrorf(ErrUnknownElement, "unmarshalling element 0x%x", e)
}

size, nb, err := readDataSize(r)
headerSize += uint64(nb)

if n != SizeUnknown && pos+headerSize+size > pos0+uint64(n) {
err = ErrInvalidElementSize
}

if err != nil {
if options.ignoreUnknown {
r.RollbackTo(1)
pos++
continue
}
return nil, err
}

Expand Down Expand Up @@ -180,11 +197,16 @@ func readElement(r0 io.Reader, n int64, vo reflect.Value, depth int, pos uint64,
return r0, err
}
if r0 != nil {
r = io.MultiReader(r0, r)
r.Set(io.MultiReader(r0, r.Get()))
}
default:
val, err := perTypeReader[v.t](r, size)
if err != nil {
if options.ignoreUnknown {
r.RollbackTo(1)
pos++
continue
}
return nil, err
}
vr := reflect.ValueOf(val)
Expand Down
5 changes: 3 additions & 2 deletions unmarshal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ func ExampleUnmarshal() {
TestBinary := []byte{
0x1a, 0x45, 0xdf, 0xa3, // EBML
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, // 0x10
0x42, 0x82, 0x85, 0x77, 0x65, 0x62, 0x6d, 0x00,
0x42, 0x87, 0x81, 0x02, 0x42, 0x85, 0x81, 0x02,
0x42, 0x82, 0x85, 0x77, 0x65, 0x62, 0x6d, 0x00, // EBMLDocType = webm
0x42, 0x87, 0x81, 0x02, // DocTypeVersion = 2
0x42, 0x85, 0x81, 0x02, // DocTypeReadVersion = 2
}
type TestEBML struct {
Header struct {
Expand Down

0 comments on commit f769d2b

Please sign in to comment.