forked from segmentio/parquet-go
-
Notifications
You must be signed in to change notification settings - Fork 0
/
column_index_test.go
109 lines (100 loc) · 3.48 KB
/
column_index_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
package parquet_test
import (
"testing"
"github.com/parquet-go/parquet-go"
)
// TestBinaryColumnIndexMinMax builds a single-page column index for binary
// column types and checks which lookup values parquet.Search reports as
// landing on that page.
//
// Every scenario indexes one page with the listed min/max bounds, using an
// indexer created with the listed size limit, and then probes the resulting
// index. A probe counts as a hit when Search returns 0.
func TestBinaryColumnIndexMinMax(t *testing.T) {
	// probe pairs a value to search for with whether it is expected to hit.
	type probe struct {
		value []byte
		hit   bool
	}
	// scenario describes one indexed page and the probes run against it.
	type scenario struct {
		kind      parquet.Kind
		typ       parquet.Type
		pageMin   []byte
		pageMax   []byte
		sizeLimit int
		probes    []probe
	}

	scenarios := []scenario{
		{
			kind:      parquet.ByteArray,
			typ:       parquet.ByteArrayType,
			pageMin:   []byte{0, 0, 0, 0, 0, 0},
			pageMax:   []byte{1, 2, 3, 4, 5, 6},
			sizeLimit: 4,
			probes: []probe{
				{[]byte{0, 0, 0, 0, 0, 0}, true},
				{[]byte{0, 1, 2, 3, 4, 5}, true},
				{[]byte{1, 2, 3, 4}, true},
				{[]byte{1, 2, 3, 4, 5, 6}, true},  // the page max value should be a hit
				{[]byte{1, 2, 3, 4, 5, 7}, true},  // false positive due to size limit
				{[]byte{1, 2, 3, 5}, true},        // false positive due to size limit
				{[]byte{1, 2, 3, 5, 6, 7}, false}, // should be no hit since it definitely exceeds page max
				{[]byte{2, 3, 4, 5}, false},       // should be no hit since it definitely exceeds page max
			},
		},
		{
			kind:      parquet.FixedLenByteArray,
			typ:       parquet.FixedLenByteArrayType(6),
			pageMin:   []byte{0, 0, 0, 0, 0, 0},
			pageMax:   []byte{1, 2, 3, 4, 5, 6},
			sizeLimit: 4,
			probes: []probe{
				{[]byte{0, 0, 0, 0, 0, 0}, true},
				{[]byte{0, 1, 2, 3, 4, 5}, true},
				{[]byte{1, 2, 3, 4, 0, 0}, true},
				{[]byte{1, 2, 3, 4, 5, 6}, true},       // the page max value should be a hit
				{[]byte{1, 2, 3, 4, 5, 7}, true},       // false positive due to size limit
				{[]byte{1, 2, 3, 4, 0xFF, 0xFF}, true}, // false positive due to size limit
				{[]byte{1, 2, 3, 5, 0, 0}, false},      // should be no hit since it definitely exceeds page max
				{[]byte{1, 2, 3, 5, 6, 7}, false},      // should be no hit since it definitely exceeds page max
				{[]byte{2, 3, 4, 5, 0, 0}, false},      // should be no hit since it definitely exceeds page max
			},
		},
	}

	for _, sc := range scenarios {
		// Index exactly one page bounded by the scenario's min and max.
		indexer := sc.typ.NewColumnIndexer(sc.sizeLimit)
		indexer.IndexPage(100, 0, parquet.ValueOf(sc.pageMin), parquet.ValueOf(sc.pageMax))

		raw := indexer.ColumnIndex()
		index := parquet.NewColumnIndex(sc.kind, &raw)

		for _, p := range sc.probes {
			// Only one page was indexed, so a result of 0 is treated as a hit.
			got := parquet.Search(index, parquet.ValueOf(p.value), sc.typ) == 0
			if got == p.hit {
				continue
			}
			t.Errorf("checkByteArrayMinMax(%v, %v, %v, %v) = %v, want %v", sc.pageMin, sc.pageMax, p.value, sc.sizeLimit, got, p.hit)
		}
	}
}
// Test_ColumnIndexReuse checks that a column index obtained from an indexer
// keeps its NullPages contents after the indexer is reset and reused to index
// further pages.
func Test_ColumnIndexReuse(t *testing.T) {
	const (
		lowest  = "a"
		highest = "z"
	)

	indexer := parquet.ByteArrayType.NewColumnIndexer(16)

	// First use: a single page, expected not to be flagged as a null page.
	indexer.IndexPage(100, 0, parquet.ValueOf(lowest), parquet.ValueOf(highest))
	before := indexer.ColumnIndex()
	if n := len(before.NullPages); n != 1 {
		t.Fatalf("expected 1 null page, got %d", n)
	}
	if before.NullPages[0] {
		t.Fatalf("unexpected null page 0")
	}

	// Reset the indexer; it should be safe to reuse afterwards.
	indexer.Reset()

	// Second use: index two pages that are both null pages. The index captured
	// before the reset is expected to stay unchanged.
	indexer.IndexPage(100, 100, parquet.ValueOf(lowest), parquet.ValueOf(highest))
	indexer.IndexPage(10, 10, parquet.ValueOf(lowest), parquet.ValueOf(highest))
	after := indexer.ColumnIndex()

	// t.Fatalf stops the test, so at most one of these failures is reported.
	switch n := len(after.NullPages); {
	case n != 2:
		t.Fatalf("expected 2 null pages, got %d", n)
	case !after.NullPages[0]:
		t.Fatalf("expected null page 0")
	case !after.NullPages[1]:
		t.Fatalf("expected null page 1")
	}

	// Re-validate the null pages of the index taken before the reset.
	if n := len(before.NullPages); n != 1 {
		t.Fatalf("expected 1 null page, got %d", n)
	}
	if before.NullPages[0] {
		t.Fatalf("unexpected null page 0")
	}
}