From 7146ce3d5851ad21f8e6245cbc9fe863a1dd59ba Mon Sep 17 00:00:00 2001 From: Dylan Reimerink Date: Tue, 7 Nov 2023 14:03:26 +0100 Subject: [PATCH] btf: Optimizing BTF parsing by merging `readTypes` and `inflateRawTypes` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After profiling the BTF parsing code, it became apparent that a lot of time was spent in `readTypes` was spent allocating rawTypes. These rawTypes are only used as an intermediate step to create the final inflated types so all of the allocation work gets thrown away. This commit merges `readTypes` and `inflateRawTypes` into a single function. This allows us to re-use the intermediate objects and only allocate the final inflated types. This results in the following performance improvements: ``` goos: linux goarch: amd64 pkg: github.com/cilium/ebpf/btf cpu: 11th Gen Intel(R) Core(TM) i7-11800H @ 2.30GHz │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ ParseVmlinux-16 49.05m ± 1% 46.08m ± 1% -6.06% (n=100) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ ParseVmlinux-16 31.45Mi ± 0% 26.65Mi ± 0% -15.28% (n=100) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ ParseVmlinux-16 534.1k ± 0% 467.5k ± 0% -12.48% (n=100) ``` Signed-off-by: Dylan Reimerink --- btf/btf.go | 15 ++-- btf/btf_types.go | 64 --------------- btf/types.go | 199 +++++++++++++++++++++++++++++++++------------- btf/types_test.go | 33 ++++++-- 4 files changed, 176 insertions(+), 135 deletions(-) diff --git a/btf/btf.go b/btf/btf.go index 9291b8c14..80f64d78a 100644 --- a/btf/btf.go +++ b/btf/btf.go @@ -209,12 +209,7 @@ func loadRawSpec(btf io.ReaderAt, bo binary.ByteOrder, base *Spec) (*Spec, error } } - rawTypes, rawStrings, err := parseBTF(btf, bo, baseStrings) - if err != nil { - return nil, err - } - - types, err := inflateRawTypes(rawTypes, rawStrings, base) + types, rawStrings, err := parseBTF(btf, bo, baseStrings, base) if err != nil { return nil, err } @@ -373,7 +368,7 @@ func guessRawBTFByteOrder(r io.ReaderAt) binary.ByteOrder { // parseBTF reads a .BTF section into memory and parses it into a list of // raw types and a string table. -func parseBTF(btf io.ReaderAt, bo binary.ByteOrder, baseStrings *stringTable) ([]rawType, *stringTable, error) { +func parseBTF(btf io.ReaderAt, bo binary.ByteOrder, baseStrings *stringTable, base *Spec) ([]Type, *stringTable, error) { buf := internal.NewBufferedSectionReader(btf, 0, math.MaxInt64) header, err := parseBTFHeader(buf, bo) if err != nil { @@ -387,12 +382,12 @@ func parseBTF(btf io.ReaderAt, bo binary.ByteOrder, baseStrings *stringTable) ([ } buf.Reset(io.NewSectionReader(btf, header.typeStart(), int64(header.TypeLen))) - rawTypes, err := readTypes(buf, bo, header.TypeLen) + types, err := readAndInflateTypes(buf, bo, header.TypeLen, rawStrings, base) if err != nil { - return nil, nil, fmt.Errorf("can't read types: %w", err) + return nil, nil, err } - return rawTypes, rawStrings, nil + return types, rawStrings, nil } type symbol struct { diff --git a/btf/btf_types.go b/btf/btf_types.go index c9984b2d4..f3830061d 100644 --- a/btf/btf_types.go +++ b/btf/btf_types.go @@ -365,67 +365,3 @@ type btfParam struct { type btfDeclTag struct { ComponentIdx uint32 } - -func readTypes(r io.Reader, bo binary.ByteOrder, typeLen uint32) ([]rawType, error) { - var header btfType - // because of the interleaving between types and struct members it is difficult to - // precompute the numbers of raw types this will parse - // this "guess" is a good first estimation - sizeOfbtfType := uintptr(btfTypeLen) - tyMaxCount := uintptr(typeLen) / sizeOfbtfType / 2 - types := make([]rawType, 0, tyMaxCount) - - for id := TypeID(1); ; id++ { - if err := binary.Read(r, bo, &header); err == io.EOF { - return types, nil - } else if err != nil { - return nil, fmt.Errorf("can't read type info for id %v: %v", id, err) - } - - var data interface{} - switch header.Kind() { - case kindInt: - data = new(btfInt) - case kindPointer: - case kindArray: - data = new(btfArray) - case kindStruct: - fallthrough - case kindUnion: - data = make([]btfMember, header.Vlen()) - case kindEnum: - data = make([]btfEnum, header.Vlen()) - case kindForward: - case kindTypedef: - case kindVolatile: - case kindConst: - case kindRestrict: - case kindFunc: - case kindFuncProto: - data = make([]btfParam, header.Vlen()) - case kindVar: - data = new(btfVariable) - case kindDatasec: - data = make([]btfVarSecinfo, header.Vlen()) - case kindFloat: - case kindDeclTag: - data = new(btfDeclTag) - case kindTypeTag: - case kindEnum64: - data = make([]btfEnum64, header.Vlen()) - default: - return nil, fmt.Errorf("type id %v: unknown kind: %v", id, header.Kind()) - } - - if data == nil { - types = append(types, rawType{header, nil}) - continue - } - - if err := binary.Read(r, bo, data); err != nil { - return nil, fmt.Errorf("type id %d: kind %v: can't read %T: %v", id, header.Kind(), data, err) - } - - types = append(types, rawType{header, data}) - } -} diff --git a/btf/types.go b/btf/types.go index 421cc387d..5be7cc0e2 100644 --- a/btf/types.go +++ b/btf/types.go @@ -1,6 +1,7 @@ package btf import ( + "encoding/binary" "errors" "fmt" "io" @@ -726,17 +727,22 @@ func (c *copier) copy(typ *Type, transform Transformer) { type typeDeque = internal.Deque[*Type] -// inflateRawTypes takes a list of raw btf types linked via type IDs, and turns -// it into a graph of Types connected via pointers. +// readAndInflateTypes reads the raw btf type info and turns it into a graph +// of Types connected via pointers. // -// If base is provided, then the raw types are considered to be of a split BTF +// If base is provided, then the types are considered to be of a split BTF // (e.g., a kernel module). // // Returns a slice of types indexed by TypeID. Since BTF ignores compilation // units, multiple types may share the same name. A Type may form a cyclic graph // by pointing at itself. -func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([]Type, error) { - types := make([]Type, 0, len(rawTypes)+1) // +1 for Void added to base types +func readAndInflateTypes(r io.Reader, bo binary.ByteOrder, typeLen uint32, rawStrings *stringTable, base *Spec) ([]Type, error) { + // because of the interleaving between types and struct members it is difficult to + // precompute the numbers of raw types this will parse + // this "guess" is a good first estimation + sizeOfbtfType := uintptr(btfTypeLen) + tyMaxCount := uintptr(typeLen) / sizeOfbtfType / 2 + types := make([]Type, 0, tyMaxCount) // Void is defined to always be type ID 0, and is thus omitted from BTF. types = append(types, (*Void)(nil)) @@ -841,62 +847,112 @@ func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([ return members, nil } + var ( + header btfType + bInt btfInt + bArr btfArray + bMembers []btfMember + bEnums []btfEnum + bParams []btfParam + bVariable btfVariable + bSecInfos []btfVarSecinfo + bDeclTag btfDeclTag + bEnums64 []btfEnum64 + ) + var declTags []*declTag - for _, raw := range rawTypes { + for { var ( id = firstTypeID + TypeID(len(types)) typ Type ) + if err := binary.Read(r, bo, &header); err == io.EOF { + break + } else if err != nil { + return nil, fmt.Errorf("can't read type info for id %v: %v", id, err) + } + if id < firstTypeID { return nil, fmt.Errorf("no more type IDs") } - name, err := rawStrings.Lookup(raw.NameOff) + name, err := rawStrings.Lookup(header.NameOff) if err != nil { return nil, fmt.Errorf("get name for type id %d: %w", id, err) } - switch raw.Kind() { + switch header.Kind() { case kindInt: - size := raw.Size() - bi := raw.data.(*btfInt) - if bi.Offset() > 0 || bi.Bits().Bytes() != size { - legacyBitfields[id] = [2]Bits{bi.Offset(), bi.Bits()} + size := header.Size() + if err := binary.Read(r, bo, &bInt); err != nil { + return nil, fmt.Errorf("can't read btfInt, id: %d: %w", id, err) } - typ = &Int{name, raw.Size(), bi.Encoding()} + if bInt.Offset() > 0 || bInt.Bits().Bytes() != size { + legacyBitfields[id] = [2]Bits{bInt.Offset(), bInt.Bits()} + } + typ = &Int{name, header.Size(), bInt.Encoding()} case kindPointer: ptr := &Pointer{nil} - fixup(raw.Type(), &ptr.Target) + fixup(header.Type(), &ptr.Target) typ = ptr case kindArray: - btfArr := raw.data.(*btfArray) - arr := &Array{nil, nil, btfArr.Nelems} - fixup(btfArr.IndexType, &arr.Index) - fixup(btfArr.Type, &arr.Type) + if err := binary.Read(r, bo, &bArr); err != nil { + return nil, fmt.Errorf("can't read btfArray, id: %d: %w", id, err) + } + + arr := &Array{nil, nil, bArr.Nelems} + fixup(bArr.IndexType, &arr.Index) + fixup(bArr.Type, &arr.Type) typ = arr case kindStruct: - members, err := convertMembers(raw.data.([]btfMember), raw.Bitfield()) + vlen := header.Vlen() + if len(bMembers) < vlen { + bMembers = append(bMembers, make([]btfMember, vlen-len(bMembers))...) + } + + if err := binary.Read(r, bo, bMembers[:vlen]); err != nil { + return nil, fmt.Errorf("can't read btfMembers, id: %d: %w", id, err) + } + + members, err := convertMembers(bMembers[:vlen], header.Bitfield()) if err != nil { return nil, fmt.Errorf("struct %s (id %d): %w", name, id, err) } - typ = &Struct{name, raw.Size(), members} + typ = &Struct{name, header.Size(), members} case kindUnion: - members, err := convertMembers(raw.data.([]btfMember), raw.Bitfield()) + vlen := header.Vlen() + if len(bMembers) < vlen { + bMembers = append(bMembers, make([]btfMember, vlen-len(bMembers))...) + } + + if err := binary.Read(r, bo, bMembers[:vlen]); err != nil { + return nil, fmt.Errorf("can't read btfMembers, id: %d: %w", id, err) + } + + members, err := convertMembers(bMembers[:vlen], header.Bitfield()) if err != nil { return nil, fmt.Errorf("union %s (id %d): %w", name, id, err) } - typ = &Union{name, raw.Size(), members} + typ = &Union{name, header.Size(), members} case kindEnum: - rawvals := raw.data.([]btfEnum) - vals := make([]EnumValue, 0, len(rawvals)) - signed := raw.Signed() - for i, btfVal := range rawvals { + vlen := header.Vlen() + if len(bEnums) < vlen { + bEnums = append(bEnums, make([]btfEnum, vlen-len(bEnums))...) + } + + if err := binary.Read(r, bo, bEnums[:vlen]); err != nil { + return nil, fmt.Errorf("can't read btfEnum, id: %d: %w", id, err) + } + + vals := make([]EnumValue, 0, vlen) + signed := header.Signed() + for i, btfVal := range bEnums[:vlen] { name, err := rawStrings.Lookup(btfVal.NameOff) if err != nil { return nil, fmt.Errorf("get name for enum value %d: %s", i, err) @@ -908,40 +964,48 @@ func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([ } vals = append(vals, EnumValue{name, value}) } - typ = &Enum{name, raw.Size(), signed, vals} + typ = &Enum{name, header.Size(), signed, vals} case kindForward: - typ = &Fwd{name, raw.FwdKind()} + typ = &Fwd{name, header.FwdKind()} case kindTypedef: typedef := &Typedef{name, nil} - fixup(raw.Type(), &typedef.Type) + fixup(header.Type(), &typedef.Type) typ = typedef case kindVolatile: volatile := &Volatile{nil} - fixup(raw.Type(), &volatile.Type) + fixup(header.Type(), &volatile.Type) typ = volatile case kindConst: cnst := &Const{nil} - fixup(raw.Type(), &cnst.Type) + fixup(header.Type(), &cnst.Type) typ = cnst case kindRestrict: restrict := &Restrict{nil} - fixup(raw.Type(), &restrict.Type) + fixup(header.Type(), &restrict.Type) typ = restrict case kindFunc: - fn := &Func{name, nil, raw.Linkage()} - fixup(raw.Type(), &fn.Type) + fn := &Func{name, nil, header.Linkage()} + fixup(header.Type(), &fn.Type) typ = fn case kindFuncProto: - rawparams := raw.data.([]btfParam) - params := make([]FuncParam, 0, len(rawparams)) - for i, param := range rawparams { + vlen := header.Vlen() + if len(bParams) < vlen { + bParams = append(bParams, make([]btfParam, vlen-len(bParams))...) + } + + if err := binary.Read(r, bo, bParams[:vlen]); err != nil { + return nil, fmt.Errorf("can't read btfParam, id: %d: %w", id, err) + } + + params := make([]FuncParam, 0, vlen) + for i, param := range bParams[:vlen] { name, err := rawStrings.Lookup(param.NameOff) if err != nil { return nil, fmt.Errorf("get name for func proto parameter %d: %s", i, err) @@ -951,57 +1015,80 @@ func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([ }) } for i := range params { - fixup(rawparams[i].Type, ¶ms[i].Type) + fixup(bParams[i].Type, ¶ms[i].Type) } fp := &FuncProto{nil, params} - fixup(raw.Type(), &fp.Return) + fixup(header.Type(), &fp.Return) typ = fp case kindVar: - variable := raw.data.(*btfVariable) - v := &Var{name, nil, VarLinkage(variable.Linkage)} - fixup(raw.Type(), &v.Type) + if err := binary.Read(r, bo, &bVariable); err != nil { + return nil, fmt.Errorf("can't read btfVariable, id: %d: %w", id, err) + } + + v := &Var{name, nil, VarLinkage(bVariable.Linkage)} + fixup(header.Type(), &v.Type) typ = v case kindDatasec: - btfVars := raw.data.([]btfVarSecinfo) - vars := make([]VarSecinfo, 0, len(btfVars)) - for _, btfVar := range btfVars { + vlen := header.Vlen() + if len(bSecInfos) < vlen { + bSecInfos = append(bSecInfos, make([]btfVarSecinfo, vlen-len(bSecInfos))...) + } + + if err := binary.Read(r, bo, bSecInfos[:vlen]); err != nil { + return nil, fmt.Errorf("can't read btfVarSecinfo, id: %d: %w", id, err) + } + + vars := make([]VarSecinfo, 0, vlen) + for _, btfVar := range bSecInfos[:vlen] { vars = append(vars, VarSecinfo{ Offset: btfVar.Offset, Size: btfVar.Size, }) } for i := range vars { - fixup(btfVars[i].Type, &vars[i].Type) + fixup(bSecInfos[i].Type, &vars[i].Type) } - typ = &Datasec{name, raw.Size(), vars} + typ = &Datasec{name, header.Size(), vars} case kindFloat: - typ = &Float{name, raw.Size()} + typ = &Float{name, header.Size()} case kindDeclTag: - btfIndex := raw.data.(*btfDeclTag).ComponentIdx + if err := binary.Read(r, bo, &bDeclTag); err != nil { + return nil, fmt.Errorf("can't read btfDeclTag, id: %d: %w", id, err) + } + + btfIndex := bDeclTag.ComponentIdx if uint64(btfIndex) > math.MaxInt { return nil, fmt.Errorf("type id %d: index exceeds int", id) } dt := &declTag{nil, name, int(int32(btfIndex))} - fixup(raw.Type(), &dt.Type) + fixup(header.Type(), &dt.Type) typ = dt declTags = append(declTags, dt) case kindTypeTag: tt := &typeTag{nil, name} - fixup(raw.Type(), &tt.Type) + fixup(header.Type(), &tt.Type) typ = tt case kindEnum64: - rawvals := raw.data.([]btfEnum64) - vals := make([]EnumValue, 0, len(rawvals)) - for i, btfVal := range rawvals { + vlen := header.Vlen() + if len(bEnums64) < vlen { + bEnums64 = append(bEnums64, make([]btfEnum64, vlen-len(bEnums64))...) + } + + if err := binary.Read(r, bo, bEnums64[:vlen]); err != nil { + return nil, fmt.Errorf("can't read btfEnum64, id: %d: %w", id, err) + } + + vals := make([]EnumValue, 0, vlen) + for i, btfVal := range bEnums64[:vlen] { name, err := rawStrings.Lookup(btfVal.NameOff) if err != nil { return nil, fmt.Errorf("get name for enum64 value %d: %s", i, err) @@ -1009,10 +1096,10 @@ func inflateRawTypes(rawTypes []rawType, rawStrings *stringTable, base *Spec) ([ value := (uint64(btfVal.ValHi32) << 32) | uint64(btfVal.ValLo32) vals = append(vals, EnumValue{name, value}) } - typ = &Enum{name, raw.Size(), raw.Signed(), vals} + typ = &Enum{name, header.Size(), header.Signed(), vals} default: - return nil, fmt.Errorf("type id %d: unknown kind: %v", id, raw.Kind()) + return nil, fmt.Errorf("type id %d: unknown kind: %v", id, header.Kind()) } types = append(types, typ) diff --git a/btf/types_test.go b/btf/types_test.go index a8bd90456..ce84a2814 100644 --- a/btf/types_test.go +++ b/btf/types_test.go @@ -1,7 +1,10 @@ package btf import ( + "bytes" + "encoding/binary" "fmt" + "io" "reflect" "testing" @@ -396,26 +399,46 @@ func TestInflateLegacyBitfield(t *testing.T) { data.SetBits(size) rawInt.data = &data + var ( + before bytes.Buffer + after bytes.Buffer + ) + var beforeInt rawType beforeInt.SetKind(kindStruct) beforeInt.SetVlen(1) beforeInt.data = []btfMember{{Type: 2}} + if err := beforeInt.Marshal(&before, binary.LittleEndian); err != nil { + t.Fatal(err) + } + if err := rawInt.Marshal(&before, binary.LittleEndian); err != nil { + t.Fatal(err) + } + afterInt := beforeInt afterInt.data = []btfMember{{Type: 1}} + if err := rawInt.Marshal(&after, binary.LittleEndian); err != nil { + t.Fatal(err) + } + if err := afterInt.Marshal(&after, binary.LittleEndian); err != nil { + t.Fatal(err) + } + emptyStrings := newStringTable("") for _, test := range []struct { - name string - raw []rawType + name string + reader io.Reader }{ - {"struct before int", []rawType{beforeInt, rawInt}}, - {"struct after int", []rawType{rawInt, afterInt}}, + {"struct before int", &before}, + {"struct after int", &after}, } { t.Run(test.name, func(t *testing.T) { - types, err := inflateRawTypes(test.raw, emptyStrings, nil) + types, err := readAndInflateTypes(test.reader, binary.LittleEndian, 2, emptyStrings, nil) if err != nil { + fmt.Println(before.Bytes()) t.Fatal(err) }