From 12008ddaec985a3e6bf8c744e5fceb689d3fa81b Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Thu, 25 Apr 2024 16:06:20 -0600 Subject: [PATCH] perf(profiling): use allocator-api2 based StringTable Creates a `StringTable` where the individual strings are allocated in an arena allocator. This uses the Chain, Linear, and Virtual Allocators provided by datadog-alloc to create a chain of 4 MiB chunks, each of which is virtually allocated. --- Cargo.lock | 3 + profiling/Cargo.toml | 9 + profiling/benches/interning_strings.rs | 28 + profiling/benches/main.rs | 8 + profiling/src/collections/mod.rs | 1 + profiling/src/collections/string_table/mod.rs | 260 ++++ .../string_table/wordpress_test_data.rs | 1064 +++++++++++++++++ profiling/src/internal/profile.rs | 25 +- profiling/src/iter.rs | 23 + profiling/src/lib.rs | 1 + tools/docker/Dockerfile.build | 3 +- 11 files changed, 1407 insertions(+), 18 deletions(-) create mode 100644 profiling/benches/interning_strings.rs create mode 100644 profiling/benches/main.rs create mode 100644 profiling/src/collections/string_table/mod.rs create mode 100644 profiling/src/collections/string_table/wordpress_test_data.rs create mode 100644 profiling/src/iter.rs diff --git a/Cargo.lock b/Cargo.lock index b759402cb..607f04809 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -822,11 +822,14 @@ dependencies = [ "byteorder", "bytes", "chrono", + "criterion", + "datadog-alloc", "ddcommon", "derivative", "futures", "futures-core", "futures-util", + "hashbrown 0.14.3", "http", "http-body", "hyper", diff --git a/profiling/Cargo.toml b/profiling/Cargo.toml index 1dea03960..570c9a775 100644 --- a/profiling/Cargo.toml +++ b/profiling/Cargo.toml @@ -11,16 +11,22 @@ license.workspace = true [lib] crate-type = ["lib"] +[[bench]] +name = "main" +harness = false + [dependencies] anyhow = "1.0" bitmaps = "3.2.0" bytes = "1.1" chrono = {version = "0.4", default-features = false, features = ["std", "clock"]} +datadog-alloc = {path = "../alloc"} ddcommon = {path = "../ddcommon"} derivative = "2.2.0" futures = { version = "0.3", default-features = false } futures-core = {version = "0.3.0", default-features = false} futures-util = {version = "0.3.0", default-features = false} +hashbrown = { version = "0.14", default-features = false, features = ["allocator-api2"] } http = "0.2" http-body = "0.4" hyper = {version = "0.14", features = ["client"], default-features = false} @@ -38,3 +44,6 @@ serde_json = {version = "1.0"} tokio = {version = "1.23", features = ["rt", "macros"]} tokio-util = "0.7.1" byteorder = { version = "1.5", features = ["std"] } + +[dev-dependencies] +criterion = "0.5.1" diff --git a/profiling/benches/interning_strings.rs b/profiling/benches/interning_strings.rs new file mode 100644 index 000000000..159a062b1 --- /dev/null +++ b/profiling/benches/interning_strings.rs @@ -0,0 +1,28 @@ +// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use criterion::*; +use datadog_profiling::collections::string_table::{ + wordpress_test_data::WORDPRESS_STRINGS, StringTable, +}; + +pub fn small_wordpress_profile(c: &mut Criterion) { + c.bench_function("benching string interning on wordpress profile", |b| { + b.iter(|| { + let mut table = StringTable::new(); + let n_strings = WORDPRESS_STRINGS.len(); + for string in WORDPRESS_STRINGS { + black_box(table.intern(string)); + } + assert_eq!(n_strings, table.len()); + + // re-insert, should nothing should be inserted. + for string in WORDPRESS_STRINGS { + black_box(table.intern(string)); + } + assert_eq!(n_strings, table.len()) + }) + }); +} + +criterion_group!(benches, small_wordpress_profile); diff --git a/profiling/benches/main.rs b/profiling/benches/main.rs new file mode 100644 index 000000000..e2d220f76 --- /dev/null +++ b/profiling/benches/main.rs @@ -0,0 +1,8 @@ +// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use criterion::criterion_main; + +mod interning_strings; + +criterion_main!(interning_strings::benches); diff --git a/profiling/src/collections/mod.rs b/profiling/src/collections/mod.rs index 435ca2bd6..66281e671 100644 --- a/profiling/src/collections/mod.rs +++ b/profiling/src/collections/mod.rs @@ -2,3 +2,4 @@ // SPDX-License-Identifier: Apache-2.0 pub mod identifiable; +pub mod string_table; diff --git a/profiling/src/collections/string_table/mod.rs b/profiling/src/collections/string_table/mod.rs new file mode 100644 index 000000000..00c25cefc --- /dev/null +++ b/profiling/src/collections/string_table/mod.rs @@ -0,0 +1,260 @@ +// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::collections::identifiable::{Id, StringId}; +use crate::iter::{IntoLendingIterator, LendingIterator}; +use datadog_alloc::{Allocator, ChainAllocator, VirtualAllocator}; +use std::alloc::Layout; + +type Hasher = core::hash::BuildHasherDefault; +type HashSet = indexmap::IndexSet; + +/// Holds unique strings and provides [StringId]s that correspond to the order +/// that the strings were inserted. +pub struct StringTable { + /// The bytes of each string stored in `strings` are allocated here. + bytes: ChainAllocator, + + /// The ordered hash set of unique strings. The order becomes the StringId. + /// The static lifetime is a lie, it is tied to the `bytes`, which is only + /// moved if the string table is moved e.g. + /// [StringTable::into_lending_iterator]. + /// References to the underlying strings should generally not be handed, + /// but if they are, they should be bound to the string table's lifetime + /// or the lending iterator's lifetime. + strings: HashSet<&'static str>, +} + +impl Default for StringTable { + fn default() -> Self { + Self::new() + } +} + +impl StringTable { + /// Creates a new string table, which initially holds the empty string and + /// no others. + pub fn new() -> Self { + // Christophe and Grégory think this is a fine size for 32-bit .NET. + const SIZE_HINT: usize = 4 * 1024 * 1024; + let bytes = ChainAllocator::new_in(SIZE_HINT, VirtualAllocator {}); + + let mut strings = HashSet::with_hasher(Hasher::default()); + // It various by implementation, but frequently I've noticed that the + // capacity after the first insertion is quite small, as in 3. This is + // a bit too small and there are frequent reallocations. For one sample + // with endpoint + code hotspots, we'd have at least these strings: + // - "" + // - At least one sample type + // - At least one sample unit--already at 3 without any samples. + // - "local root span id" + // - "span id" + // - "trace endpoint" + // - A file and/or function name per frame. + // So with a capacity like 3, we end up reallocating a bunch on or + // before the very first sample. The number here is not fine-tuned, + // just skipping some obviously bad, tiny sizes. + strings.reserve(32); + + // Always hold the empty string as item 0. Do not insert it via intern + // because that will try to allocate zero-bytes from the storage, + // which is sketchy. + strings.insert(""); + + Self { bytes, strings } + } + + /// Returns the number of strings currently held in the string table. + #[inline] + #[allow(clippy::len_without_is_empty)] + pub fn len(&self) -> usize { + self.strings.len() + } + + /// Adds the string to the string table if it isn't present already, and + /// returns a [StringId] that corresponds to the order that this string + /// was originally inserted. + /// + /// # Panics + /// This panics if the allocator fails to allocate. This could happen for + /// a few reasons: + /// - It failed to acquire a chunk. + /// - The string is too large (larger than a chunk size, for example) + pub fn intern(&mut self, str: &str) -> StringId { + let set = &mut self.strings; + match set.get_index_of(str) { + Some(offset) => StringId::from_offset(offset), + None => { + // No match. Get the current size of the table, which + // corresponds to the StringId it will have when inserted. + let string_id = StringId::from_offset(set.len()); + + // Allocate the string with alignment of 1 so that bytes are + // not wasted between strings. + let uninit_ptr = { + // SAFETY: this layout matches an existing string, it must + // be a valid layout since it already exists. + let layout = + unsafe { Layout::from_size_align(str.len(), 1).unwrap_unchecked() }; + self.bytes.allocate(layout).unwrap() + }; + + // Copy the bytes of the string into the allocated memory. + // SAFETY: this is guaranteed to not be overlapping because an + // allocator must not return aliasing bytes in its allocations. + unsafe { + let src = str.as_ptr(); + let dst = uninit_ptr.as_ptr() as *mut u8; + let count = str.len(); + core::ptr::copy_nonoverlapping(src, dst, count) + }; + + // Convert the bytes into a str, and insert into the set. + + // SAFETY: The bytes were properly initialized, and they cannot + // be misaligned because they have an alignment of 1, so it is + // safe to create a slice of the given data and length. The + // static lifetime is _wrong_, and we need to be careful. Any + // references returned to a caller must be tied to the lifetime + // of the string table, or to the iterator if iterating. + let slice: &'static [u8] = unsafe { + core::slice::from_raw_parts(uninit_ptr.as_ptr() as *const u8, str.len()) + }; + + // SAFETY: Since the bytes were copied from a valid str without + // slicing, the bytes must also be utf-8. + let new_str = unsafe { core::str::from_utf8_unchecked(slice) }; + + self.strings.insert(new_str); + string_id + } + } + } +} + +/// A [LendingIterator] for a [StringTable]. Make one by calling +/// [StringTable::into_lending_iter]. +pub struct StringTableIter { + /// This is actually used, the compiler doesn't know that the static + /// references in `iter` actually point in here. + #[allow(unused)] + bytes: ChainAllocator, + + /// The strings of the string table, in order of insertion. + /// The static lifetimes are a lie, they are tied to the `bytes`. When + /// handing out references, bind the lifetime to the iterator's lifetime, + /// which is a [LendingIterator] is needed. + iter: as IntoIterator>::IntoIter, +} + +impl StringTableIter { + fn new(string_table: StringTable) -> StringTableIter { + StringTableIter { + bytes: string_table.bytes, + iter: string_table.strings.into_iter(), + } + } +} + +impl LendingIterator for StringTableIter { + type Item<'a> = &'a str where Self: 'a; + + fn next(&mut self) -> Option> { + self.iter.next() + } + + fn count(self) -> usize { + self.iter.count() + } +} + +impl IntoLendingIterator for StringTable { + type Iter = StringTableIter; + + fn into_lending_iter(self) -> Self::Iter { + StringTableIter::new(self) + } +} + +#[allow(unused)] +pub mod wordpress_test_data; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_basics() { + let mut table = StringTable::new(); + // The empty string should already be present. + assert_eq!(1, table.len()); + assert_eq!(StringId::ZERO, table.intern("")); + + // Intern a string literal to ensure ?Sized works. + let string = table.intern("datadog"); + assert_eq!(StringId::from_offset(1), string); + assert_eq!(2, table.len()); + } + + #[track_caller] + fn test_from_src(src: &[&str]) { + // Insert all the strings. + let mut table = StringTable::new(); + let n_strings = src.len(); + for string in src { + table.intern(string); + } + assert_eq!(n_strings, table.len()); + + // Re-inserting doesn't change the size. + for string in src { + table.intern(string); + } + assert_eq!(n_strings, table.len()); + + // Check that they are ordered correctly when iterating. + let mut actual_iter = table.into_lending_iter(); + let mut expected_iter = src.iter(); + while let (Some(expected), Some(actual)) = (expected_iter.next(), actual_iter.next()) { + assert_eq!(*expected, actual); + } + + // The iterators should be exhausted at this point. + assert_eq!(None, expected_iter.next()); + assert_eq!(0, actual_iter.count()); + } + + #[test] + fn test_small_set_of_strings() { + let cases: &[_] = &[ + "", + "local root span id", + "span id", + "trace endpoint", + "samples", + "count", + "wall-time", + "nanoseconds", + "cpu-time", + ", stack_traces: FxIndexSet, start_time: SystemTime, - strings: FxIndexSet>, + strings: StringTable, timestamp_key: StringId, upscaling_rules: UpscalingRules, } @@ -287,7 +289,8 @@ impl Profile { encoder.encode(ProfileFunctionsEntry::from(item))?; } - for item in self.strings.into_iter() { + let mut lender = self.strings.into_lending_iter(); + while let Some(item) = lender.next() { encoder.encode_string_table_entry(item)?; } @@ -426,21 +429,9 @@ impl Profile { /// Interns the `str` as a string, returning the id in the string table. /// The empty string is guaranteed to have an id of [StringId::ZERO]. + #[inline] fn intern(&mut self, item: &str) -> StringId { - // For performance, delay converting the [&str] to a [String] until - // after it has been determined to not exist in the set. This avoids - // temporary allocations. - let index = match self.strings.get_index_of(item) { - Some(index) => index, - None => { - let (index, _inserted) = self.strings.insert_full(item.into()); - // This wouldn't make any sense; the item couldn't be found so - // we try to insert it, but suddenly it exists now? - debug_assert!(_inserted); - index - } - }; - StringId::from_offset(index) + self.strings.intern(item) } fn translate_and_enrich_sample_labels( @@ -821,7 +812,7 @@ mod api_test { assert_eq!(profile.sample_types, prev.sample_types); // The string table should have at least the empty string. - assert!(!profile.strings.is_empty()); + assert!(profile.strings.len() > 0); // todo: how to test strings? // assert_eq!("", profile.get_string(StringId::ZERO)); diff --git a/profiling/src/iter.rs b/profiling/src/iter.rs new file mode 100644 index 000000000..b1afbead3 --- /dev/null +++ b/profiling/src/iter.rs @@ -0,0 +1,23 @@ +// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +/// The [LendingIterator] is a version of an [Iterator] that can yield items +/// with references into the lender. It is a well-known name and there are +/// multiple crates which offer it, with differences. The needs here are +/// small, and so rather than bring in a pre-1.0 crate, just make our own. +pub trait LendingIterator { + type Item<'a> + where + Self: 'a; + + fn next(&mut self) -> Option>; + + #[allow(unused)] + fn count(self) -> usize; +} + +/// Turn a collection of some sort into a [LendingIterator]. +pub trait IntoLendingIterator { + type Iter: LendingIterator; + fn into_lending_iter(self) -> Self::Iter; +} diff --git a/profiling/src/lib.rs b/profiling/src/lib.rs index 1da8d7e5e..9dc4b980a 100644 --- a/profiling/src/lib.rs +++ b/profiling/src/lib.rs @@ -5,5 +5,6 @@ pub mod api; pub mod collections; pub mod exporter; pub mod internal; +mod iter; pub mod pprof; pub mod serializer; diff --git a/tools/docker/Dockerfile.build b/tools/docker/Dockerfile.build index 5f1227660..58124a47c 100644 --- a/tools/docker/Dockerfile.build +++ b/tools/docker/Dockerfile.build @@ -106,7 +106,7 @@ COPY "data-pipeline/Cargo.toml" "data-pipeline/" COPY "data-pipeline-ffi/Cargo.toml" "data-pipeline-ffi/" COPY "bin_tests/Cargo.toml" "bin_tests/" RUN find -name "Cargo.toml" | sed -e s#Cargo.toml#src/lib.rs#g | xargs -n 1 sh -c 'mkdir -p $(dirname $1); touch $1; echo $1' create_stubs -RUN echo trace-obfuscation/benches/trace_obfuscation.rs tools/src/bin/dedup_headers.rs tools/sidecar_mockgen/src/bin/sidecar_mockgen.rs ddtelemetry/examples/tm-worker-test.rs ipc/tarpc/tarpc/examples/compression.rs ipc/tarpc/tarpc/examples/custom_transport.rs ipc/tarpc/tarpc/examples/pubsub.rs ipc/tarpc/tarpc/examples/readme.rs ipc/tarpc/tarpc/examples/tracing.rs ipc/tarpc/tarpc/tests/compile_fail.rs ipc/tarpc/tarpc/tests/dataservice.rs ipc/tarpc/tarpc/tests/service_functional.rs bin_tests/src/bin/crashtracker_bin_test.rs bin_tests/src/bin/test_the_tests.rs | xargs -n 1 sh -c 'mkdir -p $(dirname $1); touch $1; echo $1' create_stubs +RUN echo profiling/benches/main.rs profiling/benches/interning_strings.rs trace-obfuscation/benches/trace_obfuscation.rs tools/src/bin/dedup_headers.rs tools/sidecar_mockgen/src/bin/sidecar_mockgen.rs ddtelemetry/examples/tm-worker-test.rs ipc/tarpc/tarpc/examples/compression.rs ipc/tarpc/tarpc/examples/custom_transport.rs ipc/tarpc/tarpc/examples/pubsub.rs ipc/tarpc/tarpc/examples/readme.rs ipc/tarpc/tarpc/examples/tracing.rs ipc/tarpc/tarpc/tests/compile_fail.rs ipc/tarpc/tarpc/tests/dataservice.rs ipc/tarpc/tarpc/tests/service_functional.rs bin_tests/src/bin/crashtracker_bin_test.rs bin_tests/src/bin/test_the_tests.rs | xargs -n 1 sh -c 'mkdir -p $(dirname $1); touch $1; echo $1' create_stubs # cache dependencies RUN cargo fetch --locked @@ -130,4 +130,5 @@ COPY ./ ./ RUN ./build-profiling-ffi.sh /build/output FROM scratch as ffi_build_output + COPY --from=ffi_build /build/output/ ./