Skip to content

Commit

Permalink
Allow SerializerStates and Writers to own their `SerializerConfig`s (#20)
Browse files: browse the repository at this point in the history

Resolves #14

This makes them easier to manage (no need for self-referencing) when they are seldom instantiated but need to be moved around/stored...
  • Loading branch information
Ten0 authored Jul 7, 2024
1 parent ec86138 commit 5ecd1ab
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 86 deletions.
1 change: 0 additions & 1 deletion serde_avro_fast/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
apache-avro = { version = "0.14", features = ["bzip", "snappy", "xz", "zstandard"] }
criterion = "0.5"
lazy_static = "1"
ouroboros = "0.18"
paste = "1"
pretty_assertions = "1"
serde-tuple-vec-map = "1"
Expand Down
32 changes: 29 additions & 3 deletions serde_avro_fast/src/object_container_file_encoding/writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use compression::CompressionCodecState;

use crate::{
object_container_file_encoding::{Metadata, METADATA_SCHEMA},
ser::{SerError, SerializerConfig, SerializerState},
ser::{SerError, SerializerConfig, SerializerConfigRef, SerializerState},
Schema,
};

Expand Down Expand Up @@ -101,7 +101,7 @@ where
///
/// See [`Writer`] for an example.
pub struct WriterBuilder<'c, 's> {
serializer_config: &'c mut SerializerConfig<'s>,
serializer_config: SerializerConfigRef<'c, 's>,
compression: Compression,
approx_block_size: u32,
/// Will otherwise be randomly generated
Expand All @@ -115,6 +115,19 @@ impl<'c, 's> WriterBuilder<'c, 's> {
/// be reused across serializations for performance, and other
/// serialization configuration.
pub fn new(serializer_config: &'c mut SerializerConfig<'s>) -> Self {
    // The caller keeps ownership of the config: only the mutable borrow is
    // stored, wrapped in the `Borrowed` variant.
    let borrowed_config = SerializerConfigRef::Borrowed(serializer_config);
    Self::with_opt_owned_config(borrowed_config)
}

/// Create a [`WriterBuilder`] that owns its [`SerializerConfig`].
///
/// Unlike [`WriterBuilder::new`], the configuration is moved (boxed) into
/// the builder instead of being borrowed.
///
/// This is the less performant option: because the [`SerializerConfig`] is
/// consumed here, the buffers it holds will not be re-used.
pub fn with_owned_config(serializer_config: SerializerConfig<'s>) -> Self {
    let owned_config = SerializerConfigRef::Owned(Box::new(serializer_config));
    Self::with_opt_owned_config(owned_config)
}

fn with_opt_owned_config(serializer_config: SerializerConfigRef<'c, 's>) -> Self {
Self {
serializer_config,
compression: Compression::Null,
Expand All @@ -123,6 +136,14 @@ impl<'c, 's> WriterBuilder<'c, 's> {
}
}

/// Mutable access to the `SerializerConfig` this `WriterBuilder` was
/// constructed with.
///
/// Use it to update the configuration's parameters before building the
/// writer.
pub fn serializer_config(&mut self) -> &mut SerializerConfig<'s> {
    // The field is a `SerializerConfigRef`; go through its `DerefMut` impl
    // to reach the underlying config.
    std::ops::DerefMut::deref_mut(&mut self.serializer_config)
}

/// Specify the compression codec that each block will be compressed with
pub fn compression(mut self, compression: Compression) -> Self {
self.compression = compression;
Expand Down Expand Up @@ -197,6 +218,8 @@ impl<'c, 's> WriterBuilder<'c, 's> {

{
// Serialize metadata
// No buffers will be used here and default parameters of `SerializerConfig`
// will be enough, so we can create a dedicated `SerializerConfig` for this.
let mut header_serializer_config = SerializerConfig::new_with_optional_schema(None);
let mut header_serializer_state =
SerializerState::from_writer(buf, &mut header_serializer_config);
Expand All @@ -223,7 +246,10 @@ impl<'c, 's> WriterBuilder<'c, 's> {

Ok(Writer {
inner: WriterInner {
serializer_state: SerializerState::from_writer(buf, self.serializer_config),
serializer_state: SerializerState::with_opt_owned_config(
buf,
self.serializer_config,
),
sync_marker,
compression_codec_state: CompressionCodecState::new(self.compression),
n_elements_in_block: 0,
Expand Down
53 changes: 52 additions & 1 deletion serde_avro_fast/src/ser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ pub struct SerializerState<'c, 's, W> {
/// Storing these here for reuse so that we can bypass the allocation,
/// and statistically obtain buffers that are already the proper length
/// (since we have used them for previous records)
config: &'c mut SerializerConfig<'s>,
config: SerializerConfigRef<'c, 's>,
}

/// Schema + serialization buffers
Expand Down Expand Up @@ -175,6 +175,34 @@ impl<'c, 's, W: std::io::Write> SerializerState<'c, 's, W> {
/// [`SerializerState::serializer`] to obtain a `DatumSerializer` that
/// does.
pub fn from_writer(writer: W, serializer_config: &'c mut SerializerConfig<'s>) -> Self {
    // Only the borrow is stored: the config stays owned by the caller.
    let config = SerializerConfigRef::Borrowed(serializer_config);
    Self { writer, config }
}

/// Build a `SerializerState` from a writer and an owned `SerializerConfig`.
///
/// Same behavior as [`SerializerState::from_writer`], except that the
/// `SerializerConfig` is moved into the state instead of being borrowed.
///
/// The `SerializerConfig` holds the buffers that should be re-used for
/// performance, so prefer this constructor only when the
/// [`SerializerState`] is rarely instantiated.
///
/// See the documentation of [`SerializerState::from_writer`] for everything
/// else.
pub fn with_owned_config(writer: W, serializer_config: SerializerConfig<'s>) -> Self {
    let config = SerializerConfigRef::Owned(Box::new(serializer_config));
    Self { writer, config }
}

pub(crate) fn with_opt_owned_config(
writer: W,
serializer_config: SerializerConfigRef<'c, 's>,
) -> Self {
Self {
writer,
config: serializer_config,
Expand Down Expand Up @@ -235,3 +263,26 @@ struct Buffers {
field_reordering_buffers: Vec<Vec<u8>>,
field_reordering_super_buffers: Vec<Vec<Option<Vec<u8>>>>,
}

/// Handle to a [`SerializerConfig`] that is either mutably borrowed from the
/// caller or owned (boxed) by the holder.
///
/// Both variants dereference to the same `SerializerConfig<'s>`, so code
/// holding a `SerializerConfigRef` does not need to care which one it has.
pub(crate) enum SerializerConfigRef<'c, 's> {
    /// The config lives elsewhere; only a mutable borrow is held.
    Borrowed(&'c mut SerializerConfig<'s>),
    /// The config is owned, stored behind a `Box`.
    Owned(Box<SerializerConfig<'s>>),
}

impl<'s> std::ops::Deref for SerializerConfigRef<'_, 's> {
    type Target = SerializerConfig<'s>;

    fn deref(&self) -> &Self::Target {
        // Each arm is coerced to `&SerializerConfig` (deref coercion through
        // `&mut _` and `Box<_>` respectively).
        match self {
            Self::Borrowed(borrowed) => borrowed,
            Self::Owned(boxed) => boxed,
        }
    }
}

impl std::ops::DerefMut for SerializerConfigRef<'_, '_> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        match self {
            Self::Borrowed(borrowed) => borrowed,
            Self::Owned(boxed) => boxed,
        }
    }
}
2 changes: 1 addition & 1 deletion serde_avro_fast/src/ser/serializer/struct_or_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ where
v
})
.unwrap_or_else(Vec::new),
config: serializer_state.config,
config: SerializerConfigRef::Borrowed(&mut *serializer_state.config),
};
value.serialize(DatumSerializer {
state: &mut buf_serializer_state,
Expand Down
63 changes: 63 additions & 0 deletions serde_avro_fast/tests/owned_writer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
use {
serde::Serialize,
serde_avro_derive::BuildSchema,
serde_avro_fast::{object_container_file_encoding::Writer, ser::SerializerConfig, Schema},
};

/// Record type serialized by `test_owned_writer`.
///
/// `build_writer` obtains its schema through `Bar::schema()`, which comes
/// from the `BuildSchema` derive.
#[derive(Serialize, BuildSchema)]
struct Bar {
a: i32,
b: String,
}

/// Build a `Writer` over an in-memory `Vec<u8>` that owns its
/// `SerializerConfig`, so it can be returned from this function without
/// borrowing anything from the local scope.
fn build_writer() -> Writer<'static, 'static, Vec<u8>> {
    lazy_static::lazy_static! {
        static ref SCHEMA: Schema = Bar::schema().unwrap();
    }

    // Fixed sync marker: makes the test deterministic.
    let sync_marker = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
    let config = SerializerConfig::new(&*SCHEMA);

    serde_avro_fast::object_container_file_encoding::WriterBuilder::with_owned_config(config)
        .sync_marker(sync_marker)
        .build(Vec::new())
        .unwrap()
}

/// Serializes two `Bar` records through a writer that owns its config and
/// compares the resulting bytes against a golden buffer.
#[test]
fn test_owned_writer() {
    let first = Bar {
        a: 1,
        b: "foo".to_string(),
    };
    let second = Bar {
        a: 2,
        b: "bar".to_string(),
    };

    let mut writer = build_writer();
    for record in [&first, &second] {
        writer.serialize(record).unwrap();
    }
    let written: Vec<u8> = writer.into_inner().unwrap();

    // Golden bytes. The 16-byte sync marker configured in `build_writer`
    // (1..=16) shows up twice: once after the header metadata and once after
    // the block containing the two records.
    let expected: &[u8] = &[
        79, 98, 106, 1, 2, 22, 97, 118, 114, 111, 46, 115, 99, 104, 101, 109, 97, 218, 1, 123,
        34, 116, 121, 112, 101, 34, 58, 34, 114, 101, 99, 111, 114, 100, 34, 44, 34, 110, 97,
        109, 101, 34, 58, 34, 111, 119, 110, 101, 100, 95, 119, 114, 105, 116, 101, 114, 46,
        66, 97, 114, 34, 44, 34, 102, 105, 101, 108, 100, 115, 34, 58, 91, 123, 34, 110, 97,
        109, 101, 34, 58, 34, 97, 34, 44, 34, 116, 121, 112, 101, 34, 58, 34, 105, 110, 116,
        34, 125, 44, 123, 34, 110, 97, 109, 101, 34, 58, 34, 98, 34, 44, 34, 116, 121, 112,
        101, 34, 58, 34, 115, 116, 114, 105, 110, 103, 34, 125, 93, 125, 2, 20, 97, 118, 114,
        111, 46, 99, 111, 100, 101, 99, 8, 110, 117, 108, 108, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
        10, 11, 12, 13, 14, 15, 16, 4, 20, 2, 6, 102, 111, 111, 4, 6, 98, 97, 114, 1, 2, 3, 4,
        5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
    ];
    assert_eq!(written, expected);
}
80 changes: 0 additions & 80 deletions serde_avro_fast/tests/self_referential_writer.rs

This file was deleted.

0 comments on commit 5ecd1ab

Please sign in to comment.