Skip to content

Commit

Permalink
Rework logical type representation (#23)
Browse files Browse the repository at this point in the history
Resolves #22

**What it does:**
- Fixes logical type representation for fixed with logical type (`{"type":{"name":"duration","type":"fixed","size":12},"logicalType":"duration"}` -> `{"type":"fixed", "name":"duration","size":12,"logicalType":"duration"}`)
  This is what I had originally written but interop tests with `apache-avro` bamboozled me into thinking this is how it was supposed to be. It wasn't, and they later fixed it to make it homogeneous with Java.
- Removes support for overly nested types in schema deserialization.
  This means specifically that the old representation above won't parse. (And similar objects written by versions of `apache-avro` before their fix won't parse.)
  [This is not supported by the reference Java implementation](https://github.com/apache/avro/blob/06c8b5ddfa3540b466b144503b150e30bf8afc15/lang/java/avro/src/main/java/org/apache/avro/Schema.java#L1830), and it's unspecified and unclear how references would resolve for named overly-nested types.
- Reworks logical type representation: `SchemaNode` goes from:
  ```rust
  pub enum SchemaNode {
      /// An Avro type that's not annotated with a logical type
      RegularType(RegularType),
      /// An Avro type that is annotated with a logical type
      LogicalType {
          /// The key of the [`RegularType`] (in the [`SchemaMut`]) that is
          /// annotated with this logical type
          inner: SchemaKey,
          /// The LogicalType this node is annotated with
          logical_type: LogicalType,
      },
  }
  ```
  to
  ```rust
  pub struct SchemaNode {
      /// The underlying regular type of this node
      pub type_: RegularType,
      /// Logical type that the avro type is annotated with, if any
      pub logical_type: Option<LogicalType>,
  }
  ```
  which is simpler to use, and used to be the representation before I got bamboozled into thinking it was not the appropriate representation by interop tests with the broken `apache-avro`.
  • Loading branch information
Ten0 authored Oct 20, 2024
1 parent 8d11c48 commit d2456a0
Show file tree
Hide file tree
Showing 20 changed files with 832 additions and 873 deletions.
97 changes: 73 additions & 24 deletions serde_avro_derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ impl SchemaBuilder {
/// position in `nodes`.
pub fn reserve(&mut self) -> usize {
let idx = self.nodes.len();
self.nodes.push(SchemaNode::RegularType(RegularType::Null));
self.nodes.push(RegularType::Null.into());
idx
}

Expand All @@ -102,34 +102,59 @@ impl SchemaBuilder {
{
std::collections::hash_map::Entry::Occupied(entry) => *entry.get(),
std::collections::hash_map::Entry::Vacant(entry) => {
let idx = SchemaKey::from_idx(self.nodes.len());
entry.insert(idx);
let schema_key = SchemaKey::from_idx(self.nodes.len());
entry.insert(schema_key);
T::append_schema(self);
assert!(
self.nodes.len() > idx.idx(),
self.nodes.len() > schema_key.idx(),
"append_schema should always insert at least a node \
(and its dependencies below itself)"
);
idx
schema_key
}
}
}

/// Insert a new [`SchemaNode`] corresponding to the schema for the type `T`
/// into [`nodes`](SchemaBuilder::nodes), regardless of whether it has
/// already been built.
///
/// This is only useful if using a type as a base for the definition of
/// another type, but patching it afterwards (e.g. adding a logical type).
/// Otherwise, use [`find_or_build`](SchemaBuilder::find_or_build) to avoid
/// duplicate nodes.
pub fn build_duplicate<T: BuildSchema + ?Sized>(&mut self) -> SchemaKey {
let schema_key = SchemaKey::from_idx(self.nodes.len());
T::append_schema(self);
assert!(
self.nodes.len() > schema_key.idx(),
"append_schema should always insert at least a node \
(and its dependencies below itself)"
);
schema_key
}

/// Register a new node for this logical type, where the regular type
/// specified with `T` is annotated with the logical type specified as the
/// `logical_type` argument.
///
/// `name_override` specifies how to override the name if the underlying
/// node ends up generating a named node
pub fn build_logical_type(
&mut self,
logical_type: LogicalType,
inner_type: impl FnOnce(&mut Self) -> SchemaKey,
inner_type_duplicate: impl FnOnce(&mut Self) -> SchemaKey,
name_override: impl FnOnce() -> String,
) -> SchemaKey {
let reserved_schema_key = self.reserve();
let new_node = SchemaNode::LogicalType {
logical_type,
inner: inner_type(self),
};
self.nodes[reserved_schema_key] = new_node;
SchemaKey::from_idx(reserved_schema_key)
let inner_type_duplicate_key = inner_type_duplicate(self);
let node = &mut self.nodes[inner_type_duplicate_key.idx()];
node.logical_type = Some(logical_type);

if let Some(name) = node.type_.name_mut() {
*name = Name::from_fully_qualified_name(name_override());
}

inner_type_duplicate_key
}
}

Expand All @@ -138,7 +163,7 @@ macro_rules! impl_primitive {
$(
impl BuildSchema for $ty {
fn append_schema(builder: &mut SchemaBuilder) {
builder.nodes.push(SchemaNode::RegularType(RegularType::$variant));
builder.nodes.push(RegularType::$variant.into());
}
type TypeLookup = Self;
}
Expand Down Expand Up @@ -214,8 +239,7 @@ impl<T: BuildSchema + ?Sized> BuildSchema for &'_ mut T {
impl<T: BuildSchema> BuildSchema for Vec<T> {
fn append_schema(builder: &mut SchemaBuilder) {
let reserved_schema_key = builder.reserve();
let new_node =
SchemaNode::RegularType(RegularType::Array(Array::new(builder.find_or_build::<T>())));
let new_node = Array::new(builder.find_or_build::<T>()).into();
builder.nodes[reserved_schema_key] = new_node;
}

Expand All @@ -232,10 +256,11 @@ impl<T: BuildSchema> BuildSchema for [T] {
impl<T: BuildSchema> BuildSchema for Option<T> {
fn append_schema(builder: &mut SchemaBuilder) {
let reserved_schema_key = builder.reserve();
let new_node = SchemaNode::RegularType(RegularType::Union(Union::new(vec![
let new_node = Union::new(vec![
builder.find_or_build::<()>(),
builder.find_or_build::<T>(),
])));
])
.into();
builder.nodes[reserved_schema_key] = new_node;
}

Expand All @@ -244,21 +269,21 @@ impl<T: BuildSchema> BuildSchema for Option<T> {

impl<const N: usize> BuildSchema for [u8; N] {
fn append_schema(builder: &mut SchemaBuilder) {
builder
.nodes
.push(SchemaNode::RegularType(RegularType::Fixed(Fixed::new(
builder.nodes.push(
Fixed::new(
Name::from_fully_qualified_name(format!("u8_array_{}", N)),
N,
))));
)
.into(),
);
}
type TypeLookup = Self;
}

impl<S: std::ops::Deref<Target = str>, V: BuildSchema> BuildSchema for HashMap<S, V> {
fn append_schema(builder: &mut SchemaBuilder) {
let reserved_schema_key = builder.reserve();
let new_node =
SchemaNode::RegularType(RegularType::Map(Map::new(builder.find_or_build::<V>())));
let new_node = Map::new(builder.find_or_build::<V>()).into();
builder.nodes[reserved_schema_key] = new_node;
}
type TypeLookup = HashMap<String, V::TypeLookup>;
Expand All @@ -285,3 +310,27 @@ pub fn hash_type_id(struct_name: &mut String, type_id: TypeId) {
type_id.hash(&mut hasher);
write!(struct_name, "_{:016x?}", hasher.finish()).unwrap();
}

#[doc(hidden)]
pub enum LazyNamespace {
Pending(fn() -> String),
Computed(String),
}
impl LazyNamespace {
pub fn new(f: fn() -> String) -> Self {
Self::Pending(f)
}
pub fn get(&mut self) -> &str {
match self {
Self::Pending(f) => {
let n = f();
*self = Self::Computed(n);
match self {
Self::Computed(n) => n,
_ => unreachable!(),
}
}
Self::Computed(n) => n,
}
}
}
38 changes: 38 additions & 0 deletions serde_avro_derive/tests/derive_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,44 @@ fn newtype() {
);
}

#[derive(BuildSchema)]
#[allow(unused)]
struct NewTypeDecimalFixed(
#[avro_schema(logical_type = "Decimal", scale = 3, precision = 5)] [u8; 4],
);

#[test]
fn newtype_decimal_fixed() {
test::<NewTypeDecimalFixed>(
r#"{
"logicalType": "decimal",
"type": "fixed",
"scale": 3,
"precision": 5,
"name": "derive_schema.NewTypeDecimalFixed",
"size": 4
}"#,
);
}

#[derive(BuildSchema)]
#[allow(unused)]
struct NewTypeDecimalBytes(
#[avro_schema(logical_type = "Decimal", scale = 3, precision = 5)] Vec<u8>,
);

#[test]
fn newtype_decimal_bytes() {
test::<NewTypeDecimalBytes>(
r#"{
"logicalType": "decimal",
"type": "bytes",
"scale": 3,
"precision": 5
}"#,
);
}

#[derive(BuildSchema)]
#[avro_schema(namespace = "namespace_override")]
#[allow(unused)]
Expand Down
Loading

0 comments on commit d2456a0

Please sign in to comment.