From 3be11499a9b077583c0eb37e9f71f128d84a1778 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 8 Dec 2023 08:13:15 +0100 Subject: [PATCH 001/107] CI: use container hostname in healthcheck As a workaround for scylladb/scylladb#16329, pass the hostname of the container when checking for its health with `cqlsh`. --- .github/workflows/book.yml | 2 +- test/cluster/docker-compose.yml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/book.yml b/.github/workflows/book.yml index f36ff004b2..d476929923 100644 --- a/.github/workflows/book.yml +++ b/.github/workflows/book.yml @@ -24,7 +24,7 @@ jobs: image: scylladb/scylla ports: - 9042:9042 - options: --health-cmd "cqlsh --debug" --health-interval 5s --health-retries 10 + options: --health-cmd "cqlsh --debug scylladb" --health-interval 5s --health-retries 10 steps: - uses: actions/checkout@v3 - name: Install mdbook diff --git a/test/cluster/docker-compose.yml b/test/cluster/docker-compose.yml index 0fa1e04327..210cc0b738 100644 --- a/test/cluster/docker-compose.yml +++ b/test/cluster/docker-compose.yml @@ -23,7 +23,7 @@ services: --smp 2 --memory 1G healthcheck: - test: [ "CMD", "cqlsh", "-e", "select * from system.local" ] + test: [ "CMD", "cqlsh", "scylla1", "-e", "select * from system.local" ] interval: 5s timeout: 5s retries: 60 @@ -41,7 +41,7 @@ services: --smp 2 --memory 1G healthcheck: - test: [ "CMD", "cqlsh", "-e", "select * from system.local" ] + test: [ "CMD", "cqlsh", "scylla2", "-e", "select * from system.local" ] interval: 5s timeout: 5s retries: 60 @@ -62,7 +62,7 @@ services: --smp 2 --memory 1G healthcheck: - test: [ "CMD", "cqlsh", "-e", "select * from system.local" ] + test: [ "CMD", "cqlsh", "scylla3", "-e", "select * from system.local" ] interval: 5s timeout: 5s retries: 60 From 9119af46f5db7f50746b21f4fc2a99fd5422d843 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 7 Dec 2023 09:40:48 +0100 Subject: [PATCH 002/107] serialize/writers: get rid of 
counting writers They are not going to be used after in the adjusted design, so remove them. --- scylla-cql/src/types/serialize/mod.rs | 2 +- scylla-cql/src/types/serialize/writers.rs | 125 +--------------------- 2 files changed, 2 insertions(+), 125 deletions(-) diff --git a/scylla-cql/src/types/serialize/mod.rs b/scylla-cql/src/types/serialize/mod.rs index 617fbc5f88..5cb8cc37c0 100644 --- a/scylla-cql/src/types/serialize/mod.rs +++ b/scylla-cql/src/types/serialize/mod.rs @@ -8,7 +8,7 @@ pub mod writers; pub use writers::{ BufBackedCellValueBuilder, BufBackedCellWriter, BufBackedRowWriter, CellValueBuilder, - CellWriter, CountingCellWriter, RowWriter, + CellWriter, RowWriter, }; #[derive(Debug, Clone, Error)] pub struct SerializationError(Arc); diff --git a/scylla-cql/src/types/serialize/writers.rs b/scylla-cql/src/types/serialize/writers.rs index ecb8a1fcc1..3562a14983 100644 --- a/scylla-cql/src/types/serialize/writers.rs +++ b/scylla-cql/src/types/serialize/writers.rs @@ -247,122 +247,10 @@ impl<'buf> CellValueBuilder for BufBackedCellValueBuilder<'buf> { } } -/// A row writer that does not actually write anything, just counts the bytes. -pub struct CountingRowWriter<'buf> { - buf: &'buf mut usize, -} - -impl<'buf> CountingRowWriter<'buf> { - /// Creates a new writer which increments the counter under given reference - /// when bytes are appended. - #[inline] - pub fn new(buf: &'buf mut usize) -> Self { - CountingRowWriter { buf } - } -} - -impl<'buf> RowWriter for CountingRowWriter<'buf> { - type CellWriter<'a> = CountingCellWriter<'a> where Self: 'a; - - #[inline] - fn make_cell_writer(&mut self) -> Self::CellWriter<'_> { - CountingCellWriter::new(self.buf) - } -} - -/// A cell writer that does not actually write anything, just counts the bytes. -pub struct CountingCellWriter<'buf> { - buf: &'buf mut usize, -} - -impl<'buf> CountingCellWriter<'buf> { - /// Creates a new writer which increments the counter under given reference - /// when bytes are appended. 
- #[inline] - fn new(buf: &'buf mut usize) -> Self { - CountingCellWriter { buf } - } -} - -impl<'buf> CellWriter for CountingCellWriter<'buf> { - type ValueBuilder = CountingCellValueBuilder<'buf>; - - type WrittenCellProof = (); - - #[inline] - fn set_null(self) { - *self.buf += 4; - } - - #[inline] - fn set_unset(self) { - *self.buf += 4; - } - - #[inline] - fn set_value(self, contents: &[u8]) -> Result<(), CellOverflowError> { - if contents.len() > i32::MAX as usize { - return Err(CellOverflowError); - } - *self.buf += 4 + contents.len(); - Ok(()) - } - - #[inline] - fn into_value_builder(self) -> Self::ValueBuilder { - *self.buf += 4; - CountingCellValueBuilder::new(self.buf) - } -} - -pub struct CountingCellValueBuilder<'buf> { - buf: &'buf mut usize, - - starting_pos: usize, -} - -impl<'buf> CountingCellValueBuilder<'buf> { - /// Creates a new builder which increments the counter under given reference - /// when bytes are appended. - #[inline] - fn new(buf: &'buf mut usize) -> Self { - let starting_pos = *buf; - CountingCellValueBuilder { buf, starting_pos } - } -} - -impl<'buf> CellValueBuilder for CountingCellValueBuilder<'buf> { - type SubCellWriter<'a> = CountingCellWriter<'a> - where - Self: 'a; - - type WrittenCellProof = (); - - #[inline] - fn append_bytes(&mut self, bytes: &[u8]) { - *self.buf += bytes.len(); - } - - #[inline] - fn make_sub_writer(&mut self) -> Self::SubCellWriter<'_> { - CountingCellWriter::new(self.buf) - } - - #[inline] - fn finish(self) -> Result { - if *self.buf - self.starting_pos > i32::MAX as usize { - return Err(CellOverflowError); - } - Ok(()) - } -} - #[cfg(test)] mod tests { - use crate::types::serialize::writers::CountingRowWriter; - use super::{ - BufBackedCellWriter, BufBackedRowWriter, CellValueBuilder, CellWriter, CountingCellWriter, + BufBackedCellWriter, BufBackedRowWriter, CellValueBuilder, CellWriter, RowWriter, }; @@ -377,12 +265,6 @@ mod tests { let mut data = Vec::new(); let writer = 
BufBackedCellWriter::new(&mut data); c.check(writer); - - let mut byte_count = 0usize; - let counting_writer = CountingCellWriter::new(&mut byte_count); - c.check(counting_writer); - - assert_eq!(data.len(), byte_count); data } @@ -441,11 +323,6 @@ mod tests { let mut writer = BufBackedRowWriter::new(&mut data); c.check(&mut writer); - let mut byte_count = 0usize; - let mut counting_writer = CountingRowWriter::new(&mut byte_count); - c.check(&mut counting_writer); - - assert_eq!(data.len(), byte_count); data } From ff9e363afd14e9bdc788f3387c6bafd9a2bd5989 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 7 Dec 2023 09:44:48 +0100 Subject: [PATCH 003/107] serialize/writers: simplify tests The tests in the `writers` module used a helper function to run the same code with both counting writers and buffer-backed writers, which required implementing a trait (Rust doesn't have generic closures). This complexity is no longer necessary. --- scylla-cql/src/types/serialize/writers.rs | 79 ++++++----------------- 1 file changed, 19 insertions(+), 60 deletions(-) diff --git a/scylla-cql/src/types/serialize/writers.rs b/scylla-cql/src/types/serialize/writers.rs index 3562a14983..6c67177adc 100644 --- a/scylla-cql/src/types/serialize/writers.rs +++ b/scylla-cql/src/types/serialize/writers.rs @@ -249,42 +249,21 @@ impl<'buf> CellValueBuilder for BufBackedCellValueBuilder<'buf> { #[cfg(test)] mod tests { - use super::{ - BufBackedCellWriter, BufBackedRowWriter, CellValueBuilder, CellWriter, - RowWriter, - }; - - // We want to perform the same computation for both buf backed writer - // and counting writer, but Rust does not support generic closures. - // This trait comes to the rescue. 
- trait CellSerializeCheck { - fn check(&self, writer: W); - } - - fn check_cell_serialize(c: C) -> Vec { - let mut data = Vec::new(); - let writer = BufBackedCellWriter::new(&mut data); - c.check(writer); - data - } + use super::{BufBackedCellWriter, BufBackedRowWriter, CellValueBuilder, CellWriter, RowWriter}; #[test] fn test_cell_writer() { - struct Check; - impl CellSerializeCheck for Check { - fn check(&self, writer: W) { - let mut sub_writer = writer.into_value_builder(); - sub_writer.make_sub_writer().set_null(); - sub_writer - .make_sub_writer() - .set_value(&[1, 2, 3, 4]) - .unwrap(); - sub_writer.make_sub_writer().set_unset(); - sub_writer.finish().unwrap(); - } - } + let mut data = Vec::new(); + let writer = BufBackedCellWriter::new(&mut data); + let mut sub_writer = writer.into_value_builder(); + sub_writer.make_sub_writer().set_null(); + sub_writer + .make_sub_writer() + .set_value(&[1, 2, 3, 4]) + .unwrap(); + sub_writer.make_sub_writer().set_unset(); + sub_writer.finish().unwrap(); - let data = check_cell_serialize(Check); assert_eq!( data, [ @@ -298,14 +277,10 @@ mod tests { #[test] fn test_poisoned_appender() { - struct Check; - impl CellSerializeCheck for Check { - fn check(&self, writer: W) { - let _ = writer.into_value_builder(); - } - } + let mut data = Vec::new(); + let writer = BufBackedCellWriter::new(&mut data); + let _ = writer.into_value_builder(); - let data = check_cell_serialize(Check); assert_eq!( data, [ @@ -314,30 +289,14 @@ mod tests { ); } - trait RowSerializeCheck { - fn check(&self, writer: &mut W); - } - - fn check_row_serialize(c: C) -> Vec { - let mut data = Vec::new(); - let mut writer = BufBackedRowWriter::new(&mut data); - c.check(&mut writer); - - data - } - #[test] fn test_row_writer() { - struct Check; - impl RowSerializeCheck for Check { - fn check(&self, writer: &mut W) { - writer.make_cell_writer().set_null(); - writer.make_cell_writer().set_value(&[1, 2, 3, 4]).unwrap(); - writer.make_cell_writer().set_unset(); - } 
- } + let mut data = Vec::new(); + let mut writer = BufBackedRowWriter::new(&mut data); + writer.make_cell_writer().set_null(); + writer.make_cell_writer().set_value(&[1, 2, 3, 4]).unwrap(); + writer.make_cell_writer().set_unset(); - let data = check_row_serialize(Check); assert_eq!( data, [ From a7aa5d5ba845b0ea11679f3e3854c607eb7130a1 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Wed, 6 Dec 2023 02:21:41 +0100 Subject: [PATCH 004/107] serialize/writers: add WrittenCellProof The writer traits are using a type-level trick to force implementors of `SerializeCql::serialize` to call `CellWriter::finish()`: the latter returns `CellWriter::WrittenCellProof` which the former is required to return. The fact that the proof is an associated type of the writer and the `serialize` method is generic over the writer type makes sure that the proof truly comes from the `finish()` call on the writer provided to the method and not from any other writer. We are going to change `CellWriter` into a struct, so this trick will no longer be applicable. Instead, introduce a `WrittenCellProof` struct, which is a zero-sized type generic over an invariant lifetime. The `CellWriter` struct will consume itself and return a `WrittenCellProof` with the same lifetime parameter as the original cell writer; `serialize` will require the implementor to return it. The lifetime invariance makes sure that one proof cannot be assigned to another if their lifetimes are not exactly the same. 
--- scylla-cql/src/types/serialize/writers.rs | 51 +++++++++++++++++++---- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/scylla-cql/src/types/serialize/writers.rs b/scylla-cql/src/types/serialize/writers.rs index 6c67177adc..59f564ad53 100644 --- a/scylla-cql/src/types/serialize/writers.rs +++ b/scylla-cql/src/types/serialize/writers.rs @@ -105,6 +105,39 @@ pub trait CellValueBuilder { fn finish(self) -> Result; } +/// An object that indicates a type-level proof that something was written +/// by a [`CellWriter`] or [`CellValueBuilder`] with lifetime parameter `'buf`. +/// +/// This type is returned by [`set_null`](CellWriter::set_null), +/// [`set_unset`](CellWriter::set_unset), +/// [`set_value`](CellWriter::set_value) +/// and also [`CellValueBuilder::finish`] - generally speaking, after +/// the value is fully initialized and the `CellWriter` is destroyed. +/// +/// The purpose of this type is to enforce the contract of +/// [`SerializeCql::serialize`](super::value::SerializeCql::serialize): either +/// the method succeeds and returns a proof that it serialized itself +/// into the given value, or it fails and returns an error or panics. +pub struct WrittenCellProof<'buf> { + /// Using *mut &'buf () is deliberate and makes WrittenCellProof invariant + /// on the 'buf lifetime parameter. + /// Ref: + _phantom: std::marker::PhantomData<*mut &'buf ()>, +} + +impl<'buf> WrittenCellProof<'buf> { + /// A shorthand for creating the proof. + /// + /// Do not make it public! It's important that only the row writer defined + /// in this module is able to create a proof. 
+ #[inline] + fn new() -> Self { + WrittenCellProof { + _phantom: std::marker::PhantomData, + } + } +} + /// There was an attempt to produce a CQL value over the maximum size limit (i32::MAX) #[derive(Debug, Clone, Copy, Error)] #[error("CQL cell overflowed the maximum allowed size of 2^31 - 1")] @@ -169,24 +202,26 @@ impl<'buf> BufBackedCellWriter<'buf> { impl<'buf> CellWriter for BufBackedCellWriter<'buf> { type ValueBuilder = BufBackedCellValueBuilder<'buf>; - type WrittenCellProof = (); + type WrittenCellProof = WrittenCellProof<'buf>; #[inline] - fn set_null(self) { + fn set_null(self) -> Self::WrittenCellProof { self.buf.extend_from_slice(&(-1i32).to_be_bytes()); + WrittenCellProof::new() } #[inline] - fn set_unset(self) { + fn set_unset(self) -> Self::WrittenCellProof { self.buf.extend_from_slice(&(-2i32).to_be_bytes()); + WrittenCellProof::new() } #[inline] - fn set_value(self, bytes: &[u8]) -> Result<(), CellOverflowError> { + fn set_value(self, bytes: &[u8]) -> Result { let value_len: i32 = bytes.len().try_into().map_err(|_| CellOverflowError)?; self.buf.extend_from_slice(&value_len.to_be_bytes()); self.buf.extend_from_slice(bytes); - Ok(()) + Ok(WrittenCellProof::new()) } #[inline] @@ -224,7 +259,7 @@ impl<'buf> CellValueBuilder for BufBackedCellValueBuilder<'buf> { where Self: 'a; - type WrittenCellProof = (); + type WrittenCellProof = WrittenCellProof<'buf>; #[inline] fn append_bytes(&mut self, bytes: &[u8]) { @@ -237,13 +272,13 @@ impl<'buf> CellValueBuilder for BufBackedCellValueBuilder<'buf> { } #[inline] - fn finish(self) -> Result<(), CellOverflowError> { + fn finish(self) -> Result { let value_len: i32 = (self.buf.len() - self.starting_pos - 4) .try_into() .map_err(|_| CellOverflowError)?; self.buf[self.starting_pos..self.starting_pos + 4] .copy_from_slice(&value_len.to_be_bytes()); - Ok(()) + Ok(WrittenCellProof::new()) } } From e696663c949c0a4376bd808b6b525c526334c7d1 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 7 Dec 2023 
09:49:12 +0100 Subject: [PATCH 005/107] serialize/row: de-genericise SerializeRow Adjust the interface to use the buffer backed row writer. Rename BufBackedRowWriter to RowWriter to make our life easier later when we replace the writer trait with the buffer-backed row writer struct. --- scylla-cql/src/types/serialize/row.rs | 36 +++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index b5fd862cee..cc89c17607 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -10,7 +10,7 @@ use crate::frame::value::{SerializedValues, ValueList}; use crate::frame::{response::result::ColumnSpec, types::RawValue}; use super::value::SerializeCql; -use super::{CellWriter, RowWriter, SerializationError}; +use super::{BufBackedRowWriter as RowWriter, CellWriter, RowWriter as _, SerializationError}; /// Contains information needed to serialize a row. pub struct RowSerializationContext<'a> { @@ -48,10 +48,10 @@ pub trait SerializeRow { /// /// The function may assume that `preliminary_type_check` was called, /// though it must not do anything unsafe if this assumption does not hold. - fn serialize( + fn serialize( &self, ctx: &RowSerializationContext<'_>, - writer: &mut W, + writer: &mut RowWriter, ) -> Result<(), SerializationError>; fn is_empty(&self) -> bool; @@ -64,10 +64,10 @@ macro_rules! fallback_impl_contents { ) -> Result<(), SerializationError> { Ok(()) } - fn serialize( + fn serialize( &self, ctx: &RowSerializationContext<'_>, - writer: &mut W, + writer: &mut RowWriter, ) -> Result<(), SerializationError> { serialize_legacy_row(self, ctx, writer) } @@ -94,10 +94,10 @@ macro_rules! 
impl_serialize_row_for_unit { Ok(()) } - fn serialize( + fn serialize( &self, _ctx: &RowSerializationContext<'_>, - _writer: &mut W, + _writer: &mut RowWriter, ) -> Result<(), SerializationError> { // Row is empty - do nothing Ok(()) @@ -136,10 +136,10 @@ macro_rules! impl_serialize_row_for_slice { Ok(()) } - fn serialize( + fn serialize( &self, ctx: &RowSerializationContext<'_>, - writer: &mut W, + writer: &mut RowWriter, ) -> Result<(), SerializationError> { if ctx.columns().len() != self.len() { return Err(mk_typck_err::( @@ -197,10 +197,10 @@ macro_rules! impl_serialize_row_for_map { Ok(()) } - fn serialize( + fn serialize( &self, ctx: &RowSerializationContext<'_>, - writer: &mut W, + writer: &mut RowWriter, ) -> Result<(), SerializationError> { // Unfortunately, column names aren't guaranteed to be unique. // We need to track not-yet-used columns in order to see @@ -272,10 +272,10 @@ impl SerializeRow for &T { ::preliminary_type_check(ctx) } - fn serialize( + fn serialize( &self, ctx: &RowSerializationContext<'_>, - writer: &mut W, + writer: &mut RowWriter, ) -> Result<(), SerializationError> { ::serialize(self, ctx, writer) } @@ -326,10 +326,10 @@ macro_rules! impl_tuple { Ok(()) } - fn serialize( + fn serialize( &self, ctx: &RowSerializationContext<'_>, - writer: &mut W, + writer: &mut RowWriter, ) -> Result<(), SerializationError> { let ($($tidents,)*) = match ctx.columns() { [$($tidents),*] => ($($tidents,)*), @@ -452,10 +452,10 @@ macro_rules! impl_serialize_row_via_value_list { ::std::result::Result::Ok(()) } - fn serialize( + fn serialize( &self, ctx: &$crate::types::serialize::row::RowSerializationContext<'_>, - writer: &mut W, + writer: &mut $crate::types::serialize::writers::BufBackedRowWriter, ) -> ::std::result::Result<(), $crate::types::serialize::SerializationError> { $crate::types::serialize::row::serialize_legacy_row(self, ctx, writer) } @@ -492,7 +492,7 @@ macro_rules! 
impl_serialize_row_via_value_list { pub fn serialize_legacy_row( r: &T, ctx: &RowSerializationContext<'_>, - writer: &mut impl RowWriter, + writer: &mut RowWriter, ) -> Result<(), SerializationError> { let serialized = ::serialized(r).map_err(|err| SerializationError(Arc::new(err)))?; From f374ba7d9a7836c9978d30110c29c2aa16ce3ac7 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 7 Dec 2023 09:54:14 +0100 Subject: [PATCH 006/107] serialize/value: de-genericise SerializeCql Adjust the interface to use the buffer backed cell writer. Rename BufBackedCellWriter to CellWriter to make our life easier later when we replace the writer trait with the buffer-backed cell writer struct. --- scylla-cql/src/types/serialize/value.rs | 147 ++++++++++++------------ 1 file changed, 75 insertions(+), 72 deletions(-) diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 5d81cdb938..121eccf3ab 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -24,7 +24,10 @@ use crate::frame::value::{ #[cfg(feature = "chrono")] use crate::frame::value::ValueOverflow; -use super::{CellValueBuilder, CellWriter, SerializationError}; +use super::writers::WrittenCellProof; +use super::{ + BufBackedCellWriter as CellWriter, CellValueBuilder, CellWriter as _, SerializationError, +}; pub trait SerializeCql { /// Given a CQL type, checks if it _might_ be possible to serialize to that type. @@ -41,11 +44,11 @@ pub trait SerializeCql { /// /// The function may assume that `preliminary_type_check` was called, /// though it must not do anything unsafe if this assumption does not hold. - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result; + writer: CellWriter<'b>, + ) -> Result, SerializationError>; } macro_rules! impl_exact_preliminary_type_check { @@ -69,11 +72,11 @@ macro_rules! 
impl_serialize_via_writer { impl_serialize_via_writer!(|$me, _typ, $writer| $e); }; (|$me:ident, $typ:ident, $writer:ident| $e:expr) => { - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { let $writer = writer; let $typ = typ; let $me = self; @@ -182,11 +185,11 @@ impl SerializeCql for Secret { fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { V::preliminary_type_check(typ) } - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { V::serialize(self.expose_secret(), typ, writer) } } @@ -270,11 +273,11 @@ impl SerializeCql for Option { fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { T::preliminary_type_check(typ) } - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { match self { Some(v) => v.serialize(typ, writer), None => Ok(writer.set_null()), @@ -308,11 +311,11 @@ impl SerializeCql for MaybeUnset { fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { V::preliminary_type_check(typ) } - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { match self { MaybeUnset::Set(v) => v.serialize(typ, writer), MaybeUnset::Unset => Ok(writer.set_unset()), @@ -323,11 +326,11 @@ impl SerializeCql for &T { fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { T::preliminary_type_check(typ) } - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { T::serialize(*self, typ, writer) } } @@ -335,11 +338,11 @@ impl SerializeCql for Box { fn preliminary_type_check(typ: &ColumnType) -> 
Result<(), SerializationError> { T::preliminary_type_check(typ) } - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { T::serialize(&**self, typ, writer) } } @@ -359,11 +362,11 @@ impl SerializeCql for HashSet { } } - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { serialize_sequence( std::any::type_name::(), self.len(), @@ -389,11 +392,11 @@ impl SerializeCql for HashMap< } } - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { serialize_mapping( std::any::type_name::(), self.len(), @@ -419,11 +422,11 @@ impl SerializeCql for BTreeSet { } } - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { serialize_sequence( std::any::type_name::(), self.len(), @@ -449,11 +452,11 @@ impl SerializeCql for BTreeMap { } } - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { serialize_mapping( std::any::type_name::(), self.len(), @@ -481,11 +484,11 @@ impl SerializeCql for Vec { } } - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { serialize_sequence( std::any::type_name::(), self.len(), @@ -513,11 +516,11 @@ impl<'a, T: SerializeCql + 'a> SerializeCql for &'a [T] { } } - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { serialize_sequence( std::any::type_name::(), self.len(), @@ -538,20 +541,20 @@ impl SerializeCql for CqlValue { } } - fn serialize( + fn serialize<'b>( &self, typ: 
&ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { serialize_cql_value(self, typ, writer).map_err(fix_cql_value_name_in_err) } } -fn serialize_cql_value( +fn serialize_cql_value<'b>( value: &CqlValue, typ: &ColumnType, - writer: W, -) -> Result { + writer: CellWriter<'b>, +) -> Result, SerializationError> { match value { CqlValue::Ascii(a) => check_and_serialize(a, typ, writer), CqlValue::Boolean(b) => check_and_serialize(b, typ, writer), @@ -666,22 +669,22 @@ fn fix_cql_value_name_in_err(mut err: SerializationError) -> SerializationError err } -fn check_and_serialize( +fn check_and_serialize<'b, V: SerializeCql>( v: &V, typ: &ColumnType, - writer: W, -) -> Result { + writer: CellWriter<'b>, +) -> Result, SerializationError> { V::preliminary_type_check(typ)?; v.serialize(typ, writer) } -fn serialize_udt( +fn serialize_udt<'b>( typ: &ColumnType, keyspace: &str, type_name: &str, values: &[(String, Option)], - writer: W, -) -> Result { + writer: CellWriter<'b>, +) -> Result, SerializationError> { let (dst_type_name, dst_keyspace, field_types) = match typ { ColumnType::UserDefinedType { type_name, @@ -747,12 +750,12 @@ fn serialize_udt( .map_err(|_| mk_ser_err::(typ, BuiltinSerializationErrorKind::SizeOverflow)) } -fn serialize_tuple_like<'t, W: CellWriter>( +fn serialize_tuple_like<'t, 'b>( typ: &ColumnType, field_types: impl Iterator, field_values: impl Iterator>, - writer: W, -) -> Result { + writer: CellWriter<'b>, +) -> Result, SerializationError> { let mut builder = writer.into_value_builder(); for (index, (el, typ)) in field_values.zip(field_types).enumerate() { @@ -818,11 +821,11 @@ macro_rules! 
impl_tuple { Ok(()) } - fn serialize( + fn serialize<'b>( &self, typ: &ColumnType, - writer: W, - ) -> Result { + writer: CellWriter<'b>, + ) -> Result, SerializationError> { let ($($tidents,)*) = match typ { ColumnType::Tuple(typs) => match typs.as_slice() { [$($tidents),*] => ($($tidents,)*), @@ -892,13 +895,13 @@ impl_tuples!( 16 ); -fn serialize_sequence<'t, T: SerializeCql + 't, W: CellWriter>( +fn serialize_sequence<'t, 'b, T: SerializeCql + 't>( rust_name: &'static str, len: usize, iter: impl Iterator, typ: &ColumnType, - writer: W, -) -> Result { + writer: CellWriter<'b>, +) -> Result, SerializationError> { let elt = match typ { ColumnType::List(elt) | ColumnType::Set(elt) => elt, _ => { @@ -936,13 +939,13 @@ fn serialize_sequence<'t, T: SerializeCql + 't, W: CellWriter>( .map_err(|_| mk_ser_err_named(rust_name, typ, BuiltinSerializationErrorKind::SizeOverflow)) } -fn serialize_mapping<'t, K: SerializeCql + 't, V: SerializeCql + 't, W: CellWriter>( +fn serialize_mapping<'t, 'b, K: SerializeCql + 't, V: SerializeCql + 't>( rust_name: &'static str, len: usize, iter: impl Iterator, typ: &ColumnType, - writer: W, -) -> Result { + writer: CellWriter<'b>, +) -> Result, SerializationError> { let (ktyp, vtyp) = match typ { ColumnType::Map(k, v) => (k, v), _ => { @@ -1009,7 +1012,7 @@ fn serialize_mapping<'t, K: SerializeCql + 't, V: SerializeCql + 't, W: CellWrit /// # use scylla_cql::impl_serialize_cql_via_value; /// struct NoGenerics {} /// impl Value for NoGenerics { -/// fn serialize(&self, _buf: &mut Vec) -> Result<(), ValueTooBig> { +/// fn serialize<'b>(&self, _buf: &mut Vec) -> Result<(), ValueTooBig> { /// Ok(()) /// } /// } @@ -1019,7 +1022,7 @@ fn serialize_mapping<'t, K: SerializeCql + 't, V: SerializeCql + 't, W: CellWrit /// // struct/enum contains any. 
/// struct WithGenerics(T, U); /// impl Value for WithGenerics { -/// fn serialize(&self, buf: &mut Vec) -> Result<(), ValueTooBig> { +/// fn serialize<'b>(&self, buf: &mut Vec) -> Result<(), ValueTooBig> { /// self.0.serialize(buf)?; /// self.1.clone().serialize(buf)?; /// Ok(()) @@ -1042,12 +1045,12 @@ macro_rules! impl_serialize_cql_via_value { ::std::result::Result::Ok(()) } - fn serialize( + fn serialize<'b>( &self, _typ: &$crate::frame::response::result::ColumnType, - writer: W, + writer: $crate::types::serialize::writers::BufBackedCellWriter<'b>, ) -> ::std::result::Result< - W::WrittenCellProof, + $crate::types::serialize::writers::WrittenCellProof<'b>, $crate::types::serialize::SerializationError, > { $crate::types::serialize::value::serialize_legacy_value(self, writer) @@ -1069,10 +1072,10 @@ macro_rules! impl_serialize_cql_via_value { /// /// See [`impl_serialize_cql_via_value`] which generates a boilerplate /// [`SerializeCql`] implementation that uses this function. -pub fn serialize_legacy_value( +pub fn serialize_legacy_value<'b, T: Value>( v: &T, - writer: W, -) -> Result { + writer: CellWriter<'b>, +) -> Result, SerializationError> { // It's an inefficient and slightly tricky but correct implementation. let mut buf = Vec::new(); ::serialize(v, &mut buf).map_err(|err| SerializationError(Arc::new(err)))?; From d37b2c4eb48f17d65bce7f49707a7cbc1c722a84 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 7 Dec 2023 12:36:11 +0100 Subject: [PATCH 007/107] serialize/writers: replace writer traits with buffer-backed impls Remove RowWriter/CellWriter/CellValueBuilder traits and rename their buffer-backed implementations to take their place as structs. Fix the existing direct uses of buf-backed structs to refer to them by their new name. 
--- scylla-cql/src/frame/value_tests.rs | 10 +- scylla-cql/src/types/serialize/mod.rs | 5 +- scylla-cql/src/types/serialize/row.rs | 10 +- scylla-cql/src/types/serialize/value.rs | 10 +- scylla-cql/src/types/serialize/writers.rs | 214 ++++++++-------------- 5 files changed, 88 insertions(+), 161 deletions(-) diff --git a/scylla-cql/src/frame/value_tests.rs b/scylla-cql/src/frame/value_tests.rs index 280d5d055b..ecb678ecae 100644 --- a/scylla-cql/src/frame/value_tests.rs +++ b/scylla-cql/src/frame/value_tests.rs @@ -1,7 +1,7 @@ use crate::frame::{response::result::CqlValue, types::RawValue, value::BatchValuesIterator}; use crate::types::serialize::row::{RowSerializationContext, SerializeRow}; use crate::types::serialize::value::SerializeCql; -use crate::types::serialize::{BufBackedCellWriter, BufBackedRowWriter}; +use crate::types::serialize::{CellWriter, RowWriter}; use super::response::result::{ColumnSpec, ColumnType, TableSpec}; use super::value::{ @@ -27,7 +27,7 @@ where T::preliminary_type_check(&typ).unwrap(); let mut new_result: Vec = Vec::new(); - let writer = BufBackedCellWriter::new(&mut new_result); + let writer = CellWriter::new(&mut new_result); SerializeCql::serialize(&val, &typ, writer).unwrap(); assert_eq!(result, new_result); @@ -37,7 +37,7 @@ where fn serialized_only_new(val: T, typ: ColumnType) -> Vec { let mut result: Vec = Vec::new(); - let writer = BufBackedCellWriter::new(&mut result); + let writer = CellWriter::new(&mut result); SerializeCql::serialize(&val, &typ, writer).unwrap(); result } @@ -997,7 +997,7 @@ fn serialize_values( let ctx = RowSerializationContext { columns }; ::preliminary_type_check(&ctx).unwrap(); let mut new_serialized = vec![0, 0]; - let mut writer = BufBackedRowWriter::new(&mut new_serialized); + let mut writer = RowWriter::new(&mut new_serialized); ::serialize(&vl, &ctx, &mut writer).unwrap(); let value_count: u16 = writer.value_count().try_into().unwrap(); let is_empty = writer.value_count() == 0; @@ -1016,7 +1016,7 
@@ fn serialize_values_only_new(vl: T, columns: &[ColumnSpec]) -> let ctx = RowSerializationContext { columns }; ::preliminary_type_check(&ctx).unwrap(); let mut serialized = vec![0, 0]; - let mut writer = BufBackedRowWriter::new(&mut serialized); + let mut writer = RowWriter::new(&mut serialized); ::serialize(&vl, &ctx, &mut writer).unwrap(); let value_count: u16 = writer.value_count().try_into().unwrap(); let is_empty = writer.value_count() == 0; diff --git a/scylla-cql/src/types/serialize/mod.rs b/scylla-cql/src/types/serialize/mod.rs index 5cb8cc37c0..230462759d 100644 --- a/scylla-cql/src/types/serialize/mod.rs +++ b/scylla-cql/src/types/serialize/mod.rs @@ -6,10 +6,7 @@ pub mod row; pub mod value; pub mod writers; -pub use writers::{ - BufBackedCellValueBuilder, BufBackedCellWriter, BufBackedRowWriter, CellValueBuilder, - CellWriter, RowWriter, -}; +pub use writers::{CellValueBuilder, CellWriter, RowWriter}; #[derive(Debug, Clone, Error)] pub struct SerializationError(Arc); diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index cc89c17607..0da2a4ac10 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -10,7 +10,7 @@ use crate::frame::value::{SerializedValues, ValueList}; use crate::frame::{response::result::ColumnSpec, types::RawValue}; use super::value::SerializeCql; -use super::{BufBackedRowWriter as RowWriter, CellWriter, RowWriter as _, SerializationError}; +use super::{RowWriter, SerializationError}; /// Contains information needed to serialize a row. pub struct RowSerializationContext<'a> { @@ -455,7 +455,7 @@ macro_rules! 
impl_serialize_row_via_value_list { fn serialize( &self, ctx: &$crate::types::serialize::row::RowSerializationContext<'_>, - writer: &mut $crate::types::serialize::writers::BufBackedRowWriter, + writer: &mut $crate::types::serialize::writers::RowWriter, ) -> ::std::result::Result<(), $crate::types::serialize::SerializationError> { $crate::types::serialize::row::serialize_legacy_row(self, ctx, writer) } @@ -660,7 +660,7 @@ pub enum ValueListToSerializeRowAdapterError { mod tests { use crate::frame::response::result::{ColumnSpec, ColumnType, TableSpec}; use crate::frame::value::{MaybeUnset, SerializedValues, ValueList}; - use crate::types::serialize::BufBackedRowWriter; + use crate::types::serialize::RowWriter; use super::{RowSerializationContext, SerializeRow}; @@ -688,7 +688,7 @@ mod tests { <_ as ValueList>::write_to_request(&row, &mut legacy_data).unwrap(); let mut new_data = Vec::new(); - let mut new_data_writer = BufBackedRowWriter::new(&mut new_data); + let mut new_data_writer = RowWriter::new(&mut new_data); let ctx = RowSerializationContext { columns: &[ col_spec("a", ColumnType::Int), @@ -725,7 +725,7 @@ mod tests { unsorted_row.add_named_value("c", &None::).unwrap(); let mut unsorted_row_data = Vec::new(); - let mut unsorted_row_data_writer = BufBackedRowWriter::new(&mut unsorted_row_data); + let mut unsorted_row_data_writer = RowWriter::new(&mut unsorted_row_data); let ctx = RowSerializationContext { columns: &[ col_spec("a", ColumnType::Int), diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 121eccf3ab..28754d9a11 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -25,9 +25,7 @@ use crate::frame::value::{ use crate::frame::value::ValueOverflow; use super::writers::WrittenCellProof; -use super::{ - BufBackedCellWriter as CellWriter, CellValueBuilder, CellWriter as _, SerializationError, -}; +use super::{CellWriter, SerializationError}; pub trait 
SerializeCql { /// Given a CQL type, checks if it _might_ be possible to serialize to that type. @@ -1048,7 +1046,7 @@ macro_rules! impl_serialize_cql_via_value { fn serialize<'b>( &self, _typ: &$crate::frame::response::result::ColumnType, - writer: $crate::types::serialize::writers::BufBackedCellWriter<'b>, + writer: $crate::types::serialize::writers::CellWriter<'b>, ) -> ::std::result::Result< $crate::types::serialize::writers::WrittenCellProof<'b>, $crate::types::serialize::SerializationError, @@ -1576,7 +1574,7 @@ pub enum ValueToSerializeCqlAdapterError { mod tests { use crate::frame::response::result::ColumnType; use crate::frame::value::{MaybeUnset, Value}; - use crate::types::serialize::BufBackedCellWriter; + use crate::types::serialize::CellWriter; use super::SerializeCql; @@ -1585,7 +1583,7 @@ mod tests { ::serialize(&v, &mut legacy_data).unwrap(); let mut new_data = Vec::new(); - let new_data_writer = BufBackedCellWriter::new(&mut new_data); + let new_data_writer = CellWriter::new(&mut new_data); ::serialize(&v, &ColumnType::Int, new_data_writer).unwrap(); assert_eq!(legacy_data, new_data); diff --git a/scylla-cql/src/types/serialize/writers.rs b/scylla-cql/src/types/serialize/writers.rs index 59f564ad53..b42c8a7da1 100644 --- a/scylla-cql/src/types/serialize/writers.rs +++ b/scylla-cql/src/types/serialize/writers.rs @@ -3,14 +3,22 @@ use thiserror::Error; /// An interface that facilitates writing values for a CQL query. -pub trait RowWriter { - type CellWriter<'a>: CellWriter - where - Self: 'a; +pub struct RowWriter<'buf> { + // Buffer that this value should be serialized to. + buf: &'buf mut Vec, + // Number of values written so far. + value_count: usize, +} + +impl<'buf> RowWriter<'buf> { /// Appends a new value to the sequence and returns an object that allows /// to fill it in. 
- fn make_cell_writer(&mut self) -> Self::CellWriter<'_>; + #[inline] + pub fn make_cell_writer(&mut self) -> CellWriter<'_> { + self.value_count += 1; + CellWriter::new(self.buf) + } } /// Represents a handle to a CQL value that needs to be written into. @@ -19,11 +27,11 @@ pub trait RowWriter { /// (via [`set_null`](CellWriter::set_null), /// [`set_unset`](CellWriter::set_unset) /// or [`set_value`](CellWriter::set_value) or transformed into -/// the [`CellWriter::ValueBuilder`] in order to gradually initialize +/// the [`CellValueBuilder`] in order to gradually initialize /// the value when the contents are not available straight away. /// /// After the value is fully initialized, the handle is consumed and -/// a [`WrittenCellProof`](CellWriter::WrittenCellProof) object is returned +/// a [`WrittenCellProof`] object is returned /// in its stead. This is a type-level proof that the value was fully initialized /// and is used in [`SerializeCql::serialize`](`super::value::SerializeCql::serialize`) /// in order to enforce the implementor to fully initialize the provided handle @@ -31,33 +39,24 @@ pub trait RowWriter { /// /// Dropping this type without calling any of its methods will result /// in nothing being written. -pub trait CellWriter { - /// The type of the value builder, returned by the [`CellWriter::set_value`] - /// method. - type ValueBuilder: CellValueBuilder; - - /// An object that serves as a proof that the cell was fully initialized. - /// - /// This type is returned by [`set_null`](CellWriter::set_null), - /// [`set_unset`](CellWriter::set_unset), - /// [`set_value`](CellWriter::set_value) - /// and also [`CellValueBuilder::finish`] - generally speaking, after - /// the value is fully initialized and the `CellWriter` is destroyed. 
- /// - /// The purpose of this type is to enforce the contract of - /// [`SerializeCql::serialize`](super::value::SerializeCql::serialize): either - /// the method succeeds and returns a proof that it serialized itself - /// into the given value, or it fails and returns an error or panics. - /// The exact type of [`WrittenCellProof`](CellWriter::WrittenCellProof) - /// is not important as the value is not used at all - it's only - /// a compile-time check. - type WrittenCellProof; +pub struct CellWriter<'buf> { + buf: &'buf mut Vec, +} +impl<'buf> CellWriter<'buf> { /// Sets this value to be null, consuming this object. - fn set_null(self) -> Self::WrittenCellProof; + #[inline] + pub fn set_null(self) -> WrittenCellProof<'buf> { + self.buf.extend_from_slice(&(-1i32).to_be_bytes()); + WrittenCellProof::new() + } /// Sets this value to represent an unset value, consuming this object. - fn set_unset(self) -> Self::WrittenCellProof; + #[inline] + pub fn set_unset(self) -> WrittenCellProof<'buf> { + self.buf.extend_from_slice(&(-2i32).to_be_bytes()); + WrittenCellProof::new() + } /// Sets this value to a non-zero, non-unset value with given contents. /// @@ -67,7 +66,13 @@ pub trait CellWriter { /// /// Fails if the contents size overflows the maximum allowed CQL cell size /// (which is i32::MAX). - fn set_value(self, contents: &[u8]) -> Result; + #[inline] + pub fn set_value(self, contents: &[u8]) -> Result, CellOverflowError> { + let value_len: i32 = contents.len().try_into().map_err(|_| CellOverflowError)?; + self.buf.extend_from_slice(&value_len.to_be_bytes()); + self.buf.extend_from_slice(contents); + Ok(WrittenCellProof::new()) + } /// Turns this writter into a [`CellValueBuilder`] which can be used /// to gradually initialize the CQL value. @@ -75,7 +80,10 @@ pub trait CellWriter { /// This method should be used if you don't have all of the data /// up front, e.g. when serializing compound types such as collections /// or UDTs. 
- fn into_value_builder(self) -> Self::ValueBuilder; + #[inline] + pub fn into_value_builder(self) -> CellValueBuilder<'buf> { + CellValueBuilder::new(self.buf) + } } /// Allows appending bytes to a non-null, non-unset cell. @@ -84,25 +92,41 @@ pub trait CellWriter { /// serialized. Failing to drop this value will result in a payload that will /// not be parsed by the database correctly, but otherwise should not cause /// data to be misinterpreted. -pub trait CellValueBuilder { - type SubCellWriter<'a>: CellWriter - where - Self: 'a; +pub struct CellValueBuilder<'buf> { + // Buffer that this value should be serialized to. + buf: &'buf mut Vec, - type WrittenCellProof; + // Starting position of the value in the buffer. + starting_pos: usize, +} +impl<'buf> CellValueBuilder<'buf> { /// Appends raw bytes to this cell. - fn append_bytes(&mut self, bytes: &[u8]); + #[inline] + pub fn append_bytes(&mut self, bytes: &[u8]) { + self.buf.extend_from_slice(bytes); + } /// Appends a sub-value to the end of the current contents of the cell /// and returns an object that allows to fill it in. - fn make_sub_writer(&mut self) -> Self::SubCellWriter<'_>; + #[inline] + pub fn make_sub_writer(&mut self) -> CellWriter<'_> { + CellWriter::new(self.buf) + } /// Finishes serializing the value. /// /// Fails if the constructed cell size overflows the maximum allowed /// CQL cell size (which is i32::MAX). 
- fn finish(self) -> Result; + #[inline] + pub fn finish(self) -> Result, CellOverflowError> { + let value_len: i32 = (self.buf.len() - self.starting_pos - 4) + .try_into() + .map_err(|_| CellOverflowError)?; + self.buf[self.starting_pos..self.starting_pos + 4] + .copy_from_slice(&value_len.to_be_bytes()); + Ok(WrittenCellProof::new()) + } } /// An object that indicates a type-level proof that something was written @@ -143,16 +167,7 @@ impl<'buf> WrittenCellProof<'buf> { #[error("CQL cell overflowed the maximum allowed size of 2^31 - 1")] pub struct CellOverflowError; -/// A row writer backed by a buffer (vec). -pub struct BufBackedRowWriter<'buf> { - // Buffer that this value should be serialized to. - buf: &'buf mut Vec, - - // Number of values written so far. - value_count: usize, -} - -impl<'buf> BufBackedRowWriter<'buf> { +impl<'buf> RowWriter<'buf> { /// Creates a new row writer based on an existing Vec. /// /// The newly created row writer will append data to the end of the vec. @@ -174,72 +189,17 @@ impl<'buf> BufBackedRowWriter<'buf> { } } -impl<'buf> RowWriter for BufBackedRowWriter<'buf> { - type CellWriter<'a> = BufBackedCellWriter<'a> where Self: 'a; - - #[inline] - fn make_cell_writer(&mut self) -> Self::CellWriter<'_> { - self.value_count += 1; - BufBackedCellWriter::new(self.buf) - } -} - -/// A cell writer backed by a buffer (vec). -pub struct BufBackedCellWriter<'buf> { - buf: &'buf mut Vec, -} - -impl<'buf> BufBackedCellWriter<'buf> { +impl<'buf> CellWriter<'buf> { /// Creates a new cell writer based on an existing Vec. /// /// The newly created row writer will append data to the end of the vec. 
#[inline] pub fn new(buf: &'buf mut Vec) -> Self { - BufBackedCellWriter { buf } + Self { buf } } } -impl<'buf> CellWriter for BufBackedCellWriter<'buf> { - type ValueBuilder = BufBackedCellValueBuilder<'buf>; - - type WrittenCellProof = WrittenCellProof<'buf>; - - #[inline] - fn set_null(self) -> Self::WrittenCellProof { - self.buf.extend_from_slice(&(-1i32).to_be_bytes()); - WrittenCellProof::new() - } - - #[inline] - fn set_unset(self) -> Self::WrittenCellProof { - self.buf.extend_from_slice(&(-2i32).to_be_bytes()); - WrittenCellProof::new() - } - - #[inline] - fn set_value(self, bytes: &[u8]) -> Result { - let value_len: i32 = bytes.len().try_into().map_err(|_| CellOverflowError)?; - self.buf.extend_from_slice(&value_len.to_be_bytes()); - self.buf.extend_from_slice(bytes); - Ok(WrittenCellProof::new()) - } - - #[inline] - fn into_value_builder(self) -> Self::ValueBuilder { - BufBackedCellValueBuilder::new(self.buf) - } -} - -/// A cell value builder backed by a buffer (vec). -pub struct BufBackedCellValueBuilder<'buf> { - // Buffer that this value should be serialized to. - buf: &'buf mut Vec, - - // Starting position of the value in the buffer. - starting_pos: usize, -} - -impl<'buf> BufBackedCellValueBuilder<'buf> { +impl<'buf> CellValueBuilder<'buf> { #[inline] fn new(buf: &'buf mut Vec) -> Self { // "Length" of a [bytes] frame can either be a non-negative i32, @@ -250,46 +210,18 @@ impl<'buf> BufBackedCellValueBuilder<'buf> { // won't be misinterpreted. 
let starting_pos = buf.len(); buf.extend_from_slice(&(-3i32).to_be_bytes()); - BufBackedCellValueBuilder { buf, starting_pos } - } -} - -impl<'buf> CellValueBuilder for BufBackedCellValueBuilder<'buf> { - type SubCellWriter<'a> = BufBackedCellWriter<'a> - where - Self: 'a; - - type WrittenCellProof = WrittenCellProof<'buf>; - - #[inline] - fn append_bytes(&mut self, bytes: &[u8]) { - self.buf.extend_from_slice(bytes); - } - - #[inline] - fn make_sub_writer(&mut self) -> Self::SubCellWriter<'_> { - BufBackedCellWriter::new(self.buf) - } - - #[inline] - fn finish(self) -> Result { - let value_len: i32 = (self.buf.len() - self.starting_pos - 4) - .try_into() - .map_err(|_| CellOverflowError)?; - self.buf[self.starting_pos..self.starting_pos + 4] - .copy_from_slice(&value_len.to_be_bytes()); - Ok(WrittenCellProof::new()) + Self { buf, starting_pos } } } #[cfg(test)] mod tests { - use super::{BufBackedCellWriter, BufBackedRowWriter, CellValueBuilder, CellWriter, RowWriter}; + use super::{CellWriter, RowWriter}; #[test] fn test_cell_writer() { let mut data = Vec::new(); - let writer = BufBackedCellWriter::new(&mut data); + let writer = CellWriter::new(&mut data); let mut sub_writer = writer.into_value_builder(); sub_writer.make_sub_writer().set_null(); sub_writer @@ -313,7 +245,7 @@ mod tests { #[test] fn test_poisoned_appender() { let mut data = Vec::new(); - let writer = BufBackedCellWriter::new(&mut data); + let writer = CellWriter::new(&mut data); let _ = writer.into_value_builder(); assert_eq!( @@ -327,7 +259,7 @@ mod tests { #[test] fn test_row_writer() { let mut data = Vec::new(); - let mut writer = BufBackedRowWriter::new(&mut data); + let mut writer = RowWriter::new(&mut data); writer.make_cell_writer().set_null(); writer.make_cell_writer().set_value(&[1, 2, 3, 4]).unwrap(); writer.make_cell_writer().set_unset(); From 458c4de10e752a006a04d7e24fe72a7de0f6ba0d Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 7 Dec 2023 12:37:21 +0100 Subject: [PATCH 
008/107] serialize/writers: move `new`/`value_count` methods higher Put them in the same `impl` block as other methods of the same structs for improved visual clarity. --- scylla-cql/src/types/serialize/writers.rs | 88 +++++++++++------------ 1 file changed, 41 insertions(+), 47 deletions(-) diff --git a/scylla-cql/src/types/serialize/writers.rs b/scylla-cql/src/types/serialize/writers.rs index b42c8a7da1..4d350adc75 100644 --- a/scylla-cql/src/types/serialize/writers.rs +++ b/scylla-cql/src/types/serialize/writers.rs @@ -12,6 +12,26 @@ pub struct RowWriter<'buf> { } impl<'buf> RowWriter<'buf> { + /// Creates a new row writer based on an existing Vec. + /// + /// The newly created row writer will append data to the end of the vec. + #[inline] + pub fn new(buf: &'buf mut Vec) -> Self { + Self { + buf, + value_count: 0, + } + } + + /// Returns the number of values that were written so far. + /// + /// Note that the protocol allows at most u16::MAX to be written into a query, + /// but the writer's interface allows more to be written. + #[inline] + pub fn value_count(&self) -> usize { + self.value_count + } + /// Appends a new value to the sequence and returns an object that allows /// to fill it in. #[inline] @@ -44,6 +64,14 @@ pub struct CellWriter<'buf> { } impl<'buf> CellWriter<'buf> { + /// Creates a new cell writer based on an existing Vec. + /// + /// The newly created row writer will append data to the end of the vec. + #[inline] + pub fn new(buf: &'buf mut Vec) -> Self { + Self { buf } + } + /// Sets this value to be null, consuming this object. #[inline] pub fn set_null(self) -> WrittenCellProof<'buf> { @@ -101,6 +129,19 @@ pub struct CellValueBuilder<'buf> { } impl<'buf> CellValueBuilder<'buf> { + #[inline] + fn new(buf: &'buf mut Vec) -> Self { + // "Length" of a [bytes] frame can either be a non-negative i32, + // -1 (null) or -1 (not set). Push an invalid value here. It will be + // overwritten eventually either by set_null, set_unset or Drop. 
+ // If the CellSerializer is not dropped as it should, this will trigger + // an error on the DB side and the serialized data + // won't be misinterpreted. + let starting_pos = buf.len(); + buf.extend_from_slice(&(-3i32).to_be_bytes()); + Self { buf, starting_pos } + } + /// Appends raw bytes to this cell. #[inline] pub fn append_bytes(&mut self, bytes: &[u8]) { @@ -167,53 +208,6 @@ impl<'buf> WrittenCellProof<'buf> { #[error("CQL cell overflowed the maximum allowed size of 2^31 - 1")] pub struct CellOverflowError; -impl<'buf> RowWriter<'buf> { - /// Creates a new row writer based on an existing Vec. - /// - /// The newly created row writer will append data to the end of the vec. - #[inline] - pub fn new(buf: &'buf mut Vec) -> Self { - Self { - buf, - value_count: 0, - } - } - - /// Returns the number of values that were written so far. - /// - /// Note that the protocol allows at most u16::MAX to be written into a query, - /// but the writer's interface allows more to be written. - #[inline] - pub fn value_count(&self) -> usize { - self.value_count - } -} - -impl<'buf> CellWriter<'buf> { - /// Creates a new cell writer based on an existing Vec. - /// - /// The newly created row writer will append data to the end of the vec. - #[inline] - pub fn new(buf: &'buf mut Vec) -> Self { - Self { buf } - } -} - -impl<'buf> CellValueBuilder<'buf> { - #[inline] - fn new(buf: &'buf mut Vec) -> Self { - // "Length" of a [bytes] frame can either be a non-negative i32, - // -1 (null) or -1 (not set). Push an invalid value here. It will be - // overwritten eventually either by set_null, set_unset or Drop. - // If the CellSerializer is not dropped as it should, this will trigger - // an error on the DB side and the serialized data - // won't be misinterpreted. 
- let starting_pos = buf.len(); - buf.extend_from_slice(&(-3i32).to_be_bytes()); - Self { buf, starting_pos } - } -} - #[cfg(test)] mod tests { use super::{CellWriter, RowWriter}; From 9dc39fd7f3b87d92e0facccc079a673852a2daea Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 7 Dec 2023 18:32:42 +0100 Subject: [PATCH 009/107] serialize: provide default preliminary typecheck impls We are going to remove `preliminary_type_check` methods by moving the type checking logic to `serialize`. Add default `preliminary_type_check` impls in the trait definitions to make it possible to gradually remove their impls. --- scylla-cql/src/types/serialize/row.rs | 6 +++++- scylla-cql/src/types/serialize/value.rs | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 0da2a4ac10..1fa06c7295 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -42,7 +42,11 @@ pub trait SerializeRow { /// Sometimes, a row cannot be fully type checked right away without knowing /// the exact values of the columns (e.g. when deserializing to `CqlValue`), /// but it's fine to do full type checking later in `serialize`. - fn preliminary_type_check(ctx: &RowSerializationContext<'_>) -> Result<(), SerializationError>; + fn preliminary_type_check( + _ctx: &RowSerializationContext<'_>, + ) -> Result<(), SerializationError> { + Ok(()) + } /// Serializes the row according to the information in the given context. /// diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 28754d9a11..9056fb785f 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -36,7 +36,9 @@ pub trait SerializeCql { /// Some types cannot be type checked without knowing the exact value, /// this is the case e.g. for `CqlValue`. It's also fine to do it later in /// `serialize`. 
- fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError>; + fn preliminary_type_check(_typ: &ColumnType) -> Result<(), SerializationError> { + Ok(()) + } /// Serializes the value to given CQL type. /// From 6d7cc980467251375c85f645308c6558777774e1 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 7 Dec 2023 18:45:46 +0100 Subject: [PATCH 010/107] serialize/value: move exact typechecks to serialize Moves the type checking logic of types that used `impl_exact_preliminary_type_check` into their `serialize` method. --- scylla-cql/src/types/serialize/value.rs | 120 +++++++++++++----------- 1 file changed, 67 insertions(+), 53 deletions(-) diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 9056fb785f..b3e04566c2 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -51,18 +51,16 @@ pub trait SerializeCql { ) -> Result, SerializationError>; } -macro_rules! impl_exact_preliminary_type_check { - ($($cql:tt),*) => { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - match typ { - $(ColumnType::$cql)|* => Ok(()), - _ => Err(mk_typck_err::( - typ, - BuiltinTypeCheckErrorKind::MismatchedType { - expected: &[$(ColumnType::$cql),*], - } - )) - } +macro_rules! exact_type_check { + ($typ:ident, $($cql:tt),*) => { + match $typ { + $(ColumnType::$cql)|* => {}, + _ => return Err(mk_typck_err::( + $typ, + BuiltinTypeCheckErrorKind::MismatchedType { + expected: &[$(ColumnType::$cql),*], + } + )) } }; } @@ -87,24 +85,32 @@ macro_rules! 
impl_serialize_via_writer { } impl SerializeCql for i8 { - impl_exact_preliminary_type_check!(TinyInt); - impl_serialize_via_writer!(|me, writer| writer.set_value(me.to_be_bytes().as_slice()).unwrap()); + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, TinyInt); + writer.set_value(me.to_be_bytes().as_slice()).unwrap() + }); } impl SerializeCql for i16 { - impl_exact_preliminary_type_check!(SmallInt); - impl_serialize_via_writer!(|me, writer| writer.set_value(me.to_be_bytes().as_slice()).unwrap()); + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, SmallInt); + writer.set_value(me.to_be_bytes().as_slice()).unwrap() + }); } impl SerializeCql for i32 { - impl_exact_preliminary_type_check!(Int); - impl_serialize_via_writer!(|me, writer| writer.set_value(me.to_be_bytes().as_slice()).unwrap()); + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Int); + writer.set_value(me.to_be_bytes().as_slice()).unwrap() + }); } impl SerializeCql for i64 { - impl_exact_preliminary_type_check!(BigInt); - impl_serialize_via_writer!(|me, writer| writer.set_value(me.to_be_bytes().as_slice()).unwrap()); + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, BigInt); + writer.set_value(me.to_be_bytes().as_slice()).unwrap() + }); } impl SerializeCql for BigDecimal { - impl_exact_preliminary_type_check!(Decimal); impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Decimal); let mut builder = writer.into_value_builder(); let (value, scale) = me.as_bigint_and_exponent(); let scale: i32 = scale @@ -118,41 +124,41 @@ impl SerializeCql for BigDecimal { }); } impl SerializeCql for CqlDate { - impl_exact_preliminary_type_check!(Date); - impl_serialize_via_writer!(|me, writer| { + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Date); writer.set_value(me.0.to_be_bytes().as_slice()).unwrap() }); } impl SerializeCql for CqlTimestamp { - 
impl_exact_preliminary_type_check!(Timestamp); - impl_serialize_via_writer!(|me, writer| { + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Timestamp); writer.set_value(me.0.to_be_bytes().as_slice()).unwrap() }); } impl SerializeCql for CqlTime { - impl_exact_preliminary_type_check!(Time); - impl_serialize_via_writer!(|me, writer| { + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Time); writer.set_value(me.0.to_be_bytes().as_slice()).unwrap() }); } #[cfg(feature = "chrono")] impl SerializeCql for NaiveDate { - impl_exact_preliminary_type_check!(Date); impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Date); ::serialize(&(*me).into(), typ, writer)? }); } #[cfg(feature = "chrono")] impl SerializeCql for DateTime { - impl_exact_preliminary_type_check!(Timestamp); impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Timestamp); ::serialize(&(*me).into(), typ, writer)? }); } #[cfg(feature = "chrono")] impl SerializeCql for NaiveTime { - impl_exact_preliminary_type_check!(Time); impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Time); let cql_time = CqlTime::try_from(*me).map_err(|_: ValueOverflow| { mk_ser_err::(typ, BuiltinSerializationErrorKind::ValueOverflow) })?; @@ -161,22 +167,22 @@ impl SerializeCql for NaiveTime { } #[cfg(feature = "chrono")] impl SerializeCql for time::Date { - impl_exact_preliminary_type_check!(Date); impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Date); ::serialize(&(*me).into(), typ, writer)? }); } #[cfg(feature = "chrono")] impl SerializeCql for time::OffsetDateTime { - impl_exact_preliminary_type_check!(Timestamp); impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Timestamp); ::serialize(&(*me).into(), typ, writer)? 
}); } #[cfg(feature = "chrono")] impl SerializeCql for time::Time { - impl_exact_preliminary_type_check!(Time); impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Time); ::serialize(&(*me).into(), typ, writer)? }); } @@ -194,24 +200,32 @@ impl SerializeCql for Secret { } } impl SerializeCql for bool { - impl_exact_preliminary_type_check!(Boolean); - impl_serialize_via_writer!(|me, writer| writer.set_value(&[*me as u8]).unwrap()); + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Boolean); + writer.set_value(&[*me as u8]).unwrap() + }); } impl SerializeCql for f32 { - impl_exact_preliminary_type_check!(Float); - impl_serialize_via_writer!(|me, writer| writer.set_value(me.to_be_bytes().as_slice()).unwrap()); + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Float); + writer.set_value(me.to_be_bytes().as_slice()).unwrap() + }); } impl SerializeCql for f64 { - impl_exact_preliminary_type_check!(Double); - impl_serialize_via_writer!(|me, writer| writer.set_value(me.to_be_bytes().as_slice()).unwrap()); + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Double); + writer.set_value(me.to_be_bytes().as_slice()).unwrap() + }); } impl SerializeCql for Uuid { - impl_exact_preliminary_type_check!(Uuid, Timeuuid); - impl_serialize_via_writer!(|me, writer| writer.set_value(me.as_bytes().as_ref()).unwrap()); + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Uuid, Timeuuid); + writer.set_value(me.as_bytes().as_ref()).unwrap() + }); } impl SerializeCql for BigInt { - impl_exact_preliminary_type_check!(Varint); impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Varint); // TODO: The allocation here can be avoided and we can reimplement // `to_signed_bytes_be` by using `to_u64_digits` and a bit of custom // logic. Need better tests in order to do this. 
@@ -221,40 +235,40 @@ impl SerializeCql for BigInt { }); } impl SerializeCql for &str { - impl_exact_preliminary_type_check!(Ascii, Text); impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Ascii, Text); writer .set_value(me.as_bytes()) .map_err(|_| mk_ser_err::(typ, BuiltinSerializationErrorKind::SizeOverflow))? }); } impl SerializeCql for Vec { - impl_exact_preliminary_type_check!(Blob); impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Blob); writer .set_value(me.as_ref()) .map_err(|_| mk_ser_err::(typ, BuiltinSerializationErrorKind::SizeOverflow))? }); } impl SerializeCql for &[u8] { - impl_exact_preliminary_type_check!(Blob); impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Blob); writer .set_value(me) .map_err(|_| mk_ser_err::(typ, BuiltinSerializationErrorKind::SizeOverflow))? }); } impl SerializeCql for [u8; N] { - impl_exact_preliminary_type_check!(Blob); impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Blob); writer .set_value(me.as_ref()) .map_err(|_| mk_ser_err::(typ, BuiltinSerializationErrorKind::SizeOverflow))? }); } impl SerializeCql for IpAddr { - impl_exact_preliminary_type_check!(Inet); - impl_serialize_via_writer!(|me, writer| { + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Inet); match me { IpAddr::V4(ip) => writer.set_value(&ip.octets()).unwrap(), IpAddr::V6(ip) => writer.set_value(&ip.octets()).unwrap(), @@ -262,8 +276,8 @@ impl SerializeCql for IpAddr { }); } impl SerializeCql for String { - impl_exact_preliminary_type_check!(Ascii, Text); impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Ascii, Text); writer .set_value(me.as_bytes()) .map_err(|_| mk_ser_err::(typ, BuiltinSerializationErrorKind::SizeOverflow))? 
@@ -291,14 +305,14 @@ impl SerializeCql for Unset { impl_serialize_via_writer!(|_me, writer| writer.set_unset()); } impl SerializeCql for Counter { - impl_exact_preliminary_type_check!(Counter); - impl_serialize_via_writer!(|me, writer| { + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Counter); writer.set_value(me.0.to_be_bytes().as_slice()).unwrap() }); } impl SerializeCql for CqlDuration { - impl_exact_preliminary_type_check!(Duration); - impl_serialize_via_writer!(|me, writer| { + impl_serialize_via_writer!(|me, typ, writer| { + exact_type_check!(typ, Duration); // TODO: adjust vint_encode to use CellValueBuilder or something like that let mut buf = Vec::with_capacity(27); // worst case size is 27 vint_encode(me.months as i64, &mut buf); From cc0a1a0de906896f2aa0b05f9a53e7926061acbc Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 8 Dec 2023 03:36:38 +0100 Subject: [PATCH 011/107] serialize/value: move CqlValue's typecheck to serialize --- scylla-cql/src/types/serialize/value.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index b3e04566c2..20c3394540 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -545,14 +545,8 @@ impl<'a, T: SerializeCql + 'a> SerializeCql for &'a [T] { } } impl SerializeCql for CqlValue { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - match typ { - ColumnType::Custom(_) => Err(mk_typck_err::( - typ, - BuiltinTypeCheckErrorKind::CustomTypeUnsupported, - )), - _ => Ok(()), - } + fn preliminary_type_check(_typ: &ColumnType) -> Result<(), SerializationError> { + Ok(()) } fn serialize<'b>( @@ -569,6 +563,12 @@ fn serialize_cql_value<'b>( typ: &ColumnType, writer: CellWriter<'b>, ) -> Result, SerializationError> { + if let ColumnType::Custom(_) = typ { + return Err(mk_typck_err::( + typ, + 
BuiltinTypeCheckErrorKind::CustomTypeUnsupported, + )); + } match value { CqlValue::Ascii(a) => check_and_serialize(a, typ, writer), CqlValue::Boolean(b) => check_and_serialize(b, typ, writer), From 69cb8f41720005b2b8babb63e39f5f2a35d970cc Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 8 Dec 2023 06:47:51 +0100 Subject: [PATCH 012/107] serialize/value: remove nested typecheck errors As we get rid of `preliminary_type_check`, error variants that were used to represent type check failure of some sub-type (e.g. a tuple element) will not be used anymore. Remove them now and adjust the rest of the code. --- scylla-cql/src/types/serialize/value.rs | 105 ++---------------------- 1 file changed, 8 insertions(+), 97 deletions(-) diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 20c3394540..da4fd4e304 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -363,12 +363,7 @@ impl SerializeCql for Box { impl SerializeCql for HashSet { fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { match typ { - ColumnType::Set(elt) => V::preliminary_type_check(elt).map_err(|err| { - mk_typck_err::( - typ, - SetOrListTypeCheckErrorKind::ElementTypeCheckFailed(err), - ) - }), + ColumnType::Set(_) => Ok(()), _ => Err(mk_typck_err::( typ, SetOrListTypeCheckErrorKind::NotSetOrList, @@ -393,15 +388,7 @@ impl SerializeCql for HashSet { impl SerializeCql for HashMap { fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { match typ { - ColumnType::Map(k, v) => { - K::preliminary_type_check(k).map_err(|err| { - mk_typck_err::(typ, MapTypeCheckErrorKind::KeyTypeCheckFailed(err)) - })?; - V::preliminary_type_check(v).map_err(|err| { - mk_typck_err::(typ, MapTypeCheckErrorKind::ValueTypeCheckFailed(err)) - })?; - Ok(()) - } + ColumnType::Map(_, _) => Ok(()), _ => Err(mk_typck_err::(typ, MapTypeCheckErrorKind::NotMap)), } } @@ -423,12 +410,7 
@@ impl SerializeCql for HashMap< impl SerializeCql for BTreeSet { fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { match typ { - ColumnType::Set(elt) => V::preliminary_type_check(elt).map_err(|err| { - mk_typck_err::( - typ, - SetOrListTypeCheckErrorKind::ElementTypeCheckFailed(err), - ) - }), + ColumnType::Set(_) => Ok(()), _ => Err(mk_typck_err::( typ, SetOrListTypeCheckErrorKind::NotSetOrList, @@ -453,15 +435,7 @@ impl SerializeCql for BTreeSet { impl SerializeCql for BTreeMap { fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { match typ { - ColumnType::Map(k, v) => { - K::preliminary_type_check(k).map_err(|err| { - mk_typck_err::(typ, MapTypeCheckErrorKind::KeyTypeCheckFailed(err)) - })?; - V::preliminary_type_check(v).map_err(|err| { - mk_typck_err::(typ, MapTypeCheckErrorKind::ValueTypeCheckFailed(err)) - })?; - Ok(()) - } + ColumnType::Map(_, _) => Ok(()), _ => Err(mk_typck_err::(typ, MapTypeCheckErrorKind::NotMap)), } } @@ -483,14 +457,7 @@ impl SerializeCql for BTreeMap { impl SerializeCql for Vec { fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { match typ { - ColumnType::List(elt) | ColumnType::Set(elt) => { - T::preliminary_type_check(elt).map_err(|err| { - mk_typck_err::( - typ, - SetOrListTypeCheckErrorKind::ElementTypeCheckFailed(err), - ) - }) - } + ColumnType::List(_) | ColumnType::Set(_) => Ok(()), _ => Err(mk_typck_err::( typ, SetOrListTypeCheckErrorKind::NotSetOrList, @@ -515,14 +482,7 @@ impl SerializeCql for Vec { impl<'a, T: SerializeCql + 'a> SerializeCql for &'a [T] { fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { match typ { - ColumnType::List(elt) | ColumnType::Set(elt) => { - T::preliminary_type_check(elt).map_err(|err| { - mk_typck_err::( - typ, - SetOrListTypeCheckErrorKind::ElementTypeCheckFailed(err), - ) - }) - } + ColumnType::List(_) | ColumnType::Set(_) => Ok(()), _ => Err(mk_typck_err::( typ, 
SetOrListTypeCheckErrorKind::NotSetOrList, @@ -803,21 +763,8 @@ macro_rules! impl_tuple { match typ { ColumnType::Tuple(typs) => match typs.as_slice() { [$($tidents),*, ..] => { - let index = 0; - $( - <$typs as SerializeCql>::preliminary_type_check($tidents) - .map_err(|err| - mk_typck_err::( - typ, - TupleTypeCheckErrorKind::ElementTypeCheckFailed { - index, - err, - } - ) - )?; - let index = index + 1; - )* - let _ = index; + // Suppress the "unused" warning + let _ = ($($tidents),*,); } _ => return Err(mk_typck_err::( typ, @@ -1341,12 +1288,6 @@ impl Display for BuiltinSerializationErrorKind { pub enum MapTypeCheckErrorKind { /// The CQL type is not a map. NotMap, - - /// Checking the map key type failed. - KeyTypeCheckFailed(SerializationError), - - /// Checking the map value type failed. - ValueTypeCheckFailed(SerializationError), } impl Display for MapTypeCheckErrorKind { @@ -1358,12 +1299,6 @@ impl Display for MapTypeCheckErrorKind { "the CQL type the map was attempted to be serialized to was not map" ) } - MapTypeCheckErrorKind::KeyTypeCheckFailed(err) => { - write!(f, "failed to type check one of the keys: {}", err) - } - MapTypeCheckErrorKind::ValueTypeCheckFailed(err) => { - write!(f, "failed to type check one of the values: {}", err) - } } } } @@ -1405,9 +1340,6 @@ impl Display for MapSerializationErrorKind { pub enum SetOrListTypeCheckErrorKind { /// The CQL type is neither a set not a list. NotSetOrList, - - /// Checking the type of the set/list element failed. - ElementTypeCheckFailed(SerializationError), } impl Display for SetOrListTypeCheckErrorKind { @@ -1419,9 +1351,6 @@ impl Display for SetOrListTypeCheckErrorKind { "the CQL type the tuple was attempted to was neither a set or a list" ) } - SetOrListTypeCheckErrorKind::ElementTypeCheckFailed(err) => { - write!(f, "failed to type check one of the elements: {err}") - } } } } @@ -1464,12 +1393,6 @@ pub enum TupleTypeCheckErrorKind { /// than the corresponding CQL type, but not more. 
The additional, unknown /// elements will be set to null. WrongElementCount { actual: usize, asked_for: usize }, - - /// One of the tuple elements failed to type check. - ElementTypeCheckFailed { - index: usize, - err: SerializationError, - }, } impl Display for TupleTypeCheckErrorKind { @@ -1483,9 +1406,6 @@ impl Display for TupleTypeCheckErrorKind { f, "wrong tuple element count: CQL type has {asked_for}, the Rust tuple has {actual}" ), - TupleTypeCheckErrorKind::ElementTypeCheckFailed { index, err } => { - write!(f, "element no. {index} failed to type check: {err}") - } } } } @@ -1521,12 +1441,6 @@ pub enum UdtTypeCheckErrorKind { /// The Rust data contains a field that is not present in the UDT UnexpectedFieldInDestination { field_name: String }, - - /// One of the fields failed to type check. - FieldTypeCheckFailed { - field_name: String, - err: SerializationError, - }, } impl Display for UdtTypeCheckErrorKind { @@ -1547,9 +1461,6 @@ impl Display for UdtTypeCheckErrorKind { f, "the field {field_name} present in the Rust data is not present in the CQL type" ), - UdtTypeCheckErrorKind::FieldTypeCheckFailed { field_name, err } => { - write!(f, "field {field_name} failed to type check: {err}") - } } } } From 53b73493c2659e8588c7466a50bf4513ff41bdec Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 8 Dec 2023 06:50:45 +0100 Subject: [PATCH 013/107] serialize/value: remove typechecks of compound types Type checks for compound types only check that the type is of the right shape (e.g. it's a UDT, a list, etc.). This is already checked in `serialize`, so remove the `preliminary_type_check` impls. 
--- scylla-cql/src/types/serialize/value.rs | 77 ------------------------- 1 file changed, 77 deletions(-) diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index da4fd4e304..433592aead 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -361,16 +361,6 @@ impl SerializeCql for Box { } } impl SerializeCql for HashSet { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - match typ { - ColumnType::Set(_) => Ok(()), - _ => Err(mk_typck_err::( - typ, - SetOrListTypeCheckErrorKind::NotSetOrList, - )), - } - } - fn serialize<'b>( &self, typ: &ColumnType, @@ -386,13 +376,6 @@ impl SerializeCql for HashSet { } } impl SerializeCql for HashMap { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - match typ { - ColumnType::Map(_, _) => Ok(()), - _ => Err(mk_typck_err::(typ, MapTypeCheckErrorKind::NotMap)), - } - } - fn serialize<'b>( &self, typ: &ColumnType, @@ -408,16 +391,6 @@ impl SerializeCql for HashMap< } } impl SerializeCql for BTreeSet { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - match typ { - ColumnType::Set(_) => Ok(()), - _ => Err(mk_typck_err::( - typ, - SetOrListTypeCheckErrorKind::NotSetOrList, - )), - } - } - fn serialize<'b>( &self, typ: &ColumnType, @@ -433,13 +406,6 @@ impl SerializeCql for BTreeSet { } } impl SerializeCql for BTreeMap { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - match typ { - ColumnType::Map(_, _) => Ok(()), - _ => Err(mk_typck_err::(typ, MapTypeCheckErrorKind::NotMap)), - } - } - fn serialize<'b>( &self, typ: &ColumnType, @@ -455,16 +421,6 @@ impl SerializeCql for BTreeMap { } } impl SerializeCql for Vec { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - match typ { - ColumnType::List(_) | ColumnType::Set(_) => Ok(()), - _ => Err(mk_typck_err::( - typ, - 
SetOrListTypeCheckErrorKind::NotSetOrList, - )), - } - } - fn serialize<'b>( &self, typ: &ColumnType, @@ -480,16 +436,6 @@ impl SerializeCql for Vec { } } impl<'a, T: SerializeCql + 'a> SerializeCql for &'a [T] { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - match typ { - ColumnType::List(_) | ColumnType::Set(_) => Ok(()), - _ => Err(mk_typck_err::( - typ, - SetOrListTypeCheckErrorKind::NotSetOrList, - )), - } - } - fn serialize<'b>( &self, typ: &ColumnType, @@ -759,29 +705,6 @@ macro_rules! impl_tuple { $length:expr ) => { impl<$($typs: SerializeCql),*> SerializeCql for ($($typs,)*) { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - match typ { - ColumnType::Tuple(typs) => match typs.as_slice() { - [$($tidents),*, ..] => { - // Suppress the "unused" warning - let _ = ($($tidents),*,); - } - _ => return Err(mk_typck_err::( - typ, - TupleTypeCheckErrorKind::WrongElementCount { - actual: $length, - asked_for: typs.len(), - } - )) - } - _ => return Err(mk_typck_err::( - typ, - TupleTypeCheckErrorKind::NotTuple - )), - }; - Ok(()) - } - fn serialize<'b>( &self, typ: &ColumnType, From e8a92ad300814cbde55f59a78725f221502b275a Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 8 Dec 2023 06:51:01 +0100 Subject: [PATCH 014/107] serialize/value: remove remaining preliminary type check impls Remaining `SerializeCql::preliminary_type_check` impls can be removed because they either not do anything or call `preliminary_type_check` on other types. 
--- scylla-cql/src/types/serialize/value.rs | 29 ------------------------- 1 file changed, 29 deletions(-) diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 433592aead..82f79c81a2 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -188,9 +188,6 @@ impl SerializeCql for time::Time { } #[cfg(feature = "secret")] impl SerializeCql for Secret { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - V::preliminary_type_check(typ) - } fn serialize<'b>( &self, typ: &ColumnType, @@ -284,9 +281,6 @@ impl SerializeCql for String { }); } impl SerializeCql for Option { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - T::preliminary_type_check(typ) - } fn serialize<'b>( &self, typ: &ColumnType, @@ -299,9 +293,6 @@ impl SerializeCql for Option { } } impl SerializeCql for Unset { - fn preliminary_type_check(_typ: &ColumnType) -> Result<(), SerializationError> { - Ok(()) // Fits everything - } impl_serialize_via_writer!(|_me, writer| writer.set_unset()); } impl SerializeCql for Counter { @@ -322,9 +313,6 @@ impl SerializeCql for CqlDuration { }); } impl SerializeCql for MaybeUnset { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - V::preliminary_type_check(typ) - } fn serialize<'b>( &self, typ: &ColumnType, @@ -337,9 +325,6 @@ impl SerializeCql for MaybeUnset { } } impl SerializeCql for &T { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - T::preliminary_type_check(typ) - } fn serialize<'b>( &self, typ: &ColumnType, @@ -349,9 +334,6 @@ impl SerializeCql for &T { } } impl SerializeCql for Box { - fn preliminary_type_check(typ: &ColumnType) -> Result<(), SerializationError> { - T::preliminary_type_check(typ) - } fn serialize<'b>( &self, typ: &ColumnType, @@ -451,10 +433,6 @@ impl<'a, T: SerializeCql + 'a> SerializeCql for &'a [T] { } } impl 
SerializeCql for CqlValue { - fn preliminary_type_check(_typ: &ColumnType) -> Result<(), SerializationError> { - Ok(()) - } - fn serialize<'b>( &self, typ: &ColumnType, @@ -922,13 +900,6 @@ macro_rules! impl_serialize_cql_via_value { where Self: $crate::frame::value::Value, { - fn preliminary_type_check( - _typ: &$crate::frame::response::result::ColumnType, - ) -> ::std::result::Result<(), $crate::types::serialize::SerializationError> { - // No-op - the old interface didn't offer type safety - ::std::result::Result::Ok(()) - } - fn serialize<'b>( &self, _typ: &$crate::frame::response::result::ColumnType, From 6e074d59d33314746754cf10493b724b70ddddf3 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 8 Dec 2023 03:49:27 +0100 Subject: [PATCH 015/107] serialize/row: fix incorrect error being returned In case when serialization of one of the values fails, a row represented by a map would return a type check error. A serialization error should be returned instead. --- scylla-cql/src/types/serialize/row.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 1fa06c7295..25398e880c 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -223,8 +223,8 @@ macro_rules! impl_serialize_row_for_map { Some(v) => { ::serialize(v, &col.typ, writer.make_cell_writer()) .map_err(|err| { - mk_typck_err::( - BuiltinTypeCheckErrorKind::ColumnTypeCheckFailed { + mk_ser_err::( + BuiltinSerializationErrorKind::ColumnSerializationFailed { name: col.name.clone(), err, }, From 5eddc08cd114f8f9a5bd009d63c061608c8c0f5c Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 8 Dec 2023 03:50:10 +0100 Subject: [PATCH 016/107] serialize/row: remove preliminary type check impls There's not many of them, so remove them all in one go. The ColumnTypeCheckFailed serialization error variant is also removed as it is no longer used. 
--- scylla-cql/src/types/serialize/row.rs | 93 +-------------------------- 1 file changed, 3 insertions(+), 90 deletions(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 25398e880c..72c0bf5a23 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -63,11 +63,6 @@ pub trait SerializeRow { macro_rules! fallback_impl_contents { () => { - fn preliminary_type_check( - _ctx: &RowSerializationContext<'_>, - ) -> Result<(), SerializationError> { - Ok(()) - } fn serialize( &self, ctx: &RowSerializationContext<'_>, @@ -84,8 +79,10 @@ macro_rules! fallback_impl_contents { macro_rules! impl_serialize_row_for_unit { () => { - fn preliminary_type_check( + fn serialize( + &self, ctx: &RowSerializationContext<'_>, + _writer: &mut RowWriter, ) -> Result<(), SerializationError> { if !ctx.columns().is_empty() { return Err(mk_typck_err::( @@ -95,14 +92,6 @@ macro_rules! impl_serialize_row_for_unit { }, )); } - Ok(()) - } - - fn serialize( - &self, - _ctx: &RowSerializationContext<'_>, - _writer: &mut RowWriter, - ) -> Result<(), SerializationError> { // Row is empty - do nothing Ok(()) } @@ -124,22 +113,6 @@ impl SerializeRow for [u8; 0] { macro_rules! impl_serialize_row_for_slice { () => { - fn preliminary_type_check( - ctx: &RowSerializationContext<'_>, - ) -> Result<(), SerializationError> { - // While we don't know how many columns will be there during serialization, - // we can at least check that all provided columns match T. - for col in ctx.columns() { - ::preliminary_type_check(&col.typ).map_err(|err| { - mk_typck_err::(BuiltinTypeCheckErrorKind::ColumnTypeCheckFailed { - name: col.name.clone(), - err, - }) - })?; - } - Ok(()) - } - fn serialize( &self, ctx: &RowSerializationContext<'_>, @@ -185,22 +158,6 @@ impl SerializeRow for Vec { macro_rules! 
impl_serialize_row_for_map { () => { - fn preliminary_type_check( - ctx: &RowSerializationContext<'_>, - ) -> Result<(), SerializationError> { - // While we don't know the column count or their names, - // we can go over all columns and check that their types match T. - for col in ctx.columns() { - ::preliminary_type_check(&col.typ).map_err(|err| { - mk_typck_err::(BuiltinTypeCheckErrorKind::ColumnTypeCheckFailed { - name: col.name.clone(), - err, - }) - })?; - } - Ok(()) - } - fn serialize( &self, ctx: &RowSerializationContext<'_>, @@ -272,10 +229,6 @@ impl SerializeRow for HashMap<&str, T, S> { } impl SerializeRow for &T { - fn preliminary_type_check(ctx: &RowSerializationContext<'_>) -> Result<(), SerializationError> { - ::preliminary_type_check(ctx) - } - fn serialize( &self, ctx: &RowSerializationContext<'_>, @@ -306,30 +259,6 @@ macro_rules! impl_tuple { $length:expr ) => { impl<$($typs: SerializeCql),*> SerializeRow for ($($typs,)*) { - fn preliminary_type_check( - ctx: &RowSerializationContext<'_>, - ) -> Result<(), SerializationError> { - match ctx.columns() { - [$($tidents),*] => { - $( - <$typs as SerializeCql>::preliminary_type_check(&$tidents.typ).map_err(|err| { - mk_typck_err::(BuiltinTypeCheckErrorKind::ColumnTypeCheckFailed { - name: $tidents.name.clone(), - err, - }) - })?; - )* - } - _ => return Err(mk_typck_err::( - BuiltinTypeCheckErrorKind::WrongColumnCount { - actual: $length, - asked_for: ctx.columns().len(), - }, - )), - }; - Ok(()) - } - fn serialize( &self, ctx: &RowSerializationContext<'_>, @@ -449,13 +378,6 @@ macro_rules! 
impl_serialize_row_via_value_list { where Self: $crate::frame::value::ValueList, { - fn preliminary_type_check( - _ctx: &$crate::types::serialize::row::RowSerializationContext<'_>, - ) -> ::std::result::Result<(), $crate::types::serialize::SerializationError> { - // No-op - the old interface didn't offer type safety - ::std::result::Result::Ok(()) - } - fn serialize( &self, ctx: &$crate::types::serialize::row::RowSerializationContext<'_>, @@ -600,12 +522,6 @@ pub enum BuiltinTypeCheckErrorKind { /// A value required by the statement is not provided by the Rust type. ColumnMissingForValue { name: String }, - - /// One of the columns failed to type check. - ColumnTypeCheckFailed { - name: String, - err: SerializationError, - }, } impl Display for BuiltinTypeCheckErrorKind { @@ -626,9 +542,6 @@ impl Display for BuiltinTypeCheckErrorKind { "value for column {name} was provided, but there is no bind marker for this column in the query" ) } - BuiltinTypeCheckErrorKind::ColumnTypeCheckFailed { name, err } => { - write!(f, "failed to check column {name}: {err}") - } } } } From 006dc3522cc4bddba9ddcee2a2c5a5e4291bee70 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 8 Dec 2023 03:51:21 +0100 Subject: [PATCH 017/107] serialize: remove `preliminary_type_check` method As we removed all the non-default impls of `preliminary_type_check`, the method now does nothing. Remove it and all the remaining callers. 
--- scylla-cql/src/frame/value_tests.rs | 4 ---- scylla-cql/src/types/serialize/row.rs | 18 ------------------ scylla-cql/src/types/serialize/value.rs | 16 ---------------- 3 files changed, 38 deletions(-) diff --git a/scylla-cql/src/frame/value_tests.rs b/scylla-cql/src/frame/value_tests.rs index ecb678ecae..0ded4b4ed0 100644 --- a/scylla-cql/src/frame/value_tests.rs +++ b/scylla-cql/src/frame/value_tests.rs @@ -24,8 +24,6 @@ where let mut result: Vec = Vec::new(); Value::serialize(&val, &mut result).unwrap(); - T::preliminary_type_check(&typ).unwrap(); - let mut new_result: Vec = Vec::new(); let writer = CellWriter::new(&mut new_result); SerializeCql::serialize(&val, &typ, writer).unwrap(); @@ -995,7 +993,6 @@ fn serialize_values( serialized.write_to_request(&mut old_serialized); let ctx = RowSerializationContext { columns }; - ::preliminary_type_check(&ctx).unwrap(); let mut new_serialized = vec![0, 0]; let mut writer = RowWriter::new(&mut new_serialized); ::serialize(&vl, &ctx, &mut writer).unwrap(); @@ -1014,7 +1011,6 @@ fn serialize_values( fn serialize_values_only_new(vl: T, columns: &[ColumnSpec]) -> Vec { let ctx = RowSerializationContext { columns }; - ::preliminary_type_check(&ctx).unwrap(); let mut serialized = vec![0, 0]; let mut writer = RowWriter::new(&mut serialized); ::serialize(&vl, &ctx, &mut writer).unwrap(); diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 72c0bf5a23..925ceeb4ed 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -33,25 +33,7 @@ impl<'a> RowSerializationContext<'a> { } pub trait SerializeRow { - /// Checks if it _might_ be possible to serialize the row according to the - /// information in the context. - /// - /// This function is intended to serve as an optimization in the future, - /// if we were ever to introduce prepared statements parametrized by types. 
- /// - /// Sometimes, a row cannot be fully type checked right away without knowing - /// the exact values of the columns (e.g. when deserializing to `CqlValue`), - /// but it's fine to do full type checking later in `serialize`. - fn preliminary_type_check( - _ctx: &RowSerializationContext<'_>, - ) -> Result<(), SerializationError> { - Ok(()) - } - /// Serializes the row according to the information in the given context. - /// - /// The function may assume that `preliminary_type_check` was called, - /// though it must not do anything unsafe if this assumption does not hold. fn serialize( &self, ctx: &RowSerializationContext<'_>, diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 82f79c81a2..224b678c0a 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -28,22 +28,7 @@ use super::writers::WrittenCellProof; use super::{CellWriter, SerializationError}; pub trait SerializeCql { - /// Given a CQL type, checks if it _might_ be possible to serialize to that type. - /// - /// This function is intended to serve as an optimization in the future, - /// if we were ever to introduce prepared statements parametrized by types. - /// - /// Some types cannot be type checked without knowing the exact value, - /// this is the case e.g. for `CqlValue`. It's also fine to do it later in - /// `serialize`. - fn preliminary_type_check(_typ: &ColumnType) -> Result<(), SerializationError> { - Ok(()) - } - /// Serializes the value to given CQL type. - /// - /// The function may assume that `preliminary_type_check` was called, - /// though it must not do anything unsafe if this assumption does not hold. 
fn serialize<'b>( &self, typ: &ColumnType, @@ -572,7 +557,6 @@ fn check_and_serialize<'b, V: SerializeCql>( typ: &ColumnType, writer: CellWriter<'b>, ) -> Result, SerializationError> { - V::preliminary_type_check(typ)?; v.serialize(typ, writer) } From ef6e428e39c833c88c8565c2052e5fa07d8cf30b Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 8 Dec 2023 07:09:10 +0100 Subject: [PATCH 018/107] serialize/value: inline check_and_serialize into callers --- scylla-cql/src/types/serialize/value.rs | 52 +++++++++++-------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 224b678c0a..4359eee497 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -439,14 +439,14 @@ fn serialize_cql_value<'b>( )); } match value { - CqlValue::Ascii(a) => check_and_serialize(a, typ, writer), - CqlValue::Boolean(b) => check_and_serialize(b, typ, writer), - CqlValue::Blob(b) => check_and_serialize(b, typ, writer), - CqlValue::Counter(c) => check_and_serialize(c, typ, writer), - CqlValue::Decimal(d) => check_and_serialize(d, typ, writer), - CqlValue::Date(d) => check_and_serialize(d, typ, writer), - CqlValue::Double(d) => check_and_serialize(d, typ, writer), - CqlValue::Duration(d) => check_and_serialize(d, typ, writer), + CqlValue::Ascii(a) => <_ as SerializeCql>::serialize(&a, typ, writer), + CqlValue::Boolean(b) => <_ as SerializeCql>::serialize(&b, typ, writer), + CqlValue::Blob(b) => <_ as SerializeCql>::serialize(&b, typ, writer), + CqlValue::Counter(c) => <_ as SerializeCql>::serialize(&c, typ, writer), + CqlValue::Decimal(d) => <_ as SerializeCql>::serialize(&d, typ, writer), + CqlValue::Date(d) => <_ as SerializeCql>::serialize(&d, typ, writer), + CqlValue::Double(d) => <_ as SerializeCql>::serialize(&d, typ, writer), + CqlValue::Duration(d) => <_ as SerializeCql>::serialize(&d, typ, writer), CqlValue::Empty => { if 
!typ.supports_special_empty_value() { return Err(mk_typck_err::( @@ -456,13 +456,13 @@ fn serialize_cql_value<'b>( } Ok(writer.set_value(&[]).unwrap()) } - CqlValue::Float(f) => check_and_serialize(f, typ, writer), - CqlValue::Int(i) => check_and_serialize(i, typ, writer), - CqlValue::BigInt(b) => check_and_serialize(b, typ, writer), - CqlValue::Text(t) => check_and_serialize(t, typ, writer), - CqlValue::Timestamp(t) => check_and_serialize(t, typ, writer), - CqlValue::Inet(i) => check_and_serialize(i, typ, writer), - CqlValue::List(l) => check_and_serialize(l, typ, writer), + CqlValue::Float(f) => <_ as SerializeCql>::serialize(&f, typ, writer), + CqlValue::Int(i) => <_ as SerializeCql>::serialize(&i, typ, writer), + CqlValue::BigInt(b) => <_ as SerializeCql>::serialize(&b, typ, writer), + CqlValue::Text(t) => <_ as SerializeCql>::serialize(&t, typ, writer), + CqlValue::Timestamp(t) => <_ as SerializeCql>::serialize(&t, typ, writer), + CqlValue::Inet(i) => <_ as SerializeCql>::serialize(&i, typ, writer), + CqlValue::List(l) => <_ as SerializeCql>::serialize(&l, typ, writer), CqlValue::Map(m) => serialize_mapping( std::any::type_name::(), m.len(), @@ -470,16 +470,16 @@ fn serialize_cql_value<'b>( typ, writer, ), - CqlValue::Set(s) => check_and_serialize(s, typ, writer), + CqlValue::Set(s) => <_ as SerializeCql>::serialize(&s, typ, writer), CqlValue::UserDefinedType { keyspace, type_name, fields, } => serialize_udt(typ, keyspace, type_name, fields, writer), - CqlValue::SmallInt(s) => check_and_serialize(s, typ, writer), - CqlValue::TinyInt(t) => check_and_serialize(t, typ, writer), - CqlValue::Time(t) => check_and_serialize(t, typ, writer), - CqlValue::Timeuuid(t) => check_and_serialize(t, typ, writer), + CqlValue::SmallInt(s) => <_ as SerializeCql>::serialize(&s, typ, writer), + CqlValue::TinyInt(t) => <_ as SerializeCql>::serialize(&t, typ, writer), + CqlValue::Time(t) => <_ as SerializeCql>::serialize(&t, typ, writer), + CqlValue::Timeuuid(t) => <_ as 
SerializeCql>::serialize(&t, typ, writer), CqlValue::Tuple(t) => { // We allow serializing tuples that have less fields // than the database tuple, but not the other way around. @@ -505,8 +505,8 @@ fn serialize_cql_value<'b>( }; serialize_tuple_like(typ, fields.iter(), t.iter(), writer) } - CqlValue::Uuid(u) => check_and_serialize(u, typ, writer), - CqlValue::Varint(v) => check_and_serialize(v, typ, writer), + CqlValue::Uuid(u) => <_ as SerializeCql>::serialize(&u, typ, writer), + CqlValue::Varint(v) => <_ as SerializeCql>::serialize(&v, typ, writer), } } @@ -552,14 +552,6 @@ fn fix_cql_value_name_in_err(mut err: SerializationError) -> SerializationError err } -fn check_and_serialize<'b, V: SerializeCql>( - v: &V, - typ: &ColumnType, - writer: CellWriter<'b>, -) -> Result, SerializationError> { - v.serialize(typ, writer) -} - fn serialize_udt<'b>( typ: &ColumnType, keyspace: &str, From 6a1ea41e716d4de49376b26894c811f227d81ddc Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 8 Dec 2023 03:58:53 +0100 Subject: [PATCH 019/107] serialize/row: relax restriction on SerializeRow impl on reference The bound on `impl SerializeRow for &T` implicitly requires the type `T` to be sized, preventing it from being used with `dyn SerializeRow`. Relax the restriction by adding `+ ?Sized` to be able to use it with trait objects. 
--- scylla-cql/src/types/serialize/row.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 925ceeb4ed..c5256a2eb8 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -210,7 +210,7 @@ impl SerializeRow for HashMap<&str, T, S> { impl_serialize_row_for_map!(); } -impl SerializeRow for &T { +impl SerializeRow for &T { fn serialize( &self, ctx: &RowSerializationContext<'_>, From 5733a2f5a34e9e8d458c93ba81415eb35eb809d5 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 8 Dec 2023 04:02:20 +0100 Subject: [PATCH 020/107] serialize: tests for dyn SerializeCql/dyn SerializeRow As a final confirmation of the work in the PR and to prevent regressions, add tests which explicitly use `dyn SerializeCql` and `dyn SerializeRow`. --- scylla-cql/src/types/serialize/row.rs | 33 +++++++++++++++++++++++++ scylla-cql/src/types/serialize/value.rs | 15 +++++++++++ 2 files changed, 48 insertions(+) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index c5256a2eb8..d8702100b6 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -639,4 +639,37 @@ mod tests { // Skip the value count assert_eq!(&sorted_row_data[2..], unsorted_row_data); } + + #[test] + fn test_dyn_serialize_row() { + let row = ( + 1i32, + "Ala ma kota", + None::, + MaybeUnset::Unset::, + ); + let ctx = RowSerializationContext { + columns: &[ + col_spec("a", ColumnType::Int), + col_spec("b", ColumnType::Text), + col_spec("c", ColumnType::BigInt), + col_spec("d", ColumnType::Ascii), + ], + }; + + let mut typed_data = Vec::new(); + let mut typed_data_writer = RowWriter::new(&mut typed_data); + <_ as SerializeRow>::serialize(&row, &ctx, &mut typed_data_writer).unwrap(); + + let row = &row as &dyn SerializeRow; + let mut erased_data = Vec::new(); + let mut erased_data_writer = 
RowWriter::new(&mut erased_data); + <_ as SerializeRow>::serialize(&row, &ctx, &mut erased_data_writer).unwrap(); + + assert_eq!( + typed_data_writer.value_count(), + erased_data_writer.value_count(), + ); + assert_eq!(typed_data, erased_data); + } } diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 4359eee497..37244f7073 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -1392,4 +1392,19 @@ mod tests { check_compat(None::); check_compat(MaybeUnset::Unset::); } + + #[test] + fn test_dyn_serialize_cql() { + let v: i32 = 123; + let mut typed_data = Vec::new(); + let typed_data_writer = CellWriter::new(&mut typed_data); + <_ as SerializeCql>::serialize(&v, &ColumnType::Int, typed_data_writer).unwrap(); + + let v = &v as &dyn SerializeCql; + let mut erased_data = Vec::new(); + let erased_data_writer = CellWriter::new(&mut erased_data); + <_ as SerializeCql>::serialize(&v, &ColumnType::Int, erased_data_writer).unwrap(); + + assert_eq!(typed_data, erased_data); + } } From 80a59a9c7dfc6e6c1207ec4e1f04690649623ab9 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 20 Oct 2023 10:01:32 +0200 Subject: [PATCH 021/107] scylla-macros: introduce SerializeCql derive macro Introduce a derive macro which serializes a struct into a UDT. Unlike the previous IntoUserType, the new macro takes care to match the struct fields to UDT fields by their names. It does not assume that the order of the fields in the Rust struct is the same as in the UDT. 
--- Cargo.lock.msrv | 48 ++++ scylla-cql/src/lib.rs | 12 + scylla-cql/src/macros.rs | 62 +++++ scylla-cql/src/types/serialize/value.rs | 265 +++++++++++++++++++++- scylla-cql/src/types/serialize/writers.rs | 1 + scylla-macros/Cargo.toml | 1 + scylla-macros/src/lib.rs | 12 + scylla-macros/src/serialize/cql.rs | 224 ++++++++++++++++++ scylla-macros/src/serialize/mod.rs | 1 + scylla/tests/integration/hygiene.rs | 6 + 10 files changed, 630 insertions(+), 2 deletions(-) create mode 100644 scylla-macros/src/serialize/cql.rs create mode 100644 scylla-macros/src/serialize/mod.rs diff --git a/Cargo.lock.msrv b/Cargo.lock.msrv index 10393ac6eb..59c9ee1b56 100644 --- a/Cargo.lock.msrv +++ b/Cargo.lock.msrv @@ -340,6 +340,41 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "darling" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.38", +] + +[[package]] +name = "darling_macro" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.38", +] + [[package]] name = "dashmap" version = "5.5.3" @@ -444,6 +479,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foreign-types" version = "0.3.2" @@ -651,6 +692,12 @@ dependencies = [ "cc", ] +[[package]] +name 
= "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "0.4.0" @@ -1455,6 +1502,7 @@ dependencies = [ name = "scylla-macros" version = "0.2.0" dependencies = [ + "darling", "proc-macro2", "quote", "syn 2.0.38", diff --git a/scylla-cql/src/lib.rs b/scylla-cql/src/lib.rs index b8d7d28671..ab94470e10 100644 --- a/scylla-cql/src/lib.rs +++ b/scylla-cql/src/lib.rs @@ -20,4 +20,16 @@ pub mod _macro_internal { SerializedResult, SerializedValues, Value, ValueList, ValueTooBig, }; pub use crate::macros::*; + + pub use crate::types::serialize::value::{ + BuiltinSerializationError as BuiltinTypeSerializationError, + BuiltinSerializationErrorKind as BuiltinTypeSerializationErrorKind, + BuiltinTypeCheckError as BuiltinTypeTypeCheckError, + BuiltinTypeCheckErrorKind as BuiltinTypeTypeCheckErrorKind, SerializeCql, + UdtSerializationErrorKind, UdtTypeCheckErrorKind, + }; + pub use crate::types::serialize::writers::WrittenCellProof; + pub use crate::types::serialize::{CellValueBuilder, CellWriter, SerializationError}; + + pub use crate::frame::response::result::ColumnType; } diff --git a/scylla-cql/src/macros.rs b/scylla-cql/src/macros.rs index 8d60312145..56f1f43cf3 100644 --- a/scylla-cql/src/macros.rs +++ b/scylla-cql/src/macros.rs @@ -13,6 +13,68 @@ pub use scylla_macros::IntoUserType; /// #[derive(ValueList)] allows to pass struct as a list of values for a query pub use scylla_macros::ValueList; +/// Derive macro for the [`SerializeCql`](crate::types::serialize::value::SerializeCql) trait +/// which serializes given Rust structure as a User Defined Type (UDT). +/// +/// At the moment, only structs with named fields are supported. The generated +/// implementation of the trait will match the struct fields to UDT fields +/// by name automatically. 
+/// +/// Serialization will fail if there are some fields in the UDT that don't match +/// to any of the Rust struct fields, _or vice versa_. +/// +/// In case of failure, either [`BuiltinTypeCheckError`](crate::types::serialize::value::BuiltinTypeCheckError) +/// or [`BuiltinSerializationError`](crate::types::serialize::value::BuiltinSerializationError) +/// will be returned. +/// +/// # Example +/// +/// A UDT defined like this: +/// +/// ```notrust +/// CREATE TYPE ks.my_udt (a int, b text, c blob); +/// ``` +/// +/// ...can be serialized using the following struct: +/// +/// ```rust +/// # use scylla_cql::macros::SerializeCql; +/// #[derive(SerializeCql)] +/// # #[scylla(crate = scylla_cql)] +/// struct MyUdt { +/// a: i32, +/// b: Option, +/// c: Vec, +/// } +/// ``` +/// +/// # Attributes +/// +/// `#[scylla(crate = crate_name)]` +/// +/// By default, the code generated by the derive macro will refer to the items +/// defined by the driver (types, traits, etc.) via the `::scylla` path. +/// For example, it will refer to the [`SerializeCql`](crate::types::serialize::value::SerializeCql) trait +/// using the following path: +/// +/// ```rust,ignore +/// use ::scylla::_macro_internal::SerializeCql; +/// ``` +/// +/// Most users will simply add `scylla` to their dependencies, then use +/// the derive macro and the path above will work. However, there are some +/// niche cases where this path will _not_ work: +/// +/// - The `scylla` crate is imported under a different name, +/// - The `scylla` crate is _not imported at all_ - the macro actually +/// is defined in the `scylla-macros` crate and the generated code depends +/// on items defined in `scylla-cql`. +/// +/// It's not possible to automatically resolve those issues in the procedural +/// macro itself, so in those cases the user must provide an alternative path +/// to either the `scylla` or `scylla-cql` crate. 
+pub use scylla_macros::SerializeCql; + // Reexports for derive(IntoUserType) pub use bytes::{BufMut, Bytes, BytesMut}; diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 37244f7073..85033dac25 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -1309,6 +1309,9 @@ pub enum UdtTypeCheckErrorKind { /// The name of the UDT being serialized to does not match. NameMismatch { keyspace: String, type_name: String }, + /// One of the fields that is required to be present by the Rust struct was not present in the CQL UDT type. + MissingField { field_name: String }, + /// The Rust data contains a field that is not present in the UDT UnexpectedFieldInDestination { field_name: String }, } @@ -1327,6 +1330,9 @@ impl Display for UdtTypeCheckErrorKind { f, "the Rust UDT name does not match the actual CQL UDT name ({keyspace}.{type_name})" ), + UdtTypeCheckErrorKind::MissingField { field_name } => { + write!(f, "the field {field_name} is missing from the CQL UDT type") + } UdtTypeCheckErrorKind::UnexpectedFieldInDestination { field_name } => write!( f, "the field {field_name} present in the Rust data is not present in the CQL type" @@ -1369,11 +1375,17 @@ pub enum ValueToSerializeCqlAdapterError { #[cfg(test)] mod tests { - use crate::frame::response::result::ColumnType; + use crate::frame::response::result::{ColumnType, CqlValue}; use crate::frame::value::{MaybeUnset, Value}; + use crate::types::serialize::value::{ + BuiltinSerializationError, BuiltinSerializationErrorKind, BuiltinTypeCheckError, + BuiltinTypeCheckErrorKind, + }; use crate::types::serialize::CellWriter; - use super::SerializeCql; + use scylla_macros::SerializeCql; + + use super::{SerializeCql, UdtSerializationErrorKind, UdtTypeCheckErrorKind}; fn check_compat(v: V) { let mut legacy_data = Vec::new(); @@ -1407,4 +1419,253 @@ mod tests { assert_eq!(typed_data, erased_data); } + + fn do_serialize(t: T, typ: 
&ColumnType) -> Vec { + let mut ret = Vec::new(); + let writer = CellWriter::new(&mut ret); + t.serialize(typ, writer).unwrap(); + ret + } + + // Do not remove. It's not used in tests but we keep it here to check that + // we properly ignore warnings about unused variables, unnecessary `mut`s + // etc. that usually pop up when generating code for empty structs. + #[derive(SerializeCql)] + #[scylla(crate = crate)] + struct TestUdtWithNoFields {} + + #[derive(SerializeCql, Debug, PartialEq, Eq, Default)] + #[scylla(crate = crate)] + struct TestUdtWithFieldSorting { + a: String, + b: i32, + c: Vec, + } + + #[test] + fn test_udt_serialization_with_field_sorting_correct_order() { + let typ = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + ( + "c".to_string(), + ColumnType::List(Box::new(ColumnType::BigInt)), + ), + ], + }; + + let reference = do_serialize( + CqlValue::UserDefinedType { + keyspace: "ks".to_string(), + type_name: "typ".to_string(), + fields: vec![ + ( + "a".to_string(), + Some(CqlValue::Text(String::from("Ala ma kota"))), + ), + ("b".to_string(), Some(CqlValue::Int(42))), + ( + "c".to_string(), + Some(CqlValue::List(vec![ + CqlValue::BigInt(1), + CqlValue::BigInt(2), + CqlValue::BigInt(3), + ])), + ), + ], + }, + &typ, + ); + let udt = do_serialize( + TestUdtWithFieldSorting { + a: "Ala ma kota".to_owned(), + b: 42, + c: vec![1, 2, 3], + }, + &typ, + ); + + assert_eq!(reference, udt); + } + + #[test] + fn test_udt_serialization_with_field_sorting_incorrect_order() { + let typ = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + // Two first columns are swapped + ("b".to_string(), ColumnType::Int), + ("a".to_string(), ColumnType::Text), + ( + "c".to_string(), + ColumnType::List(Box::new(ColumnType::BigInt)), + ), + ], + }; + + let reference = 
do_serialize( + CqlValue::UserDefinedType { + keyspace: "ks".to_string(), + type_name: "typ".to_string(), + fields: vec![ + // FIXME: UDTs in CqlValue should also honor the order + // For now, it's swapped here as well + ("b".to_string(), Some(CqlValue::Int(42))), + ( + "a".to_string(), + Some(CqlValue::Text(String::from("Ala ma kota"))), + ), + ( + "c".to_string(), + Some(CqlValue::List(vec![ + CqlValue::BigInt(1), + CqlValue::BigInt(2), + CqlValue::BigInt(3), + ])), + ), + ], + }, + &typ, + ); + let udt = do_serialize( + TestUdtWithFieldSorting { + a: "Ala ma kota".to_owned(), + b: 42, + c: vec![1, 2, 3], + }, + &typ, + ); + + assert_eq!(reference, udt); + } + + #[test] + fn test_udt_serialization_failing_type_check() { + let typ_not_udt = ColumnType::Ascii; + let udt = TestUdtWithFieldSorting::default(); + let mut data = Vec::new(); + + let err = udt + .serialize(&typ_not_udt, CellWriter::new(&mut data)) + .unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::NotUdt) + )); + + let typ_without_c = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + // Last field is missing + ], + }; + + let err = udt + .serialize(&typ_without_c, CellWriter::new(&mut data)) + .unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::MissingField { .. 
}) + )); + + let typ_unexpected_field = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + ( + "c".to_string(), + ColumnType::List(Box::new(ColumnType::BigInt)), + ), + // Unexpected field + ("d".to_string(), ColumnType::Counter), + ], + }; + + let err = udt + .serialize(&typ_unexpected_field, CellWriter::new(&mut data)) + .unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::UdtError( + UdtTypeCheckErrorKind::UnexpectedFieldInDestination { .. } + ) + )); + + let typ_wrong_type = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + ("c".to_string(), ColumnType::TinyInt), // Wrong column type + ], + }; + + let err = udt + .serialize(&typ_wrong_type, CellWriter::new(&mut data)) + .unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinSerializationErrorKind::UdtError( + UdtSerializationErrorKind::FieldSerializationFailed { .. } + ) + )); + } + + #[derive(SerializeCql)] + #[scylla(crate = crate)] + struct TestUdtWithGenerics<'a, T: SerializeCql> { + a: &'a str, + b: T, + } + + #[test] + fn test_udt_serialization_with_generics() { + // A minimal smoke test just to test that it works. 
+ fn check_with_type(typ: ColumnType, t: T, cql_t: CqlValue) { + let typ = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![("a".to_string(), ColumnType::Text), ("b".to_string(), typ)], + }; + let reference = do_serialize( + CqlValue::UserDefinedType { + keyspace: "ks".to_string(), + type_name: "typ".to_string(), + fields: vec![ + ( + "a".to_string(), + Some(CqlValue::Text(String::from("Ala ma kota"))), + ), + ("b".to_string(), Some(cql_t)), + ], + }, + &typ, + ); + let udt = do_serialize( + TestUdtWithGenerics { + a: "Ala ma kota", + b: t, + }, + &typ, + ); + assert_eq!(reference, udt); + } + + check_with_type(ColumnType::Int, 123_i32, CqlValue::Int(123_i32)); + check_with_type(ColumnType::Double, 123_f64, CqlValue::Double(123_f64)); + } } diff --git a/scylla-cql/src/types/serialize/writers.rs b/scylla-cql/src/types/serialize/writers.rs index 4d350adc75..9b2be47998 100644 --- a/scylla-cql/src/types/serialize/writers.rs +++ b/scylla-cql/src/types/serialize/writers.rs @@ -183,6 +183,7 @@ impl<'buf> CellValueBuilder<'buf> { /// [`SerializeCql::serialize`](super::value::SerializeCql::serialize): either /// the method succeeds and returns a proof that it serialized itself /// into the given value, or it fails and returns an error or panics. +#[derive(Debug)] pub struct WrittenCellProof<'buf> { /// Using *mut &'buf () is deliberate and makes WrittenCellProof invariant /// on the 'buf lifetime parameter. 
diff --git a/scylla-macros/Cargo.toml b/scylla-macros/Cargo.toml index d39bd58116..ac5f5d16f1 100644 --- a/scylla-macros/Cargo.toml +++ b/scylla-macros/Cargo.toml @@ -12,6 +12,7 @@ license = "MIT OR Apache-2.0" proc-macro = true [dependencies] +darling = "0.20.0" syn = "2.0" quote = "1.0" proc-macro2 = "1.0" \ No newline at end of file diff --git a/scylla-macros/src/lib.rs b/scylla-macros/src/lib.rs index 59300a0020..84ee58bca0 100644 --- a/scylla-macros/src/lib.rs +++ b/scylla-macros/src/lib.rs @@ -1,4 +1,5 @@ use proc_macro::TokenStream; +use quote::ToTokens; mod from_row; mod from_user_type; @@ -6,6 +7,17 @@ mod into_user_type; mod parser; mod value_list; +mod serialize; + +/// See the documentation for this item in the `scylla` crate. +#[proc_macro_derive(SerializeCql, attributes(scylla))] +pub fn serialize_cql_derive(tokens_input: TokenStream) -> TokenStream { + match serialize::cql::derive_serialize_cql(tokens_input) { + Ok(t) => t.into_token_stream().into(), + Err(e) => e.into_compile_error().into(), + } +} + /// #[derive(FromRow)] derives FromRow for struct /// Works only on simple structs without generics etc #[proc_macro_derive(FromRow, attributes(scylla_crate))] diff --git a/scylla-macros/src/serialize/cql.rs b/scylla-macros/src/serialize/cql.rs new file mode 100644 index 0000000000..f19e47b27c --- /dev/null +++ b/scylla-macros/src/serialize/cql.rs @@ -0,0 +1,224 @@ +use darling::FromAttributes; +use proc_macro::TokenStream; +use proc_macro2::Span; +use syn::parse_quote; + +#[derive(FromAttributes)] +#[darling(attributes(scylla))] +struct Attributes { + #[darling(rename = "crate")] + crate_path: Option, +} + +impl Attributes { + fn crate_path(&self) -> syn::Path { + self.crate_path + .as_ref() + .map(|p| parse_quote!(#p::_macro_internal)) + .unwrap_or_else(|| parse_quote!(::scylla::_macro_internal)) + } +} + +struct Context { + attributes: Attributes, + fields: Vec, +} + +pub fn derive_serialize_cql(tokens_input: TokenStream) -> Result { + let input: 
syn::DeriveInput = syn::parse(tokens_input)?; + let struct_name = input.ident.clone(); + let named_fields = crate::parser::parse_named_fields(&input, "SerializeCql")?; + let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl(); + let attributes = Attributes::from_attributes(&input.attrs)?; + + let crate_path = attributes.crate_path(); + let implemented_trait: syn::Path = parse_quote!(#crate_path::SerializeCql); + + let fields = named_fields.named.iter().cloned().collect(); + let ctx = Context { attributes, fields }; + let gen = FieldSortingGenerator { ctx: &ctx }; + + let serialize_item = gen.generate_serialize(); + + let res = parse_quote! { + impl #impl_generics #implemented_trait for #struct_name #ty_generics #where_clause { + #serialize_item + } + }; + Ok(res) +} + +impl Context { + fn generate_udt_type_match(&self, err: syn::Expr) -> syn::Stmt { + let crate_path = self.attributes.crate_path(); + + parse_quote! { + let (type_name, keyspace, field_types) = match typ { + #crate_path::ColumnType::UserDefinedType { type_name, keyspace, field_types, .. } => { + (type_name, keyspace, field_types) + } + _ => return ::std::result::Result::Err(mk_typck_err(#err)), + }; + } + } + + fn generate_mk_typck_err(&self) -> syn::Stmt { + let crate_path = self.attributes.crate_path(); + parse_quote! { + let mk_typck_err = |kind: #crate_path::UdtTypeCheckErrorKind| -> #crate_path::SerializationError { + #crate_path::SerializationError::new( + #crate_path::BuiltinTypeTypeCheckError { + rust_name: ::std::any::type_name::(), + got: <_ as ::std::clone::Clone>::clone(typ), + kind: #crate_path::BuiltinTypeTypeCheckErrorKind::UdtError(kind), + } + ) + }; + } + } + + fn generate_mk_ser_err(&self) -> syn::Stmt { + let crate_path = self.attributes.crate_path(); + parse_quote! 
{ + let mk_ser_err = |kind: #crate_path::UdtSerializationErrorKind| -> #crate_path::SerializationError { + #crate_path::SerializationError::new( + #crate_path::BuiltinTypeSerializationError { + rust_name: ::std::any::type_name::(), + got: <_ as ::std::clone::Clone>::clone(typ), + kind: #crate_path::BuiltinTypeSerializationErrorKind::UdtError(kind), + } + ) + }; + } + } +} + +// Generates an implementation of the trait which sorts the fields according +// to how it is defined in the database. +struct FieldSortingGenerator<'a> { + ctx: &'a Context, +} + +impl<'a> FieldSortingGenerator<'a> { + fn generate_serialize(&self) -> syn::TraitItemFn { + // Need to: + // - Check that all required fields are there and no more + // - Check that the field types match + let mut statements: Vec = Vec::new(); + + let crate_path = self.ctx.attributes.crate_path(); + + let rust_field_idents = self + .ctx + .fields + .iter() + .map(|f| f.ident.clone()) + .collect::>(); + let rust_field_names = rust_field_idents + .iter() + .map(|i| i.as_ref().unwrap().to_string()) + .collect::>(); + let udt_field_names = rust_field_names.clone(); // For now, it's the same + let field_types = self.ctx.fields.iter().map(|f| &f.ty).collect::>(); + + // Declare helper lambdas for creating errors + statements.push(self.ctx.generate_mk_typck_err()); + statements.push(self.ctx.generate_mk_ser_err()); + + // Check that the type we want to serialize to is a UDT + statements.push( + self.ctx + .generate_udt_type_match(parse_quote!(#crate_path::UdtTypeCheckErrorKind::NotUdt)), + ); + + // Generate a "visited" flag for each field + let visited_flag_names = rust_field_names + .iter() + .map(|s| syn::Ident::new(&format!("visited_flag_{}", s), Span::call_site())) + .collect::>(); + statements.extend::>(parse_quote! { + #(let mut #visited_flag_names = false;)* + }); + + // Generate a variable that counts down visited fields. + let field_count = self.ctx.fields.len(); + statements.push(parse_quote! 
{ + let mut remaining_count = #field_count; + }); + + // Turn the cell writer into a value builder + statements.push(parse_quote! { + let mut builder = #crate_path::CellWriter::into_value_builder(writer); + }); + + // Generate a loop over the fields and a `match` block to match on + // the field name. + statements.push(parse_quote! { + for (field_name, field_type) in field_types { + match ::std::string::String::as_str(field_name) { + #( + #udt_field_names => { + let sub_builder = #crate_path::CellValueBuilder::make_sub_writer(&mut builder); + match <#field_types as #crate_path::SerializeCql>::serialize(&self.#rust_field_idents, field_type, sub_builder) { + ::std::result::Result::Ok(_proof) => {} + ::std::result::Result::Err(err) => { + return ::std::result::Result::Err(mk_ser_err( + #crate_path::UdtSerializationErrorKind::FieldSerializationFailed { + field_name: <_ as ::std::clone::Clone>::clone(field_name), + err, + } + )); + } + } + if !#visited_flag_names { + #visited_flag_names = true; + remaining_count -= 1; + } + } + )* + _ => return ::std::result::Result::Err(mk_typck_err( + #crate_path::UdtTypeCheckErrorKind::UnexpectedFieldInDestination { + field_name: <_ as ::std::clone::Clone>::clone(field_name), + } + )), + } + } + }); + + // Finally, check that all fields were consumed. + // If there are some missing fields, return an error + statements.push(parse_quote! { + if remaining_count > 0 { + #( + if !#visited_flag_names { + return ::std::result::Result::Err(mk_typck_err( + #crate_path::UdtTypeCheckErrorKind::MissingField { + field_name: <_ as ::std::string::ToString>::to_string(#rust_field_names), + } + )); + } + )* + ::std::unreachable!() + } + }); + + parse_quote! 
{ + fn serialize<'b>( + &self, + typ: &#crate_path::ColumnType, + writer: #crate_path::CellWriter<'b>, + ) -> ::std::result::Result<#crate_path::WrittenCellProof<'b>, #crate_path::SerializationError> { + #(#statements)* + let proof = #crate_path::CellValueBuilder::finish(builder) + .map_err(|_| #crate_path::SerializationError::new( + #crate_path::BuiltinTypeSerializationError { + rust_name: ::std::any::type_name::(), + got: <_ as ::std::clone::Clone>::clone(typ), + kind: #crate_path::BuiltinTypeSerializationErrorKind::SizeOverflow, + } + ) as #crate_path::SerializationError)?; + ::std::result::Result::Ok(proof) + } + } + } +} diff --git a/scylla-macros/src/serialize/mod.rs b/scylla-macros/src/serialize/mod.rs new file mode 100644 index 0000000000..15fd9ae87c --- /dev/null +++ b/scylla-macros/src/serialize/mod.rs @@ -0,0 +1 @@ +pub(crate) mod cql; diff --git a/scylla/tests/integration/hygiene.rs b/scylla/tests/integration/hygiene.rs index 6195bb0256..12d55ccb61 100644 --- a/scylla/tests/integration/hygiene.rs +++ b/scylla/tests/integration/hygiene.rs @@ -63,6 +63,12 @@ macro_rules! test_crate { let sv2 = tuple_with_same_layout.serialized().unwrap().into_owned(); assert_eq!(sv, sv2); } + + #[derive(_scylla::macros::SerializeCql)] + #[scylla(crate = _scylla)] + struct TestStructNew { + x: ::core::primitive::i32, + } }; } From 30a69f84199498fc7f1c403d943fc926c9c79cd4 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 20 Oct 2023 10:08:40 +0200 Subject: [PATCH 022/107] scylla-macros: introduce SerializeRow derive macro Introduce a derive macro which serializes a struct into bind markers of a statement. Unlike the previous ValueList, the new macro takes care to match the struct fields to bind markers/columns by their names. 
--- scylla-cql/src/lib.rs | 11 +- scylla-cql/src/macros.rs | 64 ++++++++ scylla-cql/src/types/serialize/row.rs | 168 ++++++++++++++++++++- scylla-macros/src/lib.rs | 9 ++ scylla-macros/src/serialize/mod.rs | 1 + scylla-macros/src/serialize/row.rs | 202 ++++++++++++++++++++++++++ scylla/tests/integration/hygiene.rs | 2 +- 7 files changed, 454 insertions(+), 3 deletions(-) create mode 100644 scylla-macros/src/serialize/row.rs diff --git a/scylla-cql/src/lib.rs b/scylla-cql/src/lib.rs index ab94470e10..6d74b680ba 100644 --- a/scylla-cql/src/lib.rs +++ b/scylla-cql/src/lib.rs @@ -21,6 +21,13 @@ pub mod _macro_internal { }; pub use crate::macros::*; + pub use crate::types::serialize::row::{ + BuiltinSerializationError as BuiltinRowSerializationError, + BuiltinSerializationErrorKind as BuiltinRowSerializationErrorKind, + BuiltinTypeCheckError as BuiltinRowTypeCheckError, + BuiltinTypeCheckErrorKind as BuiltinRowTypeCheckErrorKind, RowSerializationContext, + SerializeRow, + }; pub use crate::types::serialize::value::{ BuiltinSerializationError as BuiltinTypeSerializationError, BuiltinSerializationErrorKind as BuiltinTypeSerializationErrorKind, @@ -29,7 +36,9 @@ pub mod _macro_internal { UdtSerializationErrorKind, UdtTypeCheckErrorKind, }; pub use crate::types::serialize::writers::WrittenCellProof; - pub use crate::types::serialize::{CellValueBuilder, CellWriter, SerializationError}; + pub use crate::types::serialize::{ + CellValueBuilder, CellWriter, RowWriter, SerializationError, + }; pub use crate::frame::response::result::ColumnType; } diff --git a/scylla-cql/src/macros.rs b/scylla-cql/src/macros.rs index 56f1f43cf3..8f53e24fa9 100644 --- a/scylla-cql/src/macros.rs +++ b/scylla-cql/src/macros.rs @@ -75,6 +75,70 @@ pub use scylla_macros::ValueList; /// to either the `scylla` or `scylla-cql` crate. 
pub use scylla_macros::SerializeCql; +/// Derive macro for the [`SerializeRow`](crate::types::serialize::row::SerializeRow) trait +/// which serializes given Rust structure into bind markers for a CQL statement. +/// +/// At the moment, only structs with named fields are supported. The generated +/// implementation of the trait will match the struct fields to bind markers/columns +/// by name automatically. +/// +/// Serialization will fail if there are some bind markers/columns in the statement +/// that don't match to any of the Rust struct fields, _or vice versa_. +/// +/// In case of failure, either [`BuiltinTypeCheckError`](crate::types::serialize::row::BuiltinTypeCheckError) +/// or [`BuiltinSerializationError`](crate::types::serialize::row::BuiltinSerializationError) +/// will be returned. +/// +/// # Example +/// +/// +/// Given a table and a query: +/// +/// ```notrust +/// CREATE TABLE ks.my_t (a int PRIMARY KEY, b text, c blob); +/// INSERT INTO ks.my_t (a, b, c) VALUES (?, ?, ?); +/// ``` +/// +/// ...the values for the query can be serialized using the following struct: +/// +/// ```rust +/// # use scylla_cql::macros::SerializeRow; +/// #[derive(SerializeRow)] +/// # #[scylla(crate = scylla_cql)] +/// struct MyValues { +/// a: i32, +/// b: Option, +/// c: Vec, +/// } +/// ``` +/// +/// # Attributes +/// +/// `#[scylla(crate = crate_name)]` +/// +/// By default, the code generated by the derive macro will refer to the items +/// defined by the driver (types, traits, etc.) via the `::scylla` path. +/// For example, it will refer to the [`SerializeRow`](crate::types::serialize::row::SerializeRow) trait +/// using the following path: +/// +/// ```rust,ignore +/// use ::scylla::_macro_internal::SerializeRow; +/// ``` +/// +/// Most users will simply add `scylla` to their dependencies, then use +/// the derive macro and the path above will work. 
However, there are some +/// niche cases where this path will _not_ work: +/// +/// - The `scylla` crate is imported under a different name, +/// - The `scylla` crate is _not imported at all_ - the macro actually +/// is defined in the `scylla-macros` crate and the generated code depends +/// on items defined in `scylla-cql`. +/// +/// It's not possible to automatically resolve those issues in the procedural +/// macro itself, so in those cases the user must provide an alternative path +/// to either the `scylla` or `scylla-cql` crate. +pub use scylla_macros::SerializeRow; + // Reexports for derive(IntoUserType) pub use bytes::{BufMut, Bytes, BytesMut}; diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index d8702100b6..d398a42281 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -561,7 +561,12 @@ mod tests { use crate::frame::value::{MaybeUnset, SerializedValues, ValueList}; use crate::types::serialize::RowWriter; - use super::{RowSerializationContext, SerializeRow}; + use super::{ + BuiltinSerializationError, BuiltinSerializationErrorKind, BuiltinTypeCheckError, + BuiltinTypeCheckErrorKind, RowSerializationContext, SerializeCql, SerializeRow, + }; + + use scylla_macros::SerializeRow; fn col_spec(name: &str, typ: ColumnType) -> ColumnSpec { ColumnSpec { @@ -672,4 +677,165 @@ mod tests { ); assert_eq!(typed_data, erased_data); } + + fn do_serialize(t: T, columns: &[ColumnSpec]) -> Vec { + let ctx = RowSerializationContext { columns }; + let mut ret = Vec::new(); + let mut builder = RowWriter::new(&mut ret); + t.serialize(&ctx, &mut builder).unwrap(); + ret + } + + fn col(name: &str, typ: ColumnType) -> ColumnSpec { + ColumnSpec { + table_spec: TableSpec { + ks_name: "ks".to_string(), + table_name: "tbl".to_string(), + }, + name: name.to_string(), + typ, + } + } + + // Do not remove. 
It's not used in tests but we keep it here to check that + // we properly ignore warnings about unused variables, unnecessary `mut`s + // etc. that usually pop up when generating code for empty structs. + #[derive(SerializeRow)] + #[scylla(crate = crate)] + struct TestRowWithNoColumns {} + + #[derive(SerializeRow, Debug, PartialEq, Eq, Default)] + #[scylla(crate = crate)] + struct TestRowWithColumnSorting { + a: String, + b: i32, + c: Vec, + } + + #[test] + fn test_row_serialization_with_column_sorting_correct_order() { + let spec = [ + col("a", ColumnType::Text), + col("b", ColumnType::Int), + col("c", ColumnType::List(Box::new(ColumnType::BigInt))), + ]; + + let reference = do_serialize(("Ala ma kota", 42i32, vec![1i64, 2i64, 3i64]), &spec); + let row = do_serialize( + TestRowWithColumnSorting { + a: "Ala ma kota".to_owned(), + b: 42, + c: vec![1, 2, 3], + }, + &spec, + ); + + assert_eq!(reference, row); + } + + #[test] + fn test_row_serialization_with_column_sorting_incorrect_order() { + // The order of two last columns is swapped + let spec = [ + col("a", ColumnType::Text), + col("c", ColumnType::List(Box::new(ColumnType::BigInt))), + col("b", ColumnType::Int), + ]; + + let reference = do_serialize(("Ala ma kota", vec![1i64, 2i64, 3i64], 42i32), &spec); + let row = do_serialize( + TestRowWithColumnSorting { + a: "Ala ma kota".to_owned(), + b: 42, + c: vec![1, 2, 3], + }, + &spec, + ); + + assert_eq!(reference, row); + } + + #[test] + fn test_row_serialization_failing_type_check() { + let row = TestRowWithColumnSorting::default(); + let mut data = Vec::new(); + let mut row_writer = RowWriter::new(&mut data); + + let spec_without_c = [ + col("a", ColumnType::Text), + col("b", ColumnType::Int), + // Missing column c + ]; + + let ctx = RowSerializationContext { + columns: &spec_without_c, + }; + let err = <_ as SerializeRow>::serialize(&row, &ctx, &mut row_writer).unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + 
BuiltinTypeCheckErrorKind::ColumnMissingForValue { .. } + )); + + let spec_duplicate_column = [ + col("a", ColumnType::Text), + col("b", ColumnType::Int), + col("c", ColumnType::List(Box::new(ColumnType::BigInt))), + // Unexpected last column + col("d", ColumnType::Counter), + ]; + + let ctx = RowSerializationContext { + columns: &spec_duplicate_column, + }; + let err = <_ as SerializeRow>::serialize(&row, &ctx, &mut row_writer).unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::MissingValueForColumn { .. } + )); + + let spec_wrong_type = [ + col("a", ColumnType::Text), + col("b", ColumnType::Int), + col("c", ColumnType::TinyInt), // Wrong type + ]; + + let ctx = RowSerializationContext { + columns: &spec_wrong_type, + }; + let err = <_ as SerializeRow>::serialize(&row, &ctx, &mut row_writer).unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinSerializationErrorKind::ColumnSerializationFailed { .. } + )); + } + + #[derive(SerializeRow)] + #[scylla(crate = crate)] + struct TestRowWithGenerics<'a, T: SerializeCql> { + a: &'a str, + b: T, + } + + #[test] + fn test_row_serialization_with_generics() { + // A minimal smoke test just to test that it works. 
+ fn check_with_type(typ: ColumnType, t: T) { + let spec = [col("a", ColumnType::Text), col("b", typ)]; + let reference = do_serialize(("Ala ma kota", t), &spec); + let row = do_serialize( + TestRowWithGenerics { + a: "Ala ma kota", + b: t, + }, + &spec, + ); + assert_eq!(reference, row); + } + + check_with_type(ColumnType::Int, 123_i32); + check_with_type(ColumnType::Double, 123_f64); + } } diff --git a/scylla-macros/src/lib.rs b/scylla-macros/src/lib.rs index 84ee58bca0..64ce0ee06e 100644 --- a/scylla-macros/src/lib.rs +++ b/scylla-macros/src/lib.rs @@ -18,6 +18,15 @@ pub fn serialize_cql_derive(tokens_input: TokenStream) -> TokenStream { } } +/// See the documentation for this item in the `scylla` crate. +#[proc_macro_derive(SerializeRow, attributes(scylla))] +pub fn serialize_row_derive(tokens_input: TokenStream) -> TokenStream { + match serialize::row::derive_serialize_row(tokens_input) { + Ok(t) => t.into_token_stream().into(), + Err(e) => e.into_compile_error().into(), + } +} + /// #[derive(FromRow)] derives FromRow for struct /// Works only on simple structs without generics etc #[proc_macro_derive(FromRow, attributes(scylla_crate))] diff --git a/scylla-macros/src/serialize/mod.rs b/scylla-macros/src/serialize/mod.rs index 15fd9ae87c..53abe0f296 100644 --- a/scylla-macros/src/serialize/mod.rs +++ b/scylla-macros/src/serialize/mod.rs @@ -1 +1,2 @@ pub(crate) mod cql; +pub(crate) mod row; diff --git a/scylla-macros/src/serialize/row.rs b/scylla-macros/src/serialize/row.rs new file mode 100644 index 0000000000..0dd2356041 --- /dev/null +++ b/scylla-macros/src/serialize/row.rs @@ -0,0 +1,202 @@ +use darling::FromAttributes; +use proc_macro::TokenStream; +use proc_macro2::Span; +use syn::parse_quote; + +#[derive(FromAttributes)] +#[darling(attributes(scylla))] +struct Attributes { + #[darling(rename = "crate")] + crate_path: Option, +} + +impl Attributes { + fn crate_path(&self) -> syn::Path { + self.crate_path + .as_ref() + .map(|p| 
parse_quote!(#p::_macro_internal)) + .unwrap_or_else(|| parse_quote!(::scylla::_macro_internal)) + } +} + +struct Context { + attributes: Attributes, + fields: Vec, +} + +pub fn derive_serialize_row(tokens_input: TokenStream) -> Result { + let input: syn::DeriveInput = syn::parse(tokens_input)?; + let struct_name = input.ident.clone(); + let named_fields = crate::parser::parse_named_fields(&input, "SerializeRow")?; + let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl(); + let attributes = Attributes::from_attributes(&input.attrs)?; + + let crate_path = attributes.crate_path(); + let implemented_trait: syn::Path = parse_quote!(#crate_path::SerializeRow); + + let fields = named_fields.named.iter().cloned().collect(); + let ctx = Context { attributes, fields }; + let gen = ColumnSortingGenerator { ctx: &ctx }; + + let serialize_item = gen.generate_serialize(); + let is_empty_item = gen.generate_is_empty(); + + let res = parse_quote! { + impl #impl_generics #implemented_trait for #struct_name #ty_generics #where_clause { + #serialize_item + #is_empty_item + } + }; + Ok(res) +} + +impl Context { + fn generate_mk_typck_err(&self) -> syn::Stmt { + let crate_path = self.attributes.crate_path(); + parse_quote! { + let mk_typck_err = |kind: #crate_path::BuiltinRowTypeCheckErrorKind| -> #crate_path::SerializationError { + #crate_path::SerializationError::new( + #crate_path::BuiltinRowTypeCheckError { + rust_name: ::std::any::type_name::(), + kind, + } + ) + }; + } + } + + fn generate_mk_ser_err(&self) -> syn::Stmt { + let crate_path = self.attributes.crate_path(); + parse_quote! 
{ + let mk_ser_err = |kind: #crate_path::BuiltinRowSerializationErrorKind| -> #crate_path::SerializationError { + #crate_path::SerializationError::new( + #crate_path::BuiltinRowSerializationError { + rust_name: ::std::any::type_name::(), + kind, + } + ) + }; + } + } +} + +// Generates an implementation of the trait which sorts the columns according +// to how they are defined in prepared statement metadata. +struct ColumnSortingGenerator<'a> { + ctx: &'a Context, +} + +impl<'a> ColumnSortingGenerator<'a> { + fn generate_serialize(&self) -> syn::TraitItemFn { + // Need to: + // - Check that all required columns are there and no more + // - Check that the column types match + let mut statements: Vec = Vec::new(); + + let crate_path = self.ctx.attributes.crate_path(); + + let rust_field_idents = self + .ctx + .fields + .iter() + .map(|f| f.ident.clone()) + .collect::>(); + let rust_field_names = rust_field_idents + .iter() + .map(|i| i.as_ref().unwrap().to_string()) + .collect::>(); + let udt_field_names = rust_field_names.clone(); // For now, it's the same + let field_types = self.ctx.fields.iter().map(|f| &f.ty).collect::>(); + + // Declare a helper lambda for creating errors + statements.push(self.ctx.generate_mk_typck_err()); + statements.push(self.ctx.generate_mk_ser_err()); + + // Generate a "visited" flag for each field + let visited_flag_names = rust_field_names + .iter() + .map(|s| syn::Ident::new(&format!("visited_flag_{}", s), Span::call_site())) + .collect::>(); + statements.extend::>(parse_quote! { + #(let mut #visited_flag_names = false;)* + }); + + // Generate a variable that counts down visited fields. + let field_count = self.ctx.fields.len(); + statements.push(parse_quote! { + let mut remaining_count = #field_count; + }); + + // Generate a loop over the fields and a `match` block to match on + // the field name. + statements.push(parse_quote! 
{ + for spec in ctx.columns() { + match ::std::string::String::as_str(&spec.name) { + #( + #udt_field_names => { + let sub_writer = #crate_path::RowWriter::make_cell_writer(writer); + match <#field_types as #crate_path::SerializeCql>::serialize(&self.#rust_field_idents, &spec.typ, sub_writer) { + ::std::result::Result::Ok(_proof) => {} + ::std::result::Result::Err(err) => { + return ::std::result::Result::Err(mk_ser_err( + #crate_path::BuiltinRowSerializationErrorKind::ColumnSerializationFailed { + name: <_ as ::std::clone::Clone>::clone(&spec.name), + err, + } + )); + } + } + if !#visited_flag_names { + #visited_flag_names = true; + remaining_count -= 1; + } + } + )* + _ => return ::std::result::Result::Err(mk_typck_err( + #crate_path::BuiltinRowTypeCheckErrorKind::MissingValueForColumn { + name: <_ as ::std::clone::Clone>::clone(&&spec.name), + } + )), + } + } + }); + + // Finally, check that all fields were consumed. + // If there are some missing fields, return an error + statements.push(parse_quote! { + if remaining_count > 0 { + #( + if !#visited_flag_names { + return ::std::result::Result::Err(mk_typck_err( + #crate_path::BuiltinRowTypeCheckErrorKind::ColumnMissingForValue { + name: <_ as ::std::string::ToString>::to_string(#rust_field_names), + } + )); + } + )* + ::std::unreachable!() + } + }); + + parse_quote! { + fn serialize<'b>( + &self, + ctx: &#crate_path::RowSerializationContext, + writer: &mut #crate_path::RowWriter<'b>, + ) -> ::std::result::Result<(), #crate_path::SerializationError> { + #(#statements)* + ::std::result::Result::Ok(()) + } + } + } + + fn generate_is_empty(&self) -> syn::TraitItemFn { + let is_empty = self.ctx.fields.is_empty(); + parse_quote! 
{ + #[inline] + fn is_empty(&self) -> bool { + #is_empty + } + } + } +} diff --git a/scylla/tests/integration/hygiene.rs b/scylla/tests/integration/hygiene.rs index 12d55ccb61..cf2aaed7b3 100644 --- a/scylla/tests/integration/hygiene.rs +++ b/scylla/tests/integration/hygiene.rs @@ -64,7 +64,7 @@ macro_rules! test_crate { assert_eq!(sv, sv2); } - #[derive(_scylla::macros::SerializeCql)] + #[derive(_scylla::macros::SerializeCql, _scylla::macros::SerializeRow)] #[scylla(crate = _scylla)] struct TestStructNew { x: ::core::primitive::i32, From dcb4cf48f2841d7ad37d6849b570788398ddde41 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 27 Oct 2023 07:44:49 +0200 Subject: [PATCH 023/107] scylla-macros: implement enforce_order flavor of SerializeCql Some users might not need the additional robustness of `SerializeCql` that comes from sorting the fields before serializing, as they are used to the current behavior of `Value` and properly set the order of the fields in their Rust struct. In order to give them some performance boost, add an additional mode to `SerializeCql` called "enforce_order" which expects that the order of the fields in the struct is kept in sync with the DB definition of the UDT. It's still safe to use because, as the struct fields are serialized, their names are compared with the fields in the UDT definition order and serialization fails if the field name on some position is mismatched. 
--- scylla-cql/src/macros.rs | 19 ++- scylla-cql/src/types/serialize/value.rs | 170 ++++++++++++++++++++++++ scylla-macros/src/serialize/cql.rs | 121 ++++++++++++++++- scylla-macros/src/serialize/mod.rs | 18 +++ 4 files changed, 323 insertions(+), 5 deletions(-) diff --git a/scylla-cql/src/macros.rs b/scylla-cql/src/macros.rs index 8f53e24fa9..2b7b0b4ae7 100644 --- a/scylla-cql/src/macros.rs +++ b/scylla-cql/src/macros.rs @@ -16,9 +16,7 @@ pub use scylla_macros::ValueList; /// Derive macro for the [`SerializeCql`](crate::types::serialize::value::SerializeCql) trait /// which serializes given Rust structure as a User Defined Type (UDT). /// -/// At the moment, only structs with named fields are supported. The generated -/// implementation of the trait will match the struct fields to UDT fields -/// by name automatically. +/// At the moment, only structs with named fields are supported. /// /// Serialization will fail if there are some fields in the UDT that don't match /// to any of the Rust struct fields, _or vice versa_. @@ -50,6 +48,21 @@ pub use scylla_macros::ValueList; /// /// # Attributes /// +/// `#[scylla(flavor = "flavor_name")]` +/// +/// Allows to choose one of the possible "flavors", i.e. the way how the +/// generated code will approach serialization. Possible flavors are: +/// +/// - `"match_by_name"` (default) - the generated implementation _does not +/// require_ the fields in the Rust struct to be in the same order as the +/// fields in the UDT. During serialization, the implementation will take +/// care to serialize the fields in the order which the database expects. +/// - `"enforce_order"` - the generated implementation _requires_ the fields +/// in the Rust struct to be in the same order as the fields in the UDT. +/// If the order is incorrect, type checking/serialization will fail. +/// This is a less robust flavor than `"match_by_name"`, but should be +/// slightly more performant as it doesn't need to perform lookups by name. 
+/// /// `#[scylla(crate = crate_name)]` /// /// By default, the code generated by the derive macro will refer to the items diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 85033dac25..567b59cfab 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -1314,6 +1314,12 @@ pub enum UdtTypeCheckErrorKind { /// The Rust data contains a field that is not present in the UDT UnexpectedFieldInDestination { field_name: String }, + + /// A different field name was expected at given position. + FieldNameMismatch { + rust_field_name: String, + db_field_name: String, + }, } impl Display for UdtTypeCheckErrorKind { @@ -1337,6 +1343,10 @@ impl Display for UdtTypeCheckErrorKind { f, "the field {field_name} present in the Rust data is not present in the CQL type" ), + UdtTypeCheckErrorKind::FieldNameMismatch { rust_field_name, db_field_name } => write!( + f, + "expected field with name {db_field_name} at given position, but the Rust field name is {rust_field_name}" + ), } } } @@ -1668,4 +1678,164 @@ mod tests { check_with_type(ColumnType::Int, 123_i32, CqlValue::Int(123_i32)); check_with_type(ColumnType::Double, 123_f64, CqlValue::Double(123_f64)); } + + #[derive(SerializeCql, Debug, PartialEq, Eq, Default)] + #[scylla(crate = crate, flavor = "enforce_order")] + struct TestUdtWithEnforcedOrder { + a: String, + b: i32, + c: Vec, + } + + #[test] + fn test_udt_serialization_with_enforced_order_correct_order() { + let typ = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + ( + "c".to_string(), + ColumnType::List(Box::new(ColumnType::BigInt)), + ), + ], + }; + + let reference = do_serialize( + CqlValue::UserDefinedType { + keyspace: "ks".to_string(), + type_name: "typ".to_string(), + fields: vec![ + ( + "a".to_string(), + 
Some(CqlValue::Text(String::from("Ala ma kota"))), + ), + ("b".to_string(), Some(CqlValue::Int(42))), + ( + "c".to_string(), + Some(CqlValue::List(vec![ + CqlValue::BigInt(1), + CqlValue::BigInt(2), + CqlValue::BigInt(3), + ])), + ), + ], + }, + &typ, + ); + let udt = do_serialize( + TestUdtWithEnforcedOrder { + a: "Ala ma kota".to_owned(), + b: 42, + c: vec![1, 2, 3], + }, + &typ, + ); + + assert_eq!(reference, udt); + } + + #[test] + fn test_udt_serialization_with_enforced_order_failing_type_check() { + let typ_not_udt = ColumnType::Ascii; + let udt = TestUdtWithEnforcedOrder::default(); + + let mut data = Vec::new(); + + let err = <_ as SerializeCql>::serialize(&udt, &typ_not_udt, CellWriter::new(&mut data)) + .unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::NotUdt) + )); + + let typ = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + // Two first columns are swapped + ("b".to_string(), ColumnType::Int), + ("a".to_string(), ColumnType::Text), + ( + "c".to_string(), + ColumnType::List(Box::new(ColumnType::BigInt)), + ), + ], + }; + + let err = + <_ as SerializeCql>::serialize(&udt, &typ, CellWriter::new(&mut data)).unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::FieldNameMismatch { .. 
}) + )); + + let typ_without_c = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + // Last field is missing + ], + }; + + let err = <_ as SerializeCql>::serialize(&udt, &typ_without_c, CellWriter::new(&mut data)) + .unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::MissingField { .. }) + )); + + let typ_unexpected_field = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + ( + "c".to_string(), + ColumnType::List(Box::new(ColumnType::BigInt)), + ), + // Unexpected field + ("d".to_string(), ColumnType::Counter), + ], + }; + + let err = + <_ as SerializeCql>::serialize(&udt, &typ_unexpected_field, CellWriter::new(&mut data)) + .unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::UdtError( + UdtTypeCheckErrorKind::UnexpectedFieldInDestination { .. } + ) + )); + + let typ_unexpected_field = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + ("c".to_string(), ColumnType::TinyInt), // Wrong column type + ], + }; + + let err = + <_ as SerializeCql>::serialize(&udt, &typ_unexpected_field, CellWriter::new(&mut data)) + .unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinSerializationErrorKind::UdtError( + UdtSerializationErrorKind::FieldSerializationFailed { .. 
} + ) + )); + } } diff --git a/scylla-macros/src/serialize/cql.rs b/scylla-macros/src/serialize/cql.rs index f19e47b27c..d3c5788401 100644 --- a/scylla-macros/src/serialize/cql.rs +++ b/scylla-macros/src/serialize/cql.rs @@ -3,11 +3,15 @@ use proc_macro::TokenStream; use proc_macro2::Span; use syn::parse_quote; +use super::Flavor; + #[derive(FromAttributes)] #[darling(attributes(scylla))] struct Attributes { #[darling(rename = "crate")] crate_path: Option, + + flavor: Option, } impl Attributes { @@ -36,7 +40,11 @@ pub fn derive_serialize_cql(tokens_input: TokenStream) -> Result = match ctx.attributes.flavor { + Some(Flavor::MatchByName) | None => Box::new(FieldSortingGenerator { ctx: &ctx }), + Some(Flavor::EnforceOrder) => Box::new(FieldOrderedGenerator { ctx: &ctx }), + }; let serialize_item = gen.generate_serialize(); @@ -93,13 +101,17 @@ impl Context { } } +trait Generator { + fn generate_serialize(&self) -> syn::TraitItemFn; +} + // Generates an implementation of the trait which sorts the fields according // to how it is defined in the database. struct FieldSortingGenerator<'a> { ctx: &'a Context, } -impl<'a> FieldSortingGenerator<'a> { +impl<'a> Generator for FieldSortingGenerator<'a> { fn generate_serialize(&self) -> syn::TraitItemFn { // Need to: // - Check that all required fields are there and no more @@ -222,3 +234,108 @@ impl<'a> FieldSortingGenerator<'a> { } } } + +// Generates an implementation of the trait which requires the fields +// to be placed in the same order as they are defined in the struct. 
+struct FieldOrderedGenerator<'a> { + ctx: &'a Context, +} + +impl<'a> Generator for FieldOrderedGenerator<'a> { + fn generate_serialize(&self) -> syn::TraitItemFn { + let mut statements: Vec = Vec::new(); + + let crate_path = self.ctx.attributes.crate_path(); + + // Declare a helper lambda for creating errors + statements.push(self.ctx.generate_mk_typck_err()); + statements.push(self.ctx.generate_mk_ser_err()); + + // Check that the type we want to serialize to is a UDT + statements.push( + self.ctx + .generate_udt_type_match(parse_quote!(#crate_path::UdtTypeCheckErrorKind::NotUdt)), + ); + + // Turn the cell writer into a value builder + statements.push(parse_quote! { + let mut builder = #crate_path::CellWriter::into_value_builder(writer); + }); + + // Create an iterator over fields + statements.push(parse_quote! { + let mut field_iter = field_types.iter(); + }); + + // Serialize each field + for field in self.ctx.fields.iter() { + let rust_field_ident = field.ident.as_ref().unwrap(); + let rust_field_name = rust_field_ident.to_string(); + let typ = &field.ty; + statements.push(parse_quote! 
{ + match field_iter.next() { + Some((field_name, typ)) => { + if field_name == #rust_field_name { + let sub_builder = #crate_path::CellValueBuilder::make_sub_writer(&mut builder); + match <#typ as #crate_path::SerializeCql>::serialize(&self.#rust_field_ident, typ, sub_builder) { + Ok(_proof) => {}, + Err(err) => { + return ::std::result::Result::Err(mk_ser_err( + #crate_path::UdtSerializationErrorKind::FieldSerializationFailed { + field_name: <_ as ::std::clone::Clone>::clone(field_name), + err, + } + )); + } + } + } else { + return ::std::result::Result::Err(mk_typck_err( + #crate_path::UdtTypeCheckErrorKind::FieldNameMismatch { + rust_field_name: <_ as ::std::string::ToString>::to_string(#rust_field_name), + db_field_name: <_ as ::std::clone::Clone>::clone(field_name), + } + )); + } + } + None => { + return ::std::result::Result::Err(mk_typck_err( + #crate_path::UdtTypeCheckErrorKind::MissingField { + field_name: <_ as ::std::string::ToString>::to_string(#rust_field_name), + } + )); + } + } + }); + } + + // Check whether there are some fields remaining + statements.push(parse_quote! { + if let Some((field_name, typ)) = field_iter.next() { + return ::std::result::Result::Err(mk_typck_err( + #crate_path::UdtTypeCheckErrorKind::UnexpectedFieldInDestination { + field_name: <_ as ::std::clone::Clone>::clone(field_name), + } + )); + } + }); + + parse_quote! 
{ + fn serialize<'b>( + &self, + typ: &#crate_path::ColumnType, + writer: #crate_path::CellWriter<'b>, + ) -> ::std::result::Result<#crate_path::WrittenCellProof<'b>, #crate_path::SerializationError> { + #(#statements)* + let proof = #crate_path::CellValueBuilder::finish(builder) + .map_err(|_| #crate_path::SerializationError::new( + #crate_path::BuiltinTypeSerializationError { + rust_name: ::std::any::type_name::(), + got: <_ as ::std::clone::Clone>::clone(typ), + kind: #crate_path::BuiltinTypeSerializationErrorKind::SizeOverflow, + } + ) as #crate_path::SerializationError)?; + ::std::result::Result::Ok(proof) + } + } + } +} diff --git a/scylla-macros/src/serialize/mod.rs b/scylla-macros/src/serialize/mod.rs index 53abe0f296..183183fa91 100644 --- a/scylla-macros/src/serialize/mod.rs +++ b/scylla-macros/src/serialize/mod.rs @@ -1,2 +1,20 @@ +use darling::FromMeta; + pub(crate) mod cql; pub(crate) mod row; + +#[derive(Copy, Clone, PartialEq, Eq)] +enum Flavor { + MatchByName, + EnforceOrder, +} + +impl FromMeta for Flavor { + fn from_string(value: &str) -> darling::Result { + match value { + "match_by_name" => Ok(Self::MatchByName), + "enforce_order" => Ok(Self::EnforceOrder), + _ => Err(darling::Error::unknown_value(value)), + } + } +} From a255d81aa3e0c5e2661755153022c535ed0d15ea Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 27 Oct 2023 09:16:38 +0200 Subject: [PATCH 024/107] scylla-macros: implement enforce_order flavor of SerializeRow Like in the case of `SerializeCql`, some people might be used to working with the old `ValueList` and already order their Rust struct fields in accordance with the queries they are used with and don't need the overhead associated with looking up columns by name.
The `enforce_order` mode is added to `SerializeRow` which works analogously as in `SerializeCql` - expects the columns to be in the correct order and verifies that this is the case when serializing, but just fails instead of reordering if that expectation is broken. --- scylla-cql/src/macros.rs | 19 ++++- scylla-cql/src/types/serialize/row.rs | 110 +++++++++++++++++++++++++ scylla-macros/src/serialize/row.rs | 113 +++++++++++++++++++++++++- 3 files changed, 237 insertions(+), 5 deletions(-) diff --git a/scylla-cql/src/macros.rs b/scylla-cql/src/macros.rs index 2b7b0b4ae7..51cc79ce24 100644 --- a/scylla-cql/src/macros.rs +++ b/scylla-cql/src/macros.rs @@ -91,9 +91,7 @@ pub use scylla_macros::SerializeCql; /// Derive macro for the [`SerializeRow`](crate::types::serialize::row::SerializeRow) trait /// which serializes given Rust structure into bind markers for a CQL statement. /// -/// At the moment, only structs with named fields are supported. The generated -/// implementation of the trait will match the struct fields to bind markers/columns -/// by name automatically. +/// At the moment, only structs with named fields are supported. /// /// Serialization will fail if there are some bind markers/columns in the statement /// that don't match to any of the Rust struct fields, _or vice versa_. @@ -127,6 +125,21 @@ pub use scylla_macros::SerializeCql; /// /// # Attributes /// +/// `#[scylla(flavor = "flavor_name")]` +/// +/// Allows to choose one of the possible "flavors", i.e. the way how the +/// generated code will approach serialization. Possible flavors are: +/// +/// - `"match_by_name"` (default) - the generated implementation _does not +/// require_ the fields in the Rust struct to be in the same order as the +/// columns/bind markers. During serialization, the implementation will take +/// care to serialize the fields in the order which the database expects. 
+/// - `"enforce_order"` - the generated implementation _requires_ the fields +/// in the Rust struct to be in the same order as the columns/bind markers. +/// If the order is incorrect, type checking/serialization will fail. +/// This is a less robust flavor than `"match_by_name"`, but should be +/// slightly more performant as it doesn't need to perform lookups by name. +/// /// `#[scylla(crate = crate_name)]` /// /// By default, the code generated by the derive macro will refer to the items diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index d398a42281..213af49c0f 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -504,6 +504,12 @@ pub enum BuiltinTypeCheckErrorKind { /// A value required by the statement is not provided by the Rust type. ColumnMissingForValue { name: String }, + + /// A different column name was expected at given position. + ColumnNameMismatch { + rust_column_name: String, + db_column_name: String, + }, } impl Display for BuiltinTypeCheckErrorKind { @@ -524,6 +530,10 @@ impl Display for BuiltinTypeCheckErrorKind { "value for column {name} was provided, but there is no bind marker for this column in the query" ) } + BuiltinTypeCheckErrorKind::ColumnNameMismatch { rust_column_name, db_column_name } => write!( + f, + "expected column with name {db_column_name} at given position, but the Rust field name is {rust_column_name}" + ), } } } @@ -838,4 +848,104 @@ mod tests { check_with_type(ColumnType::Int, 123_i32); check_with_type(ColumnType::Double, 123_f64); } + + #[derive(SerializeRow, Debug, PartialEq, Eq, Default)] + #[scylla(crate = crate, flavor = "enforce_order")] + struct TestRowWithEnforcedOrder { + a: String, + b: i32, + c: Vec, + } + + #[test] + fn test_row_serialization_with_enforced_order_correct_order() { + let spec = [ + col("a", ColumnType::Text), + col("b", ColumnType::Int), + col("c", ColumnType::List(Box::new(ColumnType::BigInt))), + ]; + + 
let reference = do_serialize(("Ala ma kota", 42i32, vec![1i64, 2i64, 3i64]), &spec); + let row = do_serialize( + TestRowWithEnforcedOrder { + a: "Ala ma kota".to_owned(), + b: 42, + c: vec![1, 2, 3], + }, + &spec, + ); + + assert_eq!(reference, row); + } + + #[test] + fn test_row_serialization_with_enforced_order_failing_type_check() { + let row = TestRowWithEnforcedOrder::default(); + let mut data = Vec::new(); + let mut writer = RowWriter::new(&mut data); + + // The order of two last columns is swapped + let spec = [ + col("a", ColumnType::Text), + col("c", ColumnType::List(Box::new(ColumnType::BigInt))), + col("b", ColumnType::Int), + ]; + let ctx = RowSerializationContext { columns: &spec }; + let err = <_ as SerializeRow>::serialize(&row, &ctx, &mut writer).unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::ColumnNameMismatch { .. } + )); + + let spec_without_c = [ + col("a", ColumnType::Text), + col("b", ColumnType::Int), + // Missing column c + ]; + + let ctx = RowSerializationContext { + columns: &spec_without_c, + }; + let err = <_ as SerializeRow>::serialize(&row, &ctx, &mut writer).unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::ColumnMissingForValue { .. } + )); + + let spec_duplicate_column = [ + col("a", ColumnType::Text), + col("b", ColumnType::Int), + col("c", ColumnType::List(Box::new(ColumnType::BigInt))), + // Unexpected last column + col("d", ColumnType::Counter), + ]; + + let ctx = RowSerializationContext { + columns: &spec_duplicate_column, + }; + let err = <_ as SerializeRow>::serialize(&row, &ctx, &mut writer).unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::MissingValueForColumn { .. 
} + )); + + let spec_wrong_type = [ + col("a", ColumnType::Text), + col("b", ColumnType::Int), + col("c", ColumnType::TinyInt), // Wrong type + ]; + + let ctx = RowSerializationContext { + columns: &spec_wrong_type, + }; + let err = <_ as SerializeRow>::serialize(&row, &ctx, &mut writer).unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinSerializationErrorKind::ColumnSerializationFailed { .. } + )); + } } diff --git a/scylla-macros/src/serialize/row.rs b/scylla-macros/src/serialize/row.rs index 0dd2356041..ee0f702d27 100644 --- a/scylla-macros/src/serialize/row.rs +++ b/scylla-macros/src/serialize/row.rs @@ -3,11 +3,15 @@ use proc_macro::TokenStream; use proc_macro2::Span; use syn::parse_quote; +use super::Flavor; + #[derive(FromAttributes)] #[darling(attributes(scylla))] struct Attributes { #[darling(rename = "crate")] crate_path: Option, + + flavor: Option, } impl Attributes { @@ -36,7 +40,11 @@ pub fn derive_serialize_row(tokens_input: TokenStream) -> Result = match ctx.attributes.flavor { + Some(Flavor::MatchByName) | None => Box::new(ColumnSortingGenerator { ctx: &ctx }), + Some(Flavor::EnforceOrder) => Box::new(ColumnOrderedGenerator { ctx: &ctx }), + }; let serialize_item = gen.generate_serialize(); let is_empty_item = gen.generate_is_empty(); @@ -80,13 +88,18 @@ impl Context { } } +trait Generator { + fn generate_serialize(&self) -> syn::TraitItemFn; + fn generate_is_empty(&self) -> syn::TraitItemFn; +} + // Generates an implementation of the trait which sorts the columns according // to how they are defined in prepared statement metadata. 
struct ColumnSortingGenerator<'a> { ctx: &'a Context, } -impl<'a> ColumnSortingGenerator<'a> { +impl<'a> Generator for ColumnSortingGenerator<'a> { fn generate_serialize(&self) -> syn::TraitItemFn { // Need to: // - Check that all required columns are there and no more @@ -200,3 +213,99 @@ impl<'a> ColumnSortingGenerator<'a> { } } } + +// Generates an implementation of the trait which requires the columns +// to be placed in the same order as they are defined in the struct. +struct ColumnOrderedGenerator<'a> { + ctx: &'a Context, +} + +impl<'a> Generator for ColumnOrderedGenerator<'a> { + fn generate_serialize(&self) -> syn::TraitItemFn { + let mut statements: Vec = Vec::new(); + + let crate_path = self.ctx.attributes.crate_path(); + + // Declare a helper lambda for creating errors + statements.push(self.ctx.generate_mk_typck_err()); + statements.push(self.ctx.generate_mk_ser_err()); + + // Create an iterator over fields + statements.push(parse_quote! { + let mut column_iter = ctx.columns().iter(); + }); + + // Serialize each field + for field in self.ctx.fields.iter() { + let rust_field_ident = field.ident.as_ref().unwrap(); + let rust_field_name = rust_field_ident.to_string(); + let typ = &field.ty; + statements.push(parse_quote! 
{ + match column_iter.next() { + Some(spec) => { + if spec.name == #rust_field_name { + let cell_writer = #crate_path::RowWriter::make_cell_writer(writer); + match <#typ as #crate_path::SerializeCql>::serialize(&self.#rust_field_ident, &spec.typ, cell_writer) { + Ok(_proof) => {}, + Err(err) => { + return ::std::result::Result::Err(mk_ser_err( + #crate_path::BuiltinRowSerializationErrorKind::ColumnSerializationFailed { + name: <_ as ::std::clone::Clone>::clone(&spec.name), + err, + } + )); + } + } + } else { + return ::std::result::Result::Err(mk_typck_err( + #crate_path::BuiltinRowTypeCheckErrorKind::ColumnNameMismatch { + rust_column_name: <_ as ::std::string::ToString>::to_string(#rust_field_name), + db_column_name: <_ as ::std::clone::Clone>::clone(&spec.name), + } + )); + } + } + None => { + return ::std::result::Result::Err(mk_typck_err( + #crate_path::BuiltinRowTypeCheckErrorKind::ColumnMissingForValue { + name: <_ as ::std::string::ToString>::to_string(#rust_field_name), + } + )); + } + } + }); + } + + // Check whether there are some columns remaining + statements.push(parse_quote! { + if let Some(spec) = column_iter.next() { + return ::std::result::Result::Err(mk_typck_err( + #crate_path::BuiltinRowTypeCheckErrorKind::MissingValueForColumn { + name: <_ as ::std::clone::Clone>::clone(&spec.name), + } + )); + } + }); + + parse_quote! { + fn serialize<'b>( + &self, + ctx: &#crate_path::RowSerializationContext, + writer: &mut #crate_path::RowWriter<'b>, + ) -> ::std::result::Result<(), #crate_path::SerializationError> { + #(#statements)* + ::std::result::Result::Ok(()) + } + } + } + + fn generate_is_empty(&self) -> syn::TraitItemFn { + let is_empty = self.ctx.fields.is_empty(); + parse_quote! { + #[inline] + fn is_empty(&self) -> bool { + #is_empty + } + } + } +} From 64452ddcdf70be9e6ea9cd8dc10d99d0729d935c Mon Sep 17 00:00:00 2001 From: Yaniv Kaul Date: Sat, 9 Dec 2023 15:54:46 +0200 Subject: [PATCH 025/107] Typos: fix minor typos in code Mainly in comments. 
Fixes: https://github.com/scylladb/scylla-rust-driver/issues/875 Signed-off-by: Yaniv Kaul --- docs/source/execution-profiles/priority.md | 2 +- docs/source/tracing/tracing.md | 2 +- scylla-cql/src/frame/value.rs | 2 +- scylla-cql/src/types/serialize/writers.rs | 2 +- scylla-proxy/src/proxy.rs | 12 ++++++------ scylla/src/cloud/config.rs | 6 +++--- scylla/src/history.rs | 2 +- scylla/src/routing.rs | 2 +- scylla/src/statement/prepared_statement.rs | 2 +- scylla/src/transport/caching_session.rs | 2 +- scylla/src/transport/connection.rs | 4 ++-- scylla/src/transport/load_balancing/default.rs | 2 +- scylla/src/transport/locator/precomputed_replicas.rs | 2 +- scylla/src/transport/locator/test.rs | 2 +- scylla/src/transport/locator/token_ring.rs | 2 +- scylla/src/transport/session.rs | 4 ++-- scylla/src/transport/session_builder.rs | 2 +- scylla/src/transport/session_test.rs | 6 +++--- scylla/src/transport/silent_prepare_batch_test.rs | 2 +- test/dockerized/run.sh | 2 +- test/tls/scylla.yaml | 6 +++--- 21 files changed, 34 insertions(+), 34 deletions(-) diff --git a/docs/source/execution-profiles/priority.md b/docs/source/execution-profiles/priority.md index 92a46d50c4..4ae22d2c86 100644 --- a/docs/source/execution-profiles/priority.md +++ b/docs/source/execution-profiles/priority.md @@ -1,6 +1,6 @@ # Priorities of execution settings -You always have a default execution profile set for the `Session`, either the default one or overriden upon `Session` creation. Moreover, you can set a profile for specific statements, in which case the statement's profile has higher priority. Some options are also available for specific statements to be set directly on them, such as request timeout and consistency. In such case, the directly set options are preferred over those specified in execution profiles. +You always have a default execution profile set for the `Session`, either the default one or overridden upon `Session` creation. 
Moreover, you can set a profile for specific statements, in which case the statement's profile has higher priority. Some options are also available for specific statements to be set directly on them, such as request timeout and consistency. In such case, the directly set options are preferred over those specified in execution profiles. > **Recap**\ > Priorities are as follows:\ diff --git a/docs/source/tracing/tracing.md b/docs/source/tracing/tracing.md index 2d54fb333c..dbf50ce2c0 100644 --- a/docs/source/tracing/tracing.md +++ b/docs/source/tracing/tracing.md @@ -1,6 +1,6 @@ # Query tracing -The driver has utilites for monitoring the execution of queries. +The driver has utilities for monitoring the execution of queries. There are two separate ways to get information about what happened with a query: `Tracing` and `Query Execution History`. ### Tracing diff --git a/scylla-cql/src/frame/value.rs b/scylla-cql/src/frame/value.rs index a5fa8462f4..fa3355f4a8 100644 --- a/scylla-cql/src/frame/value.rs +++ b/scylla-cql/src/frame/value.rs @@ -493,7 +493,7 @@ pub trait BatchValuesIterator<'a> { /// Implements `BatchValuesIterator` from an `Iterator` over references to things that implement `ValueList` /// -/// Essentially used internally by this lib to provide implementors of `BatchValuesIterator` for cases +/// Essentially used internally by this lib to provide implementers of `BatchValuesIterator` for cases /// that always serialize the same concrete `ValueList` type pub struct BatchValuesIteratorFromIterator { it: IT, diff --git a/scylla-cql/src/types/serialize/writers.rs b/scylla-cql/src/types/serialize/writers.rs index 9b2be47998..cd1ccd7f62 100644 --- a/scylla-cql/src/types/serialize/writers.rs +++ b/scylla-cql/src/types/serialize/writers.rs @@ -54,7 +54,7 @@ impl<'buf> RowWriter<'buf> { /// a [`WrittenCellProof`] object is returned /// in its stead. 
This is a type-level proof that the value was fully initialized /// and is used in [`SerializeCql::serialize`](`super::value::SerializeCql::serialize`) -/// in order to enforce the implementor to fully initialize the provided handle +/// in order to enforce the implementer to fully initialize the provided handle /// to CQL value. /// /// Dropping this type without calling any of its methods will result diff --git a/scylla-proxy/src/proxy.rs b/scylla-proxy/src/proxy.rs index a4aed08c7d..11acf465d0 100644 --- a/scylla-proxy/src/proxy.rs +++ b/scylla-proxy/src/proxy.rs @@ -431,8 +431,8 @@ impl RunningProxy { } } - /// Attempts to fetch the first error that has occured in proxy since last check. - /// If no errors occured, returns Ok(()). + /// Attempts to fetch the first error that has occurred in proxy since last check. + /// If no errors occurred, returns Ok(()). pub fn sanity_check(&mut self) -> Result<(), ProxyError> { match self.error_sink.try_recv() { Ok(err) => Err(err), @@ -444,13 +444,13 @@ impl RunningProxy { } } - /// Waits until an error occurs in proxy. If proxy finishes with no errors occured, returns Err(()). + /// Waits until an error occurs in proxy. If proxy finishes with no errors occurred, returns Err(()). pub async fn wait_for_error(&mut self) -> Option { self.error_sink.recv().await } /// Requests termination of all proxy workers and awaits its completion. - /// Returns the first error that occured in proxy. + /// Returns the first error that occurred in proxy. 
pub async fn finish(mut self) -> Result<(), ProxyError> { self.terminate_signaler.send(()).map_err(|err| { ProxyError::AwaitFinishFailure(format!( @@ -768,7 +768,7 @@ impl Doorkeeper { // If ShardAwareness is aware (QueryNode or FixedNum variants) and the // proxy succeeded to know the shards count (in FixedNum we get it for - // free, in QueryNode the initial Options query succceeded and Supported + // free, in QueryNode the initial Options query succeeded and Supported // contained SCYLLA_SHARDS_NUM), then upon opening each connection to the // node, the proxy issues another Options requests and acknowledges the // shard it got connected to. @@ -2446,7 +2446,7 @@ mod tests { ) .await; - // Messages after REGISTER should be passed trough without feedback + // Messages after REGISTER should be passed through without feedback for i in 0..5 { perform_reqest_response( RequestOpcode::Query, diff --git a/scylla/src/cloud/config.rs b/scylla/src/cloud/config.rs index 74a088726a..9984cba34e 100644 --- a/scylla/src/cloud/config.rs +++ b/scylla/src/cloud/config.rs @@ -184,13 +184,13 @@ mod deserialize { // +optional apiVersion: Option, - // Datacenters is a map of referencable names to datacenter configs. + // Datacenters is a map of referenceable names to datacenter configs. datacenters: HashMap, - // AuthInfos is a map of referencable names to authentication configs. + // AuthInfos is a map of referenceable names to authentication configs. authInfos: HashMap, - // Contexts is a map of referencable names to context configs. + // Contexts is a map of referenceable names to context configs. contexts: HashMap, // CurrentContext is the name of the context that you would like to use by default. 
diff --git a/scylla/src/history.rs b/scylla/src/history.rs index 34c2c19244..9109601c23 100644 --- a/scylla/src/history.rs +++ b/scylla/src/history.rs @@ -56,7 +56,7 @@ pub trait HistoryListener: Debug + Send + Sync { node_addr: SocketAddr, ) -> AttemptId; - /// Log that an attempt succeded. + /// Log that an attempt succeeded. fn log_attempt_success(&self, attempt_id: AttemptId); /// Log that an attempt ended with an error. The error and decision whether to retry the attempt are also included in the log. diff --git a/scylla/src/routing.rs b/scylla/src/routing.rs index d6fff76466..e54dfcaec9 100644 --- a/scylla/src/routing.rs +++ b/scylla/src/routing.rs @@ -102,7 +102,7 @@ impl Sharder { pub enum ShardingError { #[error("ShardInfo parameters missing")] MissingShardInfoParameter, - #[error("ShardInfo parameters missing after unwraping")] + #[error("ShardInfo parameters missing after unwrapping")] MissingUnwrapedShardInfoParameter, #[error("ShardInfo contains an invalid number of shards (0)")] ZeroShards, diff --git a/scylla/src/statement/prepared_statement.rs b/scylla/src/statement/prepared_statement.rs index 58d8b9ea3d..8abdf6bd91 100644 --- a/scylla/src/statement/prepared_statement.rs +++ b/scylla/src/statement/prepared_statement.rs @@ -145,7 +145,7 @@ impl PreparedStatement { Ok(buf.freeze()) } - /// Determines which values consistute the partition key and puts them in order. + /// Determines which values constitute the partition key and puts them in order. /// /// This is a preparation step necessary for calculating token based on a prepared statement. 
pub(crate) fn extract_partition_key<'ps>( diff --git a/scylla/src/transport/caching_session.rs b/scylla/src/transport/caching_session.rs index 82e12b1ab2..3d3dfa0e17 100644 --- a/scylla/src/transport/caching_session.rs +++ b/scylla/src/transport/caching_session.rs @@ -381,7 +381,7 @@ mod tests { for expected_row in expected_rows.iter() { if !selected_rows.contains(expected_row) { panic!( - "Expected {:?} to contain row: {:?}, but they didnt", + "Expected {:?} to contain row: {:?}, but they didn't", selected_rows, expected_row ); } diff --git a/scylla/src/transport/connection.rs b/scylla/src/transport/connection.rs index de79c5d130..2ecd37b290 100644 --- a/scylla/src/transport/connection.rs +++ b/scylla/src/transport/connection.rs @@ -290,7 +290,7 @@ mod ssl_config { /// This struct encapsulates all Ssl-regarding configuration and helps pass it tidily through the code. // // There are 3 possible options for SslConfig, whose behaviour is somewhat subtle. - // Option 1: No ssl configuration. Then it is None everytime. + // Option 1: No ssl configuration. Then it is None every time. // Option 2: User-provided global SslContext. Then, a SslConfig is created upon Session creation // and henceforth stored in the ConnectionConfig. // Option 3: Serverless Cloud. The Option remains None in ConnectionConfig until it reaches @@ -1334,7 +1334,7 @@ impl Connection { // or passing the negotiated features via a channel/mutex/etc. // Fortunately, events do not need information about protocol features // to be serialized (yet), therefore I'm leaving this problem for - // future implementors. + // future implementers. 
let features = ProtocolFeatures::default(); // TODO: Use the right features let response = Self::parse_response(task_response, compression, &features)?.response; diff --git a/scylla/src/transport/load_balancing/default.rs b/scylla/src/transport/load_balancing/default.rs index d1554babf1..3fdeef18ef 100644 --- a/scylla/src/transport/load_balancing/default.rs +++ b/scylla/src/transport/load_balancing/default.rs @@ -987,7 +987,7 @@ mod tests { // and just `assert_eq` them let mut got = got.iter(); for (group_id, expected) in self.groups.iter().enumerate() { - // Collect the nodes that consistute the group + // Collect the nodes that constitute the group // in the actual plan let got_group: Vec<_> = (&mut got).take(expected.len()).copied().collect(); diff --git a/scylla/src/transport/locator/precomputed_replicas.rs b/scylla/src/transport/locator/precomputed_replicas.rs index de6d5e1a63..0a9256b7e8 100644 --- a/scylla/src/transport/locator/precomputed_replicas.rs +++ b/scylla/src/transport/locator/precomputed_replicas.rs @@ -10,7 +10,7 @@ //! to compute those lists for each strategy used in cluster. //! //! Notes on Network Topology Strategy precomputation: -//! The optimization mentioned above works ony if requested `replication factor` is <= `rack count`. +//! The optimization mentioned above works only if requested `replication factor` is <= `rack count`. 
use super::replication_info::ReplicationInfo; use super::TokenRing; diff --git a/scylla/src/transport/locator/test.rs b/scylla/src/transport/locator/test.rs index d09a22d7c1..bb74ee0469 100644 --- a/scylla/src/transport/locator/test.rs +++ b/scylla/src/transport/locator/test.rs @@ -496,7 +496,7 @@ fn test_replica_set_choose(locator: &ReplicaLocator) { || locator.replicas_for_token(Token { value: 75 }, &strategy, None); // Verify that after a certain number of random selections, the set of selected replicas - // will contain all nodes in the ring (replica set was created usin a strategy with + // will contain all nodes in the ring (replica set was created using a strategy with // replication factors higher than node count). let mut chosen_replicas = HashSet::new(); for _ in 0..32 { diff --git a/scylla/src/transport/locator/token_ring.rs b/scylla/src/transport/locator/token_ring.rs index 686d8e0a90..cd5b4de8f3 100644 --- a/scylla/src/transport/locator/token_ring.rs +++ b/scylla/src/transport/locator/token_ring.rs @@ -47,7 +47,7 @@ impl TokenRing { /// Provides an iterator over the ring's elements starting at the given token. /// The iterator traverses the whole ring in the direction of increasing tokens. /// After reaching the maximum token it wraps around and continues from the lowest one. - /// The iterator visits each member once, it doesn't have an infinte length. + /// The iterator visits each member once, it doesn't have an infinite length. /// To access the token along with the element you can use `ring_range_full`. 
pub fn ring_range(&self, token: Token) -> impl Iterator { self.ring_range_full(token).map(|(_t, e)| e) diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index 24cc481c93..39fbdf78b5 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -116,7 +116,7 @@ impl AddressTranslator for HashMap { } #[async_trait] -// Notice: this is unefficient, but what else can we do with such poor representation as str? +// Notice: this is inefficient, but what else can we do with such poor representation as str? // After all, the cluster size is small enough to make this irrelevant. impl AddressTranslator for HashMap<&'static str, &'static str> { async fn translate_address( @@ -444,7 +444,7 @@ pub(crate) enum RunQueryResult { /// Represents a CQL session, which can be used to communicate /// with the database impl Session { - /// Estabilishes a CQL session with the database + /// Establishes a CQL session with the database /// /// Usually it's easier to use [SessionBuilder](crate::transport::session_builder::SessionBuilder) /// instead of calling `Session::connect` directly, because it's more convenient. diff --git a/scylla/src/transport/session_builder.rs b/scylla/src/transport/session_builder.rs index 394a348ca2..09ee03b961 100644 --- a/scylla/src/transport/session_builder.rs +++ b/scylla/src/transport/session_builder.rs @@ -708,7 +708,7 @@ impl GenericSessionBuilder { pub fn keepalive_timeout(mut self, timeout: Duration) -> Self { if timeout <= Duration::from_secs(1) { warn!( - "Setting the keepalive timeout to low values ({:?}) is not recommended as it may aggresively close connections. Consider setting it above 5 seconds.", + "Setting the keepalive timeout to low values ({:?}) is not recommended as it may aggressively close connections. 
Consider setting it above 5 seconds.", timeout ); } diff --git a/scylla/src/transport/session_test.rs b/scylla/src/transport/session_test.rs index 805217053d..79df0834e3 100644 --- a/scylla/src/transport/session_test.rs +++ b/scylla/src/transport/session_test.rs @@ -724,7 +724,7 @@ async fn test_use_keyspace_case_sensitivity() { .await .unwrap(); - // Use uppercase keyspace without case sesitivity + // Use uppercase keyspace without case sensitivity // Should select the lowercase one session.use_keyspace(ks_upper.clone(), false).await.unwrap(); @@ -740,7 +740,7 @@ async fn test_use_keyspace_case_sensitivity() { assert_eq!(rows, vec!["lowercase".to_string()]); - // Use uppercase keyspace with case sesitivity + // Use uppercase keyspace with case sensitivity // Should select the uppercase one session.use_keyspace(ks_upper, true).await.unwrap(); @@ -2221,7 +2221,7 @@ async fn assert_test_batch_table_rows_contain(sess: &Session, expected_rows: &[( for expected_row in expected_rows.iter() { if !selected_rows.contains(expected_row) { panic!( - "Expected {:?} to contain row: {:?}, but they didnt", + "Expected {:?} to contain row: {:?}, but they didn't", selected_rows, expected_row ); } diff --git a/scylla/src/transport/silent_prepare_batch_test.rs b/scylla/src/transport/silent_prepare_batch_test.rs index 3a2ed83baa..469c90b49f 100644 --- a/scylla/src/transport/silent_prepare_batch_test.rs +++ b/scylla/src/transport/silent_prepare_batch_test.rs @@ -102,7 +102,7 @@ async fn assert_test_batch_table_rows_contain(sess: &Session, expected_rows: &[( for expected_row in expected_rows.iter() { if !selected_rows.contains(expected_row) { panic!( - "Expected {:?} to contain row: {:?}, but they didnt", + "Expected {:?} to contain row: {:?}, but they didn't", selected_rows, expected_row ); } diff --git a/test/dockerized/run.sh b/test/dockerized/run.sh index 54c44ea2cb..c7bb989584 100755 --- a/test/dockerized/run.sh +++ b/test/dockerized/run.sh @@ -9,7 +9,7 @@ fi 
IMAGE_NAME="scylla_rust_driver_testing" -# Build a new image with embeded driver source files and deletes the +# Build a new image with embedded driver source files and deletes the # previously built image docker tag "$IMAGE_NAME:latest" "$IMAGE_NAME:previous" &>/dev/null if docker build -f test/dockerized/Dockerfile -t "$IMAGE_NAME:latest" . ; then diff --git a/test/tls/scylla.yaml b/test/tls/scylla.yaml index ee4eee6a45..cd36533c4d 100644 --- a/test/tls/scylla.yaml +++ b/test/tls/scylla.yaml @@ -361,7 +361,7 @@ commitlog_total_space_in_mb: -1 # tombstones seen in memory so we can return them to the coordinator, which # will use them to make sure other replicas also know about the deleted rows. # With workloads that generate a lot of tombstones, this can cause performance -# problems and even exaust the server heap. +# problems and even exhaust the server heap. # (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) # Adjust the thresholds here if you understand the dangers and want to # scan more tombstones anyway. These thresholds may also be adjusted at runtime @@ -460,7 +460,7 @@ client_encryption_options: # not met, performance and reliability can be degraded. # # These requirements include: -# - A filesystem with good support for aysnchronous I/O (AIO). Currently, +# - A filesystem with good support for asynchronous I/O (AIO). Currently, # this means XFS. # # false: strict environment checks are in place; do not start if they are not met. @@ -493,7 +493,7 @@ client_encryption_options: # [shard0] [shard1] ... [shardN-1] [shard0] [shard1] ... [shardN-1] ... # # Scylla versions 1.6 and below used just one repetition of the pattern; -# this intefered with data placement among nodes (vnodes). +# this interfered with data placement among nodes (vnodes). # # Scylla versions 1.7 and above use 4096 repetitions of the pattern; this # provides for better data distribution. 
From 0b0d02d8c7af1720bcaddc1a0d49d55fd01dc017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Sat, 11 Nov 2023 18:19:28 +0100 Subject: [PATCH 026/107] RowSerializationContext: Add from_prepared method This was a missing method allowing actually creating this struct. --- scylla-cql/src/types/serialize/row.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 213af49c0f..8e97044b11 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -6,6 +6,7 @@ use std::{collections::HashMap, sync::Arc}; use thiserror::Error; +use crate::frame::response::result::PreparedMetadata; use crate::frame::value::{SerializedValues, ValueList}; use crate::frame::{response::result::ColumnSpec, types::RawValue}; @@ -18,6 +19,13 @@ pub struct RowSerializationContext<'a> { } impl<'a> RowSerializationContext<'a> { + #[inline] + pub fn from_prepared(prepared: &'a PreparedMetadata) -> Self { + Self { + columns: prepared.col_specs.as_slice(), + } + } + /// Returns column/bind marker specifications for given query. #[inline] pub fn columns(&self) -> &'a [ColumnSpec] { From 18d639358e751851236a225f96a8ac7178c92f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Wed, 8 Nov 2023 19:54:26 +0100 Subject: [PATCH 027/107] scylla-cql: Add SerializationError variant for QueryError This commit adds new variant of QueryError and a necessary From impl. 
--- scylla-cql/src/errors.rs | 10 ++++++++++ scylla-cql/src/frame/frame_errors.rs | 3 +++ 2 files changed, 13 insertions(+) diff --git a/scylla-cql/src/errors.rs b/scylla-cql/src/errors.rs index 9e80247e20..e884e37ad5 100644 --- a/scylla-cql/src/errors.rs +++ b/scylla-cql/src/errors.rs @@ -3,6 +3,7 @@ use crate::frame::frame_errors::{FrameError, ParseError}; use crate::frame::protocol_features::ProtocolFeatures; use crate::frame::value::SerializeValuesError; +use crate::types::serialize::SerializationError; use crate::Consistency; use bytes::Bytes; use std::io::ErrorKind; @@ -340,6 +341,9 @@ pub enum BadQuery { #[error("Serializing values failed: {0} ")] SerializeValuesError(#[from] SerializeValuesError), + #[error("Serializing values failed: {0} ")] + SerializationError(#[from] SerializationError), + /// Serialized values are too long to compute partition key #[error("Serialized values are too long to compute partition key! Length: {0}, Max allowed length: {1}")] ValuesTooLongForKey(usize, usize), @@ -443,6 +447,12 @@ impl From for QueryError { } } +impl From for QueryError { + fn from(serialized_err: SerializationError) -> QueryError { + QueryError::BadQuery(BadQuery::SerializationError(serialized_err)) + } +} + impl From for QueryError { fn from(parse_error: ParseError) -> QueryError { QueryError::InvalidMessage(format!("Error parsing message: {}", parse_error)) diff --git a/scylla-cql/src/frame/frame_errors.rs b/scylla-cql/src/frame/frame_errors.rs index 3da4e26d01..9a3b228505 100644 --- a/scylla-cql/src/frame/frame_errors.rs +++ b/scylla-cql/src/frame/frame_errors.rs @@ -1,6 +1,7 @@ use super::response; use crate::cql_to_rust::CqlTypeError; use crate::frame::value::SerializeValuesError; +use crate::types::serialize::SerializationError; use thiserror::Error; #[derive(Error, Debug)] @@ -44,5 +45,7 @@ pub enum ParseError { #[error(transparent)] SerializeValuesError(#[from] SerializeValuesError), #[error(transparent)] + SerializationError(#[from] 
SerializationError), + #[error(transparent)] CqlTypeError(#[from] CqlTypeError), } From ca30092592b369bc12bda37efaae0eee196699e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Wed, 8 Nov 2023 21:33:28 +0100 Subject: [PATCH 028/107] connection.rs: Don't allow queries with values After serialization refactor it will be impossible to perform unprepared query with values, because serializing values will require knowing column types. In order to make refactor easier and better split responsibility, this commit removes `values` arguments from `Connection` methods, so that it is callers responsibility to prepare the query if necessary. --- scylla/src/transport/connection.rs | 115 ++++++++++++++++------------- scylla/src/transport/iterator.rs | 58 +++++++++++---- scylla/src/transport/session.rs | 63 +++++++++++----- scylla/src/transport/topology.rs | 13 +++- 4 files changed, 159 insertions(+), 90 deletions(-) diff --git a/scylla/src/transport/connection.rs b/scylla/src/transport/connection.rs index 2ecd37b290..12dfc61540 100644 --- a/scylla/src/transport/connection.rs +++ b/scylla/src/transport/connection.rs @@ -4,6 +4,7 @@ use scylla_cql::errors::TranslationError; use scylla_cql::frame::request::options::Options; use scylla_cql::frame::response::Error; use scylla_cql::frame::types::SerialConsistency; +use scylla_cql::frame::value::SerializedValues; use socket2::{SockRef, TcpKeepalive}; use tokio::io::{split, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader, BufWriter}; use tokio::net::{TcpSocket, TcpStream}; @@ -596,7 +597,6 @@ impl Connection { pub(crate) async fn query_single_page( &self, query: impl Into, - values: impl ValueList, ) -> Result { let query: Query = query.into(); @@ -606,24 +606,18 @@ impl Connection { .determine_consistency(self.config.default_consistency); let serial_consistency = query.config.serial_consistency; - self.query_single_page_with_consistency( - query, - &values, - consistency, - serial_consistency.flatten(), - ) - 
.await + self.query_single_page_with_consistency(query, consistency, serial_consistency.flatten()) + .await } pub(crate) async fn query_single_page_with_consistency( &self, query: impl Into, - values: impl ValueList, consistency: Consistency, serial_consistency: Option, ) -> Result { let query: Query = query.into(); - self.query_with_consistency(&query, &values, consistency, serial_consistency, None) + self.query_with_consistency(&query, consistency, serial_consistency, None) .await? .into_query_result() } @@ -631,13 +625,11 @@ impl Connection { pub(crate) async fn query( &self, query: &Query, - values: impl ValueList, paging_state: Option, ) -> Result { // This method is used only for driver internal queries, so no need to consult execution profile here. self.query_with_consistency( query, - values, query .config .determine_consistency(self.config.default_consistency), @@ -650,33 +642,16 @@ impl Connection { pub(crate) async fn query_with_consistency( &self, query: &Query, - values: impl ValueList, consistency: Consistency, serial_consistency: Option, paging_state: Option, ) -> Result { - let serialized_values = values.serialized()?; - - let values_size = serialized_values.size(); - if values_size != 0 { - let prepared = self.prepare(query).await?; - return self - .execute_with_consistency( - &prepared, - values, - consistency, - serial_consistency, - paging_state, - ) - .await; - } - let query_frame = query::Query { contents: Cow::Borrowed(&query.contents), parameters: query::QueryParameters { consistency, serial_consistency, - values: serialized_values, + values: Cow::Borrowed(SerializedValues::EMPTY), page_size: query.get_page_size(), paging_state, timestamp: query.get_timestamp(), @@ -687,6 +662,26 @@ impl Connection { .await } + #[allow(dead_code)] + pub(crate) async fn execute( + &self, + prepared: PreparedStatement, + values: impl ValueList, + paging_state: Option, + ) -> Result { + // This method is used only for driver internal queries, so no need to 
consult execution profile here. + self.execute_with_consistency( + &prepared, + values, + prepared + .config + .determine_consistency(self.config.default_consistency), + prepared.config.serial_consistency.flatten(), + paging_state, + ) + .await + } + pub(crate) async fn execute_with_consistency( &self, prepared_statement: &PreparedStatement, @@ -734,19 +729,33 @@ impl Connection { pub(crate) async fn query_iter( self: Arc, query: Query, - values: impl ValueList, ) -> Result { - let serialized_values = values.serialized()?.into_owned(); - let consistency = query .config .determine_consistency(self.config.default_consistency); let serial_consistency = query.config.serial_consistency.flatten(); - RowIterator::new_for_connection_query_iter( - query, + RowIterator::new_for_connection_query_iter(query, self, consistency, serial_consistency) + .await + } + + /// Executes a prepared statements and fetches its results over multiple pages, using + /// the asynchronous iterator interface. + pub(crate) async fn execute_iter( + self: Arc, + prepared_statement: PreparedStatement, + values: impl ValueList, + ) -> Result { + let consistency = prepared_statement + .config + .determine_consistency(self.config.default_consistency); + let serial_consistency = prepared_statement.config.serial_consistency.flatten(); + let serialized = values.serialized()?.into_owned(); + + RowIterator::new_for_connection_execute_iter( + prepared_statement, + serialized, self, - serialized_values, consistency, serial_consistency, ) @@ -885,7 +894,7 @@ impl Connection { false => format!("USE {}", keyspace_name.as_str()).into(), }; - let query_response = self.query(&query, (), None).await?; + let query_response = self.query(&query, None).await?; match query_response.response { Response::Result(result::Result::SetKeyspace(set_keyspace)) => { @@ -929,7 +938,7 @@ impl Connection { pub(crate) async fn fetch_schema_version(&self) -> Result { let (version_id,): (Uuid,) = self - .query_single_page(LOCAL_VERSION, 
&[]) + .query_single_page(LOCAL_VERSION) .await? .rows .ok_or(QueryError::ProtocolError("Version query returned not rows"))? @@ -1833,7 +1842,6 @@ mod tests { use super::ConnectionConfig; use crate::query::Query; use crate::transport::connection::open_connection; - use crate::transport::connection::QueryResponse; use crate::transport::node::ResolvedContactPoint; use crate::transport::topology::UntranslatedEndpoint; use crate::utils::test_utils::unique_keyspace_name; @@ -1914,7 +1922,7 @@ mod tests { let select_query = Query::new("SELECT p FROM connection_query_iter_tab").with_page_size(7); let empty_res = connection .clone() - .query_iter(select_query.clone(), &[]) + .query_iter(select_query.clone()) .await .unwrap() .try_collect::>() @@ -1927,15 +1935,18 @@ mod tests { let mut insert_futures = Vec::new(); let insert_query = Query::new("INSERT INTO connection_query_iter_tab (p) VALUES (?)").with_page_size(7); + let prepared = connection.prepare(&insert_query).await.unwrap(); for v in &values { - insert_futures.push(connection.query_single_page(insert_query.clone(), (v,))); + let prepared_clone = prepared.clone(); + let fut = async { connection.execute(prepared_clone, (*v,), None).await }; + insert_futures.push(fut); } futures::future::try_join_all(insert_futures).await.unwrap(); let mut results: Vec = connection .clone() - .query_iter(select_query.clone(), &[]) + .query_iter(select_query.clone()) .await .unwrap() .into_typed::<(i32,)>() @@ -1947,7 +1958,9 @@ mod tests { // 3. INSERT query_iter should work and not return any rows. 
let insert_res1 = connection - .query_iter(insert_query, (0,)) + .query_iter(Query::new( + "INSERT INTO connection_query_iter_tab (p) VALUES (0)", + )) .await .unwrap() .try_collect::>() @@ -2007,10 +2020,7 @@ mod tests { .await .unwrap(); - connection - .query(&"TRUNCATE t".into(), (), None) - .await - .unwrap(); + connection.query(&"TRUNCATE t".into(), None).await.unwrap(); let mut futs = Vec::new(); @@ -2025,8 +2035,9 @@ mod tests { let q = Query::new("INSERT INTO t (p, v) VALUES (?, ?)"); let conn = conn.clone(); async move { - let response: QueryResponse = conn - .query(&q, (j, vec![j as u8; j as usize]), None) + let prepared = conn.prepare(&q).await.unwrap(); + let response = conn + .execute(prepared.clone(), (j, vec![j as u8; j as usize]), None) .await .unwrap(); // QueryResponse might contain an error - make sure that there were no errors @@ -2045,7 +2056,7 @@ mod tests { // Check that everything was written properly let range_end = arithmetic_sequence_sum(NUM_BATCHES); let mut results = connection - .query(&"SELECT p, v FROM t".into(), (), None) + .query(&"SELECT p, v FROM t".into(), None) .await .unwrap() .into_query_result() @@ -2198,7 +2209,7 @@ mod tests { // As everything is normal, these queries should succeed. for _ in 0..3 { tokio::time::sleep(Duration::from_millis(500)).await; - conn.query_single_page("SELECT host_id FROM system.local", ()) + conn.query_single_page("SELECT host_id FROM system.local") .await .unwrap(); } @@ -2218,7 +2229,7 @@ mod tests { // As the router is invalidated, all further queries should immediately // return error. 
- conn.query_single_page("SELECT host_id FROM system.local", ()) + conn.query_single_page("SELECT host_id FROM system.local") .await .unwrap_err(); diff --git a/scylla/src/transport/iterator.rs b/scylla/src/transport/iterator.rs index e9389992ed..a7dc4969b2 100644 --- a/scylla/src/transport/iterator.rs +++ b/scylla/src/transport/iterator.rs @@ -128,7 +128,6 @@ impl RowIterator { pub(crate) async fn new_for_query( mut query: Query, - values: SerializedValues, execution_profile: Arc, cluster_data: Arc, metrics: Arc, @@ -162,29 +161,27 @@ impl RowIterator { let parent_span = tracing::Span::current(); let worker_task = async move { let query_ref = &query; - let values_ref = &values; let choose_connection = |node: Arc| async move { node.random_connection().await }; let page_query = |connection: Arc, consistency: Consistency, - paging_state: Option| async move { - connection - .query_with_consistency( - query_ref, - values_ref, - consistency, - serial_consistency, - paging_state, - ) - .await + paging_state: Option| { + async move { + connection + .query_with_consistency( + query_ref, + consistency, + serial_consistency, + paging_state, + ) + .await + } }; let query_ref = &query; - let serialized_values_size = values.size(); - let span_creator = - move || RequestSpan::new_query(&query_ref.contents, serialized_values_size); + let span_creator = move || RequestSpan::new_query(&query_ref.contents, 0); let worker = RowIteratorWorker { sender: sender.into(), @@ -337,7 +334,6 @@ impl RowIterator { pub(crate) async fn new_for_connection_query_iter( mut query: Query, connection: Arc, - values: SerializedValues, consistency: Consistency, serial_consistency: Option, ) -> Result { @@ -352,6 +348,36 @@ impl RowIterator { fetcher: |paging_state| { connection.query_with_consistency( &query, + consistency, + serial_consistency, + paging_state, + ) + }, + }; + worker.work().await + }; + + Self::new_from_worker_future(worker_task, receiver).await + } + + pub(crate) async fn 
new_for_connection_execute_iter( + mut prepared: PreparedStatement, + values: SerializedValues, + connection: Arc, + consistency: Consistency, + serial_consistency: Option, + ) -> Result { + if prepared.get_page_size().is_none() { + prepared.set_page_size(DEFAULT_ITER_PAGE_SIZE); + } + let (sender, receiver) = mpsc::channel::>(1); + + let worker_task = async move { + let worker = SingleConnectionRowIteratorWorker { + sender: sender.into(), + fetcher: |paging_state| { + connection.execute_with_consistency( + &prepared, &values, consistency, serial_consistency, diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index 39fbdf78b5..5a31796065 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -659,16 +659,29 @@ impl Session { let values_ref = &serialized_values; let paging_state_ref = &paging_state; async move { - connection - .query_with_consistency( - query_ref, - values_ref, - consistency, - serial_consistency, - paging_state_ref.clone(), - ) - .await - .and_then(QueryResponse::into_non_error_query_response) + if values_ref.is_empty() { + connection + .query_with_consistency( + query_ref, + consistency, + serial_consistency, + paging_state_ref.clone(), + ) + .await + .and_then(QueryResponse::into_non_error_query_response) + } else { + let prepared = connection.prepare(query_ref).await?; + connection + .execute_with_consistency( + &prepared, + values_ref, + consistency, + serial_consistency, + paging_state_ref.clone(), + ) + .await + .and_then(QueryResponse::into_non_error_query_response) + } } }, &span, @@ -774,14 +787,28 @@ impl Session { .unwrap_or_else(|| self.get_default_execution_profile_handle()) .access(); - RowIterator::new_for_query( - query, - serialized_values.into_owned(), - execution_profile, - self.cluster.get_data(), - self.metrics.clone(), - ) - .await + if serialized_values.is_empty() { + RowIterator::new_for_query( + query, + execution_profile, + self.cluster.get_data(), + 
self.metrics.clone(), + ) + .await + } else { + // Making RowIterator::new_for_query work with values is too hard (if even possible) + // so instead of sending one prepare to a specific connection on each iterator query, + // we fully prepare a statement beforehand. + let prepared = self.prepare(query).await?; + RowIterator::new_for_prepared_statement(PreparedIteratorConfig { + prepared, + values: serialized_values.into_owned(), + execution_profile, + cluster_data: self.cluster.get_data(), + metrics: self.metrics.clone(), + }) + .await + } } /// Prepares a statement on the server side and returns a prepared statement, diff --git a/scylla/src/transport/topology.rs b/scylla/src/transport/topology.rs index 63ee14f5b2..bc8a80ca32 100644 --- a/scylla/src/transport/topology.rs +++ b/scylla/src/transport/topology.rs @@ -751,7 +751,7 @@ async fn query_peers(conn: &Arc, connect_port: u16) -> Result, connect_port: u16) -> Result Date: Tue, 5 Dec 2023 16:30:42 +0100 Subject: [PATCH 029/107] Rename SerializedValues to LegacySerializedValues After serialization refactor, there will be a new struct called SerializedValues. In order to make transition easier, we decided to retain old structs and traits for now (probably 1 release), so that quick temporary update (before proper migration to new traits) can be performed with simple replacements in the code. 
--- scylla-cql/benches/benchmark.rs | 4 +- scylla-cql/src/frame/request/batch.rs | 8 +-- scylla-cql/src/frame/request/mod.rs | 12 ++--- scylla-cql/src/frame/request/query.rs | 10 ++-- scylla-cql/src/frame/value.rs | 57 ++++++++++++---------- scylla-cql/src/frame/value_tests.rs | 22 ++++----- scylla-cql/src/lib.rs | 2 +- scylla-cql/src/types/serialize/row.rs | 16 +++--- scylla-macros/src/value_list.rs | 2 +- scylla/src/statement/prepared_statement.rs | 16 +++--- scylla/src/transport/connection.rs | 4 +- scylla/src/transport/iterator.rs | 6 +-- scylla/src/transport/partitioner.rs | 4 +- scylla/src/transport/session.rs | 2 +- 14 files changed, 84 insertions(+), 81 deletions(-) diff --git a/scylla-cql/benches/benchmark.rs b/scylla-cql/benches/benchmark.rs index 0aa6c89102..77525194f0 100644 --- a/scylla-cql/benches/benchmark.rs +++ b/scylla-cql/benches/benchmark.rs @@ -3,11 +3,11 @@ use std::borrow::Cow; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use scylla_cql::frame::request::SerializableRequest; -use scylla_cql::frame::value::SerializedValues; +use scylla_cql::frame::value::LegacySerializedValues; use scylla_cql::frame::value::ValueList; use scylla_cql::frame::{request::query, Compression, SerializedRequest}; -fn make_query<'a>(contents: &'a str, values: &'a SerializedValues) -> query::Query<'a> { +fn make_query<'a>(contents: &'a str, values: &'a LegacySerializedValues) -> query::Query<'a> { query::Query { contents: Cow::Borrowed(contents), parameters: query::QueryParameters { diff --git a/scylla-cql/src/frame/request/batch.rs b/scylla-cql/src/frame/request/batch.rs index 35dd8c3c3b..5b5c2f84b6 100644 --- a/scylla-cql/src/frame/request/batch.rs +++ b/scylla-cql/src/frame/request/batch.rs @@ -5,7 +5,7 @@ use crate::frame::{ frame_errors::ParseError, request::{RequestOpcode, SerializableRequest}, types::{self, SerialConsistency}, - value::{BatchValues, BatchValuesIterator, SerializedValues}, + value::{BatchValues, BatchValuesIterator, 
LegacySerializedValues}, }; use super::DeserializableRequest; @@ -186,7 +186,7 @@ impl<'s, 'b> From<&'s BatchStatement<'b>> for BatchStatement<'s> { } } -impl<'b> DeserializableRequest for Batch<'b, BatchStatement<'b>, Vec> { +impl<'b> DeserializableRequest for Batch<'b, BatchStatement<'b>, Vec> { fn deserialize(buf: &mut &[u8]) -> Result { let batch_type = buf.get_u8().try_into()?; @@ -196,7 +196,7 @@ impl<'b> DeserializableRequest for Batch<'b, BatchStatement<'b>, Vec DeserializableRequest for Batch<'b, BatchStatement<'b>, Vec, Vec) = + let (statements, values): (Vec, Vec) = statements_with_values.into_iter().unzip(); Ok(Self { diff --git a/scylla-cql/src/frame/request/mod.rs b/scylla-cql/src/frame/request/mod.rs index cd41d6bce1..82a86220d8 100644 --- a/scylla-cql/src/frame/request/mod.rs +++ b/scylla-cql/src/frame/request/mod.rs @@ -22,7 +22,7 @@ pub use startup::Startup; use self::batch::BatchStatement; use super::types::SerialConsistency; -use super::value::SerializedValues; +use super::value::LegacySerializedValues; #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, TryFromPrimitive)] #[repr(u8)] @@ -59,7 +59,7 @@ pub trait DeserializableRequest: SerializableRequest + Sized { pub enum Request<'r> { Query(Query<'r>), Execute(Execute<'r>), - Batch(Batch<'r, BatchStatement<'r>, Vec>), + Batch(Batch<'r, BatchStatement<'r>, Vec>), } impl<'r> Request<'r> { @@ -113,7 +113,7 @@ mod tests { DeserializableRequest, SerializableRequest, }, types::{self, SerialConsistency}, - value::SerializedValues, + value::LegacySerializedValues, }, Consistency, }; @@ -129,7 +129,7 @@ mod tests { page_size: Some(323), paging_state: Some(vec![2, 1, 3, 7].into()), values: { - let mut vals = SerializedValues::new(); + let mut vals = LegacySerializedValues::new(); vals.add_value(&2137).unwrap(); Cow::Owned(vals) }, @@ -156,7 +156,7 @@ mod tests { page_size: None, paging_state: None, values: { - let mut vals = SerializedValues::new(); + let mut vals = 
LegacySerializedValues::new(); vals.add_named_value("the_answer", &42).unwrap(); vals.add_named_value("really?", &2137).unwrap(); Cow::Owned(vals) @@ -212,7 +212,7 @@ mod tests { timestamp: None, page_size: None, paging_state: None, - values: Cow::Owned(SerializedValues::new()), + values: Cow::Owned(LegacySerializedValues::new()), }; let query = Query { contents: contents.clone(), diff --git a/scylla-cql/src/frame/request/query.rs b/scylla-cql/src/frame/request/query.rs index ff0b0cc867..e4bc86f6bd 100644 --- a/scylla-cql/src/frame/request/query.rs +++ b/scylla-cql/src/frame/request/query.rs @@ -6,7 +6,7 @@ use bytes::{Buf, BufMut, Bytes}; use crate::{ frame::request::{RequestOpcode, SerializableRequest}, frame::types, - frame::value::SerializedValues, + frame::value::LegacySerializedValues, }; use super::DeserializableRequest; @@ -61,7 +61,7 @@ pub struct QueryParameters<'a> { pub timestamp: Option, pub page_size: Option, pub paging_state: Option, - pub values: Cow<'a, SerializedValues>, + pub values: Cow<'a, LegacySerializedValues>, } impl Default for QueryParameters<'_> { @@ -72,7 +72,7 @@ impl Default for QueryParameters<'_> { timestamp: None, page_size: None, paging_state: None, - values: Cow::Borrowed(SerializedValues::EMPTY), + values: Cow::Borrowed(LegacySerializedValues::EMPTY), } } } @@ -152,9 +152,9 @@ impl<'q> QueryParameters<'q> { let values_have_names_flag = (flags & FLAG_WITH_NAMES_FOR_VALUES) != 0; let values = Cow::Owned(if values_flag { - SerializedValues::new_from_frame(buf, values_have_names_flag)? + LegacySerializedValues::new_from_frame(buf, values_have_names_flag)? 
} else { - SerializedValues::new() + LegacySerializedValues::new() }); let page_size = page_size_flag.then(|| types::read_int(buf)).transpose()?; diff --git a/scylla-cql/src/frame/value.rs b/scylla-cql/src/frame/value.rs index fa3355f4a8..e4be751635 100644 --- a/scylla-cql/src/frame/value.rs +++ b/scylla-cql/src/frame/value.rs @@ -256,7 +256,7 @@ impl TryInto for CqlTime { /// Keeps a buffer with serialized Values /// Allows adding new Values and iterating over serialized ones #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct SerializedValues { +pub struct LegacySerializedValues { serialized_values: Vec, values_num: u16, contains_names: bool, @@ -282,27 +282,27 @@ pub enum SerializeValuesError { ParseError, } -pub type SerializedResult<'a> = Result, SerializeValuesError>; +pub type SerializedResult<'a> = Result, SerializeValuesError>; /// Represents list of values to be sent in a query /// gets serialized and but into request pub trait ValueList { - /// Provides a view of ValueList as SerializedValues - /// returns `Cow` to make impl ValueList for SerializedValues efficient + /// Provides a view of ValueList as LegacySerializedValues + /// returns `Cow` to make impl ValueList for LegacySerializedValues efficient fn serialized(&self) -> SerializedResult<'_>; fn write_to_request(&self, buf: &mut impl BufMut) -> Result<(), SerializeValuesError> { let serialized = self.serialized()?; - SerializedValues::write_to_request(&serialized, buf); + LegacySerializedValues::write_to_request(&serialized, buf); Ok(()) } } -impl SerializedValues { +impl LegacySerializedValues { /// Creates empty value list pub const fn new() -> Self { - SerializedValues { + LegacySerializedValues { serialized_values: Vec::new(), values_num: 0, contains_names: false, @@ -310,7 +310,7 @@ impl SerializedValues { } pub fn with_capacity(capacity: usize) -> Self { - SerializedValues { + LegacySerializedValues { serialized_values: Vec::with_capacity(capacity), values_num: 0, 
contains_names: false, @@ -322,7 +322,7 @@ impl SerializedValues { } /// A const empty instance, useful for taking references - pub const EMPTY: &'static SerializedValues = &SerializedValues::new(); + pub const EMPTY: &'static LegacySerializedValues = &LegacySerializedValues::new(); /// Serializes value and appends it to the list pub fn add_value(&mut self, val: &impl Value) -> Result<(), SerializeValuesError> { @@ -372,7 +372,7 @@ impl SerializedValues { } pub fn iter(&self) -> impl Iterator { - SerializedValuesIterator { + LegacySerializedValuesIterator { serialized_values: &self.serialized_values, contains_names: self.contains_names, } @@ -408,7 +408,7 @@ impl SerializedValues { let values_len_in_buf = values_beg.len() - buf.len(); let values_in_frame = &values_beg[0..values_len_in_buf]; - Ok(SerializedValues { + Ok(LegacySerializedValues { serialized_values: values_in_frame.to_vec(), values_num, contains_names, @@ -418,7 +418,7 @@ impl SerializedValues { pub fn iter_name_value_pairs(&self) -> impl Iterator, RawValue)> { let mut buf = &self.serialized_values[..]; (0..self.values_num).map(move |_| { - // `unwrap()`s here are safe, as we assume type-safety: if `SerializedValues` exits, + // `unwrap()`s here are safe, as we assume type-safety: if `LegacySerializedValues` exits, // we have a guarantee that the layout of the serialized values is valid. 
let name = self .contains_names @@ -430,12 +430,12 @@ impl SerializedValues { } #[derive(Clone, Copy)] -pub struct SerializedValuesIterator<'a> { +pub struct LegacySerializedValuesIterator<'a> { serialized_values: &'a [u8], contains_names: bool, } -impl<'a> Iterator for SerializedValuesIterator<'a> { +impl<'a> Iterator for LegacySerializedValuesIterator<'a> { type Item = RawValue<'a>; fn next(&mut self) -> Option { @@ -1030,14 +1030,14 @@ impl_value_for_tuple!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13 // Implement ValueList for the unit type impl ValueList for () { fn serialized(&self) -> SerializedResult<'_> { - Ok(Cow::Owned(SerializedValues::new())) + Ok(Cow::Owned(LegacySerializedValues::new())) } } // Implement ValueList for &[] - u8 because otherwise rust can't infer type impl ValueList for [u8; 0] { fn serialized(&self) -> SerializedResult<'_> { - Ok(Cow::Owned(SerializedValues::new())) + Ok(Cow::Owned(LegacySerializedValues::new())) } } @@ -1045,7 +1045,7 @@ impl ValueList for [u8; 0] { impl ValueList for &[T] { fn serialized(&self) -> SerializedResult<'_> { let size = std::mem::size_of_val(*self); - let mut result = SerializedValues::with_capacity(size); + let mut result = LegacySerializedValues::with_capacity(size); for val in *self { result.add_value(val)?; } @@ -1059,7 +1059,7 @@ impl ValueList for Vec { fn serialized(&self) -> SerializedResult<'_> { let slice = self.as_slice(); let size = std::mem::size_of_val(slice); - let mut result = SerializedValues::with_capacity(size); + let mut result = LegacySerializedValues::with_capacity(size); for val in self { result.add_value(val)?; } @@ -1073,7 +1073,7 @@ macro_rules! 
impl_value_list_for_btree_map { ($key_type:ty) => { impl ValueList for BTreeMap<$key_type, T> { fn serialized(&self) -> SerializedResult<'_> { - let mut result = SerializedValues::with_capacity(self.len()); + let mut result = LegacySerializedValues::with_capacity(self.len()); for (key, val) in self { result.add_named_value(key, val)?; } @@ -1089,7 +1089,7 @@ macro_rules! impl_value_list_for_hash_map { ($key_type:ty) => { impl ValueList for HashMap<$key_type, T, S> { fn serialized(&self) -> SerializedResult<'_> { - let mut result = SerializedValues::with_capacity(self.len()); + let mut result = LegacySerializedValues::with_capacity(self.len()); for (key, val) in self { result.add_named_value(key, val)?; } @@ -1112,7 +1112,7 @@ impl_value_list_for_btree_map!(&str); impl ValueList for (T0,) { fn serialized(&self) -> SerializedResult<'_> { let size = std::mem::size_of_val(self); - let mut result = SerializedValues::with_capacity(size); + let mut result = LegacySerializedValues::with_capacity(size); result.add_value(&self.0)?; Ok(Cow::Owned(result)) } @@ -1126,7 +1126,7 @@ macro_rules! 
impl_value_list_for_tuple { { fn serialized(&self) -> SerializedResult<'_> { let size = std::mem::size_of_val(self); - let mut result = SerializedValues::with_capacity(size); + let mut result = LegacySerializedValues::with_capacity(size); $( result.add_value(&self.$FieldI) ?; )* @@ -1165,13 +1165,13 @@ impl ValueList for &T { } } -impl ValueList for SerializedValues { +impl ValueList for LegacySerializedValues { fn serialized(&self) -> SerializedResult<'_> { Ok(Cow::Borrowed(self)) } } -impl<'b> ValueList for Cow<'b, SerializedValues> { +impl<'b> ValueList for Cow<'b, LegacySerializedValues> { fn serialized(&self) -> SerializedResult<'_> { Ok(Cow::Borrowed(self.as_ref())) } @@ -1346,17 +1346,20 @@ impl<'a, T: BatchValues + ?Sized> BatchValues for &'a T { /// Allows reusing already-serialized first value /// -/// We'll need to build a `SerializedValues` for the first ~`ValueList` of a batch to figure out the shard (#448). +/// We'll need to build a `LegacySerializedValues` for the first ~`ValueList` of a batch to figure out the shard (#448). /// Once that is done, we can use that instead of re-serializing. 
/// /// This struct implements both `BatchValues` and `BatchValuesIterator` for that purpose pub struct BatchValuesFirstSerialized<'f, T> { - first: Option<&'f SerializedValues>, + first: Option<&'f LegacySerializedValues>, rest: T, } impl<'f, T: BatchValues> BatchValuesFirstSerialized<'f, T> { - pub fn new(batch_values: T, already_serialized_first: Option<&'f SerializedValues>) -> Self { + pub fn new( + batch_values: T, + already_serialized_first: Option<&'f LegacySerializedValues>, + ) -> Self { Self { first: already_serialized_first, rest: batch_values, diff --git a/scylla-cql/src/frame/value_tests.rs b/scylla-cql/src/frame/value_tests.rs index 0ded4b4ed0..adcdcdf0b2 100644 --- a/scylla-cql/src/frame/value_tests.rs +++ b/scylla-cql/src/frame/value_tests.rs @@ -5,8 +5,8 @@ use crate::types::serialize::{CellWriter, RowWriter}; use super::response::result::{ColumnSpec, ColumnType, TableSpec}; use super::value::{ - BatchValues, CqlDate, CqlDuration, CqlTime, CqlTimestamp, MaybeUnset, SerializeValuesError, - SerializedValues, Unset, Value, ValueList, ValueTooBig, + BatchValues, CqlDate, CqlDuration, CqlTime, CqlTimestamp, LegacySerializedValues, MaybeUnset, + SerializeValuesError, Unset, Value, ValueList, ValueTooBig, }; use bigdecimal::BigDecimal; use bytes::BufMut; @@ -832,7 +832,7 @@ fn ref_value() { #[test] fn empty_serialized_values() { - const EMPTY: SerializedValues = SerializedValues::new(); + const EMPTY: LegacySerializedValues = LegacySerializedValues::new(); assert_eq!(EMPTY.len(), 0); assert!(EMPTY.is_empty()); assert_eq!(EMPTY.iter().next(), None); @@ -844,7 +844,7 @@ fn empty_serialized_values() { #[test] fn serialized_values() { - let mut values = SerializedValues::new(); + let mut values = LegacySerializedValues::new(); assert!(values.is_empty()); // Add first value @@ -920,14 +920,14 @@ fn serialized_values() { #[test] fn unit_value_list() { - let serialized_unit: SerializedValues = + let serialized_unit: LegacySerializedValues = <() as 
ValueList>::serialized(&()).unwrap().into_owned(); assert!(serialized_unit.is_empty()); } #[test] fn empty_array_value_list() { - let serialized_arr: SerializedValues = <[u8; 0] as ValueList>::serialized(&[]) + let serialized_arr: LegacySerializedValues = <[u8; 0] as ValueList>::serialized(&[]) .unwrap() .into_owned(); assert!(serialized_arr.is_empty()); @@ -987,7 +987,7 @@ fn col_spec(name: &str, typ: ColumnType) -> ColumnSpec { fn serialize_values( vl: T, columns: &[ColumnSpec], -) -> SerializedValues { +) -> LegacySerializedValues { let serialized = ::serialized(&vl).unwrap().into_owned(); let mut old_serialized = Vec::new(); serialized.write_to_request(&mut old_serialized); @@ -1158,11 +1158,11 @@ fn ref_value_list() { #[test] fn serialized_values_value_list() { - let mut ser_values = SerializedValues::new(); + let mut ser_values = LegacySerializedValues::new(); ser_values.add_value(&1_i32).unwrap(); ser_values.add_value(&"qwertyuiop").unwrap(); - let ser_ser_values: Cow = ser_values.serialized().unwrap(); + let ser_ser_values: Cow = ser_values.serialized().unwrap(); assert!(matches!(ser_ser_values, Cow::Borrowed(_))); assert_eq!(&ser_values, ser_ser_values.as_ref()); @@ -1170,9 +1170,9 @@ fn serialized_values_value_list() { #[test] fn cow_serialized_values_value_list() { - let cow_ser_values: Cow = Cow::Owned(SerializedValues::new()); + let cow_ser_values: Cow = Cow::Owned(LegacySerializedValues::new()); - let serialized: Cow = cow_ser_values.serialized().unwrap(); + let serialized: Cow = cow_ser_values.serialized().unwrap(); assert!(matches!(serialized, Cow::Borrowed(_))); assert_eq!(cow_ser_values.as_ref(), serialized.as_ref()); diff --git a/scylla-cql/src/lib.rs b/scylla-cql/src/lib.rs index 6d74b680ba..83b6f3751e 100644 --- a/scylla-cql/src/lib.rs +++ b/scylla-cql/src/lib.rs @@ -17,7 +17,7 @@ pub mod _macro_internal { }; pub use crate::frame::response::result::{CqlValue, Row}; pub use crate::frame::value::{ - SerializedResult, SerializedValues, Value, 
ValueList, ValueTooBig, + LegacySerializedValues, SerializedResult, Value, ValueList, ValueTooBig, }; pub use crate::macros::*; diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 8e97044b11..0aaa26cdeb 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -7,7 +7,7 @@ use std::{collections::HashMap, sync::Arc}; use thiserror::Error; use crate::frame::response::result::PreparedMetadata; -use crate::frame::value::{SerializedValues, ValueList}; +use crate::frame::value::{LegacySerializedValues, ValueList}; use crate::frame::{response::result::ColumnSpec, types::RawValue}; use super::value::SerializeCql; @@ -62,7 +62,7 @@ macro_rules! fallback_impl_contents { } #[inline] fn is_empty(&self) -> bool { - SerializedValues::is_empty(self) + LegacySerializedValues::is_empty(self) } }; } @@ -233,11 +233,11 @@ impl SerializeRow for &T { } } -impl SerializeRow for SerializedValues { +impl SerializeRow for LegacySerializedValues { fallback_impl_contents!(); } -impl<'b> SerializeRow for Cow<'b, SerializedValues> { +impl<'b> SerializeRow for Cow<'b, LegacySerializedValues> { fallback_impl_contents!(); } @@ -337,7 +337,7 @@ impl_tuples!( /// /// ```rust /// # use std::borrow::Cow; -/// # use scylla_cql::frame::value::{Value, ValueList, SerializedResult, SerializedValues}; +/// # use scylla_cql::frame::value::{Value, ValueList, SerializedResult, LegacySerializedValues}; /// # use scylla_cql::impl_serialize_row_via_value_list; /// struct NoGenerics {} /// impl ValueList for NoGenerics { @@ -352,7 +352,7 @@ impl_tuples!( /// struct WithGenerics(T, U); /// impl ValueList for WithGenerics { /// fn serialized(&self) -> SerializedResult<'_> { -/// let mut values = SerializedValues::new(); +/// let mut values = LegacySerializedValues::new(); /// values.add_value(&self.0); /// values.add_value(&self.1.clone()); /// Ok(Cow::Owned(values)) @@ -576,7 +576,7 @@ pub enum ValueListToSerializeRowAdapterError 
{ #[cfg(test)] mod tests { use crate::frame::response::result::{ColumnSpec, ColumnType, TableSpec}; - use crate::frame::value::{MaybeUnset, SerializedValues, ValueList}; + use crate::frame::value::{LegacySerializedValues, MaybeUnset, ValueList}; use crate::types::serialize::RowWriter; use super::{ @@ -638,7 +638,7 @@ mod tests { let mut sorted_row_data = Vec::new(); <_ as ValueList>::write_to_request(&sorted_row, &mut sorted_row_data).unwrap(); - let mut unsorted_row = SerializedValues::new(); + let mut unsorted_row = LegacySerializedValues::new(); unsorted_row.add_named_value("a", &1i32).unwrap(); unsorted_row.add_named_value("b", &"Ala ma kota").unwrap(); unsorted_row diff --git a/scylla-macros/src/value_list.rs b/scylla-macros/src/value_list.rs index bf6fc38e9d..bc9de23c8a 100644 --- a/scylla-macros/src/value_list.rs +++ b/scylla-macros/src/value_list.rs @@ -17,7 +17,7 @@ pub fn value_list_derive(tokens_input: TokenStream) -> Result #path::SerializedResult { - let mut result = #path::SerializedValues::with_capacity(#values_len); + let mut result = #path::LegacySerializedValues::with_capacity(#values_len); #( result.add_value(&self.#field_name)?; )* diff --git a/scylla/src/statement/prepared_statement.rs b/scylla/src/statement/prepared_statement.rs index 8abdf6bd91..b401a846b1 100644 --- a/scylla/src/statement/prepared_statement.rs +++ b/scylla/src/statement/prepared_statement.rs @@ -13,7 +13,7 @@ use scylla_cql::frame::response::result::ColumnSpec; use super::StatementConfig; use crate::frame::response::result::PreparedMetadata; use crate::frame::types::{Consistency, SerialConsistency}; -use crate::frame::value::SerializedValues; +use crate::frame::value::LegacySerializedValues; use crate::history::HistoryListener; use crate::retry_policy::RetryPolicy; use crate::routing::Token; @@ -134,7 +134,7 @@ impl PreparedStatement { /// [Self::calculate_token()]. 
pub fn compute_partition_key( &self, - bound_values: &SerializedValues, + bound_values: &LegacySerializedValues, ) -> Result { let partition_key = self.extract_partition_key(bound_values)?; let mut buf = BytesMut::new(); @@ -150,7 +150,7 @@ impl PreparedStatement { /// This is a preparation step necessary for calculating token based on a prepared statement. pub(crate) fn extract_partition_key<'ps>( &'ps self, - bound_values: &'ps SerializedValues, + bound_values: &'ps LegacySerializedValues, ) -> Result { PartitionKey::new(self.get_prepared_metadata(), bound_values) } @@ -158,7 +158,7 @@ impl PreparedStatement { pub(crate) fn extract_partition_key_and_calculate_token<'ps>( &'ps self, partitioner_name: &'ps PartitionerName, - serialized_values: &'ps SerializedValues, + serialized_values: &'ps LegacySerializedValues, ) -> Result, Token)>, QueryError> { if !self.is_token_aware() { return Ok(None); @@ -191,7 +191,7 @@ impl PreparedStatement { // is either way used internally, among others for display in traces. pub fn calculate_token( &self, - serialized_values: &SerializedValues, + serialized_values: &LegacySerializedValues, ) -> Result, QueryError> { self.extract_partition_key_and_calculate_token(&self.partitioner_name, serialized_values) .map(|opt| opt.map(|(_pk, token)| token)) @@ -380,7 +380,7 @@ impl<'ps> PartitionKey<'ps> { fn new( prepared_metadata: &'ps PreparedMetadata, - bound_values: &'ps SerializedValues, + bound_values: &'ps LegacySerializedValues, ) -> Result { // Iterate on values using sorted pk_indexes (see deser_prepared_metadata), // and use PartitionKeyIndex.sequence to insert the value in pk_values with the correct order. 
@@ -460,7 +460,7 @@ mod tests { response::result::{ ColumnSpec, ColumnType, PartitionKeyIndex, PreparedMetadata, TableSpec, }, - value::SerializedValues, + value::LegacySerializedValues, }; use crate::prepared_statement::PartitionKey; @@ -511,7 +511,7 @@ mod tests { ], [4, 0, 3], ); - let mut values = SerializedValues::new(); + let mut values = LegacySerializedValues::new(); values.add_value(&67i8).unwrap(); values.add_value(&42i16).unwrap(); values.add_value(&23i32).unwrap(); diff --git a/scylla/src/transport/connection.rs b/scylla/src/transport/connection.rs index 12dfc61540..fb1fa60b1e 100644 --- a/scylla/src/transport/connection.rs +++ b/scylla/src/transport/connection.rs @@ -4,7 +4,7 @@ use scylla_cql::errors::TranslationError; use scylla_cql::frame::request::options::Options; use scylla_cql::frame::response::Error; use scylla_cql::frame::types::SerialConsistency; -use scylla_cql::frame::value::SerializedValues; +use scylla_cql::frame::value::LegacySerializedValues; use socket2::{SockRef, TcpKeepalive}; use tokio::io::{split, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader, BufWriter}; use tokio::net::{TcpSocket, TcpStream}; @@ -651,7 +651,7 @@ impl Connection { parameters: query::QueryParameters { consistency, serial_consistency, - values: Cow::Borrowed(SerializedValues::EMPTY), + values: Cow::Borrowed(LegacySerializedValues::EMPTY), page_size: query.get_page_size(), paging_state, timestamp: query.get_timestamp(), diff --git a/scylla/src/transport/iterator.rs b/scylla/src/transport/iterator.rs index a7dc4969b2..e5ebe4d92d 100644 --- a/scylla/src/transport/iterator.rs +++ b/scylla/src/transport/iterator.rs @@ -27,7 +27,7 @@ use crate::frame::{ result, result::{ColumnSpec, Row, Rows}, }, - value::SerializedValues, + value::LegacySerializedValues, }; use crate::history::{self, HistoryListener}; use crate::statement::Consistency; @@ -73,7 +73,7 @@ struct ReceivedPage { pub(crate) struct PreparedIteratorConfig { pub(crate) prepared: PreparedStatement, - pub(crate) 
values: SerializedValues, + pub(crate) values: LegacySerializedValues, pub(crate) execution_profile: Arc, pub(crate) cluster_data: Arc, pub(crate) metrics: Arc, @@ -362,7 +362,7 @@ impl RowIterator { pub(crate) async fn new_for_connection_execute_iter( mut prepared: PreparedStatement, - values: SerializedValues, + values: LegacySerializedValues, connection: Arc, consistency: Consistency, serial_consistency: Option, diff --git a/scylla/src/transport/partitioner.rs b/scylla/src/transport/partitioner.rs index 4526715ab2..e6d5c223b2 100644 --- a/scylla/src/transport/partitioner.rs +++ b/scylla/src/transport/partitioner.rs @@ -3,7 +3,7 @@ use scylla_cql::frame::types::RawValue; use std::num::Wrapping; use crate::{ - frame::value::SerializedValues, prepared_statement::TokenCalculationError, routing::Token, + frame::value::LegacySerializedValues, prepared_statement::TokenCalculationError, routing::Token, }; #[allow(clippy::upper_case_acronyms)] @@ -337,7 +337,7 @@ impl PartitionerHasher for CDCPartitionerHasher { /// NOTE: the provided values must completely constitute partition key /// and be in the order defined in CREATE TABLE statement. 
pub fn calculate_token_for_partition_key( - serialized_partition_key_values: &SerializedValues, + serialized_partition_key_values: &LegacySerializedValues, partitioner: &P, ) -> Result { let mut partitioner_hasher = partitioner.build_hasher(); diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index 5a31796065..cb26dfd0e1 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -1216,7 +1216,7 @@ impl Session { let first_value_token = statement_info.token; // Reuse first serialized value when serializing query, and delegate to `BatchValues::write_next_to_request` - // directly for others (if they weren't already serialized, possibly don't even allocate the `SerializedValues`) + // directly for others (if they weren't already serialized, possibly don't even allocate the `LegacySerializedValues`) let values = BatchValuesFirstSerialized::new(&values, first_serialized_value); let values_ref = &values; From 869c5a0e68589404d5bd3927640dceef907194bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 22:59:48 +0100 Subject: [PATCH 030/107] Makefile: build and check benches In the current makefile benchmarks are not built or checked with `cargo check` / `cargo clippy` so any errors appearing in them will not be caught locally by a dev running `make ci` to perform checks / tests. This commit adds benchmark building / checking to all relevant makefile targets. 
--- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 7cdaeb2d13..72396cd0a7 100644 --- a/Makefile +++ b/Makefile @@ -19,15 +19,15 @@ fmt-check: .PHONY: check check: - cargo check --examples --tests + cargo check --all-targets .PHONY: check-without-features check-without-features: - cargo check --manifest-path "scylla/Cargo.toml" --features "" + cargo check --manifest-path "scylla/Cargo.toml" --features "" --all-targets .PHONY: clippy clippy: - RUSTFLAGS=-Dwarnings cargo clippy --examples --tests -- -Aclippy::uninlined_format_args + RUSTFLAGS=-Dwarnings cargo clippy --all-targets -- -Aclippy::uninlined_format_args .PHONY: test test: up @@ -42,7 +42,7 @@ dockerized-test: up .PHONY: build build: - cargo build --examples + cargo build --examples --benches .PHONY: docs docs: From d0e8502a3ee74c65b4bb649a190d7efd2e699fd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Sun, 12 Nov 2023 01:21:27 +0100 Subject: [PATCH 031/107] scylla-cql: Implement SerializedValues This struct is very similar to LegacySerializedValues, but does not support named values (as those are no longer necessary after serialization refactor) and uses new serialization interface, guaranteeing better type safety. 
--- scylla-cql/src/types/serialize/row.rs | 222 +++++++++++++++++++++++++- 1 file changed, 221 insertions(+), 1 deletion(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 0aaa26cdeb..381c958ae4 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -4,14 +4,19 @@ use std::fmt::Display; use std::hash::BuildHasher; use std::{collections::HashMap, sync::Arc}; +use bytes::BufMut; use thiserror::Error; +use crate::frame::frame_errors::ParseError; +use crate::frame::response::result::ColumnType; use crate::frame::response::result::PreparedMetadata; +use crate::frame::types; +use crate::frame::value::SerializeValuesError; use crate::frame::value::{LegacySerializedValues, ValueList}; use crate::frame::{response::result::ColumnSpec, types::RawValue}; use super::value::SerializeCql; -use super::{RowWriter, SerializationError}; +use super::{CellWriter, RowWriter, SerializationError}; /// Contains information needed to serialize a row. pub struct RowSerializationContext<'a> { @@ -573,9 +578,160 @@ pub enum ValueListToSerializeRowAdapterError { NoBindMarkerWithName { name: String }, } +/// A buffer containing already serialized values. +/// +/// It is not aware of the types of contained values, +/// it is basically a byte buffer in the format expected by the CQL protocol. +/// Usually there is no need for a user of a driver to use this struct, it is mostly internal. +/// The exception are APIs like `ClusterData::compute_token` / `ClusterData::get_endpoints`. +/// Allows adding new values to the buffer and iterating over the content. 
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct SerializedValues { + serialized_values: Vec, + element_count: u16, +} + +impl SerializedValues { + pub const fn new() -> Self { + SerializedValues { + serialized_values: Vec::new(), + element_count: 0, + } + } + + /// A const empty instance, useful for taking references + pub const EMPTY: &'static SerializedValues = &SerializedValues::new(); + + pub fn from_serializable( + ctx: &RowSerializationContext, + row: &T, + ) -> Result { + let mut data = Vec::new(); + let element_count = { + let mut writer = RowWriter::new(&mut data); + row.serialize(ctx, &mut writer)?; + match writer.value_count().try_into() { + Ok(n) => n, + Err(_) => { + return Err(SerializationError(Arc::new( + SerializeValuesError::TooManyValues, + ))) + } + } + }; + + Ok(SerializedValues { + serialized_values: data, + element_count, + }) + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.element_count() == 0 + } + + #[inline] + pub fn iter(&self) -> impl Iterator { + SerializedValuesIterator { + serialized_values: &self.serialized_values, + } + } + + #[inline] + pub fn element_count(&self) -> u16 { + // We initialize first two bytes in new() and BufBackedRowWriter does too, + // so this unwrap is safe + self.element_count + } + + #[inline] + pub fn buffer_size(&self) -> usize { + self.serialized_values.len() + } + + pub(crate) fn write_to_request(&self, buf: &mut impl BufMut) { + buf.put_u16(self.element_count); + buf.put(self.serialized_values.as_slice()) + } + + /// Serializes value and appends it to the list + pub fn add_value( + &mut self, + val: &T, + typ: &ColumnType, + ) -> Result<(), SerializationError> { + if self.element_count() == u16::MAX { + return Err(SerializationError(Arc::new( + SerializeValuesError::TooManyValues, + ))); + } + + let len_before_serialize: usize = self.serialized_values.len(); + + let writer = CellWriter::new(&mut self.serialized_values); + if let Err(e) = val.serialize(typ, writer) { + 
self.serialized_values.resize(len_before_serialize, 0); + Err(e) + } else { + self.element_count += 1; + Ok(()) + } + } + + /// Creates value list from the request frame + #[allow(dead_code)] + pub(crate) fn new_from_frame(buf: &mut &[u8]) -> Result { + let values_num = types::read_short(buf)?; + let values_beg = *buf; + for _ in 0..values_num { + let _serialized = types::read_value(buf)?; + } + + let values_len_in_buf = values_beg.len() - buf.len(); + let values_in_frame = &values_beg[0..values_len_in_buf]; + Ok(SerializedValues { + serialized_values: values_in_frame.to_vec(), + element_count: values_num, + }) + } + + // Temporary function, to be removed when we implement new batching API (right now it is needed in frame::request::mod.rs tests) + #[allow(dead_code)] + pub fn to_old_serialized_values(&self) -> LegacySerializedValues { + let mut frame = Vec::new(); + self.write_to_request(&mut frame); + LegacySerializedValues::new_from_frame(&mut frame.as_slice(), false).unwrap() + } +} + +impl Default for SerializedValues { + fn default() -> Self { + Self::new() + } +} + +#[derive(Clone, Copy)] +pub struct SerializedValuesIterator<'a> { + serialized_values: &'a [u8], +} + +impl<'a> Iterator for SerializedValuesIterator<'a> { + type Item = RawValue<'a>; + + fn next(&mut self) -> Option { + if self.serialized_values.is_empty() { + return None; + } + + Some(types::read_value(&mut self.serialized_values).expect("badly encoded value")) + } +} + #[cfg(test)] mod tests { use crate::frame::response::result::{ColumnSpec, ColumnType, TableSpec}; + use crate::frame::types::RawValue; use crate::frame::value::{LegacySerializedValues, MaybeUnset, ValueList}; use crate::types::serialize::RowWriter; @@ -584,6 +740,7 @@ mod tests { BuiltinTypeCheckErrorKind, RowSerializationContext, SerializeCql, SerializeRow, }; + use super::SerializedValues; use scylla_macros::SerializeRow; fn col_spec(name: &str, typ: ColumnType) -> ColumnSpec { @@ -956,4 +1113,67 @@ mod tests { 
BuiltinSerializationErrorKind::ColumnSerializationFailed { .. } )); } + + #[test] + fn test_empty_serialized_values() { + let values = SerializedValues::new(); + assert!(values.is_empty()); + assert_eq!(values.element_count(), 0); + assert_eq!(values.buffer_size(), 0); + assert_eq!(values.iter().count(), 0); + } + + #[test] + fn test_serialized_values_content() { + let mut values = SerializedValues::new(); + values.add_value(&1234i32, &ColumnType::Int).unwrap(); + values.add_value(&"abcdefg", &ColumnType::Ascii).unwrap(); + let mut buf = Vec::new(); + values.write_to_request(&mut buf); + assert_eq!( + buf, + [ + 0, 2, // element count + 0, 0, 0, 4, // size of int + 0, 0, 4, 210, // content of int (1234) + 0, 0, 0, 7, // size of string + 97, 98, 99, 100, 101, 102, 103, // content of string ('abcdefg') + ] + ) + } + + #[test] + fn test_serialized_values_iter() { + let mut values = SerializedValues::new(); + values.add_value(&1234i32, &ColumnType::Int).unwrap(); + values.add_value(&"abcdefg", &ColumnType::Ascii).unwrap(); + + let mut iter = values.iter(); + assert_eq!(iter.next(), Some(RawValue::Value(&[0, 0, 4, 210]))); + assert_eq!( + iter.next(), + Some(RawValue::Value(&[97, 98, 99, 100, 101, 102, 103])) + ); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_serialized_values_max_capacity() { + let mut values = SerializedValues::new(); + for _ in 0..65535 { + values + .add_value(&123456789i64, &ColumnType::BigInt) + .unwrap(); + } + + // Adding this value should fail, we reached max capacity + values + .add_value(&123456789i64, &ColumnType::BigInt) + .unwrap_err(); + + assert_eq!(values.iter().count(), 65535); + assert!(values + .iter() + .all(|v| v == RawValue::Value(&[0, 0, 0, 0, 0x07, 0x5b, 0xcd, 0x15]))) + } } From 0ddaa9a20f1f8370578df21169d9aacd8bd74a89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 12:12:44 +0100 Subject: [PATCH 032/107] PreparedStatement: Add `serialize_values` helper method This method is a 
way to avoid repetitive code (getting prepared metadata, creating serialization context, calling `SerializedValues::from_serializable`). It is marked as `#[allow(dead_code)]` temporarily, this attribute will be removed later, when there are some uses. --- scylla/src/statement/prepared_statement.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scylla/src/statement/prepared_statement.rs b/scylla/src/statement/prepared_statement.rs index b401a846b1..0135a8a527 100644 --- a/scylla/src/statement/prepared_statement.rs +++ b/scylla/src/statement/prepared_statement.rs @@ -1,6 +1,8 @@ use bytes::{Bytes, BytesMut}; use scylla_cql::errors::{BadQuery, QueryError}; use scylla_cql::frame::types::RawValue; +use scylla_cql::types::serialize::row::{RowSerializationContext, SerializeRow, SerializedValues}; +use scylla_cql::types::serialize::SerializationError; use smallvec::{smallvec, SmallVec}; use std::convert::TryInto; use std::sync::Arc; @@ -335,6 +337,15 @@ impl PreparedStatement { pub fn get_execution_profile_handle(&self) -> Option<&ExecutionProfileHandle> { self.config.execution_profile_handle.as_ref() } + + #[allow(dead_code)] + pub(crate) fn serialize_values( + &self, + values: &impl SerializeRow, + ) -> Result { + let ctx = RowSerializationContext::from_prepared(self.get_prepared_metadata()); + SerializedValues::from_serializable(&ctx, values) + } } #[derive(Clone, Debug, Error, PartialEq, Eq, PartialOrd, Ord)] From 4a18a94f071c447754e1dce2db0bf3519c815d75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 15:42:41 +0100 Subject: [PATCH 033/107] scylla: export scylla_cql::types::serialize This is required so that users can implement new traits for their own types and use them in generics. 
--- scylla/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/scylla/src/lib.rs b/scylla/src/lib.rs index 27a3c57471..5bf9bc69e8 100644 --- a/scylla/src/lib.rs +++ b/scylla/src/lib.rs @@ -100,6 +100,7 @@ pub mod _macro_internal { pub use scylla_cql::frame; pub use scylla_cql::macros::{self, *}; +pub use scylla_cql::types::serialize; pub mod authentication; #[cfg(feature = "cloud")] From 86776016a7a8a88ef859916b59312899a995b717 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 13:22:01 +0100 Subject: [PATCH 034/107] ClusterData: Switch `get_endpoints` to new serialization API --- scylla/src/transport/cluster.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scylla/src/transport/cluster.rs b/scylla/src/transport/cluster.rs index 503d14519d..edda684de5 100644 --- a/scylla/src/transport/cluster.rs +++ b/scylla/src/transport/cluster.rs @@ -18,6 +18,7 @@ use futures::future::join_all; use futures::{future::RemoteHandle, FutureExt}; use itertools::Itertools; use scylla_cql::errors::{BadQuery, NewSessionError}; +use scylla_cql::types::serialize::row::SerializedValues; use std::collections::HashMap; use std::net::SocketAddr; use std::sync::Arc; @@ -436,11 +437,11 @@ impl ClusterData { &self, keyspace: &str, table: &str, - partition_key: impl ValueList, + partition_key: &SerializedValues, ) -> Result>, BadQuery> { Ok(self.get_token_endpoints( keyspace, - self.compute_token(keyspace, table, partition_key)?, + self.compute_token(keyspace, table, partition_key.to_old_serialized_values())?, )) } From a523582dbe3f13c7af53656312a571275dd7fbd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 13:24:42 +0100 Subject: [PATCH 035/107] ClusterData: switch `compute_token` to new serialization API --- scylla/src/transport/cluster.rs | 7 +++---- scylla/src/transport/session_test.rs | 14 +++++++++++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git 
a/scylla/src/transport/cluster.rs b/scylla/src/transport/cluster.rs index edda684de5..c6ea2fbf26 100644 --- a/scylla/src/transport/cluster.rs +++ b/scylla/src/transport/cluster.rs @@ -1,6 +1,5 @@ /// Cluster manages up to date information and connections to database nodes use crate::frame::response::event::{Event, StatusChangeEvent}; -use crate::frame::value::ValueList; use crate::prepared_statement::TokenCalculationError; use crate::routing::Token; use crate::transport::host_filter::HostFilter; @@ -391,7 +390,7 @@ impl ClusterData { &self, keyspace: &str, table: &str, - partition_key: impl ValueList, + partition_key: &SerializedValues, ) -> Result { let partitioner = self .keyspaces @@ -401,7 +400,7 @@ impl ClusterData { .and_then(PartitionerName::from_str) .unwrap_or_default(); - calculate_token_for_partition_key(&partition_key.serialized().unwrap(), &partitioner) + calculate_token_for_partition_key(&partition_key.to_old_serialized_values(), &partitioner) .map_err(|err| match err { TokenCalculationError::ValueTooLong(values_len) => { BadQuery::ValuesTooLongForKey(values_len, u16::MAX.into()) @@ -441,7 +440,7 @@ impl ClusterData { ) -> Result>, BadQuery> { Ok(self.get_token_endpoints( keyspace, - self.compute_token(keyspace, table, partition_key.to_old_serialized_values())?, + self.compute_token(keyspace, table, partition_key)?, )) } diff --git a/scylla/src/transport/session_test.rs b/scylla/src/transport/session_test.rs index 79df0834e3..284a222ba0 100644 --- a/scylla/src/transport/session_test.rs +++ b/scylla/src/transport/session_test.rs @@ -28,7 +28,9 @@ use assert_matches::assert_matches; use bytes::Bytes; use futures::{FutureExt, StreamExt, TryStreamExt}; use itertools::Itertools; +use scylla_cql::frame::response::result::ColumnType; use scylla_cql::frame::value::Value; +use scylla_cql::types::serialize::row::SerializedValues; use std::collections::BTreeSet; use std::collections::{BTreeMap, HashMap}; use std::sync::atomic::{AtomicBool, Ordering}; @@ -209,6 
+211,9 @@ async fn test_prepared_statement() { let values = (17_i32, 16_i32, "I'm prepared!!!"); let serialized_values = values.serialized().unwrap().into_owned(); + let serialized_values_complex_pk = prepared_complex_pk_statement + .serialize_values(&values) + .unwrap(); session.execute(&prepared_statement, &values).await.unwrap(); session @@ -237,9 +242,11 @@ async fn test_prepared_statement() { .unwrap(), ); assert_eq!(token, prepared_token); + let mut pk = SerializedValues::new(); + pk.add_value(&17_i32, &ColumnType::Int).unwrap(); let cluster_data_token = session .get_cluster_data() - .compute_token(&ks, "t2", (17_i32,)) + .compute_token(&ks, "t2", &pk) .unwrap(); assert_eq!(token, cluster_data_token); } @@ -265,7 +272,7 @@ async fn test_prepared_statement() { assert_eq!(token, prepared_token); let cluster_data_token = session .get_cluster_data() - .compute_token(&ks, "complex_pk", &serialized_values) + .compute_token(&ks, "complex_pk", &serialized_values_complex_pk) .unwrap(); assert_eq!(token, cluster_data_token); } @@ -511,6 +518,7 @@ async fn test_token_calculation() { } let values = (&s,); let serialized_values = values.serialized().unwrap().into_owned(); + let new_serialized_values = prepared_statement.serialize_values(&values).unwrap(); session.execute(&prepared_statement, &values).await.unwrap(); let rs = session @@ -537,7 +545,7 @@ async fn test_token_calculation() { assert_eq!(token, prepared_token); let cluster_data_token = session .get_cluster_data() - .compute_token(&ks, "t3", &serialized_values) + .compute_token(&ks, "t3", &new_serialized_values) .unwrap(); assert_eq!(token, cluster_data_token); } From de7fc31b8f82976e98d660e05dff5b89898d5315 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 12:26:46 +0100 Subject: [PATCH 036/107] PreparedStatement: Switch `calculate_token` to new serialization API --- scylla/src/statement/prepared_statement.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) 
diff --git a/scylla/src/statement/prepared_statement.rs b/scylla/src/statement/prepared_statement.rs index 0135a8a527..d92070e8cd 100644 --- a/scylla/src/statement/prepared_statement.rs +++ b/scylla/src/statement/prepared_statement.rs @@ -184,19 +184,19 @@ impl PreparedStatement { Ok(Some((partition_key, token))) } - /// Calculates the token for given prepared statement and serialized values. + /// Calculates the token for given prepared statement and values. /// /// Returns the token that would be computed for executing the provided /// prepared statement with the provided values. // As this function creates a `PartitionKey`, it is intended rather for external usage (by users). // For internal purposes, `PartitionKey::calculate_token()` is preferred, as `PartitionKey` // is either way used internally, among others for display in traces. - pub fn calculate_token( - &self, - serialized_values: &LegacySerializedValues, - ) -> Result, QueryError> { - self.extract_partition_key_and_calculate_token(&self.partitioner_name, serialized_values) - .map(|opt| opt.map(|(_pk, token)| token)) + pub fn calculate_token(&self, values: &impl SerializeRow) -> Result, QueryError> { + self.extract_partition_key_and_calculate_token( + &self.partitioner_name, + &self.serialize_values(values)?.to_old_serialized_values(), + ) + .map(|opt| opt.map(|(_pk, token)| token)) } /// Returns the name of the keyspace this statement is operating on. From 0f6363669b3deeb45d8dfd879046d49a2d5a7340 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 13:03:03 +0100 Subject: [PATCH 037/107] PreparedStatement: Switch `compute_partition_key` to new Serialization API This commit changed the type of `bound_values` argument to `impl SerializeRow` instead of `SerializedValues` in order to: 1. Make the API easier to use (constructing new SerializedValues isn't very user friendly and we want to discourage its use). 2. 
Make the API type-safe - which is especially important here because this API doesn't talk to Scylla, it performs the computation locally, so Scylla wouldn't catch it if improper types are used. This change required adding a new variant to `PartitionKeyError`, because now the function needs to serialize provided values, and this operation can fail. --- scylla/src/statement/prepared_statement.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/scylla/src/statement/prepared_statement.rs b/scylla/src/statement/prepared_statement.rs index d92070e8cd..97ae7fb30b 100644 --- a/scylla/src/statement/prepared_statement.rs +++ b/scylla/src/statement/prepared_statement.rs @@ -136,9 +136,11 @@ impl PreparedStatement { /// [Self::calculate_token()]. pub fn compute_partition_key( &self, - bound_values: &LegacySerializedValues, + bound_values: &impl SerializeRow, ) -> Result { - let partition_key = self.extract_partition_key(bound_values)?; + let serialized = self.serialize_values(bound_values)?; + let old_serialized = serialized.to_old_serialized_values(); + let partition_key = self.extract_partition_key(&old_serialized)?; let mut buf = BytesMut::new(); let mut writer = |chunk: &[u8]| buf.extend_from_slice(chunk); @@ -360,12 +362,14 @@ pub enum TokenCalculationError { ValueTooLong(usize), } -#[derive(Clone, Debug, Error, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Debug, Error)] pub enum PartitionKeyError { #[error(transparent)] PartitionKeyExtraction(PartitionKeyExtractionError), #[error(transparent)] TokenCalculation(TokenCalculationError), + #[error(transparent)] + Serialization(SerializationError), } impl From for PartitionKeyError { @@ -380,6 +384,12 @@ impl From for PartitionKeyError { } } +impl From for PartitionKeyError { + fn from(err: SerializationError) -> Self { + Self::Serialization(err) + } +} + pub(crate) type PartitionKeyValue<'ps> = (&'ps [u8], &'ps ColumnSpec); pub(crate) struct PartitionKey<'ps> { From 
c7240680700959278f08ddb023b4ada6cf14d2a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 13:32:22 +0100 Subject: [PATCH 038/107] RequestSpan: record request sizes lazily This commit changes the way request sizes are recorded in `RequestSpan` from eager (size passed to constructor) to lazy - size recorded later by a new `record_request_size` method. --- scylla/src/transport/iterator.rs | 6 +++++- scylla/src/transport/session.rs | 13 ++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/scylla/src/transport/iterator.rs b/scylla/src/transport/iterator.rs index e5ebe4d92d..4c0ec74589 100644 --- a/scylla/src/transport/iterator.rs +++ b/scylla/src/transport/iterator.rs @@ -181,7 +181,11 @@ impl RowIterator { let query_ref = &query; - let span_creator = move || RequestSpan::new_query(&query_ref.contents, 0); + let span_creator = move || { + let span = RequestSpan::new_query(&query_ref.contents); + span.record_request_size(0); + span + }; let worker = RowIteratorWorker { sender: sender.into(), diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index cb26dfd0e1..0874863288 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -640,7 +640,8 @@ impl Session { ..Default::default() }; - let span = RequestSpan::new_query(&query.contents, serialized_values.size()); + let span = RequestSpan::new_query(&query.contents); + let span_ref = &span; let run_query_result = self .run_query( statement_info, @@ -660,6 +661,7 @@ impl Session { let paging_state_ref = &paging_state; async move { if values_ref.is_empty() { + span_ref.record_request_size(0); connection .query_with_consistency( query_ref, @@ -671,6 +673,7 @@ impl Session { .and_then(QueryResponse::into_non_error_query_response) } else { let prepared = connection.prepare(query_ref).await?; + span_ref.record_request_size(values_ref.size()); connection .execute_with_consistency( &prepared, @@ -1918,7 +1921,7 @@ 
pub(crate) struct RequestSpan { } impl RequestSpan { - pub(crate) fn new_query(contents: &str, request_size: usize) -> Self { + pub(crate) fn new_query(contents: &str) -> Self { use tracing::field::Empty; let span = trace_span!( @@ -1926,7 +1929,7 @@ impl RequestSpan { kind = "unprepared", contents = contents, // - request_size = request_size, + request_size = Empty, result_size = Empty, result_rows = Empty, replicas = Empty, @@ -2040,6 +2043,10 @@ impl RequestSpan { .record("replicas", tracing::field::display(&ReplicaIps(replicas))); } + pub(crate) fn record_request_size(&self, size: usize) { + self.span.record("request_size", size); + } + pub(crate) fn inc_speculative_executions(&self) { self.speculative_executions.fetch_add(1, Ordering::Relaxed); } From 0545568484f2d2fc857bf14c180f7f1c550b8c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 13:43:27 +0100 Subject: [PATCH 039/107] Session: switch `query_paged` method to new serialization API --- scylla/src/transport/session.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index 0874863288..577b3ce554 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -16,6 +16,7 @@ use itertools::{Either, Itertools}; pub use scylla_cql::errors::TranslationError; use scylla_cql::frame::response::result::{deser_cql_value, ColumnSpec, Rows}; use scylla_cql::frame::response::NonErrorResponse; +use scylla_cql::types::serialize::row::SerializeRow; use std::borrow::Borrow; use std::collections::HashMap; use std::fmt::Display; @@ -605,7 +606,8 @@ impl Session { query: impl Into, values: impl ValueList, ) -> Result { - self.query_paged(query, values, None).await + self.query_paged(query, values.serialized()?.as_ref(), None) + .await } /// Queries the database with a custom paging state. 
@@ -617,11 +619,10 @@ impl Session { pub async fn query_paged( &self, query: impl Into, - values: impl ValueList, + values: impl SerializeRow, paging_state: Option, ) -> Result { let query: Query = query.into(); - let serialized_values = values.serialized()?; let execution_profile = query .get_execution_profile_handle() @@ -657,7 +658,7 @@ impl Session { .unwrap_or(execution_profile.serial_consistency); // Needed to avoid moving query and values into async move block let query_ref = &query; - let values_ref = &serialized_values; + let values_ref = &values; let paging_state_ref = &paging_state; async move { if values_ref.is_empty() { @@ -673,11 +674,12 @@ impl Session { .and_then(QueryResponse::into_non_error_query_response) } else { let prepared = connection.prepare(query_ref).await?; - span_ref.record_request_size(values_ref.size()); + let serialized = prepared.serialize_values(values_ref)?; + span_ref.record_request_size(serialized.buffer_size()); connection .execute_with_consistency( &prepared, - values_ref, + &serialized.to_old_serialized_values(), consistency, serial_consistency, paging_state_ref.clone(), From 50fb19df018c1e1bc6460d607d162b91e010d2dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 13:56:14 +0100 Subject: [PATCH 040/107] Session: switch `query_iter` to new serialization API --- scylla/src/transport/session.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index 577b3ce554..0ccd093525 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -782,17 +782,16 @@ impl Session { pub async fn query_iter( &self, query: impl Into, - values: impl ValueList, + values: impl SerializeRow, ) -> Result { let query: Query = query.into(); - let serialized_values = values.serialized()?; let execution_profile = query .get_execution_profile_handle() .unwrap_or_else(|| 
self.get_default_execution_profile_handle()) .access(); - if serialized_values.is_empty() { + if values.is_empty() { RowIterator::new_for_query( query, execution_profile, @@ -805,9 +804,10 @@ impl Session { // so instead of sending one prepare to a specific connection on each iterator query, // we fully prepare a statement beforehand. let prepared = self.prepare(query).await?; + let values = prepared.serialize_values(&values)?; RowIterator::new_for_prepared_statement(PreparedIteratorConfig { prepared, - values: serialized_values.into_owned(), + values: values.to_old_serialized_values(), execution_profile, cluster_data: self.cluster.get_data(), metrics: self.metrics.clone(), From 615cc33c6bd9f7f2dfd51a68a145f8a87770ffc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 14:01:39 +0100 Subject: [PATCH 041/107] Session: switch `execute_paged` method to new serialization API --- scylla/src/transport/session.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index 0ccd093525..9995ccf5c5 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -950,7 +950,8 @@ impl Session { prepared: &PreparedStatement, values: impl ValueList, ) -> Result { - self.execute_paged(prepared, values, None).await + self.execute_paged(prepared, values.serialized()?.as_ref(), None) + .await } /// Executes a previously prepared statement with previously received paging state @@ -962,18 +963,16 @@ impl Session { pub async fn execute_paged( &self, prepared: &PreparedStatement, - values: impl ValueList, + values: impl SerializeRow, paging_state: Option, ) -> Result { - let serialized_values = values.serialized()?; - let values_ref = &serialized_values; + let serialized_values = prepared.serialize_values(&values)?; + let old_serialized_values = serialized_values.to_old_serialized_values(); + let values_ref = &old_serialized_values; let 
paging_state_ref = &paging_state; let (partition_key, token) = prepared - .extract_partition_key_and_calculate_token( - prepared.get_partitioner_name(), - &serialized_values, - )? + .extract_partition_key_and_calculate_token(prepared.get_partitioner_name(), values_ref)? .unzip(); let execution_profile = prepared @@ -998,7 +997,7 @@ impl Session { let span = RequestSpan::new_prepared( partition_key.as_ref().map(|pk| pk.iter()), token, - serialized_values.size(), + serialized_values.buffer_size(), ); if !span.span().is_disabled() { From 8675646fe2b3e24f662b6520afc021d6a48e8648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 14:16:56 +0100 Subject: [PATCH 042/107] Session: switch `execute_iter` to new serialization API --- scylla/src/transport/session.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index 9995ccf5c5..a1976bb01c 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -1107,10 +1107,10 @@ impl Session { pub async fn execute_iter( &self, prepared: impl Into, - values: impl ValueList, + values: impl SerializeRow, ) -> Result { let prepared = prepared.into(); - let serialized_values = values.serialized()?; + let serialized_values = prepared.serialize_values(&values)?; let execution_profile = prepared .get_execution_profile_handle() @@ -1119,7 +1119,7 @@ impl Session { RowIterator::new_for_prepared_statement(PreparedIteratorConfig { prepared, - values: serialized_values.into_owned(), + values: serialized_values.to_old_serialized_values(), execution_profile, cluster_data: self.cluster.get_data(), metrics: self.metrics.clone(), From 5ebb2d2c0c2df0321be0a12c84271741ed4b788b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 14:46:13 +0100 Subject: [PATCH 043/107] Session: switch `execute` to new serialization API This commit also adjusts `test_unusual_valuelists` in 
session_test.rs, because it used `&dyn Value` type explicitly. --- scylla/src/transport/session.rs | 5 ++--- scylla/src/transport/session_test.rs | 17 ++++++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index a1976bb01c..c323001f56 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -948,10 +948,9 @@ impl Session { pub async fn execute( &self, prepared: &PreparedStatement, - values: impl ValueList, + values: impl SerializeRow, ) -> Result { - self.execute_paged(prepared, values.serialized()?.as_ref(), None) - .await + self.execute_paged(prepared, values, None).await } /// Executes a previously prepared statement with previously received paging state diff --git a/scylla/src/transport/session_test.rs b/scylla/src/transport/session_test.rs index 284a222ba0..dc8a3b7671 100644 --- a/scylla/src/transport/session_test.rs +++ b/scylla/src/transport/session_test.rs @@ -29,8 +29,8 @@ use bytes::Bytes; use futures::{FutureExt, StreamExt, TryStreamExt}; use itertools::Itertools; use scylla_cql::frame::response::result::ColumnType; -use scylla_cql::frame::value::Value; use scylla_cql::types::serialize::row::SerializedValues; +use scylla_cql::types::serialize::value::SerializeCql; use std::collections::BTreeSet; use std::collections::{BTreeMap, HashMap}; use std::sync::atomic::{AtomicBool, Ordering}; @@ -1999,14 +1999,17 @@ async fn test_unusual_valuelists() { .await .unwrap(); - let values_dyn: Vec<&dyn Value> = - vec![&1 as &dyn Value, &2 as &dyn Value, &"&dyn" as &dyn Value]; + let values_dyn: Vec<&dyn SerializeCql> = vec![ + &1 as &dyn SerializeCql, + &2 as &dyn SerializeCql, + &"&dyn" as &dyn SerializeCql, + ]; session.execute(&insert_a_b_c, values_dyn).await.unwrap(); - let values_box_dyn: Vec> = vec![ - Box::new(1) as Box, - Box::new(3) as Box, - Box::new("Box dyn") as Box, + let values_box_dyn: Vec> = vec![ + Box::new(1) as Box, + Box::new(3) 
as Box, + Box::new("Box dyn") as Box, ]; session .execute(&insert_a_b_c, values_box_dyn) From 9bf7b77375779b28a18a6c4ce8bdb76fa5380fa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 15:23:08 +0100 Subject: [PATCH 044/107] examples: Use both new and old serialization APIs We use both APIs so that the code works before and after switching `Session::query` from accepting `ValueList` to `SerializeRow`. We can remove usage of old APIs after the change to `Session::query`. --- examples/user-defined-type.rs | 4 ++-- examples/value_list.rs | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/examples/user-defined-type.rs b/examples/user-defined-type.rs index 53465b2f10..1fc4ce489c 100644 --- a/examples/user-defined-type.rs +++ b/examples/user-defined-type.rs @@ -1,6 +1,6 @@ use anyhow::Result; use scylla::macros::{FromUserType, IntoUserType}; -use scylla::{IntoTypedRows, Session, SessionBuilder}; +use scylla::{IntoTypedRows, SerializeCql, Session, SessionBuilder}; use std::env; #[tokio::main] @@ -29,7 +29,7 @@ async fn main() -> Result<()> { // Define custom struct that matches User Defined Type created earlier // wrapping field in Option will gracefully handle null field values - #[derive(Debug, IntoUserType, FromUserType)] + #[derive(Debug, IntoUserType, FromUserType, SerializeCql)] struct MyType { int_val: i32, text_val: Option, diff --git a/examples/value_list.rs b/examples/value_list.rs index 44b388dcbc..6d1b75c278 100644 --- a/examples/value_list.rs +++ b/examples/value_list.rs @@ -19,7 +19,7 @@ async fn main() { .await .unwrap(); - #[derive(scylla::ValueList)] + #[derive(scylla::ValueList, scylla::SerializeRow)] struct MyType<'a> { k: i32, my: Option<&'a str>, @@ -36,8 +36,10 @@ async fn main() { .unwrap(); // You can also use type generics: - #[derive(scylla::ValueList)] - struct MyTypeWithGenerics { + #[derive(scylla::ValueList, scylla::SerializeRow)] + struct MyTypeWithGenerics< + S: 
scylla::frame::value::Value + scylla::serialize::value::SerializeCql, + > { k: i32, my: Option, } From 6cc643ae8ae7a20a1d026546b168fd35270b0933 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 15:29:35 +0100 Subject: [PATCH 045/107] cql_collections_test: Use both new and old serialization APIs We use both APIs so that the code works before and after switching `Session::query` from accepting `ValueList` to `SerializeRow`. We can remove usage of old APIs after the change to `Session::query`. --- scylla/src/transport/cql_collections_test.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scylla/src/transport/cql_collections_test.rs b/scylla/src/transport/cql_collections_test.rs index cd89443271..fa0cc9a051 100644 --- a/scylla/src/transport/cql_collections_test.rs +++ b/scylla/src/transport/cql_collections_test.rs @@ -3,6 +3,7 @@ use crate::frame::value::Value; use crate::test_utils::create_new_session_builder; use crate::utils::test_utils::unique_keyspace_name; use crate::{frame::response::result::CqlValue, IntoTypedRows, Session}; +use scylla_cql::types::serialize::value::SerializeCql; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; async fn connect() -> Session { @@ -33,7 +34,7 @@ async fn insert_and_select( to_insert: &InsertT, expected: &SelectT, ) where - InsertT: Value, + InsertT: Value + SerializeCql, SelectT: FromCqlVal> + PartialEq + std::fmt::Debug, { session From ea93d395d0865d272c5fb29dcc5e8cbc609a2a0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 15:46:50 +0100 Subject: [PATCH 046/107] cql_types_test: Use both new and old serialization APIs We use both APIs so that the code works before and after switching `Session::query` from accepting `ValueList` to `SerializeRow`. We can remove usage of old APIs after the change to `Session::query`. 
--- scylla/src/transport/cql_types_test.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scylla/src/transport/cql_types_test.rs b/scylla/src/transport/cql_types_test.rs index ced5075918..470f81e9e9 100644 --- a/scylla/src/transport/cql_types_test.rs +++ b/scylla/src/transport/cql_types_test.rs @@ -9,6 +9,8 @@ use crate::transport::session::Session; use crate::utils::test_utils::unique_keyspace_name; use bigdecimal::BigDecimal; use num_bigint::BigInt; +use scylla_cql::types::serialize::value::SerializeCql; +use scylla_macros::SerializeCql; use std::cmp::PartialEq; use std::fmt::Debug; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; @@ -64,7 +66,7 @@ async fn init_test(table_name: &str, type_name: &str) -> Session { // Expected values and bound values are computed using T::from_str async fn run_tests(tests: &[&str], type_name: &str) where - T: Value + FromCqlVal + FromStr + Debug + Clone + PartialEq, + T: Value + SerializeCql + FromCqlVal + FromStr + Debug + Clone + PartialEq, { let session: Session = init_test(type_name, type_name).await; session.await_schema_agreement().await.unwrap(); @@ -1361,7 +1363,8 @@ async fn test_udt_after_schema_update() { .await .unwrap(); - #[derive(IntoUserType, FromUserType, Debug, PartialEq)] + #[derive(IntoUserType, SerializeCql, FromUserType, Debug, PartialEq)] + #[scylla(crate = crate)] struct UdtV1 { pub first: i32, pub second: bool, From ac88e0aa7ee89de5ea2765bd1e5bd851069e7782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 15:49:33 +0100 Subject: [PATCH 047/107] session_test: Use both new and old serialization APIs We use both APIs so that the code works before and after switching `Session::query` from accepting `ValueList` to `SerializeRow`. We can remove usage of old APIs after the change to `Session::query`. 
--- scylla/src/transport/session_test.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scylla/src/transport/session_test.rs b/scylla/src/transport/session_test.rs index dc8a3b7671..0a8b9f2a63 100644 --- a/scylla/src/transport/session_test.rs +++ b/scylla/src/transport/session_test.rs @@ -326,9 +326,12 @@ async fn test_prepared_statement() { assert!(e.is_none()); assert_eq!((a, b, c, d), (17, 16, &String::from("I'm prepared!!!"), 7)) } - // Check that ValueList macro works + // Check that SerializeRow macro works { - #[derive(scylla::ValueList, scylla::FromRow, PartialEq, Debug, Clone)] + #[derive( + scylla::ValueList, scylla::SerializeRow, scylla::FromRow, PartialEq, Debug, Clone, + )] + #[scylla(crate = crate)] struct ComplexPk { a: i32, b: i32, From b3261cd618b476343b0df6ff3f0399e7ceaee236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 15:20:46 +0100 Subject: [PATCH 048/107] Session: switch `query` method to new serialization API --- scylla/src/transport/session.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index c323001f56..335e8d7848 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -47,9 +47,7 @@ use super::NodeRef; use crate::cql_to_rust::FromRow; use crate::frame::response::cql_to_rust::FromRowError; use crate::frame::response::result; -use crate::frame::value::{ - BatchValues, BatchValuesFirstSerialized, BatchValuesIterator, ValueList, -}; +use crate::frame::value::{BatchValues, BatchValuesFirstSerialized, BatchValuesIterator}; use crate::prepared_statement::PreparedStatement; use crate::query::Query; use crate::routing::Token; @@ -604,10 +602,9 @@ impl Session { pub async fn query( &self, query: impl Into, - values: impl ValueList, + values: impl SerializeRow, ) -> Result { - self.query_paged(query, values.serialized()?.as_ref(), None) - .await + 
self.query_paged(query, values, None).await } /// Queries the database with a custom paging state. From 25a6d361c1be6b3c1064cd8e5b7f79e2fe477973 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Tue, 12 Dec 2023 14:34:55 +0100 Subject: [PATCH 049/107] Book: Derive new serialization traits in udt.md and values.md --- docs/source/data-types/udt.md | 4 ++-- docs/source/queries/values.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/data-types/udt.md b/docs/source/data-types/udt.md index ceb8ae8ca6..e79ad3feae 100644 --- a/docs/source/data-types/udt.md +++ b/docs/source/data-types/udt.md @@ -37,10 +37,10 @@ Now it can be sent and received just like any other CQL value: # use std::error::Error; # async fn check_only_compiles(session: &Session) -> Result<(), Box> { use scylla::IntoTypedRows; -use scylla::macros::{FromUserType, IntoUserType}; +use scylla::macros::{FromUserType, IntoUserType, SerializeCql}; use scylla::cql_to_rust::FromCqlVal; -#[derive(Debug, IntoUserType, FromUserType)] +#[derive(Debug, IntoUserType, FromUserType, SerializeCql)] struct MyType { int_val: i32, text_val: Option, diff --git a/docs/source/queries/values.md b/docs/source/queries/values.md index 09b369b689..400e7139ab 100644 --- a/docs/source/queries/values.md +++ b/docs/source/queries/values.md @@ -12,7 +12,7 @@ or a custom struct which derives from `ValueList`. A few examples: ```rust # extern crate scylla; -# use scylla::{Session, ValueList, frame::response::result::CqlValue}; +# use scylla::{Session, ValueList, SerializeRow, frame::response::result::CqlValue}; # use std::error::Error; # use std::collections::HashMap; # async fn check_only_compiles(session: &Session) -> Result<(), Box> { @@ -34,7 +34,7 @@ session // Sending an integer and a string using a named struct. 
// The values will be passed in the order from the struct definition -#[derive(ValueList)] +#[derive(ValueList, SerializeRow)] struct IntString { first_col: i32, second_col: String, From c353f17dfaae8365bed13c41dfe9fa7c1ccb472e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 16:13:22 +0100 Subject: [PATCH 050/107] examples: Remove usage of old serialization APIs --- examples/user-defined-type.rs | 4 ++-- examples/value_list.rs | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/examples/user-defined-type.rs b/examples/user-defined-type.rs index 1fc4ce489c..5a0f1b55f5 100644 --- a/examples/user-defined-type.rs +++ b/examples/user-defined-type.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use scylla::macros::{FromUserType, IntoUserType}; +use scylla::macros::FromUserType; use scylla::{IntoTypedRows, SerializeCql, Session, SessionBuilder}; use std::env; @@ -29,7 +29,7 @@ async fn main() -> Result<()> { // Define custom struct that matches User Defined Type created earlier // wrapping field in Option will gracefully handle null field values - #[derive(Debug, IntoUserType, FromUserType, SerializeCql)] + #[derive(Debug, FromUserType, SerializeCql)] struct MyType { int_val: i32, text_val: Option, diff --git a/examples/value_list.rs b/examples/value_list.rs index 6d1b75c278..409f8a5208 100644 --- a/examples/value_list.rs +++ b/examples/value_list.rs @@ -19,7 +19,7 @@ async fn main() { .await .unwrap(); - #[derive(scylla::ValueList, scylla::SerializeRow)] + #[derive(scylla::SerializeRow)] struct MyType<'a> { k: i32, my: Option<&'a str>, @@ -36,10 +36,8 @@ async fn main() { .unwrap(); // You can also use type generics: - #[derive(scylla::ValueList, scylla::SerializeRow)] - struct MyTypeWithGenerics< - S: scylla::frame::value::Value + scylla::serialize::value::SerializeCql, - > { + #[derive(scylla::SerializeRow)] + struct MyTypeWithGenerics { k: i32, my: Option, } From 8a3346778b4d441751ff592c76c214a2f91e5b07 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 16:15:45 +0100 Subject: [PATCH 051/107] cql_collections_test: Remove usage of old serialization APIs --- scylla/src/transport/cql_collections_test.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scylla/src/transport/cql_collections_test.rs b/scylla/src/transport/cql_collections_test.rs index fa0cc9a051..a3e8c76089 100644 --- a/scylla/src/transport/cql_collections_test.rs +++ b/scylla/src/transport/cql_collections_test.rs @@ -1,5 +1,4 @@ use crate::cql_to_rust::FromCqlVal; -use crate::frame::value::Value; use crate::test_utils::create_new_session_builder; use crate::utils::test_utils::unique_keyspace_name; use crate::{frame::response::result::CqlValue, IntoTypedRows, Session}; @@ -34,7 +33,7 @@ async fn insert_and_select( to_insert: &InsertT, expected: &SelectT, ) where - InsertT: Value + SerializeCql, + InsertT: SerializeCql, SelectT: FromCqlVal> + PartialEq + std::fmt::Debug, { session From 72a81ce8a0eecaf4260ce4fbfd7fca2848c5e2ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 16:17:55 +0100 Subject: [PATCH 052/107] cql_types_test: Remove usage of old serialization APIs --- scylla/src/transport/cql_types_test.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scylla/src/transport/cql_types_test.rs b/scylla/src/transport/cql_types_test.rs index 470f81e9e9..6c05fc90f2 100644 --- a/scylla/src/transport/cql_types_test.rs +++ b/scylla/src/transport/cql_types_test.rs @@ -1,8 +1,8 @@ use crate as scylla; use crate::cql_to_rust::FromCqlVal; use crate::frame::response::result::CqlValue; -use crate::frame::value::{Counter, CqlDate, CqlTime, CqlTimestamp, Value}; -use crate::macros::{FromUserType, IntoUserType}; +use crate::frame::value::{Counter, CqlDate, CqlTime, CqlTimestamp}; +use crate::macros::FromUserType; use crate::test_utils::create_new_session_builder; use crate::transport::session::IntoTypedRows; use 
crate::transport::session::Session; @@ -66,7 +66,7 @@ async fn init_test(table_name: &str, type_name: &str) -> Session { // Expected values and bound values are computed using T::from_str async fn run_tests(tests: &[&str], type_name: &str) where - T: Value + SerializeCql + FromCqlVal + FromStr + Debug + Clone + PartialEq, + T: SerializeCql + FromCqlVal + FromStr + Debug + Clone + PartialEq, { let session: Session = init_test(type_name, type_name).await; session.await_schema_agreement().await.unwrap(); @@ -1363,7 +1363,7 @@ async fn test_udt_after_schema_update() { .await .unwrap(); - #[derive(IntoUserType, SerializeCql, FromUserType, Debug, PartialEq)] + #[derive(SerializeCql, FromUserType, Debug, PartialEq)] #[scylla(crate = crate)] struct UdtV1 { pub first: i32, From c7e766e3f76e07e769bb1f08da0ed8a7f6f5af5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 16:22:27 +0100 Subject: [PATCH 053/107] session_test: Remove usage of old serialization macros / traits --- scylla/src/transport/session_test.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scylla/src/transport/session_test.rs b/scylla/src/transport/session_test.rs index 0a8b9f2a63..f16a9ef2c0 100644 --- a/scylla/src/transport/session_test.rs +++ b/scylla/src/transport/session_test.rs @@ -328,9 +328,7 @@ async fn test_prepared_statement() { } // Check that SerializeRow macro works { - #[derive( - scylla::ValueList, scylla::SerializeRow, scylla::FromRow, PartialEq, Debug, Clone, - )] + #[derive(scylla::SerializeRow, scylla::FromRow, PartialEq, Debug, Clone)] #[scylla(crate = crate)] struct ComplexPk { a: i32, From 4a09baa83e6226f4a61d57806c25abbea86c5f8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 16:34:52 +0100 Subject: [PATCH 054/107] CachingSession: Use new serialization APIs --- scylla/src/transport/caching_session.rs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git 
a/scylla/src/transport/caching_session.rs b/scylla/src/transport/caching_session.rs index 3d3dfa0e17..14546b93e4 100644 --- a/scylla/src/transport/caching_session.rs +++ b/scylla/src/transport/caching_session.rs @@ -1,5 +1,5 @@ use crate::batch::{Batch, BatchStatement}; -use crate::frame::value::{BatchValues, ValueList}; +use crate::frame::value::BatchValues; use crate::prepared_statement::PreparedStatement; use crate::query::Query; use crate::transport::errors::QueryError; @@ -10,6 +10,7 @@ use bytes::Bytes; use dashmap::DashMap; use futures::future::try_join_all; use scylla_cql::frame::response::result::PreparedMetadata; +use scylla_cql::types::serialize::row::SerializeRow; use std::collections::hash_map::RandomState; use std::hash::BuildHasher; @@ -70,38 +71,35 @@ where pub async fn execute( &self, query: impl Into, - values: impl ValueList, + values: impl SerializeRow, ) -> Result { let query = query.into(); let prepared = self.add_prepared_statement_owned(query).await?; - let values = values.serialized()?; - self.session.execute(&prepared, values.clone()).await + self.session.execute(&prepared, values).await } /// Does the same thing as [`Session::execute_iter`] but uses the prepared statement cache pub async fn execute_iter( &self, query: impl Into, - values: impl ValueList, + values: impl SerializeRow, ) -> Result { let query = query.into(); let prepared = self.add_prepared_statement_owned(query).await?; - let values = values.serialized()?; - self.session.execute_iter(prepared, values.clone()).await + self.session.execute_iter(prepared, values).await } /// Does the same thing as [`Session::execute_paged`] but uses the prepared statement cache pub async fn execute_paged( &self, query: impl Into, - values: impl ValueList, + values: impl SerializeRow, paging_state: Option, ) -> Result { let query = query.into(); let prepared = self.add_prepared_statement_owned(query).await?; - let values = values.serialized()?; self.session - .execute_paged(&prepared, 
values.clone(), paging_state.clone()) + .execute_paged(&prepared, values, paging_state.clone()) .await } From 3d32c1c02acd39f7e334fdec24bb242a23165481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 16:39:28 +0100 Subject: [PATCH 055/107] RowIterator: switch `new_for_prepared_statement` to new serialization APIs --- scylla/src/transport/iterator.rs | 10 ++++++---- scylla/src/transport/session.rs | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/scylla/src/transport/iterator.rs b/scylla/src/transport/iterator.rs index 4c0ec74589..c280f8a01a 100644 --- a/scylla/src/transport/iterator.rs +++ b/scylla/src/transport/iterator.rs @@ -12,6 +12,7 @@ use bytes::Bytes; use futures::Stream; use scylla_cql::frame::response::NonErrorResponse; use scylla_cql::frame::types::SerialConsistency; +use scylla_cql::types::serialize::row::SerializedValues; use std::result::Result; use thiserror::Error; use tokio::sync::mpsc; @@ -73,7 +74,7 @@ struct ReceivedPage { pub(crate) struct PreparedIteratorConfig { pub(crate) prepared: PreparedStatement, - pub(crate) values: LegacySerializedValues, + pub(crate) values: SerializedValues, pub(crate) execution_profile: Arc, pub(crate) cluster_data: Arc, pub(crate) metrics: Arc, @@ -240,11 +241,12 @@ impl RowIterator { let worker_task = async move { let prepared_ref = &config.prepared; let values_ref = &config.values; + let old_values = values_ref.to_old_serialized_values(); let (partition_key, token) = match prepared_ref .extract_partition_key_and_calculate_token( prepared_ref.get_partitioner_name(), - values_ref, + &old_values, ) { Ok(res) => res.unzip(), Err(err) => { @@ -274,7 +276,7 @@ impl RowIterator { connection .execute_with_consistency( prepared_ref, - values_ref, + &values_ref.to_old_serialized_values(), consistency, serial_consistency, paging_state, @@ -282,7 +284,7 @@ impl RowIterator { .await }; - let serialized_values_size = config.values.size(); + let serialized_values_size = 
config.values.buffer_size(); let replicas: Option> = if let (Some(keyspace), Some(token)) = diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index 335e8d7848..2ad1090ef7 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -804,7 +804,7 @@ impl Session { let values = prepared.serialize_values(&values)?; RowIterator::new_for_prepared_statement(PreparedIteratorConfig { prepared, - values: values.to_old_serialized_values(), + values, execution_profile, cluster_data: self.cluster.get_data(), metrics: self.metrics.clone(), @@ -1115,7 +1115,7 @@ impl Session { RowIterator::new_for_prepared_statement(PreparedIteratorConfig { prepared, - values: serialized_values.to_old_serialized_values(), + values: serialized_values, execution_profile, cluster_data: self.cluster.get_data(), metrics: self.metrics.clone(), From d3b884b85e5dbd86ea9f00060e44eac548360745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 16:57:41 +0100 Subject: [PATCH 056/107] topology: type-safe serialization in query_filter_keyspace_name This is in preparation to switch `Connection::execute_iter` to new serialization APIs. 
--- scylla/src/transport/topology.rs | 35 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/scylla/src/transport/topology.rs b/scylla/src/transport/topology.rs index bc8a80ca32..b6cd479a09 100644 --- a/scylla/src/transport/topology.rs +++ b/scylla/src/transport/topology.rs @@ -15,7 +15,6 @@ use rand::seq::SliceRandom; use rand::{thread_rng, Rng}; use scylla_cql::errors::NewSessionError; use scylla_cql::frame::response::result::Row; -use scylla_cql::frame::value::ValueList; use scylla_macros::FromRow; use std::borrow::BorrowMut; use std::cell::Cell; @@ -854,28 +853,30 @@ async fn create_peer_from_row( })) } -fn query_filter_keyspace_name( +fn query_filter_keyspace_name<'a>( conn: &Arc, - query_str: &str, - keyspaces_to_fetch: &[String], -) -> impl Stream> { - let keyspaces = &[keyspaces_to_fetch] as &[&[String]]; - let (query_str, query_values) = if !keyspaces_to_fetch.is_empty() { - (format!("{query_str} where keyspace_name in ?"), keyspaces) - } else { - (query_str.into(), &[] as &[&[String]]) - }; - let query_values = query_values.serialized().map(|sv| sv.into_owned()); - let mut query = Query::new(query_str); + query_str: &'a str, + keyspaces_to_fetch: &'a [String], +) -> impl Stream> + 'a { let conn = conn.clone(); - query.set_page_size(1024); + let fut = async move { - let query_values = query_values?; - if query_values.is_empty() { + if keyspaces_to_fetch.is_empty() { + let mut query = Query::new(query_str); + query.set_page_size(1024); + conn.query_iter(query).await } else { + let keyspaces = &[keyspaces_to_fetch] as &[&[String]]; + let query_str = format!("{query_str} where keyspace_name in ?"); + + let mut query = Query::new(query_str); + query.set_page_size(1024); + let prepared = conn.prepare(&query).await?; - conn.execute_iter(prepared, query_values).await + let serialized_values = prepared.serialize_values(&keyspaces)?; + conn.execute_iter(prepared, &serialized_values.to_old_serialized_values()) + .await } 
}; fut.into_stream().try_flatten() From d5b8636d9651520a365f14d441d14981b28a0318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 17:03:24 +0100 Subject: [PATCH 057/107] Connection: switch `execute_iter` to new serialization API --- scylla/src/transport/connection.rs | 6 +++--- scylla/src/transport/topology.rs | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/scylla/src/transport/connection.rs b/scylla/src/transport/connection.rs index fb1fa60b1e..818dccfe9a 100644 --- a/scylla/src/transport/connection.rs +++ b/scylla/src/transport/connection.rs @@ -5,6 +5,7 @@ use scylla_cql::frame::request::options::Options; use scylla_cql::frame::response::Error; use scylla_cql::frame::types::SerialConsistency; use scylla_cql::frame::value::LegacySerializedValues; +use scylla_cql::types::serialize::row::SerializedValues; use socket2::{SockRef, TcpKeepalive}; use tokio::io::{split, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader, BufWriter}; use tokio::net::{TcpSocket, TcpStream}; @@ -744,17 +745,16 @@ impl Connection { pub(crate) async fn execute_iter( self: Arc, prepared_statement: PreparedStatement, - values: impl ValueList, + values: SerializedValues, ) -> Result { let consistency = prepared_statement .config .determine_consistency(self.config.default_consistency); let serial_consistency = prepared_statement.config.serial_consistency.flatten(); - let serialized = values.serialized()?.into_owned(); RowIterator::new_for_connection_execute_iter( prepared_statement, - serialized, + values.to_old_serialized_values(), self, consistency, serial_consistency, diff --git a/scylla/src/transport/topology.rs b/scylla/src/transport/topology.rs index b6cd479a09..e7a2adcff2 100644 --- a/scylla/src/transport/topology.rs +++ b/scylla/src/transport/topology.rs @@ -875,8 +875,7 @@ fn query_filter_keyspace_name<'a>( let prepared = conn.prepare(&query).await?; let serialized_values = prepared.serialize_values(&keyspaces)?; - 
conn.execute_iter(prepared, &serialized_values.to_old_serialized_values()) - .await + conn.execute_iter(prepared, serialized_values).await } }; fut.into_stream().try_flatten() From a4a27de194203f8d4d22c8e580e4e4f506899da8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 17:13:14 +0100 Subject: [PATCH 058/107] RowIterator: switch `new_for_connection_execute_iter` to new serialization API --- scylla/src/transport/connection.rs | 2 +- scylla/src/transport/iterator.rs | 15 +++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/scylla/src/transport/connection.rs b/scylla/src/transport/connection.rs index 818dccfe9a..879c02313b 100644 --- a/scylla/src/transport/connection.rs +++ b/scylla/src/transport/connection.rs @@ -754,7 +754,7 @@ impl Connection { RowIterator::new_for_connection_execute_iter( prepared_statement, - values.to_old_serialized_values(), + values, self, consistency, serial_consistency, diff --git a/scylla/src/transport/iterator.rs b/scylla/src/transport/iterator.rs index c280f8a01a..f95bce8457 100644 --- a/scylla/src/transport/iterator.rs +++ b/scylla/src/transport/iterator.rs @@ -23,12 +23,9 @@ use super::execution_profile::ExecutionProfileInner; use super::session::RequestSpan; use crate::cql_to_rust::{FromRow, FromRowError}; -use crate::frame::{ - response::{ - result, - result::{ColumnSpec, Row, Rows}, - }, - value::LegacySerializedValues, +use crate::frame::response::{ + result, + result::{ColumnSpec, Row, Rows}, }; use crate::history::{self, HistoryListener}; use crate::statement::Consistency; @@ -368,7 +365,7 @@ impl RowIterator { pub(crate) async fn new_for_connection_execute_iter( mut prepared: PreparedStatement, - values: LegacySerializedValues, + values: SerializedValues, connection: Arc, consistency: Consistency, serial_consistency: Option, @@ -378,13 +375,15 @@ impl RowIterator { } let (sender, receiver) = mpsc::channel::>(1); + let old_values = values.to_old_serialized_values(); + 
let worker_task = async move { let worker = SingleConnectionRowIteratorWorker { sender: sender.into(), fetcher: |paging_state| { connection.execute_with_consistency( &prepared, - &values, + &old_values, consistency, serial_consistency, paging_state, From 7ae1abb0f40d988ece3de278407fa1d38fe996c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 17:35:35 +0100 Subject: [PATCH 059/107] Connection: switch `execute` method to new serialization API --- scylla/src/transport/connection.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scylla/src/transport/connection.rs b/scylla/src/transport/connection.rs index 879c02313b..a4729cf375 100644 --- a/scylla/src/transport/connection.rs +++ b/scylla/src/transport/connection.rs @@ -667,13 +667,13 @@ impl Connection { pub(crate) async fn execute( &self, prepared: PreparedStatement, - values: impl ValueList, + values: SerializedValues, paging_state: Option, ) -> Result { // This method is used only for driver internal queries, so no need to consult execution profile here. 
self.execute_with_consistency( &prepared, - values, + &values.to_old_serialized_values(), prepared .config .determine_consistency(self.config.default_consistency), @@ -1938,7 +1938,8 @@ mod tests { let prepared = connection.prepare(&insert_query).await.unwrap(); for v in &values { let prepared_clone = prepared.clone(); - let fut = async { connection.execute(prepared_clone, (*v,), None).await }; + let values = prepared_clone.serialize_values(&(*v,)).unwrap(); + let fut = async { connection.execute(prepared_clone, values, None).await }; insert_futures.push(fut); } @@ -2036,10 +2037,11 @@ mod tests { let conn = conn.clone(); async move { let prepared = conn.prepare(&q).await.unwrap(); - let response = conn - .execute(prepared.clone(), (j, vec![j as u8; j as usize]), None) - .await + let values = prepared + .serialize_values(&(j, vec![j as u8; j as usize])) .unwrap(); + let response = + conn.execute(prepared.clone(), values, None).await.unwrap(); // QueryResponse might contain an error - make sure that there were no errors let _nonerror_response = response.into_non_error_query_response().unwrap(); From 5960e3bac7637966eefb856386aeca4426f7311d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 17:45:27 +0100 Subject: [PATCH 060/107] Connection: switch `execute_with_consistency` to new serialization API --- scylla/src/transport/connection.rs | 10 ++++------ scylla/src/transport/iterator.rs | 6 ++---- scylla/src/transport/session.rs | 10 +++++++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/scylla/src/transport/connection.rs b/scylla/src/transport/connection.rs index a4729cf375..43eacf265b 100644 --- a/scylla/src/transport/connection.rs +++ b/scylla/src/transport/connection.rs @@ -54,7 +54,7 @@ use crate::frame::{ request::{self, batch, execute, query, register, SerializableRequest}, response::{event::Event, result, NonErrorResponse, Response, ResponseOpcode}, server_event_type::EventType, - value::{BatchValues, 
BatchValuesIterator, ValueList}, + value::{BatchValues, BatchValuesIterator}, FrameParams, SerializedRequest, }; use crate::query::Query; @@ -673,7 +673,7 @@ impl Connection { // This method is used only for driver internal queries, so no need to consult execution profile here. self.execute_with_consistency( &prepared, - &values.to_old_serialized_values(), + &values, prepared .config .determine_consistency(self.config.default_consistency), @@ -686,19 +686,17 @@ impl Connection { pub(crate) async fn execute_with_consistency( &self, prepared_statement: &PreparedStatement, - values: impl ValueList, + values: &SerializedValues, consistency: Consistency, serial_consistency: Option, paging_state: Option, ) -> Result { - let serialized_values = values.serialized()?; - let execute_frame = execute::Execute { id: prepared_statement.get_id().to_owned(), parameters: query::QueryParameters { consistency, serial_consistency, - values: serialized_values, + values: Cow::Owned(values.to_old_serialized_values()), page_size: prepared_statement.get_page_size(), timestamp: prepared_statement.get_timestamp(), paging_state, diff --git a/scylla/src/transport/iterator.rs b/scylla/src/transport/iterator.rs index f95bce8457..9af5f13d60 100644 --- a/scylla/src/transport/iterator.rs +++ b/scylla/src/transport/iterator.rs @@ -273,7 +273,7 @@ impl RowIterator { connection .execute_with_consistency( prepared_ref, - &values_ref.to_old_serialized_values(), + values_ref, consistency, serial_consistency, paging_state, @@ -375,15 +375,13 @@ impl RowIterator { } let (sender, receiver) = mpsc::channel::>(1); - let old_values = values.to_old_serialized_values(); - let worker_task = async move { let worker = SingleConnectionRowIteratorWorker { sender: sender.into(), fetcher: |paging_state| { connection.execute_with_consistency( &prepared, - &old_values, + &values, consistency, serial_consistency, paging_state, diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index 
2ad1090ef7..0b22b75460 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -676,7 +676,7 @@ impl Session { connection .execute_with_consistency( &prepared, - &serialized.to_old_serialized_values(), + &serialized, consistency, serial_consistency, paging_state_ref.clone(), @@ -964,11 +964,15 @@ impl Session { ) -> Result { let serialized_values = prepared.serialize_values(&values)?; let old_serialized_values = serialized_values.to_old_serialized_values(); - let values_ref = &old_serialized_values; + let values_ref = &serialized_values; + let old_values_ref = &old_serialized_values; let paging_state_ref = &paging_state; let (partition_key, token) = prepared - .extract_partition_key_and_calculate_token(prepared.get_partitioner_name(), values_ref)? + .extract_partition_key_and_calculate_token( + prepared.get_partitioner_name(), + old_values_ref, + )? .unzip(); let execution_profile = prepared From 991ea48516f5d7f8bdb07a3d64ed6d98f6fbcd52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 17:56:41 +0100 Subject: [PATCH 061/107] PreparedStatement: switch `extract_partition_key*` and `PrimaryKey::new` methods to new serialization API This commit switches all those methods at once, because switching them one by one is too problematic - lifetime issue of PrimaryKey and temporary LegacySerializedValues. 
--- scylla/src/statement/prepared_statement.rs | 37 ++++++++++++---------- scylla/src/transport/iterator.rs | 3 +- scylla/src/transport/session.rs | 7 +--- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/scylla/src/statement/prepared_statement.rs b/scylla/src/statement/prepared_statement.rs index 97ae7fb30b..5ee2a13e33 100644 --- a/scylla/src/statement/prepared_statement.rs +++ b/scylla/src/statement/prepared_statement.rs @@ -15,7 +15,6 @@ use scylla_cql::frame::response::result::ColumnSpec; use super::StatementConfig; use crate::frame::response::result::PreparedMetadata; use crate::frame::types::{Consistency, SerialConsistency}; -use crate::frame::value::LegacySerializedValues; use crate::history::HistoryListener; use crate::retry_policy::RetryPolicy; use crate::routing::Token; @@ -139,8 +138,7 @@ impl PreparedStatement { bound_values: &impl SerializeRow, ) -> Result { let serialized = self.serialize_values(bound_values)?; - let old_serialized = serialized.to_old_serialized_values(); - let partition_key = self.extract_partition_key(&old_serialized)?; + let partition_key = self.extract_partition_key(&serialized)?; let mut buf = BytesMut::new(); let mut writer = |chunk: &[u8]| buf.extend_from_slice(chunk); @@ -154,7 +152,7 @@ impl PreparedStatement { /// This is a preparation step necessary for calculating token based on a prepared statement. 
pub(crate) fn extract_partition_key<'ps>( &'ps self, - bound_values: &'ps LegacySerializedValues, + bound_values: &'ps SerializedValues, ) -> Result { PartitionKey::new(self.get_prepared_metadata(), bound_values) } @@ -162,7 +160,7 @@ impl PreparedStatement { pub(crate) fn extract_partition_key_and_calculate_token<'ps>( &'ps self, partitioner_name: &'ps PartitionerName, - serialized_values: &'ps LegacySerializedValues, + serialized_values: &'ps SerializedValues, ) -> Result, Token)>, QueryError> { if !self.is_token_aware() { return Ok(None); @@ -196,7 +194,7 @@ impl PreparedStatement { pub fn calculate_token(&self, values: &impl SerializeRow) -> Result, QueryError> { self.extract_partition_key_and_calculate_token( &self.partitioner_name, - &self.serialize_values(values)?.to_old_serialized_values(), + &self.serialize_values(values)?, ) .map(|opt| opt.map(|(_pk, token)| token)) } @@ -401,7 +399,7 @@ impl<'ps> PartitionKey<'ps> { fn new( prepared_metadata: &'ps PreparedMetadata, - bound_values: &'ps LegacySerializedValues, + bound_values: &'ps SerializedValues, ) -> Result { // Iterate on values using sorted pk_indexes (see deser_prepared_metadata), // and use PartitionKeyIndex.sequence to insert the value in pk_values with the correct order. 
@@ -418,7 +416,10 @@ impl<'ps> PartitionKey<'ps> { let next_val = values_iter .nth((pk_index.index - values_iter_offset) as usize) .ok_or_else(|| { - PartitionKeyExtractionError::NoPkIndexValue(pk_index.index, bound_values.len()) + PartitionKeyExtractionError::NoPkIndexValue( + pk_index.index, + bound_values.element_count(), + ) })?; // Add it in sequence order to pk_values if let RawValue::Value(v) = next_val { @@ -477,11 +478,11 @@ impl<'ps> PartitionKey<'ps> { #[cfg(test)] mod tests { - use scylla_cql::frame::{ - response::result::{ + use scylla_cql::{ + frame::response::result::{ ColumnSpec, ColumnType, PartitionKeyIndex, PreparedMetadata, TableSpec, }, - value::LegacySerializedValues, + types::serialize::row::SerializedValues, }; use crate::prepared_statement::PartitionKey; @@ -532,12 +533,14 @@ mod tests { ], [4, 0, 3], ); - let mut values = LegacySerializedValues::new(); - values.add_value(&67i8).unwrap(); - values.add_value(&42i16).unwrap(); - values.add_value(&23i32).unwrap(); - values.add_value(&89i64).unwrap(); - values.add_value(&[1u8, 2, 3, 4, 5]).unwrap(); + let mut values = SerializedValues::new(); + values.add_value(&67i8, &ColumnType::TinyInt).unwrap(); + values.add_value(&42i16, &ColumnType::SmallInt).unwrap(); + values.add_value(&23i32, &ColumnType::Int).unwrap(); + values.add_value(&89i64, &ColumnType::BigInt).unwrap(); + values + .add_value(&[1u8, 2, 3, 4, 5], &ColumnType::Blob) + .unwrap(); let pk = PartitionKey::new(&meta, &values).unwrap(); let pk_cols = Vec::from_iter(pk.iter()); diff --git a/scylla/src/transport/iterator.rs b/scylla/src/transport/iterator.rs index 9af5f13d60..366a7ccb4a 100644 --- a/scylla/src/transport/iterator.rs +++ b/scylla/src/transport/iterator.rs @@ -238,12 +238,11 @@ impl RowIterator { let worker_task = async move { let prepared_ref = &config.prepared; let values_ref = &config.values; - let old_values = values_ref.to_old_serialized_values(); let (partition_key, token) = match prepared_ref 
.extract_partition_key_and_calculate_token( prepared_ref.get_partitioner_name(), - &old_values, + values_ref, ) { Ok(res) => res.unzip(), Err(err) => { diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index 0b22b75460..bf8c8f5200 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -963,16 +963,11 @@ impl Session { paging_state: Option, ) -> Result { let serialized_values = prepared.serialize_values(&values)?; - let old_serialized_values = serialized_values.to_old_serialized_values(); let values_ref = &serialized_values; - let old_values_ref = &old_serialized_values; let paging_state_ref = &paging_state; let (partition_key, token) = prepared - .extract_partition_key_and_calculate_token( - prepared.get_partitioner_name(), - old_values_ref, - )? + .extract_partition_key_and_calculate_token(prepared.get_partitioner_name(), values_ref)? .unzip(); let execution_profile = prepared From 4d5c33ac6bddadf054628509eb7b59f904ee6e3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 18:19:59 +0100 Subject: [PATCH 062/107] partitioner: switch `calculate_token_for_partition_key` to new serialization API --- scylla/benches/benchmark.rs | 60 ++++++++++++++++++++-------- scylla/src/transport/cluster.rs | 11 +++-- scylla/src/transport/partitioner.rs | 10 ++--- scylla/src/transport/session_test.rs | 48 +++++++++++++--------- 4 files changed, 81 insertions(+), 48 deletions(-) diff --git a/scylla/benches/benchmark.rs b/scylla/benches/benchmark.rs index b33b08a21b..20440ea0b7 100644 --- a/scylla/benches/benchmark.rs +++ b/scylla/benches/benchmark.rs @@ -3,9 +3,9 @@ use criterion::{criterion_group, criterion_main, Criterion}; use bytes::BytesMut; use scylla::{ frame::types, - frame::value::ValueList, transport::partitioner::{calculate_token_for_partition_key, Murmur3Partitioner}, }; +use scylla_cql::{frame::response::result::ColumnType, types::serialize::row::SerializedValues}; fn 
types_benchmark(c: &mut Criterion) { let mut buf = BytesMut::with_capacity(64); @@ -40,23 +40,49 @@ fn types_benchmark(c: &mut Criterion) { } fn calculate_token_bench(c: &mut Criterion) { - let simple_pk = ("I'm prepared!!!",); - let serialized_simple_pk = simple_pk.serialized().unwrap().into_owned(); - let simple_pk_long_column = ( - 17_i32, - 16_i32, - String::from_iter(std::iter::repeat('.').take(2000)), - ); - let serialized_simple_pk_long_column = simple_pk_long_column.serialized().unwrap().into_owned(); + let mut serialized_simple_pk = SerializedValues::new(); + serialized_simple_pk + .add_value(&"I'm prepared!!!", &ColumnType::Text) + .unwrap(); - let complex_pk = (17_i32, 16_i32, "I'm prepared!!!"); - let serialized_complex_pk = complex_pk.serialized().unwrap().into_owned(); - let complex_pk_long_column = ( - 17_i32, - 16_i32, - String::from_iter(std::iter::repeat('.').take(2000)), - ); - let serialized_values_long_column = complex_pk_long_column.serialized().unwrap().into_owned(); + let mut serialized_simple_pk_long_column = SerializedValues::new(); + serialized_simple_pk_long_column + .add_value(&17_i32, &ColumnType::Int) + .unwrap(); + serialized_simple_pk_long_column + .add_value(&16_i32, &ColumnType::Int) + .unwrap(); + serialized_simple_pk_long_column + .add_value( + &String::from_iter(std::iter::repeat('.').take(2000)), + &ColumnType::Text, + ) + .unwrap(); + + let mut serialized_complex_pk = SerializedValues::new(); + serialized_complex_pk + .add_value(&17_i32, &ColumnType::Int) + .unwrap(); + serialized_complex_pk + .add_value(&16_i32, &ColumnType::Int) + .unwrap(); + serialized_complex_pk + .add_value(&"I'm prepared!!!", &ColumnType::Text) + .unwrap(); + + let mut serialized_values_long_column = SerializedValues::new(); + serialized_values_long_column + .add_value(&17_i32, &ColumnType::Int) + .unwrap(); + serialized_values_long_column + .add_value(&16_i32, &ColumnType::Int) + .unwrap(); + serialized_values_long_column + .add_value( + 
&String::from_iter(std::iter::repeat('.').take(2000)), + &ColumnType::Text, + ) + .unwrap(); c.bench_function("calculate_token_from_partition_key simple pk", |b| { b.iter(|| calculate_token_for_partition_key(&serialized_simple_pk, &Murmur3Partitioner)) diff --git a/scylla/src/transport/cluster.rs b/scylla/src/transport/cluster.rs index c6ea2fbf26..0098391854 100644 --- a/scylla/src/transport/cluster.rs +++ b/scylla/src/transport/cluster.rs @@ -400,12 +400,11 @@ impl ClusterData { .and_then(PartitionerName::from_str) .unwrap_or_default(); - calculate_token_for_partition_key(&partition_key.to_old_serialized_values(), &partitioner) - .map_err(|err| match err { - TokenCalculationError::ValueTooLong(values_len) => { - BadQuery::ValuesTooLongForKey(values_len, u16::MAX.into()) - } - }) + calculate_token_for_partition_key(partition_key, &partitioner).map_err(|err| match err { + TokenCalculationError::ValueTooLong(values_len) => { + BadQuery::ValuesTooLongForKey(values_len, u16::MAX.into()) + } + }) } /// Access to replicas owning a given token diff --git a/scylla/src/transport/partitioner.rs b/scylla/src/transport/partitioner.rs index e6d5c223b2..7a9f4b083a 100644 --- a/scylla/src/transport/partitioner.rs +++ b/scylla/src/transport/partitioner.rs @@ -1,10 +1,8 @@ use bytes::Buf; -use scylla_cql::frame::types::RawValue; +use scylla_cql::{frame::types::RawValue, types::serialize::row::SerializedValues}; use std::num::Wrapping; -use crate::{ - frame::value::LegacySerializedValues, prepared_statement::TokenCalculationError, routing::Token, -}; +use crate::{prepared_statement::TokenCalculationError, routing::Token}; #[allow(clippy::upper_case_acronyms)] #[derive(Clone, PartialEq, Debug, Default)] @@ -337,12 +335,12 @@ impl PartitionerHasher for CDCPartitionerHasher { /// NOTE: the provided values must completely constitute partition key /// and be in the order defined in CREATE TABLE statement. 
pub fn calculate_token_for_partition_key( - serialized_partition_key_values: &LegacySerializedValues, + serialized_partition_key_values: &SerializedValues, partitioner: &P, ) -> Result { let mut partitioner_hasher = partitioner.build_hasher(); - if serialized_partition_key_values.len() == 1 { + if serialized_partition_key_values.element_count() == 1 { let val = serialized_partition_key_values.iter().next().unwrap(); if let RawValue::Value(val) = val { partitioner_hasher.write(val); diff --git a/scylla/src/transport/session_test.rs b/scylla/src/transport/session_test.rs index f16a9ef2c0..e0d247e53b 100644 --- a/scylla/src/transport/session_test.rs +++ b/scylla/src/transport/session_test.rs @@ -29,7 +29,7 @@ use bytes::Bytes; use futures::{FutureExt, StreamExt, TryStreamExt}; use itertools::Itertools; use scylla_cql::frame::response::result::ColumnType; -use scylla_cql::types::serialize::row::SerializedValues; +use scylla_cql::types::serialize::row::{SerializeRow, SerializedValues}; use scylla_cql::types::serialize::value::SerializeCql; use std::collections::BTreeSet; use std::collections::{BTreeMap, HashMap}; @@ -2788,26 +2788,24 @@ async fn test_manual_primary_key_computation() { async fn assert_tokens_equal( session: &Session, prepared: &PreparedStatement, - pk_values_in_pk_order: impl ValueList, - all_values_in_query_order: impl ValueList, + serialized_pk_values_in_pk_order: &SerializedValues, + all_values_in_query_order: impl SerializeRow, ) { - let serialized_values_in_pk_order = - pk_values_in_pk_order.serialized().unwrap().into_owned(); - let serialized_values_in_query_order = - all_values_in_query_order.serialized().unwrap().into_owned(); + let token_by_prepared = prepared + .calculate_token(&all_values_in_query_order) + .unwrap() + .unwrap(); session - .execute(prepared, &serialized_values_in_query_order) + .execute(prepared, all_values_in_query_order) .await .unwrap(); - let token_by_prepared = prepared - .calculate_token(&serialized_values_in_query_order) 
- .unwrap() - .unwrap(); - let token_by_hand = - calculate_token_for_partition_key(&serialized_values_in_pk_order, &Murmur3Partitioner) - .unwrap(); + let token_by_hand = calculate_token_for_partition_key( + serialized_pk_values_in_pk_order, + &Murmur3Partitioner, + ) + .unwrap(); println!( "by_prepared: {}, by_hand: {}", token_by_prepared.value, token_by_hand.value @@ -2831,13 +2829,16 @@ async fn test_manual_primary_key_computation() { .await .unwrap(); - let pk_values_in_pk_order = (17_i32,); + let mut pk_values_in_pk_order = SerializedValues::new(); + pk_values_in_pk_order + .add_value(&17_i32, &ColumnType::Int) + .unwrap(); let all_values_in_query_order = (17_i32, 16_i32, "I'm prepared!!!"); assert_tokens_equal( &session, &prepared_simple_pk, - pk_values_in_pk_order, + &pk_values_in_pk_order, all_values_in_query_order, ) .await; @@ -2857,13 +2858,22 @@ async fn test_manual_primary_key_computation() { .await .unwrap(); - let pk_values_in_pk_order = (17_i32, 16_i32, "I'm prepared!!!"); + let mut pk_values_in_pk_order = SerializedValues::new(); + pk_values_in_pk_order + .add_value(&17_i32, &ColumnType::Int) + .unwrap(); + pk_values_in_pk_order + .add_value(&16_i32, &ColumnType::Int) + .unwrap(); + pk_values_in_pk_order + .add_value(&"I'm prepared!!!", &ColumnType::Ascii) + .unwrap(); let all_values_in_query_order = (17_i32, "I'm prepared!!!", 16_i32); assert_tokens_equal( &session, &prepared_complex_pk, - pk_values_in_pk_order, + &pk_values_in_pk_order, all_values_in_query_order, ) .await; From ee61cb0f67c01a5b5cd14b90e6f8eb932b2bb163 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 18:37:23 +0100 Subject: [PATCH 063/107] QueryParameters: switch `values` to new `SerializedValues` --- scylla-cql/benches/benchmark.rs | 25 ++++++++++++++++++------- scylla-cql/src/frame/request/mod.rs | 21 +++++++++++---------- scylla-cql/src/frame/request/query.rs | 24 ++++++++++++++---------- scylla/src/transport/connection.rs | 5 ++--- 4 files 
changed, 45 insertions(+), 30 deletions(-) diff --git a/scylla-cql/benches/benchmark.rs b/scylla-cql/benches/benchmark.rs index 77525194f0..2ab15f5051 100644 --- a/scylla-cql/benches/benchmark.rs +++ b/scylla-cql/benches/benchmark.rs @@ -3,17 +3,17 @@ use std::borrow::Cow; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use scylla_cql::frame::request::SerializableRequest; -use scylla_cql::frame::value::LegacySerializedValues; -use scylla_cql::frame::value::ValueList; +use scylla_cql::frame::response::result::ColumnType; use scylla_cql::frame::{request::query, Compression, SerializedRequest}; +use scylla_cql::types::serialize::row::SerializedValues; -fn make_query<'a>(contents: &'a str, values: &'a LegacySerializedValues) -> query::Query<'a> { +fn make_query(contents: &str, values: SerializedValues) -> query::Query<'_> { query::Query { contents: Cow::Borrowed(contents), parameters: query::QueryParameters { consistency: scylla_cql::Consistency::LocalQuorum, serial_consistency: None, - values: Cow::Borrowed(values), + values: Cow::Owned(values), page_size: None, paging_state: None, timestamp: None, @@ -22,13 +22,24 @@ fn make_query<'a>(contents: &'a str, values: &'a LegacySerializedValues) -> quer } fn serialized_request_make_bench(c: &mut Criterion) { + let mut values = SerializedValues::new(); let mut group = c.benchmark_group("LZ4Compression.SerializedRequest"); let query_args = [ - ("INSERT foo INTO ks.table_name (?)", &(1234,).serialized().unwrap()), - ("INSERT foo, bar, baz INTO ks.table_name (?, ?, ?)", &(1234, "a value", "i am storing a string").serialized().unwrap()), + ("INSERT foo INTO ks.table_name (?)", { + values.add_value(&1234, &ColumnType::Int).unwrap(); + values.clone() + }), + ("INSERT foo, bar, baz INTO ks.table_name (?, ?, ?)", { + values.add_value(&"a value", &ColumnType::Text).unwrap(); + values.add_value(&"i am storing a string", &ColumnType::Text).unwrap(); + values.clone() + }), ( "INSERT foo, bar, baz, boop, blah 
INTO longer_keyspace.a_big_table_name (?, ?, ?, ?, 1000)", - &(1234, "a value", "i am storing a string", "dc0c8cd7-d954-47c1-8722-a857941c43fb").serialized().unwrap() + { + values.add_value(&"dc0c8cd7-d954-47c1-8722-a857941c43fb", &ColumnType::Text).unwrap(); + values.clone() + } ), ]; let queries = query_args.map(|(q, v)| make_query(q, v)); diff --git a/scylla-cql/src/frame/request/mod.rs b/scylla-cql/src/frame/request/mod.rs index 82a86220d8..8a625f2806 100644 --- a/scylla-cql/src/frame/request/mod.rs +++ b/scylla-cql/src/frame/request/mod.rs @@ -112,9 +112,10 @@ mod tests { query::{Query, QueryParameters}, DeserializableRequest, SerializableRequest, }, + response::result::ColumnType, types::{self, SerialConsistency}, - value::LegacySerializedValues, }, + types::serialize::row::SerializedValues, Consistency, }; @@ -129,8 +130,8 @@ mod tests { page_size: Some(323), paging_state: Some(vec![2, 1, 3, 7].into()), values: { - let mut vals = LegacySerializedValues::new(); - vals.add_value(&2137).unwrap(); + let mut vals = SerializedValues::new(); + vals.add_value(&2137, &ColumnType::Int).unwrap(); Cow::Owned(vals) }, }; @@ -156,9 +157,9 @@ mod tests { page_size: None, paging_state: None, values: { - let mut vals = LegacySerializedValues::new(); - vals.add_named_value("the_answer", &42).unwrap(); - vals.add_named_value("really?", &2137).unwrap(); + let mut vals = SerializedValues::new(); + vals.add_value(&42, &ColumnType::Int).unwrap(); + vals.add_value(&2137, &ColumnType::Int).unwrap(); Cow::Owned(vals) }, }; @@ -189,8 +190,8 @@ mod tests { // Not execute's values, because named values are not supported in batches. 
values: vec![ - query.parameters.values.deref().clone(), - query.parameters.values.deref().clone(), + query.parameters.values.deref().to_old_serialized_values(), + query.parameters.values.deref().to_old_serialized_values(), ], }; { @@ -212,7 +213,7 @@ mod tests { timestamp: None, page_size: None, paging_state: None, - values: Cow::Owned(LegacySerializedValues::new()), + values: Cow::Borrowed(SerializedValues::EMPTY), }; let query = Query { contents: contents.clone(), @@ -261,7 +262,7 @@ mod tests { serial_consistency: None, timestamp: None, - values: vec![query.parameters.values.deref().clone()], + values: vec![query.parameters.values.deref().to_old_serialized_values()], }; { let mut buf = Vec::new(); diff --git a/scylla-cql/src/frame/request/query.rs b/scylla-cql/src/frame/request/query.rs index e4bc86f6bd..348127eda7 100644 --- a/scylla-cql/src/frame/request/query.rs +++ b/scylla-cql/src/frame/request/query.rs @@ -1,12 +1,14 @@ use std::borrow::Cow; -use crate::frame::{frame_errors::ParseError, types::SerialConsistency}; +use crate::{ + frame::{frame_errors::ParseError, types::SerialConsistency}, + types::serialize::row::SerializedValues, +}; use bytes::{Buf, BufMut, Bytes}; use crate::{ frame::request::{RequestOpcode, SerializableRequest}, frame::types, - frame::value::LegacySerializedValues, }; use super::DeserializableRequest; @@ -61,7 +63,7 @@ pub struct QueryParameters<'a> { pub timestamp: Option, pub page_size: Option, pub paging_state: Option, - pub values: Cow<'a, LegacySerializedValues>, + pub values: Cow<'a, SerializedValues>, } impl Default for QueryParameters<'_> { @@ -72,7 +74,7 @@ impl Default for QueryParameters<'_> { timestamp: None, page_size: None, paging_state: None, - values: Cow::Borrowed(LegacySerializedValues::EMPTY), + values: Cow::Borrowed(SerializedValues::EMPTY), } } } @@ -102,10 +104,6 @@ impl QueryParameters<'_> { flags |= FLAG_WITH_DEFAULT_TIMESTAMP; } - if self.values.has_names() { - flags |= FLAG_WITH_NAMES_FOR_VALUES; - } - 
buf.put_u8(flags); if !self.values.is_empty() { @@ -151,10 +149,16 @@ impl<'q> QueryParameters<'q> { let default_timestamp_flag = (flags & FLAG_WITH_DEFAULT_TIMESTAMP) != 0; let values_have_names_flag = (flags & FLAG_WITH_NAMES_FOR_VALUES) != 0; + if values_have_names_flag { + return Err(ParseError::BadIncomingData( + "Named values in frame are currently unsupported".to_string(), + )); + } + let values = Cow::Owned(if values_flag { - LegacySerializedValues::new_from_frame(buf, values_have_names_flag)? + SerializedValues::new_from_frame(buf)? } else { - LegacySerializedValues::new() + SerializedValues::new() }); let page_size = page_size_flag.then(|| types::read_int(buf)).transpose()?; diff --git a/scylla/src/transport/connection.rs b/scylla/src/transport/connection.rs index 43eacf265b..3faa6a5f0a 100644 --- a/scylla/src/transport/connection.rs +++ b/scylla/src/transport/connection.rs @@ -4,7 +4,6 @@ use scylla_cql::errors::TranslationError; use scylla_cql::frame::request::options::Options; use scylla_cql::frame::response::Error; use scylla_cql::frame::types::SerialConsistency; -use scylla_cql::frame::value::LegacySerializedValues; use scylla_cql::types::serialize::row::SerializedValues; use socket2::{SockRef, TcpKeepalive}; use tokio::io::{split, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader, BufWriter}; @@ -652,7 +651,7 @@ impl Connection { parameters: query::QueryParameters { consistency, serial_consistency, - values: Cow::Borrowed(LegacySerializedValues::EMPTY), + values: Cow::Borrowed(SerializedValues::EMPTY), page_size: query.get_page_size(), paging_state, timestamp: query.get_timestamp(), @@ -696,7 +695,7 @@ impl Connection { parameters: query::QueryParameters { consistency, serial_consistency, - values: Cow::Owned(values.to_old_serialized_values()), + values: Cow::Borrowed(values), page_size: prepared_statement.get_page_size(), timestamp: prepared_statement.get_timestamp(), paging_state, From a35b5b36646327df2251fd8c1a7d4204b08d546f Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 20:13:09 +0100 Subject: [PATCH 064/107] scylla: Remove most uses of fallback SerializeRow impls There are fallback implementations of SerializeRow for LegacySerializedValues provided to help users transition to new traits. This commit updates most of the code that uses those implementations, so that removing them in the future is easier. Uses that are not removed: - Session::batch: will be removed when batches are switched to new API - value_tests::map_value_list: will be removed while removing LegacySerializedValues - serialize/row.rs tests: will be removed while removing LegacySerializedValues --- examples/compare-tokens.rs | 4 +--- scylla/src/transport/session_test.rs | 23 +++++++---------------- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/examples/compare-tokens.rs b/examples/compare-tokens.rs index 5c56aa5f4d..3e46d20b44 100644 --- a/examples/compare-tokens.rs +++ b/examples/compare-tokens.rs @@ -1,5 +1,4 @@ use anyhow::Result; -use scylla::frame::value::ValueList; use scylla::routing::Token; use scylla::transport::NodeAddr; use scylla::{Session, SessionBuilder}; @@ -29,8 +28,7 @@ async fn main() -> Result<()> { .query("INSERT INTO ks.t (pk) VALUES (?)", (pk,)) .await?; - let serialized_pk = (pk,).serialized()?.into_owned(); - let t = prepared.calculate_token(&serialized_pk)?.unwrap().value; + let t = prepared.calculate_token(&(pk,))?.unwrap().value; println!( "Token endpoints for query: {:?}", diff --git a/scylla/src/transport/session_test.rs b/scylla/src/transport/session_test.rs index e0d247e53b..b6c9c20ba4 100644 --- a/scylla/src/transport/session_test.rs +++ b/scylla/src/transport/session_test.rs @@ -1,7 +1,6 @@ use crate as scylla; use crate::batch::{Batch, BatchStatement}; use crate::frame::response::result::Row; -use crate::frame::value::ValueList; use crate::prepared_statement::PreparedStatement; use crate::query::Query; use crate::retry_policy::{QueryInfo, 
RetryDecision, RetryPolicy, RetrySession}; @@ -210,7 +209,6 @@ async fn test_prepared_statement() { .unwrap(); let values = (17_i32, 16_i32, "I'm prepared!!!"); - let serialized_values = values.serialized().unwrap().into_owned(); let serialized_values_complex_pk = prepared_complex_pk_statement .serialize_values(&values) .unwrap(); @@ -236,11 +234,8 @@ async fn test_prepared_statement() { .as_bigint() .unwrap(), }; - let prepared_token = Murmur3Partitioner.hash_one( - &prepared_statement - .compute_partition_key(&serialized_values) - .unwrap(), - ); + let prepared_token = Murmur3Partitioner + .hash_one(&prepared_statement.compute_partition_key(&values).unwrap()); assert_eq!(token, prepared_token); let mut pk = SerializedValues::new(); pk.add_value(&17_i32, &ColumnType::Int).unwrap(); @@ -266,7 +261,7 @@ async fn test_prepared_statement() { }; let prepared_token = Murmur3Partitioner.hash_one( &prepared_complex_pk_statement - .compute_partition_key(&serialized_values) + .compute_partition_key(&values) .unwrap(), ); assert_eq!(token, prepared_token); @@ -518,8 +513,7 @@ async fn test_token_calculation() { s.push('a'); } let values = (&s,); - let serialized_values = values.serialized().unwrap().into_owned(); - let new_serialized_values = prepared_statement.serialize_values(&values).unwrap(); + let serialized_values = prepared_statement.serialize_values(&values).unwrap(); session.execute(&prepared_statement, &values).await.unwrap(); let rs = session @@ -538,15 +532,12 @@ async fn test_token_calculation() { .as_bigint() .unwrap(), }; - let prepared_token = Murmur3Partitioner.hash_one( - &prepared_statement - .compute_partition_key(&serialized_values) - .unwrap(), - ); + let prepared_token = Murmur3Partitioner + .hash_one(&prepared_statement.compute_partition_key(&values).unwrap()); assert_eq!(token, prepared_token); let cluster_data_token = session .get_cluster_data() - .compute_token(&ks, "t3", &new_serialized_values) + .compute_token(&ks, "t3", &serialized_values) 
.unwrap(); assert_eq!(token, cluster_data_token); } From 64c6ac6a483c97a95397dee40d3721cbcd4dd4f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 11 Dec 2023 22:41:25 +0100 Subject: [PATCH 065/107] Remove unnecessary `#[allow(dead_code)]` During the switch to new serialization API `#[allow(dead_code)]` was added in several places as a temporary measure. Those can now be removed. --- scylla-cql/src/types/serialize/row.rs | 2 -- scylla/src/statement/prepared_statement.rs | 1 - 2 files changed, 3 deletions(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 381c958ae4..9ad7d0e56c 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -680,7 +680,6 @@ impl SerializedValues { } /// Creates value list from the request frame - #[allow(dead_code)] pub(crate) fn new_from_frame(buf: &mut &[u8]) -> Result { let values_num = types::read_short(buf)?; let values_beg = *buf; @@ -697,7 +696,6 @@ impl SerializedValues { } // Temporary function, to be removed when we implement new batching API (right now it is needed in frame::request::mod.rs tests) - #[allow(dead_code)] pub fn to_old_serialized_values(&self) -> LegacySerializedValues { let mut frame = Vec::new(); self.write_to_request(&mut frame); diff --git a/scylla/src/statement/prepared_statement.rs b/scylla/src/statement/prepared_statement.rs index 5ee2a13e33..f61fab901e 100644 --- a/scylla/src/statement/prepared_statement.rs +++ b/scylla/src/statement/prepared_statement.rs @@ -338,7 +338,6 @@ impl PreparedStatement { self.config.execution_profile_handle.as_ref() } - #[allow(dead_code)] pub(crate) fn serialize_values( &self, values: &impl SerializeRow, From 7030873fd108b3223fb90a31b05e5b4c87aa7c35 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Wed, 13 Dec 2023 08:35:18 +0100 Subject: [PATCH 066/107] serialize/{row,value}: rename confusing "mismatch" variants When serializing a Rust struct / 
CqlValue::UserDefinedType to a CQL UDT, or a Rust struct / BTreeMap / HashMap as a list of named values, there can be a fields/columns mismatch between the Rust and CQL definition and a field/column can be present in one or missing in another. Current error kind variants have very confusing or plainly wrong names/descriptions/messages. Fix the situation in this commit by enforcing a consistent naming scheme: - If something is missing from Rust data but required by the CQL type, use `MissingFor`. - If something is present in Rust data but missing from the CQL type, use `NoSuch` or `NoWithName`. Apart from that, fix the docstrings and the error messages. --- scylla-cql/src/types/serialize/row.rs | 24 ++++++++--------- scylla-cql/src/types/serialize/value.rs | 34 ++++++++++++------------- scylla-macros/src/serialize/cql.rs | 8 +++--- scylla-macros/src/serialize/row.rs | 8 +++--- 4 files changed, 37 insertions(+), 37 deletions(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 9ad7d0e56c..e8b2eae861 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -167,7 +167,7 @@ macro_rules! impl_serialize_row_for_map { match self.get(col.name.as_str()) { None => { return Err(mk_typck_err::( - BuiltinTypeCheckErrorKind::MissingValueForColumn { + BuiltinTypeCheckErrorKind::ValueMissingForColumn { name: col.name.clone(), }, )) @@ -191,7 +191,7 @@ macro_rules! impl_serialize_row_for_map { // Report the lexicographically first value for deterministic error messages let name = unused_columns.iter().min().unwrap(); return Err(mk_typck_err::( - BuiltinTypeCheckErrorKind::ColumnMissingForValue { + BuiltinTypeCheckErrorKind::NoColumnWithName { name: name.to_string(), }, )); @@ -513,10 +513,10 @@ pub enum BuiltinTypeCheckErrorKind { /// The Rust type provides a value for some column, but that column is not /// present in the statement.
- MissingValueForColumn { name: String }, + NoColumnWithName { name: String }, /// A value required by the statement is not provided by the Rust type. - ColumnMissingForValue { name: String }, + ValueMissingForColumn { name: String }, /// A different column name was expected at given position. ColumnNameMismatch { @@ -531,16 +531,16 @@ impl Display for BuiltinTypeCheckErrorKind { BuiltinTypeCheckErrorKind::WrongColumnCount { actual, asked_for } => { write!(f, "wrong column count: the query requires {asked_for} columns, but {actual} were provided") } - BuiltinTypeCheckErrorKind::MissingValueForColumn { name } => { + BuiltinTypeCheckErrorKind::NoColumnWithName { name } => { write!( f, - "value for column {name} was not provided, but the query requires it" + "value for column {name} was provided, but there is no bind marker for this column in the query" ) } - BuiltinTypeCheckErrorKind::ColumnMissingForValue { name } => { + BuiltinTypeCheckErrorKind::ValueMissingForColumn { name } => { write!( f, - "value for column {name} was provided, but there is no bind marker for this column in the query" + "value for column {name} was not provided, but the query requires it" ) } BuiltinTypeCheckErrorKind::ColumnNameMismatch { rust_column_name, db_column_name } => write!( @@ -947,7 +947,7 @@ mod tests { let err = err.0.downcast_ref::().unwrap(); assert!(matches!( err.kind, - BuiltinTypeCheckErrorKind::ColumnMissingForValue { .. } + BuiltinTypeCheckErrorKind::ValueMissingForColumn { .. } )); let spec_duplicate_column = [ @@ -965,7 +965,7 @@ mod tests { let err = err.0.downcast_ref::().unwrap(); assert!(matches!( err.kind, - BuiltinTypeCheckErrorKind::MissingValueForColumn { .. } + BuiltinTypeCheckErrorKind::NoColumnWithName { .. } )); let spec_wrong_type = [ @@ -1074,7 +1074,7 @@ mod tests { let err = err.0.downcast_ref::().unwrap(); assert!(matches!( err.kind, - BuiltinTypeCheckErrorKind::ColumnMissingForValue { .. } + BuiltinTypeCheckErrorKind::ValueMissingForColumn { .. 
} )); let spec_duplicate_column = [ @@ -1092,7 +1092,7 @@ mod tests { let err = err.0.downcast_ref::().unwrap(); assert!(matches!( err.kind, - BuiltinTypeCheckErrorKind::MissingValueForColumn { .. } + BuiltinTypeCheckErrorKind::NoColumnWithName { .. } )); let spec_wrong_type = [ diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 567b59cfab..a466ab136b 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -613,7 +613,7 @@ fn serialize_udt<'b>( let fname = indexed_fields.keys().min().unwrap(); return Err(mk_typck_err::( typ, - UdtTypeCheckErrorKind::UnexpectedFieldInDestination { + UdtTypeCheckErrorKind::NoSuchFieldInUdt { field_name: fname.to_string(), }, )); @@ -1309,11 +1309,11 @@ pub enum UdtTypeCheckErrorKind { /// The name of the UDT being serialized to does not match. NameMismatch { keyspace: String, type_name: String }, - /// One of the fields that is required to be present by the Rust struct was not present in the CQL UDT type. - MissingField { field_name: String }, + /// The Rust data does not have a field that is required in the CQL UDT type. + ValueMissingForUdtField { field_name: String }, - /// The Rust data contains a field that is not present in the UDT - UnexpectedFieldInDestination { field_name: String }, + /// The Rust data contains a field that is not present in the UDT. + NoSuchFieldInUdt { field_name: String }, /// A different field name was expected at given position. 
FieldNameMismatch { @@ -1336,12 +1336,12 @@ impl Display for UdtTypeCheckErrorKind { f, "the Rust UDT name does not match the actual CQL UDT name ({keyspace}.{type_name})" ), - UdtTypeCheckErrorKind::MissingField { field_name } => { - write!(f, "the field {field_name} is missing from the CQL UDT type") + UdtTypeCheckErrorKind::ValueMissingForUdtField { field_name } => { + write!(f, "the field {field_name} is missing in the Rust data but is required by the CQL UDT type") } - UdtTypeCheckErrorKind::UnexpectedFieldInDestination { field_name } => write!( + UdtTypeCheckErrorKind::NoSuchFieldInUdt { field_name } => write!( f, - "the field {field_name} present in the Rust data is not present in the CQL type" + "the field {field_name} that is present in the Rust data is not present in the CQL type" ), UdtTypeCheckErrorKind::FieldNameMismatch { rust_field_name, db_field_name } => write!( f, @@ -1584,7 +1584,9 @@ mod tests { let err = err.0.downcast_ref::().unwrap(); assert!(matches!( err.kind, - BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::MissingField { .. }) + BuiltinTypeCheckErrorKind::UdtError( + UdtTypeCheckErrorKind::ValueMissingForUdtField { .. } + ) )); let typ_unexpected_field = ColumnType::UserDefinedType { @@ -1608,9 +1610,7 @@ mod tests { let err = err.0.downcast_ref::().unwrap(); assert!(matches!( err.kind, - BuiltinTypeCheckErrorKind::UdtError( - UdtTypeCheckErrorKind::UnexpectedFieldInDestination { .. } - ) + BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::NoSuchFieldInUdt { .. }) )); let typ_wrong_type = ColumnType::UserDefinedType { @@ -1788,7 +1788,9 @@ mod tests { let err = err.0.downcast_ref::().unwrap(); assert!(matches!( err.kind, - BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::MissingField { .. }) + BuiltinTypeCheckErrorKind::UdtError( + UdtTypeCheckErrorKind::ValueMissingForUdtField { .. 
} + ) )); let typ_unexpected_field = ColumnType::UserDefinedType { @@ -1812,9 +1814,7 @@ mod tests { let err = err.0.downcast_ref::().unwrap(); assert!(matches!( err.kind, - BuiltinTypeCheckErrorKind::UdtError( - UdtTypeCheckErrorKind::UnexpectedFieldInDestination { .. } - ) + BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::NoSuchFieldInUdt { .. }) )); let typ_unexpected_field = ColumnType::UserDefinedType { diff --git a/scylla-macros/src/serialize/cql.rs b/scylla-macros/src/serialize/cql.rs index d3c5788401..4756901183 100644 --- a/scylla-macros/src/serialize/cql.rs +++ b/scylla-macros/src/serialize/cql.rs @@ -189,7 +189,7 @@ impl<'a> Generator for FieldSortingGenerator<'a> { } )* _ => return ::std::result::Result::Err(mk_typck_err( - #crate_path::UdtTypeCheckErrorKind::UnexpectedFieldInDestination { + #crate_path::UdtTypeCheckErrorKind::NoSuchFieldInUdt { field_name: <_ as ::std::clone::Clone>::clone(field_name), } )), @@ -204,7 +204,7 @@ impl<'a> Generator for FieldSortingGenerator<'a> { #( if !#visited_flag_names { return ::std::result::Result::Err(mk_typck_err( - #crate_path::UdtTypeCheckErrorKind::MissingField { + #crate_path::UdtTypeCheckErrorKind::ValueMissingForUdtField { field_name: <_ as ::std::string::ToString>::to_string(#rust_field_names), } )); @@ -299,7 +299,7 @@ impl<'a> Generator for FieldOrderedGenerator<'a> { } None => { return ::std::result::Result::Err(mk_typck_err( - #crate_path::UdtTypeCheckErrorKind::MissingField { + #crate_path::UdtTypeCheckErrorKind::ValueMissingForUdtField { field_name: <_ as ::std::string::ToString>::to_string(#rust_field_name), } )); @@ -312,7 +312,7 @@ impl<'a> Generator for FieldOrderedGenerator<'a> { statements.push(parse_quote! 
{ if let Some((field_name, typ)) = field_iter.next() { return ::std::result::Result::Err(mk_typck_err( - #crate_path::UdtTypeCheckErrorKind::UnexpectedFieldInDestination { + #crate_path::UdtTypeCheckErrorKind::NoSuchFieldInUdt { field_name: <_ as ::std::clone::Clone>::clone(field_name), } )); diff --git a/scylla-macros/src/serialize/row.rs b/scylla-macros/src/serialize/row.rs index ee0f702d27..44b402d791 100644 --- a/scylla-macros/src/serialize/row.rs +++ b/scylla-macros/src/serialize/row.rs @@ -166,7 +166,7 @@ impl<'a> Generator for ColumnSortingGenerator<'a> { } )* _ => return ::std::result::Result::Err(mk_typck_err( - #crate_path::BuiltinRowTypeCheckErrorKind::MissingValueForColumn { + #crate_path::BuiltinRowTypeCheckErrorKind::NoColumnWithName { name: <_ as ::std::clone::Clone>::clone(&&spec.name), } )), @@ -181,7 +181,7 @@ impl<'a> Generator for ColumnSortingGenerator<'a> { #( if !#visited_flag_names { return ::std::result::Result::Err(mk_typck_err( - #crate_path::BuiltinRowTypeCheckErrorKind::ColumnMissingForValue { + #crate_path::BuiltinRowTypeCheckErrorKind::ValueMissingForColumn { name: <_ as ::std::string::ToString>::to_string(#rust_field_names), } )); @@ -267,7 +267,7 @@ impl<'a> Generator for ColumnOrderedGenerator<'a> { } None => { return ::std::result::Result::Err(mk_typck_err( - #crate_path::BuiltinRowTypeCheckErrorKind::ColumnMissingForValue { + #crate_path::BuiltinRowTypeCheckErrorKind::ValueMissingForColumn { name: <_ as ::std::string::ToString>::to_string(#rust_field_name), } )); @@ -280,7 +280,7 @@ impl<'a> Generator for ColumnOrderedGenerator<'a> { statements.push(parse_quote! 
{ if let Some(spec) = column_iter.next() { return ::std::result::Result::Err(mk_typck_err( - #crate_path::BuiltinRowTypeCheckErrorKind::MissingValueForColumn { + #crate_path::BuiltinRowTypeCheckErrorKind::NoColumnWithName { name: <_ as ::std::clone::Clone>::clone(&spec.name), } )); From bdabeeceea76022ccc8d8a0e59b058e33efe473b Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Mon, 11 Dec 2023 19:28:04 +0100 Subject: [PATCH 067/107] serialize/row: check for unused named values in legacy fallback The code that adapts `ValueList` to `SerializeRow` and rewrites named values to regular values based on the context didn't use to check whether there are some superfluous values that do not match to any of the bind markers. Fix this. --- scylla-cql/src/types/serialize/row.rs | 30 +++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index e8b2eae861..edada4f2d1 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -432,21 +432,40 @@ pub fn serialize_legacy_row( if !serialized.has_names() { serialized.iter().for_each(append_value); } else { - let values_by_name = serialized + let mut values_by_name = serialized .iter_name_value_pairs() - .map(|(k, v)| (k.unwrap(), v)) + .map(|(k, v)| (k.unwrap(), (v, false))) .collect::>(); + let mut unused_count = values_by_name.len(); for col in ctx.columns() { - let val = values_by_name.get(col.name.as_str()).ok_or_else(|| { + let (val, visited) = values_by_name.get_mut(col.name.as_str()).ok_or_else(|| { SerializationError(Arc::new( - ValueListToSerializeRowAdapterError::NoBindMarkerWithName { + ValueListToSerializeRowAdapterError::ValueMissingForBindMarker { name: col.name.clone(), }, )) })?; + if !*visited { + *visited = true; + unused_count -= 1; + } append_value(*val); } + + if unused_count != 0 { + // Choose the lexicographically earliest name for the sake + // of 
deterministic errors + let name = values_by_name + .iter() + .filter_map(|(k, (_, visited))| (!visited).then_some(k)) + .min() + .unwrap() + .to_string(); + return Err(SerializationError::new( + ValueListToSerializeRowAdapterError::NoBindMarkerWithName { name }, + )); + } } Ok(()) @@ -574,6 +593,9 @@ impl Display for BuiltinSerializationErrorKind { #[derive(Error, Debug)] pub enum ValueListToSerializeRowAdapterError { + #[error("Missing named value for column {name}")] + ValueMissingForBindMarker { name: String }, + #[error("There is no bind marker with name {name}, but a value for it was provided")] NoBindMarkerWithName { name: String }, } From 2810fafdc1bc1f8c9c811b56c7d90422db233b8e Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Mon, 11 Dec 2023 19:29:36 +0100 Subject: [PATCH 068/107] serialize/value: dedicated error variant for too big legacy values Add a dedicated error variant to ValueToSerializeCqlAdapterError, returned in case when the legacy implementation tried to serialize but overflowed the maximum allowed size by the protocol. Previously, the `ValueTooBig` error defined in frame::value would be returned, which was inconsistent with other error cases for the `Value` -> `SerializeCql` translation code. --- scylla-cql/src/types/serialize/value.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index a466ab136b..d91693e51e 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -909,7 +909,8 @@ pub fn serialize_legacy_value<'b, T: Value>( ) -> Result, SerializationError> { // It's an inefficient and slightly tricky but correct implementation. let mut buf = Vec::new(); - ::serialize(v, &mut buf).map_err(|err| SerializationError(Arc::new(err)))?; + ::serialize(v, &mut buf) + .map_err(|_| SerializationError::new(ValueToSerializeCqlAdapterError::TooBig))?; // Analyze the output. 
// All this dance shows how unsafe our previous interface was... @@ -1373,6 +1374,9 @@ impl Display for UdtSerializationErrorKind { #[derive(Error, Debug)] pub enum ValueToSerializeCqlAdapterError { + #[error("The value is too big to be serialized as it exceeds the maximum 2GB size limit")] + TooBig, + #[error("Output produced by the Value trait is too short to be considered a value: {size} < 4 minimum bytes")] TooShort { size: usize }, From e57a7f851de95a84ca66abb423b71be349f94e98 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Tue, 12 Dec 2023 03:14:19 +0100 Subject: [PATCH 069/107] serialize/value: fix error returned from dynamic tuples In case when serialization of one of the fields fails, the tuple should return an error indicating that serialization of the tuple failed, and nest the error returned by field serialization inside it. The error used to have the wrong ColumnType put into it - a loop variable containing the ColumnType of the field shadowed the function argument containing the ColumnType of the whole tuple. Fix the issue by renaming the loop variable and un-shadowing the function argument as a result. 
--- scylla-cql/src/types/serialize/value.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index d91693e51e..23203c8361 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -632,11 +632,11 @@ fn serialize_tuple_like<'t, 'b>( ) -> Result, SerializationError> { let mut builder = writer.into_value_builder(); - for (index, (el, typ)) in field_values.zip(field_types).enumerate() { + for (index, (el, el_typ)) in field_values.zip(field_types).enumerate() { let sub = builder.make_sub_writer(); match el { None => sub.set_null(), - Some(el) => serialize_cql_value(el, typ, sub).map_err(|err| { + Some(el) => serialize_cql_value(el, el_typ, sub).map_err(|err| { let err = fix_cql_value_name_in_err(err); mk_ser_err::( typ, From fa1cf21e0ecfe60c68a47eb0f2eea02e4924818b Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Tue, 12 Dec 2023 03:11:24 +0100 Subject: [PATCH 070/107] frame/value: implement some common traits for Unset It's a good practice to implement common traits for public types, and some of them will be used in the tests introduced in the next commits. 
--- scylla-cql/src/frame/value.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/scylla-cql/src/frame/value.rs b/scylla-cql/src/frame/value.rs index e4be751635..a6abccd89a 100644 --- a/scylla-cql/src/frame/value.rs +++ b/scylla-cql/src/frame/value.rs @@ -36,6 +36,7 @@ pub struct ValueTooBig; pub struct ValueOverflow; /// Represents an unset value +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct Unset; /// Represents an counter value From 53d14908516d7cb24e5efa9fbc91c5611671a1d7 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Tue, 12 Dec 2023 03:14:46 +0100 Subject: [PATCH 071/107] serialize/{row,value}: add tests for serialization errors The PR that introduced impls of the SerializeRow/SerializeCql for the types that used to implement the old traits relied on the tests in `value_tests.rs` - they were extended with more test cases and modified to run on both the old and the new traits. However, this was only done for the tests that serialize stuff without causing errors. The new impls return errors that are much more detailed than what the old ones used to return, and - as evidenced by the bugs fixed in previous commits - badly needed their own set of tests. Add such a set. All different kinds of built-in errors should be covered, with the exception of size overflow errors which would require allocating huge amounts of memory to trigger them. 
--- scylla-cql/src/types/serialize/row.rs | 128 ++++++- scylla-cql/src/types/serialize/value.rs | 440 +++++++++++++++++++++++- 2 files changed, 564 insertions(+), 4 deletions(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index edada4f2d1..de9a45dc1d 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -750,10 +750,12 @@ impl<'a> Iterator for SerializedValuesIterator<'a> { #[cfg(test)] mod tests { + use std::collections::BTreeMap; + use crate::frame::response::result::{ColumnSpec, ColumnType, TableSpec}; use crate::frame::types::RawValue; use crate::frame::value::{LegacySerializedValues, MaybeUnset, ValueList}; - use crate::types::serialize::RowWriter; + use crate::types::serialize::{RowWriter, SerializationError}; use super::{ BuiltinSerializationError, BuiltinSerializationErrorKind, BuiltinTypeCheckError, @@ -881,6 +883,13 @@ mod tests { ret } + fn do_serialize_err(t: T, columns: &[ColumnSpec]) -> SerializationError { + let ctx = RowSerializationContext { columns }; + let mut ret = Vec::new(); + let mut builder = RowWriter::new(&mut ret); + t.serialize(&ctx, &mut builder).unwrap_err() + } + fn col(name: &str, typ: ColumnType) -> ColumnSpec { ColumnSpec { table_spec: TableSpec { @@ -892,6 +901,123 @@ mod tests { } } + fn get_typeck_err(err: &SerializationError) -> &BuiltinTypeCheckError { + match err.0.downcast_ref() { + Some(err) => err, + None => panic!("not a BuiltinTypeCheckError: {}", err), + } + } + + fn get_ser_err(err: &SerializationError) -> &BuiltinSerializationError { + match err.0.downcast_ref() { + Some(err) => err, + None => panic!("not a BuiltinSerializationError: {}", err), + } + } + + #[test] + fn test_tuple_errors() { + // Unit + #[allow(clippy::let_unit_value)] // The let binding below is intentional + let v = (); + let spec = [col("a", ColumnType::Text)]; + let err = do_serialize_err(v, &spec); + let err = get_typeck_err(&err); + 
assert_eq!(err.rust_name, std::any::type_name::<()>()); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::WrongColumnCount { + actual: 0, + asked_for: 1, + } + )); + + // Non-unit tuple + // Count mismatch + let v = ("Ala ma kota",); + let spec = [col("a", ColumnType::Text), col("b", ColumnType::Text)]; + let err = do_serialize_err(v, &spec); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::<(&str,)>()); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::WrongColumnCount { + actual: 1, + asked_for: 2, + } + )); + + // Serialization of one of the element fails + let v = ("Ala ma kota", 123_i32); + let spec = [col("a", ColumnType::Text), col("b", ColumnType::Text)]; + let err = do_serialize_err(v, &spec); + let err = get_ser_err(&err); + assert_eq!(err.rust_name, std::any::type_name::<(&str, i32)>()); + let BuiltinSerializationErrorKind::ColumnSerializationFailed { name, err: _ } = &err.kind; + assert_eq!(name, "b"); + } + + #[test] + fn test_slice_errors() { + // Non-unit tuple + // Count mismatch + let v = vec!["Ala ma kota"]; + let spec = [col("a", ColumnType::Text), col("b", ColumnType::Text)]; + let err = do_serialize_err(v, &spec); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::>()); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::WrongColumnCount { + actual: 1, + asked_for: 2, + } + )); + + // Serialization of one of the element fails + let v = vec!["Ala ma kota", "Kot ma pchły"]; + let spec = [col("a", ColumnType::Text), col("b", ColumnType::Int)]; + let err = do_serialize_err(v, &spec); + let err = get_ser_err(&err); + assert_eq!(err.rust_name, std::any::type_name::>()); + let BuiltinSerializationErrorKind::ColumnSerializationFailed { name, err: _ } = &err.kind; + assert_eq!(name, "b"); + } + + #[test] + fn test_map_errors() { + // Missing value for a bind marker + let v: BTreeMap<_, _> = vec![("a", 123_i32)].into_iter().collect(); + let spec = [col("a", 
ColumnType::Int), col("b", ColumnType::Text)]; + let err = do_serialize_err(v, &spec); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::>()); + let BuiltinTypeCheckErrorKind::ValueMissingForColumn { name } = &err.kind else { + panic!("unexpected error kind: {}", err.kind) + }; + assert_eq!(name, "b"); + + // Additional value, not present in the query + let v: BTreeMap<_, _> = vec![("a", 123_i32), ("b", 456_i32)].into_iter().collect(); + let spec = [col("a", ColumnType::Int)]; + let err = do_serialize_err(v, &spec); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::>()); + let BuiltinTypeCheckErrorKind::NoColumnWithName { name } = &err.kind else { + panic!("unexpected error kind: {}", err.kind) + }; + assert_eq!(name, "b"); + + // Serialization of one of the element fails + let v: BTreeMap<_, _> = vec![("a", 123_i32), ("b", 456_i32)].into_iter().collect(); + let spec = [col("a", ColumnType::Int), col("b", ColumnType::Text)]; + let err = do_serialize_err(v, &spec); + let err = get_ser_err(&err); + assert_eq!(err.rust_name, std::any::type_name::>()); + let BuiltinSerializationErrorKind::ColumnSerializationFailed { name, err: _ } = &err.kind; + assert_eq!(name, "b"); + } + // Do not remove. It's not used in tests but we keep it here to check that // we properly ignore warnings about unused variables, unnecessary `mut`s // etc. that usually pop up when generating code for empty structs. 
diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 23203c8361..b12416ba97 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -1389,14 +1389,20 @@ pub enum ValueToSerializeCqlAdapterError { #[cfg(test)] mod tests { + use std::collections::BTreeMap; + use crate::frame::response::result::{ColumnType, CqlValue}; - use crate::frame::value::{MaybeUnset, Value}; + use crate::frame::value::{MaybeUnset, Unset, Value}; use crate::types::serialize::value::{ BuiltinSerializationError, BuiltinSerializationErrorKind, BuiltinTypeCheckError, - BuiltinTypeCheckErrorKind, + BuiltinTypeCheckErrorKind, MapSerializationErrorKind, MapTypeCheckErrorKind, + SetOrListSerializationErrorKind, SetOrListTypeCheckErrorKind, TupleSerializationErrorKind, + TupleTypeCheckErrorKind, }; - use crate::types::serialize::CellWriter; + use crate::types::serialize::{CellWriter, SerializationError}; + use bigdecimal::BigDecimal; + use num_bigint::BigInt; use scylla_macros::SerializeCql; use super::{SerializeCql, UdtSerializationErrorKind, UdtTypeCheckErrorKind}; @@ -1441,6 +1447,434 @@ mod tests { ret } + fn do_serialize_err(t: T, typ: &ColumnType) -> SerializationError { + let mut ret = Vec::new(); + let writer = CellWriter::new(&mut ret); + t.serialize(typ, writer).unwrap_err() + } + + fn get_typeck_err(err: &SerializationError) -> &BuiltinTypeCheckError { + match err.0.downcast_ref() { + Some(err) => err, + None => panic!("not a BuiltinTypeCheckError: {}", err), + } + } + + fn get_ser_err(err: &SerializationError) -> &BuiltinSerializationError { + match err.0.downcast_ref() { + Some(err) => err, + None => panic!("not a BuiltinSerializationError: {}", err), + } + } + + #[test] + fn test_native_errors() { + // Simple type mismatch + let v = 123_i32; + let err = do_serialize_err(v, &ColumnType::Double); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::()); + 
assert_eq!(err.got, ColumnType::Double); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::MismatchedType { + expected: &[ColumnType::Int], + }, + )); + + // str (and also Uuid) are interesting because they accept two types, + // also check str here + let v = "Ala ma kota"; + let err = do_serialize_err(v, &ColumnType::Double); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::<&str>()); + assert_eq!(err.got, ColumnType::Double); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::MismatchedType { + expected: &[ColumnType::Ascii, ColumnType::Text], + }, + )); + + // We'll skip testing for SizeOverflow as this would require producing + // a value which is at least 2GB in size. + + // Value overflow (type out of representable range) + let v = BigDecimal::new(BigInt::from(123), 1i64 << 40); + let err = do_serialize_err(v, &ColumnType::Decimal); + let err = get_ser_err(&err); + assert_eq!(err.rust_name, std::any::type_name::()); + assert_eq!(err.got, ColumnType::Decimal); + assert!(matches!( + err.kind, + BuiltinSerializationErrorKind::ValueOverflow, + )); + } + + #[test] + fn test_set_or_list_errors() { + // Not a set or list + let v = vec![123_i32]; + let err = do_serialize_err(v, &ColumnType::Double); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::>()); + assert_eq!(err.got, ColumnType::Double); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::SetOrListError(SetOrListTypeCheckErrorKind::NotSetOrList), + )); + + // Trick: Unset is a ZST, so [Unset; 1 << 33] is a ZST, too. + // While it's probably incorrect to use Unset in a collection, this + // allows us to trigger the right error without going out of memory. + // Such an array is also created instantaneously. 
+ let v = &[Unset; 1 << 33] as &[Unset]; + let typ = ColumnType::List(Box::new(ColumnType::Int)); + let err = do_serialize_err(v, &typ); + let err = get_ser_err(&err); + assert_eq!(err.rust_name, std::any::type_name::<&[Unset]>()); + assert_eq!(err.got, typ); + assert!(matches!( + err.kind, + BuiltinSerializationErrorKind::SetOrListError( + SetOrListSerializationErrorKind::TooManyElements + ), + )); + + // Error during serialization of an element + let v = vec![123_i32]; + let typ = ColumnType::List(Box::new(ColumnType::Double)); + let err = do_serialize_err(v, &typ); + let err = get_ser_err(&err); + assert_eq!(err.rust_name, std::any::type_name::>()); + assert_eq!(err.got, typ); + let BuiltinSerializationErrorKind::SetOrListError( + SetOrListSerializationErrorKind::ElementSerializationFailed(err), + ) = &err.kind + else { + panic!("unexpected error kind: {}", err.kind) + }; + let err = get_typeck_err(err); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::MismatchedType { + expected: &[ColumnType::Int], + } + )); + } + + #[test] + fn test_map_errors() { + // Not a map + let v = BTreeMap::from([("foo", "bar")]); + let err = do_serialize_err(v, &ColumnType::Double); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::>()); + assert_eq!(err.got, ColumnType::Double); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::MapError(MapTypeCheckErrorKind::NotMap), + )); + + // It's not practical to check the TooManyElements error as it would + // require allocating a huge amount of memory. 
+ + // Error during serialization of a key + let v = BTreeMap::from([(123_i32, 456_i32)]); + let typ = ColumnType::Map(Box::new(ColumnType::Double), Box::new(ColumnType::Int)); + let err = do_serialize_err(v, &typ); + let err = get_ser_err(&err); + assert_eq!(err.rust_name, std::any::type_name::>()); + assert_eq!(err.got, typ); + let BuiltinSerializationErrorKind::MapError( + MapSerializationErrorKind::KeySerializationFailed(err), + ) = &err.kind + else { + panic!("unexpected error kind: {}", err.kind) + }; + let err = get_typeck_err(err); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::MismatchedType { + expected: &[ColumnType::Int], + } + )); + + // Error during serialization of a value + let v = BTreeMap::from([(123_i32, 456_i32)]); + let typ = ColumnType::Map(Box::new(ColumnType::Int), Box::new(ColumnType::Double)); + let err = do_serialize_err(v, &typ); + let err = get_ser_err(&err); + assert_eq!(err.rust_name, std::any::type_name::>()); + assert_eq!(err.got, typ); + let BuiltinSerializationErrorKind::MapError( + MapSerializationErrorKind::ValueSerializationFailed(err), + ) = &err.kind + else { + panic!("unexpected error kind: {}", err.kind) + }; + let err = get_typeck_err(err); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::MismatchedType { + expected: &[ColumnType::Int], + } + )); + } + + #[test] + fn test_tuple_errors() { + // Not a tuple + let v = (123_i32, 456_i32, 789_i32); + let err = do_serialize_err(v, &ColumnType::Double); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::<(i32, i32, i32)>()); + assert_eq!(err.got, ColumnType::Double); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::TupleError(TupleTypeCheckErrorKind::NotTuple), + )); + + // The Rust tuple has more elements than the CQL type + let v = (123_i32, 456_i32, 789_i32); + let typ = ColumnType::Tuple(vec![ColumnType::Int; 2]); + let err = do_serialize_err(v, &typ); + let err = get_typeck_err(&err); + 
assert_eq!(err.rust_name, std::any::type_name::<(i32, i32, i32)>()); + assert_eq!(err.got, typ); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::TupleError(TupleTypeCheckErrorKind::WrongElementCount { + actual: 3, + asked_for: 2, + }), + )); + + // Error during serialization of one of the elements + let v = (123_i32, "Ala ma kota", 789.0_f64); + let typ = ColumnType::Tuple(vec![ColumnType::Int, ColumnType::Text, ColumnType::Uuid]); + let err = do_serialize_err(v, &typ); + let err = get_ser_err(&err); + assert_eq!(err.rust_name, std::any::type_name::<(i32, &str, f64)>()); + assert_eq!(err.got, typ); + let BuiltinSerializationErrorKind::TupleError( + TupleSerializationErrorKind::ElementSerializationFailed { index: 2, err }, + ) = &err.kind + else { + panic!("unexpected error kind: {}", err.kind) + }; + let err = get_typeck_err(err); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::MismatchedType { + expected: &[ColumnType::Double], + } + )); + } + + #[test] + fn test_cql_value_errors() { + // Tried to encode Empty value into a non-emptyable type + let v = CqlValue::Empty; + let err = do_serialize_err(v, &ColumnType::Counter); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::()); + assert_eq!(err.got, ColumnType::Counter); + assert!(matches!(err.kind, BuiltinTypeCheckErrorKind::NotEmptyable)); + + // Handle tuples and UDTs in separate tests, as they have some + // custom logic + } + + #[test] + fn test_cql_value_tuple_errors() { + // Not a tuple + let v = CqlValue::Tuple(vec![ + Some(CqlValue::Int(123_i32)), + Some(CqlValue::Int(456_i32)), + Some(CqlValue::Int(789_i32)), + ]); + let err = do_serialize_err(v, &ColumnType::Double); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::()); + assert_eq!(err.got, ColumnType::Double); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::TupleError(TupleTypeCheckErrorKind::NotTuple), + )); + + // The Rust tuple has more elements 
than the CQL type + let v = CqlValue::Tuple(vec![ + Some(CqlValue::Int(123_i32)), + Some(CqlValue::Int(456_i32)), + Some(CqlValue::Int(789_i32)), + ]); + let typ = ColumnType::Tuple(vec![ColumnType::Int; 2]); + let err = do_serialize_err(v, &typ); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::()); + assert_eq!(err.got, typ); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::TupleError(TupleTypeCheckErrorKind::WrongElementCount { + actual: 3, + asked_for: 2, + }), + )); + + // Error during serialization of one of the elements + let v = CqlValue::Tuple(vec![ + Some(CqlValue::Int(123_i32)), + Some(CqlValue::Text("Ala ma kota".to_string())), + Some(CqlValue::Double(789_f64)), + ]); + let typ = ColumnType::Tuple(vec![ColumnType::Int, ColumnType::Text, ColumnType::Uuid]); + let err = do_serialize_err(v, &typ); + let err = get_ser_err(&err); + assert_eq!(err.rust_name, std::any::type_name::()); + assert_eq!(err.got, typ); + let BuiltinSerializationErrorKind::TupleError( + TupleSerializationErrorKind::ElementSerializationFailed { index: 2, err }, + ) = &err.kind + else { + panic!("unexpected error kind: {}", err.kind) + }; + let err = get_typeck_err(err); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::MismatchedType { + expected: &[ColumnType::Double], + } + )); + } + + #[test] + fn test_cql_value_udt_errors() { + // Not a UDT + let v = CqlValue::UserDefinedType { + keyspace: "ks".to_string(), + type_name: "udt".to_string(), + fields: vec![ + ("a".to_string(), Some(CqlValue::Int(123_i32))), + ("b".to_string(), Some(CqlValue::Int(456_i32))), + ("c".to_string(), Some(CqlValue::Int(789_i32))), + ], + }; + let err = do_serialize_err(v, &ColumnType::Double); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::()); + assert_eq!(err.got, ColumnType::Double); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::NotUdt), + )); + + // Wrong type name + 
let v = CqlValue::UserDefinedType { + keyspace: "ks".to_string(), + type_name: "udt".to_string(), + fields: vec![ + ("a".to_string(), Some(CqlValue::Int(123_i32))), + ("b".to_string(), Some(CqlValue::Int(456_i32))), + ("c".to_string(), Some(CqlValue::Int(789_i32))), + ], + }; + let typ = ColumnType::UserDefinedType { + type_name: "udt2".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Int), + ("b".to_string(), ColumnType::Int), + ("c".to_string(), ColumnType::Int), + ], + }; + let err = do_serialize_err(v, &typ); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::()); + assert_eq!(err.got, typ); + let BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::NameMismatch { + keyspace, + type_name, + }) = &err.kind + else { + panic!("unexpected error kind: {}", err.kind) + }; + assert_eq!(keyspace, "ks"); + assert_eq!(type_name, "udt2"); + + // Some fields are missing from the CQL type + let v = CqlValue::UserDefinedType { + keyspace: "ks".to_string(), + type_name: "udt".to_string(), + fields: vec![ + ("a".to_string(), Some(CqlValue::Int(123_i32))), + ("b".to_string(), Some(CqlValue::Int(456_i32))), + ("c".to_string(), Some(CqlValue::Int(789_i32))), + ], + }; + let typ = ColumnType::UserDefinedType { + type_name: "udt".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Int), + ("b".to_string(), ColumnType::Int), + // c is missing + ], + }; + let err = do_serialize_err(v, &typ); + let err = get_typeck_err(&err); + assert_eq!(err.rust_name, std::any::type_name::()); + assert_eq!(err.got, typ); + let BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::NoSuchFieldInUdt { + field_name, + }) = &err.kind + else { + panic!("unexpected error kind: {}", err.kind) + }; + assert_eq!(field_name, "c"); + + // It is allowed for a Rust UDT to have less fields than the CQL UDT, + // so skip UnexpectedFieldInDestination. 
+ + // Error during serialization of one of the fields + let v = CqlValue::UserDefinedType { + keyspace: "ks".to_string(), + type_name: "udt".to_string(), + fields: vec![ + ("a".to_string(), Some(CqlValue::Int(123_i32))), + ("b".to_string(), Some(CqlValue::Int(456_i32))), + ("c".to_string(), Some(CqlValue::Int(789_i32))), + ], + }; + let typ = ColumnType::UserDefinedType { + type_name: "udt".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Int), + ("b".to_string(), ColumnType::Int), + ("c".to_string(), ColumnType::Double), + ], + }; + let err = do_serialize_err(v, &typ); + let err = get_ser_err(&err); + assert_eq!(err.rust_name, std::any::type_name::()); + assert_eq!(err.got, typ); + let BuiltinSerializationErrorKind::UdtError( + UdtSerializationErrorKind::FieldSerializationFailed { field_name, err }, + ) = &err.kind + else { + panic!("unexpected error kind: {}", err.kind) + }; + assert_eq!(field_name, "c"); + let err = get_typeck_err(err); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::MismatchedType { + expected: &[ColumnType::Int], + } + )); + } + // Do not remove. It's not used in tests but we keep it here to check that // we properly ignore warnings about unused variables, unnecessary `mut`s // etc. that usually pop up when generating code for empty structs. From 7cf355b3e645faa1c8a970f4173358b894fb21f0 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Mon, 11 Dec 2023 19:28:12 +0100 Subject: [PATCH 072/107] serialize: document all public items Put the `#![warn(missing_docs)]` attribute at the top of the `types::serialize` module to trigger warnings for undocumented public items, and then add docstrings to the items reported by the warnings. 
--- scylla-cql/src/types/serialize/mod.rs | 23 ++++++ scylla-cql/src/types/serialize/row.rs | 79 ++++++++++++++++++--- scylla-cql/src/types/serialize/value.rs | 94 ++++++++++++++++++++++--- 3 files changed, 178 insertions(+), 18 deletions(-) diff --git a/scylla-cql/src/types/serialize/mod.rs b/scylla-cql/src/types/serialize/mod.rs index 230462759d..b61541debf 100644 --- a/scylla-cql/src/types/serialize/mod.rs +++ b/scylla-cql/src/types/serialize/mod.rs @@ -1,3 +1,7 @@ +#![warn(missing_docs)] + +//! Types and traits related to serialization of values to the CQL format. + use std::{error::Error, fmt::Display, sync::Arc}; use thiserror::Error; @@ -7,6 +11,25 @@ pub mod value; pub mod writers; pub use writers::{CellValueBuilder, CellWriter, RowWriter}; + +/// An error indicating that a failure happened during serialization. +/// +/// The error is type-erased so that the crate users can define their own +/// serialization impls and their errors. As for the impls defined or generated +/// by the driver itself, the following errors can be returned: +/// +/// - [`row::BuiltinSerializationError`] is returned when serialization of +/// one of types with an impl built into the driver fails. It is also returned +/// from impls generated by the `SerializeRow` macro. +/// - [`value::BuiltinSerializationError`] is analogous to the above but is +/// returned from [`SerializeCql::serialize`](value::SerializeCql::serialize) +/// instead both in the case of builtin impls and impls generated by the +/// `SerializeCql` macro. It won't be returned by the `Session` directly, +/// but it might be nested in the [`row::BuiltinSerializationError`]. +/// - [`row::ValueListToSerializeRowAdapterError`] is returned in case when +/// a list of named values encoded with the legacy `ValueList` trait is passed +/// as an argument to the statement, and rewriting it using the new +/// `SerializeRow` interface fails. 
#[derive(Debug, Clone, Error)] pub struct SerializationError(Arc); diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index de9a45dc1d..edd0293cac 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -1,3 +1,5 @@ +//! Contains the [`SerializeRow`] trait and its implementations. + use std::borrow::Cow; use std::collections::{BTreeMap, HashSet}; use std::fmt::Display; @@ -24,6 +26,7 @@ pub struct RowSerializationContext<'a> { } impl<'a> RowSerializationContext<'a> { + /// Creates the serialization context from prepared statement metadata. #[inline] pub fn from_prepared(prepared: &'a PreparedMetadata) -> Self { Self { @@ -45,14 +48,30 @@ impl<'a> RowSerializationContext<'a> { } } +/// Represents a set of values that can be sent along a CQL statement. +/// +/// This is a low-level trait that is exposed to the specifics to the CQL +/// protocol and usually does not have to be implemented directly. See the +/// chapter on "Query Values" in the driver docs for information about how +/// this trait is supposed to be used. pub trait SerializeRow { /// Serializes the row according to the information in the given context. + /// + /// It's the trait's responsibility to produce values in the order as + /// specified in given serialization context. fn serialize( &self, ctx: &RowSerializationContext<'_>, writer: &mut RowWriter, ) -> Result<(), SerializationError>; + /// Returns whether this row contains any values or not. + /// + /// This method is used before executing a simple statement in order to check + /// whether there are any values provided to it. If there are some, then + /// the query will be prepared first in order to obtain information about + /// the bind marker types and names so that the values can be properly + /// type checked and serialized. 
fn is_empty(&self) -> bool; } @@ -424,7 +443,8 @@ pub fn serialize_legacy_row( RawValue::Null => cell_writer.set_null(), RawValue::Unset => cell_writer.set_unset(), // The unwrap below will succeed because the value was successfully - // deserialized from the CQL format, so it must have + // deserialized from the CQL format, so it must have had correct + // size. RawValue::Value(v) => cell_writer.set_value(v).unwrap(), }; }; @@ -527,19 +547,35 @@ fn mk_ser_err_named( #[derive(Debug, Clone)] #[non_exhaustive] pub enum BuiltinTypeCheckErrorKind { - /// The Rust type expects `asked_for` column, but the query requires `actual`. - WrongColumnCount { actual: usize, asked_for: usize }, + /// The Rust type expects `actual` column, but the statement requires `asked_for`. + WrongColumnCount { + /// The number of values that the Rust type provides. + actual: usize, + + /// The number of columns that the statement requires. + asked_for: usize, + }, /// The Rust type provides a value for some column, but that column is not /// present in the statement. - NoColumnWithName { name: String }, + NoColumnWithName { + /// Name of the column that is missing in the statement. + name: String, + }, /// A value required by the statement is not provided by the Rust type. - ValueMissingForColumn { name: String }, + ValueMissingForColumn { + /// Name of the column for which the Rust type doesn't + /// provide a value. + name: String, + }, /// A different column name was expected at given position. ColumnNameMismatch { + /// Name of the column, as expected by the Rust type. rust_column_name: String, + + /// Name of the column for which the DB requested a value. db_column_name: String, }, } @@ -576,7 +612,10 @@ impl Display for BuiltinTypeCheckErrorKind { pub enum BuiltinSerializationErrorKind { /// One of the columns failed to serialize. ColumnSerializationFailed { + /// Name of the column that failed to serialize. name: String, + + /// The error that caused the column serialization to fail. 
err: SerializationError, }, } @@ -591,13 +630,27 @@ impl Display for BuiltinSerializationErrorKind { } } +/// Describes a failure to translate the output of the [`ValueList`] legacy trait +/// into an output of the [`SerializeRow`] trait. #[derive(Error, Debug)] pub enum ValueListToSerializeRowAdapterError { + /// The values generated by the [`ValueList`] trait were provided in + /// name-value pairs, and there is a column in the statement for which + /// there is no corresponding named value. #[error("Missing named value for column {name}")] - ValueMissingForBindMarker { name: String }, + ValueMissingForBindMarker { + /// Name of the bind marker for which there is no value. + name: String, + }, + /// The values generated by the [`ValueList`] trait were provided in + /// name-value pairs, and there is a named value which does not match + /// to any of the columns. #[error("There is no bind marker with name {name}, but a value for it was provided")] - NoBindMarkerWithName { name: String }, + NoBindMarkerWithName { + /// Name of the value that does not match to any of the bind markers. + name: String, + }, } /// A buffer containing already serialized values. @@ -614,6 +667,7 @@ pub struct SerializedValues { } impl SerializedValues { + /// Constructs a new, empty `SerializedValues`. pub const fn new() -> Self { SerializedValues { serialized_values: Vec::new(), @@ -624,6 +678,7 @@ impl SerializedValues { /// A const empty instance, useful for taking references pub const EMPTY: &'static SerializedValues = &SerializedValues::new(); + /// Constructs `SerializedValues` from given [`SerializeRow`] object. pub fn from_serializable( ctx: &RowSerializationContext, row: &T, @@ -648,11 +703,13 @@ impl SerializedValues { }) } + /// Returns `true` if the row contains no elements. #[inline] pub fn is_empty(&self) -> bool { self.element_count() == 0 } + /// Returns an iterator over the values serialized into the object so far. 
#[inline] pub fn iter(&self) -> impl Iterator { SerializedValuesIterator { @@ -660,13 +717,13 @@ impl SerializedValues { } } + /// Returns the number of values written so far. #[inline] pub fn element_count(&self) -> u16 { - // We initialize first two bytes in new() and BufBackedRowWriter does too, - // so this unwrap is safe self.element_count } + /// Returns the total serialized size of the values written so far. #[inline] pub fn buffer_size(&self) -> usize { self.serialized_values.len() @@ -717,7 +774,8 @@ impl SerializedValues { }) } - // Temporary function, to be removed when we implement new batching API (right now it is needed in frame::request::mod.rs tests) + /// Temporary function, to be removed when we implement new batching API (right now it is needed in frame::request::mod.rs tests) + // TODO: Remove pub fn to_old_serialized_values(&self) -> LegacySerializedValues { let mut frame = Vec::new(); self.write_to_request(&mut frame); @@ -731,6 +789,7 @@ impl Default for SerializedValues { } } +/// An iterator over raw values in some [`SerializedValues`]. #[derive(Clone, Copy)] pub struct SerializedValuesIterator<'a> { serialized_values: &'a [u8], diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index b12416ba97..53061caa68 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -1,3 +1,5 @@ +//! Contains the [`SerializeCql`] trait and its implementations. + use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::fmt::Display; use std::hash::BuildHasher; @@ -27,8 +29,28 @@ use crate::frame::value::ValueOverflow; use super::writers::WrittenCellProof; use super::{CellWriter, SerializationError}; +/// A type that can be serialized and sent along with a CQL statement. +/// +/// This is a low-level trait that is exposed to the specifics to the CQL +/// protocol and usually does not have to be implemented directly. 
See the +/// chapter on "Query Values" in the driver docs for information about how +/// this trait is supposed to be used. pub trait SerializeCql { /// Serializes the value to given CQL type. + /// + /// The value should produce a `[value]`, according to the [CQL protocol + /// specification](https://github.com/apache/cassandra/blob/trunk/doc/native_protocol_v4.spec), + /// containing the serialized value. See section 6 of the document on how + /// the contents of the `[value]` should look like. + /// + /// The value produced should match the type provided by `typ`. If the + /// value cannot be serialized to that type, an error should be returned. + /// + /// The [`CellWriter`] provided to the method ensures that the value produced + /// will be properly framed (i.e. incorrectly written value should not + /// cause the rest of the request to be misinterpreted), but otherwise + /// the implementor of the trait is responsible for producing the a value + /// in a correct format. fn serialize<'b>( &self, typ: &ColumnType, @@ -1014,7 +1036,10 @@ fn mk_ser_err_named( #[non_exhaustive] pub enum BuiltinTypeCheckErrorKind { /// Expected one from a list of particular types. - MismatchedType { expected: &'static [ColumnType] }, + MismatchedType { + /// The list of types that the Rust type can serialize as. + expected: &'static [ColumnType], + }, /// Expected a type that can be empty. NotEmptyable, @@ -1154,6 +1179,7 @@ impl Display for BuiltinSerializationErrorKind { } } +/// Describes why type checking of a map type failed. #[derive(Debug, Clone)] #[non_exhaustive] pub enum MapTypeCheckErrorKind { @@ -1174,6 +1200,7 @@ impl Display for MapTypeCheckErrorKind { } } +/// Describes why serialization of a map type failed. #[derive(Debug, Clone)] #[non_exhaustive] pub enum MapSerializationErrorKind { @@ -1206,6 +1233,7 @@ impl Display for MapSerializationErrorKind { } } +/// Describes why type checking of a set or list type failed. 
#[derive(Debug, Clone)] #[non_exhaustive] pub enum SetOrListTypeCheckErrorKind { @@ -1226,6 +1254,7 @@ impl Display for SetOrListTypeCheckErrorKind { } } +/// Describes why serialization of a set or list type failed. #[derive(Debug, Clone)] #[non_exhaustive] pub enum SetOrListSerializationErrorKind { @@ -1252,6 +1281,7 @@ impl Display for SetOrListSerializationErrorKind { } } +/// Describes why type checking of a tuple failed. #[derive(Debug, Clone)] #[non_exhaustive] pub enum TupleTypeCheckErrorKind { @@ -1263,7 +1293,13 @@ pub enum TupleTypeCheckErrorKind { /// Note that it is allowed to write a Rust tuple with less elements /// than the corresponding CQL type, but not more. The additional, unknown /// elements will be set to null. - WrongElementCount { actual: usize, asked_for: usize }, + WrongElementCount { + /// The number of elements that the Rust tuple has. + actual: usize, + + /// The number of elements that the CQL tuple type has. + asked_for: usize, + }, } impl Display for TupleTypeCheckErrorKind { @@ -1281,12 +1317,16 @@ impl Display for TupleTypeCheckErrorKind { } } +/// Describes why serialize of a tuple failed. #[derive(Debug, Clone)] #[non_exhaustive] pub enum TupleSerializationErrorKind { /// One of the tuple elements failed to serialize. ElementSerializationFailed { + /// Index of the tuple element that failed to serialize. index: usize, + + /// The error that caused the tuple field serialization to fail. err: SerializationError, }, } @@ -1301,6 +1341,7 @@ impl Display for TupleSerializationErrorKind { } } +/// Describes why type checking of a user defined type failed. #[derive(Debug, Clone)] #[non_exhaustive] pub enum UdtTypeCheckErrorKind { @@ -1308,17 +1349,32 @@ pub enum UdtTypeCheckErrorKind { NotUdt, /// The name of the UDT being serialized to does not match. - NameMismatch { keyspace: String, type_name: String }, + NameMismatch { + /// Keyspace in which the UDT was defined. + keyspace: String, + + /// Name of the UDT. 
+ type_name: String, + }, /// The Rust data does not have a field that is required in the CQL UDT type. - ValueMissingForUdtField { field_name: String }, + ValueMissingForUdtField { + /// Name of field that the CQL UDT requires but is missing in the Rust struct. + field_name: String, + }, /// The Rust data contains a field that is not present in the UDT. - NoSuchFieldInUdt { field_name: String }, + NoSuchFieldInUdt { + /// Name of the Rust struct field that is missing in the UDT. + field_name: String, + }, /// A different field name was expected at given position. FieldNameMismatch { + /// The name of the Rust field. rust_field_name: String, + + /// The name of the CQL UDT field. db_field_name: String, }, } @@ -1352,12 +1408,16 @@ impl Display for UdtTypeCheckErrorKind { } } +/// Describes why serialization of a user defined type failed. #[derive(Debug, Clone)] #[non_exhaustive] pub enum UdtSerializationErrorKind { /// One of the fields failed to serialize. FieldSerializationFailed { + /// Name of the field which failed to serialize. field_name: String, + + /// The error that caused the UDT field serialization to fail. err: SerializationError, }, } @@ -1372,19 +1432,37 @@ impl Display for UdtSerializationErrorKind { } } +/// Describes a failure to translate the output of the [`Value`] legacy trait +/// into an output of the [`SerializeCql`] trait. #[derive(Error, Debug)] pub enum ValueToSerializeCqlAdapterError { + /// The value is too bit to be serialized as it exceeds the maximum 2GB size limit. #[error("The value is too big to be serialized as it exceeds the maximum 2GB size limit")] TooBig, + /// Output produced by the Value trait is less than 4 bytes in size and cannot be considered to be a proper CQL-encoded value. #[error("Output produced by the Value trait is too short to be considered a value: {size} < 4 minimum bytes")] - TooShort { size: usize }, + TooShort { + /// Size of the produced data. 
+ size: usize, + }, + /// Mismatch between the value size written at the beginning and the actual size of the data appended to the Vec. #[error("Mismatch between the declared value size vs. actual size: {declared} != {actual}")] - DeclaredVsActualSizeMismatch { declared: usize, actual: usize }, + DeclaredVsActualSizeMismatch { + /// The declared size of the output. + declared: usize, + /// The actual size of the output. + actual: usize, + }, + + /// The value size written at the beginning is invalid (it is negative and less than -2). #[error("Invalid declared value size: {size}")] - InvalidDeclaredSize { size: i32 }, + InvalidDeclaredSize { + /// Declared size of the output. + size: i32, + }, } #[cfg(test)] From 7fe1ef8b373bf42138c983de67b2782e83e98f59 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Wed, 13 Dec 2023 23:22:42 +0100 Subject: [PATCH 073/107] treewide: add Legacy- prefix to BatchValues and its friends We will introduce types with the same name but different interface, and we want to move off the current ones gradually. Rename the existing ones as the first step. 
--- scylla-cql/src/frame/request/batch.rs | 6 +- scylla-cql/src/frame/value.rs | 82 ++++++++++++------------- scylla-cql/src/frame/value_tests.rs | 16 ++--- scylla/src/transport/caching_session.rs | 4 +- scylla/src/transport/connection.rs | 8 +-- scylla/src/transport/session.rs | 8 ++- 6 files changed, 63 insertions(+), 61 deletions(-) diff --git a/scylla-cql/src/frame/request/batch.rs b/scylla-cql/src/frame/request/batch.rs index 5b5c2f84b6..bed85d83f0 100644 --- a/scylla-cql/src/frame/request/batch.rs +++ b/scylla-cql/src/frame/request/batch.rs @@ -5,7 +5,7 @@ use crate::frame::{ frame_errors::ParseError, request::{RequestOpcode, SerializableRequest}, types::{self, SerialConsistency}, - value::{BatchValues, BatchValuesIterator, LegacySerializedValues}, + value::{LegacyBatchValues, LegacyBatchValuesIterator, LegacySerializedValues}, }; use super::DeserializableRequest; @@ -20,7 +20,7 @@ pub struct Batch<'b, Statement, Values> where BatchStatement<'b>: From<&'b Statement>, Statement: Clone, - Values: BatchValues, + Values: LegacyBatchValues, { pub statements: Cow<'b, [Statement]>, pub batch_type: BatchType, @@ -72,7 +72,7 @@ impl SerializableRequest for Batch<'_, Statement, Values> where for<'s> BatchStatement<'s>: From<&'s Statement>, Statement: Clone, - Values: BatchValues, + Values: LegacyBatchValues, { const OPCODE: RequestOpcode = RequestOpcode::Batch; diff --git a/scylla-cql/src/frame/value.rs b/scylla-cql/src/frame/value.rs index a6abccd89a..f4c4d809c1 100644 --- a/scylla-cql/src/frame/value.rs +++ b/scylla-cql/src/frame/value.rs @@ -454,15 +454,15 @@ impl<'a> Iterator for LegacySerializedValuesIterator<'a> { } /// Represents List of ValueList for Batch statement -pub trait BatchValues { +pub trait LegacyBatchValues { /// For some unknown reason, this type, when not resolved to a concrete type for a given async function, /// cannot live across await boundaries while maintaining the corresponding future `Send`, unless `'r: 'static` /// /// See for more 
details - type BatchValuesIter<'r>: BatchValuesIterator<'r> + type LegacyBatchValuesIter<'r>: LegacyBatchValuesIterator<'r> where Self: 'r; - fn batch_values_iter(&self) -> Self::BatchValuesIter<'_>; + fn batch_values_iter(&self) -> Self::LegacyBatchValuesIter<'_>; } /// An iterator-like for `ValueList` @@ -473,7 +473,7 @@ pub trait BatchValues { /// It's just essentially making methods from `ValueList` accessible instead of being an actual iterator because of /// compiler limitations that would otherwise be very complex to overcome. /// (specifically, types being different would require yielding enums for tuple impls) -pub trait BatchValuesIterator<'a> { +pub trait LegacyBatchValuesIterator<'a> { fn next_serialized(&mut self) -> Option>; fn write_next_to_request( &mut self, @@ -496,11 +496,11 @@ pub trait BatchValuesIterator<'a> { /// /// Essentially used internally by this lib to provide implementers of `BatchValuesIterator` for cases /// that always serialize the same concrete `ValueList` type -pub struct BatchValuesIteratorFromIterator { +pub struct LegacyBatchValuesIteratorFromIterator { it: IT, } -impl<'r, 'a: 'r, IT, VL> BatchValuesIterator<'r> for BatchValuesIteratorFromIterator +impl<'r, 'a: 'r, IT, VL> LegacyBatchValuesIterator<'r> for LegacyBatchValuesIteratorFromIterator where IT: Iterator, VL: ValueList + 'a, @@ -519,13 +519,13 @@ where } } -impl From for BatchValuesIteratorFromIterator +impl From for LegacyBatchValuesIteratorFromIterator where IT: Iterator, IT::Item: ValueList, { fn from(it: IT) -> Self { - BatchValuesIteratorFromIterator { it } + LegacyBatchValuesIteratorFromIterator { it } } } @@ -1192,12 +1192,12 @@ impl<'b> ValueList for Cow<'b, LegacySerializedValues> { /// The underlying iterator will always be cloned at least once, once to compute the length if it can't be known /// in advance, and be re-cloned at every retry. /// It is consequently expected that the provided iterator is cheap to clone (e.g. `slice.iter().map(...)`). 
-pub struct BatchValuesFromIter<'a, IT> { +pub struct LegacyBatchValuesFromIter<'a, IT> { it: IT, _spooky: std::marker::PhantomData<&'a ()>, } -impl<'a, IT, VL> BatchValuesFromIter<'a, IT> +impl<'a, IT, VL> LegacyBatchValuesFromIter<'a, IT> where IT: Iterator + Clone, VL: ValueList + 'a, @@ -1210,7 +1210,7 @@ where } } -impl<'a, IT, VL> From for BatchValuesFromIter<'a, IT> +impl<'a, IT, VL> From for LegacyBatchValuesFromIter<'a, IT> where IT: Iterator + Clone, VL: ValueList + 'a, @@ -1220,38 +1220,38 @@ where } } -impl<'a, IT, VL> BatchValues for BatchValuesFromIter<'a, IT> +impl<'a, IT, VL> LegacyBatchValues for LegacyBatchValuesFromIter<'a, IT> where IT: Iterator + Clone, VL: ValueList + 'a, { - type BatchValuesIter<'r> = BatchValuesIteratorFromIterator where Self: 'r; - fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { + type LegacyBatchValuesIter<'r> = LegacyBatchValuesIteratorFromIterator where Self: 'r; + fn batch_values_iter(&self) -> Self::LegacyBatchValuesIter<'_> { self.it.clone().into() } } // Implement BatchValues for slices of ValueList types -impl BatchValues for [T] { - type BatchValuesIter<'r> = BatchValuesIteratorFromIterator> where Self: 'r; - fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { +impl LegacyBatchValues for [T] { + type LegacyBatchValuesIter<'r> = LegacyBatchValuesIteratorFromIterator> where Self: 'r; + fn batch_values_iter(&self) -> Self::LegacyBatchValuesIter<'_> { self.iter().into() } } // Implement BatchValues for Vec -impl BatchValues for Vec { - type BatchValuesIter<'r> = BatchValuesIteratorFromIterator> where Self: 'r; - fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { - BatchValues::batch_values_iter(self.as_slice()) +impl LegacyBatchValues for Vec { + type LegacyBatchValuesIter<'r> = LegacyBatchValuesIteratorFromIterator> where Self: 'r; + fn batch_values_iter(&self) -> Self::LegacyBatchValuesIter<'_> { + LegacyBatchValues::batch_values_iter(self.as_slice()) } } // Here is an example implementation 
for (T0, ) // Further variants are done using a macro -impl BatchValues for (T0,) { - type BatchValuesIter<'r> = BatchValuesIteratorFromIterator> where Self: 'r; - fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { +impl LegacyBatchValues for (T0,) { + type LegacyBatchValuesIter<'r> = LegacyBatchValuesIteratorFromIterator> where Self: 'r; + fn batch_values_iter(&self) -> Self::LegacyBatchValuesIter<'_> { std::iter::once(&self.0).into() } } @@ -1263,19 +1263,19 @@ pub struct TupleValuesIter<'a, T> { macro_rules! impl_batch_values_for_tuple { ( $($Ti:ident),* ; $($FieldI:tt),* ; $TupleSize:tt) => { - impl<$($Ti),+> BatchValues for ($($Ti,)+) + impl<$($Ti),+> LegacyBatchValues for ($($Ti,)+) where $($Ti: ValueList),+ { - type BatchValuesIter<'r> = TupleValuesIter<'r, ($($Ti,)+)> where Self: 'r; - fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { + type LegacyBatchValuesIter<'r> = TupleValuesIter<'r, ($($Ti,)+)> where Self: 'r; + fn batch_values_iter(&self) -> Self::LegacyBatchValuesIter<'_> { TupleValuesIter { tuple: self, idx: 0, } } } - impl<'r, $($Ti),+> BatchValuesIterator<'r> for TupleValuesIter<'r, ($($Ti,)+)> + impl<'r, $($Ti),+> LegacyBatchValuesIterator<'r> for TupleValuesIter<'r, ($($Ti,)+)> where $($Ti: ValueList),+ { @@ -1338,10 +1338,10 @@ impl_batch_values_for_tuple!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15; 16); // Every &impl BatchValues should also implement BatchValues -impl<'a, T: BatchValues + ?Sized> BatchValues for &'a T { - type BatchValuesIter<'r> = ::BatchValuesIter<'r> where Self: 'r; - fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { - ::batch_values_iter(*self) +impl<'a, T: LegacyBatchValues + ?Sized> LegacyBatchValues for &'a T { + type LegacyBatchValuesIter<'r> = ::LegacyBatchValuesIter<'r> where Self: 'r; + fn batch_values_iter(&self) -> Self::LegacyBatchValuesIter<'_> { + ::batch_values_iter(*self) } } @@ -1351,12 +1351,12 @@ impl<'a, T: 
BatchValues + ?Sized> BatchValues for &'a T { /// Once that is done, we can use that instead of re-serializing. /// /// This struct implements both `BatchValues` and `BatchValuesIterator` for that purpose -pub struct BatchValuesFirstSerialized<'f, T> { +pub struct LegacyBatchValuesFirstSerialized<'f, T> { first: Option<&'f LegacySerializedValues>, rest: T, } -impl<'f, T: BatchValues> BatchValuesFirstSerialized<'f, T> { +impl<'f, T: LegacyBatchValues> LegacyBatchValuesFirstSerialized<'f, T> { pub fn new( batch_values: T, already_serialized_first: Option<&'f LegacySerializedValues>, @@ -1368,19 +1368,19 @@ impl<'f, T: BatchValues> BatchValuesFirstSerialized<'f, T> { } } -impl<'f, BV: BatchValues> BatchValues for BatchValuesFirstSerialized<'f, BV> { - type BatchValuesIter<'r> = - BatchValuesFirstSerialized<'f, ::BatchValuesIter<'r>> where Self: 'r; - fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { - BatchValuesFirstSerialized { +impl<'f, BV: LegacyBatchValues> LegacyBatchValues for LegacyBatchValuesFirstSerialized<'f, BV> { + type LegacyBatchValuesIter<'r> = + LegacyBatchValuesFirstSerialized<'f, ::LegacyBatchValuesIter<'r>> where Self: 'r; + fn batch_values_iter(&self) -> Self::LegacyBatchValuesIter<'_> { + LegacyBatchValuesFirstSerialized { first: self.first, rest: self.rest.batch_values_iter(), } } } -impl<'a, 'f: 'a, IT: BatchValuesIterator<'a>> BatchValuesIterator<'a> - for BatchValuesFirstSerialized<'f, IT> +impl<'a, 'f: 'a, IT: LegacyBatchValuesIterator<'a>> LegacyBatchValuesIterator<'a> + for LegacyBatchValuesFirstSerialized<'f, IT> { fn next_serialized(&mut self) -> Option> { match self.first.take() { diff --git a/scylla-cql/src/frame/value_tests.rs b/scylla-cql/src/frame/value_tests.rs index adcdcdf0b2..847482eebf 100644 --- a/scylla-cql/src/frame/value_tests.rs +++ b/scylla-cql/src/frame/value_tests.rs @@ -1,12 +1,12 @@ -use crate::frame::{response::result::CqlValue, types::RawValue, value::BatchValuesIterator}; +use 
crate::frame::{response::result::CqlValue, types::RawValue, value::LegacyBatchValuesIterator}; use crate::types::serialize::row::{RowSerializationContext, SerializeRow}; use crate::types::serialize::value::SerializeCql; use crate::types::serialize::{CellWriter, RowWriter}; use super::response::result::{ColumnSpec, ColumnType, TableSpec}; use super::value::{ - BatchValues, CqlDate, CqlDuration, CqlTime, CqlTimestamp, LegacySerializedValues, MaybeUnset, - SerializeValuesError, Unset, Value, ValueList, ValueTooBig, + CqlDate, CqlDuration, CqlTime, CqlTimestamp, LegacyBatchValues, LegacySerializedValues, + MaybeUnset, SerializeValuesError, Unset, Value, ValueList, ValueTooBig, }; use bigdecimal::BigDecimal; use bytes::BufMut; @@ -1235,7 +1235,7 @@ fn vec_batch_values() { #[test] fn tuple_batch_values() { - fn check_twoi32_tuple(tuple: impl BatchValues, size: usize) { + fn check_twoi32_tuple(tuple: impl LegacyBatchValues, size: usize) { let mut it = tuple.batch_values_iter(); for i in 0..size { let mut request: Vec = Vec::new(); @@ -1428,8 +1428,8 @@ fn ref_batch_values() { let batch_values: &[&[i8]] = &[&[1, 2], &[2, 3, 4, 5], &[6]]; return check_ref_bv::<&&&&&[&[i8]]>(&&&&batch_values); - fn check_ref_bv(batch_values: B) { - let mut it = ::batch_values_iter(&batch_values); + fn check_ref_bv(batch_values: B) { + let mut it = ::batch_values_iter(&batch_values); let mut request: Vec = Vec::new(); it.write_next_to_request(&mut request).unwrap().unwrap(); @@ -1440,7 +1440,7 @@ fn ref_batch_values() { #[test] #[allow(clippy::needless_borrow)] fn check_ref_tuple() { - fn assert_has_batch_values(bv: BV) { + fn assert_has_batch_values(bv: BV) { let mut it = bv.batch_values_iter(); let mut request: Vec = Vec::new(); while let Some(res) = it.write_next_to_request(&mut request) { @@ -1457,7 +1457,7 @@ fn check_ref_tuple() { #[test] fn check_batch_values_iterator_is_not_lending() { // This is an interesting property if we want to improve the batch shard selection heuristic - fn 
f(bv: impl BatchValues) { + fn f(bv: impl LegacyBatchValues) { let mut it = bv.batch_values_iter(); let mut it2 = bv.batch_values_iter(); // Make sure we can hold all these at the same time diff --git a/scylla/src/transport/caching_session.rs b/scylla/src/transport/caching_session.rs index 14546b93e4..4a841fa250 100644 --- a/scylla/src/transport/caching_session.rs +++ b/scylla/src/transport/caching_session.rs @@ -1,5 +1,5 @@ use crate::batch::{Batch, BatchStatement}; -use crate::frame::value::BatchValues; +use crate::frame::value::LegacyBatchValues; use crate::prepared_statement::PreparedStatement; use crate::query::Query; use crate::transport::errors::QueryError; @@ -108,7 +108,7 @@ where pub async fn batch( &self, batch: &Batch, - values: impl BatchValues, + values: impl LegacyBatchValues, ) -> Result { let all_prepared: bool = batch .statements diff --git a/scylla/src/transport/connection.rs b/scylla/src/transport/connection.rs index 3faa6a5f0a..19db130168 100644 --- a/scylla/src/transport/connection.rs +++ b/scylla/src/transport/connection.rs @@ -53,7 +53,7 @@ use crate::frame::{ request::{self, batch, execute, query, register, SerializableRequest}, response::{event::Event, result, NonErrorResponse, Response, ResponseOpcode}, server_event_type::EventType, - value::{BatchValues, BatchValuesIterator}, + value::{LegacyBatchValues, LegacyBatchValuesIterator}, FrameParams, SerializedRequest, }; use crate::query::Query; @@ -763,7 +763,7 @@ impl Connection { pub(crate) async fn batch( &self, batch: &Batch, - values: impl BatchValues, + values: impl LegacyBatchValues, ) -> Result { self.batch_with_consistency( batch, @@ -779,7 +779,7 @@ impl Connection { pub(crate) async fn batch_with_consistency( &self, init_batch: &Batch, - values: impl BatchValues, + values: impl LegacyBatchValues, consistency: Consistency, serial_consistency: Option, ) -> Result { @@ -831,7 +831,7 @@ impl Connection { async fn prepare_batch<'b>( &self, init_batch: &'b Batch, - values: impl 
BatchValues, + values: impl LegacyBatchValues, ) -> Result, QueryError> { let mut to_prepare = HashSet::<&str>::new(); diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index bf8c8f5200..e2e4fd8fb6 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -47,7 +47,9 @@ use super::NodeRef; use crate::cql_to_rust::FromRow; use crate::frame::response::cql_to_rust::FromRowError; use crate::frame::response::result; -use crate::frame::value::{BatchValues, BatchValuesFirstSerialized, BatchValuesIterator}; +use crate::frame::value::{ + LegacyBatchValues, LegacyBatchValuesFirstSerialized, LegacyBatchValuesIterator, +}; use crate::prepared_statement::PreparedStatement; use crate::query::Query; use crate::routing::Token; @@ -1165,7 +1167,7 @@ impl Session { pub async fn batch( &self, batch: &Batch, - values: impl BatchValues, + values: impl LegacyBatchValues, ) -> Result { // Shard-awareness behavior for batch will be to pick shard based on first batch statement's shard // If users batch statements by shard, they will be rewarded with full shard awareness @@ -1216,7 +1218,7 @@ impl Session { // Reuse first serialized value when serializing query, and delegate to `BatchValues::write_next_to_request` // directly for others (if they weren't already serialized, possibly don't even allocate the `LegacySerializedValues`) - let values = BatchValuesFirstSerialized::new(&values, first_serialized_value); + let values = LegacyBatchValuesFirstSerialized::new(&values, first_serialized_value); let values_ref = &values; let span = RequestSpan::new_batch(); From d83c9e29d780bb47df450621fc79d5c71ecd8cc7 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 16:49:36 +0100 Subject: [PATCH 074/107] serialize: optimize copying values from SerializedValues to RowWriter Add a method to RowWriter which allows copying all contents of some SerializedValues object to it. 
Without it, the only method would be to parse the values in the SerializedValues via iter() and write them to the RowWriter, which is unnecessarily ineffective. --- scylla-cql/src/types/serialize/row.rs | 5 +++++ scylla-cql/src/types/serialize/writers.rs | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index edd0293cac..2b48ac91a1 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -734,6 +734,11 @@ impl SerializedValues { buf.put(self.serialized_values.as_slice()) } + // Gets the serialized values as raw bytes, without the preceding u16 length. + pub(crate) fn get_contents(&self) -> &[u8] { + &self.serialized_values + } + /// Serializes value and appends it to the list pub fn add_value( &mut self, diff --git a/scylla-cql/src/types/serialize/writers.rs b/scylla-cql/src/types/serialize/writers.rs index cd1ccd7f62..6587634a47 100644 --- a/scylla-cql/src/types/serialize/writers.rs +++ b/scylla-cql/src/types/serialize/writers.rs @@ -2,6 +2,8 @@ use thiserror::Error; +use super::row::SerializedValues; + /// An interface that facilitates writing values for a CQL query. pub struct RowWriter<'buf> { // Buffer that this value should be serialized to. @@ -39,6 +41,14 @@ impl<'buf> RowWriter<'buf> { self.value_count += 1; CellWriter::new(self.buf) } + + /// Appends the values from an existing [`SerializedValues`] object to the + /// current `RowWriter`. + #[inline] + pub fn append_serialize_row(&mut self, sv: &SerializedValues) { + self.value_count += sv.element_count() as usize; + self.buf.extend_from_slice(sv.get_contents()) + } } /// Represents a handle to a CQL value that needs to be written into. 
From 87154987ba5af54fa2cfddaf9e70ca19a825f0b8 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 07:21:02 +0100 Subject: [PATCH 075/107] serialize/row: add RowSerializationContext::empty We will need to pass an empty row serialization context when we serialize values for unprepared queries in a batch. --- scylla-cql/src/types/serialize/row.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 2b48ac91a1..a62a99ae1f 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -34,6 +34,13 @@ impl<'a> RowSerializationContext<'a> { } } + /// Constructs an empty `RowSerializationContext`, as if for a statement + /// with no bind markers. + #[inline] + pub const fn empty() -> Self { + Self { columns: &[] } + } + /// Returns column/bind marker specifications for given query. #[inline] pub fn columns(&self) -> &'a [ColumnSpec] { From daa5266009d3862d488ec7780d24ec739a074344 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 18:48:28 +0100 Subject: [PATCH 076/107] serialize/row: simplify SerializedValues::from_serializable It's not needed to use a block to make sure that `writer` is dropped before `data` is used, non-lexical lifetimes already take care of that. 
--- scylla-cql/src/types/serialize/row.rs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index a62a99ae1f..18d074d5fd 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -691,16 +691,14 @@ impl SerializedValues { row: &T, ) -> Result { let mut data = Vec::new(); - let element_count = { - let mut writer = RowWriter::new(&mut data); - row.serialize(ctx, &mut writer)?; - match writer.value_count().try_into() { - Ok(n) => n, - Err(_) => { - return Err(SerializationError(Arc::new( - SerializeValuesError::TooManyValues, - ))) - } + let mut writer = RowWriter::new(&mut data); + row.serialize(ctx, &mut writer)?; + let element_count = match writer.value_count().try_into() { + Ok(n) => n, + Err(_) => { + return Err(SerializationError(Arc::new( + SerializeValuesError::TooManyValues, + ))) } }; From cc83cb6d468628bdd78ef4dac9ca9be4fcdc2866 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 18:44:58 +0100 Subject: [PATCH 077/107] serialize/row: add SerializedValues::from_closure method It will be used to serialize data produced by upcoming batch values iterators. --- scylla-cql/src/types/serialize/row.rs | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 18d074d5fd..8bf7d965ed 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -690,9 +690,17 @@ impl SerializedValues { ctx: &RowSerializationContext, row: &T, ) -> Result { + Self::from_closure(|writer| row.serialize(ctx, writer)).map(|(sr, _)| sr) + } + + /// Constructs `SerializedValues` via given closure. 
+ pub fn from_closure(f: F) -> Result<(Self, R), SerializationError> + where + F: FnOnce(&mut RowWriter) -> Result, + { let mut data = Vec::new(); let mut writer = RowWriter::new(&mut data); - row.serialize(ctx, &mut writer)?; + let ret = f(&mut writer)?; let element_count = match writer.value_count().try_into() { Ok(n) => n, Err(_) => { @@ -702,10 +710,13 @@ impl SerializedValues { } }; - Ok(SerializedValues { - serialized_values: data, - element_count, - }) + Ok(( + SerializedValues { + serialized_values: data, + element_count, + }, + ret, + )) } /// Returns `true` if the row contains no elements. From 1e61f9ccca320b664964b2ad04d80161293279c7 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 09:24:33 +0100 Subject: [PATCH 078/107] session: construct RoutingInfo outside the match A small refactor in order to improve the clarity of the later commits. --- scylla/src/transport/session.rs | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index e2e4fd8fb6..ec8be9f7d7 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -1198,23 +1198,21 @@ impl Session { .serial_consistency .unwrap_or(execution_profile.serial_consistency); - let statement_info = match (first_serialized_value, batch.statements.first()) { - (Some(first_serialized_value), Some(BatchStatement::PreparedStatement(ps))) => { - RoutingInfo { - consistency, - serial_consistency, - token: ps.calculate_token(first_serialized_value)?, - keyspace: ps.get_keyspace_name(), - is_confirmed_lwt: false, + let (first_value_token, keyspace_name) = + match (first_serialized_value, batch.statements.first()) { + (Some(first_serialized_value), Some(BatchStatement::PreparedStatement(ps))) => { + let token = ps.calculate_token(first_serialized_value)?; + (token, ps.get_keyspace_name()) } - } - _ => RoutingInfo { - consistency, - serial_consistency, - 
..Default::default() - }, + _ => (None, None), + }; + let statement_info = RoutingInfo { + consistency, + serial_consistency, + token: first_value_token, + keyspace: keyspace_name, + is_confirmed_lwt: false, }; - let first_value_token = statement_info.token; // Reuse first serialized value when serializing query, and delegate to `BatchValues::write_next_to_request` // directly for others (if they weren't already serialized, possibly don't even allocate the `LegacySerializedValues`) From e90975e13a1ec19e86f77602b59ab331d075b81a Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 09:28:49 +0100 Subject: [PATCH 079/107] session: inline the .as_deref() call A small refactor to improve the clarity of later commits. --- scylla/src/transport/session.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index ec8be9f7d7..80005197ab 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -1181,7 +1181,6 @@ impl Session { } // Extract first serialized_value let first_serialized_value = values.batch_values_iter().next_serialized().transpose()?; - let first_serialized_value = first_serialized_value.as_deref(); let execution_profile = batch .get_execution_profile_handle() @@ -1199,7 +1198,7 @@ impl Session { .unwrap_or(execution_profile.serial_consistency); let (first_value_token, keyspace_name) = - match (first_serialized_value, batch.statements.first()) { + match (first_serialized_value.as_deref(), batch.statements.first()) { (Some(first_serialized_value), Some(BatchStatement::PreparedStatement(ps))) => { let token = ps.calculate_token(first_serialized_value)?; (token, ps.get_keyspace_name()) @@ -1216,7 +1215,8 @@ impl Session { // Reuse first serialized value when serializing query, and delegate to `BatchValues::write_next_to_request` // directly for others (if they weren't already serialized, possibly don't even allocate the 
`LegacySerializedValues`) - let values = LegacyBatchValuesFirstSerialized::new(&values, first_serialized_value); + let values = + LegacyBatchValuesFirstSerialized::new(&values, first_serialized_value.as_deref()); let values_ref = &values; let span = RequestSpan::new_batch(); From ccd60b3a94453cd6999abf84dcd736f1f434b33f Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 09:29:58 +0100 Subject: [PATCH 080/107] session: construct first_serialized_value in a narrower scope This refactor will improve clarity of the next change, but will also prevent an obscure issue from happening that causes futures not to be `Send` when an instance of a GAT exists across an await point. --- scylla/src/transport/session.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index 80005197ab..474638e464 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -1179,8 +1179,6 @@ impl Session { BadQuery::TooManyQueriesInBatchStatement(batch_statements_length), )); } - // Extract first serialized_value - let first_serialized_value = values.batch_values_iter().next_serialized().transpose()?; let execution_profile = batch .get_execution_profile_handle() @@ -1197,14 +1195,19 @@ impl Session { .serial_consistency .unwrap_or(execution_profile.serial_consistency); - let (first_value_token, keyspace_name) = - match (first_serialized_value.as_deref(), batch.statements.first()) { + let (first_serialized_value, first_value_token, keyspace_name) = { + // Extract first serialized_value + let first_serialized_value = + values.batch_values_iter().next_serialized().transpose()?; + + match (first_serialized_value, batch.statements.first()) { (Some(first_serialized_value), Some(BatchStatement::PreparedStatement(ps))) => { - let token = ps.calculate_token(first_serialized_value)?; - (token, ps.get_keyspace_name()) + let token = 
ps.calculate_token(&first_serialized_value)?; + (Some(first_serialized_value), token, ps.get_keyspace_name()) } - _ => (None, None), - }; + _ => (None, None, None), + } + }; let statement_info = RoutingInfo { consistency, serial_consistency, From 5ffd3ab7e0ad89ff63fa5eae2fa6ce30a52729f5 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 18:48:06 +0100 Subject: [PATCH 081/107] session: work around lifetime issues with temporaries If we changed the code to use the new BatchValues API first, the compiler would complain about some lifetime issues of temporary objects passed to the `match` block. --- scylla/src/transport/session.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index 474638e464..e5c01ed688 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -1200,13 +1200,15 @@ impl Session { let first_serialized_value = values.batch_values_iter().next_serialized().transpose()?; - match (first_serialized_value, batch.statements.first()) { + // The temporary "p" is necessary because lifetimes + let p = match (first_serialized_value, batch.statements.first()) { (Some(first_serialized_value), Some(BatchStatement::PreparedStatement(ps))) => { let token = ps.calculate_token(&first_serialized_value)?; (Some(first_serialized_value), token, ps.get_keyspace_name()) } _ => (None, None, None), - } + }; + p }; let statement_info = RoutingInfo { consistency, From 2f881246053d74ff9cc41500744498ea525bf400 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 09:12:33 +0100 Subject: [PATCH 082/107] prepared_statement: add calculate_token_untyped method The PreparedStatement::calculate_token method takes an object that implements SerializeRow, serializes it to SerializedValues and returns the token. For the sake of an optimization in the code that handles batches we would like to provide a SerializedValues object directly. 
Extract the part that calculates token from SerializedValues to a new, separate method, but keep it pub(crate) - we'd like to avoid exposing type-unsafe interfaces to the users. --- scylla/src/statement/prepared_statement.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/scylla/src/statement/prepared_statement.rs b/scylla/src/statement/prepared_statement.rs index f61fab901e..a3cd155e7c 100644 --- a/scylla/src/statement/prepared_statement.rs +++ b/scylla/src/statement/prepared_statement.rs @@ -192,11 +192,17 @@ impl PreparedStatement { // For internal purposes, `PartitionKey::calculate_token()` is preferred, as `PartitionKey` // is either way used internally, among others for display in traces. pub fn calculate_token(&self, values: &impl SerializeRow) -> Result, QueryError> { - self.extract_partition_key_and_calculate_token( - &self.partitioner_name, - &self.serialize_values(values)?, - ) - .map(|opt| opt.map(|(_pk, token)| token)) + self.calculate_token_untyped(&self.serialize_values(values)?) + } + + // A version of calculate_token which skips serialization and uses SerializedValues directly. + // Not type-safe, so not exposed to users. + pub(crate) fn calculate_token_untyped( + &self, + values: &SerializedValues, + ) -> Result, QueryError> { + self.extract_partition_key_and_calculate_token(&self.partitioner_name, values) + .map(|opt| opt.map(|(_pk, token)| token)) } /// Returns the name of the keyspace this statement is operating on. From 51bf7abd27168f38e133d6f5f78b4fdda95c1eb8 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 00:15:17 +0100 Subject: [PATCH 083/107] frame: use Vec for request serialization, not BufMut `SerializableRequest::serialize` accepts `impl BufMut`, but in reality we only pass `Vec` as an argument. Change the type of the argument to Vec, as in further commits we will need the argument to be precisely Vec. 
--- scylla-cql/src/frame/request/auth_response.rs | 3 +-- scylla-cql/src/frame/request/batch.rs | 2 +- scylla-cql/src/frame/request/execute.rs | 4 ++-- scylla-cql/src/frame/request/mod.rs | 4 ++-- scylla-cql/src/frame/request/options.rs | 3 +-- scylla-cql/src/frame/request/prepare.rs | 3 +-- scylla-cql/src/frame/request/query.rs | 2 +- scylla-cql/src/frame/request/register.rs | 4 +--- scylla-cql/src/frame/request/startup.rs | 3 +-- 9 files changed, 11 insertions(+), 17 deletions(-) diff --git a/scylla-cql/src/frame/request/auth_response.rs b/scylla-cql/src/frame/request/auth_response.rs index aa515b8aed..dabbf20d34 100644 --- a/scylla-cql/src/frame/request/auth_response.rs +++ b/scylla-cql/src/frame/request/auth_response.rs @@ -1,5 +1,4 @@ use crate::frame::frame_errors::ParseError; -use bytes::BufMut; use crate::frame::request::{RequestOpcode, SerializableRequest}; use crate::frame::types::write_bytes_opt; @@ -12,7 +11,7 @@ pub struct AuthResponse { impl SerializableRequest for AuthResponse { const OPCODE: RequestOpcode = RequestOpcode::AuthResponse; - fn serialize(&self, buf: &mut impl BufMut) -> Result<(), ParseError> { + fn serialize(&self, buf: &mut Vec) -> Result<(), ParseError> { write_bytes_opt(self.response.as_ref(), buf) } } diff --git a/scylla-cql/src/frame/request/batch.rs b/scylla-cql/src/frame/request/batch.rs index bed85d83f0..0c779aae1b 100644 --- a/scylla-cql/src/frame/request/batch.rs +++ b/scylla-cql/src/frame/request/batch.rs @@ -76,7 +76,7 @@ where { const OPCODE: RequestOpcode = RequestOpcode::Batch; - fn serialize(&self, buf: &mut impl BufMut) -> Result<(), ParseError> { + fn serialize(&self, buf: &mut Vec) -> Result<(), ParseError> { // Serializing type of batch buf.put_u8(self.batch_type as u8); diff --git a/scylla-cql/src/frame/request/execute.rs b/scylla-cql/src/frame/request/execute.rs index 59cdf1efa9..531c60f124 100644 --- a/scylla-cql/src/frame/request/execute.rs +++ b/scylla-cql/src/frame/request/execute.rs @@ -1,5 +1,5 @@ use 
crate::frame::frame_errors::ParseError; -use bytes::{BufMut, Bytes}; +use bytes::Bytes; use crate::{ frame::request::{query, RequestOpcode, SerializableRequest}, @@ -17,7 +17,7 @@ pub struct Execute<'a> { impl SerializableRequest for Execute<'_> { const OPCODE: RequestOpcode = RequestOpcode::Execute; - fn serialize(&self, buf: &mut impl BufMut) -> Result<(), ParseError> { + fn serialize(&self, buf: &mut Vec) -> Result<(), ParseError> { // Serializing statement id types::write_short_bytes(&self.id[..], buf)?; diff --git a/scylla-cql/src/frame/request/mod.rs b/scylla-cql/src/frame/request/mod.rs index 8a625f2806..71e3c1bd60 100644 --- a/scylla-cql/src/frame/request/mod.rs +++ b/scylla-cql/src/frame/request/mod.rs @@ -8,7 +8,7 @@ pub mod register; pub mod startup; use crate::{frame::frame_errors::ParseError, Consistency}; -use bytes::{BufMut, Bytes}; +use bytes::Bytes; use num_enum::TryFromPrimitive; pub use auth_response::AuthResponse; @@ -40,7 +40,7 @@ pub enum RequestOpcode { pub trait SerializableRequest { const OPCODE: RequestOpcode; - fn serialize(&self, buf: &mut impl BufMut) -> Result<(), ParseError>; + fn serialize(&self, buf: &mut Vec) -> Result<(), ParseError>; fn to_bytes(&self) -> Result { let mut v = Vec::new(); diff --git a/scylla-cql/src/frame/request/options.rs b/scylla-cql/src/frame/request/options.rs index a1a5e8d5fe..5efdada0c6 100644 --- a/scylla-cql/src/frame/request/options.rs +++ b/scylla-cql/src/frame/request/options.rs @@ -1,5 +1,4 @@ use crate::frame::frame_errors::ParseError; -use bytes::BufMut; use crate::frame::request::{RequestOpcode, SerializableRequest}; @@ -8,7 +7,7 @@ pub struct Options; impl SerializableRequest for Options { const OPCODE: RequestOpcode = RequestOpcode::Options; - fn serialize(&self, _buf: &mut impl BufMut) -> Result<(), ParseError> { + fn serialize(&self, _buf: &mut Vec) -> Result<(), ParseError> { Ok(()) } } diff --git a/scylla-cql/src/frame/request/prepare.rs b/scylla-cql/src/frame/request/prepare.rs index 
d427389181..c30e25727a 100644 --- a/scylla-cql/src/frame/request/prepare.rs +++ b/scylla-cql/src/frame/request/prepare.rs @@ -1,5 +1,4 @@ use crate::frame::frame_errors::ParseError; -use bytes::BufMut; use crate::{ frame::request::{RequestOpcode, SerializableRequest}, @@ -13,7 +12,7 @@ pub struct Prepare<'a> { impl<'a> SerializableRequest for Prepare<'a> { const OPCODE: RequestOpcode = RequestOpcode::Prepare; - fn serialize(&self, buf: &mut impl BufMut) -> Result<(), ParseError> { + fn serialize(&self, buf: &mut Vec) -> Result<(), ParseError> { types::write_long_string(self.query, buf)?; Ok(()) } diff --git a/scylla-cql/src/frame/request/query.rs b/scylla-cql/src/frame/request/query.rs index 348127eda7..164118f081 100644 --- a/scylla-cql/src/frame/request/query.rs +++ b/scylla-cql/src/frame/request/query.rs @@ -38,7 +38,7 @@ pub struct Query<'q> { impl SerializableRequest for Query<'_> { const OPCODE: RequestOpcode = RequestOpcode::Query; - fn serialize(&self, buf: &mut impl BufMut) -> Result<(), ParseError> { + fn serialize(&self, buf: &mut Vec) -> Result<(), ParseError> { types::write_long_string(&self.contents, buf)?; self.parameters.serialize(buf)?; Ok(()) diff --git a/scylla-cql/src/frame/request/register.rs b/scylla-cql/src/frame/request/register.rs index d8f88ea3d7..c29c821964 100644 --- a/scylla-cql/src/frame/request/register.rs +++ b/scylla-cql/src/frame/request/register.rs @@ -1,5 +1,3 @@ -use bytes::BufMut; - use crate::frame::{ frame_errors::ParseError, request::{RequestOpcode, SerializableRequest}, @@ -14,7 +12,7 @@ pub struct Register { impl SerializableRequest for Register { const OPCODE: RequestOpcode = RequestOpcode::Register; - fn serialize(&self, buf: &mut impl BufMut) -> Result<(), ParseError> { + fn serialize(&self, buf: &mut Vec) -> Result<(), ParseError> { let event_types_list = self .event_types_to_register_for .iter() diff --git a/scylla-cql/src/frame/request/startup.rs b/scylla-cql/src/frame/request/startup.rs index 044a98830c..a1d41df5c4 
100644 --- a/scylla-cql/src/frame/request/startup.rs +++ b/scylla-cql/src/frame/request/startup.rs @@ -1,5 +1,4 @@ use crate::frame::frame_errors::ParseError; -use bytes::BufMut; use std::collections::HashMap; @@ -15,7 +14,7 @@ pub struct Startup { impl SerializableRequest for Startup { const OPCODE: RequestOpcode = RequestOpcode::Startup; - fn serialize(&self, buf: &mut impl BufMut) -> Result<(), ParseError> { + fn serialize(&self, buf: &mut Vec) -> Result<(), ParseError> { types::write_string_map(&self.options, buf)?; Ok(()) } From 4478f6a69f936313c1bf3e89e9886030c3303140 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 15 Dec 2023 08:52:57 +0100 Subject: [PATCH 084/107] scylla-macros: allow renaming fields in UDTs In some cases, it might be desirable to name the Rust struct fields differently than the UDT fields (e.g. due to a different naming convention). Add an attribute to the `SerializeCql` macro, inspired by serde's `rename` attribute, which causes the fields to be matched on a specified name instead of the Rust name. --- scylla-cql/src/macros.rs | 9 ++- scylla-cql/src/types/serialize/value.rs | 80 +++++++++++++++++++++++++ scylla-macros/src/serialize/cql.rs | 69 +++++++++++++++++++-- 3 files changed, 151 insertions(+), 7 deletions(-) diff --git a/scylla-cql/src/macros.rs b/scylla-cql/src/macros.rs index 51cc79ce24..fe58f820a8 100644 --- a/scylla-cql/src/macros.rs +++ b/scylla-cql/src/macros.rs @@ -46,7 +46,7 @@ pub use scylla_macros::ValueList; /// } /// ``` /// -/// # Attributes +/// # Struct attributes /// /// `#[scylla(flavor = "flavor_name")]` /// @@ -86,6 +86,13 @@ pub use scylla_macros::ValueList; /// It's not possible to automatically resolve those issues in the procedural /// macro itself, so in those cases the user must provide an alternative path /// to either the `scylla` or `scylla-cql` crate. 
+/// +/// # Field attributes +/// +/// `#[scylla(rename = "name_in_the_udt")]` +/// +/// Serializes the field to the UDT struct field with given name instead of +/// its Rust name. pub use scylla_macros::SerializeCql; /// Derive macro for the [`SerializeRow`](crate::types::serialize::row::SerializeRow) trait diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 53061caa68..69a0cd5ba5 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -2354,4 +2354,84 @@ mod tests { ) )); } + + #[derive(SerializeCql, Debug)] + #[scylla(crate = crate)] + struct TestUdtWithFieldRename { + a: String, + #[scylla(rename = "x")] + b: i32, + } + + #[derive(SerializeCql, Debug)] + #[scylla(crate = crate, flavor = "enforce_order")] + struct TestUdtWithFieldRenameAndEnforceOrder { + a: String, + #[scylla(rename = "x")] + b: i32, + } + + #[test] + fn test_udt_serialization_with_field_rename() { + let typ = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("x".to_string(), ColumnType::Int), + ("a".to_string(), ColumnType::Text), + ], + }; + + let mut reference = Vec::new(); + // Total length of the struct is 23 + reference.extend_from_slice(&23i32.to_be_bytes()); + // Field 'x' + reference.extend_from_slice(&4i32.to_be_bytes()); + reference.extend_from_slice(&42i32.to_be_bytes()); + // Field 'a' + reference.extend_from_slice(&("Ala ma kota".len() as i32).to_be_bytes()); + reference.extend_from_slice("Ala ma kota".as_bytes()); + + let udt = do_serialize( + TestUdtWithFieldRename { + a: "Ala ma kota".to_owned(), + b: 42, + }, + &typ, + ); + + assert_eq!(reference, udt); + } + + #[test] + fn test_udt_serialization_with_field_rename_and_enforce_order() { + let typ = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("x".to_string(), 
ColumnType::Int), + ], + }; + + let mut reference = Vec::new(); + // Total length of the struct is 23 + reference.extend_from_slice(&23i32.to_be_bytes()); + // Field 'a' + reference.extend_from_slice(&("Ala ma kota".len() as i32).to_be_bytes()); + reference.extend_from_slice("Ala ma kota".as_bytes()); + // Field 'x' + reference.extend_from_slice(&4i32.to_be_bytes()); + reference.extend_from_slice(&42i32.to_be_bytes()); + + let udt = do_serialize( + TestUdtWithFieldRenameAndEnforceOrder { + a: "Ala ma kota".to_owned(), + b: 42, + }, + &typ, + ); + + assert_eq!(reference, udt); + } } diff --git a/scylla-macros/src/serialize/cql.rs b/scylla-macros/src/serialize/cql.rs index 4756901183..5cb0959297 100644 --- a/scylla-macros/src/serialize/cql.rs +++ b/scylla-macros/src/serialize/cql.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use darling::FromAttributes; use proc_macro::TokenStream; use proc_macro2::Span; @@ -23,9 +25,30 @@ impl Attributes { } } +struct Field { + ident: syn::Ident, + ty: syn::Type, + attrs: FieldAttributes, +} + +impl Field { + fn field_name(&self) -> String { + match &self.attrs.rename { + Some(name) => name.clone(), + None => self.ident.to_string(), + } + } +} + +#[derive(FromAttributes)] +#[darling(attributes(scylla))] +struct FieldAttributes { + rename: Option, +} + struct Context { attributes: Attributes, - fields: Vec, + fields: Vec, } pub fn derive_serialize_cql(tokens_input: TokenStream) -> Result { @@ -38,8 +61,19 @@ pub fn derive_serialize_cql(tokens_input: TokenStream) -> Result>()?; let ctx = Context { attributes, fields }; + ctx.validate()?; let gen: Box = match ctx.attributes.flavor { Some(Flavor::MatchByName) | None => Box::new(FieldSortingGenerator { ctx: &ctx }), @@ -57,6 +91,27 @@ pub fn derive_serialize_cql(tokens_input: TokenStream) -> Result Result<(), syn::Error> { + let mut errors = darling::Error::accumulator(); + + // Check for name collisions + let mut used_names = HashMap::::new(); + for field in self.fields.iter() { 
+ let field_name = field.field_name(); + if let Some(other_field) = used_names.get(&field_name) { + let other_field_ident = &other_field.ident; + let msg = format!("the UDT field name `{field_name}` used by this struct field is already used by field `{other_field_ident}`"); + let err = darling::Error::custom(msg).with_span(&field.ident); + errors.push(err); + } else { + used_names.insert(field_name, field); + } + } + + errors.finish()?; + Ok(()) + } + fn generate_udt_type_match(&self, err: syn::Expr) -> syn::Stmt { let crate_path = self.attributes.crate_path(); @@ -126,9 +181,11 @@ impl<'a> Generator for FieldSortingGenerator<'a> { .iter() .map(|f| f.ident.clone()) .collect::>(); - let rust_field_names = rust_field_idents + let rust_field_names = self + .ctx + .fields .iter() - .map(|i| i.as_ref().unwrap().to_string()) + .map(|f| f.field_name()) .collect::>(); let udt_field_names = rust_field_names.clone(); // For now, it's the same let field_types = self.ctx.fields.iter().map(|f| &f.ty).collect::>(); @@ -269,8 +326,8 @@ impl<'a> Generator for FieldOrderedGenerator<'a> { // Serialize each field for field in self.ctx.fields.iter() { - let rust_field_ident = field.ident.as_ref().unwrap(); - let rust_field_name = rust_field_ident.to_string(); + let rust_field_ident = &field.ident; + let rust_field_name = field.field_name(); let typ = &field.ty; statements.push(parse_quote! { match field_iter.next() { From 37f17e2f1935c82a3f9faeb1b89eba837299d2e0 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 15 Dec 2023 09:23:07 +0100 Subject: [PATCH 085/107] scylla-macros: allow renaming fields in rows Like we just did for `SerializeCql`, introduce a `rename` annotation to `SerializeRow` which allows associating given Rust field to a column / bind marker with given name, instead of matching on the Rust field name. The motivation is similar as in the case of `SerializeCql`: sometimes, users might want to use a different naming scheme in Rust and for the column names. 
--- scylla-cql/src/macros.rs | 15 ++++-- scylla-cql/src/types/serialize/row.rs | 48 +++++++++++++++++++ scylla-macros/src/serialize/row.rs | 69 ++++++++++++++++++++++++--- 3 files changed, 122 insertions(+), 10 deletions(-) diff --git a/scylla-cql/src/macros.rs b/scylla-cql/src/macros.rs index fe58f820a8..31ea74bfab 100644 --- a/scylla-cql/src/macros.rs +++ b/scylla-cql/src/macros.rs @@ -86,11 +86,11 @@ pub use scylla_macros::ValueList; /// It's not possible to automatically resolve those issues in the procedural /// macro itself, so in those cases the user must provide an alternative path /// to either the `scylla` or `scylla-cql` crate. -/// +/// /// # Field attributes -/// +/// /// `#[scylla(rename = "name_in_the_udt")]` -/// +/// /// Serializes the field to the UDT struct field with given name instead of /// its Rust name. pub use scylla_macros::SerializeCql; @@ -130,7 +130,7 @@ pub use scylla_macros::SerializeCql; /// } /// ``` /// -/// # Attributes +/// # Struct attributes /// /// `#[scylla(flavor = "flavor_name")]` /// @@ -170,6 +170,13 @@ pub use scylla_macros::SerializeCql; /// It's not possible to automatically resolve those issues in the procedural /// macro itself, so in those cases the user must provide an alternative path /// to either the `scylla` or `scylla-cql` crate. +/// +/// # Field attributes +/// +/// `#[scylla(rename = "column_or_bind_marker_name")]` +/// +/// Serializes the field to the column / bind marker with given name instead of +/// its Rust name. 
pub use scylla_macros::SerializeRow; // Reexports for derive(IntoUserType) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index edd0293cac..18ff317e4c 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -1381,4 +1381,52 @@ mod tests { .iter() .all(|v| v == RawValue::Value(&[0, 0, 0, 0, 0x07, 0x5b, 0xcd, 0x15]))) } + + #[derive(SerializeRow, Debug)] + #[scylla(crate = crate)] + struct TestRowWithColumnRename { + a: String, + #[scylla(rename = "x")] + b: i32, + } + + #[derive(SerializeRow, Debug)] + #[scylla(crate = crate, flavor = "enforce_order")] + struct TestRowWithColumnRenameAndEnforceOrder { + a: String, + #[scylla(rename = "x")] + b: i32, + } + + #[test] + fn test_row_serialization_with_column_rename() { + let spec = [col("x", ColumnType::Int), col("a", ColumnType::Text)]; + + let reference = do_serialize((42i32, "Ala ma kota"), &spec); + let row = do_serialize( + TestRowWithColumnRename { + a: "Ala ma kota".to_owned(), + b: 42, + }, + &spec, + ); + + assert_eq!(reference, row); + } + + #[test] + fn test_row_serialization_with_column_rename_and_enforce_order() { + let spec = [col("a", ColumnType::Text), col("x", ColumnType::Int)]; + + let reference = do_serialize(("Ala ma kota", 42i32), &spec); + let row = do_serialize( + TestRowWithColumnRenameAndEnforceOrder { + a: "Ala ma kota".to_owned(), + b: 42, + }, + &spec, + ); + + assert_eq!(reference, row); + } } diff --git a/scylla-macros/src/serialize/row.rs b/scylla-macros/src/serialize/row.rs index 44b402d791..dfea7e9ac5 100644 --- a/scylla-macros/src/serialize/row.rs +++ b/scylla-macros/src/serialize/row.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use darling::FromAttributes; use proc_macro::TokenStream; use proc_macro2::Span; @@ -23,9 +25,30 @@ impl Attributes { } } +struct Field { + ident: syn::Ident, + ty: syn::Type, + attrs: FieldAttributes, +} + +impl Field { + fn column_name(&self) -> String { + match 
&self.attrs.rename { + Some(name) => name.clone(), + None => self.ident.to_string(), + } + } +} + +#[derive(FromAttributes)] +#[darling(attributes(scylla))] +struct FieldAttributes { + rename: Option, +} + struct Context { attributes: Attributes, - fields: Vec, + fields: Vec, } pub fn derive_serialize_row(tokens_input: TokenStream) -> Result { @@ -38,8 +61,19 @@ pub fn derive_serialize_row(tokens_input: TokenStream) -> Result>()?; let ctx = Context { attributes, fields }; + ctx.validate()?; let gen: Box = match ctx.attributes.flavor { Some(Flavor::MatchByName) | None => Box::new(ColumnSortingGenerator { ctx: &ctx }), @@ -59,6 +93,27 @@ pub fn derive_serialize_row(tokens_input: TokenStream) -> Result Result<(), syn::Error> { + let mut errors = darling::Error::accumulator(); + + // Check for name collisions + let mut used_names = HashMap::::new(); + for field in self.fields.iter() { + let column_name = field.column_name(); + if let Some(other_field) = used_names.get(&column_name) { + let other_field_ident = &other_field.ident; + let msg = format!("the column / bind marker name `{column_name}` used by this struct field is already used by field `{other_field_ident}`"); + let err = darling::Error::custom(msg).with_span(&field.ident); + errors.push(err); + } else { + used_names.insert(column_name, field); + } + } + + errors.finish()?; + Ok(()) + } + fn generate_mk_typck_err(&self) -> syn::Stmt { let crate_path = self.attributes.crate_path(); parse_quote! 
{ @@ -114,9 +169,11 @@ impl<'a> Generator for ColumnSortingGenerator<'a> { .iter() .map(|f| f.ident.clone()) .collect::>(); - let rust_field_names = rust_field_idents + let rust_field_names = self + .ctx + .fields .iter() - .map(|i| i.as_ref().unwrap().to_string()) + .map(|f| f.column_name()) .collect::>(); let udt_field_names = rust_field_names.clone(); // For now, it's the same let field_types = self.ctx.fields.iter().map(|f| &f.ty).collect::>(); @@ -237,8 +294,8 @@ impl<'a> Generator for ColumnOrderedGenerator<'a> { // Serialize each field for field in self.ctx.fields.iter() { - let rust_field_ident = field.ident.as_ref().unwrap(); - let rust_field_name = rust_field_ident.to_string(); + let rust_field_ident = &field.ident; + let rust_field_name = field.column_name(); let typ = &field.ty; statements.push(parse_quote! { match column_iter.next() { From eaafc9626b058670fbb6ad4814fe5ef8acfb092b Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 15 Dec 2023 09:32:38 +0100 Subject: [PATCH 086/107] scylla-macros: simplify handling of flavor Implement `Default` for `Flavor`, which allows to get rid of the `Option` wrapper in the macro attributes struct and simplify some of the code as a result. 
--- scylla-macros/src/serialize/cql.rs | 7 ++++--- scylla-macros/src/serialize/mod.rs | 3 ++- scylla-macros/src/serialize/row.rs | 7 ++++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/scylla-macros/src/serialize/cql.rs b/scylla-macros/src/serialize/cql.rs index 5cb0959297..93ebea5593 100644 --- a/scylla-macros/src/serialize/cql.rs +++ b/scylla-macros/src/serialize/cql.rs @@ -13,7 +13,8 @@ struct Attributes { #[darling(rename = "crate")] crate_path: Option, - flavor: Option, + #[darling(default)] + flavor: Flavor, } impl Attributes { @@ -76,8 +77,8 @@ pub fn derive_serialize_cql(tokens_input: TokenStream) -> Result = match ctx.attributes.flavor { - Some(Flavor::MatchByName) | None => Box::new(FieldSortingGenerator { ctx: &ctx }), - Some(Flavor::EnforceOrder) => Box::new(FieldOrderedGenerator { ctx: &ctx }), + Flavor::MatchByName => Box::new(FieldSortingGenerator { ctx: &ctx }), + Flavor::EnforceOrder => Box::new(FieldOrderedGenerator { ctx: &ctx }), }; let serialize_item = gen.generate_serialize(); diff --git a/scylla-macros/src/serialize/mod.rs b/scylla-macros/src/serialize/mod.rs index 183183fa91..28c8b91097 100644 --- a/scylla-macros/src/serialize/mod.rs +++ b/scylla-macros/src/serialize/mod.rs @@ -3,8 +3,9 @@ use darling::FromMeta; pub(crate) mod cql; pub(crate) mod row; -#[derive(Copy, Clone, PartialEq, Eq)] +#[derive(Copy, Clone, PartialEq, Eq, Default)] enum Flavor { + #[default] MatchByName, EnforceOrder, } diff --git a/scylla-macros/src/serialize/row.rs b/scylla-macros/src/serialize/row.rs index dfea7e9ac5..af78d10c6f 100644 --- a/scylla-macros/src/serialize/row.rs +++ b/scylla-macros/src/serialize/row.rs @@ -13,7 +13,8 @@ struct Attributes { #[darling(rename = "crate")] crate_path: Option, - flavor: Option, + #[darling(default)] + flavor: Flavor, } impl Attributes { @@ -76,8 +77,8 @@ pub fn derive_serialize_row(tokens_input: TokenStream) -> Result = match ctx.attributes.flavor { - Some(Flavor::MatchByName) | None => 
Box::new(ColumnSortingGenerator { ctx: &ctx }), - Some(Flavor::EnforceOrder) => Box::new(ColumnOrderedGenerator { ctx: &ctx }), + Flavor::MatchByName => Box::new(ColumnSortingGenerator { ctx: &ctx }), + Flavor::EnforceOrder => Box::new(ColumnOrderedGenerator { ctx: &ctx }), }; let serialize_item = gen.generate_serialize(); From 344112af6d400463651c94a226f17e8da3177944 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 15 Dec 2023 09:47:14 +0100 Subject: [PATCH 087/107] scylla-macros: add skip_name_checks attribute to SerializeCql Introduce an attribute to the `SerializeCql` macro which causes the generated code to skip checking names of the serialized Rust fields against the UDT field names. The motivation behind the attribute is to ease transition off the old `IntoUserType` macro which blindly serialized the fields of the struct as they are defined in the Rust code. While it disables the name checks, type checking is still done, so there is protection against type confusion, at least. --- scylla-cql/src/macros.rs | 9 ++++++ scylla-cql/src/types/serialize/value.rs | 39 +++++++++++++++++++++++++ scylla-macros/src/serialize/cql.rs | 36 +++++++++++++++++++++-- 3 files changed, 81 insertions(+), 3 deletions(-) diff --git a/scylla-cql/src/macros.rs b/scylla-cql/src/macros.rs index 31ea74bfab..dc86708712 100644 --- a/scylla-cql/src/macros.rs +++ b/scylla-cql/src/macros.rs @@ -87,6 +87,15 @@ pub use scylla_macros::ValueList; /// macro itself, so in those cases the user must provide an alternative path /// to either the `scylla` or `scylla-cql` crate. /// +/// `#[scylla(skip_name_checks)]` +/// +/// _Specific only to the `enforce_order` flavor._ +/// +/// Skips checking Rust field names against names of the UDT fields. With this +/// annotation, the generated implementation will allow mismatch between Rust +/// struct field names and UDT field names, i.e. it's OK if i-th field has a +/// different name in Rust and in the UDT. Fields are still being type-checked.
+/// /// # Field attributes /// /// `#[scylla(rename = "name_in_the_udt")]` diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 69a0cd5ba5..fe4e63789c 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -2434,4 +2434,43 @@ mod tests { assert_eq!(reference, udt); } + + #[derive(SerializeCql, Debug)] + #[scylla(crate = crate, flavor = "enforce_order", skip_name_checks)] + struct TestUdtWithSkippedNameChecks { + a: String, + b: i32, + } + + #[test] + fn test_udt_serialization_with_skipped_name_checks() { + let typ = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("x".to_string(), ColumnType::Int), + ], + }; + + let mut reference = Vec::new(); + // Total length of the struct is 23 + reference.extend_from_slice(&23i32.to_be_bytes()); + // Field 'a' + reference.extend_from_slice(&("Ala ma kota".len() as i32).to_be_bytes()); + reference.extend_from_slice("Ala ma kota".as_bytes()); + // Field 'x' + reference.extend_from_slice(&4i32.to_be_bytes()); + reference.extend_from_slice(&42i32.to_be_bytes()); + + let udt = do_serialize( + TestUdtWithSkippedNameChecks { + a: "Ala ma kota".to_owned(), + b: 42, + }, + &typ, + ); + + assert_eq!(reference, udt); + } } diff --git a/scylla-macros/src/serialize/cql.rs b/scylla-macros/src/serialize/cql.rs index 93ebea5593..1aa9d05835 100644 --- a/scylla-macros/src/serialize/cql.rs +++ b/scylla-macros/src/serialize/cql.rs @@ -15,6 +15,9 @@ struct Attributes { #[darling(default)] flavor: Flavor, + + #[darling(default)] + skip_name_checks: bool, } impl Attributes { @@ -74,7 +77,7 @@ pub fn derive_serialize_cql(tokens_input: TokenStream) -> Result>()?; let ctx = Context { attributes, fields }; - ctx.validate()?; + ctx.validate(&input.ident)?; let gen: Box = match ctx.attributes.flavor { Flavor::MatchByName => 
Box::new(FieldSortingGenerator { ctx: &ctx }), @@ -92,9 +95,31 @@ pub fn derive_serialize_cql(tokens_input: TokenStream) -> Result Result<(), syn::Error> { + fn validate(&self, struct_ident: &syn::Ident) -> Result<(), syn::Error> { let mut errors = darling::Error::accumulator(); + if self.attributes.skip_name_checks { + // Skipping name checks is only available in enforce_order mode + if self.attributes.flavor != Flavor::EnforceOrder { + let err = darling::Error::custom( + "the `skip_name_checks` attribute is only allowed with the `enforce_order` flavor", + ) + .with_span(struct_ident); + errors.push(err); + } + + // `rename` annotations don't make sense with skipped name checks + for field in self.fields.iter() { + if field.attrs.rename.is_some() { + let err = darling::Error::custom( + "the `rename` annotations don't make sense with `skip_name_checks` attribute", + ) + .with_span(&field.ident); + errors.push(err); + } + } + } + // Check for name collisions let mut used_names = HashMap::::new(); for field in self.fields.iter() { @@ -330,10 +355,15 @@ impl<'a> Generator for FieldOrderedGenerator<'a> { let rust_field_ident = &field.ident; let rust_field_name = field.field_name(); let typ = &field.ty; + let name_check_expression: syn::Expr = if !self.ctx.attributes.skip_name_checks { + parse_quote! { field_name == #rust_field_name } + } else { + parse_quote! { true } + }; statements.push(parse_quote! 
{ match field_iter.next() { Some((field_name, typ)) => { - if field_name == #rust_field_name { + if #name_check_expression { let sub_builder = #crate_path::CellValueBuilder::make_sub_writer(&mut builder); match <#typ as #crate_path::SerializeCql>::serialize(&self.#rust_field_ident, typ, sub_builder) { Ok(_proof) => {}, From 39908a6c9ca54b498c5f406192dc0887347087e8 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 15 Dec 2023 09:50:43 +0100 Subject: [PATCH 088/107] scylla-macros: add skip_name_checks attribute to SerializeRow Introduces an attribute to `SerializeRow` macro which causes name checks to be skipped when serializing in `enforce_order` flavor. The motivation is the same as in the case of `SerializeCql` - the old `ValueList` macro didn't have access to the bind marker names and types, and disabling the name check might make it easier for some to transition off the old macro. --- scylla-cql/src/macros.rs | 10 ++++++ scylla-cql/src/types/serialize/row.rs | 23 +++++++++++++++ scylla-macros/src/serialize/row.rs | 36 ++++++++++++++++++++++--- 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/scylla-cql/src/macros.rs b/scylla-cql/src/macros.rs index dc86708712..6c6f2b7243 100644 --- a/scylla-cql/src/macros.rs +++ b/scylla-cql/src/macros.rs @@ -180,6 +180,16 @@ pub use scylla_macros::SerializeCql; /// macro itself, so in those cases the user must provide an alternative path /// to either the `scylla` or `scylla-cql` crate. /// +/// `#[scylla(skip_name_checks)]` +/// +/// _Specific only to the `enforce_order` flavor._ +/// +/// Skips checking Rust field names against names of the columns / bind markers. +/// With this annotation, the generated implementation will allow mismatch +/// between Rust struct field names and the column / bind markers, i.e. it's +/// OK if i-th Rust struct field has a different name than the column / bind +/// marker. The values are still being type-checked.
+/// /// # Field attributes /// /// `#[scylla(rename = "column_or_bind_marker_name")]` diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 18ff317e4c..d3170bc73b 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -1429,4 +1429,27 @@ mod tests { assert_eq!(reference, row); } + + #[derive(SerializeRow, Debug)] + #[scylla(crate = crate, flavor = "enforce_order", skip_name_checks)] + struct TestRowWithSkippedNameChecks { + a: String, + b: i32, + } + + #[test] + fn test_row_serialization_with_skipped_name_checks() { + let spec = [col("a", ColumnType::Text), col("x", ColumnType::Int)]; + + let reference = do_serialize(("Ala ma kota", 42i32), &spec); + let row = do_serialize( + TestRowWithSkippedNameChecks { + a: "Ala ma kota".to_owned(), + b: 42, + }, + &spec, + ); + + assert_eq!(reference, row); + } } diff --git a/scylla-macros/src/serialize/row.rs b/scylla-macros/src/serialize/row.rs index af78d10c6f..122bed93dd 100644 --- a/scylla-macros/src/serialize/row.rs +++ b/scylla-macros/src/serialize/row.rs @@ -15,6 +15,9 @@ struct Attributes { #[darling(default)] flavor: Flavor, + + #[darling(default)] + skip_name_checks: bool, } impl Attributes { @@ -74,7 +77,7 @@ pub fn derive_serialize_row(tokens_input: TokenStream) -> Result>()?; let ctx = Context { attributes, fields }; - ctx.validate()?; + ctx.validate(&input.ident)?; let gen: Box = match ctx.attributes.flavor { Flavor::MatchByName => Box::new(ColumnSortingGenerator { ctx: &ctx }), @@ -94,9 +97,31 @@ pub fn derive_serialize_row(tokens_input: TokenStream) -> Result Result<(), syn::Error> { + fn validate(&self, struct_ident: &syn::Ident) -> Result<(), syn::Error> { let mut errors = darling::Error::accumulator(); + if self.attributes.skip_name_checks { + // Skipping name checks is only available in enforce_order mode + if self.attributes.flavor != Flavor::EnforceOrder { + let err = darling::Error::custom( + "the `skip_name_checks` 
attribute is only allowed with the `enforce_order` flavor", + ) + .with_span(struct_ident); + errors.push(err); + } + + // `rename` annotations don't make sense with skipped name checks + for field in self.fields.iter() { + if field.attrs.rename.is_some() { + let err = darling::Error::custom( + "the `rename` annotations don't make sense with `skip_name_checks` attribute", + ) + .with_span(&field.ident); + errors.push(err); + } + } + } + // Check for name collisions let mut used_names = HashMap::::new(); for field in self.fields.iter() { @@ -298,10 +323,15 @@ impl<'a> Generator for ColumnOrderedGenerator<'a> { let rust_field_ident = &field.ident; let rust_field_name = field.column_name(); let typ = &field.ty; + let name_check_expression: syn::Expr = if !self.ctx.attributes.skip_name_checks { + parse_quote! { spec.name == #rust_field_name } + } else { + parse_quote! { true } + }; statements.push(parse_quote! { match column_iter.next() { Some(spec) => { - if spec.name == #rust_field_name { + if #name_check_expression { let cell_writer = #crate_path::RowWriter::make_cell_writer(writer); match <#typ as #crate_path::SerializeCql>::serialize(&self.#rust_field_ident, &spec.typ, cell_writer) { Ok(_proof) => {}, From 067ccf97e32b5835daa1bb1a47647c9697c4572b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Thu, 14 Dec 2023 13:37:52 +0100 Subject: [PATCH 089/107] Docs: Add warnings about performance of query --- docs/source/queries/paged.md | 13 +++++++++++++ docs/source/queries/simple.md | 5 +++++ scylla/src/transport/session.rs | 18 ++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/docs/source/queries/paged.md b/docs/source/queries/paged.md index dab3672210..8112c9308b 100644 --- a/docs/source/queries/paged.md +++ b/docs/source/queries/paged.md @@ -5,6 +5,14 @@ allow to receive the whole result page by page. 
`Session::query_iter` and `Session::execute_iter` take a [simple query](simple.md) or a [prepared query](prepared.md) and return an `async` iterator over result `Rows`. +> ***Warning***\ +> In case of unprepared variant (`Session::query_iter`) if the values are not empty +> driver will first fully prepare a query (which means issuing additional request to each +> node in a cluster). This will have a performance penalty - how big it is depends on +> the size of your cluster (more nodes - more requests) and the size of returned +> result (more returned pages - more amortized penalty). In any case, it is preferable to +> use `Session::execute_iter`. + ### Examples Use `query_iter` to perform a [simple query](simple.md) with paging: ```rust @@ -119,6 +127,11 @@ let res2 = session # } ``` +> ***Warning***\ +> If the values are not empty, driver first needs to send a `PREPARE` request +> in order to fetch information required to serialize values. This will affect +> performance because 2 round trips will be required instead of 1. + On a `PreparedStatement`: ```rust # extern crate scylla; diff --git a/docs/source/queries/simple.md b/docs/source/queries/simple.md index 25190338dd..5b668013a1 100644 --- a/docs/source/queries/simple.md +++ b/docs/source/queries/simple.md @@ -22,6 +22,11 @@ session > > When page size is set, `query` will return only the first page of results. +> ***Warning***\ +> If the values are not empty, driver first needs to send a `PREPARE` request +> in order to fetch information required to serialize values. This will affect +> performance because 2 round trips will be required instead of 1. + ### First argument - the query As the first argument `Session::query` takes anything implementing `Into`.\ You can create a query manually to set custom options. 
For example to change query consistency: diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index bf8c8f5200..871f32ec03 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -558,6 +558,10 @@ impl Session { /// /// This is the easiest way to make a query, but performance is worse than that of prepared queries. /// + /// It is discouraged to use this method with non-empty values argument (`is_empty()` method from `SerializeRow` + /// trait returns false). In such case, query first needs to be prepared (on a single connection), so + /// driver will perform 2 round trips instead of 1. Please use [`Session::execute()`] instead. + /// /// See [the book](https://rust-driver.docs.scylladb.com/stable/queries/simple.html) for more information /// # Arguments /// * `query` - query to perform, can be just a `&str` or the [Query] struct. @@ -608,6 +612,11 @@ impl Session { } /// Queries the database with a custom paging state. + /// + /// It is discouraged to use this method with non-empty values argument (`is_empty()` method from `SerializeRow` + /// trait returns false). In such case, query first needs to be prepared (on a single connection), so + /// driver will perform 2 round trips instead of 1. Please use [`Session::execute_paged()`] instead. + /// /// # Arguments /// /// * `query` - query to be performed @@ -749,6 +758,10 @@ impl Session { /// Returns an async iterator (stream) over all received rows\ /// Page size can be specified in the [Query] passed to the function /// + /// It is discouraged to use this method with non-empty values argument (`is_empty()` method from `SerializeRow` + /// trait returns false). In such case, query first needs to be prepared (on a single connection), so + /// driver will initially perform 2 round trips instead of 1. Please use [`Session::execute_iter()`] instead. 
+ /// /// See [the book](https://rust-driver.docs.scylladb.com/stable/queries/paged.html) for more information /// /// # Arguments @@ -1128,6 +1141,11 @@ impl Session { /// /// Batch values must contain values for each of the queries /// + /// Avoid using non-empty values (`SerializeRow::is_empty()` returns false) for simple queries + /// inside the batch. Such queries will first need to be prepared, so the driver will need to + /// send (number_of_unprepared_queries_with_values + 1) requests instead of 1 request, severely + /// affecting performance. + /// + /// See [the book](https://rust-driver.docs.scylladb.com/stable/queries/batch.html) for more information /// /// # Arguments From bc97ffd8ecfb7dff8f74c00bb93c69b5433e549d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Thu, 14 Dec 2023 14:38:56 +0100 Subject: [PATCH 090/107] Docs: Update information about serialization --- docs/source/data-types/udt.md | 29 ++++++++++++++++++------- docs/source/queries/values.md | 41 +++++++++++++++++++++++++++-------- 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/docs/source/data-types/udt.md b/docs/source/data-types/udt.md index e79ad3feae..ac5b134a62 100644 --- a/docs/source/data-types/udt.md +++ b/docs/source/data-types/udt.md @@ -8,17 +8,25 @@ For example let's say `my_type` was created using this query: CREATE TYPE ks.my_type (int_val int, text_val text) ``` -To use this type in the driver, create a matching struct and derive `IntoUserType` and `FromUserType`: +To use this type in the driver, create a matching struct and derive: +- `SerializeCql`: in order to be able to use this struct in query parameters. \ + This macro requires fields of UDT and struct to have matching names, but the order + of the fields is not required to be the same. \ + Note: you can use a different name using `rename` attribute - see `SerializeCql` macro documentation. +- `FromUserType`: in order to be able to use this struct in query results.
\ + This macro requires fields of UDT and struct to be in the same *ORDER*. \ + This mismatch between `SerializeCql` and `FromUserType` requirements is a temporary situation - in the future `FromUserType` (or the macro that replaces it) will also require matching names. ```rust # extern crate scylla; # async fn check_only_compiles() { -use scylla::macros::{FromUserType, IntoUserType}; +use scylla::macros::{FromUserType, SerializeCql}; // Define a custom struct that matches the User Defined Type created earlier. -// Fields must be in the same order as they are in the database. +// Fields must be in the same order as they are in the database and also +// have the same names. // Wrapping a field in Option will gracefully handle null field values. -#[derive(Debug, IntoUserType, FromUserType)] +#[derive(Debug, FromUserType, SerializeCql)] struct MyType { int_val: i32, text_val: Option, @@ -27,8 +35,13 @@ struct MyType { ``` > ***Important***\ -> Fields in the Rust struct must be defined in the same order as they are in the database. -> When sending and receiving values, the driver will (de)serialize fields one after another, without looking at field names. +> For deserialization, fields in the Rust struct must be defined in the same order as they are in the database. +> When receiving values, the driver will (de)serialize fields one after another, without looking at field names. + +> ***Important***\ +> For serialization, by default fields in the Rust struct must be defined with the same names as they are in the database. +> The driver will serialize the fields in the order defined by the UDT, matching Rust fields by name. +> You can change this behaviour using macro attributes, see `SerializeCql` macro documentation for more information. 
Now it can be sent and received just like any other CQL value: ```rust @@ -37,10 +50,10 @@ Now it can be sent and received just like any other CQL value: # use std::error::Error; # async fn check_only_compiles(session: &Session) -> Result<(), Box> { use scylla::IntoTypedRows; -use scylla::macros::{FromUserType, IntoUserType, SerializeCql}; +use scylla::macros::{FromUserType, SerializeCql}; use scylla::cql_to_rust::FromCqlVal; -#[derive(Debug, IntoUserType, FromUserType, SerializeCql)] +#[derive(Debug, FromUserType, SerializeCql)] struct MyType { int_val: i32, text_val: Option, diff --git a/docs/source/queries/values.md b/docs/source/queries/values.md index 400e7139ab..a8ba9dcf71 100644 --- a/docs/source/queries/values.md +++ b/docs/source/queries/values.md @@ -5,14 +5,14 @@ Each `?` in query text will be filled with the matching value. > **Never** pass values by adding strings, this could lead to [SQL Injection](https://en.wikipedia.org/wiki/SQL_injection) -Each list of values to send in a query must implement the trait `ValueList`.\ +Each list of values to send in a query must implement the trait `SerializeRow`.\ By default this can be a slice `&[]`, a tuple `()` (max 16 elements) of values to send, -or a custom struct which derives from `ValueList`. +or a custom struct which derives from `SerializeRow`. A few examples: ```rust # extern crate scylla; -# use scylla::{Session, ValueList, SerializeRow, frame::response::result::CqlValue}; +# use scylla::{Session, SerializeRow, frame::response::result::CqlValue}; # use std::error::Error; # use std::collections::HashMap; # async fn check_only_compiles(session: &Session) -> Result<(), Box> { @@ -33,22 +33,45 @@ session .await?; // Sending an integer and a string using a named struct. -// The values will be passed in the order from the struct definition -#[derive(ValueList, SerializeRow)] +// Names of fields must match names of columns in request, +// but having them in the same order is not required. 
+// If the fields are in the same order, you can use attribute: +// `#[scylla(flavor = "enforce_order")]` +// in order to skip sorting the fields and just check if they +// are in the same order. See documentation of this macro +// for more information. +#[derive(SerializeRow)] struct IntString { - first_col: i32, - second_col: String, + a: i32, + b: String, } let int_string = IntString { - first_col: 42_i32, - second_col: "hello".to_owned(), + a: 42_i32, + b: "hello".to_owned(), }; session .query("INSERT INTO ks.tab (a, b) VALUES(?, ?)", int_string) .await?; +// You can use named bind markers in query if you want +// your names in struct to be different than column names. +#[derive(SerializeRow)] +struct IntStringCustom { + first_value: i32, + second_value: String, +} + +let int_string_custom = IntStringCustom { + first_value: 42_i32, + second_value: "hello".to_owned(), +}; + +session + .query("INSERT INTO ks.tab (a, b) VALUES(:first_value, :second_value)", int_string_custom) + .await?; + // Sending a single value as a tuple requires a trailing coma (Rust syntax): session.query("INSERT INTO ks.tab (a) VALUES(?)", (2_i32,)).await?; From 25d8d9c69b926408ada2dc2901fb210da49703be Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Wed, 13 Dec 2023 13:32:46 +0100 Subject: [PATCH 091/107] serialize: introduce new BatchValues and friends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces the successors of the previous BatchValues trait and its friends. The structure of the new traits is similar to the old traits, but they come in two flavors: - BatchValue, BatchValueIterator - those are user-facing traits. They allow iterating over the sets of values for batch's statements, but need to have the information about the names and types of the columns/bind markers supplied from the outside. - RawBatchValues, RawBatchValueIterator, RawSerializeRow - those serve as a glue between the logic in `scylla` and `scylla-cql`. 
They are analogous to `BatchValues`, `BatchValueIterator` and `SerializeRow`, but do not need the type information to be able to serialize themselves into the request. Co-authored-by: Karol Baryła --- scylla-cql/src/types/serialize/batch.rs | 307 ++++++++++++++++++++ scylla-cql/src/types/serialize/mod.rs | 2 + scylla-cql/src/types/serialize/raw_batch.rs | 95 ++++++ 3 files changed, 404 insertions(+) create mode 100644 scylla-cql/src/types/serialize/batch.rs create mode 100644 scylla-cql/src/types/serialize/raw_batch.rs diff --git a/scylla-cql/src/types/serialize/batch.rs b/scylla-cql/src/types/serialize/batch.rs new file mode 100644 index 0000000000..5deddcebb7 --- /dev/null +++ b/scylla-cql/src/types/serialize/batch.rs @@ -0,0 +1,307 @@ +//! Contains the [`BatchValues`] and [`BatchValuesIterator`] trait and their +//! implementations. + +use super::row::{RowSerializationContext, SerializeRow}; +use super::{RowWriter, SerializationError}; + +/// Represents a list of sets of values for a batch statement. +/// +/// The data in the object can be consumed with an iterator-like object returned +/// by the [`BatchValues::batch_values_iter`] method. +pub trait BatchValues { + /// An `Iterator`-like object over the values from the parent `BatchValues` object. + // For some unknown reason, this type, when not resolved to a concrete type for a given async function, + // cannot live across await boundaries while maintaining the corresponding future `Send`, unless `'r: 'static` + // + // See for more details + type BatchValuesIter<'r>: BatchValuesIterator<'r> + where + Self: 'r; + + /// Returns an iterator over the data contained in this object. + fn batch_values_iter(&self) -> Self::BatchValuesIter<'_>; +} + +/// An `Iterator`-like object over the values from the parent [`BatchValues`] object. 
+/// +/// It's not a true [`Iterator`] because it does not provide direct access to the +/// items being iterated over, instead it allows calling methods of the underlying +/// [`SerializeRow`] trait while advancing the iterator. +pub trait BatchValuesIterator<'bv> { + /// Serializes the next set of values in the sequence and advances the iterator. + fn serialize_next( + &mut self, + ctx: &RowSerializationContext<'_>, + writer: &mut RowWriter, + ) -> Option>; + + /// Returns whether the next set of values is empty or not and advances the iterator. + fn is_empty_next(&mut self) -> Option; + + /// Skips the next set of values. + fn skip_next(&mut self) -> Option<()>; + + /// Return the number of sets of values, consuming the iterator in the process. + #[inline] + fn count(mut self) -> usize + where + Self: Sized, + { + let mut count = 0; + while self.skip_next().is_some() { + count += 1; + } + count + } +} + +/// Implements `BatchValuesIterator` from an `Iterator` over references to things that implement `SerializeRow` +/// +/// Essentially used internally by this lib to provide implementers of `BatchValuesIterator` for cases +/// that always serialize the same concrete `SerializeRow` type +pub struct BatchValuesIteratorFromIterator { + it: IT, +} + +impl<'bv, 'sr: 'bv, IT, SR> BatchValuesIterator<'bv> for BatchValuesIteratorFromIterator +where + IT: Iterator, + SR: SerializeRow + 'sr, +{ + #[inline] + fn serialize_next( + &mut self, + ctx: &RowSerializationContext<'_>, + writer: &mut RowWriter, + ) -> Option> { + self.it.next().map(|sr| sr.serialize(ctx, writer)) + } + + #[inline] + fn is_empty_next(&mut self) -> Option { + self.it.next().map(|sr| sr.is_empty()) + } + + #[inline] + fn skip_next(&mut self) -> Option<()> { + self.it.next().map(|_| ()) + } + + #[inline] + fn count(self) -> usize + where + Self: Sized, + { + self.it.count() + } +} + +impl From for BatchValuesIteratorFromIterator +where + IT: Iterator, + IT::Item: SerializeRow, +{ + #[inline] + fn 
from(it: IT) -> Self { + BatchValuesIteratorFromIterator { it } + } +} + +// +// BatchValues impls +// + +/// Implements `BatchValues` from an `Iterator` over references to things that implement `SerializeRow` +/// +/// This is to avoid requiring allocating a new `Vec` containing all the `SerializeRow`s directly: +/// with this, one can write: +/// `session.batch(&batch, BatchValuesFromIterator::from(lines_to_insert.iter().map(|l| &l.value_list)))` +/// where `lines_to_insert` may also contain e.g. data to pick the statement... +/// +/// The underlying iterator will always be cloned at least once, once to compute the length if it can't be known +/// in advance, and be re-cloned at every retry. +/// It is consequently expected that the provided iterator is cheap to clone (e.g. `slice.iter().map(...)`). +pub struct BatchValuesFromIterator<'sr, IT> { + it: IT, + + // Without artificially introducing a lifetime to the struct, I couldn't get + // impl BatchValues for BatchValuesFromIterator to work. I wish I understood + // why it's needed. + _phantom: std::marker::PhantomData<&'sr ()>, +} + +impl<'sr, IT, SR> BatchValuesFromIterator<'sr, IT> +where + IT: Iterator + Clone, + SR: SerializeRow + 'sr, +{ + /// Creates a new `BatchValuesFromIterator` object. 
+ #[inline] + pub fn new(into_iter: impl IntoIterator) -> Self { + Self { + it: into_iter.into_iter(), + _phantom: std::marker::PhantomData, + } + } +} + +impl<'sr, IT, SR> From for BatchValuesFromIterator<'sr, IT> +where + IT: Iterator + Clone, + SR: SerializeRow + 'sr, +{ + #[inline] + fn from(it: IT) -> Self { + Self::new(it) + } +} + +impl<'sr, IT, SR> BatchValues for BatchValuesFromIterator<'sr, IT> +where + IT: Iterator + Clone, + SR: SerializeRow + 'sr, +{ + type BatchValuesIter<'r> = BatchValuesIteratorFromIterator where Self: 'r; + + #[inline] + fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { + self.it.clone().into() + } +} + +// Implement BatchValues for slices of SerializeRow types +impl BatchValues for [T] { + type BatchValuesIter<'r> = BatchValuesIteratorFromIterator> where Self: 'r; + + #[inline] + fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { + self.iter().into() + } +} + +// Implement BatchValues for Vec +impl BatchValues for Vec { + type BatchValuesIter<'r> = BatchValuesIteratorFromIterator> where Self: 'r; + + #[inline] + fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { + BatchValues::batch_values_iter(self.as_slice()) + } +} + +// Here is an example implementation for (T0, ) +// Further variants are done using a macro +impl BatchValues for (T0,) { + type BatchValuesIter<'r> = BatchValuesIteratorFromIterator> where Self: 'r; + + #[inline] + fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { + std::iter::once(&self.0).into() + } +} + +/// A [`BatchValuesIterator`] over a tuple. +pub struct TupleValuesIter<'sr, T> { + tuple: &'sr T, + idx: usize, +} + +macro_rules! 
impl_batch_values_for_tuple { + ( $($Ti:ident),* ; $($FieldI:tt),* ; $TupleSize:tt) => { + impl<$($Ti),+> BatchValues for ($($Ti,)+) + where + $($Ti: SerializeRow),+ + { + type BatchValuesIter<'r> = TupleValuesIter<'r, ($($Ti,)+)> where Self: 'r; + + #[inline] + fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { + TupleValuesIter { + tuple: self, + idx: 0, + } + } + } + + impl<'bv, $($Ti),+> BatchValuesIterator<'bv> for TupleValuesIter<'bv, ($($Ti,)+)> + where + $($Ti: SerializeRow),+ + { + #[inline] + fn serialize_next( + &mut self, + ctx: &RowSerializationContext<'_>, + writer: &mut RowWriter, + ) -> Option> { + let ret = match self.idx { + $( + $FieldI => self.tuple.$FieldI.serialize(ctx, writer), + )* + _ => return None, + }; + self.idx += 1; + Some(ret) + } + + #[inline] + fn is_empty_next(&mut self) -> Option { + let ret = match self.idx { + $( + $FieldI => self.tuple.$FieldI.is_empty(), + )* + _ => return None, + }; + self.idx += 1; + Some(ret) + } + + #[inline] + fn skip_next(&mut self) -> Option<()> { + if self.idx < $TupleSize { + self.idx += 1; + Some(()) + } else { + None + } + } + + #[inline] + fn count(self) -> usize { + $TupleSize - self.idx + } + } + } +} + +impl_batch_values_for_tuple!(T0, T1; 0, 1; 2); +impl_batch_values_for_tuple!(T0, T1, T2; 0, 1, 2; 3); +impl_batch_values_for_tuple!(T0, T1, T2, T3; 0, 1, 2, 3; 4); +impl_batch_values_for_tuple!(T0, T1, T2, T3, T4; 0, 1, 2, 3, 4; 5); +impl_batch_values_for_tuple!(T0, T1, T2, T3, T4, T5; 0, 1, 2, 3, 4, 5; 6); +impl_batch_values_for_tuple!(T0, T1, T2, T3, T4, T5, T6; 0, 1, 2, 3, 4, 5, 6; 7); +impl_batch_values_for_tuple!(T0, T1, T2, T3, T4, T5, T6, T7; 0, 1, 2, 3, 4, 5, 6, 7; 8); +impl_batch_values_for_tuple!(T0, T1, T2, T3, T4, T5, T6, T7, T8; 0, 1, 2, 3, 4, 5, 6, 7, 8; 9); +impl_batch_values_for_tuple!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9; + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9; 10); +impl_batch_values_for_tuple!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10; + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10; 
11); +impl_batch_values_for_tuple!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11; + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11; 12); +impl_batch_values_for_tuple!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12; + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12; 13); +impl_batch_values_for_tuple!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13; + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13; 14); +impl_batch_values_for_tuple!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14; + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14; 15); +impl_batch_values_for_tuple!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15; + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15; 16); + +// Every &impl BatchValues should also implement BatchValues +impl<'a, T: BatchValues + ?Sized> BatchValues for &'a T { + type BatchValuesIter<'r> = ::BatchValuesIter<'r> where Self: 'r; + + #[inline] + fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { + ::batch_values_iter(*self) + } +} diff --git a/scylla-cql/src/types/serialize/mod.rs b/scylla-cql/src/types/serialize/mod.rs index b61541debf..7c6c62c7a7 100644 --- a/scylla-cql/src/types/serialize/mod.rs +++ b/scylla-cql/src/types/serialize/mod.rs @@ -6,6 +6,8 @@ use std::{error::Error, fmt::Display, sync::Arc}; use thiserror::Error; +pub mod batch; +pub mod raw_batch; pub mod row; pub mod value; pub mod writers; diff --git a/scylla-cql/src/types/serialize/raw_batch.rs b/scylla-cql/src/types/serialize/raw_batch.rs new file mode 100644 index 0000000000..23624a20b6 --- /dev/null +++ b/scylla-cql/src/types/serialize/raw_batch.rs @@ -0,0 +1,95 @@ +//! Contains the [`RawBatchValues`] and [`RawBatchValuesIterator`] trait and their +//! implementations. + +use super::row::SerializedValues; +use super::{RowWriter, SerializationError}; + +/// Represents a list of sets of values for a batch statement. 
+/// +/// Unlike [`BatchValues`](super::batch::BatchValues), it doesn't require type +/// information from the statements of the batch in order to be serialized. +/// +/// This is a lower level trait than [`BatchValues`](super::batch::BatchValues) +/// and is only used for interaction between the code in `scylla` and +/// `scylla-cql` crates. If you are a regular user of the driver, you shouldn't +/// care about this trait at all. +pub trait RawBatchValues { + /// An `Iterator`-like object over the values from the parent `BatchValues` object. + // For some unknown reason, this type, when not resolved to a concrete type for a given async function, + // cannot live across await boundaries while maintaining the corresponding future `Send`, unless `'r: 'static` + // + // See for more details + type RawBatchValuesIter<'r>: RawBatchValuesIterator<'r> + where + Self: 'r; + + /// Returns an iterator over the data contained in this object. + fn batch_values_iter(&self) -> Self::RawBatchValuesIter<'_>; +} + +/// An `Iterator`-like object over the values from the parent [`RawBatchValues`] object. +/// +/// It's not a true [`Iterator`] because it does not provide direct access to the +/// items being iterated over, instead it allows calling methods of the underlying +/// [`SerializeRow`](super::row::SerializeRow) trait while advancing the iterator. +/// +/// Unlike [`BatchValuesIterator`](super::batch::BatchValuesIterator), it doesn't +/// need type information for serialization. +pub trait RawBatchValuesIterator<'a> { + /// Serializes the next set of values in the sequence and advances the iterator. + fn serialize_next(&mut self, writer: &mut RowWriter) -> Option>; + + /// Returns whether the next set of values is empty or not and advances the iterator. + fn is_empty_next(&mut self) -> Option; + + /// Skips the next set of values. + fn skip_next(&mut self) -> Option<()>; + + /// Return the number of sets of values, consuming the iterator in the process. 
+ #[inline] + fn count(mut self) -> usize + where + Self: Sized, + { + let mut count = 0; + while self.skip_next().is_some() { + count += 1; + } + count + } +} + +/// An implementation used by `scylla-proxy` +impl RawBatchValues for Vec { + type RawBatchValuesIter<'r> = std::slice::Iter<'r, SerializedValues> + where + Self: 'r; + + fn batch_values_iter(&self) -> Self::RawBatchValuesIter<'_> { + self.iter() + } +} + +impl<'r> RawBatchValuesIterator<'r> for std::slice::Iter<'r, SerializedValues> { + #[inline] + fn serialize_next(&mut self, writer: &mut RowWriter) -> Option> { + self.next().map(|sv| { + writer.append_serialize_row(sv); + Ok(()) + }) + } + + fn is_empty_next(&mut self) -> Option { + self.next().map(|sv| sv.is_empty()) + } + + #[inline] + fn skip_next(&mut self) -> Option<()> { + self.next().map(|_| ()) + } + + #[inline] + fn count(self) -> usize { + <_ as Iterator>::count(self) + } +} From cb7787ddf14fb05500d2ea0f9776ca1508860170 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Wed, 13 Dec 2023 23:44:39 +0100 Subject: [PATCH 092/107] raw_batch: implement RawBatchValueAdapter Implement an adapter layer which takes a `BatchValues` object (which needs type information to serialize), pairs it with an iterator over `RowSerializationContext` objects and returns something which implements `RawBatchValues` (which don't require type information to serialize). It will be used by the `scylla` crate to pass batch data to `scylla-cql` in a type-erased form. --- scylla-cql/src/types/serialize/raw_batch.rs | 77 +++++++++++++++++++-- 1 file changed, 72 insertions(+), 5 deletions(-) diff --git a/scylla-cql/src/types/serialize/raw_batch.rs b/scylla-cql/src/types/serialize/raw_batch.rs index 23624a20b6..e378f42dcb 100644 --- a/scylla-cql/src/types/serialize/raw_batch.rs +++ b/scylla-cql/src/types/serialize/raw_batch.rs @@ -1,15 +1,16 @@ //! Contains the [`RawBatchValues`] and [`RawBatchValuesIterator`] trait and their //! implementations. 
-use super::row::SerializedValues; +use super::batch::{BatchValues, BatchValuesIterator}; +use super::row::{RowSerializationContext, SerializedValues}; use super::{RowWriter, SerializationError}; /// Represents a list of sets of values for a batch statement. /// -/// Unlike [`BatchValues`](super::batch::BatchValues), it doesn't require type +/// Unlike [`BatchValues`], it doesn't require type /// information from the statements of the batch in order to be serialized. /// -/// This is a lower level trait than [`BatchValues`](super::batch::BatchValues) +/// This is a lower level trait than [`BatchValues`] /// and is only used for interaction between the code in `scylla` and /// `scylla-cql` crates. If you are a regular user of the driver, you shouldn't /// care about this trait at all. @@ -33,7 +34,7 @@ pub trait RawBatchValues { /// items being iterated over, instead it allows calling methods of the underlying /// [`SerializeRow`](super::row::SerializeRow) trait while advancing the iterator. /// -/// Unlike [`BatchValuesIterator`](super::batch::BatchValuesIterator), it doesn't +/// Unlike [`BatchValuesIterator`], it doesn't /// need type information for serialization. pub trait RawBatchValuesIterator<'a> { /// Serializes the next set of values in the sequence and advances the iterator. @@ -59,7 +60,7 @@ pub trait RawBatchValuesIterator<'a> { } } -/// An implementation used by `scylla-proxy` +// An implementation used by `scylla-proxy` impl RawBatchValues for Vec { type RawBatchValuesIter<'r> = std::slice::Iter<'r, SerializedValues> where Self: 'r; fn batch_values_iter(&self) -> Self::RawBatchValuesIter<'_> { self.iter() } } @@ -93,3 +94,69 @@ impl<'r> RawBatchValuesIterator<'r> for std::slice::Iter<'r, SerializedValues> { <_ as Iterator>::count(self) } } + +/// Takes `BatchValues` and an iterator over contexts, and turns them into a `RawBatchValues`. +pub struct RawBatchValuesAdapter { + batch_values: BV, + contexts: CTX, +} + +impl RawBatchValuesAdapter { + /// Creates a new `RawBatchValuesAdapter` object. 
+ #[inline] + pub fn new(batch_values: BV, contexts: CTX) -> Self { + Self { + batch_values, + contexts, + } + } +} + +impl<'ctx, BV, CTX> RawBatchValues for RawBatchValuesAdapter +where + BV: BatchValues, + CTX: Iterator> + Clone, +{ + type RawBatchValuesIter<'r> = RawBatchValuesIteratorAdapter, CTX> + where + Self: 'r; + + #[inline] + fn batch_values_iter(&self) -> Self::RawBatchValuesIter<'_> { + RawBatchValuesIteratorAdapter { + batch_values_iterator: self.batch_values.batch_values_iter(), + contexts: self.contexts.clone(), + } + } +} + +/// Takes `BatchValuesIterator` and an iterator over contexts, and turns them into a `RawBatchValuesIterator`. +pub struct RawBatchValuesIteratorAdapter { + batch_values_iterator: BVI, + contexts: CTX, +} + +impl<'bvi, 'ctx, BVI, CTX> RawBatchValuesIterator<'bvi> for RawBatchValuesIteratorAdapter +where + BVI: BatchValuesIterator<'bvi>, + CTX: Iterator>, +{ + #[inline] + fn serialize_next(&mut self, writer: &mut RowWriter) -> Option> { + let ctx = self.contexts.next()?; + self.batch_values_iterator.serialize_next(&ctx, writer) + } + + fn is_empty_next(&mut self) -> Option { + self.contexts.next()?; + let ret = self.batch_values_iterator.is_empty_next()?; + Some(ret) + } + + #[inline] + fn skip_next(&mut self) -> Option<()> { + self.contexts.next()?; + self.batch_values_iterator.skip_next()?; + Some(()) + } +} From a9b7da3f068782c008fa461f60384cad75060699 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 09:13:18 +0100 Subject: [PATCH 093/107] statement/batch: reintroduce BatchValuesFirstSerialized The purpose of the new struct is to enable token-aware routing of batches - which already exists in the old API - in the new API. Batches are routed according to the token calculated based on the first statement in the batch (if the first statement is a prepared statement). 
Calculation of the token must happen before the load balancing policy computes a plan and chooses the first connection, but serialization only happens after a connection is chosen. In order not to repeat serialization work, BatchValuesFirstSerialized wrapper can be used to transform a BatchValues into another BatchValues which caches the result of the first serialization. The types are put into a module in the `scylla` crate and hidden inside it. The wrapping functionality is exposed via a function which constructs the BatchValuesFirstSerialized object but returns it as `impl BatchValues`. --- scylla/src/statement/batch.rs | 90 +++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/scylla/src/statement/batch.rs b/scylla/src/statement/batch.rs index 6805dcb275..fd940db903 100644 --- a/scylla/src/statement/batch.rs +++ b/scylla/src/statement/batch.rs @@ -197,3 +197,93 @@ impl<'a: 'b, 'b> From<&'a BatchStatement> } } } + +pub(crate) mod batch_values { + use scylla_cql::types::serialize::batch::BatchValues; + use scylla_cql::types::serialize::batch::BatchValuesIterator; + use scylla_cql::types::serialize::row::RowSerializationContext; + use scylla_cql::types::serialize::row::SerializedValues; + use scylla_cql::types::serialize::{RowWriter, SerializationError}; + + struct BatchValuesFirstSerialized { + // Contains the first value of BV in a serialized form. + // The first value in the iterator returned from `rest` should be skipped! 
+ first: Option, + rest: BV, + } + + #[allow(dead_code)] + pub(crate) fn new_batch_values_first_serialized( + rest: impl BatchValues, + first: Option, + ) -> impl BatchValues { + BatchValuesFirstSerialized { first, rest } + } + + impl BatchValues for BatchValuesFirstSerialized + where + BV: BatchValues, + { + type BatchValuesIter<'r> = BatchValuesFirstSerializedIterator<'r, BV::BatchValuesIter<'r>> + where + Self: 'r; + + fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> { + BatchValuesFirstSerializedIterator { + first: self.first.as_ref(), + rest: self.rest.batch_values_iter(), + } + } + } + + struct BatchValuesFirstSerializedIterator<'f, BVI> { + first: Option<&'f SerializedValues>, + rest: BVI, + } + + impl<'f, BVI> BatchValuesIterator<'f> for BatchValuesFirstSerializedIterator<'f, BVI> + where + BVI: BatchValuesIterator<'f>, + { + #[inline] + fn serialize_next( + &mut self, + ctx: &RowSerializationContext<'_>, + writer: &mut RowWriter, + ) -> Option> { + match self.first.take() { + Some(sr) => { + writer.append_serialize_row(sr); + self.rest.skip_next(); + Some(Ok(())) + } + None => self.rest.serialize_next(ctx, writer), + } + } + + #[inline] + fn is_empty_next(&mut self) -> Option { + match self.first.take() { + Some(s) => { + self.rest.skip_next(); + Some(s.is_empty()) + } + None => self.rest.is_empty_next(), + } + } + + #[inline] + fn skip_next(&mut self) -> Option<()> { + self.first = None; + self.rest.skip_next() + } + + #[inline] + fn count(self) -> usize + where + Self: Sized, + { + self.rest.count() + } + } +} From 6b88c7efeae5c84c291262befcd9770567571735 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 18:45:18 +0100 Subject: [PATCH 094/107] scylla-cql/frame, scylla/{statement,transport}: switch to new API The driver is updated to use the new BatchValues API on all layers at once. Fortunately, there aren't many changes and they are mostly simple. 
--- scylla-cql/src/frame/request/batch.rs | 43 ++++++++++++++++++------- scylla-cql/src/frame/request/mod.rs | 10 +++--- scylla/src/statement/batch.rs | 1 - scylla/src/transport/caching_session.rs | 4 +-- scylla/src/transport/connection.rs | 27 ++++++++++------ scylla/src/transport/session.rs | 35 ++++++++++++-------- 6 files changed, 78 insertions(+), 42 deletions(-) diff --git a/scylla-cql/src/frame/request/batch.rs b/scylla-cql/src/frame/request/batch.rs index 0c779aae1b..7f7895e1de 100644 --- a/scylla-cql/src/frame/request/batch.rs +++ b/scylla-cql/src/frame/request/batch.rs @@ -1,11 +1,18 @@ use bytes::{Buf, BufMut}; use std::{borrow::Cow, convert::TryInto}; -use crate::frame::{ - frame_errors::ParseError, - request::{RequestOpcode, SerializableRequest}, - types::{self, SerialConsistency}, - value::{LegacyBatchValues, LegacyBatchValuesIterator, LegacySerializedValues}, +use crate::{ + frame::{ + frame_errors::ParseError, + request::{RequestOpcode, SerializableRequest}, + types::{self, SerialConsistency}, + value::SerializeValuesError, + }, + types::serialize::{ + raw_batch::{RawBatchValues, RawBatchValuesIterator}, + row::SerializedValues, + RowWriter, SerializationError, + }, }; use super::DeserializableRequest; @@ -20,7 +27,7 @@ pub struct Batch<'b, Statement, Values> where BatchStatement<'b>: From<&'b Statement>, Statement: Clone, - Values: LegacyBatchValues, + Values: RawBatchValues, { pub statements: Cow<'b, [Statement]>, pub batch_type: BatchType, @@ -72,7 +79,7 @@ impl SerializableRequest for Batch<'_, Statement, Values> where for<'s> BatchStatement<'s>: From<&'s Statement>, Statement: Clone, - Values: LegacyBatchValues, + Values: RawBatchValues, { const OPCODE: RequestOpcode = RequestOpcode::Batch; @@ -93,9 +100,23 @@ where let mut value_lists = self.values.batch_values_iter(); for (idx, statement) in self.statements.iter().enumerate() { BatchStatement::from(statement).serialize(buf)?; + + // Reserve two bytes for length + let length_pos = buf.len(); + 
buf.extend_from_slice(&[0, 0]); + let mut row_writer = RowWriter::new(buf); value_lists - .write_next_to_request(buf) + .serialize_next(&mut row_writer) .ok_or_else(|| counts_mismatch_err(idx, self.statements.len()))??; + // Go back and put the length + let count: u16 = match row_writer.value_count().try_into() { + Ok(n) => n, + Err(_) => { + return Err(SerializationError::new(SerializeValuesError::TooManyValues).into()) + } + }; + buf[length_pos..length_pos + 2].copy_from_slice(&count.to_be_bytes()); + n_serialized_statements += 1; } // At this point, we have all statements serialized. If any values are still left, we have a mismatch. @@ -186,7 +207,7 @@ impl<'s, 'b> From<&'s BatchStatement<'b>> for BatchStatement<'s> { } } -impl<'b> DeserializableRequest for Batch<'b, BatchStatement<'b>, Vec> { +impl<'b> DeserializableRequest for Batch<'b, BatchStatement<'b>, Vec> { fn deserialize(buf: &mut &[u8]) -> Result { let batch_type = buf.get_u8().try_into()?; @@ -196,7 +217,7 @@ impl<'b> DeserializableRequest for Batch<'b, BatchStatement<'b>, Vec DeserializableRequest for Batch<'b, BatchStatement<'b>, Vec, Vec) = + let (statements, values): (Vec, Vec) = statements_with_values.into_iter().unzip(); Ok(Self { diff --git a/scylla-cql/src/frame/request/mod.rs b/scylla-cql/src/frame/request/mod.rs index 71e3c1bd60..e0146156a2 100644 --- a/scylla-cql/src/frame/request/mod.rs +++ b/scylla-cql/src/frame/request/mod.rs @@ -7,6 +7,7 @@ pub mod query; pub mod register; pub mod startup; +use crate::types::serialize::row::SerializedValues; use crate::{frame::frame_errors::ParseError, Consistency}; use bytes::Bytes; use num_enum::TryFromPrimitive; @@ -22,7 +23,6 @@ pub use startup::Startup; use self::batch::BatchStatement; use super::types::SerialConsistency; -use super::value::LegacySerializedValues; #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, TryFromPrimitive)] #[repr(u8)] @@ -59,7 +59,7 @@ pub trait DeserializableRequest: SerializableRequest + Sized { pub enum 
Request<'r> { Query(Query<'r>), Execute(Execute<'r>), - Batch(Batch<'r, BatchStatement<'r>, Vec>), + Batch(Batch<'r, BatchStatement<'r>, Vec>), } impl<'r> Request<'r> { @@ -190,8 +190,8 @@ mod tests { // Not execute's values, because named values are not supported in batches. values: vec![ - query.parameters.values.deref().to_old_serialized_values(), - query.parameters.values.deref().to_old_serialized_values(), + query.parameters.values.deref().clone(), + query.parameters.values.deref().clone(), ], }; { @@ -262,7 +262,7 @@ mod tests { serial_consistency: None, timestamp: None, - values: vec![query.parameters.values.deref().to_old_serialized_values()], + values: vec![query.parameters.values.deref().clone()], }; { let mut buf = Vec::new(); diff --git a/scylla/src/statement/batch.rs b/scylla/src/statement/batch.rs index fd940db903..3debe17a1c 100644 --- a/scylla/src/statement/batch.rs +++ b/scylla/src/statement/batch.rs @@ -212,7 +212,6 @@ pub(crate) mod batch_values { rest: BV, } - #[allow(dead_code)] pub(crate) fn new_batch_values_first_serialized( rest: impl BatchValues, first: Option, diff --git a/scylla/src/transport/caching_session.rs b/scylla/src/transport/caching_session.rs index 4a841fa250..f3d0d4db88 100644 --- a/scylla/src/transport/caching_session.rs +++ b/scylla/src/transport/caching_session.rs @@ -1,5 +1,4 @@ use crate::batch::{Batch, BatchStatement}; -use crate::frame::value::LegacyBatchValues; use crate::prepared_statement::PreparedStatement; use crate::query::Query; use crate::transport::errors::QueryError; @@ -10,6 +9,7 @@ use bytes::Bytes; use dashmap::DashMap; use futures::future::try_join_all; use scylla_cql::frame::response::result::PreparedMetadata; +use scylla_cql::types::serialize::batch::BatchValues; use scylla_cql::types::serialize::row::SerializeRow; use std::collections::hash_map::RandomState; use std::hash::BuildHasher; @@ -108,7 +108,7 @@ where pub async fn batch( &self, batch: &Batch, - values: impl LegacyBatchValues, + values: impl 
BatchValues, ) -> Result { let all_prepared: bool = batch .statements diff --git a/scylla/src/transport/connection.rs b/scylla/src/transport/connection.rs index 19db130168..b6b91b69db 100644 --- a/scylla/src/transport/connection.rs +++ b/scylla/src/transport/connection.rs @@ -4,7 +4,9 @@ use scylla_cql::errors::TranslationError; use scylla_cql::frame::request::options::Options; use scylla_cql::frame::response::Error; use scylla_cql::frame::types::SerialConsistency; -use scylla_cql::types::serialize::row::SerializedValues; +use scylla_cql::types::serialize::batch::{BatchValues, BatchValuesIterator}; +use scylla_cql::types::serialize::raw_batch::RawBatchValuesAdapter; +use scylla_cql::types::serialize::row::{RowSerializationContext, SerializedValues}; use socket2::{SockRef, TcpKeepalive}; use tokio::io::{split, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader, BufWriter}; use tokio::net::{TcpSocket, TcpStream}; @@ -53,7 +55,6 @@ use crate::frame::{ request::{self, batch, execute, query, register, SerializableRequest}, response::{event::Event, result, NonErrorResponse, Response, ResponseOpcode}, server_event_type::EventType, - value::{LegacyBatchValues, LegacyBatchValuesIterator}, FrameParams, SerializedRequest, }; use crate::query::Query; @@ -763,7 +764,7 @@ impl Connection { pub(crate) async fn batch( &self, batch: &Batch, - values: impl LegacyBatchValues, + values: impl BatchValues, ) -> Result { self.batch_with_consistency( batch, @@ -779,12 +780,21 @@ impl Connection { pub(crate) async fn batch_with_consistency( &self, init_batch: &Batch, - values: impl LegacyBatchValues, + values: impl BatchValues, consistency: Consistency, serial_consistency: Option, ) -> Result { let batch = self.prepare_batch(init_batch, &values).await?; + let contexts = batch.statements.iter().map(|bs| match bs { + BatchStatement::Query(_) => RowSerializationContext::empty(), + BatchStatement::PreparedStatement(ps) => { + RowSerializationContext::from_prepared(ps.get_prepared_metadata()) + } 
+ }); + + let values = RawBatchValuesAdapter::new(values, contexts); + let batch_frame = batch::Batch { statements: Cow::Borrowed(&batch.statements), values, @@ -831,7 +841,7 @@ impl Connection { async fn prepare_batch<'b>( &self, init_batch: &'b Batch, - values: impl LegacyBatchValues, + values: impl BatchValues, ) -> Result, QueryError> { let mut to_prepare = HashSet::<&str>::new(); @@ -839,11 +849,8 @@ impl Connection { let mut values_iter = values.batch_values_iter(); for stmt in &init_batch.statements { if let BatchStatement::Query(query) = stmt { - let value = values_iter.next_serialized().transpose()?; - if let Some(v) = value { - if v.len() > 0 { - to_prepare.insert(&query.contents); - } + if let Some(false) = values_iter.is_empty_next() { + to_prepare.insert(&query.contents); } } else { values_iter.skip_next(); diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index e5c01ed688..3bc0177bcf 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -1,6 +1,7 @@ //! `Session` is the main object used in the driver.\ //! It manages all connections to the cluster and allows to perform queries. 
+use crate::batch::batch_values; #[cfg(feature = "cloud")] use crate::cloud::CloudConfig; @@ -16,7 +17,8 @@ use itertools::{Either, Itertools}; pub use scylla_cql::errors::TranslationError; use scylla_cql::frame::response::result::{deser_cql_value, ColumnSpec, Rows}; use scylla_cql::frame::response::NonErrorResponse; -use scylla_cql::types::serialize::row::SerializeRow; +use scylla_cql::types::serialize::batch::{BatchValues, BatchValuesIterator}; +use scylla_cql::types::serialize::row::{RowSerializationContext, SerializeRow, SerializedValues}; use std::borrow::Borrow; use std::collections::HashMap; use std::fmt::Display; @@ -47,9 +49,6 @@ use super::NodeRef; use crate::cql_to_rust::FromRow; use crate::frame::response::cql_to_rust::FromRowError; use crate::frame::response::result; -use crate::frame::value::{ - LegacyBatchValues, LegacyBatchValuesFirstSerialized, LegacyBatchValuesIterator, -}; use crate::prepared_statement::PreparedStatement; use crate::query::Query; use crate::routing::Token; @@ -1167,7 +1166,7 @@ impl Session { pub async fn batch( &self, batch: &Batch, - values: impl LegacyBatchValues, + values: impl BatchValues, ) -> Result { // Shard-awareness behavior for batch will be to pick shard based on first batch statement's shard // If users batch statements by shard, they will be rewarded with full shard awareness @@ -1196,15 +1195,25 @@ impl Session { .unwrap_or(execution_profile.serial_consistency); let (first_serialized_value, first_value_token, keyspace_name) = { - // Extract first serialized_value - let first_serialized_value = - values.batch_values_iter().next_serialized().transpose()?; + let mut values_iter = values.batch_values_iter(); // The temporary "p" is necessary because lifetimes - let p = match (first_serialized_value, batch.statements.first()) { - (Some(first_serialized_value), Some(BatchStatement::PreparedStatement(ps))) => { - let token = ps.calculate_token(&first_serialized_value)?; - (Some(first_serialized_value), token, 
ps.get_keyspace_name()) + let p = match batch.statements.first() { + Some(BatchStatement::PreparedStatement(ps)) => { + let ctx = RowSerializationContext::from_prepared(ps.get_prepared_metadata()); + let (first_serialized_value, did_write) = + SerializedValues::from_closure(|writer| { + values_iter + .serialize_next(&ctx, writer) + .transpose() + .map(|o| o.is_some()) + })?; + if did_write { + let token = ps.calculate_token_untyped(&first_serialized_value)?; + (Some(first_serialized_value), token, ps.get_keyspace_name()) + } else { + (None, None, None) + } } _ => (None, None, None), }; @@ -1221,7 +1230,7 @@ impl Session { // Reuse first serialized value when serializing query, and delegate to `BatchValues::write_next_to_request` // directly for others (if they weren't already serialized, possibly don't even allocate the `LegacySerializedValues`) let values = - LegacyBatchValuesFirstSerialized::new(&values, first_serialized_value.as_deref()); + batch_values::new_batch_values_first_serialized(&values, first_serialized_value); let values_ref = &values; let span = RequestSpan::new_batch(); From e6c2aa5627fe8c28e1a1ffd711c96eff43d0d378 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 11:28:28 +0100 Subject: [PATCH 095/107] session: move the batch token awareness logic to a separate function Simplify the logic of `Session::batch` by moving the parts responsible for token calculation and wrapping the `BatchValues` argument into a separate function in the `batch_values` module. 
--- scylla/src/statement/batch.rs | 59 ++++++++++++++++++++++++++++++--- scylla/src/transport/session.rs | 42 ++++++----------------- 2 files changed, 64 insertions(+), 37 deletions(-) diff --git a/scylla/src/statement/batch.rs b/scylla/src/statement/batch.rs index 3debe17a1c..efe95031e2 100644 --- a/scylla/src/statement/batch.rs +++ b/scylla/src/statement/batch.rs @@ -199,12 +199,62 @@ impl<'a: 'b, 'b> From<&'a BatchStatement> } pub(crate) mod batch_values { + use scylla_cql::errors::QueryError; use scylla_cql::types::serialize::batch::BatchValues; use scylla_cql::types::serialize::batch::BatchValuesIterator; use scylla_cql::types::serialize::row::RowSerializationContext; use scylla_cql::types::serialize::row::SerializedValues; use scylla_cql::types::serialize::{RowWriter, SerializationError}; + use crate::routing::Token; + + use super::BatchStatement; + + // Takes an optional reference to the first statement in the batch and + // the batch values, and tries to compute the token for the statement. + // Returns the (optional) token and batch values. If the function needed + // to serialize values for the first statement, the returned batch values + // will cache the results of the serialization. + // + // NOTE: Batch values returned by this function might not type check + // the first statement when it is serialized! However, if they don't, + // then the first row was already checked by the function. It is assumed + // that `statement` holds the first prepared statement of the batch (if + // there is one), and that it will be used later to serialize the values. 
+ pub(crate) fn peek_first_token<'bv>( + values: impl BatchValues + 'bv, + statement: Option<&BatchStatement>, + ) -> Result<(Option, impl BatchValues + 'bv), QueryError> { + let mut values_iter = values.batch_values_iter(); + let (token, first_values) = match statement { + Some(BatchStatement::PreparedStatement(ps)) => { + let ctx = RowSerializationContext::from_prepared(ps.get_prepared_metadata()); + let (first_values, did_write) = SerializedValues::from_closure(|writer| { + values_iter + .serialize_next(&ctx, writer) + .transpose() + .map(|o| o.is_some()) + })?; + if did_write { + let token = ps.calculate_token_untyped(&first_values)?; + (token, Some(first_values)) + } else { + (None, None) + } + } + _ => (None, None), + }; + + // Need to do it explicitly, otherwise the next line will complain + // that `values_iter` still borrows `values`. + std::mem::drop(values_iter); + + // Reuse the already serialized first value via `BatchValuesFirstSerialized`. + let values = BatchValuesFirstSerialized::new(values, first_values); + + Ok((token, values)) + } + struct BatchValuesFirstSerialized { // Contains the first value of BV in a serialized form. // The first value in the iterator returned from `rest` should be skipped! 
@@ -212,11 +262,10 @@ pub(crate) mod batch_values { rest: BV, } - pub(crate) fn new_batch_values_first_serialized( - rest: impl BatchValues, - first: Option, - ) -> impl BatchValues { - BatchValuesFirstSerialized { first, rest } + impl BatchValuesFirstSerialized { + fn new(rest: BV, first: Option) -> Self { + Self { first, rest } + } } impl BatchValues for BatchValuesFirstSerialized diff --git a/scylla/src/transport/session.rs b/scylla/src/transport/session.rs index 3bc0177bcf..34f1f8aa57 100644 --- a/scylla/src/transport/session.rs +++ b/scylla/src/transport/session.rs @@ -17,8 +17,8 @@ use itertools::{Either, Itertools}; pub use scylla_cql::errors::TranslationError; use scylla_cql::frame::response::result::{deser_cql_value, ColumnSpec, Rows}; use scylla_cql::frame::response::NonErrorResponse; -use scylla_cql::types::serialize::batch::{BatchValues, BatchValuesIterator}; -use scylla_cql::types::serialize::row::{RowSerializationContext, SerializeRow, SerializedValues}; +use scylla_cql::types::serialize::batch::BatchValues; +use scylla_cql::types::serialize::row::SerializeRow; use std::borrow::Borrow; use std::collections::HashMap; use std::fmt::Display; @@ -1194,31 +1194,15 @@ impl Session { .serial_consistency .unwrap_or(execution_profile.serial_consistency); - let (first_serialized_value, first_value_token, keyspace_name) = { - let mut values_iter = values.batch_values_iter(); - - // The temporary "p" is necessary because lifetimes - let p = match batch.statements.first() { - Some(BatchStatement::PreparedStatement(ps)) => { - let ctx = RowSerializationContext::from_prepared(ps.get_prepared_metadata()); - let (first_serialized_value, did_write) = - SerializedValues::from_closure(|writer| { - values_iter - .serialize_next(&ctx, writer) - .transpose() - .map(|o| o.is_some()) - })?; - if did_write { - let token = ps.calculate_token_untyped(&first_serialized_value)?; - (Some(first_serialized_value), token, ps.get_keyspace_name()) - } else { - (None, None, None) - } - } 
- _ => (None, None, None), - }; - p + let keyspace_name = match batch.statements.first() { + Some(BatchStatement::PreparedStatement(ps)) => ps.get_keyspace_name(), + _ => None, }; + + let (first_value_token, values) = + batch_values::peek_first_token(values, batch.statements.first())?; + let values_ref = &values; + let statement_info = RoutingInfo { consistency, serial_consistency, @@ -1227,12 +1211,6 @@ impl Session { is_confirmed_lwt: false, }; - // Reuse first serialized value when serializing query, and delegate to `BatchValues::write_next_to_request` - // directly for others (if they weren't already serialized, possibly don't even allocate the `LegacySerializedValues`) - let values = - batch_values::new_batch_values_first_serialized(&values, first_serialized_value); - let values_ref = &values; - let span = RequestSpan::new_batch(); let run_query_result = self From 666ca2519ad6fb71bf632d49387b905773ed9c59 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 15 Dec 2023 06:11:27 +0100 Subject: [PATCH 096/107] serialize/batch: add compatibility layer for the legacy API In case somebody has a custom implementation of BatchValues, they can use the adapter. --- scylla-cql/src/types/serialize/batch.rs | 57 +++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/scylla-cql/src/types/serialize/batch.rs b/scylla-cql/src/types/serialize/batch.rs index 5deddcebb7..4fed3524a3 100644 --- a/scylla-cql/src/types/serialize/batch.rs +++ b/scylla-cql/src/types/serialize/batch.rs @@ -1,6 +1,8 @@ //! Contains the [`BatchValues`] and [`BatchValuesIterator`] trait and their //! implementations. 
+use crate::frame::value::{LegacyBatchValues, LegacyBatchValuesIterator};
+
 use super::row::{RowSerializationContext, SerializeRow};
 use super::{RowWriter, SerializationError};
 
@@ -305,3 +307,58 @@ impl<'a, T: BatchValues + ?Sized> BatchValues for &'a T {
         <T as BatchValues>::batch_values_iter(*self)
     }
 }
+
+/// A newtype wrapper which adapts existing types that implement
+/// [`LegacyBatchValues`] to the current [`BatchValues`] API.
+///
+/// Note that the [`LegacyBatchValues`] trait is deprecated and will be
+/// removed in the future, and you should prefer using [`BatchValues`] as it is
+/// more type-safe.
+pub struct LegacyBatchValuesAdapter<T>(pub T);
+
+impl<T> BatchValues for LegacyBatchValuesAdapter<T>
+where
+    T: LegacyBatchValues,
+{
+    type BatchValuesIter<'r> = LegacyBatchValuesIteratorAdapter<T::LegacyBatchValuesIter<'r>>
+    where
+        Self: 'r;
+
+    #[inline]
+    fn batch_values_iter(&self) -> Self::BatchValuesIter<'_> {
+        LegacyBatchValuesIteratorAdapter(self.0.batch_values_iter())
+    }
+}
+
+/// A newtype wrapper which adapts existing types that implement
+/// [`LegacyBatchValuesIterator`] to the current [`BatchValuesIterator`] API.
+pub struct LegacyBatchValuesIteratorAdapter(pub T); + +impl<'r, T> BatchValuesIterator<'r> for LegacyBatchValuesIteratorAdapter +where + T: LegacyBatchValuesIterator<'r>, +{ + #[inline] + fn serialize_next( + &mut self, + ctx: &RowSerializationContext<'_>, + writer: &mut RowWriter, + ) -> Option> { + self.0.next_serialized().map(|sv| { + sv.map_err(SerializationError::new) + .and_then(|sv| sv.serialize(ctx, writer)) + }) + } + + #[inline] + fn is_empty_next(&mut self) -> Option { + self.0 + .next_serialized() + .map(|sv| sv.map_or(false, |sv| sv.len() == 0)) + } + + #[inline] + fn skip_next(&mut self) -> Option<()> { + self.0.skip_next() + } +} From 2ea431c799b554ab16f567c72e48ebad14945064 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 15 Dec 2023 05:40:09 +0100 Subject: [PATCH 097/107] value_tests: adapt batch tests to both new and legacy API This way can reuse the existing tests for impls of the new traits and see that they behave in the same way. --- scylla-cql/src/frame/value_tests.rs | 181 +++++++++++++++++++++++----- 1 file changed, 149 insertions(+), 32 deletions(-) diff --git a/scylla-cql/src/frame/value_tests.rs b/scylla-cql/src/frame/value_tests.rs index 847482eebf..cb6a94ee49 100644 --- a/scylla-cql/src/frame/value_tests.rs +++ b/scylla-cql/src/frame/value_tests.rs @@ -1,4 +1,5 @@ use crate::frame::{response::result::CqlValue, types::RawValue, value::LegacyBatchValuesIterator}; +use crate::types::serialize::batch::{BatchValues, BatchValuesIterator, LegacyBatchValuesAdapter}; use crate::types::serialize::row::{RowSerializationContext, SerializeRow}; use crate::types::serialize::value::SerializeCql; use crate::types::serialize::{CellWriter, RowWriter}; @@ -1178,19 +1179,79 @@ fn cow_serialized_values_value_list() { assert_eq!(cow_ser_values.as_ref(), serialized.as_ref()); } +fn make_batch_value_iters<'bv, BV: BatchValues + LegacyBatchValues>( + bv: &'bv BV, + adapter_bv: &'bv LegacyBatchValuesAdapter<&'bv BV>, +) -> ( + 
BV::LegacyBatchValuesIter<'bv>, + BV::BatchValuesIter<'bv>, + as BatchValues>::BatchValuesIter<'bv>, +) { + ( + ::batch_values_iter(bv), + ::batch_values_iter(bv), + <_ as BatchValues>::batch_values_iter(adapter_bv), + ) +} + +fn serialize_batch_value_iterators<'a>( + (legacy_bvi, bvi, bvi_adapted): &mut ( + impl LegacyBatchValuesIterator<'a>, + impl BatchValuesIterator<'a>, + impl BatchValuesIterator<'a>, + ), + columns: &[ColumnSpec], +) -> Vec { + let mut legacy_data = Vec::new(); + legacy_bvi + .write_next_to_request(&mut legacy_data) + .unwrap() + .unwrap(); + + fn serialize_bvi<'bv>( + bvi: &mut impl BatchValuesIterator<'bv>, + ctx: &RowSerializationContext, + ) -> Vec { + let mut data = vec![0, 0]; + let mut writer = RowWriter::new(&mut data); + bvi.serialize_next(ctx, &mut writer).unwrap().unwrap(); + let value_count: u16 = writer.value_count().try_into().unwrap(); + data[0..2].copy_from_slice(&value_count.to_be_bytes()); + data + } + + let ctx = RowSerializationContext { columns }; + let data = serialize_bvi(bvi, &ctx); + let adapted_data = serialize_bvi(bvi_adapted, &ctx); + + assert_eq!(legacy_data, data); + assert_eq!(adapted_data, data); + data +} + #[test] fn slice_batch_values() { let batch_values: &[&[i8]] = &[&[1, 2], &[2, 3, 4, 5], &[6]]; - let mut it = batch_values.batch_values_iter(); + let legacy_batch_values = LegacyBatchValuesAdapter(&batch_values); + + let mut iters = make_batch_value_iters(&batch_values, &legacy_batch_values); { - let mut request: Vec = Vec::new(); - it.write_next_to_request(&mut request).unwrap().unwrap(); + let cols = &[ + col_spec("a", ColumnType::TinyInt), + col_spec("b", ColumnType::TinyInt), + ]; + let request = serialize_batch_value_iterators(&mut iters, cols); assert_eq!(request, vec![0, 2, 0, 0, 0, 1, 1, 0, 0, 0, 1, 2]); } { - let mut request: Vec = Vec::new(); - it.write_next_to_request(&mut request).unwrap().unwrap(); + let cols = &[ + col_spec("a", ColumnType::TinyInt), + col_spec("b", ColumnType::TinyInt), + 
col_spec("c", ColumnType::TinyInt), + col_spec("d", ColumnType::TinyInt), + ]; + let request = serialize_batch_value_iterators(&mut iters, cols); assert_eq!( request, vec![0, 4, 0, 0, 0, 1, 2, 0, 0, 0, 1, 3, 0, 0, 0, 1, 4, 0, 0, 0, 1, 5] @@ -1198,28 +1259,42 @@ fn slice_batch_values() { } { - let mut request: Vec = Vec::new(); - it.write_next_to_request(&mut request).unwrap().unwrap(); + let cols = &[col_spec("a", ColumnType::TinyInt)]; + let request = serialize_batch_value_iterators(&mut iters, cols); assert_eq!(request, vec![0, 1, 0, 0, 0, 1, 6]); } - assert_eq!(it.write_next_to_request(&mut Vec::new()), None); + assert_eq!(iters.0.write_next_to_request(&mut Vec::new()), None); + + let ctx = RowSerializationContext { columns: &[] }; + let mut data = Vec::new(); + let mut writer = RowWriter::new(&mut data); + assert!(iters.1.serialize_next(&ctx, &mut writer).is_none()); } #[test] fn vec_batch_values() { let batch_values: Vec> = vec![vec![1, 2], vec![2, 3, 4, 5], vec![6]]; + let legacy_batch_values = LegacyBatchValuesAdapter(&batch_values); - let mut it = batch_values.batch_values_iter(); + let mut iters = make_batch_value_iters(&batch_values, &legacy_batch_values); { - let mut request: Vec = Vec::new(); - it.write_next_to_request(&mut request).unwrap().unwrap(); + let cols = &[ + col_spec("a", ColumnType::TinyInt), + col_spec("b", ColumnType::TinyInt), + ]; + let request = serialize_batch_value_iterators(&mut iters, cols); assert_eq!(request, vec![0, 2, 0, 0, 0, 1, 1, 0, 0, 0, 1, 2]); } { - let mut request: Vec = Vec::new(); - it.write_next_to_request(&mut request).unwrap().unwrap(); + let cols = &[ + col_spec("a", ColumnType::TinyInt), + col_spec("b", ColumnType::TinyInt), + col_spec("c", ColumnType::TinyInt), + col_spec("d", ColumnType::TinyInt), + ]; + let request = serialize_batch_value_iterators(&mut iters, cols); assert_eq!( request, vec![0, 4, 0, 0, 0, 1, 2, 0, 0, 0, 1, 3, 0, 0, 0, 1, 4, 0, 0, 0, 1, 5] @@ -1227,19 +1302,24 @@ fn vec_batch_values() { } { - 
let mut request: Vec = Vec::new(); - it.write_next_to_request(&mut request).unwrap().unwrap(); + let cols = &[col_spec("a", ColumnType::TinyInt)]; + let request = serialize_batch_value_iterators(&mut iters, cols); assert_eq!(request, vec![0, 1, 0, 0, 0, 1, 6]); } } #[test] fn tuple_batch_values() { - fn check_twoi32_tuple(tuple: impl LegacyBatchValues, size: usize) { - let mut it = tuple.batch_values_iter(); + fn check_twoi32_tuple(tuple: impl BatchValues + LegacyBatchValues, size: usize) { + let legacy_tuple = LegacyBatchValuesAdapter(&tuple); + let mut iters = make_batch_value_iters(&tuple, &legacy_tuple); for i in 0..size { - let mut request: Vec = Vec::new(); - it.write_next_to_request(&mut request).unwrap().unwrap(); + let cols = &[ + col_spec("a", ColumnType::Int), + col_spec("b", ColumnType::Int), + ]; + + let request = serialize_batch_value_iterators(&mut iters, cols); let mut expected: Vec = Vec::new(); let i: i32 = i.try_into().unwrap(); @@ -1426,13 +1506,17 @@ fn tuple_batch_values() { #[allow(clippy::needless_borrow)] fn ref_batch_values() { let batch_values: &[&[i8]] = &[&[1, 2], &[2, 3, 4, 5], &[6]]; + let cols = &[ + col_spec("a", ColumnType::TinyInt), + col_spec("b", ColumnType::TinyInt), + ]; - return check_ref_bv::<&&&&&[&[i8]]>(&&&&batch_values); - fn check_ref_bv(batch_values: B) { - let mut it = ::batch_values_iter(&batch_values); + return check_ref_bv::<&&&&&[&[i8]]>(&&&&batch_values, cols); + fn check_ref_bv(batch_values: B, cols: &[ColumnSpec]) { + let legacy_batch_values = LegacyBatchValuesAdapter(&batch_values); + let mut iters = make_batch_value_iters(&batch_values, &legacy_batch_values); - let mut request: Vec = Vec::new(); - it.write_next_to_request(&mut request).unwrap().unwrap(); + let request = serialize_batch_value_iterators(&mut iters, cols); assert_eq!(request, vec![0, 2, 0, 0, 0, 1, 1, 0, 0, 0, 1, 2]); } } @@ -1440,18 +1524,32 @@ fn ref_batch_values() { #[test] #[allow(clippy::needless_borrow)] fn check_ref_tuple() { - fn 
assert_has_batch_values(bv: BV) { - let mut it = bv.batch_values_iter(); - let mut request: Vec = Vec::new(); - while let Some(res) = it.write_next_to_request(&mut request) { - res.unwrap() + fn assert_has_batch_values( + bv: BV, + cols: &[&[ColumnSpec]], + ) { + let legacy_bv = LegacyBatchValuesAdapter(&bv); + let mut iters = make_batch_value_iters(&bv, &legacy_bv); + for cols in cols { + serialize_batch_value_iterators(&mut iters, cols); } } let s = String::from("hello"); let tuple: ((&str,),) = ((&s,),); - assert_has_batch_values::<&_>(&tuple); + let cols: &[&[ColumnSpec]] = &[&[col_spec("a", ColumnType::Text)]]; + assert_has_batch_values::<&_>(&tuple, cols); let tuple2: ((&str, &str), (&str, &str)) = ((&s, &s), (&s, &s)); - assert_has_batch_values::<&_>(&tuple2); + let cols: &[&[ColumnSpec]] = &[ + &[ + col_spec("a", ColumnType::Text), + col_spec("b", ColumnType::Text), + ], + &[ + col_spec("a", ColumnType::Text), + col_spec("b", ColumnType::Text), + ], + ]; + assert_has_batch_values::<&_>(&tuple2, cols); } #[test] @@ -1469,5 +1567,24 @@ fn check_batch_values_iterator_is_not_lending() { ]; let _ = v; } - f(((10,), (11,))) + fn g(bv: impl BatchValues) { + let mut it = bv.batch_values_iter(); + let mut it2 = bv.batch_values_iter(); + + let columns = &[col_spec("a", ColumnType::Int)]; + let ctx = RowSerializationContext { columns }; + let mut data = Vec::new(); + let mut writer = RowWriter::new(&mut data); + + // Make sure we can hold all these at the same time + let v = vec![ + it.serialize_next(&ctx, &mut writer).unwrap().unwrap(), + it2.serialize_next(&ctx, &mut writer).unwrap().unwrap(), + it.serialize_next(&ctx, &mut writer).unwrap().unwrap(), + it2.serialize_next(&ctx, &mut writer).unwrap().unwrap(), + ]; + let _ = v; + } + f(((10,), (11,))); + g(((10,), (11,))); } From fbeafd72e51725f251a1bc7be513f3167360d39e Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Thu, 14 Dec 2023 19:27:58 +0100 Subject: [PATCH 098/107] serialize/row: remove 
SerializedValues::to_old_serialized_values Migration of batches to the new API is complete, so this method is no longer needed and can finally be removed. --- scylla-cql/src/types/serialize/row.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index 8bf7d965ed..3c68453391 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -794,14 +794,6 @@ impl SerializedValues { element_count: values_num, }) } - - /// Temporary function, to be removed when we implement new batching API (right now it is needed in frame::request::mod.rs tests) - // TODO: Remove - pub fn to_old_serialized_values(&self) -> LegacySerializedValues { - let mut frame = Vec::new(); - self.write_to_request(&mut frame); - LegacySerializedValues::new_from_frame(&mut frame.as_slice(), false).unwrap() - } } impl Default for SerializedValues { From 8c06b18f37ffe5b88643f8ed4eef44d4726bbb07 Mon Sep 17 00:00:00 2001 From: RoDmitry Date: Sat, 16 Dec 2023 19:06:52 +0400 Subject: [PATCH 099/107] impl Default for MaybeUnset --- scylla-cql/src/frame/value.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scylla-cql/src/frame/value.rs b/scylla-cql/src/frame/value.rs index f4c4d809c1..c29e73b75f 100644 --- a/scylla-cql/src/frame/value.rs +++ b/scylla-cql/src/frame/value.rs @@ -44,8 +44,9 @@ pub struct Unset; pub struct Counter(pub i64); /// Enum providing a way to represent a value that might be unset -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Default)] pub enum MaybeUnset { + #[default] Unset, Set(V), } From 778a46c557d08dc4a9dd8c4835d95c0b2c2fe81f Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Mon, 18 Dec 2023 03:11:10 +0100 Subject: [PATCH 100/107] docs: update batch.md to warn about automatic preparation Add warnings about the fact that Session::batch now automatically prepares simple statements with non-empty value lists. 
Update the first example not to use such statements in order not to promote suboptimal usage of the API. --- docs/source/queries/batch.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/source/queries/batch.md b/docs/source/queries/batch.md index e316d4243f..4d9694c45e 100644 --- a/docs/source/queries/batch.md +++ b/docs/source/queries/batch.md @@ -17,7 +17,7 @@ use scylla::prepared_statement::PreparedStatement; let mut batch: Batch = Default::default(); // Add a simple statement to the batch using its text -batch.append_statement("INSERT INTO ks.tab(a, b) VALUES(?, ?)"); +batch.append_statement("INSERT INTO ks.tab(a, b) VALUES(1, 2)"); // Add a simple statement created manually to the batch let simple: Query = Query::new("INSERT INTO ks.tab (a, b) VALUES(3, 4)"); @@ -30,7 +30,7 @@ let prepared: PreparedStatement = session batch.append_statement(prepared); // Specify bound values to use with each statement -let batch_values = ((1_i32, 2_i32), +let batch_values = ((), (), (5_i32,)); @@ -40,6 +40,13 @@ session.batch(&batch, batch_values).await?; # } ``` +> ***Warning***\ +> Using simple statements with bind markers in batches is strongly discouraged. +> For each simple statement with a non-empty list of values in the batch, +> the driver will send a prepare request, and it will be done **sequentially**. +> Results of preparation are not cached between `Session::batch` calls. +> Consider preparing the statements before putting them into the batch. + ### Preparing a batch Instead of preparing each statement individually, it's possible to prepare a whole batch at once: @@ -129,6 +136,8 @@ let batch_values = ((1_i32, 2_i32), // Tuple with two values for the first state ()); // Empty tuple/unit for the third statement // Run the batch +// Note that the driver will prepare the first two statements, due to them +// not being prepared and having a non-empty list of values. 
session.batch(&batch, batch_values).await?; # Ok(()) # } From abee4376013ee277c7ddb68f9b2e7263bcc7ee0f Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Mon, 18 Dec 2023 02:52:39 +0100 Subject: [PATCH 101/107] serialize: add `ValueAdapter` and `ValueListAdapter` Add newtype wrappers that implement `SerializeRow`/`SerializeCql` if the type wrapped over implements `ValueList`/`Value`. It should be useful when migrating codebases to the new traits gradually so that objects from the unchanged code which only implement the old trait can be passed to the code which was already updated to the new traits. --- scylla-cql/src/types/serialize/row.rs | 56 ++++++++++++++++++++++++- scylla-cql/src/types/serialize/value.rs | 44 ++++++++++++++++++- 2 files changed, 97 insertions(+), 3 deletions(-) diff --git a/scylla-cql/src/types/serialize/row.rs b/scylla-cql/src/types/serialize/row.rs index b85c64a907..451edb85ca 100644 --- a/scylla-cql/src/types/serialize/row.rs +++ b/scylla-cql/src/types/serialize/row.rs @@ -418,6 +418,34 @@ macro_rules! impl_serialize_row_via_value_list { }; } +/// Implements [`SerializeRow`] if the type wrapped over implements [`ValueList`]. +/// +/// See the [`impl_serialize_row_via_value_list`] macro on information about +/// the properties of the [`SerializeRow`] implementation. +pub struct ValueListAdapter(pub T); + +impl SerializeRow for ValueListAdapter +where + T: ValueList, +{ + #[inline] + fn serialize( + &self, + ctx: &RowSerializationContext<'_>, + writer: &mut RowWriter, + ) -> Result<(), SerializationError> { + serialize_legacy_row(&self.0, ctx, writer) + } + + #[inline] + fn is_empty(&self) -> bool { + match self.0.serialized() { + Ok(s) => s.is_empty(), + Err(_) => false, + } + } +} + /// Serializes an object implementing [`ValueList`] by using the [`RowWriter`] /// interface. 
/// @@ -822,11 +850,13 @@ impl<'a> Iterator for SerializedValuesIterator<'a> { #[cfg(test)] mod tests { + use std::borrow::Cow; use std::collections::BTreeMap; use crate::frame::response::result::{ColumnSpec, ColumnType, TableSpec}; use crate::frame::types::RawValue; - use crate::frame::value::{LegacySerializedValues, MaybeUnset, ValueList}; + use crate::frame::value::{LegacySerializedValues, MaybeUnset, SerializedResult, ValueList}; + use crate::types::serialize::row::ValueListAdapter; use crate::types::serialize::{RowWriter, SerializationError}; use super::{ @@ -973,6 +1003,30 @@ mod tests { } } + #[test] + fn test_legacy_wrapper() { + struct Foo; + impl ValueList for Foo { + fn serialized(&self) -> SerializedResult<'_> { + let mut values = LegacySerializedValues::new(); + values.add_value(&123i32)?; + values.add_value(&321i32)?; + Ok(Cow::Owned(values)) + } + } + + let columns = &[ + col_spec("a", ColumnType::Int), + col_spec("b", ColumnType::Int), + ]; + let buf = do_serialize(ValueListAdapter(Foo), columns); + let expected = vec![ + 0, 0, 0, 4, 0, 0, 0, 123, // First value + 0, 0, 0, 4, 0, 0, 1, 65, // Second value + ]; + assert_eq!(buf, expected); + } + fn get_typeck_err(err: &SerializationError) -> &BuiltinTypeCheckError { match err.0.downcast_ref() { Some(err) => err, diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index fe4e63789c..4b5f9aae27 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -912,6 +912,26 @@ macro_rules! impl_serialize_cql_via_value { }; } +/// Implements [`SerializeCql`] if the type wrapped over implements [`Value`]. +/// +/// See the [`impl_serialize_cql_via_value`] macro on information about +/// the properties of the [`SerializeCql`] implementation. 
+pub struct ValueAdapter(pub T); + +impl SerializeCql for ValueAdapter +where + T: Value, +{ + #[inline] + fn serialize<'b>( + &self, + _typ: &ColumnType, + writer: CellWriter<'b>, + ) -> Result, SerializationError> { + serialize_legacy_value(&self.0, writer) + } +} + /// Serializes a value implementing [`Value`] by using the [`CellWriter`] /// interface. /// @@ -1470,12 +1490,12 @@ mod tests { use std::collections::BTreeMap; use crate::frame::response::result::{ColumnType, CqlValue}; - use crate::frame::value::{MaybeUnset, Unset, Value}; + use crate::frame::value::{MaybeUnset, Unset, Value, ValueTooBig}; use crate::types::serialize::value::{ BuiltinSerializationError, BuiltinSerializationErrorKind, BuiltinTypeCheckError, BuiltinTypeCheckErrorKind, MapSerializationErrorKind, MapTypeCheckErrorKind, SetOrListSerializationErrorKind, SetOrListTypeCheckErrorKind, TupleSerializationErrorKind, - TupleTypeCheckErrorKind, + TupleTypeCheckErrorKind, ValueAdapter, }; use crate::types::serialize::{CellWriter, SerializationError}; @@ -1531,6 +1551,26 @@ mod tests { t.serialize(typ, writer).unwrap_err() } + #[test] + fn test_legacy_wrapper() { + struct Foo; + impl Value for Foo { + fn serialize(&self, buf: &mut Vec) -> Result<(), ValueTooBig> { + let s = "Ala ma kota"; + buf.extend_from_slice(&(s.len() as i32).to_be_bytes()); + buf.extend_from_slice(s.as_bytes()); + Ok(()) + } + } + + let buf = do_serialize(ValueAdapter(Foo), &ColumnType::Text); + let expected = vec![ + 0, 0, 0, 11, // Length of the value + 65, 108, 97, 32, 109, 97, 32, 107, 111, 116, 97, // The string + ]; + assert_eq!(buf, expected); + } + fn get_typeck_err(err: &SerializationError) -> &BuiltinTypeCheckError { match err.0.downcast_ref() { Some(err) => err, From 1f66942b2becfe5fd6ce16c495a5fcf87a5a6cc7 Mon Sep 17 00:00:00 2001 From: Piotr Dulikowski Date: Fri, 15 Dec 2023 18:33:29 +0100 Subject: [PATCH 102/107] docs: add migration guide for pre-0.11 serialization In 0.11, the serialization traits are reworked 
and some guidance will be needed to adapt the existing code to the new traits. Add a document which explains the reason behind the changes, potential issues to watch for and tools that should make migration easier. --- docs/source/SUMMARY.md | 3 + docs/source/contents.rst | 1 + docs/source/index.md | 1 + .../migration-guides/0.11-serialization.md | 100 ++++++++++++++++++ .../migration-guides/migration-guides.md | 11 ++ 5 files changed, 116 insertions(+) create mode 100644 docs/source/migration-guides/0.11-serialization.md create mode 100644 docs/source/migration-guides/migration-guides.md diff --git a/docs/source/SUMMARY.md b/docs/source/SUMMARY.md index 471e8efdad..43f3fcd612 100644 --- a/docs/source/SUMMARY.md +++ b/docs/source/SUMMARY.md @@ -7,6 +7,9 @@ - [Running Scylla using Docker](quickstart/scylla-docker.md) - [Connecting and running a simple query](quickstart/example.md) +- [Migration guides](migration-guides/migration-guides.md) + - [Adjusting code to changes in serialization API introduced in 0.11](migration-guides/0.11-serialization.md) + - [Connecting to the cluster](connecting/connecting.md) - [Compression](connecting/compression.md) - [Authentication](connecting/authentication.md) diff --git a/docs/source/contents.rst b/docs/source/contents.rst index 0e0446baf7..5bc4a37c9e 100644 --- a/docs/source/contents.rst +++ b/docs/source/contents.rst @@ -13,6 +13,7 @@ retry-policy/retry-policy speculative-execution/speculative metrics/metrics + migration-guides/migration-guides logging/logging tracing/tracing schema/schema diff --git a/docs/source/index.md b/docs/source/index.md index c5e1191b1f..d2a6b79313 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -13,6 +13,7 @@ Although optimized for Scylla, the driver is also compatible with [Apache Cassan ## Contents * [Quick start](quickstart/quickstart.md) - Setting up a Rust project using `scylla-rust-driver` and running a few queries +* [Migration guides](migration-guides/migration-guides.md) - 
How to update the code that used an older version of this driver * [Connecting to the cluster](connecting/connecting.md) - Configuring a connection to scylla cluster * [Making queries](queries/queries.md) - Making different types of queries (simple, prepared, batch, paged) * [Execution profiles](execution-profiles/execution-profiles.md) - Grouping query execution configuration options together and switching them all at once diff --git a/docs/source/migration-guides/0.11-serialization.md b/docs/source/migration-guides/0.11-serialization.md new file mode 100644 index 0000000000..cd9b985bf9 --- /dev/null +++ b/docs/source/migration-guides/0.11-serialization.md @@ -0,0 +1,100 @@ +# Adjusting code to changes in serialization API introduced in 0.11 + +## Background + +When executing a statement through the CQL protocol, values for the bind markers are sent in a serialized, untyped form. In order to implement a safer and more robust interface, drivers can use the information returned after preparing a statement to check the type of data provided by the user against the actual types of the bind markers. + +Before 0.11, the driver couldn't do this kind of type checking. For example, in the case of non-batch queries, the only information about the user data it has is that it implements `ValueList` - defined as follows: + +```rust +pub trait ValueList { + fn serialized(&self) -> SerializedResult<'_>; + fn write_to_request(&self, buf: &mut impl BufMut) -> Result<(), SerializeValuesError>; +} +``` + +The driver would naively serialize the data and hope that the user took care to send correct types of values. Failing to do so would, in the best case, fail on the DB-side validation; in the worst case, the data in its raw form may be reinterpreted as another type in an unintended manner. 
+ +Another problem is that the information from the prepared statement response is required to robustly serialize user defined types, as UDTs require their fields to be serialized in the same order as they are defined in the database schema. The `IntoUserType` macro which implements Rust struct -> UDT serialization just expects that the order of the Rust struct fields matches the schema, but ensuring this can be very cumbersome for the users. + +In version 0.11, a new set of traits is introduced and the old ones are deprecated. The new traits receive more information during serialization such as names of the column/bind markers and their types, which allows to fix the issues mentioned in the previous section. + +## Old vs. new + +Both the old and the new APIs are based on three core traits: + +| Old API | New API | Description | +| ---------------------------------------------------- | -------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Value` | `SerializeCql` | A type that can serialize itself to a single CQL value. For example, `i32` serializes itself into a representation that is compatible with the CQL `int` type. | +| `ValueList` | `SerializeRow` | A type that can serialize itself as a list of values for a CQL statement. For example, a `(i32, &str)` produces a list of two values which can be used in a query with two bind markers, e.g. `SELECT * FROM table WHERE pk = ? AND ck = ?`. Optionally, values in the produced list may be associated with names which is useful when using it with a query with named bind markers, e.g. 
`SELECT * FROM table WHERE pk = :pk AND ck = :ck`. | +| `LegacyBatchValues` (previously named `BatchValues`) | `BatchValues` | Represents a source of data for a batch request. It is essentially equivalent to a list of `ValueList`, one for each statement in the batch. For example, `((1, 2), (3, 4, 5))` can be used for a batch with two statements, the first one having two bind markers and the second one having three. | + +All methods which take one of the old traits were changed to take the new trait - notably, this includes `Session::query`, `(Caching)Session::execute`, `(Caching)Session::batch`. + +The driver comes with a set of `impl`s of those traits which allow to represent any CQL type (for example, see [Data Types](../data-types/data-types.md) page for a list of types for which `Value` and `SerializeCql` is implemented). If the driver implements an old trait for some type, then it also implements the new trait for the same type. + +## Migration scenarios + +### Different default behavior in `SerializeRow`/`SerializeCql` macros + +By default, the `SerializeRow` and `SerializeCql` macros **will match the fields in the Rust struct by name to bind marker names** (in case of `SerializeRow`) **or UDT field names** (in case of `SerializeCql`). This is different from the old `ValueList` and `IntoUserType` macros which did not look at the field names at all and would expect the user to order the fields correctly. While the new behavior is much more ergonomic, you might have reasons not to use it. + +> **NOTE:** The deserialization macro counterparts `FromRow` and `FromUserType` have the same limitation as the old serialization macros - they require struct fields to be properly ordered. While a similar rework is planned for the deserialization traits in a future release, for the time being it might not be worth keeping the column names in sync with the database. 
+ +In order to bring the old behavior to the new macros (the only difference being type checking which cannot be disabled right now) you can configure it using attributes, as shown in the snippet below: + +```rust +// The exact same attributes apply to the `SerializeRow` macro and their +// effect is completely analogous. +#[derive(SerializeCql)] +#[scylla(flavor = "enforce_order", skip_name_checks)] +struct Person { + name: String, + surname: String, + age: i16, +} +``` + +Refer to the API reference page for the `SerializeRow` and `SerializeCql` macros in the `scylla` crate to learn more about the supported attributes and their meaning. + +### Preparing is mandatory with a non-empty list of values + +> **NOTE:** The considerations in this section only concerns users of the `Session` API, `CachingSession` is not affected as it already does preparation before execute and caches the result. + +As explained in the [Background](#background) section, the driver uses data returned from the database after preparing a statement in order to implement type checking. As the new API makes type checking mandatory, **the driver must prepare the statement** so that the data for the bind markers can be type checked. It is done in case of the existing methods which used to send unprepared statements: `Session::query` and `Session::batch`. + +> **NOTE:** The driver will skip preparation if it detects that the list of values for the statement is empty, as there is nothing to be type checked. + +If you send simple statements along with non-empty lists of values, the slowdown will be as follows: + +- For `Session::query`, the driver will prepare the statement before sending it, incurring an additional round-trip. +- For `Session::batch`, the driver will send a prepare request for each *unique* unprepared statement with a non-empty list of values. 
**This is done serially!** + +In both cases, if the additional roundtrips are unacceptable, you should prepare the statements beforehand and reuse them - which aligns with our general recommendation against use of simple statements in performance sensitive scenarios. + +### Migrating from old to new traits *gradually* + +In some cases, migration will be as easy as changing occurrences of `IntoUserType` to `SerializeCql` and `ValueList` to `SerializeRow` and adding some attributes for procedural macros. However, if you have a large enough codebase or some custom, complicated implementations of the old traits then you might not want to migrate everything at once. To support gradual migration, the old traits were not removed but rather deprecated, and we introduced some additional utilities. + +#### Converting an object implementing an old trait to a new trait + +We provide a number of newtype wrappers: + +- `ValueAdapter` - implements `SerializeCql` if the type wrapped over implements `Value`, +- `ValueListAdapter` - implements `SerializeRow` if the type wrapped over implements `ValueList`, +- `LegacyBatchValuesAdapter` - implements `BatchValues` if the type wrapped over implements `LegacyBatchValues`. + +Note that these wrappers are not zero cost and incur some overhead: in case of `ValueAdapter` and `ValueListAdapter`, the data is first written into a newly allocated buffer and then rewritten to the final buffer. In case of `LegacyBatchValuesAdapter` there shouldn't be any additional allocations unless the implementation has an efficient, non-default `Self::LegacyBatchValuesIterator::write_next_to_request` implementation (which is not the case for the built-in `impl`s). + +Naturally, the implementations provided by the wrappers are not type safe as they directly use methods from the old traits. + +Conversion in the other direction is not possible. 
+ +#### Custom implementations of old traits + +It is possible to directly generate an `impl` of `SerializeRow` and `SerializeCql` on a type which implements, respectively, `ValueList` or `Value`, without using the wrappers from the previous section. The following macros are provided: + +- `impl_serialize_cql_via_value` - implements `SerializeCql` if the type wrapped over implements `Value`, +- `impl_serialize_row_via_value_list` - implements `SerializeRow` if the type wrapped over implements `ValueList`, + +The implementations are practically as those generated by the wrappers described in the previous section. diff --git a/docs/source/migration-guides/migration-guides.md b/docs/source/migration-guides/migration-guides.md new file mode 100644 index 0000000000..554af6e41a --- /dev/null +++ b/docs/source/migration-guides/migration-guides.md @@ -0,0 +1,11 @@ +# Migration guides + +- [Serialization changes in version 0.11](0.11-serialization.md) + +```eval_rst +.. toctree:: + :hidden: + :glob: + + 0.11-serialization +``` From eb8a1e10902d812a859df9d959d0751b389d928b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 18 Dec 2023 02:51:03 +0100 Subject: [PATCH 103/107] macros: Add `force_exact_match` attribute to SerializeCql This commit changes default behavior of SerializeCql: now it doesn't return error if there is additional field in UDT, not present in Rust struct. This is to support adding fields to UDT in database. New attribute, called `force_exact_match`, is added to opt-in to previous behavior. Documentation is adjusted, and failing test fragments are removed. 
--- scylla-cql/src/macros.rs | 23 ++++++-- scylla-cql/src/types/serialize/value.rs | 48 ---------------- scylla-macros/src/serialize/cql.rs | 73 ++++++++++++++++++++----- 3 files changed, 77 insertions(+), 67 deletions(-) diff --git a/scylla-cql/src/macros.rs b/scylla-cql/src/macros.rs index 6c6f2b7243..0593112668 100644 --- a/scylla-cql/src/macros.rs +++ b/scylla-cql/src/macros.rs @@ -18,8 +18,18 @@ pub use scylla_macros::ValueList; /// /// At the moment, only structs with named fields are supported. /// -/// Serialization will fail if there are some fields in the UDT that don't match -/// to any of the Rust struct fields, _or vice versa_. +/// Serialization will fail if there are some fields in the Rust struct that don't match +/// to any of the UDT fields. +/// +/// If there are fields in UDT that are not present in Rust definition: +/// - serialization will succeed in "match_by_name" flavor (default). Missing +/// fields in the middle of UDT will be sent as NULLs, missing fields at the end will not be sent +/// at all. +/// - serialization will succeed in "enforce_order" flavor if a suffix of UDT fields is missing. If there are missing fields in the +/// middle it will fail. Note that if "skip_name_checks" is enabled, and the types happen to match, +/// it is possible for serialization to succeed with unexpected result. +/// This behavior is the default to support ALTERing UDTs by adding new fields. +/// You can require exact match of fields using `force_exact_match` attribute. 
/// /// In case of failure, either [`BuiltinTypeCheckError`](crate::types::serialize::value::BuiltinTypeCheckError) /// or [`BuiltinSerializationError`](crate::types::serialize::value::BuiltinSerializationError) @@ -42,7 +52,7 @@ pub use scylla_macros::ValueList; /// struct MyUdt { /// a: i32, /// b: Option, -/// c: Vec, +/// // No "c" field - it is not mandatory by default for all fields to be present /// } /// ``` /// @@ -87,7 +97,7 @@ pub use scylla_macros::ValueList; /// macro itself, so in those cases the user must provide an alternative path /// to either the `scylla` or `scylla-cql` crate. /// -/// `#[scylla(skip_name_checks)] +/// `#[scylla(skip_name_checks)]` /// /// _Specific only to the `enforce_order` flavor._ /// @@ -96,6 +106,11 @@ pub use scylla_macros::ValueList; /// struct field names and UDT field names, i.e. it's OK if i-th field has a /// different name in Rust and in the UDT. Fields are still being type-checked. /// +/// `#[scylla(force_exact_match)]` +/// +/// Forces Rust struct to have all the fields present in UDT, otherwise +/// serialization fails. 
+/// /// # Field attributes /// /// `#[scylla(rename = "name_in_the_udt")]` diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 4b5f9aae27..062f369e7a 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -2145,30 +2145,6 @@ mod tests { ) )); - let typ_unexpected_field = ColumnType::UserDefinedType { - type_name: "typ".to_string(), - keyspace: "ks".to_string(), - field_types: vec![ - ("a".to_string(), ColumnType::Text), - ("b".to_string(), ColumnType::Int), - ( - "c".to_string(), - ColumnType::List(Box::new(ColumnType::BigInt)), - ), - // Unexpected field - ("d".to_string(), ColumnType::Counter), - ], - }; - - let err = udt - .serialize(&typ_unexpected_field, CellWriter::new(&mut data)) - .unwrap_err(); - let err = err.0.downcast_ref::().unwrap(); - assert!(matches!( - err.kind, - BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::NoSuchFieldInUdt { .. }) - )); - let typ_wrong_type = ColumnType::UserDefinedType { type_name: "typ".to_string(), keyspace: "ks".to_string(), @@ -2349,30 +2325,6 @@ mod tests { ) )); - let typ_unexpected_field = ColumnType::UserDefinedType { - type_name: "typ".to_string(), - keyspace: "ks".to_string(), - field_types: vec![ - ("a".to_string(), ColumnType::Text), - ("b".to_string(), ColumnType::Int), - ( - "c".to_string(), - ColumnType::List(Box::new(ColumnType::BigInt)), - ), - // Unexpected field - ("d".to_string(), ColumnType::Counter), - ], - }; - - let err = - <_ as SerializeCql>::serialize(&udt, &typ_unexpected_field, CellWriter::new(&mut data)) - .unwrap_err(); - let err = err.0.downcast_ref::().unwrap(); - assert!(matches!( - err.kind, - BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::NoSuchFieldInUdt { .. 
}) - )); - let typ_unexpected_field = ColumnType::UserDefinedType { type_name: "typ".to_string(), keyspace: "ks".to_string(), diff --git a/scylla-macros/src/serialize/cql.rs b/scylla-macros/src/serialize/cql.rs index 1aa9d05835..3ba74e671e 100644 --- a/scylla-macros/src/serialize/cql.rs +++ b/scylla-macros/src/serialize/cql.rs @@ -18,6 +18,9 @@ struct Attributes { #[darling(default)] skip_name_checks: bool, + + #[darling(default)] + force_exact_match: bool, } impl Attributes { @@ -216,6 +219,37 @@ impl<'a> Generator for FieldSortingGenerator<'a> { let udt_field_names = rust_field_names.clone(); // For now, it's the same let field_types = self.ctx.fields.iter().map(|f| &f.ty).collect::>(); + let missing_rust_field_expression: syn::Expr = if self.ctx.attributes.force_exact_match { + parse_quote! { + return ::std::result::Result::Err(mk_typck_err( + #crate_path::UdtTypeCheckErrorKind::NoSuchFieldInUdt { + field_name: <_ as ::std::clone::Clone>::clone(field_name), + } + )) + } + } else { + parse_quote! { + skipped_fields += 1 + } + }; + + let serialize_missing_nulls_statement: syn::Stmt = if self.ctx.attributes.force_exact_match + { + // Not sure if there is better way to create no-op statement + // parse_quote!{} / parse_quote!{ ; } doesn't work + parse_quote! { + (); + } + } else { + parse_quote! { + while skipped_fields > 0 { + let sub_builder = #crate_path::CellValueBuilder::make_sub_writer(&mut builder); + sub_builder.set_null(); + skipped_fields -= 1; + } + } + }; + // Declare helper lambdas for creating errors statements.push(self.ctx.generate_mk_typck_err()); statements.push(self.ctx.generate_mk_ser_err()); @@ -241,6 +275,16 @@ impl<'a> Generator for FieldSortingGenerator<'a> { let mut remaining_count = #field_count; }); + // We want to send nulls for missing rust fields in the middle, but send + // nothing for those fields at the end of UDT. While executing the loop + // we don't know if there will be any more present fields. 
The solution is + // to count how many fields we missed and send them when we find any present field. + if !self.ctx.attributes.force_exact_match { + statements.push(parse_quote! { + let mut skipped_fields = 0; + }); + } + // Turn the cell writer into a value builder statements.push(parse_quote! { let mut builder = #crate_path::CellWriter::into_value_builder(writer); @@ -253,6 +297,7 @@ impl<'a> Generator for FieldSortingGenerator<'a> { match ::std::string::String::as_str(field_name) { #( #udt_field_names => { + #serialize_missing_nulls_statement let sub_builder = #crate_path::CellValueBuilder::make_sub_writer(&mut builder); match <#field_types as #crate_path::SerializeCql>::serialize(&self.#rust_field_idents, field_type, sub_builder) { ::std::result::Result::Ok(_proof) => {} @@ -271,11 +316,7 @@ impl<'a> Generator for FieldSortingGenerator<'a> { } } )* - _ => return ::std::result::Result::Err(mk_typck_err( - #crate_path::UdtTypeCheckErrorKind::NoSuchFieldInUdt { - field_name: <_ as ::std::clone::Clone>::clone(field_name), - } - )), + _ => #missing_rust_field_expression, } } }); @@ -396,16 +437,18 @@ impl<'a> Generator for FieldOrderedGenerator<'a> { }); } - // Check whether there are some fields remaining - statements.push(parse_quote! { - if let Some((field_name, typ)) = field_iter.next() { - return ::std::result::Result::Err(mk_typck_err( - #crate_path::UdtTypeCheckErrorKind::NoSuchFieldInUdt { - field_name: <_ as ::std::clone::Clone>::clone(field_name), - } - )); - } - }); + if self.ctx.attributes.force_exact_match { + // Check whether there are some fields remaining + statements.push(parse_quote! { + if let Some((field_name, typ)) = field_iter.next() { + return ::std::result::Result::Err(mk_typck_err( + #crate_path::UdtTypeCheckErrorKind::NoSuchFieldInUdt { + field_name: <_ as ::std::clone::Clone>::clone(field_name), + } + )); + } + }); + } parse_quote! 
{ fn serialize<'b>( From 77fb6e20aebbb98206e3a78ca4a3410d8ef5d9d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 18 Dec 2023 02:52:50 +0100 Subject: [PATCH 104/107] scylla-cql: Test new behavior of SerializeCql This commit adds tests for new behavior of SerializeCql - ignoring unknown UDT fields. It also adds tests for new attribute (`force_exact_match`). --- scylla-cql/src/types/serialize/value.rs | 229 +++++++++++++++++++++++- scylla/src/transport/cql_types_test.rs | 204 +++++++++++++++++++++ 2 files changed, 432 insertions(+), 1 deletion(-) diff --git a/scylla-cql/src/types/serialize/value.rs b/scylla-cql/src/types/serialize/value.rs index 062f369e7a..d337022623 100644 --- a/scylla-cql/src/types/serialize/value.rs +++ b/scylla-cql/src/types/serialize/value.rs @@ -1490,7 +1490,7 @@ mod tests { use std::collections::BTreeMap; use crate::frame::response::result::{ColumnType, CqlValue}; - use crate::frame::value::{MaybeUnset, Unset, Value, ValueTooBig}; + use crate::frame::value::{Counter, MaybeUnset, Unset, Value, ValueTooBig}; use crate::types::serialize::value::{ BuiltinSerializationError, BuiltinSerializationErrorKind, BuiltinTypeCheckError, BuiltinTypeCheckErrorKind, MapSerializationErrorKind, MapTypeCheckErrorKind, @@ -2109,6 +2109,95 @@ mod tests { assert_eq!(reference, udt); } + #[test] + fn test_udt_serialization_with_missing_rust_fields_at_end() { + let udt = TestUdtWithFieldSorting::default(); + + let typ_normal = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + ( + "c".to_string(), + ColumnType::List(Box::new(ColumnType::BigInt)), + ), + ], + }; + + let typ_unexpected_field = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + ( + "c".to_string(), + 
ColumnType::List(Box::new(ColumnType::BigInt)), + ), + // Unexpected fields + ("d".to_string(), ColumnType::Counter), + ("e".to_string(), ColumnType::Counter), + ], + }; + + let result_normal = do_serialize(&udt, &typ_normal); + let result_additional_field = do_serialize(&udt, &typ_unexpected_field); + + assert_eq!(result_normal, result_additional_field); + } + + #[derive(SerializeCql, Debug, PartialEq, Default)] + #[scylla(crate = crate)] + struct TestUdtWithFieldSorting2 { + a: String, + b: i32, + d: Option, + c: Vec, + } + + #[derive(SerializeCql, Debug, PartialEq, Default)] + #[scylla(crate = crate)] + struct TestUdtWithFieldSorting3 { + a: String, + b: i32, + d: Option, + e: Option, + c: Vec, + } + + #[test] + fn test_udt_serialization_with_missing_rust_field_in_middle() { + let udt = TestUdtWithFieldSorting::default(); + let udt2 = TestUdtWithFieldSorting2::default(); + let udt3 = TestUdtWithFieldSorting3::default(); + + let typ = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + // Unexpected fields + ("d".to_string(), ColumnType::Counter), + ("e".to_string(), ColumnType::Float), + // Remaining normal field + ( + "c".to_string(), + ColumnType::List(Box::new(ColumnType::BigInt)), + ), + ], + }; + + let result_1 = do_serialize(udt, &typ); + let result_2 = do_serialize(udt2, &typ); + let result_3 = do_serialize(udt3, &typ); + + assert_eq!(result_1, result_2); + assert_eq!(result_2, result_3); + } + #[test] fn test_udt_serialization_failing_type_check() { let typ_not_udt = ColumnType::Ascii; @@ -2268,6 +2357,44 @@ mod tests { assert_eq!(reference, udt); } + #[test] + fn test_udt_serialization_with_enforced_order_additional_field() { + let udt = TestUdtWithEnforcedOrder::default(); + + let typ_normal = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + 
("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + ( + "c".to_string(), + ColumnType::List(Box::new(ColumnType::BigInt)), + ), + ], + }; + + let typ_unexpected_field = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + ( + "c".to_string(), + ColumnType::List(Box::new(ColumnType::BigInt)), + ), + // Unexpected field + ("d".to_string(), ColumnType::Counter), + ], + }; + + let result_normal = do_serialize(&udt, &typ_normal); + let result_additional_field = do_serialize(&udt, &typ_unexpected_field); + + assert_eq!(result_normal, result_additional_field); + } + #[test] fn test_udt_serialization_with_enforced_order_failing_type_check() { let typ_not_udt = ColumnType::Ascii; @@ -2465,4 +2592,104 @@ mod tests { assert_eq!(reference, udt); } + + #[derive(SerializeCql, Debug, PartialEq, Eq, Default)] + #[scylla(crate = crate, force_exact_match)] + struct TestStrictUdtWithFieldSorting { + a: String, + b: i32, + c: Vec, + } + + #[test] + fn test_strict_udt_with_field_sorting_rejects_additional_field() { + let udt = TestStrictUdtWithFieldSorting::default(); + let mut data = Vec::new(); + + let typ_unexpected_field = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + ( + "c".to_string(), + ColumnType::List(Box::new(ColumnType::BigInt)), + ), + // Unexpected field + ("d".to_string(), ColumnType::Counter), + ], + }; + + let err = udt + .serialize(&typ_unexpected_field, CellWriter::new(&mut data)) + .unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::NoSuchFieldInUdt { .. 
}) + )); + + let typ_unexpected_field_middle = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + // Unexpected field + ("b_c".to_string(), ColumnType::Counter), + ( + "c".to_string(), + ColumnType::List(Box::new(ColumnType::BigInt)), + ), + ], + }; + + let err = udt + .serialize(&typ_unexpected_field_middle, CellWriter::new(&mut data)) + .unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::NoSuchFieldInUdt { .. }) + )); + } + + #[derive(SerializeCql, Debug, PartialEq, Eq, Default)] + #[scylla(crate = crate, flavor = "enforce_order", force_exact_match)] + struct TestStrictUdtWithEnforcedOrder { + a: String, + b: i32, + c: Vec, + } + + #[test] + fn test_strict_udt_with_enforced_order_rejects_additional_field() { + let udt = TestStrictUdtWithEnforcedOrder::default(); + let mut data = Vec::new(); + + let typ_unexpected_field = ColumnType::UserDefinedType { + type_name: "typ".to_string(), + keyspace: "ks".to_string(), + field_types: vec![ + ("a".to_string(), ColumnType::Text), + ("b".to_string(), ColumnType::Int), + ( + "c".to_string(), + ColumnType::List(Box::new(ColumnType::BigInt)), + ), + // Unexpected field + ("d".to_string(), ColumnType::Counter), + ], + }; + + let err = + <_ as SerializeCql>::serialize(&udt, &typ_unexpected_field, CellWriter::new(&mut data)) + .unwrap_err(); + let err = err.0.downcast_ref::().unwrap(); + assert!(matches!( + err.kind, + BuiltinTypeCheckErrorKind::UdtError(UdtTypeCheckErrorKind::NoSuchFieldInUdt { .. 
}) + )); + } } diff --git a/scylla/src/transport/cql_types_test.rs b/scylla/src/transport/cql_types_test.rs index 6c05fc90f2..1ab0997728 100644 --- a/scylla/src/transport/cql_types_test.rs +++ b/scylla/src/transport/cql_types_test.rs @@ -1491,3 +1491,207 @@ async fn test_empty() { assert_eq!(empty, CqlValue::Empty); } + +#[tokio::test] +async fn test_udt_with_missing_field() { + let table_name = "udt_tests"; + let type_name = "usertype1"; + + let session: Session = create_new_session_builder().build().await.unwrap(); + let ks = unique_keyspace_name(); + + session + .query( + format!( + "CREATE KEYSPACE IF NOT EXISTS {} WITH REPLICATION = \ + {{'class' : 'NetworkTopologyStrategy', 'replication_factor' : 1}}", + ks + ), + &[], + ) + .await + .unwrap(); + session.use_keyspace(ks, false).await.unwrap(); + + session + .query(format!("DROP TABLE IF EXISTS {}", table_name), &[]) + .await + .unwrap(); + + session + .query(format!("DROP TYPE IF EXISTS {}", type_name), &[]) + .await + .unwrap(); + + session + .query( + format!( + "CREATE TYPE IF NOT EXISTS {} (first int, second boolean, third float, fourth blob)", + type_name + ), + &[], + ) + .await + .unwrap(); + + session + .query( + format!( + "CREATE TABLE IF NOT EXISTS {} (id int PRIMARY KEY, val {})", + table_name, type_name + ), + &[], + ) + .await + .unwrap(); + + let mut id = 0; + + async fn verify_insert_select_identity( + session: &Session, + table_name: &str, + id: i32, + element: TQ, + expected: TR, + ) where + TQ: SerializeCql, + TR: FromCqlVal + PartialEq + Debug, + { + session + .query( + format!("INSERT INTO {}(id,val) VALUES (?,?)", table_name), + &(id, &element), + ) + .await + .unwrap(); + let result = session + .query( + format!("SELECT val from {} WHERE id = ?", table_name), + &(id,), + ) + .await + .unwrap() + .rows + .unwrap() + .into_typed::<(TR,)>() + .next() + .unwrap() + .unwrap() + .0; + assert_eq!(expected, result); + } + + #[derive(FromUserType, Debug, PartialEq)] + struct UdtFull { + pub 
first: i32, + pub second: bool, + pub third: Option, + pub fourth: Option>, + } + + #[derive(SerializeCql)] + #[scylla(crate = crate)] + struct UdtV1 { + pub first: i32, + pub second: bool, + } + + verify_insert_select_identity( + &session, + table_name, + id, + UdtV1 { + first: 3, + second: true, + }, + UdtFull { + first: 3, + second: true, + third: None, + fourth: None, + }, + ) + .await; + + id += 1; + + #[derive(SerializeCql)] + #[scylla(crate = crate)] + struct UdtV2 { + pub first: i32, + pub second: bool, + pub third: Option, + } + + verify_insert_select_identity( + &session, + table_name, + id, + UdtV2 { + first: 3, + second: true, + third: Some(123.45), + }, + UdtFull { + first: 3, + second: true, + third: Some(123.45), + fourth: None, + }, + ) + .await; + + id += 1; + + #[derive(SerializeCql)] + #[scylla(crate = crate)] + struct UdtV3 { + pub first: i32, + pub second: bool, + pub fourth: Option>, + } + + verify_insert_select_identity( + &session, + table_name, + id, + UdtV3 { + first: 3, + second: true, + fourth: Some(vec![3, 6, 9]), + }, + UdtFull { + first: 3, + second: true, + third: None, + fourth: Some(vec![3, 6, 9]), + }, + ) + .await; + + id += 1; + + #[derive(SerializeCql)] + #[scylla(crate = crate, flavor="enforce_order")] + struct UdtV4 { + pub first: i32, + pub second: bool, + } + + verify_insert_select_identity( + &session, + table_name, + id, + UdtV4 { + first: 3, + second: true, + }, + UdtFull { + first: 3, + second: true, + third: None, + fourth: None, + }, + ) + .await; +} From bce8d241b600bed9601d41121b43e70e7f994716 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 18 Dec 2023 20:15:08 +0100 Subject: [PATCH 105/107] scylla-macros: bump version to 0.2.2 --- Cargo.lock.msrv | 2 +- scylla-cql/Cargo.toml | 2 +- scylla-macros/Cargo.toml | 2 +- scylla/Cargo.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock.msrv b/Cargo.lock.msrv index 59c9ee1b56..5083598ee2 100644 --- a/Cargo.lock.msrv 
+++ b/Cargo.lock.msrv @@ -1500,7 +1500,7 @@ dependencies = [ [[package]] name = "scylla-macros" -version = "0.2.0" +version = "0.2.2" dependencies = [ "darling", "proc-macro2", diff --git a/scylla-cql/Cargo.toml b/scylla-cql/Cargo.toml index 536e9d1c34..93c35087fe 100644 --- a/scylla-cql/Cargo.toml +++ b/scylla-cql/Cargo.toml @@ -10,7 +10,7 @@ categories = ["database"] license = "MIT OR Apache-2.0" [dependencies] -scylla-macros = { version = "0.2.0", path = "../scylla-macros" } +scylla-macros = { version = "0.2.2", path = "../scylla-macros" } byteorder = "1.3.4" bytes = "1.0.1" num_enum = "0.6" diff --git a/scylla-macros/Cargo.toml b/scylla-macros/Cargo.toml index ac5f5d16f1..b9fa22662c 100644 --- a/scylla-macros/Cargo.toml +++ b/scylla-macros/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "scylla-macros" -version = "0.2.0" +version = "0.2.2" edition = "2021" description = "proc macros for scylla async CQL driver" repository = "https://github.com/scylladb/scylla-rust-driver" diff --git a/scylla/Cargo.toml b/scylla/Cargo.toml index 098673faec..d4bce879f5 100644 --- a/scylla/Cargo.toml +++ b/scylla/Cargo.toml @@ -23,7 +23,7 @@ time = ["scylla-cql/time"] full-serialization = ["chrono", "time", "secret"] [dependencies] -scylla-macros = { version = "0.2.0", path = "../scylla-macros" } +scylla-macros = { version = "0.2.2", path = "../scylla-macros" } scylla-cql = { version = "0.0.8", path = "../scylla-cql" } byteorder = "1.3.4" bytes = "1.0.1" From b5faf8b4ff4f70b2e08d36c0fc7ae70aaa86cba8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 18 Dec 2023 20:16:02 +0100 Subject: [PATCH 106/107] scylla-cql: bump version to 0.0.10 --- Cargo.lock.msrv | 2 +- scylla-cql/Cargo.toml | 2 +- scylla-proxy/Cargo.toml | 2 +- scylla/Cargo.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock.msrv b/Cargo.lock.msrv index 5083598ee2..73d04998e3 100644 --- a/Cargo.lock.msrv +++ b/Cargo.lock.msrv @@ -1477,7 +1477,7 @@ dependencies = [ 
[[package]] name = "scylla-cql" -version = "0.0.8" +version = "0.0.10" dependencies = [ "async-trait", "bigdecimal", diff --git a/scylla-cql/Cargo.toml b/scylla-cql/Cargo.toml index 93c35087fe..d7ace3735c 100644 --- a/scylla-cql/Cargo.toml +++ b/scylla-cql/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "scylla-cql" -version = "0.0.8" +version = "0.0.10" edition = "2021" description = "CQL data types and primitives, for interacting with Scylla." repository = "https://github.com/scylladb/scylla-rust-driver" diff --git a/scylla-proxy/Cargo.toml b/scylla-proxy/Cargo.toml index ad9cfffa59..0c3000fff6 100644 --- a/scylla-proxy/Cargo.toml +++ b/scylla-proxy/Cargo.toml @@ -13,7 +13,7 @@ license = "MIT OR Apache-2.0" defaults = [] [dependencies] -scylla-cql = { version = "0.0.8", path = "../scylla-cql" } +scylla-cql = { version = "0.0.10", path = "../scylla-cql" } byteorder = "1.3.4" bytes = "1.2.0" futures = "0.3.6" diff --git a/scylla/Cargo.toml b/scylla/Cargo.toml index d4bce879f5..dbc8397eb2 100644 --- a/scylla/Cargo.toml +++ b/scylla/Cargo.toml @@ -24,7 +24,7 @@ full-serialization = ["chrono", "time", "secret"] [dependencies] scylla-macros = { version = "0.2.2", path = "../scylla-macros" } -scylla-cql = { version = "0.0.8", path = "../scylla-cql" } +scylla-cql = { version = "0.0.10", path = "../scylla-cql" } byteorder = "1.3.4" bytes = "1.0.1" futures = "0.3.6" From 449ec0cad7f939633ba737626310480c9c307c92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karol=20Bary=C5=82a?= Date: Mon, 18 Dec 2023 20:19:38 +0100 Subject: [PATCH 107/107] scylla: bump version to 0.11.0 --- Cargo.lock.msrv | 2 +- docs/pyproject.toml | 2 +- docs/source/conf.py | 6 +++--- docs/source/quickstart/create-project.md | 2 +- scylla/Cargo.toml | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.lock.msrv b/Cargo.lock.msrv index 73d04998e3..ac9f6fc438 100644 --- a/Cargo.lock.msrv +++ b/Cargo.lock.msrv @@ -1432,7 +1432,7 @@ checksum = 
"94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "scylla" -version = "0.9.0" +version = "0.11.0" dependencies = [ "arc-swap", "assert_matches", diff --git a/docs/pyproject.toml b/docs/pyproject.toml index 6d2b3bb240..6e44db3302 100644 --- a/docs/pyproject.toml +++ b/docs/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "sphinx-docs" description = "ScyllaDB Documentation" -version = "0.9.0" +version = "0.11.0" authors = ["ScyllaDB Documentation Contributors"] [tool.poetry.dependencies] diff --git a/docs/source/conf.py b/docs/source/conf.py index c65e86b5ee..bbf58323db 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -14,14 +14,14 @@ # -- Global variables # Build documentation for the following tags and branches -TAGS = ['v0.9.1', 'v0.10.1'] +TAGS = ['v0.10.1', 'v0.11.0'] BRANCHES = ['main'] # Set the latest version. -LATEST_VERSION = 'v0.10.1' +LATEST_VERSION = 'v0.11.0' # Set which versions are not released yet. UNSTABLE_VERSIONS = ['main'] # Set which versions are deprecated -DEPRECATED_VERSIONS = ['v0.9.1'] +DEPRECATED_VERSIONS = ['v0.10.1'] # -- General configuration diff --git a/docs/source/quickstart/create-project.md b/docs/source/quickstart/create-project.md index 96bfad98f3..c6ee6bc949 100644 --- a/docs/source/quickstart/create-project.md +++ b/docs/source/quickstart/create-project.md @@ -8,7 +8,7 @@ cargo new myproject In `Cargo.toml` add useful dependencies: ```toml [dependencies] -scylla = "0.8" +scylla = "0.11" tokio = { version = "1.12", features = ["full"] } futures = "0.3.6" uuid = "1.0" diff --git a/scylla/Cargo.toml b/scylla/Cargo.toml index dbc8397eb2..1949ce84e6 100644 --- a/scylla/Cargo.toml +++ b/scylla/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "scylla" -version = "0.9.0" +version = "0.11.0" edition = "2021" description = "Async CQL driver for Rust, optimized for Scylla, fully compatible with Apache Cassandra™" repository = "https://github.com/scylladb/scylla-rust-driver"