Skip to content

Commit

Permalink
fix: use allocating deserializers when scratch space exceeded
Browse files Browse the repository at this point in the history
Issue #32 describes a class of errors where `deserialize_str` (and `deserialize_bytes`) have match
expressions that fall through to an error when the `Text` (`Bytes`) lengths are longer than can fit
in the scratch array.

This patch works around that limitation by delegating to the corresponding allocating methods
`deserialize_string` (`deserialize_byte_buf`) in case the scratch space is too small.

This should address the issue even for types whose `Deserialize` visitor does not implement
`visit_string`, since the default implementation of that method delegates back to `visit_str` once
the fully-deserialized `String` is in hand.

This addition raises a question to me about the stance of the `ciborium` crate on `no_std`/`alloc`
support. This workaround depends on being able to use `String` and `Vec`, which rules out
`no_std`. But these allocating deserializers already exist in the code and don't appear to be
guarded by `cfg` directives, so I don't believe this patch changes the level of support provided for
these environments.

Adds regression tests for slightly-too-long string and byte sequence roundtripping.
  • Loading branch information
acfoltzer committed Oct 12, 2022
1 parent 109c371 commit c32f3c9
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 0 deletions.
12 changes: 12 additions & 0 deletions ciborium/src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,12 @@ where
}
}

// Longer strings require alloaction; delegate to `deserialize_string`
item @ Header::Text(_) => {
self.decoder.push(item);
self.deserialize_string(visitor)
}

header => Err(header.expected("str")),
};
}
Expand Down Expand Up @@ -371,6 +377,12 @@ where
visitor.visit_bytes(&self.scratch[..len])
}

// Longer byte sequences require alloaction; delegate to `deserialize_byte_buf`
item @ Header::Bytes(_) => {
self.decoder.push(item);
self.deserialize_byte_buf(visitor)
}

Header::Array(len) => self.recurse(|me| {
let access = Access(me, len);
visitor.visit_seq(access)
Expand Down
111 changes: 111 additions & 0 deletions ciborium/tests/codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ extern crate std;
use std::collections::{BTreeMap, HashMap};
use std::convert::TryFrom;
use std::fmt::Debug;
use std::io::Cursor;

use ciborium::value::Value;
use ciborium::{cbor, de::from_reader, ser::into_writer};

use rstest::rstest;
use serde::de::Visitor;
use serde::{de::DeserializeOwned, Deserialize, Serialize};

macro_rules! val {
Expand Down Expand Up @@ -416,3 +418,112 @@ fn byte_vec_serde_bytes_compatibility(input: Vec<u8>) {
let bytes: Vec<u8> = from_reader(&buf[..]).unwrap();
assert_eq!(input, bytes);
}

// Regression test for #32 where strings and bytes longer than 4096 bytes previously failed to
// roundtrip if `deserialize_str` and `deserialize_bytes` (and not their owned equivalents) are used
// in the deserializers.

#[derive(Clone, Debug, PartialEq, Eq)]
struct LongString {
s: String,
}

impl Serialize for LongString {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_str(self.s.as_str())
}
}

impl<'de> Deserialize<'de> for LongString {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
deserializer.deserialize_str(LongStringVisitor)
}
}

struct LongStringVisitor;

impl<'de> Visitor<'de> for LongStringVisitor {
type Value = LongString;

fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(formatter, "string")
}

fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(LongString { s: v.to_owned() })
}
}

#[test]
fn long_string_roundtrips() {
let s = String::from_utf8(vec![b'A'; 5000]).unwrap();
let long_string = LongString { s };

let mut buf = vec![];
into_writer(&long_string, Cursor::new(&mut buf)).unwrap();
let long_string_de = from_reader(Cursor::new(&buf)).unwrap();

assert_eq!(long_string, long_string_de);
}

#[derive(Clone, Debug, PartialEq, Eq)]
struct LongBytes {
v: Vec<u8>,
}

impl Serialize for LongBytes {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_bytes(self.v.as_slice())
}
}

impl<'de> Deserialize<'de> for LongBytes {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
deserializer.deserialize_bytes(LongBytesVisitor)
}
}

struct LongBytesVisitor;

impl<'de> Visitor<'de> for LongBytesVisitor {
type Value = LongBytes;

fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(formatter, "bytes")
}

fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(LongBytes { v: v.to_vec() })
}
}

#[test]
fn long_bytes_roundtrips() {
let long_bytes = LongBytes {
v: vec![b'A'; 5000],
};

let mut buf = vec![];
into_writer(&long_bytes, Cursor::new(&mut buf)).unwrap();
let long_bytes_de = from_reader(Cursor::new(&buf)).unwrap();

assert_eq!(long_bytes, long_bytes_de);
}

0 comments on commit c32f3c9

Please sign in to comment.