From 3ec4b96f443302838f2eebee415e1e14fd9730b8 Mon Sep 17 00:00:00 2001 From: Stanislav Ravas Date: Mon, 15 Jan 2024 09:15:45 +0100 Subject: [PATCH 1/3] Serializer: Implement collect_str --- src/ser/mod.rs | 151 ++++++++++++++++++++++++++++++------------------- 1 file changed, 94 insertions(+), 57 deletions(-) diff --git a/src/ser/mod.rs b/src/ser/mod.rs index 0674aa15..ad0066d5 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -94,6 +94,66 @@ impl<'a> Serializer<'a> { Ok(()) } } + + fn push_char(&mut self, c: char) -> Result<()> { + // Do escaping according to "6. MUST represent all strings (including object member names) in + // their minimal-length UTF-8 encoding": https://gibson042.github.io/canonicaljson-spec/ + // + // We don't need to escape lone surrogates because surrogate pairs do not exist in valid UTF-8, + // even if they can exist in JSON or JavaScript strings (UCS-2 based). As a result, lone surrogates + // cannot exist in a Rust String. If they do, the bug is in the String constructor. + // An excellent explanation is available at https://www.youtube.com/watch?v=HhIEDWmQS3w + + // Temporary storage for a single encoded char. + // A char is up to 4 bytes long when encoded to UTF-8. 
+ let mut encoding_tmp = [0u8; 4]; + + match c { + '\\' => { + self.push(b'\\')?; + self.push(b'\\')?; + } + '"' => { + self.push(b'\\')?; + self.push(b'"')?; + } + '\u{0008}' => { + self.push(b'\\')?; + self.push(b'b')?; + } + '\u{0009}' => { + self.push(b'\\')?; + self.push(b't')?; + } + '\u{000A}' => { + self.push(b'\\')?; + self.push(b'n')?; + } + '\u{000C}' => { + self.push(b'\\')?; + self.push(b'f')?; + } + '\u{000D}' => { + self.push(b'\\')?; + self.push(b'r')?; + } + '\u{0000}'..='\u{001F}' => { + self.push(b'\\')?; + self.push(b'u')?; + self.push(b'0')?; + self.push(b'0')?; + let (hex1, hex2) = hex(c as u8); + self.push(hex1)?; + self.push(hex2)?; + } + _ => { + let encoded = c.encode_utf8(&mut encoding_tmp as &mut [u8]); + self.extend_from_slice(encoded.as_bytes())?; + } + } + + Ok(()) + } } // NOTE(serialize_*signed) This is basically the numtoa implementation minus the lookup tables, @@ -263,62 +323,8 @@ impl<'a, 'b: 'a> ser::Serializer for &'a mut Serializer<'b> { fn serialize_str(self, v: &str) -> Result { self.push(b'"')?; - // Do escaping according to "6. MUST represent all strings (including object member names) in - // their minimal-length UTF-8 encoding": https://gibson042.github.io/canonicaljson-spec/ - // - // We don't need to escape lone surrogates because surrogate pairs do not exist in valid UTF-8, - // even if they can exist in JSON or JavaScript strings (UCS-2 based). As a result, lone surrogates - // cannot exist in a Rust String. If they do, the bug is in the String constructor. - // An excellent explanation is available at https://www.youtube.com/watch?v=HhIEDWmQS3w - - // Temporary storage for encoded a single char. - // A char is up to 4 bytes long wehn encoded to UTF-8. 
- let mut encoding_tmp = [0u8; 4]; - for c in v.chars() { - match c { - '\\' => { - self.push(b'\\')?; - self.push(b'\\')?; - } - '"' => { - self.push(b'\\')?; - self.push(b'"')?; - } - '\u{0008}' => { - self.push(b'\\')?; - self.push(b'b')?; - } - '\u{0009}' => { - self.push(b'\\')?; - self.push(b't')?; - } - '\u{000A}' => { - self.push(b'\\')?; - self.push(b'n')?; - } - '\u{000C}' => { - self.push(b'\\')?; - self.push(b'f')?; - } - '\u{000D}' => { - self.push(b'\\')?; - self.push(b'r')?; - } - '\u{0000}'..='\u{001F}' => { - self.push(b'\\')?; - self.push(b'u')?; - self.push(b'0')?; - self.push(b'0')?; - let (hex1, hex2) = hex(c as u8); - self.push(hex1)?; - self.push(hex2)?; - } - _ => { - let encoded = c.encode_utf8(&mut encoding_tmp as &mut [u8]); - self.extend_from_slice(encoded.as_bytes())?; - } - } + self.push_char(c)?; } self.push(b'"') @@ -434,14 +440,45 @@ impl<'a, 'b: 'a> ser::Serializer for &'a mut Serializer<'b> { Ok(SerializeStructVariant::new(self)) } - fn collect_str(self, _value: &T) -> Result + fn collect_str(self, value: &T) -> Result where T: fmt::Display, { - unreachable!() + self.push(b'"')?; + + let mut col = StringCollector::new(self); + fmt::write(&mut col, format_args!("{}", value)) + .or(Err(Error::BufferFull))?; + + self.push(b'"') } } +struct StringCollector<'a, 'b> { + ser: &'a mut Serializer<'b> +} + +impl<'a, 'b> StringCollector<'a, 'b> { + pub fn new(ser: &'a mut Serializer<'b>) -> Self { + Self { ser } + } + + fn do_write_str(&mut self, s: &str) -> Result<()> { + for c in s.chars() { + self.ser.push_char(c)?; + } + + Ok(()) + } +} + +impl<'a, 'b> fmt::Write for StringCollector<'a, 'b> { + fn write_str(&mut self, s: &str) -> fmt::Result { + self.do_write_str(s).or(Err(fmt::Error)) + } +} + + /// Serializes the given data structure as a string of JSON text #[cfg(feature = "heapless")] pub fn to_string(value: &T) -> Result> From 6767ae1d957a5ed2508f38f59389040e77858de8 Mon Sep 17 00:00:00 2001 From: Stanislav Ravas Date: Mon, 22 Jan 
2024 10:31:55 +0100 Subject: [PATCH 2/3] Add changelog entry for adding Serializer::collect_str --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ee5de8b..13565f95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Support for optional package `defmt` which allows for easy conversion for error types when using tools like `probe-rs` for logging over debuggers. +- Implement `Serializer::collect_str` ### Changed From e13c3f8467ceebb00923b82858604439a968b3fd Mon Sep 17 00:00:00 2001 From: Stanislav Ravas Date: Mon, 22 Jan 2024 10:33:05 +0100 Subject: [PATCH 3/3] Serializer::collect_str: Format with cargo fmt --- src/ser/mod.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ser/mod.rs b/src/ser/mod.rs index ad0066d5..cbec8a10 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -447,15 +447,14 @@ impl<'a, 'b: 'a> ser::Serializer for &'a mut Serializer<'b> { self.push(b'"')?; let mut col = StringCollector::new(self); - fmt::write(&mut col, format_args!("{}", value)) - .or(Err(Error::BufferFull))?; + fmt::write(&mut col, format_args!("{}", value)).or(Err(Error::BufferFull))?; self.push(b'"') } } struct StringCollector<'a, 'b> { - ser: &'a mut Serializer<'b> + ser: &'a mut Serializer<'b>, } impl<'a, 'b> StringCollector<'a, 'b> { @@ -478,7 +477,6 @@ impl<'a, 'b> fmt::Write for StringCollector<'a, 'b> { } } - /// Serializes the given data structure as a string of JSON text #[cfg(feature = "heapless")] pub fn to_string(value: &T) -> Result>