From ae176f26bf8cb20673db4a1b35e77fd8d3536891 Mon Sep 17 00:00:00 2001 From: Andreas Molzer Date: Wed, 17 Jul 2019 13:16:33 +0200 Subject: [PATCH 1/8] Make concatenation of raw slices public Chooses the very cautious option of erroring in all cases of a ZST concatentation. --- src/lib.rs | 62 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8012e3c..01f9350 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ #![no_std] -use core::{slice, str}; +use core::{mem, slice, str}; /// Error that can occur during [`concat`](fn.concat.html). #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -55,16 +55,64 @@ pub fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { } } -fn concat_slice<'a>(a: &'a [u8], b: &'a [u8]) -> Result<&'a [u8], Error> { +/// Concatenate two slices if they are adjacent. +/// +/// If two slices are adjacent to each other in memory, this function +/// concatenates both, creating a single longer slice. Note that slices of +/// zero-sized types (ZST) are never considered adjacent. Otherwise it would be +/// possible to concatenate a slice to itself. +/// +/// # Errors +/// +/// Returns `Err` if the two slices aren't adjacent, `a` is after `b`, or if the +/// result is too long to be represented as a slice (size in bytes is larger +/// than `isize::MAX`). +/// +/// When T is a ZST then returns `Err(TooLong)` if the total length would overflow +/// `usize` and `Err(NotAdjacent)` otherwise. +/// +/// # Examples +/// +/// Correct usage: +/// +/// ```rust +/// # use str_concat::concat_slice; +/// let s = b"0123456789"; +/// assert_eq!(b"0123456", concat_slice(&s[..5], &s[5..7]).unwrap()); +/// ``` +/// +/// Non-adjacent byte slices: +/// +/// ```rust +/// # use str_concat::{concat_slice, Error}; +/// let s = b"0123456789"; +/// assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[..5], &s[6..7])) +/// ``` +/// +pub fn concat_slice<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> { let a_ptr = a.as_ptr(); let b_ptr = b.as_ptr(); let a_len = a.len(); let b_len = b.len(); + if mem::size_of::() == 0 { + // Not safety critical but we check that the length is as expected. + if a_len.checked_add(b_len).is_none() { + return Err(Error::TooLong) + } + + // Never consider ZST slices adjacent. You could otherwise infinitely + // duplicate a non-zero length slice by concatenating it to itself. + return Err(Error::NotAdjacent) + } + + // `max_len <= isize::max_value()` + let max_len = isize::max_value() as usize / mem::size_of::(); + // These should be guaranteed for the slices. - assert!(a_len <= isize::max_value() as usize); - assert!(b_len <= isize::max_value() as usize); + assert!(a_len <= max_len as usize); + assert!(b_len <= max_len as usize); unsafe { // https://doc.rust-lang.org/std/primitive.pointer.html#safety-1 @@ -82,14 +130,16 @@ fn concat_slice<'a>(a: &'a [u8], b: &'a [u8]) -> Result<&'a [u8], Error> { // can always store the sum of two positive `isize`. // [1]: https://doc.rust-lang.org/reference/types/numeric.html#machine-dependent-integer-types let new_len = a_len.checked_add(b_len).unwrap(); - if new_len > isize::max_value() as usize { + // Ensure the length is bounded. The bound is strict from the definition of `max_len` + // `new_len <= max_len` <=> `new_len * mem::size_of::() <= isize::max_value()` + if !(new_len <= max_len) { return Err(Error::TooLong); } // https://doc.rust-lang.org/std/slice/fn.from_raw_parts.html#safety // * slices are adjacent (checked above) // * no double-free / leak because we work on borrowed data // * no use-after-free because `a` and `b` have same lifetime - // * the total size is smaller than isize bytes, len is and we have `u8` + // * the total size is smaller than `isize::MAX` bytes, as max_len is rounded down Ok(slice::from_raw_parts(a_ptr, new_len)) } } From 06abf2760c3a85657abf1ee27db7c7223c8085d2 Mon Sep 17 00:00:00 2001 From: Andreas Molzer Date: Wed, 17 Jul 2019 13:43:29 +0200 Subject: [PATCH 2/8] Add unordered variant for slices --- src/lib.rs | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 01f9350..7a081f6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -173,7 +173,7 @@ pub fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { let a_ptr = a.as_bytes().as_ptr() as usize; let a_end_ptr = a_ptr + a.len(); let b_ptr = b.as_bytes().as_ptr() as usize; - + // make the order of `a` and `b` not matter let (a, b) = if a_ptr <= b_ptr && a_end_ptr <= b_ptr { (a, b) @@ -184,6 +184,47 @@ pub fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { concat(a, b) } +/// Concatenate two adjacent slices no matter their order. +/// +/// This is the same as [`concat_slice`] except that it also concatenates `b` to +/// `a` if `b` is in front of `a` (in which case of [`concat_slice`] errors). +/// Keep in mind that slices of ZSTs will still not be concatenated. +/// +/// # Examples +/// +/// Reversed order: +/// +/// ```rust +/// # use str_concat::concat_slice_unordered; +/// let s = b"0123456789"; +/// assert_eq!(b"0123456", concat_slice_unordered(&s[5..7], &s[..5]).unwrap()); +/// ``` +/// +/// Normal order: +/// +/// ```rust +/// # use str_concat::{concat_slice_unordered, Error}; +/// let s = b"0123456789"; +/// assert_eq!(b"0123456", concat_slice_unordered(&s[..5], &s[5..7]).unwrap()) +/// ``` +/// +/// [`concat_slice`]: fn.concat_slice.html +pub fn concat_slice_unordered<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> { + // add lengths to handle empty cases correctly + let a_ptr = a.as_ptr() as usize; + let a_end_ptr = a_ptr + a.len() * mem::size_of::(); + let b_ptr = b.as_ptr() as usize; + + // make the order of `a` and `b` not matter + let (a, b) = if a_ptr <= b_ptr && a_end_ptr <= b_ptr { + (a, b) + } else { + (b, a) + }; + + concat_slice(a, b) +} + #[cfg(test)] mod tests { use super::{concat, concat_unordered, Error}; From 3951acdc3a1d2dcc06fbbffcd7002db791da6391 Mon Sep 17 00:00:00 2001 From: Andreas Molzer Date: Wed, 17 Jul 2019 14:38:59 +0200 Subject: [PATCH 3/8] Test the behaviour of slice variants --- src/lib.rs | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 7a081f6..c7a95e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -227,7 +227,7 @@ pub fn concat_slice_unordered<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], #[cfg(test)] mod tests { - use super::{concat, concat_unordered, Error}; + use super::{concat, concat_unordered, concat_slice, concat_slice_unordered, Error}; #[test] fn simple_success() { @@ -277,4 +277,34 @@ mod tests { assert_eq!(Ok("0123"), concat_unordered(s, &s[4..])); assert_eq!(Ok("0123"), concat_unordered(&s[4..], s)); } + + #[test] + fn typed_slices() { + #[derive(Debug, PartialEq)] + struct T(usize); + + let s: &[T] = &[T(0), T(1), T(2), T(3)][..]; + assert_eq!(Ok(s), concat_slice(&s[..2], &s[2..])); + assert_eq!(Ok(s), concat_slice_unordered(&s[..2], &s[2..])); + assert_eq!(Ok(s), concat_slice_unordered(&s[2..], &s[..2])); + + // One slice empty + assert_eq!(Ok(s), concat_slice(&s[..0], s)); + assert_eq!(Ok(s), concat_slice_unordered(&s[..0], s)); + assert_eq!(Ok(s), concat_slice_unordered(s, &s[..0])); + assert_eq!(Ok(s), concat_slice(s, &s[4..])); + assert_eq!(Ok(s), concat_slice_unordered(s, &s[4..])); + assert_eq!(Ok(s), concat_slice_unordered(&s[4..], s)); + } + + #[test] + fn typed_fail() { + #[derive(Debug, PartialEq)] + struct T(usize); + + let s: &[T] = &[T(0), T(1), T(2), T(3)][..]; + assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[..1], &s[2..])); + assert_eq!(Err(Error::NotAdjacent), concat_slice_unordered(&s[..1], &s[2..])); + assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[2..], &s[..2])); + } } From fc7b2548684c809de9c27755e91e01339a2f53d0 Mon Sep 17 00:00:00 2001 From: Andreas Molzer Date: Mon, 22 Jul 2019 11:02:16 +0200 Subject: [PATCH 4/8] Mark all primitive functions as unsafe See issue #7, we must guarantee that slices are from the same allocation. However, pointer equality testing can not reliably test this so it must be provided by the caller. Some actual safe interface is coming back at some point, currently the proof obligation is still highly reduced. --- src/lib.rs | 220 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 141 insertions(+), 79 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c7a95e7..544ea4e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,6 +20,12 @@ pub enum Error { /// Returns `Err` if the two slices aren't adjacent, `a` is after `b`, or if /// `a` is too long for proper concatenation (longer than `isize::MAX`). /// +/// # Safety +/// +/// The provided slices must come from the same underlying allocation. The adjacency test can not +/// reliably differentiate between the one-past-the-end pointer of one allocation and the start of +/// another. However, all slices must be within a single allocation. +/// /// # Examples /// /// Correct usage: @@ -27,7 +33,10 @@ pub enum Error { /// ```rust /// # use str_concat::concat; /// let s = "0123456789"; -/// assert_eq!("0123456", concat(&s[..5], &s[5..7]).unwrap()); +/// unsafe { +/// // SAFETY: slices from the same str originally. +/// assert_eq!("0123456", concat(&s[..5], &s[5..7]).unwrap()); +/// } /// ``` /// /// Non-adjacent string slices: @@ -35,24 +44,25 @@ pub enum Error { /// ```rust /// # use str_concat::{concat, Error}; /// let s = "0123456789"; -/// assert_eq!(Err(Error::NotAdjacent), concat(&s[..5], &s[6..7])) +/// unsafe { +/// // SAFETY: slices from the same str originally. +/// assert_eq!(Err(Error::NotAdjacent), concat(&s[..5], &s[6..7])) +/// } /// ``` -pub fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { +pub unsafe fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { let slice = concat_slice(a.as_bytes(), b.as_bytes())?; - unsafe { - // * concatenating two valid UTF8 strings will produce a valid UTF8 string - // * a BOM in `b` is still valid: - // > It is important to understand that the character U+FEFF appearing at - // > any position other than the beginning of a stream MUST be interpreted - // > with the semantics for the zero-width non-breaking space, and MUST - // > NOT be interpreted as a signature. - // * the grapheme *clusters* (and thus potentially the semantics of the string - // might change if the first code point of `b` is a combining character, - // a zero width joiner or similar. - // This does not affect the correctness of UTF-8. - Ok(str::from_utf8_unchecked(slice)) - } + // * concatenating two valid UTF8 strings will produce a valid UTF8 string + // * a BOM in `b` is still valid: + // > It is important to understand that the character U+FEFF appearing at + // > any position other than the beginning of a stream MUST be interpreted + // > with the semantics for the zero-width non-breaking space, and MUST + // > NOT be interpreted as a signature. + // * the grapheme *clusters* (and thus potentially the semantics of the string + // might change if the first code point of `b` is a combining character, + // a zero width joiner or similar. + // This does not affect the correctness of UTF-8. + Ok(str::from_utf8_unchecked(slice)) } /// Concatenate two slices if they are adjacent. @@ -71,6 +81,12 @@ pub fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { /// When T is a ZST then returns `Err(TooLong)` if the total length would overflow /// `usize` and `Err(NotAdjacent)` otherwise. /// +/// # Safety +/// +/// The provided slices must come from the same underlying allocation. The adjacency test can not +/// reliably differentiate between the one-past-the-end pointer of one allocation and the start of +/// another. However, all slices must be within a single allocation. +/// /// # Examples /// /// Correct usage: @@ -78,7 +94,10 @@ pub fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { /// ```rust /// # use str_concat::concat_slice; /// let s = b"0123456789"; -/// assert_eq!(b"0123456", concat_slice(&s[..5], &s[5..7]).unwrap()); +/// unsafe { +/// // SAFETY: slices from the same bytes originally. +/// assert_eq!(b"0123456", concat_slice(&s[..5], &s[5..7]).unwrap()); +/// } /// ``` /// /// Non-adjacent byte slices: @@ -86,10 +105,13 @@ pub fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { /// ```rust /// # use str_concat::{concat_slice, Error}; /// let s = b"0123456789"; -/// assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[..5], &s[6..7])) +/// unsafe { +/// // SAFETY: slices from the same bytes originally. +/// assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[..5], &s[6..7])) +/// } /// ``` /// -pub fn concat_slice<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> { +pub unsafe fn concat_slice<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> { let a_ptr = a.as_ptr(); let b_ptr = b.as_ptr(); @@ -114,34 +136,32 @@ pub fn concat_slice<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> { assert!(a_len <= max_len as usize); assert!(b_len <= max_len as usize); - unsafe { - // https://doc.rust-lang.org/std/primitive.pointer.html#safety-1 - // * starting pointer in-bounds obviously - // * ending pointer one byte past the end of an allocated object - // * explicit isize overflow check above - // * no wraparound required - // why: this is the one byte past the end pointer for the input slice `a` - if a_ptr.offset(a_len as isize) != b_ptr { - return Err(Error::NotAdjacent); - } - // UNWRAP: both smaller than isize, can't wrap in usize. - // This is because in rust `usize` and `isize` are both guaranteed to have - // the same number of bits as a pointer [1]. As `isize` is signed, a `usize` - // can always store the sum of two positive `isize`. - // [1]: https://doc.rust-lang.org/reference/types/numeric.html#machine-dependent-integer-types - let new_len = a_len.checked_add(b_len).unwrap(); - // Ensure the length is bounded. The bound is strict from the definition of `max_len` - // `new_len <= max_len` <=> `new_len * mem::size_of::() <= isize::max_value()` - if !(new_len <= max_len) { - return Err(Error::TooLong); - } - // https://doc.rust-lang.org/std/slice/fn.from_raw_parts.html#safety - // * slices are adjacent (checked above) - // * no double-free / leak because we work on borrowed data - // * no use-after-free because `a` and `b` have same lifetime - // * the total size is smaller than `isize::MAX` bytes, as max_len is rounded down - Ok(slice::from_raw_parts(a_ptr, new_len)) + // https://doc.rust-lang.org/std/primitive.pointer.html#safety-1 + // * starting pointer in-bounds obviously + // * ending pointer one byte past the end of an allocated object + // * explicit isize overflow check above + // * no wraparound required + // why: this is the one byte past the end pointer for the input slice `a` + if a_ptr.offset(a_len as isize) != b_ptr { + return Err(Error::NotAdjacent); + } + // UNWRAP: both smaller than isize, can't wrap in usize. + // This is because in rust `usize` and `isize` are both guaranteed to have + // the same number of bits as a pointer [1]. As `isize` is signed, a `usize` + // can always store the sum of two positive `isize`. + // [1]: https://doc.rust-lang.org/reference/types/numeric.html#machine-dependent-integer-types + let new_len = a_len.checked_add(b_len).unwrap(); + // Ensure the length is bounded. The bound is strict from the definition of `max_len` + // `new_len <= max_len` <=> `new_len * mem::size_of::() <= isize::max_value()` + if !(new_len <= max_len) { + return Err(Error::TooLong); } + // https://doc.rust-lang.org/std/slice/fn.from_raw_parts.html#safety + // * slices are adjacent (checked above) + // * no double-free / leak because we work on borrowed data + // * no use-after-free because `a` and `b` have same lifetime + // * the total size is smaller than `isize::MAX` bytes, as max_len is rounded down + Ok(slice::from_raw_parts(a_ptr, new_len)) } /// Concatenate two adjacent string slices no matter their order. @@ -149,6 +169,12 @@ pub fn concat_slice<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> { /// This is the same as [`concat`] except that it also concatenates /// `b` to `a` if `b` is in front of `a` (in which case [`concat`] errors). /// +/// # Safety +/// +/// The provided slices must come from the same underlying allocation. The adjacency test can not +/// reliably differentiate between the one-past-the-end pointer of one allocation and the start of +/// another. However, all slices must be within a single allocation. +/// /// # Examples /// /// Reversed order: @@ -156,7 +182,10 @@ pub fn concat_slice<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> { /// ```rust /// # use str_concat::concat_unordered; /// let s = "0123456789"; -/// assert_eq!("0123456", concat_unordered(&s[5..7], &s[..5]).unwrap()); +/// unsafe { +/// // SAFETY: slices from the same str originally. +/// assert_eq!("0123456", concat_unordered(&s[5..7], &s[..5]).unwrap()); +/// } /// ``` /// /// Normal order: @@ -164,11 +193,14 @@ pub fn concat_slice<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> { /// ```rust /// # use str_concat::{concat_unordered, Error}; /// let s = "0123456789"; -/// assert_eq!("0123456", concat_unordered(&s[..5], &s[5..7]).unwrap()) +/// unsafe { +/// // SAFETY: slices from the same str originally. +/// assert_eq!("0123456", concat_unordered(&s[..5], &s[5..7]).unwrap()) +/// } /// ``` /// /// [`concat`]: fn.concat.html -pub fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { +pub unsafe fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { // add lengths to handle empty-string cases correctly let a_ptr = a.as_bytes().as_ptr() as usize; let a_end_ptr = a_ptr + a.len(); @@ -190,6 +222,12 @@ pub fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { /// `a` if `b` is in front of `a` (in which case of [`concat_slice`] errors). /// Keep in mind that slices of ZSTs will still not be concatenated. /// +/// # Safety +/// +/// The provided slices must come from the same underlying allocation. The adjacency test can not +/// reliably differentiate between the one-past-the-end pointer of one allocation and the start of +/// another. However, all slices must be within a single allocation. +/// /// # Examples /// /// Reversed order: @@ -197,7 +235,10 @@ pub fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { /// ```rust /// # use str_concat::concat_slice_unordered; /// let s = b"0123456789"; -/// assert_eq!(b"0123456", concat_slice_unordered(&s[5..7], &s[..5]).unwrap()); +/// unsafe { +/// // SAFETY: slices from the same bytes originally. +/// assert_eq!(b"0123456", concat_slice_unordered(&s[5..7], &s[..5]).unwrap()); +/// } /// ``` /// /// Normal order: @@ -205,11 +246,14 @@ pub fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { /// ```rust /// # use str_concat::{concat_slice_unordered, Error}; /// let s = b"0123456789"; -/// assert_eq!(b"0123456", concat_slice_unordered(&s[..5], &s[5..7]).unwrap()) +/// unsafe { +/// // SAFETY: slices from the same bytes originally. +/// assert_eq!(b"0123456", concat_slice_unordered(&s[..5], &s[5..7]).unwrap()) +/// } /// ``` /// /// [`concat_slice`]: fn.concat_slice.html -pub fn concat_slice_unordered<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> { +pub unsafe fn concat_slice_unordered<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Error> { // add lengths to handle empty cases correctly let a_ptr = a.as_ptr() as usize; let a_end_ptr = a_ptr + a.len() * mem::size_of::(); @@ -232,50 +276,64 @@ mod tests { #[test] fn simple_success() { let s = "0123456789"; - assert_eq!(Ok("0123456"), concat(&s[..5], &s[5..7])); - assert_eq!(Ok("0123456"), concat_unordered(&s[..5], &s[5..7])); + unsafe { + assert_eq!(Ok("0123456"), concat(&s[..5], &s[5..7])); + assert_eq!(Ok("0123456"), concat_unordered(&s[..5], &s[5..7])); + } } #[test] fn unordered() { let s = "0123456789"; - assert_eq!(Err(Error::NotAdjacent), concat(&s[5..7], &s[..5])); - assert_eq!(Ok("0123456"), concat_unordered(&s[5..7], &s[..5])); + unsafe { + assert_eq!(Err(Error::NotAdjacent), concat(&s[5..7], &s[..5])); + assert_eq!(Ok("0123456"), concat_unordered(&s[5..7], &s[..5])); + } } #[test] fn simple_fail() { let s = "0123456789"; - assert_eq!(Err(Error::NotAdjacent), concat(&s[..5], &s[6..7])) + unsafe { + assert_eq!(Err(Error::NotAdjacent), concat(&s[..5], &s[6..7])) + } } #[test] fn zero_width_joiner() { let s = "0\u{200d}1"; - assert_eq!(Ok("0\u{200d}1"), concat(&s[..1], &s[1..5])); + unsafe { + assert_eq!(Ok("0\u{200d}1"), concat(&s[..1], &s[1..5])); + } } #[test] fn zero_width_joiner_combining_grave() { let s = "0\u{200d}̀1"; - assert_eq!(Ok("0\u{200d}\u{300}1"), concat(&s[..1], &s[1..7])); + unsafe { + assert_eq!(Ok("0\u{200d}\u{300}1"), concat(&s[..1], &s[1..7])); + } } #[test] fn bom() { let s = "0\u{FEFF}1"; - assert_eq!(Ok("0\u{FEFF}1"), concat(&s[..1], &s[1..5])); + unsafe { + assert_eq!(Ok("0\u{FEFF}1"), concat(&s[..1], &s[1..5])); + } } #[test] fn empty_str() { let s = "0123"; - assert_eq!(Ok("0123"), concat(&s[..0], s)); - assert_eq!(Ok("0123"), concat_unordered(&s[..0], s)); - assert_eq!(Ok("0123"), concat_unordered(s, &s[..0])); - assert_eq!(Ok("0123"), concat(s, &s[4..])); - assert_eq!(Ok("0123"), concat_unordered(s, &s[4..])); - assert_eq!(Ok("0123"), concat_unordered(&s[4..], s)); + unsafe { + assert_eq!(Ok("0123"), concat(&s[..0], s)); + assert_eq!(Ok("0123"), concat_unordered(&s[..0], s)); + assert_eq!(Ok("0123"), concat_unordered(s, &s[..0])); + assert_eq!(Ok("0123"), concat(s, &s[4..])); + assert_eq!(Ok("0123"), concat_unordered(s, &s[4..])); + assert_eq!(Ok("0123"), concat_unordered(&s[4..], s)); + } } #[test] @@ -284,17 +342,19 @@ mod tests { struct T(usize); let s: &[T] = &[T(0), T(1), T(2), T(3)][..]; - assert_eq!(Ok(s), concat_slice(&s[..2], &s[2..])); - assert_eq!(Ok(s), concat_slice_unordered(&s[..2], &s[2..])); - assert_eq!(Ok(s), concat_slice_unordered(&s[2..], &s[..2])); + unsafe { + assert_eq!(Ok(s), concat_slice(&s[..2], &s[2..])); + assert_eq!(Ok(s), concat_slice_unordered(&s[..2], &s[2..])); + assert_eq!(Ok(s), concat_slice_unordered(&s[2..], &s[..2])); - // One slice empty - assert_eq!(Ok(s), concat_slice(&s[..0], s)); - assert_eq!(Ok(s), concat_slice_unordered(&s[..0], s)); - assert_eq!(Ok(s), concat_slice_unordered(s, &s[..0])); - assert_eq!(Ok(s), concat_slice(s, &s[4..])); - assert_eq!(Ok(s), concat_slice_unordered(s, &s[4..])); - assert_eq!(Ok(s), concat_slice_unordered(&s[4..], s)); + // One slice empty + assert_eq!(Ok(s), concat_slice(&s[..0], s)); + assert_eq!(Ok(s), concat_slice_unordered(&s[..0], s)); + assert_eq!(Ok(s), concat_slice_unordered(s, &s[..0])); + assert_eq!(Ok(s), concat_slice(s, &s[4..])); + assert_eq!(Ok(s), concat_slice_unordered(s, &s[4..])); + assert_eq!(Ok(s), concat_slice_unordered(&s[4..], s)); + } } #[test] @@ -303,8 +363,10 @@ mod tests { struct T(usize); let s: &[T] = &[T(0), T(1), T(2), T(3)][..]; - assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[..1], &s[2..])); - assert_eq!(Err(Error::NotAdjacent), concat_slice_unordered(&s[..1], &s[2..])); - assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[2..], &s[..2])); + unsafe { + assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[..1], &s[2..])); + assert_eq!(Err(Error::NotAdjacent), concat_slice_unordered(&s[..1], &s[2..])); + assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[2..], &s[..2])); + } } } From a1ea8b42468dc9d8cb39082668c52cf417f24be2 Mon Sep 17 00:00:00 2001 From: HeroicKatora Date: Thu, 25 Jul 2019 12:12:26 +0200 Subject: [PATCH 5/8] Address review comments on documentation --- src/lib.rs | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 544ea4e..d4b853f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -79,7 +79,13 @@ pub unsafe fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { /// than `isize::MAX`). /// /// When T is a ZST then returns `Err(TooLong)` if the total length would overflow -/// `usize` and `Err(NotAdjacent)` otherwise. +/// `usize` and `Err(NotAdjacent)` otherwise. This is because ZST-slices are [extra +/// weird][zst-str-concat] and [their safety][zst-unsafe-wg1] is not yet [fully +/// determined][zst-unsafe-wg2]. +/// +/// [zst-str-concat]: https://github.com/oberien/str-concat/issues/5 +/// [zst-unsafe-wg1]: https://github.com/rust-lang/unsafe-code-guidelines/issues/93 +/// [zst-unsafe-wg2]: https://github.com/rust-lang/unsafe-code-guidelines/issues/168 /// /// # Safety /// @@ -93,10 +99,12 @@ pub unsafe fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { /// /// ```rust /// # use str_concat::concat_slice; -/// let s = b"0123456789"; +/// let s = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; /// unsafe { /// // SAFETY: slices from the same bytes originally. -/// assert_eq!(b"0123456", concat_slice(&s[..5], &s[5..7]).unwrap()); +/// assert_eq!( +/// [0, 1, 2, 3, 4, 5, 6], +/// concat_slice(&s[..5], &s[5..7]).unwrap()); /// } /// ``` /// @@ -104,7 +112,7 @@ pub unsafe fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { /// /// ```rust /// # use str_concat::{concat_slice, Error}; -/// let s = b"0123456789"; +/// let s = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; /// unsafe { /// // SAFETY: slices from the same bytes originally. /// assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[..5], &s[6..7])) @@ -126,6 +134,10 @@ pub unsafe fn concat_slice<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Err // Never consider ZST slices adjacent. You could otherwise infinitely // duplicate a non-zero length slice by concatenating it to itself. + // See: https://github.com/rust-lang/unsafe-code-guidelines/issues/93 + // and https://github.com/rust-lang/unsafe-code-guidelines/issues/168 + // where the second link makes it sound like it would be possible but + // not necessarily easy. return Err(Error::NotAdjacent) } @@ -220,7 +232,7 @@ pub unsafe fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Er /// /// This is the same as [`concat_slice`] except that it also concatenates `b` to /// `a` if `b` is in front of `a` (in which case of [`concat_slice`] errors). -/// Keep in mind that slices of ZSTs will still not be concatenated. +/// Keep in mind that slices of zero-sized types (ZST) will still not be concatenated. /// /// # Safety /// @@ -234,10 +246,12 @@ pub unsafe fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Er /// /// ```rust /// # use str_concat::concat_slice_unordered; -/// let s = b"0123456789"; +/// let s = [0, 1, 2, 3, 4, 5, 6]; /// unsafe { /// // SAFETY: slices from the same bytes originally. -/// assert_eq!(b"0123456", concat_slice_unordered(&s[5..7], &s[..5]).unwrap()); +/// assert_eq!( +/// [0, 1, 2, 3, 4, 5, 6], +/// concat_slice_unordered(&s[5..7], &s[..5]).unwrap()); /// } /// ``` /// @@ -245,10 +259,12 @@ pub unsafe fn concat_unordered<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Er /// /// ```rust /// # use str_concat::{concat_slice_unordered, Error}; -/// let s = b"0123456789"; +/// let s = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; /// unsafe { /// // SAFETY: slices from the same bytes originally. -/// assert_eq!(b"0123456", concat_slice_unordered(&s[..5], &s[5..7]).unwrap()) +/// assert_eq!( +/// [0, 1, 2, 3, 4, 5, 6], +/// concat_slice_unordered(&s[..5], &s[5..7]).unwrap()) /// } /// ``` /// From f3ff0f6b843de39f2da44e249ecdcc2781c8499d Mon Sep 17 00:00:00 2001 From: Andreas Molzer Date: Tue, 28 Jan 2020 17:51:19 +0100 Subject: [PATCH 6/8] Address remaining review comments --- src/lib.rs | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d4b853f..ffa8882 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,10 +78,9 @@ pub unsafe fn concat<'a>(a: &'a str, b: &'a str) -> Result<&'a str, Error> { /// result is too long to be represented as a slice (size in bytes is larger /// than `isize::MAX`). /// -/// When T is a ZST then returns `Err(TooLong)` if the total length would overflow -/// `usize` and `Err(NotAdjacent)` otherwise. This is because ZST-slices are [extra -/// weird][zst-str-concat] and [their safety][zst-unsafe-wg1] is not yet [fully -/// determined][zst-unsafe-wg2]. +/// When T is a zero-sized type (ZST) then always returns `Err(NotAdjacent)` otherwise. This is +/// because ZST-slices are [extra weird][zst-str-concat] and [their safety][zst-unsafe-wg1] is not +/// yet [fully determined][zst-unsafe-wg2]. /// /// [zst-str-concat]: https://github.com/oberien/str-concat/issues/5 /// [zst-unsafe-wg1]: https://github.com/rust-lang/unsafe-code-guidelines/issues/93 @@ -127,17 +126,15 @@ pub unsafe fn concat_slice<'a, T>(a: &'a [T], b: &'a [T]) -> Result<&'a [T], Err let b_len = b.len(); if mem::size_of::() == 0 { - // Not safety critical but we check that the length is as expected. - if a_len.checked_add(b_len).is_none() { - return Err(Error::TooLong) - } - - // Never consider ZST slices adjacent. You could otherwise infinitely - // duplicate a non-zero length slice by concatenating it to itself. + // NOTE(HeroicKatora) + // Never consider ZST slices adjacent through this function. You could + // infinitely duplicate a non-zero length slice by concatenating it to + // itself as opposed to non-ZST slice types. That would just be weird. + // + // It is however safe. // See: https://github.com/rust-lang/unsafe-code-guidelines/issues/93 // and https://github.com/rust-lang/unsafe-code-guidelines/issues/168 - // where the second link makes it sound like it would be possible but - // not necessarily easy. + // Issue: https://github.com/oberien/str-concat/issues/5 return Err(Error::NotAdjacent) } From 36e27ffbee5eeb81858b05dcc161a9a90e53d11b Mon Sep 17 00:00:00 2001 From: Andreas Molzer Date: Wed, 5 Feb 2020 22:04:02 +0100 Subject: [PATCH 7/8] Add negative test for ZSTs --- src/lib.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index ffa8882..2a6f78d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -382,4 +382,16 @@ mod tests { assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[2..], &s[..2])); } } + + #[test] + fn zst_fail() { + #[derive(Clone, Copy, Debug, PartialEq)] + struct Zst; + + let s: &[Zst] = &[Zst; 4]; + unsafe { + assert_eq!(Err(Error::NotAdjacent), concat_slice(&s[..1], &s[1..])); + assert_eq!(Err(Error::NotAdjacent), concat_slice_unordered(&s[..1], &s[1..])); + } + } } From 4b5f3dbdf00723f89d3e90842fbe154d1eea9649 Mon Sep 17 00:00:00 2001 From: Andreas Molzer Date: Wed, 5 Feb 2020 22:04:11 +0100 Subject: [PATCH 8/8] Adjust documentation example to unsafe requirement --- README.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5c37742..39d7769 100644 --- a/README.md +++ b/README.md @@ -13,12 +13,18 @@ use str_concat::{concat, concat_unordered, Error}; fn main() { let s = "0123456789"; // ordered, `a` before `b` - assert_eq!(Ok("0123456"), concat(&s[..5], &s[5..7])); - assert_eq!(Ok("0123456"), concat_unordered(&s[..5], &s[5..7])); + unsafe { + // SAFETY: the slices are from the same `&str`. + assert_eq!(Ok("0123456"), concat(&s[..5], &s[5..7])); + assert_eq!(Ok("0123456"), concat_unordered(&s[..5], &s[5..7])); + } // unordered, `b` before `a` - assert_eq!(Err(Error::NotAdjacent), concat(&s[5..7], &s[..5])); - assert_eq!(Ok("0123456"), concat_unordered(&s[5..7], &s[..5])); + unsafe { + // SAFETY: the slices are from the same `&str`. + assert_eq!(Err(Error::NotAdjacent), concat(&s[5..7], &s[..5])); + assert_eq!(Ok("0123456"), concat_unordered(&s[5..7], &s[..5])); + } } ```