Skip to content

Commit

Permalink
Implement DecInt without itoa
Browse files Browse the repository at this point in the history
Using `itoa` is unlikely to have any actual benefits for any reasonable
usecase. You are unlikely to format lots of integers in a tight loop
without performing IO operations in between, so any speed-benefits of
`itoa` are likely moot. The buffer `itoa` writes to is not re-used by
`DecInt`, which makes the generated code use a `memcpy` call. `itoa`'s
buffer is twice the size as we need, because it is implemented to be
able to hold an `i128`/`u128`. The `i128`/`u128` implementation of
`DecInt` would panic if the input was outside of `-10**19 + 1 ..=
10**20 - 1`.

This PR implements the integer stringification naively using a
div-mod-10 loop. Users will be able to use integers as path arguments
without opting-in to any feature.

This is a breaking change because the feature `itoa` was removed, and
because the implementation for `i128`/`u128` was removed.
  • Loading branch information
Kijewski committed Dec 22, 2024
1 parent 7be3386 commit 9d47c4e
Show file tree
Hide file tree
Showing 9 changed files with 171 additions and 62 deletions.
5 changes: 2 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ rust-version = "1.63"

[dependencies]
bitflags = { version = "2.4.0", default-features = false }
itoa = { version = "1.0.13", default-features = false, optional = true }

# Special dependencies used in rustc-dep-of-std mode.
core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core" }
Expand Down Expand Up @@ -159,10 +158,10 @@ time = []
param = ["fs"]

# Enable this to enable `rustix::io::proc_self_*` (on Linux) and `ttyname`.
procfs = ["once_cell", "itoa", "fs"]
procfs = ["once_cell", "fs"]

# Enable `rustix::pty::*`.
pty = ["itoa", "fs"]
pty = ["fs"]

# Enable `rustix::termios::*`.
termios = []
Expand Down
4 changes: 1 addition & 3 deletions src/path/arg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@
use crate::ffi::CStr;
use crate::io;
#[cfg(feature = "itoa")]
use crate::path::DecInt;
use crate::path::SMALL_PATH_BUFFER_SIZE;
#[cfg(all(feature = "alloc", feature = "itoa"))]
#[cfg(feature = "alloc")]
use alloc::borrow::ToOwned;
use core::mem::MaybeUninit;
use core::{ptr, slice, str};
Expand Down Expand Up @@ -982,7 +981,6 @@ impl Arg for Vec<u8> {
}
}

#[cfg(feature = "itoa")]
impl Arg for DecInt {
#[inline]
fn as_str(&self) -> io::Result<&str> {
Expand Down
176 changes: 151 additions & 25 deletions src/path/dec_int.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@

use crate::backend::fd::{AsFd, AsRawFd};
use crate::ffi::CStr;
use core::hint::unreachable_unchecked;
use core::mem::{self, MaybeUninit};
use itoa::{Buffer, Integer};
use core::num::NonZeroU8;
#[cfg(all(feature = "std", unix))]
use std::os::unix::ffi::OsStrExt;
#[cfg(all(feature = "std", target_os = "wasi"))]
Expand All @@ -36,35 +37,155 @@ use {core::fmt, std::ffi::OsStr, std::path::Path};
/// ```
#[derive(Clone)]
pub struct DecInt {
// Enough to hold an {u,i}64 and NUL terminator.
buf: [MaybeUninit<u8>; u64::MAX_STR_LEN + 1],
len: usize,
buf: [MaybeUninit<u8>; BUF_LEN],
len: NonZeroU8,
}
const _: () = assert!(u64::MAX_STR_LEN == i64::MAX_STR_LEN);

/// Enough to hold an {u,i}64 and NUL terminator.
const BUF_LEN: usize = U64_MAX_STR_LEN + 1;

/// Maximum length of a formatted [`u64`].
const U64_MAX_STR_LEN: usize = "18446744073709551615".len();

/// Maximum length of a formatted [`i64`].
#[allow(dead_code)]
const I64_MAX_STR_LEN: usize = "-9223372036854775808".len();

const _: () = assert!(U64_MAX_STR_LEN == I64_MAX_STR_LEN);

mod private {
pub trait Sealed: Copy {
type Unsigned: super::Integer;

fn as_unsigned(self) -> (bool, Self::Unsigned);
fn eq_zero(self) -> bool;
fn div_mod_10(&mut self) -> u8;
}

macro_rules! impl_unsigned {
($($ty:ty)+) => { $(
impl Sealed for $ty {
type Unsigned = $ty;

#[inline]
fn as_unsigned(self) -> (bool, $ty) {
(false, self)
}

#[inline]
fn eq_zero(self) -> bool {
self == 0
}

#[inline]
fn div_mod_10(&mut self) -> u8 {
let result = (*self % 10) as u8;
*self /= 10;
result
}
}
)+ }
}

macro_rules! impl_signed {
($($signed:ty : $unsigned:ty)+) => { $(
impl Sealed for $signed {
type Unsigned = $unsigned;

#[inline]
fn as_unsigned(self) -> (bool, $unsigned) {
if self >= 0 {
(false, self as $unsigned)
} else {
(true, !(self as $unsigned) + 1)
}
}

#[inline]
fn eq_zero(self) -> bool {
unimplemented!()
}

#[inline]
fn div_mod_10(&mut self) -> u8 {
unimplemented!()
}
}
)+ }
}

impl_unsigned!(u8 u16 u32 u64);
impl_signed!(i8:u8 i16:u16 i32:u32 i64:u64);

#[cfg(any(
target_pointer_width = "16",
target_pointer_width = "32",
target_pointer_width = "64"
))]
const _: () = {
impl_unsigned!(usize);
impl_signed!(isize:usize);
};
}

/// An integer that can be used by [`DecInt::new`].
pub trait Integer: private::Sealed {}

impl Integer for i8 {}
impl Integer for i16 {}
impl Integer for i32 {}
impl Integer for i64 {}
impl Integer for u8 {}
impl Integer for u16 {}
impl Integer for u32 {}
impl Integer for u64 {}

#[cfg(any(
target_pointer_width = "16",
target_pointer_width = "32",
target_pointer_width = "64"
))]
const _: () = {
impl Integer for isize {}
impl Integer for usize {}
};

impl DecInt {
/// Construct a new path component from an integer.
#[inline]
pub fn new<Int: Integer>(i: Int) -> Self {
let mut buf = [MaybeUninit::uninit(); 21];

let mut str_buf = Buffer::new();
let str_buf = str_buf.format(i);
assert!(
str_buf.len() < buf.len(),
"{str_buf}{} unsupported.",
core::any::type_name::<Int>()
);

buf[..str_buf.len()].copy_from_slice(unsafe {
// SAFETY: you can always go from init to uninit
mem::transmute::<&[u8], &[MaybeUninit<u8>]>(str_buf.as_bytes())
});
buf[str_buf.len()] = MaybeUninit::new(0);

Self {
use private::Sealed;

let (is_neg, mut i) = i.as_unsigned();
let mut len = 1;
let mut buf = [MaybeUninit::uninit(); BUF_LEN];
buf[BUF_LEN - 1] = MaybeUninit::new(b'\0');

// We use `loop { …; if cond { break } }` instead of `while !cond { … }` so the loop is
// entered at least once. This way `0` does not need a special handling.
loop {
len += 1;
if len > BUF_LEN {
// SAFETY: a stringified i64/u64 cannot be longer than `U64_MAX_STR_LEN` bytes
unsafe { unreachable_unchecked() };
}
buf[BUF_LEN - len] = MaybeUninit::new(b'0' + i.div_mod_10());
if i.eq_zero() {
break;
}
}

if is_neg {
len += 1;
if len > BUF_LEN {
// SAFETY: a stringified i64/u64 cannot be longer than `U64_MAX_STR_LEN` bytes
unsafe { unreachable_unchecked() };
}
buf[BUF_LEN - len] = MaybeUninit::new(b'-');
}

DecInt {
buf,
len: str_buf.len(),
len: NonZeroU8::new(len as u8).unwrap(),
}
}

Expand Down Expand Up @@ -96,7 +217,12 @@ impl DecInt {
/// Return the raw byte buffer including the NUL byte.
#[inline]
pub fn as_bytes_with_nul(&self) -> &[u8] {
let init = &self.buf[..=self.len];
let len = usize::from(self.len.get());
if len > BUF_LEN {
// SAFETY: a stringified i64/u64 cannot be longer than `U64_MAX_STR_LEN` bytes
unsafe { unreachable_unchecked() };
}
let init = &self.buf[(self.buf.len() - len)..];
// SAFETY: we're guaranteed to have initialized len+1 bytes.
unsafe { mem::transmute::<&[MaybeUninit<u8>], &[u8]>(init) }
}
Expand Down
5 changes: 1 addition & 4 deletions src/path/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
//! Filesystem path operations.
mod arg;
#[cfg(feature = "itoa")]
mod dec_int;

pub use arg::{option_into_with_c_str, Arg};
#[cfg(feature = "itoa")]
#[cfg_attr(docsrs, doc(cfg(feature = "itoa")))]
pub use dec_int::DecInt;
pub use dec_int::{DecInt, Integer};

pub(crate) const SMALL_PATH_BUFFER_SIZE: usize = 256;
2 changes: 0 additions & 2 deletions tests/net/unix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
// This test uses `AF_UNIX` with `SOCK_SEQPACKET` which is unsupported on
// macOS.
#![cfg(not(any(apple, target_os = "espidf", target_os = "redox", target_os = "wasi")))]
// This test uses `DecInt`.
#![cfg(feature = "itoa")]
#![cfg(feature = "fs")]

use rustix::fs::{unlinkat, AtFlags, CWD};
Expand Down
2 changes: 0 additions & 2 deletions tests/net/unix_alloc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
// This test uses `AF_UNIX` with `SOCK_SEQPACKET` which is unsupported on
// macOS.
#![cfg(not(any(apple, target_os = "espidf", target_os = "redox", target_os = "wasi")))]
// This test uses `DecInt`.
#![cfg(feature = "itoa")]
#![cfg(feature = "fs")]

use rustix::fs::{unlinkat, AtFlags, CWD};
Expand Down
16 changes: 6 additions & 10 deletions tests/path/arg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
use rustix::ffi::{CStr, CString};
use rustix::io;
use rustix::path::Arg;
#[cfg(feature = "itoa")]
use rustix::path::DecInt;
use std::borrow::Cow;
use std::ffi::{OsStr, OsString};
Expand Down Expand Up @@ -132,15 +131,12 @@ fn test_arg() {
assert_eq!(cstr!("hello"), Borrow::borrow(&t.as_cow_c_str().unwrap()));
assert_eq!(cstr!("hello"), Borrow::borrow(&t.into_c_str().unwrap()));

#[cfg(feature = "itoa")]
{
let t: DecInt = DecInt::new(43110);
assert_eq!("43110", t.as_str());
assert_eq!("43110".to_owned(), Arg::to_string_lossy(&t));
assert_eq!(cstr!("43110"), Borrow::borrow(&t.as_cow_c_str().unwrap()));
assert_eq!(cstr!("43110"), t.as_c_str());
assert_eq!(cstr!("43110"), Borrow::borrow(&t.into_c_str().unwrap()));
}
let t: DecInt = DecInt::new(43110);
assert_eq!("43110", t.as_str());
assert_eq!("43110".to_owned(), Arg::to_string_lossy(&t));
assert_eq!(cstr!("43110"), Borrow::borrow(&t.as_cow_c_str().unwrap()));
assert_eq!(cstr!("43110"), t.as_c_str());
assert_eq!(cstr!("43110"), Borrow::borrow(&t.into_c_str().unwrap()));
}

#[test]
Expand Down
22 changes: 10 additions & 12 deletions tests/path/dec_int.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ use rustix::path::DecInt;
macro_rules! check {
($i:expr) => {
let i = $i;
assert_eq!(DecInt::new(i).as_ref().to_str().unwrap(), i.to_string());
assert_eq!(
DecInt::new(i).as_c_str().to_bytes_with_nul(),
format!("{i}\0").as_bytes(),
);
};
}

Expand All @@ -30,16 +33,11 @@ fn test_dec_int() {
check!(usize::MAX);
check!(isize::MIN);
}
}

#[test]
#[should_panic]
fn test_unsupported_max_u128_dec_int() {
check!(u128::MAX);
}

#[test]
#[should_panic]
fn test_unsupported_min_u128_dec_int() {
check!(i128::MIN);
for i in u16::MIN..=u16::MAX {
check!(i);
}
for i in i16::MIN..=i16::MAX {
check!(i);
}
}
1 change: 0 additions & 1 deletion tests/path/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,4 @@

#[cfg(not(feature = "rustc-dep-of-std"))]
mod arg;
#[cfg(feature = "itoa")]
mod dec_int;

0 comments on commit 9d47c4e

Please sign in to comment.