From e25703625059a08330e0b865e49bccb8d0d7885a Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Fri, 16 Feb 2024 15:51:11 -0800 Subject: [PATCH] Miscellaneous tweaks to speed up call/RPS benchmarks (#7953) This commit makes the following changes: * A handful of `#[inline]` annotations. * A couple cases of splitting out uncommon/slow paths from `#[inline]`-annotated functions into their own non-`#[inline]`-annotated functions. * Remove a call to `mpk::is_supported()` in async context construction. It is sufficient to just check `self.pkey.is_some()` since if mpk isn't supported we won't have a pkey, if we do have a pkey mpk must have been supported for us to get it, and even if mpk is supported, if we don't have a pkey we don't need to do anything here. Criterion benchmark results: ``` sync/no-hook/core - host-to-wasm - typed - nop-params-and-results time: [25.214 ns 25.322 ns 25.443 ns] change: [-21.901% -20.227% -18.749%] (p = 0.00 < 0.05) Performance has improved. ``` --- crates/runtime/src/instance.rs | 1 + crates/runtime/src/send_sync_ptr.rs | 9 +++++++++ crates/runtime/src/traphandlers.rs | 3 +++ crates/wasmtime/src/runtime/func.rs | 28 ++++++++++++++++++---------- crates/wasmtime/src/runtime/store.rs | 12 +++++++++++- 5 files changed, 42 insertions(+), 11 deletions(-) diff --git a/crates/runtime/src/instance.rs b/crates/runtime/src/instance.rs index 30cf5f4f6db2..a6a9de907d2c 100644 --- a/crates/runtime/src/instance.rs +++ b/crates/runtime/src/instance.rs @@ -233,6 +233,7 @@ impl Instance { /// and that it's valid to acquire `&mut Instance` at this time. For example /// this can't be called twice on the same `VMContext` to get two active /// pointers to the same `Instance`. + #[inline] pub unsafe fn from_vmctx(vmctx: *mut VMContext, f: impl FnOnce(&mut Instance) -> R) -> R { let ptr = vmctx .cast::() diff --git a/crates/runtime/src/send_sync_ptr.rs b/crates/runtime/src/send_sync_ptr.rs index 5b6d05d4f4c0..8ab8d1aaf827 100644 --- a/crates/runtime/src/send_sync_ptr.rs +++ b/crates/runtime/src/send_sync_ptr.rs @@ -12,28 +12,33 @@ unsafe impl Sync for SendSyncPtr {} impl SendSyncPtr { /// Creates a new pointer wrapping the non-nullable pointer provided. + #[inline] pub fn new(ptr: NonNull) -> SendSyncPtr { SendSyncPtr(ptr) } /// Returns the underlying raw pointer. + #[inline] pub fn as_ptr(&self) -> *mut T { self.0.as_ptr() } /// Unsafely assert that this is a pointer to valid contents and it's also /// valid to get a shared reference to it at this time. + #[inline] pub unsafe fn as_ref<'a>(&self) -> &'a T { self.0.as_ref() } /// Unsafely assert that this is a pointer to valid contents and it's also /// valid to get a mutable reference to it at this time. + #[inline] pub unsafe fn as_mut<'a>(&mut self) -> &'a mut T { self.0.as_mut() } /// Returns the underlying `NonNull` wrapper. + #[inline] pub fn as_non_null(&self) -> NonNull { self.0 } @@ -41,6 +46,7 @@ impl SendSyncPtr { impl SendSyncPtr<[T]> { /// Returns the slice's length component of the pointer. + #[inline] pub fn len(&self) -> usize { self.0.len() } @@ -50,6 +56,7 @@ impl From for SendSyncPtr where U: Into>, { + #[inline] fn from(ptr: U) -> SendSyncPtr { SendSyncPtr::new(ptr.into()) } @@ -68,6 +75,7 @@ impl fmt::Pointer for SendSyncPtr { } impl Clone for SendSyncPtr { + #[inline] fn clone(&self) -> Self { *self } @@ -76,6 +84,7 @@ impl Clone for SendSyncPtr { impl Copy for SendSyncPtr {} impl PartialEq for SendSyncPtr { + #[inline] fn eq(&self, other: &SendSyncPtr) -> bool { self.0 == other.0 } diff --git a/crates/runtime/src/traphandlers.rs b/crates/runtime/src/traphandlers.rs index 3a0eeed68bc5..88cbf56090d1 100644 --- a/crates/runtime/src/traphandlers.rs +++ b/crates/runtime/src/traphandlers.rs @@ -326,11 +326,13 @@ mod call_thread_state { self.prev.get() } + #[inline] pub(crate) unsafe fn push(&self) { assert!(self.prev.get().is_null()); self.prev.set(tls::raw::replace(self)); } + #[inline] pub(crate) unsafe fn pop(&self) { let prev = self.prev.replace(ptr::null()); let head = tls::raw::replace(prev); @@ -346,6 +348,7 @@ enum UnwindReason { } impl CallThreadState { + #[inline] fn with( mut self, closure: impl FnOnce(&CallThreadState) -> i32, diff --git a/crates/wasmtime/src/runtime/func.rs b/crates/wasmtime/src/runtime/func.rs index b404e5fb9ba6..8ffd979cf184 100644 --- a/crates/wasmtime/src/runtime/func.rs +++ b/crates/wasmtime/src/runtime/func.rs @@ -1076,24 +1076,32 @@ impl Func { #[inline] pub(crate) fn vm_func_ref(&self, store: &mut StoreOpaque) -> NonNull { let func_data = &mut store.store_data_mut()[self.0]; + let func_ref = func_data.export().func_ref; + if unsafe { func_ref.as_ref().wasm_call.is_some() } { + return func_ref; + } + if let Some(in_store) = func_data.in_store_func_ref { in_store.as_non_null() } else { - let func_ref = func_data.export().func_ref; unsafe { - if func_ref.as_ref().wasm_call.is_none() { - let func_ref = store.func_refs().push(func_ref.as_ref().clone()); - store.store_data_mut()[self.0].in_store_func_ref = - Some(SendSyncPtr::new(func_ref)); - store.fill_func_refs(); - func_ref - } else { - func_ref - } + // Move this uncommon/slow path out of line. + self.copy_func_ref_into_store_and_fill(store, func_ref) } } } + unsafe fn copy_func_ref_into_store_and_fill( + &self, + store: &mut StoreOpaque, + func_ref: NonNull, + ) -> NonNull { + let func_ref = store.func_refs().push(func_ref.as_ref().clone()); + store.store_data_mut()[self.0].in_store_func_ref = Some(SendSyncPtr::new(func_ref)); + store.fill_func_refs(); + func_ref + } + pub(crate) unsafe fn from_wasmtime_function( export: ExportFunction, store: &mut StoreOpaque, diff --git a/crates/wasmtime/src/runtime/store.rs b/crates/wasmtime/src/runtime/store.rs index db9d917b60d4..40df116370c6 100644 --- a/crates/wasmtime/src/runtime/store.rs +++ b/crates/wasmtime/src/runtime/store.rs @@ -401,6 +401,7 @@ impl AutoAssertNoGc where T: std::ops::DerefMut, { + #[inline] pub fn new(mut store: T) -> Self { let _ = &mut store; #[cfg(debug_assertions)] @@ -1097,7 +1098,16 @@ impl StoreInner { &mut self.data } + #[inline] pub fn call_hook(&mut self, s: CallHook) -> Result<()> { + if self.inner.pkey.is_none() && self.call_hook.is_none() { + Ok(()) + } else { + self.call_hook_slow_path(s) + } + } + + fn call_hook_slow_path(&mut self, s: CallHook) -> Result<()> { if let Some(pkey) = &self.inner.pkey { let allocator = self.engine().allocator(); match s { @@ -1402,7 +1412,7 @@ impl StoreOpaque { Some(AsyncCx { current_suspend: self.async_state.current_suspend.get(), current_poll_cx: poll_cx_box_ptr, - track_pkey_context_switch: mpk::is_supported() && self.pkey.is_some(), + track_pkey_context_switch: self.pkey.is_some(), }) }