From 161da4dde7a5b9d185a07e9f4a8218a46c561a8c Mon Sep 17 00:00:00 2001 From: Matt Gabrenya Date: Tue, 24 Dec 2024 13:58:51 -0700 Subject: [PATCH] feat: when deserialization of module from filesystem cache fails, remove from cache --- crates/common/src/result.rs | 8 +++- crates/host/src/module.rs | 32 ++++++++++--- crates/host/src/module/wasmer_sys.rs | 67 ++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 7 deletions(-) diff --git a/crates/common/src/result.rs b/crates/common/src/result.rs index cc5928fe..175fb7ea 100644 --- a/crates/common/src/result.rs +++ b/crates/common/src/result.rs @@ -33,6 +33,10 @@ pub enum WasmErrorInner { /// Wasmer failed to build a Module from wasm byte code. /// With the feature `wasmer_sys` enabled, building a Module includes compiling the wasm. ModuleBuild(String), + /// Wasmer failed to serialize a Module to bytes. + ModuleSerialize(String), + /// Wasmer failed to deserialize a Module from bytes. + ModuleDeserialize(String), /// The host failed to call a function in the guest. CallError(String), } @@ -49,8 +53,10 @@ impl WasmErrorInner { | Self::ErrorWhileError // Bad memory is bad memory. | Self::Memory - // Failing to build the wasmer Module means we cannot use it. + // Failing to build, serialize, or deserialize the wasmer Module means we cannot use it. | Self::ModuleBuild(_) + | Self::ModuleSerialize(_) + | Self::ModuleDeserialize(_) // This is ambiguous so best to treat as potentially corrupt. | Self::CallError(_) => true, diff --git a/crates/host/src/module.rs b/crates/host/src/module.rs index 434473a1..7b852a61 100644 --- a/crates/host/src/module.rs +++ b/crates/host/src/module.rs @@ -197,10 +197,21 @@ impl ModuleCache { match self.get_from_filesystem_cache(key) { // Filesystem cache hit, deserialize and save to deserialized cache Ok(Some(serialized_module)) => { - let module = Arc::new( - unsafe { Module::deserialize(&self.runtime_engine, serialized_module.clone()) } - .map_err(|e| wasm_error!(WasmErrorInner::ModuleBuild(e.to_string())))?, - ); + let module_result = + unsafe { Module::deserialize(&self.runtime_engine, serialized_module.clone()) }; + + // If deserialization fails, we assume the file is corrupt, + // so it is removed from the filesystem cache. + let module = match module_result { + Ok(d) => Ok(Arc::new(d)), + Err(e) => { + let _ = self.remove_from_filesystem_cache(key); + Err(wasm_error!(WasmErrorInner::ModuleDeserialize( + e.to_string() + ))) + } + }?; + self.add_to_deserialized_cache(key, module.clone()); Ok(module) @@ -234,8 +245,9 @@ impl ModuleCache { .serialize() .map_err(|e| wasm_error!(WasmErrorInner::ModuleBuild(e.to_string())))?; let module = Arc::new(unsafe { - Module::deserialize(&self.runtime_engine, serialized_module.clone()) - .map_err(|e| wasm_error!(WasmErrorInner::ModuleBuild(e.to_string())))? + Module::deserialize(&self.runtime_engine, serialized_module.clone()).map_err( + |e| wasm_error!(WasmErrorInner::ModuleDeserialize(e.to_string())), + )? }); // Save serialized module to filesystem cache @@ -311,6 +323,14 @@ impl ModuleCache { Ok(()) } + // Remove serialized module from filesystem cache + fn remove_from_filesystem_cache(&self, key: CacheKey) -> Result<(), std::io::Error> { + if let Some(fs_path) = self.filesystem_cache_module_path(key) { + std::fs::remove_file(fs_path)?; + } + + Ok(()) + } /// Check deserialized cache for module fn get_from_deserialized_cache(&self, key: CacheKey) -> Option> { let mut deserialized_cache = self.deserialized_module_cache.write(); diff --git a/crates/host/src/module/wasmer_sys.rs b/crates/host/src/module/wasmer_sys.rs index 81726f62..d40a5c06 100644 --- a/crates/host/src/module/wasmer_sys.rs +++ b/crates/host/src/module/wasmer_sys.rs @@ -220,4 +220,71 @@ mod tests { std::fs::remove_dir_all(tmp_fs_cache_dir).unwrap(); } + + #[test] + fn cache_get_from_fs_corrupt() { + // simple example wasm taken from wasmer docs + // https://docs.rs/wasmer/latest/wasmer/struct.Module.html#example + let wasm: Vec = vec![ + 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x06, 0x01, 0x60, 0x01, 0x7f, + 0x01, 0x7f, 0x03, 0x02, 0x01, 0x00, 0x07, 0x0b, 0x01, 0x07, 0x61, 0x64, 0x64, 0x5f, + 0x6f, 0x6e, 0x65, 0x00, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, 0x41, 0x01, + 0x6a, 0x0b, 0x00, 0x1a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x01, 0x0a, 0x01, 0x00, 0x07, + 0x61, 0x64, 0x64, 0x5f, 0x6f, 0x6e, 0x65, 0x02, 0x07, 0x01, 0x00, 0x01, 0x00, 0x02, + 0x70, 0x30, + ]; + + // Bad serialized_wasm + let bad_serialized_wasm = vec![0x00]; + + let tmp_fs_cache_dir = tempdir().unwrap().into_path(); + let module_cache = ModuleCache::new(make_engine, Some(tmp_fs_cache_dir.clone())); + let key: CacheKey = [0u8; 32]; + + // Build module, serialize, save directly to filesystem + let serialized_module_path = tmp_fs_cache_dir.clone().join(hex::encode(key)); + let mut file = std::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&serialized_module_path) + .unwrap(); + file.write_all(&bad_serialized_wasm).unwrap(); + + // Module cannot be retrieved from fs cache + let res = module_cache.get(key, &wasm); + assert!(res.is_err()); + + let compiler_engine = make_engine(); + let module = + std::sync::Arc::new(Module::from_binary(&compiler_engine, wasm.as_slice()).unwrap()); + let serialized_module = module.serialize().unwrap(); + + // 2nd time, module can be retrieved from cache + let res = module_cache.get(key, &wasm).unwrap(); + assert_eq!(*res.serialize().unwrap(), *serialized_module); + + // make sure module is stored in deserialized cache + { + let deserialized_cached_module = module_cache + .deserialized_module_cache + .write() + .get_item(&key) + .unwrap(); + assert_eq!( + *deserialized_cached_module.serialize().unwrap(), + *module.serialize().unwrap() + ); + } + + // make sure module has been stored in serialized filesystem cache + { + let serialized_module_path = module_cache + .serialized_filesystem_cache_path + .unwrap() + .join(hex::encode(key)); + assert!(std::fs::metadata(serialized_module_path).is_ok()); + } + + std::fs::remove_dir_all(tmp_fs_cache_dir).unwrap(); + } }