From 1d2cd50ee0a900e6b3a323b1cac82b865c0171cb Mon Sep 17 00:00:00 2001 From: nerodesu017 <46645625+nerodesu017@users.noreply.github.com> Date: Thu, 4 Jul 2024 13:41:20 +0300 Subject: [PATCH] refactor: magic bytes to constant op codes --- src/core/error.rs | 2 +- src/core/mod.rs | 1 + src/core/opcodes.rs | 21 ++++++++ src/core/reader/mod.rs | 30 +++++++----- src/core/reader/types/values.rs | 61 +++++++++++++++++++---- src/execution/mod.rs | 56 +++++++++++++++------ src/validation/code.rs | 86 ++++++++++++++++++--------------- 7 files changed, 180 insertions(+), 77 deletions(-) create mode 100644 src/core/opcodes.rs diff --git a/src/core/error.rs b/src/core/error.rs index 8baf17f9..e8ccfbda 100644 --- a/src/core/error.rs +++ b/src/core/error.rs @@ -28,7 +28,7 @@ pub enum Error { InvalidExportDesc(u8), InvalidImportDesc(u8), ExprMissingEnd, - InvalidInstr(u8), + InvalidInstr(u16), EndInvalidValueStack, InvalidLocalIdx, InvalidValueStackType(Option), diff --git a/src/core/mod.rs b/src/core/mod.rs index 92d3b701..6a75cffd 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -1,4 +1,5 @@ pub mod error; pub mod indices; +pub mod opcodes; pub mod reader; diff --git a/src/core/opcodes.rs b/src/core/opcodes.rs new file mode 100644 index 00000000..67a78988 --- /dev/null +++ b/src/core/opcodes.rs @@ -0,0 +1,21 @@ +pub const NOP: u8 = 0x01; +pub const END: u8 = 0x0B; +pub const LOCAL_GET: u8 = 0x20; +pub const LOCAL_SET: u8 = 0x21; +pub const GLOBAL_GET: u8 = 0x23; +pub const GLOBAL_SET: u8 = 0x24; +pub const I32_LOAD: u8 = 0x28; +pub const I32_STORE: u8 = 0x36; +pub const I32_CONST: u8 = 0x41; +pub const I32_ADD: u8 = 0x6A; +pub const I32_MUL: u8 = 0x6C; +pub const I32_DIV_S: u8 = 0x6D; +pub const I32_DIV_U: u8 = 0x6E; +pub const FB_INSTRUCTIONS: u8 = 0xFB; +pub const FC_INSTRUCTIONS: u8 = 0xFC; +pub const FD_INSTRUCTIONS: u8 = 0xFD; +pub const FE_INSTRUCTIONS: u8 = 0xFE; + +pub mod fc_opcodes { + pub const I32_TRUNC_SAT_F32S: u8 = 0x00; +} diff --git a/src/core/reader/mod.rs b/src/core/reader/mod.rs index b70024b9..405bf57e 100644 --- a/src/core/reader/mod.rs +++ b/src/core/reader/mod.rs @@ -8,47 +8,51 @@ pub mod types; /// Its purpose is mostly to abstract parsing basic WASM values from the bytecode. pub struct WasmReader<'a> { pub(crate) full_contents: &'a [u8], - pub(crate) current: &'a [u8], + pub(crate) pc: usize, } impl<'a> WasmReader<'a> { pub fn new(wasm: &'a [u8]) -> Self { Self { full_contents: wasm, - current: wasm, + pc: 0, } } // TODO this is not very intuitive but we cannot shorten `self.current`'s end // because some methods rely on the property that `self.current`'s and // `self.full_contents`'s last element are equal. pub fn move_start_to(&mut self, span: Span) { - self.current = - &self.full_contents[span.from../* normally we would have the end of the span here*/]; + self.pc = span.from; } pub fn remaining_bytes(&self) -> &[u8] { - self.current + &self.full_contents[self.pc..] } pub fn current_idx(&self) -> usize { - self.full_contents.len() - self.current.len() + self.pc } pub fn make_span(&self, len: usize) -> Span { Span::new(self.current_idx(), len) } pub fn strip_bytes(&mut self) -> Result<[u8; N]> { - if N > self.current.len() { + if N > (self.full_contents.len() - self.pc) { return Err(Error::Eof); } - let (bytes, rest) = self.current.split_at(N); - self.current = rest; + let (bytes, _) = self.full_contents[self.pc..].split_at(N); + + self.pc += N; Ok(bytes.try_into().expect("the slice length to be exactly N")) } pub fn peek_u8(&self) -> Result { - self.current.first().copied().ok_or(Error::Eof) + if self.pc >= self.full_contents.len() { + Err(Error::Eof) + } else { + Ok(self.full_contents[self.pc]) + } } pub fn measure_num_read_bytes( @@ -63,10 +67,12 @@ impl<'a> WasmReader<'a> { } pub fn skip(&mut self, num_bytes: usize) -> Result<()> { - if self.current.len() < num_bytes { + if self.full_contents.len() - self.pc < num_bytes { return Err(Error::Eof); } - self.current = &self.current[num_bytes..]; + + self.pc += num_bytes; + Ok(()) } pub fn into_inner(self) -> &'a [u8] { diff --git a/src/core/reader/types/values.rs b/src/core/reader/types/values.rs index 35a1eece..37dbd85f 100644 --- a/src/core/reader/types/values.rs +++ b/src/core/reader/types/values.rs @@ -8,20 +8,26 @@ use alloc::vec::Vec; use core::mem; +use crate::core::opcodes::{FB_INSTRUCTIONS, FC_INSTRUCTIONS, FD_INSTRUCTIONS, FE_INSTRUCTIONS}; use crate::core::reader::WasmReader; use crate::{Error, Result}; impl WasmReader<'_> { /// Note: If `Err`, the [Wasm] object is no longer guaranteed to be in a valid state pub fn read_u8(&mut self) -> Result { - let value = *self.current.first().ok_or(Error::Eof)?; + if self.full_contents.len() <= self.pc { + return Err(Error::Eof); + } + + let byte = self.full_contents[self.pc]; - self.current = self - .current - .get(1..) - .expect("slice to contain at least 1 element"); + self.pc += 1; + + if self.full_contents.len() - self.pc == 0 { + panic!("slice to contain at least 1 element") + } - Ok(value) + Ok(byte) } /// Parses a variable-length `u32` as specified by [LEB128](https://en.wikipedia.org/wiki/LEB128#Unsigned_LEB128). @@ -66,11 +72,13 @@ impl WasmReader<'_> { pub fn read_name(&mut self) -> Result<&str> { let len = self.read_var_u32()? as usize; - if len > self.current.len() { + if len > (self.full_contents.len() - self.pc) { return Err(Error::Eof); } - let (utf8_str, rest) = self.current.split_at(len); // Cannot panic because check is done above - self.current = rest; + + let (utf8_str, _) = self.full_contents[self.pc..].split_at(len); + + self.pc += utf8_str.len(); core::str::from_utf8(utf8_str).map_err(Error::MalformedUtf8String) } @@ -95,6 +103,41 @@ impl WasmReader<'_> { let len = self.read_var_u32()?; (0..len).map(|_| read_element(self)).collect() } + + pub fn read_instruction(&mut self) -> Result<&[u8]> { + // if self.pc.is_empty() { + // return Err(Error::Eof); + // } + if self.full_contents.len() - self.pc == 0 { + return Err(Error::Eof); + } + + match self.full_contents[self.pc..] { + // check if we are at a multibyte (2 byte) instruction + [FB_INSTRUCTIONS, _, ..] + | [FC_INSTRUCTIONS, _, ..] + | [FD_INSTRUCTIONS, _, ..] + | [FE_INSTRUCTIONS, _, ..] => { + let bytes = &self.full_contents[self.pc..self.pc + 2]; + trace!("Multibyte instruction: 0x{:2X} 0x{:2X}", bytes[0], bytes[1]); + self.strip_bytes::<2>()?; + Ok(bytes) + } + // if we aren't at a multibyte instruction, we are at a 1 byte instruction + [_, ..] => { + let bytes = &self.full_contents[self.pc..self.pc + 1]; + trace!("Single instruction: 0x{:2X}", bytes[0]); + self.strip_bytes::<1>()?; + Ok(bytes) + } + _ => { + unreachable!() + } + } + + // TODO: implement + // Ok(&[1]) + } } #[cfg(test)] diff --git a/src/execution/mod.rs b/src/execution/mod.rs index 8ce3fc4b..fbe7a71f 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -3,6 +3,7 @@ use alloc::vec::Vec; use value_stack::Stack; use crate::core::indices::{FuncIdx, GlobalIdx, LocalIdx}; +use crate::core::opcodes::*; use crate::core::reader::types::memarg::MemArg; use crate::core::reader::types::{FuncType, NumType, ValType}; use crate::core::reader::{WasmReadable, WasmReader}; @@ -12,7 +13,7 @@ use crate::execution::store::{FuncInst, GlobalInst, MemInst, Store}; use crate::execution::value::Value; use crate::validation::code::read_declared_locals; use crate::value::InteropValueList; -use crate::Error::RuntimeError; +use crate::Error::{self, RuntimeError}; use crate::RuntimeError::{DivideBy0, UnrepresentableResult}; use crate::{Result, ValidationInfo}; @@ -111,20 +112,26 @@ impl<'b> RuntimeInstance<'b> { wasm.move_start_to(inst.code_expr); loop { - match wasm.read_u8().unwrap_validated() { + let instr = &wasm.full_contents[wasm.pc..]; + if instr.len() == 0 { + return Err(Error::Eof); + } + wasm.strip_bytes::<1>()?; + trace!("Read instruction byte {:#x?}", instr.get(0).unwrap()); + match instr { // end - 0x0B => { + [NOP, ..] | [END, ..] => { break; } // local.get: [] -> [t] - 0x20 => { + [LOCAL_GET, ..] => { let local_idx = wasm.read_var_u32().unwrap_validated() as LocalIdx; let local = locals.get(local_idx); trace!("Instruction: local.get [] -> [{local:?}]"); stack.push_value(local.clone()); } // local.set [t] -> [] - 0x21 => { + [LOCAL_SET, ..] => { let local_idx = wasm.read_var_u32().unwrap_validated() as LocalIdx; let local = locals.get_mut(local_idx); let value = stack.pop_value(local.to_ty()); @@ -132,21 +139,21 @@ impl<'b> RuntimeInstance<'b> { *local = value; } // global.get [] -> [t] - 0x23 => { + [GLOBAL_GET, ..] => { let global_idx = wasm.read_var_u32().unwrap_validated() as GlobalIdx; let global = self.store.globals.get(global_idx).unwrap_validated(); stack.push_value(global.value.clone()); } // global.set [t] -> [] - 0x24 => { + [GLOBAL_SET, ..] => { let global_idx = wasm.read_var_u32().unwrap_validated() as GlobalIdx; let global = self.store.globals.get_mut(global_idx).unwrap_validated(); global.value = stack.pop_value(global.global.ty.ty) } // i32.load [i32] -> [i32] - 0x28 => { + [I32_LOAD, ..] => { let memarg = MemArg::read_unvalidated(&mut wasm); let relative_address: u32 = stack.pop_value(ValType::NumType(NumType::I32)).into(); @@ -174,7 +181,7 @@ impl<'b> RuntimeInstance<'b> { trace!("Instruction: i32.load [{relative_address}] -> [{data}]"); } // i32.store [i32] -> [i32] - 0x36 => { + [I32_STORE, ..] => { let memarg = MemArg::read_unvalidated(&mut wasm); let data_to_store: u32 = stack.pop_value(ValType::NumType(NumType::I32)).into(); @@ -197,13 +204,13 @@ impl<'b> RuntimeInstance<'b> { trace!("Instruction: i32.store [{relative_address} {data_to_store}] -> []"); } // i32.const: [] -> [i32] - 0x41 => { + [I32_CONST, ..] => { let constant = wasm.read_var_i32().unwrap_validated(); trace!("Instruction: i32.const [] -> [{constant}]"); stack.push_value(constant.into()); } // i32.add: [i32 i32] -> [i32] - 0x6A => { + [I32_ADD, ..] => { let v1: i32 = stack.pop_value(ValType::NumType(NumType::I32)).into(); let v2: i32 = stack.pop_value(ValType::NumType(NumType::I32)).into(); let res = v1.wrapping_add(v2); @@ -212,7 +219,7 @@ impl<'b> RuntimeInstance<'b> { stack.push_value(res.into()); } // i32.mul: [i32 i32] -> [i32] - 0x6C => { + [I32_MUL, ..] => { let v1: i32 = stack.pop_value(ValType::NumType(NumType::I32)).into(); let v2: i32 = stack.pop_value(ValType::NumType(NumType::I32)).into(); let res = v1.wrapping_mul(v2); @@ -221,7 +228,7 @@ impl<'b> RuntimeInstance<'b> { stack.push_value(res.into()); } // i32.div_s: [i32 i32] -> [i32] - 0x6D => { + [I32_DIV_S, ..] => { let dividend: i32 = stack.pop_value(ValType::NumType(NumType::I32)).into(); let divisor: i32 = stack.pop_value(ValType::NumType(NumType::I32)).into(); @@ -238,7 +245,7 @@ impl<'b> RuntimeInstance<'b> { stack.push_value(res.into()); } // i32.div_u: [i32 i32] -> [i32] - 0x6E => { + [I32_DIV_U, ..] => { let dividend: i32 = stack.pop_value(ValType::NumType(NumType::I32)).into(); let divisor: i32 = stack.pop_value(ValType::NumType(NumType::I32)).into(); @@ -254,9 +261,28 @@ impl<'b> RuntimeInstance<'b> { trace!("Instruction: i32.div_u [{divisor} {dividend}] -> [{res}]"); stack.push_value(res.into()); } - other => { + [FB_INSTRUCTIONS, _, ..] => { + wasm.strip_bytes::<1>()?; + unimplemented!() + } + [FC_INSTRUCTIONS, _, ..] => { + wasm.strip_bytes::<1>()?; + unimplemented!() + } + [FD_INSTRUCTIONS, _, ..] => { + wasm.strip_bytes::<1>()?; + unimplemented!() + } + [FE_INSTRUCTIONS, _, ..] => { + wasm.strip_bytes::<1>()?; + unimplemented!() + } + [other, ..] => { trace!("Unknown instruction {other:#x}, skipping.."); } + &[] => { + unreachable!() + } } } Ok(()) diff --git a/src/validation/code.rs b/src/validation/code.rs index 7dcf5c3d..92bbb99e 100644 --- a/src/validation/code.rs +++ b/src/validation/code.rs @@ -1,8 +1,10 @@ use alloc::collections::VecDeque; use alloc::vec::Vec; +use fc_opcodes::I32_TRUNC_SAT_F32S; use core::iter; use crate::core::indices::{GlobalIdx, LocalIdx}; +use crate::core::opcodes::*; use crate::core::reader::section_header::{SectionHeader, SectionTy}; use crate::core::reader::span::Span; use crate::core::reader::types::global::Global; @@ -82,25 +84,27 @@ fn read_instructions( }; loop { - let Ok(instr) = wasm.read_u8() else { - return Err(Error::ExprMissingEnd); - }; - trace!("Read instruction byte {instr:#x?} ({instr})"); + let instr = &wasm.full_contents[wasm.pc..]; + if instr.len() == 0 { + return Err(Error::Eof); + } + wasm.strip_bytes::<1>()?; + trace!("Read instruction byte {:#x?}", instr.get(0).unwrap()); match instr { // nop - 0x01 => {} + [NOP, ..] => {} // end - 0x0B => { + [END, ..] => { return Ok(()); } // local.get: [] -> [t] - 0x20 => { + [LOCAL_GET, ..] => { let local_idx = wasm.read_var_u32()? as LocalIdx; let local_ty = locals.get(local_idx).ok_or(Error::InvalidLocalIdx)?; value_stack.push_back(*local_ty); } // local.set [t] -> [0] - 0x21 => { + [LOCAL_SET, ..] => { let local_idx = wasm.read_var_u32()? as LocalIdx; let local_ty = locals.get(local_idx).ok_or(Error::InvalidLocalIdx)?; let popped = value_stack.pop_back(); @@ -109,7 +113,7 @@ fn read_instructions( } } // global.get [] -> [t] - 0x23 => { + [GLOBAL_GET, ..] => { let global_idx = wasm.read_var_u32()? as GlobalIdx; let global = globals .get(global_idx) @@ -118,7 +122,7 @@ fn read_instructions( value_stack.push_back(global.ty.ty); } // global.set [t] -> [] - 0x24 => { + [GLOBAL_SET, ..] => { let global_idx = wasm.read_var_u32()? as GlobalIdx; let global = globals .get(global_idx) @@ -137,7 +141,7 @@ fn read_instructions( } } // i32.load [i32] -> [i32] - 0x28 => { + [I32_LOAD, ..] => { let _memarg = MemArg::read_unvalidated(wasm); // TODO check correct `memarg.align` @@ -148,7 +152,7 @@ fn read_instructions( value_stack.push_back(ValType::NumType(NumType::I32)); } // i32.store [i32] -> [i32] - 0x36 => { + [I32_STORE, ..] => { let _memarg = MemArg::read_unvalidated(wasm); // TODO check correct `memarg.align` @@ -159,45 +163,47 @@ fn read_instructions( // Value to store assert_pop_value_stack(value_stack, ValType::NumType(NumType::I32))?; } - // i32.add: [i32 i32] -> [i32] - 0x6A => { - // First value - assert_pop_value_stack(value_stack, ValType::NumType(NumType::I32))?; - // Second value - assert_pop_value_stack(value_stack, ValType::NumType(NumType::I32))?; - + [I32_CONST, ..] => { + let _num = wasm.read_var_i32()?; value_stack.push_back(ValType::NumType(NumType::I32)); } - 0x6C => { - // First value + [I32_ADD, ..] | [I32_MUL, ..] | [I32_DIV_S, ..] | [I32_DIV_U, ..] => { assert_pop_value_stack(value_stack, ValType::NumType(NumType::I32))?; - // Second value assert_pop_value_stack(value_stack, ValType::NumType(NumType::I32))?; value_stack.push_back(ValType::NumType(NumType::I32)); } - // i32.div_s: [i32 i32] -> [i32] - 0x6D => { - assert_pop_value_stack(value_stack, ValType::NumType(NumType::I32))?; - assert_pop_value_stack(value_stack, ValType::NumType(NumType::I32))?; - - value_stack.push_back(ValType::NumType(NumType::I32)); + [FB_INSTRUCTIONS, _, ..] => { + wasm.strip_bytes::<1>()?; + unimplemented!() } - // i32.div_u: [i32 i32] -> [i32] - 0x6E => { - assert_pop_value_stack(value_stack, ValType::NumType(NumType::I32))?; - assert_pop_value_stack(value_stack, ValType::NumType(NumType::I32))?; - - value_stack.push_back(ValType::NumType(NumType::I32)); + [FC_INSTRUCTIONS, I32_TRUNC_SAT_F32S, ..] => { + wasm.strip_bytes::<1>()?; + unimplemented!() } - // i32.const: [] -> [i32] - 0x41 => { - let _num = wasm.read_var_i32()?; - value_stack.push_back(ValType::NumType(NumType::I32)); + [FC_INSTRUCTIONS, _, ..] => { + wasm.strip_bytes::<1>()?; + unimplemented!() + } + [FD_INSTRUCTIONS, _, ..] => { + wasm.strip_bytes::<1>()?; + unimplemented!() } - other => { - return Err(Error::InvalidInstr(other)); + [FE_INSTRUCTIONS, _, ..] => { + wasm.strip_bytes::<1>()?; + unimplemented!() } + + other => match other[0] { + FB_INSTRUCTIONS | FC_INSTRUCTIONS | FD_INSTRUCTIONS | FE_INSTRUCTIONS => { + return Err(Error::InvalidInstr( + (other[0] as u16) << 8 | (other[1] as u16), + )); + } + _ => { + return Err(Error::InvalidInstr(other[0] as u16)); + } + }, } } }