Skip to content

Commit

Permalink
feat: data + data related opcodes + tests
Browse files Browse the repository at this point in the history
Signed-off-by: nerodesu017 <[email protected]>
  • Loading branch information
nerodesu017 committed Nov 18, 2024
1 parent 1dee4d1 commit 55997e3
Show file tree
Hide file tree
Showing 14 changed files with 1,961 additions and 15 deletions.
8 changes: 7 additions & 1 deletion src/core/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use core::str::Utf8Error;
use crate::core::reader::section_header::SectionTy;
use crate::core::reader::types::ValType;

use super::indices::MemIdx;
use super::indices::{DataIdx, MemIdx};

#[derive(Debug, PartialEq, Eq, Clone)]
pub enum RuntimeError {
Expand Down Expand Up @@ -54,6 +54,8 @@ pub enum Error {
MemoryIsNotDefined(MemIdx),
// mem.align, wanted alignment
ErroneousAlignment(u32, u32),
NoDataSegments,
DataSegmentNotFound(DataIdx),
}

impl Display for Error {
Expand Down Expand Up @@ -140,6 +142,10 @@ impl Display for Error {
mem_align, minimum_wanted_alignment
))
}
Error::NoDataSegments => f.write_str("Data Count is None"),
Error::DataSegmentNotFound(data_idx) => {
f.write_fmt(format_args!("Data Segment {} not found", data_idx))
}
}
}
}
Expand Down
1 change: 0 additions & 1 deletion src/core/indices.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ pub type MemIdx = usize;
pub type GlobalIdx = usize;
#[allow(dead_code)]
pub type ElemIdx = usize;
#[allow(dead_code)]
pub type DataIdx = usize;
pub type LocalIdx = usize;
#[allow(dead_code)]
Expand Down
199 changes: 199 additions & 0 deletions src/core/reader/types/data.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
use core::fmt::{Debug, Formatter};

use alloc::{format, vec::Vec};

use crate::{
core::{indices::MemIdx, reader::{span::Span, WasmReadable}},
read_constant_expression::read_constant_instructions,
};

use super::UnwrapValidatedExt;

/// A single data segment as decoded by the `WasmReadable` implementation in this file.
pub struct DataSegment {
/// Raw bytes of the segment, read from the module as a byte vector.
pub init: Vec<u8>,
/// Whether the segment is passive or active (and, when active, its target).
pub mode: DataMode,
}

/// How a data segment is meant to be applied.
#[derive(Clone)]
pub enum DataMode {
/// Segment without a target of its own; per the reader's own comment its contents
/// can be copied into a memory using the `memory.init` instruction.
Passive,
/// Segment that carries a memory index and an offset expression.
Active(DataModeActive),
}

/// Target description of an active data segment.
#[derive(Clone)]
pub struct DataModeActive {
/// Index of the targeted memory; the reader in this file always sets it to 0.
pub memory_idx: MemIdx,
/// Span returned by `read_constant_instructions` for the offset expression
/// (presumably its position inside the module bytes - confirm against `Span`).
pub offset: Span,
}

impl WasmReadable for DataSegment {
    /// Reads one data segment from `wasm`, forwarding any reader error to the caller.
    ///
    /// The leading `u32` selects the kind of segment
    /// (see <https://webassembly.github.io/spec/core/binary/modules.html#data-section>):
    ///
    /// * `0` - active segment for memory 0: an offset expression, then the byte vector
    /// * `1` - passive segment: only the byte vector
    /// * `2` - active segment with an explicit memory index (not supported yet)
    ///
    /// # Panics
    ///
    /// Hits `todo!` for kind `2` and `unreachable!` for every other value.
    fn read(wasm: &mut crate::core::reader::WasmReader) -> crate::Result<Self> {
        let kind = wasm.read_var_u32()?;
        let segment = match kind {
            0 => {
                trace!("Data section: active");
                let offset = read_constant_instructions(wasm, None, None)?;
                let init = wasm.read_vec(|reader| reader.read_u8())?;

                // NOTE: offsets that involve globals are not given any special treatment yet.
                DataSegment {
                    init,
                    mode: DataMode::Active(DataModeActive {
                        memory_idx: 0,
                        offset,
                    }),
                }
            }
            1 => {
                // Passive segments carry bytes only; they are copied into a memory
                // later on through the `memory.init` instruction.
                trace!("Data section: passive");
                let init = wasm.read_vec(|reader| reader.read_u8())?;

                DataSegment {
                    init,
                    mode: DataMode::Passive,
                }
            }
            // Bit 1 of the leading integer announces an explicit memory index. The
            // spec text quoted by the original author says at most one memory may
            // exist per module, so this form is left unimplemented here.
            2 => todo!("Data section: active - with multiple memories - NOT YET IMPLEMENTED!"),
            _ => unreachable!(),
        };

        trace!("{:?}", segment.init);
        Ok(segment)
    }

    /// Same decoding as [`Self::read`], but every reader result is unwrapped with
    /// `unwrap_validated` instead of being propagated.
    ///
    /// # Panics
    ///
    /// Hits `todo!` for kind `2` and `unreachable!` for every other value.
    fn read_unvalidated(wasm: &mut crate::core::reader::WasmReader) -> Self {
        let kind = wasm.read_var_u32().unwrap_validated();
        match kind {
            0 => {
                trace!("Data section: active");
                let offset = read_constant_instructions(wasm, None, None).unwrap_validated();
                let init = wasm
                    .read_vec(|reader| Ok(reader.read_u8().unwrap_validated()))
                    .unwrap_validated();

                // NOTE: offsets that involve globals are not given any special treatment yet.
                DataSegment {
                    init,
                    mode: DataMode::Active(DataModeActive {
                        memory_idx: 0,
                        offset,
                    }),
                }
            }
            1 => {
                // Passive segments carry bytes only; they are copied into a memory
                // later on through the `memory.init` instruction.
                trace!("Data section: passive");
                let init = wasm
                    .read_vec(|reader| Ok(reader.read_u8().unwrap_validated()))
                    .unwrap_validated();

                DataSegment {
                    init,
                    mode: DataMode::Passive,
                }
            }
            // Explicit memory index form; left unimplemented, see `read`.
            2 => todo!("Data section: active - with multiple memories - NOT YET IMPLEMENTED!"),
            _ => unreachable!(),
        }
    }
}

impl Debug for DataSegment {
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
let mut init_str = alloc::string::String::new();

let iter = self.init.iter().peekable();
// only if it's valid do we print is as a normal utf-8 char, otherwise, hex
for &byte in iter {
if let Ok(valid_char) = alloc::string::String::from_utf8(Vec::from(&[byte])) {
init_str.push_str(valid_char.as_str());
} else {
init_str.push_str(&format!("\\x{:02x}", byte));
}
}

f.debug_struct("DataSegment")
.field("init", &init_str)
.field("mode", &self.mode)
.finish()
}
}

/// Debug output for the mode of a data segment.
///
/// A typical active segment looks like this in the text format:
/// ```wasm
/// (module
///   (memory 1)                  ;; one memory with an initial size of 1 page
///   (data (i32.const 0) "abc")  ;; copy the bytes "abc" into memory 0 at offset 0
/// )
/// ```
/// Hard-coded offsets are normally written as `i32.const`, since linear memory is
/// addressed with 32-bit indices.
///
/// The offset is stored as a span exposing a start (`from`) and a length (`len()`),
/// so the output shows the first and last position it covers, both inclusive (note
/// the `..=`). Both are printed in decimal and in hexadecimal, which makes them easy
/// to look up in a tool such as <https://webassembly.github.io/wabt/demo/wat2wasm/>.
/// Only the offset is printed for an active segment; the memory index is not.
impl Debug for DataMode {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        let active = match self {
            DataMode::Passive => return f.debug_struct("Passive").finish(),
            DataMode::Active(active) => active,
        };

        let first = active.offset.from;
        // Last covered position: start + length - 1.
        let last = active.offset.from + active.offset.len() - 1;

        f.debug_struct("Active")
            .field(
                "offset",
                &format_args!("[{}..={}] (hex = [{:X}..={:X}])", first, last, first, last),
            )
            .finish()
    }
}

/// Byte payload of a passive data segment (going by the name; nothing else in this
/// file constructs or consumes it).
pub struct PassiveData {
    /// Raw bytes of the segment.
    pub init: Vec<u8>,
}

impl Debug for PassiveData {
    /// Formats the value as `PassiveData { init: "..." }`.
    ///
    /// Every ASCII byte (the only bytes that are valid UTF-8 on their own) is
    /// appended as the matching character, every other byte as a `\xNN` lowercase
    /// hex escape. The resulting string is then passed through the regular `Debug`
    /// formatting of strings, which applies its own escaping.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        // At least one output character per input byte, so reserve that much up front.
        let mut init_str = alloc::string::String::with_capacity(self.init.len());

        for &byte in &self.init {
            if byte.is_ascii() {
                // A lone byte is valid UTF-8 exactly when it is ASCII; no need to
                // allocate a temporary buffer just to find that out.
                init_str.push(char::from(byte));
            } else {
                init_str.push_str(&format!("\\x{:02x}", byte));
            }
        }

        f.debug_struct("PassiveData")
            .field("init", &init_str)
            .finish()
    }
}
1 change: 1 addition & 0 deletions src/core/reader/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::execution::assert_validated::UnwrapValidatedExt;
use crate::Result;
use crate::{unreachable_validated, Error};

pub mod data;
pub mod export;
pub mod function_code_header;
pub mod global;
Expand Down
4 changes: 4 additions & 0 deletions src/core/reader/types/opcode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,8 @@ pub mod fc_extensions {
pub const I64_TRUNC_SAT_F32_U: u8 = 0x05;
pub const I64_TRUNC_SAT_F64_S: u8 = 0x06;
pub const I64_TRUNC_SAT_F64_U: u8 = 0x07;
// Bulk-memory opcodes. Going by the enclosing module name (`fc_extensions`), these
// values are the secondary opcode that follows the 0xFC prefix byte.
// `memory.init`
pub const MEMORY_INIT: u8 = 0x08;
// `data.drop`
pub const DATA_DROP: u8 = 0x09;
// `memory.copy`
pub const MEMORY_COPY: u8 = 0x0A;
// `memory.fill`
pub const MEMORY_FILL: u8 = 0x0B;
}
Loading

0 comments on commit 55997e3

Please sign in to comment.