Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev/memory #92

Merged
merged 13 commits into from
Nov 19, 2024
8 changes: 7 additions & 1 deletion src/core/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use core::str::Utf8Error;
use crate::core::reader::section_header::SectionTy;
use crate::core::reader::types::ValType;

use super::indices::MemIdx;
use super::indices::{DataIdx, MemIdx};

#[derive(Debug, PartialEq, Eq, Clone)]
pub enum RuntimeError {
Expand Down Expand Up @@ -54,6 +54,8 @@ pub enum Error {
MemoryIsNotDefined(MemIdx),
// mem.align, wanted alignment
ErroneousAlignment(u32, u32),
NoDataSegments,
DataSegmentNotFound(DataIdx),
}

impl Display for Error {
Expand Down Expand Up @@ -140,6 +142,10 @@ impl Display for Error {
mem_align, minimum_wanted_alignment
))
}
Error::NoDataSegments => f.write_str("Data Count is None"),
Error::DataSegmentNotFound(data_idx) => {
f.write_fmt(format_args!("Data Segment {} not found", data_idx))
}
}
}
}
Expand Down
1 change: 0 additions & 1 deletion src/core/indices.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ pub type MemIdx = usize;
pub type GlobalIdx = usize;
#[allow(dead_code)]
pub type ElemIdx = usize;
#[allow(dead_code)]
pub type DataIdx = usize;
pub type LocalIdx = usize;
#[allow(dead_code)]
Expand Down
199 changes: 199 additions & 0 deletions src/core/reader/types/data.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
use core::fmt::{Debug, Formatter};

use alloc::{format, vec::Vec};

use crate::{
core::{indices::MemIdx, reader::{span::Span, WasmReadable}},
read_constant_expression::read_constant_instructions,
};

use super::UnwrapValidatedExt;

/// One entry of the WebAssembly data section, as produced by the
/// `WasmReadable` implementation for this type.
pub struct DataSegment {
/// The segment's payload bytes, read from the binary as a byte vector.
pub init: Vec<u8>,
/// Whether the segment is passive or active (see `DataMode`).
pub mode: DataMode,
}

/// The mode a data segment was declared with.
#[derive(Clone)]
pub enum DataMode {
/// Passive segment: its contents can be copied into a memory using the
/// `memory.init` instruction.
Passive,
/// Active segment: carries the target memory index and the offset expression.
Active(DataModeActive),
}

/// Placement information of an active data segment.
#[derive(Clone)]
pub struct DataModeActive {
/// Index of the memory the segment targets. The reader in this file always
/// stores `0` here.
pub memory_idx: MemIdx,
/// Span returned by `read_constant_instructions` for the segment's offset
/// expression.
// NOTE(review): presumably this covers the constant expression's bytes inside
// the module binary - confirm against `read_constant_instructions`.
pub offset: Span,
}

impl WasmReadable for DataSegment {
    /// Parses one data segment from `wasm`, propagating any reader error with `?`.
    ///
    /// The leading `u32` selects the kind of segment:
    /// * `0` - active segment for memory 0: an offset constant expression, then the bytes,
    /// * `1` - passive segment: only the bytes,
    /// * `2` - active segment with an explicit memory index (not implemented).
    ///
    /// # Panics
    ///
    /// Kind `2` hits `todo!` and every other kind hits `unreachable!`.
    // NOTE(review): both panics are driven directly by the input bytes on this
    // fallible path - consider returning an error instead.
    fn read(wasm: &mut crate::core::reader::WasmReader) -> crate::Result<Self> {
        let segment = match wasm.read_var_u32()? {
            0 => {
                // active { memory 0, offset e }
                trace!("Data section: active");
                let offset = read_constant_instructions(wasm, None, None)?;
                let init = wasm.read_vec(|reader| reader.read_u8())?;

                // WARN: how to behave when globals are involved here is currently not taken into account
                let placement = DataModeActive {
                    memory_idx: 0,
                    offset,
                };
                Self {
                    init,
                    mode: DataMode::Active(placement),
                }
            }
            1 => {
                // passive: the contents can later be copied into a memory with `memory.init`
                trace!("Data section: passive");
                let init = wasm.read_vec(|reader| reader.read_u8())?;
                Self {
                    init,
                    mode: DataMode::Passive,
                }
            }
            2 => {
                // active { memory x, offset e } - not implemented yet.
                // https://webassembly.github.io/spec/core/binary/modules.html#data-section
                // The initial integer can be read as a bitfield: bit 0 marks a passive segment,
                // bit 1 marks the presence of an explicit memory index for an active segment.
                // With at most one memory per module, all valid active segments use memory 0.
                todo!("Data section: active - with multiple memories - NOT YET IMPLEMENTED!");
            }
            _ => unreachable!(),
        };

        trace!("{:?}", segment.init);
        Ok(segment)
    }

    /// Parses one data segment from `wasm` with the same layout as `read`, but
    /// every reader result is unwrapped with `unwrap_validated` instead of
    /// being propagated.
    ///
    /// # Panics
    ///
    /// Kind `2` hits `todo!` and every other kind hits `unreachable!`.
    fn read_unvalidated(wasm: &mut crate::core::reader::WasmReader) -> Self {
        match wasm.read_var_u32().unwrap_validated() {
            0 => {
                // active { memory 0, offset e }
                trace!("Data section: active");
                let offset = read_constant_instructions(wasm, None, None).unwrap_validated();
                let init = wasm
                    .read_vec(|reader| Ok(reader.read_u8().unwrap_validated()))
                    .unwrap_validated();

                // WARN: how to behave when globals are involved here is currently not taken into account
                let placement = DataModeActive {
                    memory_idx: 0,
                    offset,
                };
                Self {
                    init,
                    mode: DataMode::Active(placement),
                }
            }
            1 => {
                // passive: the contents can later be copied into a memory with `memory.init`
                trace!("Data section: passive");
                let init = wasm
                    .read_vec(|reader| Ok(reader.read_u8().unwrap_validated()))
                    .unwrap_validated();
                Self {
                    init,
                    mode: DataMode::Passive,
                }
            }
            2 => {
                // active { memory x, offset e } - not implemented yet.
                // https://webassembly.github.io/spec/core/binary/modules.html#data-section
                // The initial integer can be read as a bitfield: bit 0 marks a passive segment,
                // bit 1 marks the presence of an explicit memory index for an active segment.
                // With at most one memory per module, all valid active segments use memory 0.
                todo!("Data section: active - with multiple memories - NOT YET IMPLEMENTED!");
            }
            _ => unreachable!(),
        }
    }
}

impl Debug for DataSegment {
    /// Formats the segment as `DataSegment { init: "...", mode: ... }`.
    ///
    /// `init` is rendered as text: ASCII bytes are appended as characters, every
    /// other byte as a `\xNN` hex escape. The resulting string is then passed
    /// through `String`'s own `Debug` formatting by `debug_struct`.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        // Every byte contributes at least one character, so reserve that much up front.
        let mut init_str = alloc::string::String::with_capacity(self.init.len());

        for &byte in &self.init {
            // A single byte is valid UTF-8 exactly when it is ASCII, so this check
            // replaces a per-byte `String::from_utf8` round trip (and its allocations).
            if byte.is_ascii() {
                init_str.push(char::from(byte));
            } else {
                init_str.push_str(&format!("\\x{:02x}", byte));
            }
        }

        f.debug_struct("DataSegment")
            .field("init", &init_str)
            .field("mode", &self.mode)
            .finish()
    }
}

/// Debug rendering of a data segment's mode.
///
/// A typical active segment looks like this in the text format:
/// ```wasm
/// (module
///   (memory 1)                  ;; memory starting with 1 page
///   (data (i32.const 0) "abc")  ;; write the bytes "abc" into the first memory (0) at offset 0
///                               ;; hard-coded offsets are usually given as `i32.const`
///                               ;; (32-bit memory addresses; wasm is not tied to x86)
/// )
/// ```
///
/// The offset span stores only a start and a length, so it is printed as an
/// inclusive range `start..=start + len - 1` (note the `..=`), in both decimal and
/// hexadecimal, which makes it easy to trace in something like
/// <https://webassembly.github.io/wabt/demo/wat2wasm/>
impl Debug for DataMode {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        let active = match self {
            DataMode::Passive => return f.debug_struct("Passive").finish(),
            DataMode::Active(active) => active,
        };

        // Both bounds are inclusive: `last` is the final position covered by the span.
        let first = active.offset.from;
        let last = active.offset.from + active.offset.len() - 1;

        f.debug_struct("Active")
            .field(
                "offset",
                &format_args!("[{}..={}] (hex = [{:X}..={:X}])", first, last, first, last),
            )
            .finish()
    }
}

/// Payload of a passive data segment.
pub struct PassiveData {
    /// The raw bytes of the segment.
    pub init: Vec<u8>,
}

impl Debug for PassiveData {
    /// Formats the value as `PassiveData { init: "..." }`.
    ///
    /// `init` is rendered as text: ASCII bytes are appended as characters, every
    /// other byte as a `\xNN` hex escape. The resulting string is then passed
    /// through `String`'s own `Debug` formatting by `debug_struct`.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        // Every byte contributes at least one character, so reserve that much up front.
        let mut init_str = alloc::string::String::with_capacity(self.init.len());

        for &byte in &self.init {
            // A single byte is valid UTF-8 exactly when it is ASCII, so this check
            // replaces a per-byte `String::from_utf8` round trip (and its allocations).
            if byte.is_ascii() {
                init_str.push(char::from(byte));
            } else {
                init_str.push_str(&format!("\\x{:02x}", byte));
            }
        }

        f.debug_struct("PassiveData")
            .field("init", &init_str)
            .finish()
    }
}
1 change: 1 addition & 0 deletions src/core/reader/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::execution::assert_validated::UnwrapValidatedExt;
use crate::Result;
use crate::{unreachable_validated, Error};

pub mod data;
pub mod export;
pub mod function_code_header;
pub mod global;
Expand Down
4 changes: 4 additions & 0 deletions src/core/reader/types/opcode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,8 @@ pub mod fc_extensions {
pub const I64_TRUNC_SAT_F32_U: u8 = 0x05;
pub const I64_TRUNC_SAT_F64_S: u8 = 0x06;
pub const I64_TRUNC_SAT_F64_U: u8 = 0x07;
pub const MEMORY_INIT: u8 = 0x08;
pub const DATA_DROP: u8 = 0x09;
pub const MEMORY_COPY: u8 = 0x0A;
pub const MEMORY_FILL: u8 = 0x0B;
}
Loading
Loading