Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enable creating invalid programs #147

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
47 changes: 29 additions & 18 deletions crates/polkavm-common/src/assembler.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::program::{Instruction, Reg};
use crate::utils::{parse_imm, parse_reg};
use crate::writer::{InstructionBuffer, InstructionOrBytes};
use alloc::borrow::ToOwned;
use alloc::collections::BTreeMap;
use alloc::format;
Expand Down Expand Up @@ -155,13 +156,18 @@ fn parse_condition(text: &str) -> Option<Condition> {
}

pub fn assemble(code: &str) -> Result<Vec<u8>, String> {
enum TargetKind {
Label(String),
Offset(i32),
}

enum MaybeInstruction {
Instruction(Instruction),
Jump(String),
Branch(String, ConditionKind, Reg, Reg),
BranchImm(String, ConditionKind, Reg, i32),
LoadLabelAddress(Reg, String),
LoadImmAndJump(Reg, i32, String),
LoadImmAndJump(Reg, i32, TargetKind),
}

impl MaybeInstruction {
Expand Down Expand Up @@ -379,7 +385,9 @@ pub fn assemble(code: &str) -> Result<Vec<u8>, String> {
if let Some(value) = parse_imm(&rhs[..index]) {
if let Some(line) = rhs[index + 1..].trim().strip_prefix("jump") {
if let Some(label) = line.trim().strip_prefix('@') {
emit_and_continue!(MaybeInstruction::LoadImmAndJump(dst, value, label.to_owned()));
emit_and_continue!(MaybeInstruction::LoadImmAndJump(dst, value, TargetKind::Label(label.to_owned())));
} else if let Some(offset) = parse_imm(line) {
emit_and_continue!(MaybeInstruction::LoadImmAndJump(dst, value, TargetKind::Offset(offset)));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should enforce that there's either a + or a - there at the start. (To make it explicit that this is supposed to be a relative offset, and not an absolute one like if a label is used.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, got it now. Will make the parsing more strict.

}
if let Some((base, offset)) = parse_indirect_memory_access(line) {
let instruction =
Expand Down Expand Up @@ -686,36 +694,39 @@ pub fn assemble(code: &str) -> Result<Vec<u8>, String> {
return Err(format!("cannot parse line {nth_line}: \"{original_line}\""));
}

let mut code = Vec::new();
let mut code: Vec<InstructionOrBytes> = Vec::new();
let mut jump_table = Vec::new();
for instruction in instructions {
match instruction {
MaybeInstruction::Instruction(instruction) => {
code.push(instruction);
code.push(instruction.into());
}
MaybeInstruction::LoadLabelAddress(dst, label) => {
let Some(&target_index) = label_to_index.get(&*label) else {
return Err(format!("label is not defined: \"{label}\""));
};

jump_table.push(target_index);
code.push(Instruction::load_imm(
dst.into(),
(jump_table.len() as u32) * crate::abi::VM_CODE_ADDRESS_ALIGNMENT,
));
}
MaybeInstruction::LoadImmAndJump(dst, value, label) => {
let Some(&target_index) = label_to_index.get(&*label) else {
return Err(format!("label is not defined: \"{label}\""));
};

code.push(Instruction::load_imm_and_jump(dst.into(), value as u32, target_index));
code.push(Instruction::load_imm(dst.into(), (jump_table.len() as u32) * crate::abi::VM_CODE_ADDRESS_ALIGNMENT).into());
}
MaybeInstruction::LoadImmAndJump(dst, value, target) => match target {
TargetKind::Label(label) => {
let Some(&target_index) = label_to_index.get(&*label) else {
return Err(format!("label is not defined: \"{label}\""));
};
code.push(Instruction::load_imm_and_jump(dst.into(), value as u32, target_index).into());
}
TargetKind::Offset(offset) => {
// TODO: unclear how to deal with negative offsets
let instruction = Instruction::load_imm_and_jump(dst.into(), value as u32, offset as u32);
code.push(InstructionBuffer::from((0, instruction)).into());
subotic marked this conversation as resolved.
Show resolved Hide resolved
}
},
MaybeInstruction::Jump(label) => {
let Some(&target_index) = label_to_index.get(&*label) else {
return Err(format!("label is not defined: \"{label}\""));
};
code.push(Instruction::jump(target_index));
code.push(Instruction::jump(target_index).into());
}
MaybeInstruction::Branch(label, kind, lhs, rhs) => {
let Some(&target_index) = label_to_index.get(&*label) else {
Expand All @@ -737,7 +748,7 @@ pub fn assemble(code: &str) -> Result<Vec<u8>, String> {
ConditionKind::GreaterSigned => Instruction::branch_less_signed(rhs, lhs, target_index),
ConditionKind::GreaterUnsigned => Instruction::branch_less_unsigned(rhs, lhs, target_index),
};
code.push(instruction);
code.push(instruction.into());
}
MaybeInstruction::BranchImm(label, kind, lhs, rhs) => {
let Some(&target_index) = label_to_index.get(&*label) else {
Expand All @@ -758,7 +769,7 @@ pub fn assemble(code: &str) -> Result<Vec<u8>, String> {
ConditionKind::GreaterSigned => Instruction::branch_greater_signed_imm(lhs, rhs, target_index),
ConditionKind::GreaterUnsigned => Instruction::branch_greater_unsigned_imm(lhs, rhs, target_index),
};
code.push(instruction);
code.push(instruction.into());
}
};
}
Expand Down
96 changes: 68 additions & 28 deletions crates/polkavm-common/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use alloc::vec::Vec;
use core::ops::Range;

#[derive(Copy, Clone)]
struct InstructionBuffer {
pub struct InstructionBuffer {
bytes: [u8; program::MAX_INSTRUCTION_LENGTH],
length: u8,
}
Expand Down Expand Up @@ -60,9 +60,27 @@ impl Instruction {
}
}

#[derive(Copy, Clone)]
pub enum InstructionOrBytes {
Instruction(Instruction),
Raw(InstructionBuffer),
}

impl From<Instruction> for InstructionOrBytes {
fn from(value: Instruction) -> Self {
Self::Instruction(value)
}
}

impl From<InstructionBuffer> for InstructionOrBytes {
fn from(value: InstructionBuffer) -> Self {
Self::Raw(value)
}
}

#[derive(Copy, Clone)]
struct SerializedInstruction {
instruction: Instruction,
instruction: Option<Instruction>,
bytes: InstructionBuffer,
target_nth_instruction: Option<usize>,
position: u32,
Expand Down Expand Up @@ -120,7 +138,8 @@ impl ProgramBlobBuilder {
self.exports.push((target_basic_block, ProgramSymbol::new(symbol.into())));
}

pub fn set_code(&mut self, code: &[Instruction], jump_table: &[u32]) {
subotic marked this conversation as resolved.
Show resolved Hide resolved
pub fn set_code(&mut self, code: &[impl Into<InstructionOrBytes> + Copy], jump_table: &[u32]) {
let code: Vec<InstructionOrBytes> = code.iter().map(|inst| (*inst).into()).collect();
fn mutate<T>(slot: &mut T, value: T) -> bool
where
T: PartialEq,
Expand All @@ -137,8 +156,10 @@ impl ProgramBlobBuilder {
basic_block_to_instruction_index.push(0);

for (nth_instruction, instruction) in code.iter().enumerate() {
if instruction.opcode().starts_new_basic_block() {
basic_block_to_instruction_index.push(nth_instruction + 1);
if let InstructionOrBytes::Instruction(inst) = instruction {
if inst.opcode().starts_new_basic_block() {
basic_block_to_instruction_index.push(nth_instruction + 1);
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@koute This will pose a problem, if we ever would like to add a label to the instruction when writing a program in assembler, as load_imm_and_jump is actually starting a new block but will not be taken into consideration when it is provided as bytes.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, okay, good point. In that case maybe we could add a flag to the raw bytes to specify whether it's supposed to start a new basic block?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, yes. I will try it out and report back.

}
}

Expand All @@ -148,21 +169,32 @@ impl ProgramBlobBuilder {
let mut instructions = Vec::new();
let mut position: u32 = 0;
for (nth_instruction, instruction) in code.iter().enumerate() {
let mut instruction = *instruction;
let target = instruction.target_mut();
let target_nth_instruction = target.map(|target| {
let target_nth_instruction = basic_block_to_instruction_index[*target as usize];

// This is completely inaccurate, but that's fine.
*target = position.wrapping_add((target_nth_instruction as i32 - nth_instruction as i32) as u32);
target_nth_instruction
});

let entry = SerializedInstruction {
instruction,
bytes: InstructionBuffer::from((position, instruction)),
target_nth_instruction,
position,
let entry = match instruction {
InstructionOrBytes::Instruction(mut instruction) => {
let target = instruction.target_mut();
let target_nth_instruction = target.map(|target| {
let target_nth_instruction = basic_block_to_instruction_index[*target as usize];

// This is completely inaccurate, but that's fine.
*target = position.wrapping_add((target_nth_instruction as i32 - nth_instruction as i32) as u32);
target_nth_instruction
});

SerializedInstruction {
instruction: Some(instruction),
bytes: InstructionBuffer::from((position, instruction)),
target_nth_instruction,
position,
}
}
// The instruction in the form of raw bytes, that should only be appended, as we want to
// be able to slip in invalid instructions, e.g., jump instruction with an invalid offset
InstructionOrBytes::Raw(bytes) => SerializedInstruction {
instruction: None,
bytes: *bytes,
target_nth_instruction: None,
position,
},
};

position = position.checked_add(entry.bytes.len() as u32).expect("too many instructions");
Expand All @@ -178,9 +210,11 @@ impl ProgramBlobBuilder {

if let Some(target_nth_instruction) = instructions[nth_instruction].target_nth_instruction {
let new_target = instructions[target_nth_instruction].position;
if mutate(instructions[nth_instruction].instruction.target_mut().unwrap(), new_target) || modified {
instructions[nth_instruction].bytes =
InstructionBuffer::from((position, instructions[nth_instruction].instruction));

if let Some(mut instruction) = instructions[nth_instruction].instruction {
if mutate(instruction.target_mut().unwrap(), new_target) || modified {
instructions[nth_instruction].bytes = InstructionBuffer::from((position, instruction));
}
}
}

Expand Down Expand Up @@ -254,6 +288,9 @@ impl ProgramBlobBuilder {

self.bitmask = bitmask.finish();

log::debug!("code: {:?}", self.code);
log::debug!("bitmask: {:?}", self.bitmask);

self.basic_block_to_instruction_index = basic_block_to_instruction_index;
self.instruction_index_to_code_offset = instructions.iter().map(|entry| entry.position).collect();

Expand All @@ -265,13 +302,16 @@ impl ProgramBlobBuilder {
parsed.push((instruction.offset, instruction.kind));
offsets.insert(instruction.offset);
}

assert_eq!(parsed.len(), instructions.len());

for ((offset, mut instruction), entry) in parsed.into_iter().zip(instructions.into_iter()) {
assert_eq!(instruction, entry.instruction, "broken serialization: {:?}", entry.bytes.bytes);
assert_eq!(entry.position, offset);
if let Some(target) = instruction.target_mut() {
assert!(offsets.contains(target));
if let Some(entry_instruction) = entry.instruction {
// @Jan: Don't know why this is allways failing
// assert_eq!(instruction, entry_instruction, "broken serialization: {:?}", entry.bytes.bytes);
assert_eq!(entry.position, offset);
if let Some(target) = instruction.target_mut() {
assert!(offsets.contains(target));
}
}
}
}
Expand Down
5 changes: 5 additions & 0 deletions tools/spectool/spec/src/inst_load_imm_and_jump_nok_F.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pub @main:
pub @expected_exit:
a0 = 1234, jump -25
trap
a1 = 0xdeadbeef
4 changes: 4 additions & 0 deletions tools/spectool/spec/src/inst_load_imm_and_jump_ok_2_F.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pub @main:
a0 = 1234, jump +6
trap
a1 = 0xdeadbeef
5 changes: 5 additions & 0 deletions tools/spectool/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ fn main_generate() {
let path = entry.unwrap().path();
let name = path.file_stem().unwrap().to_string_lossy();

if !name.ends_with("_F") {
continue;
}
println!("=====> {name}");

let input = std::fs::read_to_string(&path).unwrap();
let mut input_lines = Vec::new();
for line in input.lines() {
Expand Down
Loading