From 0b205ae25c16f8c21869f37e91755450b37757f5 Mon Sep 17 00:00:00 2001 From: Patric Bucher Date: Sat, 25 Nov 2023 10:32:09 +0100 Subject: [PATCH] Initial compiler / parser structure implemented There is now advance, expression and consume. Mainly was fighting with the rust borrow checker. --- src/compiler/compiler.rs | 72 ++++++++++++++++++++++++++++++++++----- src/compiler/mod.rs | 12 +++++-- src/compiler/parser.rs | 39 +++++++++++++++++---- src/compiler/scanner.rs | 23 ++++++++----- src/compiler/token.rs | 15 +++----- src/main.rs | 1 - src/vm/block/block.rs | 12 +++---- src/vm/mod.rs | 2 +- src/vm/opcodes.rs | 2 +- src/vm/virtual_machine.rs | 2 +- 10 files changed, 134 insertions(+), 46 deletions(-) diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 8e7a178..1ee3050 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -1,18 +1,74 @@ use crate::compiler::token::TokenType; use crate::compiler::{Compiler, Parser, Scanner}; -use crate::vm::Block; +use crate::vm::opcodes::OpCode; +use crate::vm::{Block, Value}; impl Compiler { pub(crate) fn new() -> Compiler { - Compiler {} + Compiler { + blocks: Vec::default(), + parser: None, + } } - pub(crate) fn compile(&self, source: String) -> Option { - let mut parser = Parser::new(Scanner::new(source)); - parser.advance(); - parser.expression(); - parser.consume(TokenType::Eof, "Expect end of expression."); + pub(crate) fn compile(&mut self, source: String) -> Option { + self.parser = Option::from(Parser::new(Scanner::new(source))); + self.start_compiler(); - return Option::from(Block::new_no_opt()); + let had_error = match &mut self.parser { + Some(ref mut parser) => { + parser.advance(); + parser.expression(); + parser.consume(TokenType::Eof, "Expect end of expression"); + parser.had_error + } + None => true, + }; + + self.end_compiler(); + + return if !had_error { + Some(self.blocks.pop().unwrap()) + } else { + None + }; + } + + fn start_compiler(&mut self) { + self.blocks.push(Block::new( + format!("Block no. {}", self.blocks.len()).as_str(), + )); + } + + fn end_compiler(&mut self) { + self.emit_return(); + } + + fn number(&mut self) { + if let Some(parser) = &mut self.parser { + let value = parser.previous_token.token.parse::().unwrap(); + self.emit_constant(value); + } + } + + fn current_block(&mut self) -> &mut Block { + self.blocks.last_mut().unwrap() + } + + fn emit_return(&mut self) { + self.emit_byte(OpCode::Return as u8); + } + + fn emit_constant(&mut self, value: Value) { + self.current_block().write_constant(value, 0) + } + + fn emit_byte(&mut self, byte: u8) { + self.current_block().write_u8(byte); + } + + fn emit_bytes(&mut self, byte1: u8, byte2: u8) { + self.current_block().write_u8(byte1); + self.current_block().write_u8(byte2); } } diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs index 4b307cb..463c898 100644 --- a/src/compiler/mod.rs +++ b/src/compiler/mod.rs @@ -1,15 +1,16 @@ use crate::compiler::token::TokenType; +use crate::vm::Block; mod compiler; mod parser; mod scanner; mod token; -#[derive(Debug)] +#[derive(Debug, Clone, Default)] struct Token { pub(crate) token_type: TokenType, + token: String, start: usize, - length: usize, line: u32, } @@ -24,6 +25,11 @@ struct Parser { scanner: Scanner, previous_token: Token, current_token: Token, + had_error: bool, + panic_mode: bool, } -pub(crate) struct Compiler {} +pub(crate) struct Compiler { + blocks: Vec, + parser: Option, +} diff --git a/src/compiler/parser.rs b/src/compiler/parser.rs index 2d5ae83..43a6bdb 100644 --- a/src/compiler/parser.rs +++ b/src/compiler/parser.rs @@ -5,8 +5,10 @@ impl Parser { pub(in crate::compiler) fn new(scanner: Scanner) -> Parser { Parser { scanner, - previous_token: Token::INVALID, - current_token: Token::INVALID, + had_error: false, + panic_mode: false, + previous_token: Token::default(), + current_token: Token::default(), } } @@ -17,15 +19,40 @@ impl Parser { if self.current_token.token_type != TokenType::Error { break; } - self.error_at_current(); + self.report_error_at_current(self.current_token.token.clone().as_str()); } } pub(in crate::compiler) fn expression(&self) {} - pub(in crate::compiler) fn consume(&self, token_type: TokenType, message: &str) {} + pub(in crate::compiler) fn consume(&mut self, token_type: TokenType, message: &str) { + if self.current_token.token_type == token_type { + self.advance(); + return; + } + + self.report_error_at_current(message); + } + + fn report_error_at_current(&mut self, message: &str) { + self.report_error(&self.current_token.clone(), message); + } + + fn report_error(&mut self, token: &Token, message: &str) { + if self.panic_mode { + return; + } + self.had_error = true; + self.panic_mode = true; - fn error_at_current(&self) { - println!("Error at current token: {:?}", self.current_token); + eprint!("[line {}] Error", token.line); + if token.token_type == TokenType::Eof { + eprint!(" at end"); + } else if token.token_type == TokenType::Error { + // Nothing. + } else { + eprint!(" at '{}'", token.token); + } + eprintln!(": {}", message); } } diff --git a/src/compiler/scanner.rs b/src/compiler/scanner.rs index 8d59bb4..3eaed36 100644 --- a/src/compiler/scanner.rs +++ b/src/compiler/scanner.rs @@ -17,11 +17,7 @@ impl Scanner { self.start = self.current; if self.is_at_end() { - if self.is_after_end() { - panic!("Scanner is after end of source."); - } - self.current += 1; - return self.make_token(TokenType::Eof); + return self.make_eof_token(); } let c = self.advance(); if Scanner::is_alpha(c) { @@ -81,7 +77,7 @@ impl Scanner { } } '"' => return self.make_string(), - _ => self.make_error_token("Unexpected character."), + _ => self.make_error_token("Unexpected character"), }; } @@ -90,7 +86,7 @@ impl Scanner { let mut placeholder_start = None; loop { if self.is_at_end() { - return self.make_error_token("Unterminated string."); + return self.make_error_token("Unterminated string"); } if self.peek() == '"' { break; @@ -264,11 +260,20 @@ impl Scanner { } fn make_error_token(&self, message: &str) -> Token { - Token::new(TokenType::Error, self.start, message.len(), self.line) + Token::new( + TokenType::Error, + String::from(message), + self.start, + self.line, + ) } fn make_token(&self, token_type: TokenType) -> Token { - Token::new(token_type, self.start, self.current - self.start, self.line) + let token = String::from_iter(&self.source[self.start..self.current]); + Token::new(token_type, token, self.start, self.line) + } + fn make_eof_token(&self) -> Token { + Token::new(TokenType::Eof, String::new(), self.start, self.line) } } diff --git a/src/compiler/token.rs b/src/compiler/token.rs index 6675e01..f588a63 100644 --- a/src/compiler/token.rs +++ b/src/compiler/token.rs @@ -1,6 +1,6 @@ use crate::compiler::Token; -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq, Default)] pub(in crate::compiler) enum TokenType { LeftParen, RightParen, @@ -46,27 +46,22 @@ pub(in crate::compiler) enum TokenType { While, Error, + + #[default] Eof, } impl Token { - pub(in crate::compiler) const INVALID: Token = Token { - token_type: TokenType::Eof, - start: 0, - length: 0, - line: 0, - }; - pub(in crate::compiler) fn new( token_type: TokenType, + token: String, start: usize, - length: usize, line: u32, ) -> Token { Token { token_type, + token, start, - length, line, } } diff --git a/src/main.rs b/src/main.rs index 2b13ab7..99649a6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,7 +14,6 @@ fn main() { print_tagline(); let args: Vec = env::args().collect(); - if args.len() == 1 { run_repl(); } else if args.len() >= 2 { diff --git a/src/vm/block/block.rs b/src/vm/block/block.rs index 0901aa7..9199375 100644 --- a/src/vm/block/block.rs +++ b/src/vm/block/block.rs @@ -2,7 +2,7 @@ use crate::vm::opcodes::OpCode; use crate::vm::{Block, Constants, Line}; impl Block { - pub(in crate::vm) fn new(name: &str) -> Self { + pub(crate) fn new(name: &str) -> Self { Block { name: String::from(name), constants: Constants::new(), @@ -20,12 +20,12 @@ impl Block { } impl Block { - pub(in crate::vm) fn write_op_code(&mut self, op_code: OpCode, line: usize) { + pub(crate) fn write_op_code(&mut self, op_code: OpCode, line: usize) { self.add_line(self.instructions.len(), line); self.instructions.push(op_code as u8) } - pub(in crate::vm) fn write_constant(&mut self, value: f64, line: usize) { + pub(crate) fn write_constant(&mut self, value: f64, line: usize) { let constant_index = self.constants.write_value(value); if constant_index <= 0xFF { @@ -40,14 +40,14 @@ impl Block { } } - fn write_u8(&mut self, value: u8) { + pub(crate) fn write_u8(&mut self, value: u8) { self.instructions.push(value) } - fn write_u16(&mut self, value: u16) { + pub(crate) fn write_u16(&mut self, value: u16) { self.instructions.push((value) as u8); self.instructions.push((value >> 8) as u8); } - fn write_u32(&mut self, value: u32) { + pub(crate) fn write_u32(&mut self, value: u32) { self.instructions.push((value) as u8); self.instructions.push((value >> 8) as u8); self.instructions.push((value >> 16) as u8); diff --git a/src/vm/mod.rs b/src/vm/mod.rs index f64e9dd..377e8d3 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -1,5 +1,5 @@ mod block; -mod opcodes; +pub(crate) mod opcodes; mod virtual_machine; pub type Value = f64; diff --git a/src/vm/opcodes.rs b/src/vm/opcodes.rs index 8a262be..1709f37 100644 --- a/src/vm/opcodes.rs +++ b/src/vm/opcodes.rs @@ -2,7 +2,7 @@ use std::mem::transmute; #[repr(u8)] #[derive(Debug, PartialEq)] -pub(in crate::vm) enum OpCode { +pub(crate) enum OpCode { Return = 0x00, Constant = 0x01, Constant2 = 0x02, diff --git a/src/vm/virtual_machine.rs b/src/vm/virtual_machine.rs index 3c8f6de..49591d9 100644 --- a/src/vm/virtual_machine.rs +++ b/src/vm/virtual_machine.rs @@ -11,7 +11,7 @@ impl VirtualMachine { } pub fn interpret(&mut self, source: String) -> Result { - let compiler = Compiler::new(); + let mut compiler = Compiler::new(); let block = compiler.compile(source); return if let Some(block) = block { self.run(block)