From 9cf518a90644245465e1944fb1739dfe43c1f18a Mon Sep 17 00:00:00 2001 From: Nick Date: Fri, 2 Aug 2024 00:48:41 +0300 Subject: [PATCH 1/7] feat(#23): Added type parsing and logging Signed-off-by: fami-fish --- src/parser.rs | 147 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 140 insertions(+), 7 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 4c91d8b..46dc99a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -377,7 +377,10 @@ impl<'t, 'contents> Parser<'t, 'contents> { TokenKind::Identifier => Ok(()), TokenKind::DecimalIntLiteral => ReportKind::SyntaxError .new("Expected register starting with r") - .with_note(format!("HINT: You forgot the r prefix. Do: r{}", self.current.text)) + .with_note(format!( + "HINT: You forgot the r prefix. Do: r{}", + self.current.text + )) .with_label(ReportLabel::new(self.current.span.clone())) .into(), _ => ReportKind::UnexpectedToken @@ -387,7 +390,7 @@ impl<'t, 'contents> Parser<'t, 'contents> { .into(), }?; - if !self.current.text.starts_with('r') { + if !self.current.text.starts_with("r") { return ReportKind::SyntaxError .new("Register identifier format is incorrect!") .with_label(ReportLabel::new(self.current.span.clone())) @@ -400,7 +403,9 @@ impl<'t, 'contents> Parser<'t, 'contents> { IntErrorKind::Empty => ReportKind::SyntaxError .new("Expected register identifier after r prefix") .with_label(ReportLabel::new(self.current.span.clone())) - .with_note("HINT: Registers follow the format r. e.g r8 r32"), + .with_note( + "HINT: Registers follow the format r. e.g r8 r32", + ), IntErrorKind::InvalidDigit => { let mut span = self.current.span.clone(); span.start_index += 1; @@ -409,7 +414,9 @@ impl<'t, 'contents> Parser<'t, 'contents> { ReportKind::SyntaxError .new("Register number contains an invalid digit") .with_label(ReportLabel::new(self.current.span.clone())) - .with_note("HINT: Registers follow the format r. e.g r8 r32") + .with_note( + "HINT: Registers follow the format r. e.g r8 r32", + ) }, // Here only positive overflow can be omitted by parse::() // It also doesnt omit Zero because usize can store 0. @@ -417,9 +424,8 @@ impl<'t, 'contents> Parser<'t, 'contents> { .new("Register identifier intager overflows") .with_label(ReportLabel::new(self.current.span.clone())) .with_note("HINT: You dont have this many registers. Trust me"), - } - .into(), - Ok(i) => Ok(Type::Register { inner: inner.map(|t| Box::new(t)), ident: i }), + }.into(), + Ok(i) => Ok(Type::Register { inner: inner.map(|t| Box::new(t)), ident: i }) } } @@ -506,6 +512,133 @@ impl<'t, 'contents> Parser<'t, 'contents> { //NOTE: idk if 5 is the right number. To be determined let mut vec: Vec<(Type, Option)> = Vec::with_capacity(5); + self.advance(); + if self.current.kind == TokenKind::Colon { + self.advance(); + match self.current.kind { + TokenKind::DecimalIntLiteral => { + n = Some(self.current.text.parse::().unwrap()); + if n == Some(0) { + return ReportKind::SyntaxError + .new("Array size cannot be zero.") + .with_note(format!("HINT: Did you mean [{}:]", t)) + .with_label(ReportLabel::new(self.current.span.clone())) + .into(); + } + self.advance(); + }, + TokenKind::RBracket => {}, + _ => { + self.advance(); + return ReportKind::UnexpectedToken + .new(format!("got {:?}", self.current.kind)) + .with_label(ReportLabel::new(self.current.span.clone())) + .into(); + }, + } + } + // We should fail earlier but we wait to gather the element size + // n before logging for clearer error logging + if let Type::Register { inner, ident } = t { + let mut inner_str = "".to_string(); + let mut n_str = "".to_string(); + if inner.is_some() { + inner_str = format!("{}", inner.unwrap()); + } + + if n.is_some() && n.unwrap() != 0 { + n_str = format!("{}", n.unwrap()); + } + + self.advance(); + return ReportKind::SyntaxError + .new("Heap types cannot contain register bindings") + .with_label(ReportLabel::new(self.current.span.clone())) + .with_note(format!("HINT: Did you want to bind the pointer to the register? [{inner_str}:{n_str}];r{ident}")) + .into(); + } + + if self.current.kind != TokenKind::RBracket { + self.advance(); + return ReportKind::SyntaxError + .new(format!("Expected closing bracket. Got {:?}", self.current.text)) + .with_label(ReportLabel::new(self.current.span.clone())) + .into(); + }; + + Ok(Type::Heap { is_pointer: true, contents: vec![(t, n)] }) + }, + TokenKind::LBrace => { + self.advance(); + if self.current.kind == TokenKind::RBrace { + let mut span = self.current.span.clone(); + span.start_index -= 1; + + return ReportKind::SyntaxError + .new("Empty heaps are disallowed") + .with_label(ReportLabel::new(span)) + .with_note("HINT: Did you want to create a void pointer: []") + .into(); + } + //NOTE: idk if 5 is the right number. To be determined + let mut vec: Vec<(Type, Option)> = Vec::with_capacity(5); + loop { + let start = self.current.span.clone(); + let t = self.parse_type()?; + let mut n = None; + + let end = self.current.span.clone(); + let span = start.extend(&end); + self.advance(); + + if self.current.kind == TokenKind::Colon { + self.advance(); + match self.current.kind { + TokenKind::DecimalIntLiteral => { + n = Some(self.current.text.parse::().unwrap()); + if n == Some(0) { + return ReportKind::SyntaxError + .new("Array size cannot be zero.") + .with_note(format!("HINT: Did you mean {}:", t)) + .with_label(ReportLabel::new(self.current.span.clone())) + .into(); + } + self.advance(); + }, + TokenKind::Comma => {}, + TokenKind::RBrace => {}, + _ => { + self.advance(); + return ReportKind::UnexpectedToken + .new(format!( + "Expected either `,` `}}` or a intager, got {:?}", + self.current.kind + )) + .with_label(ReportLabel::new(self.current.span.clone())) + .into(); + }, + } + } + + if self.current.kind != TokenKind::Comma { + if self.current.kind == TokenKind::RBrace { + vec.push((t, n)); + break; + } + return ReportKind::SyntaxError + .new("Expected comma to separate heap types") + .with_label(ReportLabel::new(self.current.span.clone())) + .into(); + } + self.advance(); + if let Type::Register { ident, .. } = t { + return ReportKind::SyntaxError + .new("Heap types cannot contain register bindings") + .with_label(ReportLabel::new(span)) + .with_note(format!("HINT: Did you want to bind the pointer to the register? {};r{ident}", Type::Heap { is_pointer: false, contents: vec })) + .into(); + } +>>>>>>> f5e4b92 (feat(#23): Added type parsing and logging) while self.current.kind != end_kind { // let start = self.current.span.clone(); From 044b7cae9ccbf96b5876c44276766c8dd678c1d7 Mon Sep 17 00:00:00 2001 From: fami-fish Date: Fri, 2 Aug 2024 00:37:28 +0300 Subject: [PATCH 2/7] fmt: whitespace Signed-off-by: fami-fish --- src/parser.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 46dc99a..f5353c1 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -377,10 +377,7 @@ impl<'t, 'contents> Parser<'t, 'contents> { TokenKind::Identifier => Ok(()), TokenKind::DecimalIntLiteral => ReportKind::SyntaxError .new("Expected register starting with r") - .with_note(format!( - "HINT: You forgot the r prefix. Do: r{}", - self.current.text - )) + .with_note(format!("HINT: You forgot the r prefix. Do: r{}", self.current.text)) .with_label(ReportLabel::new(self.current.span.clone())) .into(), _ => ReportKind::UnexpectedToken @@ -403,9 +400,7 @@ impl<'t, 'contents> Parser<'t, 'contents> { IntErrorKind::Empty => ReportKind::SyntaxError .new("Expected register identifier after r prefix") .with_label(ReportLabel::new(self.current.span.clone())) - .with_note( - "HINT: Registers follow the format r. e.g r8 r32", - ), + .with_note("HINT: Registers follow the format r. e.g r8 r32"), IntErrorKind::InvalidDigit => { let mut span = self.current.span.clone(); span.start_index += 1; @@ -414,9 +409,7 @@ impl<'t, 'contents> Parser<'t, 'contents> { ReportKind::SyntaxError .new("Register number contains an invalid digit") .with_label(ReportLabel::new(self.current.span.clone())) - .with_note( - "HINT: Registers follow the format r. e.g r8 r32", - ) + .with_note("HINT: Registers follow the format r. e.g r8 r32") }, // Here only positive overflow can be omitted by parse::() // It also doesnt omit Zero because usize can store 0. @@ -424,8 +417,9 @@ impl<'t, 'contents> Parser<'t, 'contents> { .new("Register identifier intager overflows") .with_label(ReportLabel::new(self.current.span.clone())) .with_note("HINT: You dont have this many registers. Trust me"), - }.into(), - Ok(i) => Ok(Type::Register { inner: inner.map(|t| Box::new(t)), ident: i }) + } + .into(), + Ok(i) => Ok(Type::Register { inner: inner.map(|t| Box::new(t)), ident: i }), } } From 5afcd87c10a52241d765589de48e94e788a7ea56 Mon Sep 17 00:00:00 2001 From: fami-fish Date: Fri, 2 Aug 2024 14:20:26 +0300 Subject: [PATCH 3/7] fix: clippy errors Signed-off-by: fami-fish --- src/parser.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index f5353c1..f177c60 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -387,7 +387,7 @@ impl<'t, 'contents> Parser<'t, 'contents> { .into(), }?; - if !self.current.text.starts_with("r") { + if !self.current.text.starts_with('r') { return ReportKind::SyntaxError .new("Register identifier format is incorrect!") .with_label(ReportLabel::new(self.current.span.clone())) @@ -515,7 +515,7 @@ impl<'t, 'contents> Parser<'t, 'contents> { if n == Some(0) { return ReportKind::SyntaxError .new("Array size cannot be zero.") - .with_note(format!("HINT: Did you mean [{}:]", t)) + .with_note(format!("HINT: Did you mean [{t}:]")) .with_label(ReportLabel::new(self.current.span.clone())) .into(); } @@ -534,8 +534,8 @@ impl<'t, 'contents> Parser<'t, 'contents> { // We should fail earlier but we wait to gather the element size // n before logging for clearer error logging if let Type::Register { inner, ident } = t { - let mut inner_str = "".to_string(); - let mut n_str = "".to_string(); + let mut inner_str = String::new(); + let mut n_str = String::new(); if inner.is_some() { inner_str = format!("{}", inner.unwrap()); } @@ -593,14 +593,13 @@ impl<'t, 'contents> Parser<'t, 'contents> { if n == Some(0) { return ReportKind::SyntaxError .new("Array size cannot be zero.") - .with_note(format!("HINT: Did you mean {}:", t)) + .with_note(format!("HINT: Did you mean {t}:")) .with_label(ReportLabel::new(self.current.span.clone())) .into(); } self.advance(); }, - TokenKind::Comma => {}, - TokenKind::RBrace => {}, + TokenKind::Comma | TokenKind::RBrace => {}, _ => { self.advance(); return ReportKind::UnexpectedToken From 1b8b71468bf075eccc1ef7914864427c04ea56df Mon Sep 17 00:00:00 2001 From: fami-fish Date: Sat, 3 Aug 2024 00:40:56 +0300 Subject: [PATCH 4/7] fix: remove trailing comma on heap display printing Signed-off-by: fami-fish --- src/ast.rs | 20 ++++++++++++++++++++ src/parser.rs | 1 - 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/ast.rs b/src/ast.rs index 40221bd..7826bba 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -50,8 +50,24 @@ impl Display for Type { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Self::Size(s) => write!(f, "{s}")?, +<<<<<<< HEAD Self::Heap { is_pointer, contents } => { write!(f, "{}", if *is_pointer { "[" } else { "{" })?; +======= + Self::Heap { is_pointer: true, contents } => { + // contents length is always 1 for pointers + write!(f, "[{}", &contents[0].0)?; + match contents[0].1 { + Some(0) => write!(f, ":")?, + Some(size) => write!(f, ":{size}")?, + None => {}, + }; + + write!(f, "]")?; + }, + Self::Heap { is_pointer: false, contents } => { + write!(f, "{{")?; +>>>>>>> 900ce40 (fix: remove trailing comma on heap display printing) for (i, (t, elems)) in contents.iter().enumerate() { write!(f, "{t}")?; match elems { @@ -60,7 +76,11 @@ impl Display for Type { None => {}, }; +<<<<<<< HEAD if i != contents.len() - 1 { +======= + if i != contents.len() -1 { +>>>>>>> 900ce40 (fix: remove trailing comma on heap display printing) write!(f, ", ")?; } } diff --git a/src/parser.rs b/src/parser.rs index f177c60..6faa57d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -631,7 +631,6 @@ impl<'t, 'contents> Parser<'t, 'contents> { .with_note(format!("HINT: Did you want to bind the pointer to the register? {};r{ident}", Type::Heap { is_pointer: false, contents: vec })) .into(); } ->>>>>>> f5e4b92 (feat(#23): Added type parsing and logging) while self.current.kind != end_kind { // let start = self.current.span.clone(); From c26d8dc119dad48dc7248f50518edf9fe8d8fdf8 Mon Sep 17 00:00:00 2001 From: fami-fish Date: Sat, 3 Aug 2024 03:32:52 +0300 Subject: [PATCH 5/7] refactor: parser types now pass all test cases Signed-off-by: fami-fish --- src/ast.rs | 24 +--------- src/parser.rs | 118 +------------------------------------------------- 2 files changed, 3 insertions(+), 139 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 7826bba..bf94edd 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -50,24 +50,8 @@ impl Display for Type { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Self::Size(s) => write!(f, "{s}")?, -<<<<<<< HEAD Self::Heap { is_pointer, contents } => { - write!(f, "{}", if *is_pointer { "[" } else { "{" })?; -======= - Self::Heap { is_pointer: true, contents } => { - // contents length is always 1 for pointers - write!(f, "[{}", &contents[0].0)?; - match contents[0].1 { - Some(0) => write!(f, ":")?, - Some(size) => write!(f, ":{size}")?, - None => {}, - }; - - write!(f, "]")?; - }, - Self::Heap { is_pointer: false, contents } => { - write!(f, "{{")?; ->>>>>>> 900ce40 (fix: remove trailing comma on heap display printing) + write!(f, "{}", if *is_pointer {"["} else {"{"})?; for (i, (t, elems)) in contents.iter().enumerate() { write!(f, "{t}")?; match elems { @@ -76,15 +60,11 @@ impl Display for Type { None => {}, }; -<<<<<<< HEAD if i != contents.len() - 1 { -======= - if i != contents.len() -1 { ->>>>>>> 900ce40 (fix: remove trailing comma on heap display printing) write!(f, ", ")?; } } - write!(f, "{}", if *is_pointer { "]" } else { "}" })?; + write!(f, "{}", if *is_pointer {"]"} else {"}"})?; }, Self::Register { inner: t, ident } => { if t.is_some() { diff --git a/src/parser.rs b/src/parser.rs index 6faa57d..a887c96 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -417,8 +417,7 @@ impl<'t, 'contents> Parser<'t, 'contents> { .new("Register identifier intager overflows") .with_label(ReportLabel::new(self.current.span.clone())) .with_note("HINT: You dont have this many registers. Trust me"), - } - .into(), + }.into(), Ok(i) => Ok(Type::Register { inner: inner.map(|t| Box::new(t)), ident: i }), } } @@ -562,121 +561,6 @@ impl<'t, 'contents> Parser<'t, 'contents> { Ok(Type::Heap { is_pointer: true, contents: vec![(t, n)] }) }, - TokenKind::LBrace => { - self.advance(); - if self.current.kind == TokenKind::RBrace { - let mut span = self.current.span.clone(); - span.start_index -= 1; - - return ReportKind::SyntaxError - .new("Empty heaps are disallowed") - .with_label(ReportLabel::new(span)) - .with_note("HINT: Did you want to create a void pointer: []") - .into(); - } - //NOTE: idk if 5 is the right number. To be determined - let mut vec: Vec<(Type, Option)> = Vec::with_capacity(5); - loop { - let start = self.current.span.clone(); - let t = self.parse_type()?; - let mut n = None; - - let end = self.current.span.clone(); - let span = start.extend(&end); - self.advance(); - - if self.current.kind == TokenKind::Colon { - self.advance(); - match self.current.kind { - TokenKind::DecimalIntLiteral => { - n = Some(self.current.text.parse::().unwrap()); - if n == Some(0) { - return ReportKind::SyntaxError - .new("Array size cannot be zero.") - .with_note(format!("HINT: Did you mean {t}:")) - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - } - self.advance(); - }, - TokenKind::Comma | TokenKind::RBrace => {}, - _ => { - self.advance(); - return ReportKind::UnexpectedToken - .new(format!( - "Expected either `,` `}}` or a intager, got {:?}", - self.current.kind - )) - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - }, - } - } - - if self.current.kind != TokenKind::Comma { - if self.current.kind == TokenKind::RBrace { - vec.push((t, n)); - break; - } - return ReportKind::SyntaxError - .new("Expected comma to separate heap types") - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - } - self.advance(); - if let Type::Register { ident, .. } = t { - return ReportKind::SyntaxError - .new("Heap types cannot contain register bindings") - .with_label(ReportLabel::new(span)) - .with_note(format!("HINT: Did you want to bind the pointer to the register? {};r{ident}", Type::Heap { is_pointer: false, contents: vec })) - .into(); - } - - while self.current.kind != end_kind { - // let start = self.current.span.clone(); - let (t, n) = self.parse_array_type(&[TokenKind::Comma, end_kind, TokenKind::NewLine])?; - // let end = self.current.span.clone(); - // let span = start.extend(&end); - // let mut span = self.current.span.clone(); - // span.start_index -= 2; - // span.end_index -= 1; - vec.push((t, n)); - - if self.current.kind == TokenKind::NewLine { - let mut span = self.current.span.clone(); - span.start_index -= 1; - return ReportKind::SyntaxError - .new(format!("Unterminated {} heap", if is_pointer {"pointer to"} else {""})) - .with_label(ReportLabel::new(span)) - .with_note(format!("HINT: did you mean to close this heap? {}", Type::Heap { is_pointer, contents: vec } )) - .into(); - } - - if self.current.kind != end_kind { - if self.current.kind == (if is_pointer {TokenKind::RBrace} else {TokenKind::RBracket}) { - return ReportKind::SyntaxError - .new("Mismatched heap brackets") - .with_label(ReportLabel::new(self.current.span.clone())) - .with_note("HINT: Be more decisive next time. Is it a pointer or not?") - .into(); - } - - if self.peek(1).kind == TokenKind::NewLine { - return ReportKind::SyntaxError - .new(format!("Unterminated {} heap", if is_pointer {"pointer to"} else {""})) - .with_label(ReportLabel::new(self.current.span.clone())) - .with_note("HINT: did you mean to close this heap?") - .into(); - } - - if self.current.kind == TokenKind::Comma { - self.advance(); - } - } - } - - Ok(Type::Heap { is_pointer, contents: vec }) - }, TokenKind::Colon => { ReportKind::SyntaxError .new("Cannot have an array of an unknown type") From 5b607c9aa7c5a484e7bacb7cea75ff7fbb8073b8 Mon Sep 17 00:00:00 2001 From: fami-fish Date: Sat, 3 Aug 2024 03:38:30 +0300 Subject: [PATCH 6/7] fix: clippy + fmt Signed-off-by: fami-fish --- src/ast.rs | 4 +-- src/parser.rs | 92 +++++++++++++++++++++++---------------------------- 2 files changed, 43 insertions(+), 53 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index bf94edd..40221bd 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -51,7 +51,7 @@ impl Display for Type { match self { Self::Size(s) => write!(f, "{s}")?, Self::Heap { is_pointer, contents } => { - write!(f, "{}", if *is_pointer {"["} else {"{"})?; + write!(f, "{}", if *is_pointer { "[" } else { "{" })?; for (i, (t, elems)) in contents.iter().enumerate() { write!(f, "{t}")?; match elems { @@ -64,7 +64,7 @@ impl Display for Type { write!(f, ", ")?; } } - write!(f, "{}", if *is_pointer {"]"} else {"}"})?; + write!(f, "{}", if *is_pointer { "]" } else { "}" })?; }, Self::Register { inner: t, ident } => { if t.is_some() { diff --git a/src/parser.rs b/src/parser.rs index a887c96..1d41810 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -417,7 +417,8 @@ impl<'t, 'contents> Parser<'t, 'contents> { .new("Register identifier intager overflows") .with_label(ReportLabel::new(self.current.span.clone())) .with_note("HINT: You dont have this many registers. Trust me"), - }.into(), + } + .into(), Ok(i) => Ok(Type::Register { inner: inner.map(|t| Box::new(t)), ident: i }), } } @@ -505,61 +506,50 @@ impl<'t, 'contents> Parser<'t, 'contents> { //NOTE: idk if 5 is the right number. To be determined let mut vec: Vec<(Type, Option)> = Vec::with_capacity(5); - self.advance(); - if self.current.kind == TokenKind::Colon { - self.advance(); - match self.current.kind { - TokenKind::DecimalIntLiteral => { - n = Some(self.current.text.parse::().unwrap()); - if n == Some(0) { - return ReportKind::SyntaxError - .new("Array size cannot be zero.") - .with_note(format!("HINT: Did you mean [{t}:]")) - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - } - self.advance(); - }, - TokenKind::RBracket => {}, - _ => { - self.advance(); - return ReportKind::UnexpectedToken - .new(format!("got {:?}", self.current.kind)) - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - }, - } - } - // We should fail earlier but we wait to gather the element size - // n before logging for clearer error logging - if let Type::Register { inner, ident } = t { - let mut inner_str = String::new(); - let mut n_str = String::new(); - if inner.is_some() { - inner_str = format!("{}", inner.unwrap()); - } - if n.is_some() && n.unwrap() != 0 { - n_str = format!("{}", n.unwrap()); + while self.current.kind != end_kind { + // let start = self.current.span.clone(); + let (t, n) = self.parse_array_type(&[TokenKind::Comma, end_kind, TokenKind::NewLine])?; + // let end = self.current.span.clone(); + // let span = start.extend(&end); + // let mut span = self.current.span.clone(); + // span.start_index -= 2; + // span.end_index -= 1; + vec.push((t, n)); + + if self.current.kind == TokenKind::NewLine { + let mut span = self.current.span.clone(); + span.start_index -= 1; + return ReportKind::SyntaxError + .new(format!("Unterminated {} heap", if is_pointer {"pointer to"} else {""})) + .with_label(ReportLabel::new(span)) + .with_note(format!("HINT: did you mean to close this heap? {}", Type::Heap { is_pointer, contents: vec } )) + .into(); } - self.advance(); - return ReportKind::SyntaxError - .new("Heap types cannot contain register bindings") - .with_label(ReportLabel::new(self.current.span.clone())) - .with_note(format!("HINT: Did you want to bind the pointer to the register? [{inner_str}:{n_str}];r{ident}")) - .into(); - } + if self.current.kind != end_kind { + if self.current.kind == (if is_pointer {TokenKind::RBrace} else {TokenKind::RBracket}) { + return ReportKind::SyntaxError + .new("Mismatched heap brackets") + .with_label(ReportLabel::new(self.current.span.clone())) + .with_note("HINT: Be more decisive next time. Is it a pointer or not?") + .into(); + } - if self.current.kind != TokenKind::RBracket { - self.advance(); - return ReportKind::SyntaxError - .new(format!("Expected closing bracket. Got {:?}", self.current.text)) - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - }; + if self.peek(1).kind == TokenKind::NewLine { + return ReportKind::SyntaxError + .new(format!("Unterminated {} heap", if is_pointer {"pointer to"} else {""})) + .with_label(ReportLabel::new(self.current.span.clone())) + .with_note("HINT: did you mean to close this heap?") + .into(); + } - Ok(Type::Heap { is_pointer: true, contents: vec![(t, n)] }) + if self.current.kind == TokenKind::Comma { + self.advance(); + } + } + } + Ok(Type::Heap { is_pointer, contents: vec }) }, TokenKind::Colon => { ReportKind::SyntaxError From 76573ff5024afe99faf7a42da6e8e653ab56a7ba Mon Sep 17 00:00:00 2001 From: fami-fish Date: Mon, 5 Aug 2024 14:29:13 +0300 Subject: [PATCH 7/7] refactor: parser now passes even more test cases Signed-off-by: fami-fish --- src/ast.rs | 21 ++-- src/parser.rs | 335 ++++++++++++++++++++++++++++---------------------- src/token.rs | 14 +++ 3 files changed, 214 insertions(+), 156 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 40221bd..a96ed35 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -41,7 +41,8 @@ pub enum LabelAttribute { pub enum Type { Size(usize), // NOTE: a size of 0 represents an array of undetermined length e.g [1:] - Heap { is_pointer: bool, contents: Vec<(Type, Option)> }, + Heap { is_pointer: bool, contents: Vec }, + Array { inner: Box, elems: Option }, Struct(String), Register { inner: Option>, ident: usize }, } @@ -50,15 +51,21 @@ impl Display for Type { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Self::Size(s) => write!(f, "{s}")?, + Self::Array { inner, elems } => { + write!(f, "{inner}")?; + if elems.is_none() { + return Ok(()); + }; + + write!(f, ":")?; + if elems.unwrap() != 0 { + write!(f, "{}", elems.unwrap())?; + } + }, Self::Heap { is_pointer, contents } => { write!(f, "{}", if *is_pointer { "[" } else { "{" })?; - for (i, (t, elems)) in contents.iter().enumerate() { + for (i, t) in contents.iter().enumerate() { write!(f, "{t}")?; - match elems { - Some(0) => write!(f, ":")?, - Some(size) => write!(f, ":{size}")?, - None => {}, - }; if i != contents.len() - 1 { write!(f, ", ")?; diff --git a/src/parser.rs b/src/parser.rs index 1d41810..cce3837 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::cmp::PartialEq; use std::num::IntErrorKind; use std::slice::Iter; @@ -5,6 +6,7 @@ use std::str; use crate::ast::{ASTKind, LabelAttribute, Program, Type, AST}; use crate::report::{Report, ReportKind, ReportLabel, ReportSender, Result, Unbox}; +use crate::span::Span; use crate::token::{Token, TokenKind}; pub struct Parser<'t, 'contents> { @@ -369,116 +371,9 @@ impl<'t, 'contents> Parser<'t, 'contents> { }) as char) } - fn parse_register_binding(&mut self, inner: Option) -> Result { - // we expect that self is a ; - self.advance(); - - match self.current.kind { - TokenKind::Identifier => Ok(()), - TokenKind::DecimalIntLiteral => ReportKind::SyntaxError - .new("Expected register starting with r") - .with_note(format!("HINT: You forgot the r prefix. Do: r{}", self.current.text)) - .with_label(ReportLabel::new(self.current.span.clone())) - .into(), - _ => ReportKind::UnexpectedToken - .new(format!("Expected register, got {}", self.current.text)) - .with_note("HINT: Registers follow the format r. e.g r8 r32") - .with_label(ReportLabel::new(self.current.span.clone())) - .into(), - }?; - - if !self.current.text.starts_with('r') { - return ReportKind::SyntaxError - .new("Register identifier format is incorrect!") - .with_label(ReportLabel::new(self.current.span.clone())) - .with_note("HINT: Registers follow the format r. e.g r8 r32") - .into(); - }; - - match self.current.text[1..].parse::() { - Err(e) => match e.kind() { - IntErrorKind::Empty => ReportKind::SyntaxError - .new("Expected register identifier after r prefix") - .with_label(ReportLabel::new(self.current.span.clone())) - .with_note("HINT: Registers follow the format r. e.g r8 r32"), - IntErrorKind::InvalidDigit => { - let mut span = self.current.span.clone(); - span.start_index += 1; - span.end_index += self.current.text.len(); - - ReportKind::SyntaxError - .new("Register number contains an invalid digit") - .with_label(ReportLabel::new(self.current.span.clone())) - .with_note("HINT: Registers follow the format r. e.g r8 r32") - }, - // Here only positive overflow can be omitted by parse::() - // It also doesnt omit Zero because usize can store 0. - _ => ReportKind::SyntaxError - .new("Register identifier intager overflows") - .with_label(ReportLabel::new(self.current.span.clone())) - .with_note("HINT: You dont have this many registers. Trust me"), - } - .into(), - Ok(i) => Ok(Type::Register { inner: inner.map(|t| Box::new(t)), ident: i }), - } - } - - // We use box here cause we never grow the terminals, so no need for length/capacity which - // comes with Vec - fn parse_array_type(&mut self, terminals: &[TokenKind]) -> Result<(Type, Option)> { - let elem_type = self.parse_type()?; - - if let Type::Register { ident, .. } = elem_type { - return ReportKind::RegisterWithinHeap - .new("Heaps cannot contain register bindings") - .with_label(ReportLabel::new(self.current.span.clone())) - .with_note(format!("{ident}")) - .into(); - } - - self.advance(); - if self.current.kind == TokenKind::Colon { - for term in terminals { - if *term == self.peek(1).kind { - self.advance(); - return Ok((elem_type, Some(0))); - } - } - - self.advance(); - - if self.current.kind == TokenKind::DecimalIntLiteral { - let elem_size = self.current.text.parse::().unwrap(); - if elem_size == 0 { - return ReportKind::SyntaxError - .new("Array size cannot be zero.") - .with_note(format!("HINT: Did you mean [{elem_type}:]")) - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - } - self.advance(); - return Ok((elem_type, Some(elem_size))); - } - return ReportKind::UnexpectedToken - .new(format!( - "Expected {}, got {:?}", - terminals - .into_iter() - .map(|x| format!("{x:?}")) - .collect::>() - .join(" or "), - self.current.kind - )) - .with_label(ReportLabel::new(self.current.span.clone())) - .into(); - } - - Ok((elem_type, None)) - } - fn parse_type(&mut self) -> Result { match self.current.kind { - TokenKind::Semicolon => self.parse_register_binding(None), + // TokenKind::Semicolon => Ok(Type::Size(0)), TokenKind::DecimalIntLiteral => { // We know it lexed so this has to pass, so we can unwrap let Ok(size) = self.current.text.parse::() else { @@ -499,83 +394,225 @@ impl<'t, 'contents> Parser<'t, 'contents> { }, TokenKind::Identifier => Ok(Type::Struct(self.current.text.to_string())), TokenKind::LBrace | TokenKind::LBracket => { + let start_span = self.current.span.clone(); let is_pointer = self.current.kind == TokenKind::LBracket; let start_kind = if is_pointer {TokenKind::LBracket} else {TokenKind::LBrace}; let end_kind = if is_pointer {TokenKind::RBracket} else {TokenKind::RBrace}; self.advance(); //NOTE: idk if 5 is the right number. To be determined - let mut vec: Vec<(Type, Option)> = Vec::with_capacity(5); + let mut vec: Vec = Vec::with_capacity(5); while self.current.kind != end_kind { - // let start = self.current.span.clone(); - let (t, n) = self.parse_array_type(&[TokenKind::Comma, end_kind, TokenKind::NewLine])?; - // let end = self.current.span.clone(); - // let span = start.extend(&end); - // let mut span = self.current.span.clone(); - // span.start_index -= 2; - // span.end_index -= 1; - vec.push((t, n)); + let t = self.parse_type().map_err(|e| { + match self.tokens[self.index - 1].kind { + TokenKind::Comma => { + let mut span = self.current.span.clone(); + span.start_index -= 1; + ReportKind::SyntaxError + .new("Unclosed heap, found comma") + .with_label( + ReportLabel::new(span) + .with_text(format!("Replace this , with a {}", if end_kind == TokenKind::RBrace {"}"} else {"]"})) + ) + .with_note("HINT: Commas are required between types") + .into() + }, + a if matches!(a, TokenKind::LBrace | TokenKind::LBracket) => { + let opposite = match a { + TokenKind::LBrace => TokenKind::RBracket, + TokenKind::LBracket => TokenKind::RBrace, + _ => unreachable!(), + }; + + if opposite != self.current.kind {return e}; + + ReportKind::SyntaxError + .new("Incorrect heap nesting") + .with_label(ReportLabel::new(self.tokens[self.index - 1].span.clone()).with_text("This has no closing pair")) + .with_note("HINT: Inner heaps must terminate before outer ones") + .into() + } + _ => e + } + })?; + self.advance(); - if self.current.kind == TokenKind::NewLine { - let mut span = self.current.span.clone(); - span.start_index -= 1; + if matches!(t, Type::Register{..}) { return ReportKind::SyntaxError - .new(format!("Unterminated {} heap", if is_pointer {"pointer to"} else {""})) - .with_label(ReportLabel::new(span)) - .with_note(format!("HINT: did you mean to close this heap? {}", Type::Heap { is_pointer, contents: vec } )) + .new("Heaps cant contain register bindings") + .with_label(ReportLabel::new(start_span.extend(&self.current.span))) + .with_note("HINT: If they did, then memory would be discontiguous") .into(); } + vec.push(t); + if self.current.kind != end_kind { if self.current.kind == (if is_pointer {TokenKind::RBrace} else {TokenKind::RBracket}) { return ReportKind::SyntaxError .new("Mismatched heap brackets") - .with_label(ReportLabel::new(self.current.span.clone())) + .with_label(ReportLabel::new(start_span.extend(&self.current.span))) .with_note("HINT: Be more decisive next time. Is it a pointer or not?") .into(); } - if self.peek(1).kind == TokenKind::NewLine { - return ReportKind::SyntaxError - .new(format!("Unterminated {} heap", if is_pointer {"pointer to"} else {""})) - .with_label(ReportLabel::new(self.current.span.clone())) - .with_note("HINT: did you mean to close this heap?") - .into(); - } - if self.current.kind == TokenKind::Comma { self.advance(); + continue; + } + + if self.current.kind == TokenKind::NewLine { + let mut span = self.current.span.clone(); + span.start_index -= 1; + return ReportKind::UnexpectedToken + .new("Unclosed heap, found newline") + .with_label(ReportLabel::new(span)) + .with_note("HINT: Commas are required between types") + .into() + } + + if self.current.kind != TokenKind::NewLine { + let mut span = self.tokens[self.index - 1].span.clone(); + span.start_index = span.end_index; + span.end_index += 1; + + return ReportKind::SyntaxError + .new("Expected comma between types") + .with_label(ReportLabel::new(span).with_text("Add one here")) + .with_note("HINT: Commas are required between types") + .into() } } } + + if !is_pointer && vec.len() == 0 { + return ReportKind::SyntaxError + .new("Zero-sized heaps are disallowed") + .with_label(ReportLabel::new(start_span.extend(&self.current.span))) + .with_note("HINT: Did you mean to do a void pointer: []?") + .into(); + } + Ok(Type::Heap { is_pointer, contents: vec }) }, - TokenKind::Colon => { - ReportKind::SyntaxError - .new("Cannot have an array of an unknown type") - .with_label(ReportLabel::new(self.current.span.clone())) - .with_note("HINT: add a type before the colon, duh") - .into() - } TokenKind::NewLine => { - println!("{:?}", &self.tokens[self.index - 1]); + let mut span = self.current.span.clone(); + span.start_index -= 1; ReportKind::UnexpectedToken - .new("Unexpected newline") - .with_label(ReportLabel::new(self.current.span.clone())) - .into() + .new("Unexpected newline") + .with_label(ReportLabel::new(span)) + .into() } _ => ReportKind::UnexpectedToken .new(format!("Unexpected token: {:?}", self.current.kind)) .with_label(ReportLabel::new(self.current.span.clone())) .with_note("HINT: We expect literally any type... and you still messed it up") .into(), - }.and_then(|t| { - if self.peek(1).kind == TokenKind::Semicolon { - self.advance(); - self.parse_register_binding(Some(t)) - } else { - Ok(t) + } + // After the base type, optionally parse a register or an array, which + // are mutrually exclusive + .and_then(|t| { + match self.peek(1).kind { + TokenKind::Semicolon => { + self.advance(); + self.advance(); + let mut span = self.current.span.clone(); + + match t { + Type::Heap { is_pointer: true, .. } => Ok(()), + Type::Size(a) if a <= /*TODO: max register size here */ 8 => Ok(()), + _ => ReportKind::SyntaxError + .new("Registers can only be bound to pointer to heaps or sizes under the register's max") + .with_label(ReportLabel::new(self.tokens[self.index - 1].span.extend(&span))) + .into(), + }?; + + match self.current.kind { + TokenKind::Identifier => Ok(()), + TokenKind::DecimalIntLiteral => ReportKind::SyntaxError + .new("Expected register starting with r") + .with_note(format!("HINT: You forgot the r prefix. Do: r{}", self.current.text)) + .with_label(ReportLabel::new(self.current.span.clone())) + .into(), + _ => ReportKind::UnexpectedToken + .new(format!("Expected register, got {}", self.current.text)) + .with_note("HINT: Registers follow the format r. e.g r8 r32") + .with_label(ReportLabel::new(self.current.span.clone())) + .into(), + }?; + + if !self.current.text.starts_with('r') { + return ReportKind::SyntaxError + .new("Register identifier format is incorrect!") + .with_label(ReportLabel::new(self.current.span.clone())) + .with_note("HINT: Registers follow the format r. e.g r8 r32") + .into(); + }; + + match self.current.text[1..].parse::() { + Err(e) => match e.kind() { + IntErrorKind::Empty => ReportKind::SyntaxError + .new("Expected register identifier after r prefix") + .with_label(ReportLabel::new(self.current.span.clone())) + .with_note("HINT: Registers follow the format r. e.g r8 r32"), + IntErrorKind::InvalidDigit => { + let mut span = self.current.span.clone(); + span.start_index += 1; + span.end_index += self.current.text.len(); + + ReportKind::SyntaxError + .new("Register number contains an invalid digit") + .with_label(ReportLabel::new(self.current.span.clone())) + .with_note("HINT: Registers follow the format r. e.g r8 r32") + }, + // Here only positive overflow can be omitted by parse::() + // It also doesnt omit Zero because usize can store 0. + _ => ReportKind::SyntaxError + .new("Register identifier intager overflows") + .with_label(ReportLabel::new(self.current.span.clone())) + .with_note("HINT: You dont have this many registers. Trust me"), + } + .into(), + Ok(i) => { + if self.peek(1).kind == TokenKind::Colon { + return ReportKind::SyntaxError + .new("Register binding cannot be followed by an array!") + .with_label(ReportLabel::new(self.tokens[self.index + 1].span.extend(&self.tokens[self.index + 2].span))) + .into(); + } + Ok(Type::Register { inner: if t == Type::Size(0) {None} else {Some(Box::new(t))}, ident: i }) + } + } + }, + TokenKind::Colon => { + self.advance(); + self.advance(); + + let elems: Option = if self.current.kind == TokenKind::DecimalIntLiteral { + let elem_size = self.current.text.parse::().unwrap(); + if elem_size == 0 { + return ReportKind::SyntaxError + .new("Array size cannot be zero.") + .with_note(format!("HINT: Did you mean [{t}:]")) + .with_label(ReportLabel::new(self.current.span.clone())) + .into(); + } + Some(elem_size) + } else { + None + }; + + if self.peek(1).kind == TokenKind::Semicolon { + return ReportKind::SyntaxError + .new("Array cannot be followed by a register binding!") + .with_label(ReportLabel::new(self.tokens[self.index + 1].span.extend(&self.tokens[self.index + 2].span))) + .into(); + } + + Ok(Type::Array {inner: Box::new(t), elems}) + }, + _ => Ok(t) } }) } diff --git a/src/token.rs b/src/token.rs index a62186d..59fd5e4 100644 --- a/src/token.rs +++ b/src/token.rs @@ -78,3 +78,17 @@ impl std::fmt::Display for Token<'_> { write!(f, ")") } } + +impl TokenKind { + pub fn matching(self) -> Self { + match self { + Self::LBrace => Self::RBrace, + Self::RBrace => Self::LBrace, + Self::LBracket => Self::RBracket, + Self::RBracket => Self::LBracket, + Self::LParen => Self::RParen, + Self::RParen => Self::LParen, + a => a, + } + } +}