From 88c1c6288ce47f1e6d030e2cb6f8e337f9b8810f Mon Sep 17 00:00:00 2001 From: "serhiy.barhamon" Date: Sat, 8 Jul 2023 19:43:44 +0200 Subject: [PATCH 01/14] message len & serialize --- src/nodes/message.rs | 191 +++++++++++++++++++++++++++++++++++++++++++ src/nodes/mod.rs | 1 + 2 files changed, 192 insertions(+) create mode 100644 src/nodes/message.rs diff --git a/src/nodes/message.rs b/src/nodes/message.rs new file mode 100644 index 0000000..dd3dc08 --- /dev/null +++ b/src/nodes/message.rs @@ -0,0 +1,191 @@ +use crate::toolkit::node::Node; + +use super::paragraph::Paragraph; + +pub struct Message { + header: Option, + icon: Option, + nodes: Vec, + warning: bool, + consumed_all_input: bool, +} + +impl Message { + pub fn new>( + header: Option, + icon: Option, + warning: bool, + consumed_all_input: bool, + ) -> Self { + Self::new_with_nodes(header, icon, Vec::new(), warning, consumed_all_input) + } + + pub fn new_with_nodes>( + header: Option, + icon: Option, + nodes: Vec, + warning: bool, + consumed_all_input: bool, + ) -> Self { + Self { + header: header.map(|s| s.into()), + icon: icon.map(|s| s.into()), + nodes, + warning, + consumed_all_input, + } + } +} + +impl Node for Message { + fn serialize(&self) -> String { + format!( + "%%%%\n{header}{icon}{warning}{nodes}\n%%%%{end}", + header = self + .header + .as_ref() + .map_or(String::new(), |s| format!("%%% {}\n", s)), + icon = self + .icon + .as_ref() + .map_or(String::new(), |s| format!("%% {}\n", s)), + nodes = self.nodes.iter().map(|n| n.serialize()).collect::(), + warning = if self.warning { "% \n" } else { "" }, + end = if self.consumed_all_input { "" } else { "\n\n" } + ) + } + + fn len(&self) -> usize { + let mut len = 10; + if let Some(header) = &self.header { + len += header.len() + 5; + } + if let Some(icon) = &self.icon { + len += icon.len() + 4; + } + len += self.nodes.iter().map(|n| n.len()).sum::(); + if self.warning { + len += 3; + } + if !self.consumed_all_input { + len += 2; + } + len + } +} + +#[cfg(test)] +mod test { + use super::Message; + use crate::{ + nodes::{paragraph::Paragraph, text::Text}, + toolkit::node::Node, + }; + use pretty_assertions::assert_eq; + + #[test] + fn len() { + assert_eq!(Message::new::<&str>(None, None, false, false).len(), 12); + assert_eq!(Message::new(Some("header"), None, false, false).len(), 23); + assert_eq!(Message::new(None, Some("icon"), false, false).len(), 20); + assert_eq!( + Message::new(Some("header"), Some("icon"), false, false).len(), + 31 + ); + assert_eq!(Message::new::<&str>(None, None, true, false).len(), 15); + assert_eq!(Message::new(Some("header"), None, true, false).len(), 26); + assert_eq!(Message::new(None, Some("icon"), true, false).len(), 23); + assert_eq!( + Message::new(Some("header"), Some("icon"), true, false).len(), + 34 + ); + assert_eq!(Message::new::<&str>(None, None, false, true).len(), 10); + assert_eq!(Message::new(Some("header"), None, false, true).len(), 21); + assert_eq!(Message::new(None, Some("icon"), false, true).len(), 18); + assert_eq!( + Message::new(Some("header"), Some("icon"), false, true).len(), + 29 + ); + assert_eq!(Message::new::<&str>(None, None, true, true).len(), 13); + assert_eq!(Message::new(Some("header"), None, true, true).len(), 24); + assert_eq!(Message::new(None, Some("icon"), true, true).len(), 21); + assert_eq!( + Message::new(Some("header"), Some("icon"), true, true).len(), + 32 + ); + assert_eq!( + Message::new_with_nodes( + Some("header"), + Some("icon"), + vec![Paragraph::new_with_nodes( + true, + vec![Text::new("simple text").into()] + )], + true, + true + ) + .len(), + 43 + ); + } + + #[test] + fn serialize() { + assert_eq!( + Message::new::<&str>(None, None, false, false).serialize(), + "%%%%\n\n%%%%\n\n" + ); + assert_eq!( + Message::new(Some("header"), None, false, false).serialize(), + "%%%%\n%%% header\n\n%%%%\n\n" + ); + assert_eq!( + Message::new(None, Some("icon"), false, false).serialize(), + "%%%%\n%% icon\n\n%%%%\n\n" + ); + assert_eq!( + Message::new(Some("header"), Some("icon"), false, false).serialize(), + "%%%%\n%%% header\n%% icon\n\n%%%%\n\n" + ); + assert_eq!( + Message::new::<&str>(None, None, true, false).serialize(), + "%%%%\n% \n\n%%%%\n\n" + ); + assert_eq!( + Message::new(Some("header"), None, true, false).serialize(), + "%%%%\n%%% header\n% \n\n%%%%\n\n" + ); + assert_eq!( + Message::new(None, Some("icon"), true, false).serialize(), + "%%%%\n%% icon\n% \n\n%%%%\n\n" + ); + assert_eq!( + Message::new(Some("header"), Some("icon"), true, false).serialize(), + "%%%%\n%%% header\n%% icon\n% \n\n%%%%\n\n" + ); + assert_eq!( + Message::new::<&str>(None, None, false, true).serialize(), + "%%%%\n\n%%%%" + ); + assert_eq!( + Message::new(Some("header"), None, false, true).serialize(), + "%%%%\n%%% header\n\n%%%%" + ); + assert_eq!( + Message::new(None, Some("icon"), false, true).serialize(), + "%%%%\n%% icon\n\n%%%%" + ); + assert_eq!( + Message::new(Some("header"), Some("icon"), false, true).serialize(), + "%%%%\n%%% header\n%% icon\n\n%%%%" + ); + assert_eq!( + Message::new::<&str>(None, None, true, true).serialize(), + "%%%%\n% \n\n%%%%" + ); + assert_eq!( + Message::new(Some("header"), None, true, true).serialize(), + "%%%%\n%%% header\n% \n\n%%%%" + ); + } +} diff --git a/src/nodes/mod.rs b/src/nodes/mod.rs index ed59ffc..347ff80 100644 --- a/src/nodes/mod.rs +++ b/src/nodes/mod.rs @@ -11,6 +11,7 @@ pub mod inline_code; pub mod italic; pub mod list; pub mod list_item; +pub mod message; pub mod paragraph; pub mod strikethrough; pub mod text; From c4a75863dbc7329d851c9f52d97e582dacccd63f Mon Sep 17 00:00:00 2001 From: "serhiy.barhamon" Date: Sat, 8 Jul 2023 21:25:58 +0200 Subject: [PATCH 02/14] sequences should be equal in size in order to apply balanced match --- src/toolkit/tokenizer.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/toolkit/tokenizer.rs b/src/toolkit/tokenizer.rs index 33ea0e4..528b86c 100644 --- a/src/toolkit/tokenizer.rs +++ b/src/toolkit/tokenizer.rs @@ -118,7 +118,7 @@ impl<'input> Matcher<'input> { ) -> Option> { if !match_end_of_input && START_SEQUENCE_SIZE > 0 - && END_SEQUENCE_SIZE > 0 + && END_SEQUENCE_SIZE == START_SEQUENCE_SIZE && !Self::are_sequences_equal(start_sequence, end_sequence) { return self.get_balanced_match(start_sequence, end_sequence); @@ -300,6 +300,19 @@ mod tests { ) } + #[test] + fn get_match_when_value_repeats_part_of_start_token() { + let mut matcher = Matcher::new("(((t"); + assert_eq!( + matcher.get_match(&[Once('(')], &[Once('('), Once('t')], false), + Some(Match { + start_token: "(", + body: "(", + end_token: "(t" + }) + ) + } + #[test] fn get_balanced_match() { let mut matcher = Matcher::new("{{}}"); From 8c3a00f3fff1fc50b09ded5b92770190d945ea8c Mon Sep 17 00:00:00 2001 From: "serhiy.barhamon" Date: Sun, 9 Jul 2023 10:12:24 +0200 Subject: [PATCH 03/14] test should reflect actual change --- src/toolkit/tokenizer.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/toolkit/tokenizer.rs b/src/toolkit/tokenizer.rs index 528b86c..e327c65 100644 --- a/src/toolkit/tokenizer.rs +++ b/src/toolkit/tokenizer.rs @@ -301,14 +301,14 @@ mod tests { } #[test] - fn get_match_when_value_repeats_part_of_start_token() { - let mut matcher = Matcher::new("(((t"); + fn patterns_with_non_equal_length_can_not_be_balanced() { + let mut matcher = Matcher::new("(()t"); assert_eq!( - matcher.get_match(&[Once('(')], &[Once('('), Once('t')], false), + matcher.get_match(&[Once('(')], &[Once(')'), Once('t')], false), Some(Match { start_token: "(", body: "(", - end_token: "(t" + end_token: ")t" }) ) } From 6fe94ad3106d23f03ae74ec5398de102b47ac280 Mon Sep 17 00:00:00 2001 From: "serhiy.barhamon" Date: Sun, 9 Jul 2023 10:30:06 +0200 Subject: [PATCH 04/14] move pattern state machine to it's own file --- src/nodes/anchor.rs | 5 +- src/nodes/bold.rs | 3 +- src/nodes/code.rs | 9 +- src/nodes/divider.rs | 6 +- src/nodes/embed.rs | 4 +- src/nodes/heading.rs | 6 +- src/nodes/highlight.rs | 6 +- src/nodes/image.rs | 6 +- src/nodes/image_gallery.rs | 6 +- src/nodes/inline_code.rs | 7 +- src/nodes/italic.rs | 5 +- src/nodes/list.rs | 6 +- src/nodes/list_item.rs | 6 +- src/nodes/paragraph.rs | 3 +- src/nodes/strikethrough.rs | 5 +- src/toolkit/mod.rs | 1 + src/toolkit/pattern.rs | 193 +++++++++++++++++++++++++++++++++++++ src/toolkit/tokenizer.rs | 193 +------------------------------------ 18 files changed, 225 insertions(+), 245 deletions(-) create mode 100644 src/toolkit/pattern.rs diff --git a/src/nodes/anchor.rs b/src/nodes/anchor.rs index 364755e..2c86dee 100644 --- a/src/nodes/anchor.rs +++ b/src/nodes/anchor.rs @@ -1,10 +1,7 @@ use crate::{ toolkit::context::Context, toolkit::deserializer::Deserializer, - toolkit::{ - node::Node, - tokenizer::{Matcher, Quantifiers::Once}, - }, + toolkit::{node::Node, pattern::Quantifiers::*, tokenizer::Matcher}, }; /// Representation of an anchor diff --git a/src/nodes/bold.rs b/src/nodes/bold.rs index 1376e7b..33fdd19 100644 --- a/src/nodes/bold.rs +++ b/src/nodes/bold.rs @@ -6,7 +6,8 @@ use crate::{ context::Context, deserializer::{Branch, DefinitelyNode, Deserializer, MaybeNode}, node::Node, - tokenizer::{Matcher, Quantifiers::Once}, + pattern::Quantifiers::*, + tokenizer::Matcher, }, }; diff --git a/src/nodes/code.rs b/src/nodes/code.rs index 015ce27..2e2d39f 100644 --- a/src/nodes/code.rs +++ b/src/nodes/code.rs @@ -1,11 +1,6 @@ use crate::toolkit::{ - context::Context, - deserializer::Deserializer, - node::Node, - tokenizer::{ - Matcher, - Quantifiers::{Once, RepeatTimes}, - }, + context::Context, deserializer::Deserializer, node::Node, pattern::Quantifiers::*, + tokenizer::Matcher, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/divider.rs b/src/nodes/divider.rs index caa5e0b..83627bf 100644 --- a/src/nodes/divider.rs +++ b/src/nodes/divider.rs @@ -1,8 +1,6 @@ use crate::toolkit::{ - context::Context, - deserializer::Deserializer, - node::Node, - tokenizer::{Matcher, Quantifiers::RepeatTimes}, + context::Context, deserializer::Deserializer, node::Node, pattern::Quantifiers::*, + tokenizer::Matcher, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/embed.rs b/src/nodes/embed.rs index 20a2e97..2da4e06 100644 --- a/src/nodes/embed.rs +++ b/src/nodes/embed.rs @@ -1,7 +1,5 @@ use crate::toolkit::{ - deserializer::Deserializer, - node::Node, - tokenizer::{Matcher, Quantifiers::RepeatTimes}, + deserializer::Deserializer, node::Node, pattern::Quantifiers::*, tokenizer::Matcher, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/heading.rs b/src/nodes/heading.rs index f69c66d..ca87960 100644 --- a/src/nodes/heading.rs +++ b/src/nodes/heading.rs @@ -1,8 +1,6 @@ use crate::toolkit::{ - context::Context, - deserializer::Deserializer, - node::Node, - tokenizer::{Matcher, Quantifiers::Once, Quantifiers::RepeatTimes}, + context::Context, deserializer::Deserializer, node::Node, pattern::Quantifiers::*, + tokenizer::Matcher, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/highlight.rs b/src/nodes/highlight.rs index b226674..da7bb98 100644 --- a/src/nodes/highlight.rs +++ b/src/nodes/highlight.rs @@ -2,10 +2,8 @@ use crate::toolkit::{ context::Context, deserializer::{Branch, DefinitelyNode, Deserializer, MaybeNode}, node::Node, - tokenizer::{ - Matcher, - Quantifiers::{Once, RepeatTimes}, - }, + pattern::Quantifiers::*, + tokenizer::Matcher, }; use super::paragraph::Paragraph; diff --git a/src/nodes/image.rs b/src/nodes/image.rs index 10686f9..c8efb39 100644 --- a/src/nodes/image.rs +++ b/src/nodes/image.rs @@ -1,8 +1,6 @@ use crate::toolkit::{ - context::Context, - deserializer::Deserializer, - node::Node, - tokenizer::{Matcher, Quantifiers::Once}, + context::Context, deserializer::Deserializer, node::Node, pattern::Quantifiers::*, + tokenizer::Matcher, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/image_gallery.rs b/src/nodes/image_gallery.rs index c984bbc..88f8e43 100644 --- a/src/nodes/image_gallery.rs +++ b/src/nodes/image_gallery.rs @@ -2,10 +2,8 @@ use crate::toolkit::{ context::Context, deserializer::{Branch, DefinitelyNode, Deserializer, MaybeNode}, node::Node, - tokenizer::{ - Matcher, - Quantifiers::{Once, RepeatTimes}, - }, + pattern::Quantifiers::*, + tokenizer::Matcher, }; use super::image::Image; diff --git a/src/nodes/inline_code.rs b/src/nodes/inline_code.rs index c15d668..35f5fc9 100644 --- a/src/nodes/inline_code.rs +++ b/src/nodes/inline_code.rs @@ -1,7 +1,6 @@ -use crate::{ - toolkit::context::Context, - toolkit::tokenizer::{Matcher, Quantifiers::Once}, - toolkit::{deserializer::Deserializer, node::Node}, +use crate::toolkit::{ + context::Context, deserializer::Deserializer, node::Node, pattern::Quantifiers::*, + tokenizer::Matcher, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/italic.rs b/src/nodes/italic.rs index 6495a3b..8b28ad9 100644 --- a/src/nodes/italic.rs +++ b/src/nodes/italic.rs @@ -1,9 +1,6 @@ use crate::{ toolkit::{context::Context, deserializer::Deserializer}, - toolkit::{ - node::Node, - tokenizer::{Matcher, Quantifiers::Once}, - }, + toolkit::{node::Node, pattern::Quantifiers::*, tokenizer::Matcher}, }; /// Representation of an Italic text diff --git a/src/nodes/list.rs b/src/nodes/list.rs index e4d2c6f..dba1f7f 100644 --- a/src/nodes/list.rs +++ b/src/nodes/list.rs @@ -2,10 +2,8 @@ use crate::toolkit::{ context::Context, deserializer::{Branch, DefinitelyNode, Deserializer, MaybeNode}, node::Node, - tokenizer::{ - Matcher, - Quantifiers::{Once, RepeatTimes, ZeroOrMore}, - }, + pattern::Quantifiers::*, + tokenizer::Matcher, }; use super::list_item::ListItem; diff --git a/src/nodes/list_item.rs b/src/nodes/list_item.rs index 918d617..0014386 100644 --- a/src/nodes/list_item.rs +++ b/src/nodes/list_item.rs @@ -2,10 +2,8 @@ use crate::toolkit::{ context::Context, deserializer::{Branch, DefinitelyNode, Deserializer, FallbackNode, MaybeNode}, node::Node, - tokenizer::{ - Matcher, - Quantifiers::{Once, RepeatTimes, ZeroOrMore}, - }, + pattern::Quantifiers::*, + tokenizer::Matcher, }; use super::{ diff --git a/src/nodes/paragraph.rs b/src/nodes/paragraph.rs index bdce2e9..27abb55 100644 --- a/src/nodes/paragraph.rs +++ b/src/nodes/paragraph.rs @@ -6,7 +6,8 @@ use crate::toolkit::node::Node; use crate::toolkit::{ context::Context, deserializer::{Branch, DefinitelyNode, Deserializer, FallbackNode, MaybeNode}, - tokenizer::{Matcher, Quantifiers::RepeatTimes}, + pattern::Quantifiers::*, + tokenizer::Matcher, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/strikethrough.rs b/src/nodes/strikethrough.rs index 72997e6..94da9dc 100644 --- a/src/nodes/strikethrough.rs +++ b/src/nodes/strikethrough.rs @@ -1,9 +1,6 @@ use crate::{ toolkit::{context::Context, deserializer::Deserializer}, - toolkit::{ - node::Node, - tokenizer::{Matcher, Quantifiers::RepeatTimes}, - }, + toolkit::{node::Node, pattern::Quantifiers::*, tokenizer::Matcher}, }; /// Representation of strikethrough diff --git a/src/toolkit/mod.rs b/src/toolkit/mod.rs index 0a687c9..b13d3cf 100644 --- a/src/toolkit/mod.rs +++ b/src/toolkit/mod.rs @@ -1,4 +1,5 @@ pub mod context; pub mod deserializer; pub mod node; +pub mod pattern; pub mod tokenizer; diff --git a/src/toolkit/pattern.rs b/src/toolkit/pattern.rs new file mode 100644 index 0000000..9459b49 --- /dev/null +++ b/src/toolkit/pattern.rs @@ -0,0 +1,193 @@ +#[derive(Clone, Debug, PartialEq)] +pub enum Quantifiers { + Once(char), + ZeroOrMore(char), + RepeatTimes(usize, char), +} + +pub struct Pattern<'token, const SIZE: usize> { + index: usize, + sequence: &'token [Quantifiers; SIZE], + pub length: usize, + quantifiers_lengths: [usize; SIZE], +} + +impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { + pub fn new(sequence: &'sequence [Quantifiers; SIZE]) -> Self { + Self { + index: 0, + sequence, + length: 0, + quantifiers_lengths: [0; SIZE], + } + } + + fn next_index(&mut self, c: &char, index: usize) -> Option { + let current_pattern_length = self.get_quantifier_length(index).unwrap_or(&0); + return match self.sequence.get(index) { + Some(Quantifiers::Once(p)) if p == c => { + if let Some(count) = self.quantifiers_lengths.get_mut(index) { + *count += 1; + }; + Some(index + 1) + } + Some(Quantifiers::ZeroOrMore(p)) if p == c => { + if let Some(count) = self.quantifiers_lengths.get_mut(index) { + *count += 1; + }; + Some(index) + } + Some(Quantifiers::ZeroOrMore(p)) if p != c => self.next_index(c, index + 1), + Some(Quantifiers::RepeatTimes(length, p)) + if (p == c && current_pattern_length + 1 < *length) => + { + if let Some(count) = self.quantifiers_lengths.get_mut(index) { + *count += 1; + }; + + Some(index) + } + Some(Quantifiers::RepeatTimes(length, p)) + if (p == c && current_pattern_length + 1 == *length) => + { + Some(index + 1) + } + Some(Quantifiers::RepeatTimes(length, _)) if (*length == 0) => { + self.next_index(c, index + 1) + } + _ => None, + }; + } + + pub fn check_character(&mut self, c: &char) -> bool { + if let Some(new_index) = self.next_index(c, self.index) { + self.index = new_index; + self.length += 1; + return true; + } + self.reset(); + false + } + + pub fn reset(&mut self) { + self.index = 0; + self.length = 0; + self.quantifiers_lengths = [0; SIZE]; + } + + pub fn is_end_of_sequence(&self) -> bool { + self.index == self.sequence.len() + } + + pub fn get_quantifier_length(&self, index: usize) -> Option<&usize> { + self.quantifiers_lengths.get(index) + } +} + +#[cfg(test)] +mod tests { + use crate::toolkit::pattern::{Pattern, Quantifiers::*}; + + #[test] + fn matcher() { + let mut m = Pattern::new(&[Once('*'), Once('*')]); + assert_eq!(m.check_character(&'*'), true); + assert_eq!(m.is_end_of_sequence(), false); + assert_eq!(m.check_character(&'*'), true); + assert_eq!(m.is_end_of_sequence(), true); + } + + #[test] + fn matcher_not_matched() { + let mut m = Pattern::new(&[Once('*'), Once('*')]); + assert_eq!(m.check_character(&'a'), false); + assert_eq!(m.is_end_of_sequence(), false); + assert_eq!(m.check_character(&'b'), false); + assert_eq!(m.is_end_of_sequence(), false); + } + + #[test] + fn pattern_repeat() { + let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); + assert_eq!(m.check_character(&' '), true); + assert_eq!(m.check_character(&' '), true); + assert_eq!(m.check_character(&'-'), true); + assert_eq!(m.is_end_of_sequence(), true); + assert_eq!(m.length, 3); + assert_eq!(m.get_quantifier_length(0), Some(&2)); + assert_eq!(m.get_quantifier_length(1), Some(&1)); + assert_eq!(m.check_character(&'-'), false); + assert_eq!(m.length, 0); + assert_eq!(m.is_end_of_sequence(), false); + } + + #[test] + fn pattern_repeat_zero() { + let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); + assert_eq!(m.check_character(&'-'), true); + assert_eq!(m.is_end_of_sequence(), true); + assert_eq!(m.get_quantifier_length(0), Some(&0)); + assert_eq!(m.get_quantifier_length(1), Some(&1)); + assert_eq!(m.check_character(&'-'), false); + } + + #[test] + fn pattern_exact_repeat_happy_path() { + let mut m = Pattern::new(&[RepeatTimes(2, ' '), Once('-')]); + assert_eq!(m.check_character(&' '), true); + assert_eq!(m.check_character(&' '), true); + assert_eq!(m.check_character(&'-'), true); + assert_eq!(m.is_end_of_sequence(), true); + } + + #[test] + fn pattern_starts_with_exact_repeat() { + let mut m = Pattern::new(&[RepeatTimes(2, ' '), Once('-')]); + assert_eq!(m.check_character(&' '), true); + assert_eq!(m.check_character(&' '), true); + assert_eq!(m.check_character(&' '), false) + } + + #[test] + fn pattern_starts_with_0_exact_repeat() { + let mut m = Pattern::new(&[RepeatTimes(0, ' '), Once('-')]); + assert_eq!(m.check_character(&'-'), true); + assert_eq!(m.is_end_of_sequence(), true); + } + + #[test] + fn pattern_ends_with_exact_repeat() { + let mut m = Pattern::new(&[Once('-'), RepeatTimes(2, ' ')]); + assert_eq!(m.check_character(&'-'), true); + assert_eq!(m.check_character(&' '), true); + assert_eq!(m.check_character(&' '), true); + assert_eq!(m.is_end_of_sequence(), true); + assert_eq!(m.check_character(&' '), false); + } + + #[test] + fn repeat_times_pattern() { + let mut m = Pattern::new(&[RepeatTimes(2, ' ')]); + assert_eq!(m.check_character(&' '), true); + assert_eq!(m.check_character(&' '), true); + assert_eq!(m.is_end_of_sequence(), true); + assert_eq!(m.check_character(&' '), false); + } + + #[test] + fn new_index() { + let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); + assert_eq!(m.next_index(&' ', 0), Some(0)); + assert_eq!(m.next_index(&'-', 1), Some(2)); + assert_eq!(m.next_index(&'d', 0), None); + assert_eq!(m.next_index(&'d', 1), None); + } + #[test] + fn pattern_repeat_is_not_matched() { + let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); + assert_eq!(m.check_character(&' '), true); + assert_eq!(m.check_character(&' '), true); + assert_eq!(m.check_character(&'a'), false); + assert_eq!(m.is_end_of_sequence(), false); + } +} diff --git a/src/toolkit/tokenizer.rs b/src/toolkit/tokenizer.rs index e327c65..144663c 100644 --- a/src/toolkit/tokenizer.rs +++ b/src/toolkit/tokenizer.rs @@ -1,87 +1,4 @@ -#[derive(Clone, Debug, PartialEq)] -pub enum Quantifiers { - Once(char), - ZeroOrMore(char), - RepeatTimes(usize, char), -} - -struct Pattern<'token, const SIZE: usize> { - index: usize, - sequence: &'token [Quantifiers; SIZE], - length: usize, - quantifiers_lengths: [usize; SIZE], -} - -impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { - fn new(sequence: &'sequence [Quantifiers; SIZE]) -> Self { - Self { - index: 0, - sequence, - length: 0, - quantifiers_lengths: [0; SIZE], - } - } - - fn next_index(&mut self, c: &char, index: usize) -> Option { - let current_pattern_length = self.get_quantifier_length(index).unwrap_or(&0); - return match self.sequence.get(index) { - Some(Quantifiers::Once(p)) if p == c => { - if let Some(count) = self.quantifiers_lengths.get_mut(index) { - *count += 1; - }; - Some(index + 1) - } - Some(Quantifiers::ZeroOrMore(p)) if p == c => { - if let Some(count) = self.quantifiers_lengths.get_mut(index) { - *count += 1; - }; - Some(index) - } - Some(Quantifiers::ZeroOrMore(p)) if p != c => self.next_index(c, index + 1), - Some(Quantifiers::RepeatTimes(length, p)) - if (p == c && current_pattern_length + 1 < *length) => - { - if let Some(count) = self.quantifiers_lengths.get_mut(index) { - *count += 1; - }; - - Some(index) - } - Some(Quantifiers::RepeatTimes(length, p)) - if (p == c && current_pattern_length + 1 == *length) => - { - Some(index + 1) - } - Some(Quantifiers::RepeatTimes(length, _)) if (*length == 0) => { - self.next_index(c, index + 1) - } - _ => None, - }; - } - fn check_character(&mut self, c: &char) -> bool { - if let Some(new_index) = self.next_index(c, self.index) { - self.index = new_index; - self.length += 1; - return true; - } - self.reset(); - false - } - - fn reset(&mut self) { - self.index = 0; - self.length = 0; - self.quantifiers_lengths = [0; SIZE]; - } - - fn is_end_of_sequence(&self) -> bool { - self.index == self.sequence.len() - } - - fn get_quantifier_length(&self, index: usize) -> Option<&usize> { - self.quantifiers_lengths.get(index) - } -} +use super::pattern::{Pattern, Quantifiers}; pub struct Matcher<'input> { input: &'input str, @@ -224,11 +141,12 @@ impl<'input> Matcher<'input> { &self.input[self.position..] } } + #[cfg(test)] mod tests { use crate::toolkit::tokenizer::{ - Match, Matcher, Pattern, - Quantifiers::{Once, RepeatTimes, ZeroOrMore}, + Match, Matcher, + Quantifiers::{Once, RepeatTimes}, }; use pretty_assertions::assert_eq; @@ -325,107 +243,4 @@ mod tests { }) ); } - - #[test] - fn matcher() { - let mut m = Pattern::new(&[Once('*'), Once('*')]); - assert_eq!(m.check_character(&'*'), true); - assert_eq!(m.is_end_of_sequence(), false); - assert_eq!(m.check_character(&'*'), true); - assert_eq!(m.is_end_of_sequence(), true); - } - - #[test] - fn matcher_not_matched() { - let mut m = Pattern::new(&[Once('*'), Once('*')]); - assert_eq!(m.check_character(&'a'), false); - assert_eq!(m.is_end_of_sequence(), false); - assert_eq!(m.check_character(&'b'), false); - assert_eq!(m.is_end_of_sequence(), false); - } - - #[test] - fn pattern_repeat() { - let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&'-'), true); - assert_eq!(m.is_end_of_sequence(), true); - assert_eq!(m.length, 3); - assert_eq!(m.get_quantifier_length(0), Some(&2)); - assert_eq!(m.get_quantifier_length(1), Some(&1)); - assert_eq!(m.check_character(&'-'), false); - assert_eq!(m.length, 0); - assert_eq!(m.is_end_of_sequence(), false); - } - - #[test] - fn pattern_repeat_zero() { - let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); - assert_eq!(m.check_character(&'-'), true); - assert_eq!(m.is_end_of_sequence(), true); - assert_eq!(m.get_quantifier_length(0), Some(&0)); - assert_eq!(m.get_quantifier_length(1), Some(&1)); - assert_eq!(m.check_character(&'-'), false); - } - - #[test] - fn pattern_exact_repeat_happy_path() { - let mut m = Pattern::new(&[RepeatTimes(2, ' '), Once('-')]); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&'-'), true); - assert_eq!(m.is_end_of_sequence(), true); - } - - #[test] - fn pattern_starts_with_exact_repeat() { - let mut m = Pattern::new(&[RepeatTimes(2, ' '), Once('-')]); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), false) - } - - #[test] - fn pattern_starts_with_0_exact_repeat() { - let mut m = Pattern::new(&[RepeatTimes(0, ' '), Once('-')]); - assert_eq!(m.check_character(&'-'), true); - assert_eq!(m.is_end_of_sequence(), true); - } - - #[test] - fn pattern_ends_with_exact_repeat() { - let mut m = Pattern::new(&[Once('-'), RepeatTimes(2, ' ')]); - assert_eq!(m.check_character(&'-'), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.is_end_of_sequence(), true); - assert_eq!(m.check_character(&' '), false); - } - - #[test] - fn repeat_times_pattern() { - let mut m = Pattern::new(&[RepeatTimes(2, ' ')]); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.is_end_of_sequence(), true); - assert_eq!(m.check_character(&' '), false); - } - - #[test] - fn new_index() { - let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); - assert_eq!(m.next_index(&' ', 0), Some(0)); - assert_eq!(m.next_index(&'-', 1), Some(2)); - assert_eq!(m.next_index(&'d', 0), None); - assert_eq!(m.next_index(&'d', 1), None); - } - #[test] - fn pattern_repeat_is_not_matched() { - let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&'a'), false); - assert_eq!(m.is_end_of_sequence(), false); - } } From 09c55160222cad03de19e1ab41db86ad383ad702 Mon Sep 17 00:00:00 2001 From: "serhiy.barhamon" Date: Sun, 9 Jul 2023 10:43:02 +0200 Subject: [PATCH 05/14] rename tokenizer --- src/nodes/anchor.rs | 2 +- src/nodes/bold.rs | 2 +- src/nodes/code.rs | 4 ++-- src/nodes/divider.rs | 4 ++-- src/nodes/embed.rs | 2 +- src/nodes/heading.rs | 4 ++-- src/nodes/highlight.rs | 2 +- src/nodes/image.rs | 4 ++-- src/nodes/image_gallery.rs | 2 +- src/nodes/inline_code.rs | 4 ++-- src/nodes/italic.rs | 2 +- src/nodes/list.rs | 2 +- src/nodes/list_item.rs | 2 +- src/nodes/paragraph.rs | 2 +- src/nodes/strikethrough.rs | 2 +- src/toolkit/{tokenizer.rs => matcher.rs} | 2 +- src/toolkit/mod.rs | 2 +- 17 files changed, 22 insertions(+), 22 deletions(-) rename src/toolkit/{tokenizer.rs => matcher.rs} (99%) diff --git a/src/nodes/anchor.rs b/src/nodes/anchor.rs index 2c86dee..226949f 100644 --- a/src/nodes/anchor.rs +++ b/src/nodes/anchor.rs @@ -1,7 +1,7 @@ use crate::{ toolkit::context::Context, toolkit::deserializer::Deserializer, - toolkit::{node::Node, pattern::Quantifiers::*, tokenizer::Matcher}, + toolkit::{matcher::Matcher, node::Node, pattern::Quantifiers::*}, }; /// Representation of an anchor diff --git a/src/nodes/bold.rs b/src/nodes/bold.rs index 33fdd19..c878465 100644 --- a/src/nodes/bold.rs +++ b/src/nodes/bold.rs @@ -5,9 +5,9 @@ use crate::{ toolkit::{ context::Context, deserializer::{Branch, DefinitelyNode, Deserializer, MaybeNode}, + matcher::Matcher, node::Node, pattern::Quantifiers::*, - tokenizer::Matcher, }, }; diff --git a/src/nodes/code.rs b/src/nodes/code.rs index 2e2d39f..d4b2d02 100644 --- a/src/nodes/code.rs +++ b/src/nodes/code.rs @@ -1,6 +1,6 @@ use crate::toolkit::{ - context::Context, deserializer::Deserializer, node::Node, pattern::Quantifiers::*, - tokenizer::Matcher, + context::Context, deserializer::Deserializer, matcher::Matcher, node::Node, + pattern::Quantifiers::*, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/divider.rs b/src/nodes/divider.rs index 83627bf..0598061 100644 --- a/src/nodes/divider.rs +++ b/src/nodes/divider.rs @@ -1,6 +1,6 @@ use crate::toolkit::{ - context::Context, deserializer::Deserializer, node::Node, pattern::Quantifiers::*, - tokenizer::Matcher, + context::Context, deserializer::Deserializer, matcher::Matcher, node::Node, + pattern::Quantifiers::*, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/embed.rs b/src/nodes/embed.rs index 2da4e06..df795a0 100644 --- a/src/nodes/embed.rs +++ b/src/nodes/embed.rs @@ -1,5 +1,5 @@ use crate::toolkit::{ - deserializer::Deserializer, node::Node, pattern::Quantifiers::*, tokenizer::Matcher, + deserializer::Deserializer, matcher::Matcher, node::Node, pattern::Quantifiers::*, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/heading.rs b/src/nodes/heading.rs index ca87960..436f46d 100644 --- a/src/nodes/heading.rs +++ b/src/nodes/heading.rs @@ -1,6 +1,6 @@ use crate::toolkit::{ - context::Context, deserializer::Deserializer, node::Node, pattern::Quantifiers::*, - tokenizer::Matcher, + context::Context, deserializer::Deserializer, matcher::Matcher, node::Node, + pattern::Quantifiers::*, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/highlight.rs b/src/nodes/highlight.rs index da7bb98..fc6de0c 100644 --- a/src/nodes/highlight.rs +++ b/src/nodes/highlight.rs @@ -1,9 +1,9 @@ use crate::toolkit::{ context::Context, deserializer::{Branch, DefinitelyNode, Deserializer, MaybeNode}, + matcher::Matcher, node::Node, pattern::Quantifiers::*, - tokenizer::Matcher, }; use super::paragraph::Paragraph; diff --git a/src/nodes/image.rs b/src/nodes/image.rs index c8efb39..62e8230 100644 --- a/src/nodes/image.rs +++ b/src/nodes/image.rs @@ -1,6 +1,6 @@ use crate::toolkit::{ - context::Context, deserializer::Deserializer, node::Node, pattern::Quantifiers::*, - tokenizer::Matcher, + context::Context, deserializer::Deserializer, matcher::Matcher, node::Node, + pattern::Quantifiers::*, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/image_gallery.rs b/src/nodes/image_gallery.rs index 88f8e43..4c86dd9 100644 --- a/src/nodes/image_gallery.rs +++ b/src/nodes/image_gallery.rs @@ -1,9 +1,9 @@ use crate::toolkit::{ context::Context, deserializer::{Branch, DefinitelyNode, Deserializer, MaybeNode}, + matcher::Matcher, node::Node, pattern::Quantifiers::*, - tokenizer::Matcher, }; use super::image::Image; diff --git a/src/nodes/inline_code.rs b/src/nodes/inline_code.rs index 35f5fc9..a3ed975 100644 --- a/src/nodes/inline_code.rs +++ b/src/nodes/inline_code.rs @@ -1,6 +1,6 @@ use crate::toolkit::{ - context::Context, deserializer::Deserializer, node::Node, pattern::Quantifiers::*, - tokenizer::Matcher, + context::Context, deserializer::Deserializer, matcher::Matcher, node::Node, + pattern::Quantifiers::*, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/italic.rs b/src/nodes/italic.rs index 8b28ad9..b9de58f 100644 --- a/src/nodes/italic.rs +++ b/src/nodes/italic.rs @@ -1,6 +1,6 @@ use crate::{ toolkit::{context::Context, deserializer::Deserializer}, - toolkit::{node::Node, pattern::Quantifiers::*, tokenizer::Matcher}, + toolkit::{matcher::Matcher, node::Node, pattern::Quantifiers::*}, }; /// Representation of an Italic text diff --git a/src/nodes/list.rs b/src/nodes/list.rs index dba1f7f..541ec62 100644 --- a/src/nodes/list.rs +++ b/src/nodes/list.rs @@ -1,9 +1,9 @@ use crate::toolkit::{ context::Context, deserializer::{Branch, DefinitelyNode, Deserializer, MaybeNode}, + matcher::Matcher, node::Node, pattern::Quantifiers::*, - tokenizer::Matcher, }; use super::list_item::ListItem; diff --git a/src/nodes/list_item.rs b/src/nodes/list_item.rs index 0014386..c6563da 100644 --- a/src/nodes/list_item.rs +++ b/src/nodes/list_item.rs @@ -1,9 +1,9 @@ use crate::toolkit::{ context::Context, deserializer::{Branch, DefinitelyNode, Deserializer, FallbackNode, MaybeNode}, + matcher::Matcher, node::Node, pattern::Quantifiers::*, - tokenizer::Matcher, }; use super::{ diff --git a/src/nodes/paragraph.rs b/src/nodes/paragraph.rs index 27abb55..8731fe1 100644 --- a/src/nodes/paragraph.rs +++ b/src/nodes/paragraph.rs @@ -6,8 +6,8 @@ use crate::toolkit::node::Node; use crate::toolkit::{ context::Context, deserializer::{Branch, DefinitelyNode, Deserializer, FallbackNode, MaybeNode}, + matcher::Matcher, pattern::Quantifiers::*, - tokenizer::Matcher, }; #[derive(Debug, PartialEq)] diff --git a/src/nodes/strikethrough.rs b/src/nodes/strikethrough.rs index 94da9dc..f60e209 100644 --- a/src/nodes/strikethrough.rs +++ b/src/nodes/strikethrough.rs @@ -1,6 +1,6 @@ use crate::{ toolkit::{context::Context, deserializer::Deserializer}, - toolkit::{node::Node, pattern::Quantifiers::*, tokenizer::Matcher}, + toolkit::{matcher::Matcher, node::Node, pattern::Quantifiers::*}, }; /// Representation of strikethrough diff --git a/src/toolkit/tokenizer.rs b/src/toolkit/matcher.rs similarity index 99% rename from src/toolkit/tokenizer.rs rename to src/toolkit/matcher.rs index 144663c..12d3a66 100644 --- a/src/toolkit/tokenizer.rs +++ b/src/toolkit/matcher.rs @@ -144,7 +144,7 @@ impl<'input> Matcher<'input> { #[cfg(test)] mod tests { - use crate::toolkit::tokenizer::{ + use crate::toolkit::matcher::{ Match, Matcher, Quantifiers::{Once, RepeatTimes}, }; diff --git a/src/toolkit/mod.rs b/src/toolkit/mod.rs index b13d3cf..4d50299 100644 --- a/src/toolkit/mod.rs +++ b/src/toolkit/mod.rs @@ -1,5 +1,5 @@ pub mod context; pub mod deserializer; +pub mod matcher; pub mod node; pub mod pattern; -pub mod tokenizer; From ff453e36c99ebcf0fc59890153899450b2cd86c2 Mon Sep 17 00:00:00 2001 From: "serhiy.barhamon" Date: Sun, 9 Jul 2023 11:23:44 +0200 Subject: [PATCH 06/14] make pattern.is_end_of_sequence private --- src/toolkit/matcher.rs | 11 +++--- src/toolkit/pattern.rs | 77 ++++++++++++++++++------------------------ 2 files changed, 36 insertions(+), 52 deletions(-) diff --git a/src/toolkit/matcher.rs b/src/toolkit/matcher.rs index 12d3a66..69d8818 100644 --- a/src/toolkit/matcher.rs +++ b/src/toolkit/matcher.rs @@ -58,8 +58,8 @@ impl<'input> Matcher<'input> { return Some((start_position, 0)); } for (index, char) in self.input.chars().enumerate().skip(start_position) { - let is_character_in_pattern = pattern.check_character(&char); - if is_character_in_pattern && pattern.is_end_of_sequence() { + let (is_character_in_pattern, is_end_of_sequence) = pattern.check_character(&char); + if is_character_in_pattern && is_end_of_sequence { return Some((index + 1, pattern.length)); } else if match_end_of_input && index == self.input.len() - 1 { return Some((index + 1, 0)); @@ -112,13 +112,10 @@ impl<'input> Matcher<'input> { self.iterate(start_sequence, self.position, true, false) { for (index, char) in self.input.chars().enumerate().skip(self.position) { - if start_pattern.check_character(&char) && start_pattern.is_end_of_sequence() { + if start_pattern.check_character(&char) == (true, true) { start_pattern.reset(); balance += 1; - } else if balance > 0 - && end_pattern.check_character(&char) - && end_pattern.is_end_of_sequence() - { + } else if balance > 0 && end_pattern.check_character(&char) == (true, true) { balance -= 1; if balance == 0 { let end_token_end_index = index + 1; diff --git a/src/toolkit/pattern.rs b/src/toolkit/pattern.rs index 9459b49..0202a22 100644 --- a/src/toolkit/pattern.rs +++ b/src/toolkit/pattern.rs @@ -59,14 +59,14 @@ impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { }; } - pub fn check_character(&mut self, c: &char) -> bool { + pub fn check_character(&mut self, c: &char) -> (bool, bool) { if let Some(new_index) = self.next_index(c, self.index) { self.index = new_index; self.length += 1; - return true; + return (true, self.is_end_of_sequence()); } self.reset(); - false + (false, self.is_end_of_sequence()) } pub fn reset(&mut self) { @@ -75,11 +75,11 @@ impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { self.quantifiers_lengths = [0; SIZE]; } - pub fn is_end_of_sequence(&self) -> bool { + fn is_end_of_sequence(&self) -> bool { self.index == self.sequence.len() } - pub fn get_quantifier_length(&self, index: usize) -> Option<&usize> { + fn get_quantifier_length(&self, index: usize) -> Option<&usize> { self.quantifiers_lengths.get(index) } } @@ -91,87 +91,75 @@ mod tests { #[test] fn matcher() { let mut m = Pattern::new(&[Once('*'), Once('*')]); - assert_eq!(m.check_character(&'*'), true); - assert_eq!(m.is_end_of_sequence(), false); - assert_eq!(m.check_character(&'*'), true); - assert_eq!(m.is_end_of_sequence(), true); + assert_eq!(m.check_character(&'*'), (true, false)); + assert_eq!(m.check_character(&'*'), (true, true)); } #[test] fn matcher_not_matched() { let mut m = Pattern::new(&[Once('*'), Once('*')]); - assert_eq!(m.check_character(&'a'), false); - assert_eq!(m.is_end_of_sequence(), false); - assert_eq!(m.check_character(&'b'), false); - assert_eq!(m.is_end_of_sequence(), false); + assert_eq!(m.check_character(&'a'), (false, false)); + assert_eq!(m.check_character(&'b'), (false, false)); } #[test] fn pattern_repeat() { let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&'-'), true); - assert_eq!(m.is_end_of_sequence(), true); + assert_eq!(m.check_character(&' '), (true, false)); + assert_eq!(m.check_character(&' '), (true, false)); + assert_eq!(m.check_character(&'-'), (true, true)); assert_eq!(m.length, 3); assert_eq!(m.get_quantifier_length(0), Some(&2)); assert_eq!(m.get_quantifier_length(1), Some(&1)); - assert_eq!(m.check_character(&'-'), false); - assert_eq!(m.length, 0); - assert_eq!(m.is_end_of_sequence(), false); + assert_eq!(m.check_character(&'-'), (false, false)); } #[test] fn pattern_repeat_zero() { let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); - assert_eq!(m.check_character(&'-'), true); - assert_eq!(m.is_end_of_sequence(), true); + assert_eq!(m.check_character(&'-'), (true, true)); assert_eq!(m.get_quantifier_length(0), Some(&0)); assert_eq!(m.get_quantifier_length(1), Some(&1)); - assert_eq!(m.check_character(&'-'), false); + assert_eq!(m.check_character(&'-'), (false, false)); } #[test] fn pattern_exact_repeat_happy_path() { let mut m = Pattern::new(&[RepeatTimes(2, ' '), Once('-')]); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&'-'), true); - assert_eq!(m.is_end_of_sequence(), true); + assert_eq!(m.check_character(&' '), (true, false)); + assert_eq!(m.check_character(&' '), (true, false)); + assert_eq!(m.check_character(&'-'), (true, true)); } #[test] fn pattern_starts_with_exact_repeat() { let mut m = Pattern::new(&[RepeatTimes(2, ' '), Once('-')]); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), false) + assert_eq!(m.check_character(&' '), (true, false)); + assert_eq!(m.check_character(&' '), (true, false)); + assert_eq!(m.check_character(&' '), (false, false)); } #[test] fn pattern_starts_with_0_exact_repeat() { let mut m = Pattern::new(&[RepeatTimes(0, ' '), Once('-')]); - assert_eq!(m.check_character(&'-'), true); - assert_eq!(m.is_end_of_sequence(), true); + assert_eq!(m.check_character(&'-'), (true, true)); } #[test] fn pattern_ends_with_exact_repeat() { let mut m = Pattern::new(&[Once('-'), RepeatTimes(2, ' ')]); - assert_eq!(m.check_character(&'-'), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.is_end_of_sequence(), true); - assert_eq!(m.check_character(&' '), false); + assert_eq!(m.check_character(&'-'), (true, false)); + assert_eq!(m.check_character(&' '), (true, false)); + assert_eq!(m.check_character(&' '), (true, true)); + assert_eq!(m.check_character(&' '), (false, false)); } #[test] fn repeat_times_pattern() { let mut m = Pattern::new(&[RepeatTimes(2, ' ')]); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.is_end_of_sequence(), true); - assert_eq!(m.check_character(&' '), false); + assert_eq!(m.check_character(&' '), (true, false)); + assert_eq!(m.check_character(&' '), (true, true)); + assert_eq!(m.check_character(&' '), (false, false)); } #[test] @@ -185,9 +173,8 @@ mod tests { #[test] fn pattern_repeat_is_not_matched() { let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&' '), true); - assert_eq!(m.check_character(&'a'), false); - assert_eq!(m.is_end_of_sequence(), false); + assert_eq!(m.check_character(&' '), (true, false)); + assert_eq!(m.check_character(&' '), (true, false)); + assert_eq!(m.check_character(&'a'), (false, false)); } } From 5d8e8ca5405b7118541695b03aaceeddf5cc3e94 Mon Sep 17 00:00:00 2001 From: "serhiy.barhamon" Date: Sun, 9 Jul 2023 12:24:56 +0200 Subject: [PATCH 07/14] Pattern state --- src/toolkit/matcher.rs | 16 ++++--- src/toolkit/pattern.rs | 106 ++++++++++++++++++++++++++--------------- 2 files changed, 77 insertions(+), 45 deletions(-) diff --git a/src/toolkit/matcher.rs b/src/toolkit/matcher.rs index 69d8818..174be0d 100644 --- a/src/toolkit/matcher.rs +++ b/src/toolkit/matcher.rs @@ -58,12 +58,12 @@ impl<'input> Matcher<'input> { return Some((start_position, 0)); } for (index, char) in self.input.chars().enumerate().skip(start_position) { - let (is_character_in_pattern, is_end_of_sequence) = pattern.check_character(&char); - if is_character_in_pattern && is_end_of_sequence { - return Some((index + 1, pattern.length)); + let pattern_state = pattern.check_character(&char); + if pattern_state.hit && pattern_state.end { + return Some((index + 1, pattern_state.length)); } else if match_end_of_input && index == self.input.len() - 1 { return Some((index + 1, 0)); - } else if fail_fast && !is_character_in_pattern { + } else if fail_fast && !pattern_state.hit { break; } } @@ -112,14 +112,16 @@ impl<'input> Matcher<'input> { self.iterate(start_sequence, self.position, true, false) { for (index, char) in self.input.chars().enumerate().skip(self.position) { - if start_pattern.check_character(&char) == (true, true) { + let start_pattern_state = start_pattern.check_character(&char); + let end_pattern_state = end_pattern.check_character(&char); + if start_pattern_state.hit && start_pattern_state.end { start_pattern.reset(); balance += 1; - } else if balance > 0 && end_pattern.check_character(&char) == (true, true) { + } else if balance > 0 && end_pattern_state.hit && end_pattern_state.end { balance -= 1; if balance == 0 { let end_token_end_index = index + 1; - let end_token_start_index = end_token_end_index - end_pattern.length; + let end_token_start_index = end_token_end_index - end_pattern_state.length; self.position = index + 1; return Some(Match { start_token: &self.input[..start_token_end_index], diff --git a/src/toolkit/pattern.rs b/src/toolkit/pattern.rs index 0202a22..816285a 100644 --- a/src/toolkit/pattern.rs +++ b/src/toolkit/pattern.rs @@ -8,10 +8,38 @@ pub enum Quantifiers { pub struct Pattern<'token, const SIZE: usize> { index: usize, sequence: &'token [Quantifiers; SIZE], + length: usize, + quantifiers_lengths: [usize; SIZE], +} + +#[derive(Debug, PartialEq)] +pub struct PatternState { + pub hit: bool, + pub end: bool, pub length: usize, quantifiers_lengths: [usize; SIZE], } +impl PatternState { + pub fn new(hit: bool) -> Self { + Self { + hit, + end: false, + length: 0, + quantifiers_lengths: [0; SIZE], + } + } + + pub fn end(length: usize, quantifiers_lengths: [usize; SIZE]) -> Self { + Self { + hit: true, + end: true, + length, + quantifiers_lengths, + } + } +} + impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { pub fn new(sequence: &'sequence [Quantifiers; SIZE]) -> Self { Self { @@ -50,6 +78,10 @@ impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { Some(Quantifiers::RepeatTimes(length, p)) if (p == c && current_pattern_length + 1 == *length) => { + if let Some(count) = self.quantifiers_lengths.get_mut(index) { + *count += 1; + }; + Some(index + 1) } Some(Quantifiers::RepeatTimes(length, _)) if (*length == 0) => { @@ -59,14 +91,14 @@ impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { }; } - pub fn check_character(&mut self, c: &char) -> (bool, bool) { + pub fn check_character(&mut self, c: &char) -> PatternState { if let Some(new_index) = self.next_index(c, self.index) { self.index = new_index; self.length += 1; - return (true, self.is_end_of_sequence()); + return self.is_end_of_sequence(true); } self.reset(); - (false, self.is_end_of_sequence()) + self.is_end_of_sequence(false) } pub fn reset(&mut self) { @@ -75,8 +107,11 @@ impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { self.quantifiers_lengths = [0; SIZE]; } - fn is_end_of_sequence(&self) -> bool { - self.index == self.sequence.len() + fn is_end_of_sequence(&self, hit: bool) -> PatternState { + if self.index == self.sequence.len() { + return PatternState::end(self.length, self.quantifiers_lengths); + } + PatternState::new(hit) } fn get_quantifier_length(&self, index: usize) -> Option<&usize> { @@ -86,80 +121,75 @@ impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { #[cfg(test)] mod tests { - use crate::toolkit::pattern::{Pattern, Quantifiers::*}; + use crate::toolkit::pattern::{Pattern, PatternState, Quantifiers::*}; #[test] fn matcher() { let mut m = Pattern::new(&[Once('*'), Once('*')]); - assert_eq!(m.check_character(&'*'), (true, false)); - assert_eq!(m.check_character(&'*'), (true, true)); + assert_eq!(m.check_character(&'*'), PatternState::new(true)); + assert_eq!(m.check_character(&'*'), PatternState::end(2, [1, 1])); } #[test] fn matcher_not_matched() { let mut m = Pattern::new(&[Once('*'), Once('*')]); - assert_eq!(m.check_character(&'a'), (false, false)); - assert_eq!(m.check_character(&'b'), (false, false)); + assert_eq!(m.check_character(&'a'), PatternState::new(false)); + assert_eq!(m.check_character(&'b'), PatternState::new(false)); } #[test] fn pattern_repeat() { let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); - assert_eq!(m.check_character(&' '), (true, false)); - assert_eq!(m.check_character(&' '), (true, false)); - assert_eq!(m.check_character(&'-'), (true, true)); - assert_eq!(m.length, 3); - assert_eq!(m.get_quantifier_length(0), Some(&2)); - assert_eq!(m.get_quantifier_length(1), Some(&1)); - assert_eq!(m.check_character(&'-'), (false, false)); + assert_eq!(m.check_character(&' '), PatternState::new(true)); + assert_eq!(m.check_character(&' '), PatternState::new(true)); + assert_eq!(m.check_character(&'-'), PatternState::end(3, [2, 1])); + assert_eq!(m.check_character(&'-'), PatternState::new(false)); } #[test] fn pattern_repeat_zero() { let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); - assert_eq!(m.check_character(&'-'), (true, true)); - assert_eq!(m.get_quantifier_length(0), Some(&0)); - assert_eq!(m.get_quantifier_length(1), Some(&1)); - assert_eq!(m.check_character(&'-'), (false, false)); + assert_eq!(m.check_character(&'-'), PatternState::end(1, [0, 1])); + assert_eq!(m.check_character(&'-'), PatternState::new(false)); } #[test] fn pattern_exact_repeat_happy_path() { let mut m = Pattern::new(&[RepeatTimes(2, ' '), Once('-')]); - assert_eq!(m.check_character(&' '), (true, false)); - assert_eq!(m.check_character(&' '), (true, false)); - assert_eq!(m.check_character(&'-'), (true, true)); + assert_eq!(m.check_character(&' '), PatternState::new(true)); + assert_eq!(m.check_character(&' '), PatternState::new(true)); + assert_eq!(m.check_character(&'-'), PatternState::end(3, [2, 1])); } #[test] fn pattern_starts_with_exact_repeat() { let mut m = Pattern::new(&[RepeatTimes(2, ' '), Once('-')]); - assert_eq!(m.check_character(&' '), (true, false)); - assert_eq!(m.check_character(&' '), (true, false)); - assert_eq!(m.check_character(&' '), (false, false)); + assert_eq!(m.check_character(&' '), PatternState::new(true)); + assert_eq!(m.check_character(&' '), PatternState::new(true)); + assert_eq!(m.check_character(&' '), PatternState::new(false)); } #[test] fn pattern_starts_with_0_exact_repeat() { let mut m = Pattern::new(&[RepeatTimes(0, ' '), Once('-')]); - assert_eq!(m.check_character(&'-'), (true, true)); + assert_eq!(m.check_character(&'-'), PatternState::end(1, [0, 1])); } #[test] fn pattern_ends_with_exact_repeat() { let mut m = Pattern::new(&[Once('-'), RepeatTimes(2, ' ')]); - assert_eq!(m.check_character(&'-'), (true, false)); - assert_eq!(m.check_character(&' '), (true, false)); - assert_eq!(m.check_character(&' '), (true, true)); - assert_eq!(m.check_character(&' '), (false, false)); + assert_eq!(m.check_character(&'-'), PatternState::new(true)); + assert_eq!(m.check_character(&' '), PatternState::new(true)); + assert_eq!(m.check_character(&' '), PatternState::end(3, [1, 2])); + assert_eq!(m.check_character(&' '), PatternState::new(false)); } #[test] fn repeat_times_pattern() { let mut m = Pattern::new(&[RepeatTimes(2, ' ')]); - assert_eq!(m.check_character(&' '), (true, false)); - assert_eq!(m.check_character(&' '), (true, true)); - assert_eq!(m.check_character(&' '), (false, false)); + assert_eq!(m.check_character(&' '), PatternState::new(true)); + assert_eq!(m.check_character(&' '), PatternState::end(2, [2])); + assert_eq!(m.check_character(&' '), PatternState::new(false)); } #[test] @@ -173,8 +203,8 @@ mod tests { #[test] fn pattern_repeat_is_not_matched() { let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); - assert_eq!(m.check_character(&' '), (true, false)); - assert_eq!(m.check_character(&' '), (true, false)); - assert_eq!(m.check_character(&'a'), (false, false)); + assert_eq!(m.check_character(&' '), PatternState::new(true)); + assert_eq!(m.check_character(&' '), PatternState::new(true)); + assert_eq!(m.check_character(&'a'), PatternState::new(false)); } } From 4ad635b5489c12183aa251c7567d805d62aedd11 Mon Sep 17 00:00:00 2001 From: "serhiy.barhamon" Date: Sun, 9 Jul 2023 12:33:02 +0200 Subject: [PATCH 08/14] if pattern detects end of pattern it should reset itself --- src/toolkit/pattern.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/toolkit/pattern.rs b/src/toolkit/pattern.rs index 816285a..dfc1d9e 100644 --- a/src/toolkit/pattern.rs +++ b/src/toolkit/pattern.rs @@ -95,10 +95,10 @@ impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { if let Some(new_index) = self.next_index(c, self.index) { self.index = new_index; self.length += 1; - return self.is_end_of_sequence(true); + return self.create_state(true); } self.reset(); - self.is_end_of_sequence(false) + self.create_state(false) } pub fn reset(&mut self) { @@ -107,9 +107,11 @@ impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { self.quantifiers_lengths = [0; SIZE]; } - fn is_end_of_sequence(&self, hit: bool) -> PatternState { + fn create_state(&mut self, hit: bool) -> PatternState { if self.index == self.sequence.len() { - return PatternState::end(self.length, self.quantifiers_lengths); + let state = PatternState::end(self.length, self.quantifiers_lengths); + self.reset(); + return state; } PatternState::new(hit) } @@ -143,14 +145,14 @@ mod tests { assert_eq!(m.check_character(&' '), PatternState::new(true)); assert_eq!(m.check_character(&' '), PatternState::new(true)); assert_eq!(m.check_character(&'-'), PatternState::end(3, [2, 1])); - assert_eq!(m.check_character(&'-'), PatternState::new(false)); + assert_eq!(m.check_character(&'-'), PatternState::end(1, [0, 1])); } #[test] fn pattern_repeat_zero() { let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); assert_eq!(m.check_character(&'-'), PatternState::end(1, [0, 1])); - assert_eq!(m.check_character(&'-'), PatternState::new(false)); + assert_eq!(m.check_character(&'-'), PatternState::end(1, [0, 1])); } #[test] @@ -189,7 +191,7 @@ mod tests { let mut m = Pattern::new(&[RepeatTimes(2, ' ')]); assert_eq!(m.check_character(&' '), PatternState::new(true)); assert_eq!(m.check_character(&' '), PatternState::end(2, [2])); - assert_eq!(m.check_character(&' '), PatternState::new(false)); + assert_eq!(m.check_character(&' '), PatternState::new(true)); } #[test] From a2ec2a32cd47e54f0917e6a778c6e5b8a95fc966 Mon Sep 17 00:00:00 2001 From: "serhiy.barhamon" Date: Sun, 9 Jul 2023 14:23:03 +0200 Subject: [PATCH 09/14] message deserializer WIP --- src/nodes/message.rs | 123 ++++++++++++++++++++++++++++++++++++++--- src/toolkit/matcher.rs | 7 ++- 2 files changed, 120 insertions(+), 10 deletions(-) diff --git a/src/nodes/message.rs b/src/nodes/message.rs index dd3dc08..9f7bae5 100644 --- a/src/nodes/message.rs +++ b/src/nodes/message.rs @@ -1,11 +1,43 @@ -use crate::toolkit::node::Node; +use crate::toolkit::{ + context::Context, + deserializer::{Branch, DefinitelyNode, Deserializer, FallbackNode, MaybeNode}, + matcher::Matcher, + node::Node, + pattern::Quantifiers::*, +}; use super::paragraph::Paragraph; +#[derive(Debug, PartialEq)] +pub enum MessageNodes { + Paragraph(Paragraph), +} + +impl Node for MessageNodes { + fn serialize(&self) -> String { + match self { + Self::Paragraph(p) => p.serialize(), + } + } + + fn len(&self) -> usize { + match self { + Self::Paragraph(p) => p.len(), + } + } +} + +impl From for MessageNodes { + fn from(p: Paragraph) -> Self { + Self::Paragraph(p) + } +} + +#[derive(Debug, PartialEq)] pub struct Message { header: Option, icon: Option, - nodes: Vec, + nodes: Vec, warning: bool, consumed_all_input: bool, } @@ -23,7 +55,7 @@ impl Message { pub fn new_with_nodes>( header: Option, icon: Option, - nodes: Vec, + nodes: Vec, warning: bool, consumed_all_input: bool, ) -> Self { @@ -74,12 +106,70 @@ impl Node for Message { } } +impl Branch for Message { + fn push>(&mut self, node: CanBeNode) { + self.nodes.push(node.into()); + } + + fn get_maybe_nodes() -> Vec> { + vec![] + } + + fn get_fallback_node() -> Option> { + Some(Paragraph::fallback_node()) + } + + fn get_outer_token_length(&self) -> usize { + 0 + } +} + +impl Deserializer for Message { + fn deserialize_with_context(input: &str, _: Option) -> Option { + let mut matcher = Matcher::new(input); + if let Some(message) = matcher.get_match( + &[RepeatTimes(4, '%'), Once('\n')], + &[Once('\n'), RepeatTimes(4, '%')], + false, + ) { + println!("message: {:?}", message); + let mut inner_matcher = Matcher::new(&message.body); + let header = inner_matcher + .get_match(&[RepeatTimes(3, '%'), Once(' ')], &[Once('\n')], false) + .map(|s| s.body.to_string()); + let icon = inner_matcher + .get_match(&[RepeatTimes(2, '%'), Once(' ')], &[Once('\n')], false) + .map(|s| s.body.to_string()); + let warning = inner_matcher + .get_match(&[Once('%'), Once(' ')], &[Once('\n')], false) + .is_some(); + + let consumed_all_input = matcher + .get_match(&[RepeatTimes(2, '\n')], &[], false) + .is_none(); + let rest = inner_matcher.get_rest(); + + println!("rest: {:?}", rest); + + let container = Self::new(header, icon, warning, consumed_all_input); + + if rest.is_empty() { + println!("here"); + return Some(container); + } else { + return Self::parse_branch(rest, container); + } + } + None + } +} + #[cfg(test)] mod test { use super::Message; use crate::{ nodes::{paragraph::Paragraph, text::Text}, - toolkit::node::Node, + toolkit::{deserializer::Deserializer, node::Node}, }; use pretty_assertions::assert_eq; @@ -117,10 +207,7 @@ mod test { Message::new_with_nodes( Some("header"), Some("icon"), - vec![Paragraph::new_with_nodes( - true, - vec![Text::new("simple text").into()] - )], + vec![Paragraph::new_with_nodes(true, vec![Text::new("simple text").into()]).into()], true, true ) @@ -188,4 +275,24 @@ mod test { "%%%%\n%%% header\n% \n\n%%%%" ); } + + #[test] + fn deserialize_empty() { + assert_eq!( + Message::deserialize("%%%%\n\n%%%%\n\n"), + Some(Message::new::<&str>(None, None, false, false)) + ); + assert_eq!( + Message::deserialize("%%%%\n\n%%%%"), + Some(Message::new::<&str>(None, None, false, true)) + ); + } + + #[test] + fn deserialize() { + assert_eq!( + Message::deserialize("%%%%\n%%% header\n\n%%%%\n\n"), + Some(Message::new(Some("header"), None, false, false)), + ); + } } diff --git a/src/toolkit/matcher.rs b/src/toolkit/matcher.rs index 174be0d..4211cb5 100644 --- a/src/toolkit/matcher.rs +++ b/src/toolkit/matcher.rs @@ -107,11 +107,14 @@ impl<'input> Matcher<'input> { ) -> Option> { let mut start_pattern = Pattern::new(start_sequence); let mut end_pattern = Pattern::new(end_sequence); - let mut balance = 0; + let mut balance = 1; if let Some((start_token_end_index, _)) = self.iterate(start_sequence, self.position, true, false) { - for (index, char) in self.input.chars().enumerate().skip(self.position) { + println!("start_token_end_index: {}", start_token_end_index); + for (index, char) in self.input.chars().enumerate().skip(start_token_end_index) { + println!("index: {}", index); + println!("char: {}", char); let start_pattern_state = start_pattern.check_character(&char); let end_pattern_state = end_pattern.check_character(&char); if start_pattern_state.hit && start_pattern_state.end { From 36bf0544a9f4df7115ba75c7272c19ad464e0f0e Mon Sep 17 00:00:00 2001 From: "serhiy.barhamon" Date: Sun, 9 Jul 2023 15:01:36 +0200 Subject: [PATCH 10/14] move length calculation from pattern to pattern state --- src/toolkit/pattern.rs | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/toolkit/pattern.rs b/src/toolkit/pattern.rs index dfc1d9e..a96b65b 100644 --- a/src/toolkit/pattern.rs +++ b/src/toolkit/pattern.rs @@ -8,7 +8,6 @@ pub enum Quantifiers { pub struct Pattern<'token, const SIZE: usize> { index: usize, sequence: &'token [Quantifiers; SIZE], - length: usize, quantifiers_lengths: [usize; SIZE], } @@ -30,11 +29,11 @@ impl PatternState { } } - pub fn end(length: usize, quantifiers_lengths: [usize; SIZE]) -> Self { + pub fn end(quantifiers_lengths: [usize; SIZE]) -> Self { Self { hit: true, end: true, - length, + length: quantifiers_lengths.iter().sum(), quantifiers_lengths, } } @@ -45,7 +44,6 @@ impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { Self { index: 0, sequence, - length: 0, quantifiers_lengths: [0; SIZE], } } @@ -94,7 +92,6 @@ impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { pub fn check_character(&mut self, c: &char) -> PatternState { if let Some(new_index) = self.next_index(c, self.index) { self.index = new_index; - self.length += 1; return self.create_state(true); } self.reset(); @@ -103,13 +100,12 @@ impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { pub fn reset(&mut self) { self.index = 0; - self.length = 0; self.quantifiers_lengths = [0; SIZE]; } fn create_state(&mut self, hit: bool) -> PatternState { if self.index == self.sequence.len() { - let state = PatternState::end(self.length, self.quantifiers_lengths); + let state = PatternState::end(self.quantifiers_lengths); self.reset(); return state; } @@ -129,7 +125,7 @@ mod tests { fn matcher() { let mut m = Pattern::new(&[Once('*'), Once('*')]); assert_eq!(m.check_character(&'*'), PatternState::new(true)); - assert_eq!(m.check_character(&'*'), PatternState::end(2, [1, 1])); + assert_eq!(m.check_character(&'*'), PatternState::end([1, 1])); } #[test] @@ -144,15 +140,15 @@ mod tests { let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); assert_eq!(m.check_character(&' '), PatternState::new(true)); assert_eq!(m.check_character(&' '), PatternState::new(true)); - assert_eq!(m.check_character(&'-'), PatternState::end(3, [2, 1])); - assert_eq!(m.check_character(&'-'), PatternState::end(1, [0, 1])); + assert_eq!(m.check_character(&'-'), PatternState::end([2, 1])); + assert_eq!(m.check_character(&'-'), PatternState::end([0, 1])); } #[test] fn pattern_repeat_zero() { let mut m = Pattern::new(&[ZeroOrMore(' '), Once('-')]); - assert_eq!(m.check_character(&'-'), PatternState::end(1, [0, 1])); - assert_eq!(m.check_character(&'-'), PatternState::end(1, [0, 1])); + assert_eq!(m.check_character(&'-'), PatternState::end([0, 1])); + assert_eq!(m.check_character(&'-'), PatternState::end([0, 1])); } #[test] @@ -160,7 +156,7 @@ mod tests { let mut m = Pattern::new(&[RepeatTimes(2, ' '), Once('-')]); assert_eq!(m.check_character(&' '), PatternState::new(true)); assert_eq!(m.check_character(&' '), PatternState::new(true)); - assert_eq!(m.check_character(&'-'), PatternState::end(3, [2, 1])); + assert_eq!(m.check_character(&'-'), PatternState::end([2, 1])); } #[test] @@ -174,7 +170,7 @@ mod tests { #[test] fn pattern_starts_with_0_exact_repeat() { let mut m = Pattern::new(&[RepeatTimes(0, ' '), Once('-')]); - assert_eq!(m.check_character(&'-'), PatternState::end(1, [0, 1])); + assert_eq!(m.check_character(&'-'), PatternState::end([0, 1])); } #[test] @@ -182,7 +178,7 @@ mod tests { let mut m = Pattern::new(&[Once('-'), RepeatTimes(2, ' ')]); assert_eq!(m.check_character(&'-'), PatternState::new(true)); assert_eq!(m.check_character(&' '), PatternState::new(true)); - assert_eq!(m.check_character(&' '), PatternState::end(3, [1, 2])); + assert_eq!(m.check_character(&' '), PatternState::end([1, 2])); assert_eq!(m.check_character(&' '), PatternState::new(false)); } @@ -190,7 +186,7 @@ mod tests { fn repeat_times_pattern() { let mut m = Pattern::new(&[RepeatTimes(2, ' ')]); assert_eq!(m.check_character(&' '), PatternState::new(true)); - assert_eq!(m.check_character(&' '), PatternState::end(2, [2])); + assert_eq!(m.check_character(&' '), PatternState::end([2])); assert_eq!(m.check_character(&' '), PatternState::new(true)); } From 78a0f55dc30e60ea34772fb17ec3c994103d1272 Mon Sep 17 00:00:00 2001 From: "serhiy.barhamon" Date: Sun, 9 Jul 2023 15:30:07 +0200 Subject: [PATCH 11/14] sliding window --- src/nodes/heading.rs | 12 ++++++------ src/toolkit/pattern.rs | 43 +++++++++++++++++++++++------------------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/src/nodes/heading.rs b/src/nodes/heading.rs index 436f46d..24efb14 100644 --- a/src/nodes/heading.rs +++ b/src/nodes/heading.rs @@ -28,12 +28,12 @@ impl Heading { impl Deserializer for Heading { fn deserialize_with_context(input: &str, _: Option) -> Option { let start_tokens = [ - [Once('#'), Once(' ')], - [RepeatTimes(2, '#'), Once(' ')], - [RepeatTimes(3, '#'), Once(' ')], - [RepeatTimes(4, '#'), Once(' ')], - [RepeatTimes(5, '#'), Once(' ')], [RepeatTimes(6, '#'), Once(' ')], + [RepeatTimes(5, '#'), Once(' ')], + [RepeatTimes(4, '#'), Once(' ')], + [RepeatTimes(3, '#'), Once(' ')], + [RepeatTimes(2, '#'), Once(' ')], + [Once('#'), Once(' ')], ]; for (i, start_token) in start_tokens.iter().enumerate() { @@ -41,7 +41,7 @@ impl Deserializer for Heading { if let Some(heading) = matcher.get_match(start_token, &[RepeatTimes(2, '\n')], true) { return Some(Self::new( heading.body, - (i + 1).try_into().unwrap_or(1), + (start_tokens.len() - i).try_into().unwrap_or(1), heading.end_token.is_empty(), )); } diff --git a/src/toolkit/pattern.rs b/src/toolkit/pattern.rs index a96b65b..7d5dcea 100644 --- a/src/toolkit/pattern.rs +++ b/src/toolkit/pattern.rs @@ -5,12 +5,6 @@ pub enum Quantifiers { RepeatTimes(usize, char), } -pub struct Pattern<'token, const SIZE: usize> { - index: usize, - sequence: &'token [Quantifiers; SIZE], - quantifiers_lengths: [usize; SIZE], -} - #[derive(Debug, PartialEq)] pub struct PatternState { pub hit: bool, @@ -39,46 +33,52 @@ impl PatternState { } } +pub struct Pattern<'token, const SIZE: usize> { + index: usize, + last_char: Option, + sequence: &'token [Quantifiers; SIZE], + quantifiers_lengths: [usize; SIZE], +} + impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { pub fn new(sequence: &'sequence [Quantifiers; SIZE]) -> Self { Self { index: 0, + last_char: None, sequence, quantifiers_lengths: [0; SIZE], } } + fn increment_quantifier_length(&mut self, index: usize) { + if let Some(count) = self.quantifiers_lengths.get_mut(index) { + *count += 1; + }; + } + fn next_index(&mut self, c: &char, index: usize) -> Option { let current_pattern_length = self.get_quantifier_length(index).unwrap_or(&0); return match self.sequence.get(index) { Some(Quantifiers::Once(p)) if p == c => { - if let Some(count) = self.quantifiers_lengths.get_mut(index) { - *count += 1; - }; + self.increment_quantifier_length(index); Some(index + 1) } Some(Quantifiers::ZeroOrMore(p)) if p == c => { - if let Some(count) = self.quantifiers_lengths.get_mut(index) { - *count += 1; - }; + self.increment_quantifier_length(index); Some(index) } Some(Quantifiers::ZeroOrMore(p)) if p != c => self.next_index(c, index + 1), Some(Quantifiers::RepeatTimes(length, p)) if (p == c && current_pattern_length + 1 < *length) => { - if let Some(count) = self.quantifiers_lengths.get_mut(index) { - *count += 1; - }; + self.increment_quantifier_length(index); Some(index) } Some(Quantifiers::RepeatTimes(length, p)) if (p == c && current_pattern_length + 1 == *length) => { - if let Some(count) = self.quantifiers_lengths.get_mut(index) { - *count += 1; - }; + self.increment_quantifier_length(index); Some(index + 1) } @@ -92,6 +92,9 @@ impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { pub fn check_character(&mut self, c: &char) -> PatternState { if let Some(new_index) = self.next_index(c, self.index) { self.index = new_index; + self.last_char = Some(*c); + return self.create_state(true); + } else if self.index == 1 && self.last_char == Some(*c) { return self.create_state(true); } self.reset(); @@ -100,6 +103,7 @@ impl<'sequence, const SIZE: usize> Pattern<'sequence, SIZE> { pub fn reset(&mut self) { self.index = 0; + self.last_char = None; self.quantifiers_lengths = [0; SIZE]; } @@ -164,7 +168,8 @@ mod tests { let mut m = Pattern::new(&[RepeatTimes(2, ' '), Once('-')]); assert_eq!(m.check_character(&' '), PatternState::new(true)); assert_eq!(m.check_character(&' '), PatternState::new(true)); - assert_eq!(m.check_character(&' '), PatternState::new(false)); + assert_eq!(m.check_character(&' '), PatternState::new(true)); + assert_eq!(m.check_character(&'-'), PatternState::end([2, 1])); } #[test] From 790bed314d32d56a57eb9267e7b45dffc2725bc8 Mon Sep 17 00:00:00 2001 From: "serhiy.barhamon" Date: Sun, 9 Jul 2023 16:27:08 +0200 Subject: [PATCH 12/14] message body parser --- src/nodes/message.rs | 134 +++++++++++++++++++++++++++++++++--- src/toolkit/deserializer.rs | 11 +++ src/toolkit/matcher.rs | 3 - 3 files changed, 137 insertions(+), 11 deletions(-) diff --git a/src/nodes/message.rs b/src/nodes/message.rs index 9f7bae5..0ced980 100644 --- a/src/nodes/message.rs +++ b/src/nodes/message.rs @@ -120,7 +120,20 @@ impl Branch for Message { } fn get_outer_token_length(&self) -> usize { - 0 + let mut len = 10; + if let Some(header) = &self.header { + len += header.len() + 5; + } + if let Some(icon) = &self.icon { + len += icon.len() + 4; + } + if self.warning { + len += 3; + } + if !self.consumed_all_input { + len += 2; + } + len } } @@ -132,8 +145,7 @@ impl Deserializer for Message { &[Once('\n'), RepeatTimes(4, '%')], false, ) { - println!("message: {:?}", message); - let mut inner_matcher = Matcher::new(&message.body); + let mut inner_matcher = Matcher::new(message.body); let header = inner_matcher .get_match(&[RepeatTimes(3, '%'), Once(' ')], &[Once('\n')], false) .map(|s| s.body.to_string()); @@ -149,12 +161,9 @@ impl Deserializer for Message { .is_none(); let rest = inner_matcher.get_rest(); - println!("rest: {:?}", rest); - let container = Self::new(header, icon, warning, consumed_all_input); if rest.is_empty() { - println!("here"); return Some(container); } else { return Self::parse_branch(rest, container); @@ -168,7 +177,7 @@ impl Deserializer for Message { mod test { use super::Message; use crate::{ - nodes::{paragraph::Paragraph, text::Text}, + nodes::{bold::Bold, paragraph::Paragraph, strikethrough::Strikethrough, text::Text}, toolkit::{deserializer::Deserializer, node::Node}, }; use pretty_assertions::assert_eq; @@ -289,10 +298,119 @@ mod test { } #[test] - fn deserialize() { + fn deserialize_with_header() { assert_eq!( Message::deserialize("%%%%\n%%% header\n\n%%%%\n\n"), Some(Message::new(Some("header"), None, false, false)), ); + assert_eq!( + Message::deserialize("%%%%\n%%% header\n\n%%%%"), + Some(Message::new(Some("header"), None, false, true)), + ); + } + + #[test] + fn deserialize_with_icon() { + assert_eq!( + Message::deserialize("%%%%\n%% icon\n\n%%%%\n\n"), + Some(Message::new(None, Some("icon"), false, false)), + ); + assert_eq!( + Message::deserialize("%%%%\n%% icon\n\n%%%%"), + Some(Message::new(None, Some("icon"), false, true)), + ); + } + + #[test] + fn deserialize_with_header_and_icon() { + assert_eq!( + Message::deserialize("%%%%\n%%% header\n%% icon\n\n%%%%\n\n"), + Some(Message::new(Some("header"), Some("icon"), false, false)), + ); + assert_eq!( + Message::deserialize("%%%%\n%%% header\n%% icon\n\n%%%%"), + Some(Message::new(Some("header"), Some("icon"), false, true)), + ); + } + + #[test] + fn deserialize_with_header_and_icon_and_warning() { + assert_eq!( + Message::deserialize("%%%%\n%%% header\n%% icon\n% \n\n%%%%\n\n"), + Some(Message::new::<&str>( + Some("header"), + Some("icon"), + true, + false + )), + ); + assert_eq!( + Message::deserialize("%%%%\n%%% header\n%% icon\n% \n\n%%%%"), + Some(Message::new::<&str>( + Some("header"), + Some("icon"), + true, + true + )), + ); + } + + #[test] + fn deserialize_with_header_and_warning() { + assert_eq!( + Message::deserialize("%%%%\n%%% header\n% \n\n%%%%\n\n"), + Some(Message::new::<&str>(Some("header"), None, true, false)), + ); + assert_eq!( + Message::deserialize("%%%%\n%%% header\n% \n\n%%%%"), + Some(Message::new::<&str>(Some("header"), None, true, true)), + ); + } + + #[test] + fn deserialize_with_icon_and_warning() { + assert_eq!( + Message::deserialize("%%%%\n%% icon\n% \n\n%%%%\n\n"), + Some(Message::new::<&str>(None, Some("icon"), true, false)), + ); + assert_eq!( + Message::deserialize("%%%%\n%% icon\n% \n\n%%%%"), + Some(Message::new::<&str>(None, Some("icon"), true, true)), + ); + } + + #[test] + fn deserialize_with_warning() { + assert_eq!( + Message::deserialize("%%%%\n% \n\n%%%%\n\n"), + Some(Message::new::<&str>(None, None, true, false)), + ); + assert_eq!( + Message::deserialize("%%%%\n% \n\n%%%%"), + Some(Message::new::<&str>(None, None, true, true)), + ); + } + + #[test] + fn deserialize_with_header_and_icon_and_content() { + assert_eq!( + Message::deserialize("%%%%\n%%% header\n%% icon\nthis is some **content**\n\nand this is next ~~line~~\n%%%%\n\n"), + Some(Message::new_with_nodes( + Some("header"), + Some("icon"), + vec![ + Paragraph::new_with_nodes(false, vec![ + Text::new("this is some ".to_string()).into(), + Bold::new_with_nodes(vec![Text::new("content").into()]).into(), + ]).into(), + Paragraph::new_with_nodes(true, vec![ + Text::new("and this is next ").into(), + Strikethrough::new("line").into(), + ]).into(), + ], + false, + false + )), + ); } } diff --git a/src/toolkit/deserializer.rs b/src/toolkit/deserializer.rs index 0da0841..4fba09f 100644 --- a/src/toolkit/deserializer.rs +++ b/src/toolkit/deserializer.rs @@ -20,6 +20,17 @@ where while current_position < input.len() { let slice = &input[current_position..]; current_position += 1; + if maybe_nodes.is_empty() { + match fallback_node.as_ref() { + Some(fallback_node) => { + branch.push(fallback_node(slice)); + current_position = branch.len() - branch.get_outer_token_length(); + fallback_position = current_position; + } + None => return None, + } + continue; + } for parser in &maybe_nodes { if let Some(node) = parser(slice, branch.context()) { if fallback_position != current_position - 1 { diff --git a/src/toolkit/matcher.rs b/src/toolkit/matcher.rs index 4211cb5..7e50913 100644 --- a/src/toolkit/matcher.rs +++ b/src/toolkit/matcher.rs @@ -111,10 +111,7 @@ impl<'input> Matcher<'input> { if let Some((start_token_end_index, _)) = self.iterate(start_sequence, self.position, true, false) { - println!("start_token_end_index: {}", start_token_end_index); for (index, char) in self.input.chars().enumerate().skip(start_token_end_index) { - println!("index: {}", index); - println!("char: {}", char); let start_pattern_state = start_pattern.check_character(&char); let end_pattern_state = end_pattern.check_character(&char); if start_pattern_state.hit && start_pattern_state.end { From 24b988961d415cb241b52f94f859501376f1228b Mon Sep 17 00:00:00 2001 From: Serhiy Barhamon Date: Sun, 9 Jul 2023 16:30:44 +0200 Subject: [PATCH 13/14] bump version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 68a7d9b..2153f0b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yamd" -version = "0.2.0" +version = "0.3.0" edition = "2021" license = "MIT OR Apache-2.0" description = "Yet Another Markdown (flavor)" From 2f23f0e2e7866f12d68782aa5ce172740fcd5ef8 Mon Sep 17 00:00:00 2001 From: Serhiy Barhamon Date: Sun, 9 Jul 2023 16:43:26 +0200 Subject: [PATCH 14/14] add message to Yamd and bump version --- Cargo.lock | 2 +- src/nodes/message.rs | 16 +---------- src/nodes/yamd.rs | 67 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 68 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a05a4c4..a17580d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -98,7 +98,7 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "yamd" -version = "0.2.0" +version = "0.3.0" dependencies = [ "pretty_assertions", ] diff --git a/src/nodes/message.rs b/src/nodes/message.rs index 0ced980..ec16929 100644 --- a/src/nodes/message.rs +++ b/src/nodes/message.rs @@ -88,21 +88,7 @@ impl Node for Message { } fn len(&self) -> usize { - let mut len = 10; - if let Some(header) = &self.header { - len += header.len() + 5; - } - if let Some(icon) = &self.icon { - len += icon.len() + 4; - } - len += self.nodes.iter().map(|n| n.len()).sum::(); - if self.warning { - len += 3; - } - if !self.consumed_all_input { - len += 2; - } - len + self.nodes.iter().map(|n| n.len()).sum::() + self.get_outer_token_length() } } diff --git a/src/nodes/yamd.rs b/src/nodes/yamd.rs index 6a765f8..4c0e211 100644 --- a/src/nodes/yamd.rs +++ b/src/nodes/yamd.rs @@ -7,7 +7,7 @@ use crate::{ use super::{ code::Code, divider::Divider, embed::Embed, highlight::Highlight, image::Image, - image_gallery::ImageGallery, list::List, + image_gallery::ImageGallery, list::List, message::Message, }; #[derive(Debug, PartialEq)] @@ -21,6 +21,7 @@ pub enum YamdNodes { Highlight(Highlight), Divider(Divider), Embed(Embed), + Message(Message), } impl From for YamdNodes { @@ -77,6 +78,12 @@ impl From for YamdNodes { } } +impl From for YamdNodes { + fn from(value: Message) -> Self { + YamdNodes::Message(value) + } +} + impl Node for YamdNodes { fn serialize(&self) -> String { match self { @@ -89,6 +96,7 @@ impl Node for YamdNodes { YamdNodes::Highlight(node) => node.serialize(), YamdNodes::Divider(node) => node.serialize(), YamdNodes::Embed(node) => node.serialize(), + YamdNodes::Message(node) => node.serialize(), } } fn len(&self) -> usize { @@ -102,6 +110,7 @@ impl Node for YamdNodes { YamdNodes::Highlight(node) => node.len(), YamdNodes::Divider(node) => node.len(), YamdNodes::Embed(node) => node.len(), + YamdNodes::Message(node) => node.len(), } } } @@ -136,6 +145,7 @@ impl Branch for Yamd { Highlight::maybe_node(), Divider::maybe_node(), Embed::maybe_node(), + Message::maybe_node(), ] } @@ -187,8 +197,10 @@ mod tests { highlight::Highlight, image::Image, image_gallery::ImageGallery, + italic::Italic, list::{List, ListTypes::Unordered}, list_item::ListItem, + message::Message, strikethrough::Strikethrough, text::Text, }, @@ -224,6 +236,15 @@ t**b** {{youtube|123}} +%%%% +%%% header +%% icon +% +content **bold** + +content _italic_ +%%%% + end"#; #[test] @@ -315,6 +336,28 @@ end"#; ) .into(), Embed::new("youtube", "123", false).into(), + Message::new_with_nodes( + Some("header"), + Some("icon"), + vec![ + Paragraph::new_with_nodes( + false, + vec![ + Text::new("content ").into(), + Bold::new_with_nodes(vec![Text::new("bold").into()]).into() + ] + ) + .into(), + Paragraph::new_with_nodes( + true, + vec![Text::new("content ").into(), Italic::new("italic").into()] + ) + .into() + ], + true, + false + ) + .into(), Paragraph::new_with_nodes(true, vec![Text::new("end").into()]).into() ])) ); @@ -385,6 +428,28 @@ end"#; ) .into(), Embed::new("youtube", "123", false).into(), + Message::new_with_nodes( + Some("header"), + Some("icon"), + vec![ + Paragraph::new_with_nodes( + false, + vec![ + Text::new("content ").into(), + Bold::new_with_nodes(vec![Text::new("bold").into()]).into() + ] + ) + .into(), + Paragraph::new_with_nodes( + true, + vec![Text::new("content ").into(), Italic::new("italic").into()] + ) + .into() + ], + true, + false + ) + .into(), Paragraph::new_with_nodes(true, vec![Text::new("end").into()]).into() ]) .serialize(),