From 04fbc6ece228674ac9b29f2215a94fad1a208ca6 Mon Sep 17 00:00:00 2001 From: Boaz Yaniv Date: Sun, 4 Aug 2024 22:13:08 +0900 Subject: [PATCH] Add support for custom parsing of APC, SOS and PM sequences. --- src/definitions.rs | 74 +++++++++++++++++++++-------------------- src/lib.rs | 83 ++++++++++++++++++++++++++++++++++++++++++++++ src/table.rs | 10 +++--- 3 files changed, 126 insertions(+), 41 deletions(-) diff --git a/src/definitions.rs b/src/definitions.rs index 218c1eb..28cd746 100644 --- a/src/definitions.rs +++ b/src/definitions.rs @@ -19,7 +19,7 @@ pub enum State { #[default] Ground = 12, OscString = 13, - SosPmApcString = 14, + OpaqueString = 14, Utf8 = 15, } @@ -28,21 +28,34 @@ pub enum State { #[derive(Debug, Clone, Copy)] pub enum Action { None = 0, - Clear = 1, - Collect = 2, - CsiDispatch = 3, - EscDispatch = 4, - Execute = 5, - Hook = 6, - Ignore = 7, - OscEnd = 8, - OscPut = 9, - OscStart = 10, - Param = 11, - Print = 12, - Put = 13, - Unhook = 14, - BeginUtf8 = 15, + Collect = 1, + CsiDispatch = 2, + EscDispatch = 3, + Execute = 4, + Ignore = 5, + OscPut = 6, + Param = 7, + Print = 8, + Put = 9, + BeginUtf8 = 10, + OpaquePut = 11, + + // Actions that do not need to be packed as 4 bits in the state table + // Can have values higher than 16 + Clear = 16, + Hook = 17, + Unhook = 18, + OscStart = 19, + OscEnd = 20, + OpaqueStart = 21, + OpaqueEnd = 22, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum OpaqueSequenceKind { + Sos, + Pm, + Apc, } /// Unpack a u8 into a State and Action @@ -57,9 +70,9 @@ pub fn unpack(delta: u8) -> (State, Action) { unsafe { ( // State is stored in bottom 4 bits - mem::transmute(delta & 0x0f), + mem::transmute::<u8, State>(delta & 0x0f), // Action is stored in top 4 bits - mem::transmute(delta >> 4), + mem::transmute::<u8, Action>(delta >> 4), ) } } @@ -75,8 +88,8 @@ mod tests { #[test] fn unpack_state_action() { - match unpack(0xee) { - (State::SosPmApcString, Action::Unhook) => (), + match unpack(0xaa) { + (State::Escape, 
Action::BeginUtf8) => (), _ => panic!("unpack failed"), } @@ -85,27 +98,16 @@ mod tests { _ => panic!("unpack failed"), } - match unpack(0xff) { - (State::Utf8, Action::BeginUtf8) => (), + match unpack(0xbf) { + (State::Utf8, Action::OpaquePut) => (), _ => panic!("unpack failed"), } } #[test] fn pack_state_action() { - match unpack(0xee) { - (State::SosPmApcString, Action::Unhook) => (), - _ => panic!("unpack failed"), - } - - match unpack(0x0f) { - (State::Utf8, Action::None) => (), - _ => panic!("unpack failed"), - } - - match unpack(0xff) { - (State::Utf8, Action::BeginUtf8) => (), - _ => panic!("unpack failed"), - } + assert_eq!(pack(State::Escape, Action::BeginUtf8), 0xaa); + assert_eq!(pack(State::Utf8, Action::None), 0x0f); + assert_eq!(pack(State::Utf8, Action::OpaquePut), 0xbf); } } diff --git a/src/lib.rs b/src/lib.rs index 31e2a31..bcd6185 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,6 +49,7 @@ pub mod ansi; pub use params::{Params, ParamsIter}; use definitions::{unpack, Action, State}; +use crate::definitions::OpaqueSequenceKind; const MAX_INTERMEDIATES: usize = 2; const MAX_OSC_PARAMS: usize = 16; @@ -187,6 +188,9 @@ impl Parser { State::OscString => { self.perform_action(performer, Action::OscEnd, byte); }, + State::OpaqueString => { + self.perform_action(performer, Action::OpaqueEnd, byte); + }, _ => (), } @@ -202,6 +206,9 @@ impl Parser { State::OscString => { self.perform_action(performer, Action::OscStart, byte); }, + State::OpaqueString => { + self.perform_action(performer, Action::OpaqueStart, byte); + }, _ => (), } @@ -364,6 +371,28 @@ impl Parser { Action::BeginUtf8 => self.process_utf8(performer, byte), Action::Ignore => (), Action::None => (), + + // APC Actions are checked last, since they are relatively rare + Action::OpaqueStart => { + let kind = match byte { + 0x58 => OpaqueSequenceKind::Sos, + 0x5e => OpaqueSequenceKind::Pm, + 0x5f => OpaqueSequenceKind::Apc, + + // Changes to OpaqueString state which trigger this action are only 
possible + // when one of the escape sequences above is detected (see Escape state changes + // in table.rs). Since there is no other way to reach this action with any other + // byte value, this branch is unreachable. + _ => unreachable!("invalid opaque sequence kind"), + }; + performer.opaque_start(kind) + }, + Action::OpaquePut => { + performer.opaque_put(byte) + } + Action::OpaqueEnd => { + performer.opaque_end() + }, } } } @@ -428,6 +457,20 @@ pub trait Perform { /// The `ignore` flag indicates that more than two intermediates arrived and /// subsequent characters were ignored. fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {} + + /// The start of an opaque sequence (SOS, PM or APC) has been detected. + /// + /// The `kind` parameter indicates the type of sequence that was started. + /// + /// Until the opaque sequence ends (at which point `opaque_end` will be called), invalid + /// characters will be ignored while valid characters will be passed on to `opaque_put`. + fn opaque_start(&mut self, _kind: OpaqueSequenceKind) {} + + /// We've reached the end of the ongoing opaque sequence (SOS, PM or APC). + fn opaque_end(&mut self) {} + + /// A byte has been received as part of an ongoing opaque sequence. 
+ fn opaque_put(&mut self, _byte: u8) {} } #[cfg(all(test, feature = "no_std"))] @@ -460,6 +503,9 @@ mod tests { DcsHook(Vec<Vec<u16>>, Vec<u8>, bool, char), DcsPut(u8), DcsUnhook, + OpaqueStart(OpaqueSequenceKind), + OpaquePut(u8), + OpaqueEnd, } impl Perform for Dispatcher { @@ -492,6 +538,18 @@ mod tests { fn unhook(&mut self) { self.dispatched.push(Sequence::DcsUnhook); } + + fn opaque_start(&mut self, _kind: OpaqueSequenceKind) { + self.dispatched.push(Sequence::OpaqueStart(_kind)); + } + + fn opaque_put(&mut self, byte: u8) { + self.dispatched.push(Sequence::OpaquePut(byte)); + } + + fn opaque_end(&mut self) { + self.dispatched.push(Sequence::OpaqueEnd); + } } #[test] @@ -628,6 +686,31 @@ mod tests { } } + #[test] + fn parse_apc() { + const INPUT: &[u8] = b"\x1b_abc\x1b\\"; + + // Test with ESC \ terminator. + + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + + for byte in INPUT { + parser.advance(&mut dispatcher, *byte); + } + assert_eq!(dispatcher.dispatched.len(), 6); + assert_eq!( + dispatcher.dispatched[0..5], + vec![ + Sequence::OpaqueStart(OpaqueSequenceKind::Apc), + Sequence::OpaquePut(b'a'), + Sequence::OpaquePut(b'b'), + Sequence::OpaquePut(b'c'), + Sequence::OpaqueEnd, + ] + ) + } + #[test] fn exceed_max_buffer_size() { static NUM_BYTES: usize = MAX_OSC_RAW + 100; diff --git a/src/table.rs b/src/table.rs index f2c0105..31628b1 100644 --- a/src/table.rs +++ b/src/table.rs @@ -44,9 +44,9 @@ generate_state_changes!(state_changes, { 0x5b => (CsiEntry, None), 0x5d => (OscString, None), 0x50 => (DcsEntry, None), - 0x58 => (SosPmApcString, None), - 0x5e => (SosPmApcString, None), - 0x5f => (SosPmApcString, None), + 0x58 => (OpaqueString, None), + 0x5e => (OpaqueString, None), + 0x5f => (OpaqueString, None), }, EscapeIntermediate { @@ -152,11 +152,11 @@ generate_state_changes!(state_changes, { 0x9c => (Ground, None), }, - SosPmApcString { + OpaqueString { 0x00..=0x17 => (Anywhere, Ignore), 0x19 => (Anywhere, Ignore), 0x1c..=0x1f => 
(Anywhere, Ignore), - 0x20..=0x7f => (Anywhere, Ignore), + 0x20..=0x7f => (Anywhere, OpaquePut), 0x9c => (Ground, None), },