diff --git a/Cargo.toml b/Cargo.toml index 58441ff..73bc2ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,21 +13,20 @@ name = "vte" edition = "2021" rust-version = "1.62.1" -[dependencies] -arrayvec = { version = "0.7.2", default-features = false, optional = true } -bitflags = { version = "2.3.3", default-features = false, optional = true } -cursor-icon = { version = "1.0.0", default-features = false, optional = true } -log = { version = "0.4.17", optional = true } -serde = { version = "1.0.160", features = ["derive"], optional = true } -utf8parse = { version = "0.2.0", path = "utf8parse" } -vte_generate_state_changes = { version = "0.1.0", path = "vte_generate_state_changes" } +[workspace] +members = ["vte_generate_state_changes"] [features] ansi = ["log", "cursor-icon", "bitflags"] default = ["no_std"] -nightly = ["utf8parse/nightly"] no_std = ["arrayvec"] serde = ["dep:serde"] -[workspace] -members = ["utf8parse", "vte_generate_state_changes"] +[dependencies] +arrayvec = { version = "0.7.2", default-features = false, optional = true } +bitflags = { version = "2.3.3", default-features = false, optional = true } +cursor-icon = { version = "1.0.0", default-features = false, optional = true } +log = { version = "0.4.17", optional = true } +memchr = "2.7.4" +serde = { version = "1.0.160", features = ["derive"], optional = true } +vte_generate_state_changes = { version = "0.1.0", path = "vte_generate_state_changes" } diff --git a/examples/parselog.rs b/examples/parselog.rs index dfd0aee..c41c150 100644 --- a/examples/parselog.rs +++ b/examples/parselog.rs @@ -61,11 +61,7 @@ fn main() { loop { match handle.read(&mut buf) { Ok(0) => break, - Ok(n) => { - for byte in &buf[..n] { - statemachine.advance(&mut performer, *byte); - } - }, + Ok(n) => statemachine.advance(&mut performer, &buf[..n]), Err(err) => { println!("err: {}", err); break; diff --git a/rustfmt.toml b/rustfmt.toml index 9308ba9..f82517e 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,13 +1,17 @@ format_code_in_doc_comments = true +group_imports = "StdExternalCrate" match_block_trailing_comma = true condense_wildcard_suffixes = true use_field_init_shorthand = true +normalize_doc_attributes = true overflow_delimited_expr = true +imports_granularity = "Module" +format_macro_matchers = true use_small_heuristics = "Max" +hex_literal_case = "Upper" normalize_comments = true reorder_impl_items = true use_try_shorthand = true newline_style = "Unix" format_strings = true wrap_comments = true -comment_width = 100 diff --git a/src/ansi.rs b/src/ansi.rs index 59b17e6..32ea829 100644 --- a/src/ansi.rs +++ b/src/ansi.rs @@ -11,21 +11,20 @@ extern crate alloc; use alloc::borrow::ToOwned; use alloc::string::{String, ToString}; use alloc::vec::Vec; -use bitflags::bitflags; - use core::convert::TryFrom; use core::fmt::{self, Display, Formatter, Write}; +#[cfg(not(feature = "no_std"))] +use core::ops::Mul; use core::ops::{Add, Sub}; use core::str::FromStr; use core::time::Duration; -use core::{iter, str}; - -#[cfg(not(feature = "no_std"))] -use core::ops::Mul; - +use core::{iter, mem, str}; #[cfg(not(feature = "no_std"))] use std::time::Instant; +use bitflags::bitflags; +#[doc(inline)] +pub use cursor_icon; use cursor_icon::CursorIcon; use log::debug; #[cfg(feature = "serde")] @@ -33,9 +32,6 @@ use serde::{Deserialize, Serialize}; use crate::{Params, ParamsIter}; -#[doc(inline)] -pub use cursor_icon; - /// Maximum time before a synchronized update is aborted. const SYNC_UPDATE_TIMEOUT: Duration = Duration::from_millis(150); @@ -168,9 +164,9 @@ impl FromStr for Rgb { match u32::from_str_radix(chars, 16) { Ok(mut color) => { - let b = (color & 0xff) as u8; + let b = (color & 0xFF) as u8; color >>= 8; - let g = (color & 0xff) as u8; + let g = (color & 0xFF) as u8; color >>= 8; let r = color as u8; Ok(Rgb { r, g, b }) @@ -237,14 +233,8 @@ fn parse_number(input: &[u8]) -> Option { let mut num: u8 = 0; for c in input { let c = *c as char; - if let Some(digit) = c.to_digit(10) { - num = match num.checked_mul(10).and_then(|v| v.checked_add(digit as u8)) { - Some(v) => v, - None => return None, - } - } else { - return None; - } + let digit = c.to_digit(10)?; + num = num.checked_mul(10).and_then(|v| v.checked_add(digit as u8))?; } Some(num) } @@ -274,7 +264,8 @@ impl Default for SyncState { } } -/// The processor wraps a `crate::Parser` to ultimately call methods on a Handler. +/// The processor wraps a `crate::Parser` to ultimately call methods on a +/// Handler. #[cfg(not(feature = "no_std"))] #[derive(Default)] pub struct Processor { @@ -282,7 +273,8 @@ pub struct Processor { parser: crate::Parser, } -/// The processor wraps a `crate::Parser` to ultimately call methods on a Handler. +/// The processor wraps a `crate::Parser` to ultimately call methods on a +/// Handler. #[cfg(feature = "no_std")] #[derive(Default)] pub struct Processor { @@ -303,15 +295,15 @@ impl Processor { /// Process a new byte from the PTY. #[inline] - pub fn advance(&mut self, handler: &mut H, byte: u8) + pub fn advance(&mut self, handler: &mut H, bytes: &[u8]) where H: Handler, { if self.state.sync_state.timeout.pending_timeout() { - self.advance_sync(handler, byte); + self.advance_sync(handler, bytes); } else { let mut performer = Performer::new(&mut self.state, handler); - self.parser.advance(&mut performer, byte); + self.parser.advance(&mut performer, bytes); } } @@ -321,15 +313,15 @@ impl Processor { H: Handler, { // Process all synchronized bytes. - for i in 0..self.state.sync_state.buffer.len() { - let byte = self.state.sync_state.buffer[i]; - let mut performer = Performer::new(&mut self.state, handler); - self.parser.advance(&mut performer, byte); - } + let buffer = mem::take(&mut self.state.sync_state.buffer); + let mut performer = Performer::new(&mut self.state, handler); + self.parser.advance(&mut performer, &buffer); + self.state.sync_state.buffer = buffer; // Report that update ended, since we could end due to timeout. handler.unset_private_mode(NamedPrivateMode::SyncUpdate.into()); - // Resetting state after processing makes sure we don't interpret buffered sync escapes. + // Resetting state after processing makes sure we don't interpret buffered sync + // escapes. self.state.sync_state.buffer.clear(); self.state.sync_state.timeout.clear_timeout(); } @@ -342,35 +334,50 @@ impl Processor { /// Process a new byte during a synchronized update. #[cold] - fn advance_sync(&mut self, handler: &mut H, byte: u8) + fn advance_sync(&mut self, handler: &mut H, bytes: &[u8]) where H: Handler, { - self.state.sync_state.buffer.push(byte); + // Advance sync parser or stop sync if we'd exceed the maximum buffer size. + if self.state.sync_state.buffer.len() + bytes.len() >= SYNC_BUFFER_SIZE - 1 { + // Terminate the synchronized update. + self.stop_sync(handler); - // Handle sync CSI escape sequences. - self.advance_sync_csi(handler); + // Just parse the bytes normally. + let mut performer = Performer::new(&mut self.state, handler); + self.parser.advance(&mut performer, bytes); + } else { + self.state.sync_state.buffer.extend(bytes); + self.advance_sync_csi(handler, bytes.len()); + } } /// Handle BSU/ESU CSI sequences during synchronized update. - fn advance_sync_csi(&mut self, handler: &mut H) + fn advance_sync_csi(&mut self, handler: &mut H, new_bytes: usize) where H: Handler, { - // Get the last few bytes for comparison. - let len = self.state.sync_state.buffer.len(); - let offset = len.saturating_sub(SYNC_ESCAPE_LEN); - let end = &self.state.sync_state.buffer[offset..]; + // Get constraints within which a new escape character might be relevant. + let buffer_len = self.state.sync_state.buffer.len(); + let start_offset = (buffer_len - new_bytes).saturating_sub(SYNC_ESCAPE_LEN - 1); + let end_offset = buffer_len.saturating_sub(SYNC_ESCAPE_LEN - 1); + let search_buffer = &self.state.sync_state.buffer[start_offset..end_offset]; + // Search for termination/extension escapes in the added bytes. + // // NOTE: It is technically legal to specify multiple private modes in the same // escape, but we only allow EXACTLY `\e[?2026h`/`\e[?2026l` to keep the parser - // reasonable. - // - // Check for extension/termination of the synchronized update. - if end == BSU_CSI { - self.state.sync_state.timeout.set_timeout(SYNC_UPDATE_TIMEOUT); - } else if end == ESU_CSI || len >= SYNC_BUFFER_SIZE - 1 { - self.stop_sync(handler); + // more simple. + for index in memchr::memchr_iter(0x1B, search_buffer) { + let offset = start_offset + index; + let escape = &self.state.sync_state.buffer[offset..offset + SYNC_ESCAPE_LEN]; + + if escape == BSU_CSI { + self.state.sync_state.timeout.set_timeout(SYNC_UPDATE_TIMEOUT); + } else if escape == ESU_CSI { + self.stop_sync(handler); + break; + } } } } @@ -710,13 +717,14 @@ bitflags! { /// /// This only applies to keys corresponding to ascii characters. /// -/// For the details on how to implement the mode handling correctly, consult [`XTerm's -/// implementation`] and the [`output`] of XTerm's provided [`perl script`]. Some libraries and -/// implementations also use the [`fixterms`] definition of the `CSI u`. +/// For the details on how to implement the mode handling correctly, consult +/// [`XTerm's implementation`] and the [`output`] of XTerm's provided [`perl +/// script`]. Some libraries and implementations also use the [`fixterms`] +/// definition of the `CSI u`. /// -/// The end escape sequence has a `CSI char; modifiers u` form while the original -/// `CSI 27 ; modifier ; char ~`. The clients should prefer the `CSI u`, since it has -/// more adoption. +/// The end escape sequence has a `CSI char; modifiers u` form while the +/// original `CSI 27 ; modifier ; char ~`. The clients should prefer the `CSI +/// u`, since it has more adoption. /// /// [`XTerm's implementation`]: https://invisible-island.net/xterm/modified-keys.html /// [`perl script`]: https://github.com/ThomasDickey/xterm-snapshots/blob/master/vttests/modify-keys.pl @@ -727,12 +735,14 @@ bitflags! { pub enum ModifyOtherKeys { /// Reset the state. Reset, - /// Enables this feature except for keys with well-known behavior, e.g., Tab, Backspace and - /// some special control character cases which are built into the X11 library (e.g., - /// Control-Space to make a NUL, or Control-3 to make an Escape character). + /// Enables this feature except for keys with well-known behavior, e.g., + /// Tab, Backspace and some special control character cases which are + /// built into the X11 library (e.g., Control-Space to make a NUL, or + /// Control-3 to make an Escape character). /// /// Escape sequences shouldn't be emitted under the following circumstances: - /// - When the key is in range of `[64;127]` and the modifier is either Control or Shift + /// - When the key is in range of `[64;127]` and the modifier is either + /// Control or Shift /// - When the key combination is a known control combination alias /// /// For more details, consult the [`example`] for the suggested translation. @@ -740,9 +750,10 @@ pub enum ModifyOtherKeys { /// [`example`]: https://github.com/alacritty/vte/blob/master/doc/modifyOtherKeys-example.txt EnableExceptWellDefined, /// Enables this feature for all keys including the exceptions of - /// [`Self::EnableExceptWellDefined`]. XTerm still ignores the special cases built into the - /// X11 library. Any shifted (modified) ordinary key send an escape sequence. The Alt- and - /// Meta- modifiers cause XTerm to send escape sequences. + /// [`Self::EnableExceptWellDefined`]. XTerm still ignores the special + /// cases built into the X11 library. Any shifted (modified) ordinary + /// key send an escape sequence. The Alt- and Meta- modifiers cause + /// XTerm to send escape sequences. /// /// For more details, consult the [`example`] for the suggested translation. /// @@ -1203,16 +1214,20 @@ impl StandardCharset { pub enum ScpCharPath { /// SCP's first parameter value of 0. Behavior is implementation defined. Default, - /// SCP's first parameter value of 1 which sets character path to LEFT-TO-RIGHT. + /// SCP's first parameter value of 1 which sets character path to + /// LEFT-TO-RIGHT. LTR, - /// SCP's first parameter value of 2 which sets character path to RIGHT-TO-LEFT. + /// SCP's first parameter value of 2 which sets character path to + /// RIGHT-TO-LEFT. RTL, } -/// SCP control's second parameter which determines update mode/direction between components. +/// SCP control's second parameter which determines update mode/direction +/// between components. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ScpUpdateMode { - /// SCP's second parameter value of 0 (the default). Implementation dependant update. + /// SCP's second parameter value of 0 (the default). Implementation + /// dependant update. ImplementationDependant, /// SCP's second parameter value of 1. /// @@ -1351,8 +1366,8 @@ where return; } - // Link parameters are in format of `key1=value1:key2=value2`. Currently only key - // `id` is defined. + // Link parameters are in format of `key1=value1:key2=value2`. Currently only + // key `id` is defined. let id = link_params .split(|&b| b == b':') .find_map(|kv| kv.strip_prefix(b"id=")) @@ -1948,7 +1963,7 @@ pub mod C0 { /// Unit Separator. pub const US: u8 = 0x1F; /// Delete, should be ignored by terminal. - pub const DEL: u8 = 0x7f; + pub const DEL: u8 = 0x7F; } // Tests for parsing escape sequences. @@ -1959,22 +1974,24 @@ mod tests { use super::*; #[derive(Default)] - pub struct TestSyncHandler; + pub struct TestSyncHandler { + is_sync: usize, + } impl Timeout for TestSyncHandler { #[inline] fn set_timeout(&mut self, _: Duration) { - unreachable!() + self.is_sync += 1; } #[inline] fn clear_timeout(&mut self) { - unreachable!() + self.is_sync = 0; } #[inline] fn pending_timeout(&self) -> bool { - false + self.is_sync != 0 } } @@ -2033,72 +2050,60 @@ mod tests { #[test] fn parse_control_attribute() { - static BYTES: &[u8] = &[0x1b, b'[', b'1', b'm']; + static BYTES: &[u8] = &[0x1B, b'[', b'1', b'm']; let mut parser = Processor::::new(); let mut handler = MockHandler::default(); - for byte in BYTES { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, BYTES); assert_eq!(handler.attr, Some(Attr::Bold)); } #[test] fn parse_terminal_identity_csi() { - let bytes: &[u8] = &[0x1b, b'[', b'1', b'c']; + let bytes: &[u8] = &[0x1B, b'[', b'1', b'c']; let mut parser = Processor::::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); assert!(!handler.identity_reported); handler.reset_state(); - let bytes: &[u8] = &[0x1b, b'[', b'c']; + let bytes: &[u8] = &[0x1B, b'[', b'c']; - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); assert!(handler.identity_reported); handler.reset_state(); - let bytes: &[u8] = &[0x1b, b'[', b'0', b'c']; + let bytes: &[u8] = &[0x1B, b'[', b'0', b'c']; - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); assert!(handler.identity_reported); } #[test] fn parse_terminal_identity_esc() { - let bytes: &[u8] = &[0x1b, b'Z']; + let bytes: &[u8] = &[0x1B, b'Z']; let mut parser = Processor::::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); assert!(handler.identity_reported); handler.reset_state(); - let bytes: &[u8] = &[0x1b, b'#', b'Z']; + let bytes: &[u8] = &[0x1B, b'#', b'Z']; let mut parser = Processor::::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); assert!(!handler.identity_reported); handler.reset_state(); @@ -2107,16 +2112,14 @@ mod tests { #[test] fn parse_truecolor_attr() { static BYTES: &[u8] = &[ - 0x1b, b'[', b'3', b'8', b';', b'2', b';', b'1', b'2', b'8', b';', b'6', b'6', b';', + 0x1B, b'[', b'3', b'8', b';', b'2', b';', b'1', b'2', b'8', b';', b'6', b'6', b';', b'2', b'5', b'5', b'm', ]; let mut parser = Processor::::new(); let mut handler = MockHandler::default(); - for byte in BYTES { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, BYTES); let spec = Rgb { r: 128, g: 66, b: 255 }; @@ -2127,38 +2130,34 @@ mod tests { #[test] fn parse_zsh_startup() { static BYTES: &[u8] = &[ - 0x1b, b'[', b'1', b'm', 0x1b, b'[', b'7', b'm', b'%', 0x1b, b'[', b'2', b'7', b'm', - 0x1b, b'[', b'1', b'm', 0x1b, b'[', b'0', b'm', b' ', b' ', b' ', b' ', b' ', b' ', + 0x1B, b'[', b'1', b'm', 0x1B, b'[', b'7', b'm', b'%', 0x1B, b'[', b'2', b'7', b'm', + 0x1B, b'[', b'1', b'm', 0x1B, b'[', b'0', b'm', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', - b' ', b' ', b' ', b'\r', b' ', b'\r', b'\r', 0x1b, b'[', b'0', b'm', 0x1b, b'[', b'2', - b'7', b'm', 0x1b, b'[', b'2', b'4', b'm', 0x1b, b'[', b'J', b'j', b'w', b'i', b'l', - b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd', b'e', b's', b'k', b' ', 0x1b, - b'[', b'0', b'1', b';', b'3', b'2', b'm', 0xe2, 0x9e, 0x9c, b' ', 0x1b, b'[', b'0', - b'1', b';', b'3', b'2', b'm', b' ', 0x1b, b'[', b'3', b'6', b'm', b'~', b'/', b'c', + b' ', b' ', b' ', b'\r', b' ', b'\r', b'\r', 0x1B, b'[', b'0', b'm', 0x1B, b'[', b'2', + b'7', b'm', 0x1B, b'[', b'2', b'4', b'm', 0x1B, b'[', b'J', b'j', b'w', b'i', b'l', + b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd', b'e', b's', b'k', b' ', 0x1B, + b'[', b'0', b'1', b';', b'3', b'2', b'm', 0xE2, 0x9E, 0x9C, b' ', 0x1B, b'[', b'0', + b'1', b';', b'3', b'2', b'm', b' ', 0x1B, b'[', b'3', b'6', b'm', b'~', b'/', b'c', b'o', b'd', b'e', ]; let mut handler = MockHandler::default(); let mut parser = Processor::::new(); - for byte in BYTES { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, BYTES); } #[test] fn parse_designate_g0_as_line_drawing() { - static BYTES: &[u8] = &[0x1b, b'(', b'0']; + static BYTES: &[u8] = &[0x1B, b'(', b'0']; let mut parser = Processor::::new(); let mut handler = MockHandler::default(); - for byte in BYTES { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, BYTES); assert_eq!(handler.index, CharsetIndex::G0); assert_eq!(handler.charset, StandardCharset::SpecialCharacterAndLineDrawing); @@ -2166,37 +2165,35 @@ mod tests { #[test] fn parse_designate_g1_as_line_drawing_and_invoke() { - static BYTES: &[u8] = &[0x1b, b')', b'0', 0x0e]; + static BYTES: &[u8] = &[0x1B, b')', b'0', 0x0E]; let mut parser = Processor::::new(); let mut handler = MockHandler::default(); - for byte in &BYTES[..3] { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, &BYTES[..3]); assert_eq!(handler.index, CharsetIndex::G1); assert_eq!(handler.charset, StandardCharset::SpecialCharacterAndLineDrawing); let mut handler = MockHandler::default(); - parser.advance(&mut handler, BYTES[3]); + parser.advance(&mut handler, &[BYTES[3]]); assert_eq!(handler.index, CharsetIndex::G1); } #[test] fn parse_valid_rgb_colors() { - assert_eq!(xparse_color(b"rgb:f/e/d"), Some(Rgb { r: 0xff, g: 0xee, b: 0xdd })); - assert_eq!(xparse_color(b"rgb:11/aa/ff"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff })); - assert_eq!(xparse_color(b"rgb:f/ed1/cb23"), Some(Rgb { r: 0xff, g: 0xec, b: 0xca })); - assert_eq!(xparse_color(b"rgb:ffff/0/0"), Some(Rgb { r: 0xff, g: 0x0, b: 0x0 })); + assert_eq!(xparse_color(b"rgb:f/e/d"), Some(Rgb { r: 0xFF, g: 0xEE, b: 0xDD })); + assert_eq!(xparse_color(b"rgb:11/aa/ff"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF })); + assert_eq!(xparse_color(b"rgb:f/ed1/cb23"), Some(Rgb { r: 0xFF, g: 0xEC, b: 0xCA })); + assert_eq!(xparse_color(b"rgb:ffff/0/0"), Some(Rgb { r: 0xFF, g: 0x0, b: 0x0 })); } #[test] fn parse_valid_legacy_rgb_colors() { - assert_eq!(xparse_color(b"#1af"), Some(Rgb { r: 0x10, g: 0xa0, b: 0xf0 })); - assert_eq!(xparse_color(b"#11aaff"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff })); - assert_eq!(xparse_color(b"#110aa0ff0"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff })); - assert_eq!(xparse_color(b"#1100aa00ff00"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff })); + assert_eq!(xparse_color(b"#1af"), Some(Rgb { r: 0x10, g: 0xA0, b: 0xF0 })); + assert_eq!(xparse_color(b"#11aaff"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF })); + assert_eq!(xparse_color(b"#110aa0ff0"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF })); + assert_eq!(xparse_color(b"#1100aa00ff00"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF })); } #[test] @@ -2233,11 +2230,9 @@ mod tests { let mut parser = Processor::::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); - assert_eq!(handler.color, Some(Rgb { r: 0xf0, g: 0xf0, b: 0xf0 })); + assert_eq!(handler.color, Some(Rgb { r: 0xF0, g: 0xF0, b: 0xF0 })); } #[test] @@ -2247,9 +2242,7 @@ mod tests { let mut parser = Processor::::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); assert_eq!(handler.reset_colors, vec![1]); } @@ -2261,9 +2254,7 @@ mod tests { let mut parser = Processor::::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); let expected: Vec = (0..256).collect(); assert_eq!(handler.reset_colors, expected); @@ -2276,30 +2267,73 @@ mod tests { let mut parser = Processor::::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); let expected: Vec = (0..256).collect(); assert_eq!(handler.reset_colors, expected); } + #[test] + fn partial_sync_updates() { + let mut parser = Processor::::new(); + let mut handler = MockHandler::default(); + + assert_eq!(parser.state.sync_state.timeout.is_sync, 0); + assert!(handler.attr.is_none()); + + // Start synchronized update. + + parser.advance(&mut handler, b"\x1b[?20"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 0); + assert!(handler.attr.is_none()); + + parser.advance(&mut handler, b"26h"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 1); + assert!(handler.attr.is_none()); + + // Dispatch some data. + + parser.advance(&mut handler, b"random \x1b[31m stuff"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 1); + assert!(handler.attr.is_none()); + + // Extend synchronized update. + + parser.advance(&mut handler, b"\x1b[?20"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 1); + assert!(handler.attr.is_none()); + + parser.advance(&mut handler, b"26h"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 2); + assert!(handler.attr.is_none()); + + // Terminate synchronized update. + + parser.advance(&mut handler, b"\x1b[?20"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 2); + assert!(handler.attr.is_none()); + + parser.advance(&mut handler, b"26l"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 0); + assert!(handler.attr.is_some()); + } + #[test] #[cfg(not(feature = "no_std"))] fn contrast() { - let rgb1 = Rgb { r: 0xff, g: 0xff, b: 0xff }; + let rgb1 = Rgb { r: 0xFF, g: 0xFF, b: 0xFF }; let rgb2 = Rgb { r: 0x00, g: 0x00, b: 0x00 }; assert!((rgb1.contrast(rgb2) - 21.).abs() < f64::EPSILON); - let rgb1 = Rgb { r: 0xff, g: 0xff, b: 0xff }; + let rgb1 = Rgb { r: 0xFF, g: 0xFF, b: 0xFF }; assert!((rgb1.contrast(rgb1) - 1.).abs() < f64::EPSILON); - let rgb1 = Rgb { r: 0xff, g: 0x00, b: 0xff }; - let rgb2 = Rgb { r: 0x00, g: 0xff, b: 0x00 }; + let rgb1 = Rgb { r: 0xFF, g: 0x00, b: 0xFF }; + let rgb2 = Rgb { r: 0x00, g: 0xFF, b: 0x00 }; assert!((rgb1.contrast(rgb2) - 2.285_543_608_124_253_3).abs() < f64::EPSILON); let rgb1 = Rgb { r: 0x12, g: 0x34, b: 0x56 }; - let rgb2 = Rgb { r: 0xfe, g: 0xdc, b: 0xba }; + let rgb2 = Rgb { r: 0xFE, g: 0xDC, b: 0xBA }; assert!((rgb1.contrast(rgb2) - 9.786_558_997_257_74).abs() < f64::EPSILON); } } diff --git a/src/definitions.rs b/src/definitions.rs index 218c1eb..11058c0 100644 --- a/src/definitions.rs +++ b/src/definitions.rs @@ -4,52 +4,51 @@ use core::mem; #[repr(u8)] #[derive(Debug, Default, Copy, Clone)] pub enum State { - Anywhere = 0, - CsiEntry = 1, - CsiIgnore = 2, - CsiIntermediate = 3, - CsiParam = 4, - DcsEntry = 5, - DcsIgnore = 6, - DcsIntermediate = 7, - DcsParam = 8, - DcsPassthrough = 9, - Escape = 10, - EscapeIntermediate = 11, + CsiEntry, + CsiIgnore, + CsiIntermediate, + CsiParam, + DcsEntry, + DcsIgnore, + DcsIntermediate, + DcsParam, + DcsPassthrough, + Escape, + EscapeIntermediate, + OscString, + SosPmApcString, + Anywhere, #[default] - Ground = 12, - OscString = 13, - SosPmApcString = 14, - Utf8 = 15, + Ground, } +// NOTE: Removing the unused actions prefixed with `_` will reduce performance. #[allow(dead_code)] #[repr(u8)] #[derive(Debug, Clone, Copy)] pub enum Action { - None = 0, - Clear = 1, - Collect = 2, - CsiDispatch = 3, - EscDispatch = 4, - Execute = 5, - Hook = 6, - Ignore = 7, - OscEnd = 8, - OscPut = 9, - OscStart = 10, - Param = 11, - Print = 12, - Put = 13, - Unhook = 14, - BeginUtf8 = 15, + None, + _Clear, + Collect, + CsiDispatch, + EscDispatch, + Execute, + _Hook, + _Ignore, + _OscEnd, + OscPut, + _OscStart, + Param, + _Print, + Put, + _Unhook, } /// Unpack a u8 into a State and Action /// -/// The implementation of this assumes that there are *precisely* 16 variants for both Action and -/// State. Furthermore, it assumes that the enums are tag-only; that is, there is no data in any -/// variant. +/// The implementation of this assumes that there are *precisely* 16 variants +/// for both Action and State. Furthermore, it assumes that the enums are +/// tag-only; that is, there is no data in any variant. /// /// Bad things will happen if those invariants are violated. #[inline(always)] @@ -57,9 +56,9 @@ pub fn unpack(delta: u8) -> (State, Action) { unsafe { ( // State is stored in bottom 4 bits - mem::transmute(delta & 0x0f), + mem::transmute::(delta & 0x0F), // Action is stored in top 4 bits - mem::transmute(delta >> 4), + mem::transmute::(delta >> 4), ) } } @@ -75,37 +74,26 @@ mod tests { #[test] fn unpack_state_action() { - match unpack(0xee) { - (State::SosPmApcString, Action::Unhook) => (), + match unpack(0xEE) { + (State::Ground, Action::_Unhook) => (), _ => panic!("unpack failed"), } - match unpack(0x0f) { - (State::Utf8, Action::None) => (), + match unpack(0x0E) { + (State::Ground, Action::None) => (), _ => panic!("unpack failed"), } - match unpack(0xff) { - (State::Utf8, Action::BeginUtf8) => (), + match unpack(0xE0) { + (State::CsiEntry, Action::_Unhook) => (), _ => panic!("unpack failed"), } } #[test] fn pack_state_action() { - match unpack(0xee) { - (State::SosPmApcString, Action::Unhook) => (), - _ => panic!("unpack failed"), - } - - match unpack(0x0f) { - (State::Utf8, Action::None) => (), - _ => panic!("unpack failed"), - } - - match unpack(0xff) { - (State::Utf8, Action::BeginUtf8) => (), - _ => panic!("unpack failed"), - } + assert_eq!(pack(State::Ground, Action::_Unhook), 0xEE); + assert_eq!(pack(State::Ground, Action::None), 0x0E); + assert_eq!(pack(State::CsiEntry, Action::_Unhook), 0xE0); } } diff --git a/src/lib.rs b/src/lib.rs index 31e2a31..27e71ac 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,44 +1,39 @@ //! Parser for implementing virtual terminal emulators //! -//! [`Parser`] is implemented according to [Paul Williams' ANSI parser -//! state machine]. The state machine doesn't assign meaning to the parsed data -//! and is thus not itself sufficient for writing a terminal emulator. Instead, -//! it is expected that an implementation of [`Perform`] is provided which does +//! [`Parser`] is implemented according to [Paul Williams' ANSI parser state +//! machine]. The state machine doesn't assign meaning to the parsed data and is +//! thus not itself sufficient for writing a terminal emulator. Instead, it is +//! expected that an implementation of [`Perform`] is provided which does //! something useful with the parsed data. The [`Parser`] handles the book //! keeping, and the [`Perform`] gets to simply handle actions. //! //! # Examples //! -//! For an example of using the [`Parser`] please see the examples folder. The example included -//! there simply logs all the actions [`Perform`] does. One quick thing to see it in action is to -//! pipe `vim` into it +//! For an example of using the [`Parser`] please see the examples folder. The +//! example included there simply logs all the actions [`Perform`] does. One +//! quick way to see it in action is to pipe `printf` into it //! //! ```sh -//! cargo build --release --example parselog -//! vim | target/release/examples/parselog +//! printf '\x1b[31mExample' | cargo run --example parselog //! ``` //! -//! Just type `:q` to exit. -//! //! # Differences from original state machine description //! //! * UTF-8 Support for Input //! * OSC Strings can be terminated by 0x07 -//! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in -//! all states. +//! * Only supports 7-bit codes //! //! [`Parser`]: struct.Parser.html //! [`Perform`]: trait.Perform.html //! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser #![deny(clippy::all, clippy::if_not_else, clippy::enum_glob_use)] -#![cfg_attr(all(feature = "nightly", test), feature(test))] #![cfg_attr(feature = "no_std", no_std)] use core::mem::MaybeUninit; +use core::str; #[cfg(feature = "no_std")] use arrayvec::ArrayVec; -use utf8parse as utf8; mod definitions; mod params; @@ -46,28 +41,13 @@ mod table; #[cfg(feature = "ansi")] pub mod ansi; -pub use params::{Params, ParamsIter}; - use definitions::{unpack, Action, State}; +pub use params::{Params, ParamsIter}; const MAX_INTERMEDIATES: usize = 2; const MAX_OSC_PARAMS: usize = 16; const MAX_OSC_RAW: usize = 1024; -struct VtUtf8Receiver<'a, P: Perform>(&'a mut P, &'a mut State); - -impl<'a, P: Perform> utf8::Receiver for VtUtf8Receiver<'a, P> { - fn codepoint(&mut self, c: char) { - self.0.print(c); - *self.1 = State::Ground; - } - - fn invalid_sequence(&mut self) { - self.0.print('�'); - *self.1 = State::Ground; - } -} - /// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`] /// /// [`Perform`]: trait.Perform.html @@ -88,7 +68,8 @@ pub struct Parser { osc_params: [(usize, usize); MAX_OSC_PARAMS], osc_num_params: usize, ignoring: bool, - utf8_parser: utf8::Parser, + partial_utf8: [u8; 4], + partial_utf8_len: usize, } impl Parser { @@ -99,7 +80,8 @@ impl Parser { } impl Parser { - /// Create a new Parser with a custom size for the Operating System Command buffer. + /// Create a new Parser with a custom size for the Operating System Command + /// buffer. /// /// Call with a const-generic param on `Parser`, like: /// @@ -121,41 +103,34 @@ impl Parser { &self.intermediates[..self.intermediate_idx] } - /// Advance the parser state + /// Advance the parser state. /// - /// Requires a [`Perform`] in case `byte` triggers an action + /// Requires a [`Perform`] implementation to handle the triggered actions. /// /// [`Perform`]: trait.Perform.html #[inline] - pub fn advance(&mut self, performer: &mut P, byte: u8) { - // Utf8 characters are handled out-of-band. - if let State::Utf8 = self.state { - self.process_utf8(performer, byte); - return; - } - - // Handle state changes in the anywhere state before evaluating changes - // for current state. - let mut change = table::STATE_CHANGES[State::Anywhere as usize][byte as usize]; + pub fn advance(&mut self, performer: &mut P, bytes: &[u8]) { + let mut i = 0; - if change == 0 { - change = table::STATE_CHANGES[self.state as usize][byte as usize]; + // Handle partial codepoints from previous calls to `advance`. + if self.partial_utf8_len > 0 { + i += self.advance_partial_utf8(performer, bytes); } - // Unpack into a state and action - let (state, action) = unpack(change); + while i < bytes.len() { + match self.state { + State::Ground => i += self.advance_ground(performer, &bytes[i..]), + _ => { + let byte = bytes[i]; + let change = table::STATE_CHANGES[self.state as usize][byte as usize]; + let (state, action) = unpack(change); - self.perform_state_change(performer, state, action, byte); - } + self.perform_state_change(performer, state, action, byte); - #[inline] - fn process_utf8

(&mut self, performer: &mut P, byte: u8) - where - P: Perform, - { - let mut receiver = VtUtf8Receiver(performer, &mut self.state); - let utf8_parser = &mut self.utf8_parser; - utf8_parser.advance(&mut receiver, byte); + i += 1; + }, + } + } } #[inline] @@ -163,93 +138,75 @@ impl Parser { where P: Perform, { - macro_rules! maybe_action { - ($action:expr, $arg:expr) => { - match $action { - Action::None => (), - action => { - self.perform_action(performer, action, $arg); - }, - } - }; + if matches!(state, State::Anywhere) { + self.perform_action(performer, action, byte); + return; } - match state { - State::Anywhere => { - // Just run the action - self.perform_action(performer, action, byte); - }, - state => { - match self.state { - State::DcsPassthrough => { - self.perform_action(performer, Action::Unhook, byte); - }, - State::OscString => { - self.perform_action(performer, Action::OscEnd, byte); - }, - _ => (), - } + match self.state { + State::DcsPassthrough => performer.unhook(), + State::OscString => { + let param_idx = self.osc_num_params; + let idx = self.osc_raw.len(); - maybe_action!(action, byte); + match param_idx { + // Finish last parameter if not already maxed + MAX_OSC_PARAMS => (), - match state { - State::CsiEntry | State::DcsEntry | State::Escape => { - self.perform_action(performer, Action::Clear, byte); - }, - State::DcsPassthrough => { - self.perform_action(performer, Action::Hook, byte); + // First param is special - 0 to current byte index + 0 => { + self.osc_params[param_idx] = (0, idx); + self.osc_num_params += 1; }, - State::OscString => { - self.perform_action(performer, Action::OscStart, byte); + + // All other params depend on previous indexing + _ => { + let prev = self.osc_params[param_idx - 1]; + let begin = prev.1; + self.osc_params[param_idx] = (begin, idx); + self.osc_num_params += 1; }, - _ => (), } - - // Assume the new state - self.state = state; + self.osc_dispatch(performer, byte); }, + _ => (), } - } - /// Separate method for osc_dispatch that borrows self as read-only - /// - /// The aliasing is needed here for multiple slices into self.osc_raw - #[inline] - fn osc_dispatch(&self, performer: &mut P, byte: u8) { - let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] = - unsafe { MaybeUninit::uninit().assume_init() }; + if matches!(action, Action::None) { + match state { + State::CsiEntry | State::DcsEntry | State::Escape => self.reset_params(), + State::DcsPassthrough => { + if self.params.is_full() { + self.ignoring = true; + } else { + self.params.push(self.param); + } - for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) { - let indices = self.osc_params[i]; - *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]); + performer.hook( + self.params(), + self.intermediates(), + self.ignoring, + byte as char, + ); + }, + State::OscString => { + self.osc_raw.clear(); + self.osc_num_params = 0; + }, + _ => (), + } + } else { + self.perform_action(performer, action, byte); } - unsafe { - let num_params = self.osc_num_params; - let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]]; - performer.osc_dispatch(&*params, byte == 0x07); - } + self.state = state; } #[inline] fn perform_action(&mut self, performer: &mut P, action: Action, byte: u8) { match action { - Action::Print => performer.print(byte as char), Action::Execute => performer.execute(byte), - Action::Hook => { - if self.params.is_full() { - self.ignoring = true; - } else { - self.params.push(self.param); - } - - performer.hook(self.params(), self.intermediates(), self.ignoring, byte as char); - }, Action::Put => performer.put(byte), - Action::OscStart => { - self.osc_raw.clear(); - self.osc_num_params = 0; - }, Action::OscPut => { #[cfg(feature = "no_std")] { @@ -285,31 +242,6 @@ impl Parser { self.osc_raw.push(byte); } }, - Action::OscEnd => { - let param_idx = self.osc_num_params; - let idx = self.osc_raw.len(); - - match param_idx { - // Finish last parameter if not already maxed - MAX_OSC_PARAMS => (), - - // First param is special - 0 to current byte index - 0 => { - self.osc_params[param_idx] = (0, idx); - self.osc_num_params += 1; - }, - - // All other params depend on previous indexing - _ => { - let prev = self.osc_params[param_idx - 1]; - let begin = prev.1; - self.osc_params[param_idx] = (begin, idx); - self.osc_num_params += 1; - }, - } - self.osc_dispatch(performer, byte); - }, - Action::Unhook => performer.unhook(), Action::CsiDispatch => { if self.params.is_full() { self.ignoring = true; @@ -341,37 +273,195 @@ impl Parser { return; } - if byte == b';' { - self.params.push(self.param); - self.param = 0; - } else if byte == b':' { - self.params.extend(self.param); - self.param = 0; - } else { - // Continue collecting bytes into param - self.param = self.param.saturating_mul(10); - self.param = self.param.saturating_add((byte - b'0') as u16); + match byte { + b';' => { + self.params.push(self.param); + self.param = 0; + }, + b':' => { + self.params.extend(self.param); + self.param = 0; + }, + _ => { + // Continue collecting bytes into param + self.param = self.param.saturating_mul(10); + self.param = self.param.saturating_add((byte - b'0') as u16); + }, } }, - Action::Clear => { - // Reset everything on ESC/CSI/DCS entry - self.intermediate_idx = 0; - self.ignoring = false; - self.param = 0; + _ => (), + } + } - self.params.clear(); + /// Reset escape sequence parameters and intermediates. + #[inline] + fn reset_params(&mut self) { + self.intermediate_idx = 0; + self.ignoring = false; + self.param = 0; + + self.params.clear(); + } + + /// Separate method for osc_dispatch that borrows self as read-only + /// + /// The aliasing is needed here for multiple slices into self.osc_raw + #[inline] + fn osc_dispatch(&self, performer: &mut P, byte: u8) { + let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] = + unsafe { MaybeUninit::uninit().assume_init() }; + + for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) { + let indices = self.osc_params[i]; + *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]); + } + + unsafe { + let num_params = self.osc_num_params; + let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]]; + performer.osc_dispatch(&*params, byte == 0x07); + } + } + + /// Advance the parser state from ground. + /// + /// The ground state is handled separately since it can only be left using + /// the escape character (`\x1b`). This allows more efficient parsing by + /// using SIMD search with [`memchr`]. + #[inline] + fn advance_ground(&mut self, performer: &mut P, bytes: &[u8]) -> usize { + // Find the next escape character. + let num_bytes = bytes.len(); + let plain_chars = memchr::memchr(0x1B, bytes).unwrap_or(num_bytes); + + // If the next character is ESC, just process it and short-circuit. + if plain_chars == 0 { + self.state = State::Escape; + self.reset_params(); + return 1; + } + + match str::from_utf8(&bytes[..plain_chars]) { + Ok(parsed) => { + Self::ground_dispatch(performer, parsed); + let mut processed = plain_chars; + + // If there's another character, it must be escape so process it directly. + if processed < num_bytes { + self.state = State::Escape; + self.reset_params(); + processed += 1; + } + + processed + }, + // Handle invalid and partial utf8. + Err(err) => { + // Dispatch all the valid bytes. + let valid_bytes = err.valid_up_to(); + let parsed = unsafe { str::from_utf8_unchecked(&bytes[..valid_bytes]) }; + Self::ground_dispatch(performer, parsed); + + match err.error_len() { + Some(len) => { + // Execute C1 escapes or emit replacement character. + if len == 1 && bytes[valid_bytes] <= 0x9F { + performer.execute(bytes[valid_bytes]); + } else { + performer.print('�'); + } + + // Restart processing after the invalid bytes. + // + // While we could theoretically try to just re-parse + // `bytes[valid_bytes + len..plain_chars]`, it's easier + // to just skip it and invalid utf8 is pretty rare anyway. + valid_bytes + len + }, + None => { + let extra_bytes = num_bytes - valid_bytes; + for i in 0..extra_bytes { + self.partial_utf8[self.partial_utf8_len + i] = bytes[valid_bytes + i]; + } + self.partial_utf8_len += extra_bytes; + + num_bytes + }, + } }, - Action::BeginUtf8 => self.process_utf8(performer, byte), - Action::Ignore => (), - Action::None => (), + } + } + + /// Advance the parser while processing a partial utf8 codepoint. + #[inline] + fn advance_partial_utf8(&mut self, performer: &mut P, bytes: &[u8]) -> usize { + // Try to copy up to 3 more characters, to ensure the codepoint is complete. + let old_bytes = self.partial_utf8_len; + let to_copy = bytes.len().min(self.partial_utf8.len() - old_bytes); + self.partial_utf8[old_bytes..old_bytes + to_copy].copy_from_slice(&bytes[..to_copy]); + self.partial_utf8_len += to_copy; + + // Parse the unicode character. + match str::from_utf8(&self.partial_utf8[..self.partial_utf8_len]) { + // If the entire buffer is valid, use the first character and continue parsing. + Ok(parsed) => { + let c = unsafe { parsed.chars().next().unwrap_unchecked() }; + performer.print(c); + + self.partial_utf8_len = 0; + c.len_utf8() - old_bytes + }, + Err(err) => { + match err.error_len() { + // If the partial character was also invalid, emit the replacement + // character. + Some(invalid_len) => { + performer.print('�'); + + self.partial_utf8_len = 0; + invalid_len - old_bytes + }, + None => { + // If we have any valid bytes, that means we partially copied another + // utf8 character into `partial_utf8`. Since we only care about the + // first character, we just ignore the rest. + let valid_bytes = err.valid_up_to(); + if valid_bytes > 0 { + let c = unsafe { + let parsed = + str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]); + parsed.chars().next().unwrap_unchecked() + }; + performer.print(c); + + self.partial_utf8_len = 0; + valid_bytes - old_bytes + } else { + // If the character still isn't complete, wait for more data. + bytes.len() + } + }, + } + }, + } + } + + /// Handle ground dispatch of print/execute for all characters in a string. + #[inline] + fn ground_dispatch(performer: &mut P, text: &str) { + for c in text.chars() { + match c { + '\x00'..='\x1f' | '\u{80}'..='\u{9f}' => performer.execute(c as u8), + _ => performer.print(c), + } } } } /// Performs actions requested by the Parser /// -/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor -/// movement, or simply printing characters to the screen. +/// Actions in this case mean, for example, handling a CSI escape sequence +/// describing cursor movement, or simply printing characters to the screen. /// /// The methods on this type correspond to actions described in /// . I've done my best to describe them in @@ -385,19 +475,21 @@ pub trait Perform { /// Execute a C0 or C1 control function. fn execute(&mut self, _byte: u8) {} - /// Invoked when a final character arrives in first part of device control string. + /// Invoked when a final character arrives in first part of device control + /// string. /// - /// The control function should be determined from the private marker, final character, and - /// execute with a parameter list. A handler should be selected for remaining characters in the - /// string; the handler function should subsequently be called by `put` for every character in + /// The control function should be determined from the private marker, final + /// character, and execute with a parameter list. A handler should be + /// selected for remaining characters in the string; the handler + /// function should subsequently be called by `put` for every character in /// the control string. /// /// The `ignore` flag indicates that more than two intermediates arrived and /// subsequent characters were ignored. fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: char) {} - /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls - /// will also be passed to the handler. + /// Pass bytes as part of a device control string to the handle chosen in + /// `hook`. C0 controls will also be passed to the handler. fn put(&mut self, _byte: u8) {} /// Called when a device control string is terminated. @@ -411,9 +503,9 @@ pub trait Perform { /// A final character has arrived for a CSI sequence /// - /// The `ignore` flag indicates that either more than two intermediates arrived - /// or the number of parameters exceeded the maximum supported length, - /// and subsequent characters were ignored. + /// The `ignore` flag indicates that either more than two intermediates + /// arrived or the number of parameters exceeded the maximum supported + /// length, and subsequent characters were ignored. fn csi_dispatch( &mut self, _params: &Params, @@ -436,12 +528,12 @@ extern crate std; #[cfg(test)] mod tests { - use super::*; - use std::vec::Vec; - static OSC_BYTES: &[u8] = &[ - 0x1b, 0x5d, // Begin OSC + use super::*; + + const OSC_BYTES: &[u8] = &[ + 0x1B, 0x5D, // Begin OSC b'2', b';', b'j', b'w', b'i', b'l', b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd', b'e', b's', b'k', b':', b' ', b'~', b'/', b'c', b'o', b'd', b'e', b'/', b'a', b'l', b'a', b'c', b'r', b'i', b't', b't', b'y', 0x07, // End OSC @@ -459,6 +551,8 @@ mod tests { Esc(Vec, bool, u8), DcsHook(Vec>, Vec, bool, char), DcsPut(u8), + Print(char), + Execute(u8), DcsUnhook, } @@ -492,6 +586,14 @@ mod tests { fn unhook(&mut self) { self.dispatched.push(Sequence::DcsUnhook); } + + fn print(&mut self, c: char) { + self.dispatched.push(Sequence::Print(c)); + } + + fn execute(&mut self, byte: u8) { + self.dispatched.push(Sequence::Execute(byte)); + } } #[test] @@ -499,9 +601,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in OSC_BYTES { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, OSC_BYTES); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -519,9 +619,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in &[0x1b, 0x5d, 0x07] { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, &[0x1B, 0x5D, 0x07]); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -537,9 +635,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in input { - parser.advance(&mut dispatcher, byte); - } + parser.advance(&mut dispatcher, &input); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -557,9 +653,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -574,9 +668,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 2); match &dispatcher.dispatched[0] { @@ -588,25 +680,19 @@ mod tests { #[test] fn parse_osc_with_utf8_arguments() { static INPUT: &[u8] = &[ - 0x0d, 0x1b, 0x5d, 0x32, 0x3b, 0x65, 0x63, 0x68, 0x6f, 0x20, 0x27, 0xc2, 0xaf, 0x5c, - 0x5f, 0x28, 0xe3, 0x83, 0x84, 0x29, 0x5f, 0x2f, 0xc2, 0xaf, 0x27, 0x20, 0x26, 0x26, - 0x20, 0x73, 0x6c, 0x65, 0x65, 0x70, 0x20, 0x31, 0x07, + 0x0D, 0x1B, 0x5D, 0x32, 0x3B, 0x65, 0x63, 0x68, 0x6F, 0x20, 0x27, 0xC2, 0xAF, 0x5C, + 0x5F, 0x28, 0xE3, 0x83, 0x84, 0x29, 0x5F, 0x2F, 0xC2, 0xAF, 0x27, 0x20, 0x26, 0x26, + 0x20, 0x73, 0x6C, 0x65, 0x65, 0x70, 0x20, 0x31, 0x07, ]; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); - assert_eq!(dispatcher.dispatched.len(), 1); - match &dispatcher.dispatched[0] { - Sequence::Osc(params, _) => { - assert_eq!(params[0], &[b'2']); - assert_eq!(params[1], &INPUT[5..(INPUT.len() - 1)]); - }, - _ => panic!("expected osc sequence"), - } + assert_eq!(dispatcher.dispatched[0], Sequence::Execute(b'\r')); + let osc_data = INPUT[5..(INPUT.len() - 1)].into(); + assert_eq!(dispatcher.dispatched[1], Sequence::Osc(vec![vec![b'2'], osc_data], true)); + assert_eq!(dispatcher.dispatched.len(), 2); } #[test] @@ -615,9 +701,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 2); match &dispatcher.dispatched[0] { @@ -631,26 +715,20 @@ mod tests { #[test] fn exceed_max_buffer_size() { static NUM_BYTES: usize = MAX_OSC_RAW + 100; - static INPUT_START: &[u8] = &[0x1b, b']', b'5', b'2', b';', b's']; - static INPUT_END: &[u8] = &[b'\x07']; + static INPUT_START: &[u8] = b"\x1b]52;s"; + static INPUT_END: &[u8] = b"\x07"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); // Create valid OSC escape - for byte in INPUT_START { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT_START); // Exceed max buffer size - for _ in 0..NUM_BYTES { - parser.advance(&mut dispatcher, b'a'); - } + parser.advance(&mut dispatcher, &[b'a'; NUM_BYTES]); // Terminate escape for dispatch - for byte in INPUT_END { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT_END); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -679,9 +757,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in input { - parser.advance(&mut dispatcher, byte); - } + parser.advance(&mut dispatcher, &input); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -704,9 +780,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in input { - parser.advance(&mut dispatcher, byte); - } + parser.advance(&mut dispatcher, &input); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -723,9 +797,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in b"\x1b[4;m" { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, b"\x1b[4;m"); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -740,9 +812,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in b"\x1b[;4m" { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, b"\x1b[;4m"); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -758,13 +828,11 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { - Sequence::Csi(params, ..) => assert_eq!(params, &[[std::u16::MAX as u16]]), + Sequence::Csi(params, ..) => assert_eq!(params, &[[u16::MAX]]), _ => panic!("expected csi sequence"), } } @@ -775,14 +843,12 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { Sequence::Csi(params, intermediates, ignore, _) => { - assert_eq!(intermediates, &[b'?']); + assert_eq!(intermediates, b"?"); assert_eq!(params, &[[1049]]); assert!(!ignore); }, @@ -796,9 +862,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -818,9 +882,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in input { - parser.advance(&mut dispatcher, byte); - } + parser.advance(&mut dispatcher, &input); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -839,14 +901,12 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 3); match &dispatcher.dispatched[0] { Sequence::DcsHook(params, intermediates, ignore, _) => { - assert_eq!(intermediates, &[b'$']); + assert_eq!(intermediates, b"$"); assert_eq!(params, &[[1]]); assert!(!ignore); }, @@ -862,9 +922,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 7); match &dispatcher.dispatched[0] { @@ -886,13 +944,11 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 6); match &dispatcher.dispatched[5] { - Sequence::Esc(intermediates, ..) => assert_eq!(intermediates, &[b'+']), + Sequence::Esc(intermediates, ..) => assert_eq!(intermediates, b"+"), _ => panic!("expected esc sequence"), } } @@ -903,14 +959,12 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { Sequence::Esc(intermediates, ignore, byte) => { - assert_eq!(intermediates, &[b'(']); + assert_eq!(intermediates, b"("); assert_eq!(*byte, b'A'); assert!(!ignore); }, @@ -918,15 +972,26 @@ mod tests { } } + #[test] + fn esc_reset_intermediates() { + static INPUT: &[u8] = b"\x1b[?2004l\x1b#8"; + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + + parser.advance(&mut dispatcher, INPUT); + + assert_eq!(dispatcher.dispatched.len(), 2); + assert_eq!(dispatcher.dispatched[0], Sequence::Csi(vec![vec![2004]], vec![63], false, 'l')); + assert_eq!(dispatcher.dispatched[1], Sequence::Esc(vec![35], false, 56)); + } + #[test] fn params_buffer_filled_with_subparam() { static INPUT: &[u8] = b"\x1b[::::::::::::::::::::::::::::::::x\x1b"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -947,14 +1012,12 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser: Parser<30> = Parser::new_with_size(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { Sequence::Csi(params, intermediates, ignore, _) => { - assert_eq!(intermediates, &[b'?']); + assert_eq!(intermediates, b"?"); assert_eq!(params, &[[1049]]); assert!(!ignore); }, @@ -974,19 +1037,13 @@ mod tests { let mut parser: Parser = Parser::new_with_size(); // Create valid OSC escape - for byte in INPUT_START { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT_START); // Exceed max buffer size - for _ in 0..NUM_BYTES { - parser.advance(&mut dispatcher, b'a'); - } + parser.advance(&mut dispatcher, &[b'a'; NUM_BYTES]); // Terminate escape for dispatch - for byte in INPUT_END { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT_END); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -1012,15 +1069,9 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser: Parser<5> = Parser::new_with_size(); - for byte in INPUT_START { - parser.advance(&mut dispatcher, *byte); - } - for byte in INPUT_MIDDLE { - parser.advance(&mut dispatcher, *byte); - } - for byte in INPUT_END { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT_START); + parser.advance(&mut dispatcher, INPUT_MIDDLE); + parser.advance(&mut dispatcher, INPUT_END); assert_eq!(dispatcher.dispatched.len(), 2); match &dispatcher.dispatched[0] { @@ -1031,74 +1082,128 @@ mod tests { _ => panic!("expected osc sequence"), } } -} -#[cfg(all(feature = "nightly", test))] -mod bench { - extern crate std; - extern crate test; + #[test] + fn unicode() { + const INPUT: &[u8] = b"\xF0\x9F\x8E\x89_\xF0\x9F\xA6\x80\xF0\x9F\xA6\x80_\xF0\x9F\x8E\x89"; - use super::*; + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); - use test::{black_box, Bencher}; + parser.advance(&mut dispatcher, INPUT); - static VTE_DEMO: &[u8] = include_bytes!("../tests/demo.vte"); + assert_eq!(dispatcher.dispatched.len(), 6); + assert_eq!(dispatcher.dispatched[0], Sequence::Print('🎉')); + assert_eq!(dispatcher.dispatched[1], Sequence::Print('_')); + assert_eq!(dispatcher.dispatched[2], Sequence::Print('🦀')); + assert_eq!(dispatcher.dispatched[3], Sequence::Print('🦀')); + assert_eq!(dispatcher.dispatched[4], Sequence::Print('_')); + assert_eq!(dispatcher.dispatched[5], Sequence::Print('🎉')); + } - struct BenchDispatcher; - impl Perform for BenchDispatcher { - fn print(&mut self, c: char) { - black_box(c); - } + #[test] + fn invalid_utf8() { + const INPUT: &[u8] = b"a\xEF\xBCb"; - fn execute(&mut self, byte: u8) { - black_box(byte); - } + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); - fn hook(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char) { - black_box((params, intermediates, ignore, c)); - } + parser.advance(&mut dispatcher, INPUT); - fn put(&mut self, byte: u8) { - black_box(byte); - } + assert_eq!(dispatcher.dispatched.len(), 3); + assert_eq!(dispatcher.dispatched[0], Sequence::Print('a')); + assert_eq!(dispatcher.dispatched[1], Sequence::Print('�')); + assert_eq!(dispatcher.dispatched[2], Sequence::Print('b')); + } - fn osc_dispatch(&mut self, params: &[&[u8]], bell_terminated: bool) { - black_box((params, bell_terminated)); - } + #[test] + fn partial_utf8() { + const INPUT: &[u8] = b"\xF0\x9F\x9A\x80"; - fn csi_dispatch(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char) { - black_box((params, intermediates, ignore, c)); - } + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); - fn esc_dispatch(&mut self, intermediates: &[u8], ignore: bool, byte: u8) { - black_box((intermediates, ignore, byte)); - } + parser.advance(&mut dispatcher, &INPUT[..1]); + parser.advance(&mut dispatcher, &INPUT[1..2]); + parser.advance(&mut dispatcher, &INPUT[2..3]); + parser.advance(&mut dispatcher, &INPUT[3..]); + + assert_eq!(dispatcher.dispatched.len(), 1); + assert_eq!(dispatcher.dispatched[0], Sequence::Print('🚀')); } - #[bench] - fn testfile(b: &mut Bencher) { - b.iter(|| { - let mut dispatcher = BenchDispatcher; - let mut parser = Parser::new(); + #[test] + fn partial_utf8_separating_utf8() { + // This is different from the `partial_utf8` test since it has a multi-byte UTF8 + // character after the partial UTF8 state, causing a partial byte to be present + // in the `partial_utf8` buffer after the 2-byte codepoint. - for byte in VTE_DEMO { - parser.advance(&mut dispatcher, *byte); - } - }); + // "ĸ🎉" + const INPUT: &[u8] = b"\xC4\xB8\xF0\x9F\x8E\x89"; + + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + + parser.advance(&mut dispatcher, &INPUT[..1]); + parser.advance(&mut dispatcher, &INPUT[1..]); + + assert_eq!(dispatcher.dispatched.len(), 2); + assert_eq!(dispatcher.dispatched[0], Sequence::Print('ĸ')); + assert_eq!(dispatcher.dispatched[1], Sequence::Print('🎉')); } - #[bench] - fn state_changes(b: &mut Bencher) { - let input = b"\x1b]2;X\x1b\\ \x1b[0m \x1bP0@\x1b\\"; - b.iter(|| { - let mut dispatcher = BenchDispatcher; - let mut parser = Parser::new(); + #[test] + fn partial_invalid_utf8() { + const INPUT: &[u8] = b"a\xEF\xBCb"; - for _ in 0..1_000 { - for byte in input { - parser.advance(&mut dispatcher, *byte); - } - } - }); + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + + parser.advance(&mut dispatcher, &INPUT[..1]); + parser.advance(&mut dispatcher, &INPUT[1..2]); + parser.advance(&mut dispatcher, &INPUT[2..3]); + parser.advance(&mut dispatcher, &INPUT[3..]); + + assert_eq!(dispatcher.dispatched.len(), 3); + assert_eq!(dispatcher.dispatched[0], Sequence::Print('a')); + assert_eq!(dispatcher.dispatched[1], Sequence::Print('�')); + assert_eq!(dispatcher.dispatched[2], Sequence::Print('b')); + } + + #[test] + fn c1s() { + const INPUT: &[u8] = b"\x00\x1f\x80\x90\x98\x9b\x9c\x9d\x9e\x9fa"; + + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + + parser.advance(&mut dispatcher, INPUT); + + assert_eq!(dispatcher.dispatched.len(), 11); + assert_eq!(dispatcher.dispatched[0], Sequence::Execute(0)); + assert_eq!(dispatcher.dispatched[1], Sequence::Execute(31)); + assert_eq!(dispatcher.dispatched[2], Sequence::Execute(128)); + assert_eq!(dispatcher.dispatched[3], Sequence::Execute(144)); + assert_eq!(dispatcher.dispatched[4], Sequence::Execute(152)); + assert_eq!(dispatcher.dispatched[5], Sequence::Execute(155)); + assert_eq!(dispatcher.dispatched[6], Sequence::Execute(156)); + assert_eq!(dispatcher.dispatched[7], Sequence::Execute(157)); + assert_eq!(dispatcher.dispatched[8], Sequence::Execute(158)); + assert_eq!(dispatcher.dispatched[9], Sequence::Execute(159)); + assert_eq!(dispatcher.dispatched[10], Sequence::Print('a')); + } + + #[test] + fn execute_anywhere() { + const INPUT: &[u8] = b"\x18\x1a"; + + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + + parser.advance(&mut dispatcher, INPUT); + + assert_eq!(dispatcher.dispatched.len(), 2); + assert_eq!(dispatcher.dispatched[0], Sequence::Execute(0x18)); + assert_eq!(dispatcher.dispatched[1], Sequence::Execute(0x1A)); } } diff --git a/src/params.rs b/src/params.rs index 608c040..967befb 100644 --- a/src/params.rs +++ b/src/params.rs @@ -8,8 +8,9 @@ pub(crate) const MAX_PARAMS: usize = 32; pub struct Params { /// Number of subparameters for each parameter. /// - /// For each entry in the `params` slice, this stores the length of the param as number of - /// subparams at the same index as the param in the `params` slice. + /// For each entry in the `params` slice, this stores the length of the + /// param as number of subparams at the same index as the param in the + /// `params` slice. /// /// At the subparam positions the length will always be `0`. subparams: [u8; MAX_PARAMS], diff --git a/src/table.rs b/src/table.rs index f2c0105..b440606 100644 --- a/src/table.rs +++ b/src/table.rs @@ -1,39 +1,20 @@ -/// This is the state change table. It's indexed first by current state and then by the next -/// character in the pty stream. -use crate::definitions::{pack, Action, State}; - use vte_generate_state_changes::generate_state_changes; +/// This is the state change table. It's indexed first by current state and then +/// by the next character in the pty stream. +use crate::definitions::{pack, Action, State}; + // Generate state changes at compile-time -pub static STATE_CHANGES: [[u8; 256]; 16] = state_changes(); +pub static STATE_CHANGES: [[u8; 256]; 13] = state_changes(); generate_state_changes!(state_changes, { - Anywhere { - 0x18 => (Ground, Execute), - 0x1a => (Ground, Execute), - 0x1b => (Escape, None), - }, - - Ground { - 0x00..=0x17 => (Anywhere, Execute), - 0x19 => (Anywhere, Execute), - 0x1c..=0x1f => (Anywhere, Execute), - 0x20..=0x7f => (Anywhere, Print), - 0x80..=0x8f => (Anywhere, Execute), - 0x91..=0x9a => (Anywhere, Execute), - 0x9c => (Anywhere, Execute), - // Beginning of UTF-8 2 byte sequence - 0xc2..=0xdf => (Utf8, BeginUtf8), - // Beginning of UTF-8 3 byte sequence - 0xe0..=0xef => (Utf8, BeginUtf8), - // Beginning of UTF-8 4 byte sequence - 0xf0..=0xf4 => (Utf8, BeginUtf8), - }, - Escape { 0x00..=0x17 => (Anywhere, Execute), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Execute), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => (Anywhere, Execute), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x20..=0x2f => (EscapeIntermediate, Collect), 0x30..=0x4f => (Ground, EscDispatch), 0x51..=0x57 => (Ground, EscDispatch), @@ -51,18 +32,24 @@ generate_state_changes!(state_changes, { EscapeIntermediate { 0x00..=0x17 => (Anywhere, Execute), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Execute), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => (Anywhere, Execute), 0x20..=0x2f => (Anywhere, Collect), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x30..=0x7e => (Ground, EscDispatch), }, CsiEntry { 0x00..=0x17 => (Anywhere, Execute), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Execute), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => (Anywhere, Execute), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x20..=0x2f => (CsiIntermediate, Collect), 0x30..=0x39 => (CsiParam, Param), 0x3a..=0x3b => (CsiParam, Param), @@ -72,20 +59,26 @@ generate_state_changes!(state_changes, { CsiIgnore { 0x00..=0x17 => (Anywhere, Execute), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Execute), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => (Anywhere, Execute), - 0x20..=0x3f => (Anywhere, Ignore), - 0x7f => (Anywhere, Ignore), + 0x20..=0x3f => (Anywhere, None), + 0x7f => (Anywhere, None), 0x40..=0x7e => (Ground, None), }, CsiParam { 0x00..=0x17 => (Anywhere, Execute), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Execute), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => (Anywhere, Execute), 0x30..=0x39 => (Anywhere, Param), 0x3a..=0x3b => (Anywhere, Param), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x3c..=0x3f => (CsiIgnore, None), 0x20..=0x2f => (CsiIntermediate, Collect), 0x40..=0x7e => (Ground, CsiDispatch), @@ -93,19 +86,25 @@ generate_state_changes!(state_changes, { CsiIntermediate { 0x00..=0x17 => (Anywhere, Execute), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Execute), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => (Anywhere, Execute), 0x20..=0x2f => (Anywhere, Collect), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x30..=0x3f => (CsiIgnore, None), 0x40..=0x7e => (Ground, CsiDispatch), }, DcsEntry { - 0x00..=0x17 => (Anywhere, Ignore), - 0x19 => (Anywhere, Ignore), - 0x1c..=0x1f => (Anywhere, Ignore), - 0x7f => (Anywhere, Ignore), + 0x00..=0x17 => (Anywhere, None), + 0x18 => (Ground, Execute), + 0x19 => (Anywhere, None), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), + 0x1c..=0x1f => (Anywhere, None), + 0x7f => (Anywhere, None), 0x20..=0x2f => (DcsIntermediate, Collect), 0x30..=0x39 => (DcsParam, Param), 0x3a..=0x3b => (DcsParam, Param), @@ -114,30 +113,39 @@ generate_state_changes!(state_changes, { }, DcsIntermediate { - 0x00..=0x17 => (Anywhere, Ignore), - 0x19 => (Anywhere, Ignore), - 0x1c..=0x1f => (Anywhere, Ignore), + 0x00..=0x17 => (Anywhere, None), + 0x18 => (Ground, Execute), + 0x19 => (Anywhere, None), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), + 0x1c..=0x1f => (Anywhere, None), 0x20..=0x2f => (Anywhere, Collect), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x30..=0x3f => (DcsIgnore, None), 0x40..=0x7e => (DcsPassthrough, None), }, DcsIgnore { - 0x00..=0x17 => (Anywhere, Ignore), - 0x19 => (Anywhere, Ignore), - 0x1c..=0x1f => (Anywhere, Ignore), - 0x20..=0x7f => (Anywhere, Ignore), + 0x00..=0x17 => (Anywhere, None), + 0x18 => (Ground, Execute), + 0x19 => (Anywhere, None), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), + 0x1c..=0x1f => (Anywhere, None), + 0x20..=0x7f => (Anywhere, None), 0x9c => (Ground, None), }, DcsParam { - 0x00..=0x17 => (Anywhere, Ignore), - 0x19 => (Anywhere, Ignore), - 0x1c..=0x1f => (Anywhere, Ignore), + 0x00..=0x17 => (Anywhere, None), + 0x18 => (Ground, Execute), + 0x19 => (Anywhere, None), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), + 0x1c..=0x1f => (Anywhere, None), 0x30..=0x39 => (Anywhere, Param), 0x3a..=0x3b => (Anywhere, Param), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x3c..=0x3f => (DcsIgnore, None), 0x20..=0x2f => (DcsIntermediate, Collect), 0x40..=0x7e => (DcsPassthrough, None), @@ -145,27 +153,36 @@ generate_state_changes!(state_changes, { DcsPassthrough { 0x00..=0x17 => (Anywhere, Put), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Put), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => (Anywhere, Put), 0x20..=0x7e => (Anywhere, Put), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x9c => (Ground, None), }, SosPmApcString { - 0x00..=0x17 => (Anywhere, Ignore), - 0x19 => (Anywhere, Ignore), - 0x1c..=0x1f => (Anywhere, Ignore), - 0x20..=0x7f => (Anywhere, Ignore), + 0x00..=0x17 => (Anywhere, None), + 0x18 => (Ground, Execute), + 0x19 => (Anywhere, None), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), + 0x1c..=0x1f => (Anywhere, None), + 0x20..=0x7f => (Anywhere, None), 0x9c => (Ground, None), }, OscString { - 0x00..=0x06 => (Anywhere, Ignore), + 0x00..=0x06 => (Anywhere, None), 0x07 => (Ground, None), - 0x08..=0x17 => (Anywhere, Ignore), - 0x19 => (Anywhere, Ignore), - 0x1c..=0x1f => (Anywhere, Ignore), + 0x08..=0x17 => (Anywhere, None), + 0x18 => (Ground, Execute), + 0x19 => (Anywhere, None), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), + 0x1c..=0x1f => (Anywhere, None), 0x20..=0xff => (Anywhere, OscPut), } }); diff --git a/utf8parse/Cargo.toml b/utf8parse/Cargo.toml deleted file mode 100644 index 71ea44b..0000000 --- a/utf8parse/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -authors = ["Joe Wilm ", "Christian Duerr "] -description = "Table-driven UTF-8 parser" -documentation = "https://docs.rs/utf8parse/" -repository = "https://github.com/alacritty/vte" -keywords = ["utf8", "parse", "table"] -categories = ["parsing", "no-std"] -license = "Apache-2.0 OR MIT" -version = "0.2.2" -name = "utf8parse" -edition = "2018" - -[features] -nightly = [] -default = [] diff --git a/utf8parse/LICENSE-APACHE b/utf8parse/LICENSE-APACHE deleted file mode 120000 index 965b606..0000000 --- a/utf8parse/LICENSE-APACHE +++ /dev/null @@ -1 +0,0 @@ -../LICENSE-APACHE \ No newline at end of file diff --git a/utf8parse/LICENSE-MIT b/utf8parse/LICENSE-MIT deleted file mode 120000 index 76219eb..0000000 --- a/utf8parse/LICENSE-MIT +++ /dev/null @@ -1 +0,0 @@ -../LICENSE-MIT \ No newline at end of file diff --git a/utf8parse/src/lib.rs b/utf8parse/src/lib.rs deleted file mode 100644 index 093de81..0000000 --- a/utf8parse/src/lib.rs +++ /dev/null @@ -1,132 +0,0 @@ -//! A table-driven UTF-8 Parser -//! -//! This module implements a table-driven UTF-8 parser which should -//! theoretically contain the minimal number of branches (1). The only branch is -//! on the `Action` returned from unpacking a transition. -#![deny(clippy::all, clippy::if_not_else, clippy::enum_glob_use)] -#![cfg_attr(all(feature = "nightly", test), feature(test))] -#![no_std] - -use core::char; - -mod types; - -use types::{Action, State}; - -/// Handles codepoint and invalid sequence events from the parser. -pub trait Receiver { - /// Called whenever a codepoint is parsed successfully - fn codepoint(&mut self, _: char); - - /// Called when an invalid_sequence is detected - fn invalid_sequence(&mut self); -} - -/// A parser for Utf8 Characters -/// -/// Repeatedly call `advance` with bytes to emit Utf8 characters -#[derive(Clone, Default, PartialEq, Eq, Debug)] -pub struct Parser { - point: u32, - state: State, -} - -/// Continuation bytes are masked with this value. -const CONTINUATION_MASK: u8 = 0b0011_1111; - -impl Parser { - /// Create a new Parser - pub fn new() -> Parser { - Parser { point: 0, state: State::Ground } - } - - /// Advance the parser - /// - /// The provider receiver will be called whenever a codepoint is completed or an invalid - /// sequence is detected. - pub fn advance(&mut self, receiver: &mut R, byte: u8) - where - R: Receiver, - { - let (state, action) = self.state.advance(byte); - self.perform_action(receiver, byte, action); - self.state = state; - } - - fn perform_action(&mut self, receiver: &mut R, byte: u8, action: Action) - where - R: Receiver, - { - match action { - Action::InvalidSequence => { - self.point = 0; - receiver.invalid_sequence(); - }, - Action::EmitByte => { - receiver.codepoint(byte as char); - }, - Action::SetByte1 => { - let point = self.point | ((byte & CONTINUATION_MASK) as u32); - let c = unsafe { char::from_u32_unchecked(point) }; - self.point = 0; - - receiver.codepoint(c); - }, - Action::SetByte2 => { - self.point |= ((byte & CONTINUATION_MASK) as u32) << 6; - }, - Action::SetByte2Top => { - self.point |= ((byte & 0b0001_1111) as u32) << 6; - }, - Action::SetByte3 => { - self.point |= ((byte & CONTINUATION_MASK) as u32) << 12; - }, - Action::SetByte3Top => { - self.point |= ((byte & 0b0000_1111) as u32) << 12; - }, - Action::SetByte4 => { - self.point |= ((byte & 0b0000_0111) as u32) << 18; - }, - } - } -} - -#[cfg(all(feature = "nightly", test))] -mod benches { - extern crate std; - extern crate test; - - use super::{Parser, Receiver}; - - use self::test::{black_box, Bencher}; - - static UTF8_DEMO: &[u8] = include_bytes!("../tests/UTF-8-demo.txt"); - - impl Receiver for () { - fn codepoint(&mut self, c: char) { - black_box(c); - } - - fn invalid_sequence(&mut self) {} - } - - #[bench] - fn parse_bench_utf8_demo(b: &mut Bencher) { - let mut parser = Parser::new(); - - b.iter(|| { - for byte in UTF8_DEMO { - parser.advance(&mut (), *byte); - } - }) - } - - #[bench] - fn std_string_parse_utf8(b: &mut Bencher) { - b.iter(|| { - for c in std::str::from_utf8(UTF8_DEMO).unwrap().chars() { - black_box(c); - } - }); - } -} diff --git a/utf8parse/src/types.rs b/utf8parse/src/types.rs deleted file mode 100644 index 8a52c67..0000000 --- a/utf8parse/src/types.rs +++ /dev/null @@ -1,100 +0,0 @@ -//! Types supporting the UTF-8 parser - -/// Action to take when receiving a byte -#[derive(Debug, Copy, Clone)] -pub enum Action { - /// Unexpected byte; sequence is invalid - InvalidSequence = 0, - /// Received valid 7-bit ASCII byte which can be directly emitted. - EmitByte = 1, - /// Set the bottom continuation byte - SetByte1 = 2, - /// Set the 2nd-from-last continuation byte - SetByte2 = 3, - /// Set the 2nd-from-last byte which is part of a two byte sequence - SetByte2Top = 4, - /// Set the 3rd-from-last continuation byte - SetByte3 = 5, - /// Set the 3rd-from-last byte which is part of a three byte sequence - SetByte3Top = 6, - /// Set the top byte of a four byte sequence. - SetByte4 = 7, -} - -/// States the parser can be in. -/// -/// There is a state for each initial input of the 3 and 4 byte sequences since -/// the following bytes are subject to different conditions than a tail byte. -#[allow(non_camel_case_types)] -#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] -pub enum State { - /// Ground state; expect anything - #[default] - Ground = 0, - /// 3 tail bytes - Tail3 = 1, - /// 2 tail bytes - Tail2 = 2, - /// 1 tail byte - Tail1 = 3, - /// UTF8-3 starting with E0 - U3_2_e0 = 4, - /// UTF8-3 starting with ED - U3_2_ed = 5, - /// UTF8-4 starting with F0 - Utf8_4_3_f0 = 6, - /// UTF8-4 starting with F4 - Utf8_4_3_f4 = 7, -} - -impl State { - /// Advance the parser state. - /// - /// This takes the current state and input byte into consideration, to determine the next state - /// and any action that should be taken. - #[inline] - pub fn advance(self, byte: u8) -> (State, Action) { - match self { - State::Ground => match byte { - 0x00..=0x7f => (State::Ground, Action::EmitByte), - 0xc2..=0xdf => (State::Tail1, Action::SetByte2Top), - 0xe0 => (State::U3_2_e0, Action::SetByte3Top), - 0xe1..=0xec => (State::Tail2, Action::SetByte3Top), - 0xed => (State::U3_2_ed, Action::SetByte3Top), - 0xee..=0xef => (State::Tail2, Action::SetByte3Top), - 0xf0 => (State::Utf8_4_3_f0, Action::SetByte4), - 0xf1..=0xf3 => (State::Tail3, Action::SetByte4), - 0xf4 => (State::Utf8_4_3_f4, Action::SetByte4), - _ => (State::Ground, Action::InvalidSequence), - }, - State::U3_2_e0 => match byte { - 0xa0..=0xbf => (State::Tail1, Action::SetByte2), - _ => (State::Ground, Action::InvalidSequence), - }, - State::U3_2_ed => match byte { - 0x80..=0x9f => (State::Tail1, Action::SetByte2), - _ => (State::Ground, Action::InvalidSequence), - }, - State::Utf8_4_3_f0 => match byte { - 0x90..=0xbf => (State::Tail2, Action::SetByte3), - _ => (State::Ground, Action::InvalidSequence), - }, - State::Utf8_4_3_f4 => match byte { - 0x80..=0x8f => (State::Tail2, Action::SetByte3), - _ => (State::Ground, Action::InvalidSequence), - }, - State::Tail3 => match byte { - 0x80..=0xbf => (State::Tail2, Action::SetByte3), - _ => (State::Ground, Action::InvalidSequence), - }, - State::Tail2 => match byte { - 0x80..=0xbf => (State::Tail1, Action::SetByte2), - _ => (State::Ground, Action::InvalidSequence), - }, - State::Tail1 => match byte { - 0x80..=0xbf => (State::Ground, Action::SetByte1), - _ => (State::Ground, Action::InvalidSequence), - }, - } - } -} diff --git a/utf8parse/tests/UTF-8-demo.txt b/utf8parse/tests/UTF-8-demo.txt deleted file mode 100644 index 4363f27..0000000 --- a/utf8parse/tests/UTF-8-demo.txt +++ /dev/null @@ -1,212 +0,0 @@ - -UTF-8 encoded sample plain-text file -‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ - -Markus Kuhn [ˈmaʳkʊs kuːn] — 2002-07-25 - - -The ASCII compatible UTF-8 encoding used in this plain-text file -is defined in Unicode, ISO 10646-1, and RFC 2279. - - -Using Unicode/UTF-8, you can write in emails and source code things such as - -Mathematics and sciences: - - ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫ - ⎪⎢⎜│a²+b³ ⎟⎥⎪ - ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪ - ⎪⎢⎜⎷ c₈ ⎟⎥⎪ - ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬ - ⎪⎢⎜ ∞ ⎟⎥⎪ - ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪ - ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪ - 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭ - -Linguistics and dictionaries: - - ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn - Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] - -APL: - - ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈ - -Nicer typography in plain text files: - - ╔══════════════════════════════════════════╗ - ║ ║ - ║ • ‘single’ and “double” quotes ║ - ║ ║ - ║ • Curly apostrophes: “We’ve been here” ║ - ║ ║ - ║ • Latin-1 apostrophe and accents: '´` ║ - ║ ║ - ║ • ‚deutsche‘ „Anführungszeichen“ ║ - ║ ║ - ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║ - ║ ║ - ║ • ASCII safety test: 1lI|, 0OD, 8B ║ - ║ ╭─────────╮ ║ - ║ • the euro symbol: │ 14.95 € │ ║ - ║ ╰─────────╯ ║ - ╚══════════════════════════════════════════╝ - -Combining characters: - - STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑ - -Greek (in Polytonic): - - The Greek anthem: - - Σὲ γνωρίζω ἀπὸ τὴν κόψη - τοῦ σπαθιοῦ τὴν τρομερή, - σὲ γνωρίζω ἀπὸ τὴν ὄψη - ποὺ μὲ βία μετράει τὴ γῆ. - - ᾿Απ᾿ τὰ κόκκαλα βγαλμένη - τῶν ῾Ελλήνων τὰ ἱερά - καὶ σὰν πρῶτα ἀνδρειωμένη - χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά! - - From a speech of Demosthenes in the 4th century BC: - - Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι, - ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς - λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ - τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿ - εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ - πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν - οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι, - οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν - ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον - τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι - γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν - προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους - σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ - τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ - τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς - τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον. - - Δημοσθένους, Γ´ ᾿Ολυνθιακὸς - -Georgian: - - From a Unicode conference invitation: - - გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო - კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს, - ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს - ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი, - ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება - ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში, - ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში. - -Russian: - - From a Unicode conference invitation: - - Зарегистрируйтесь сейчас на Десятую Международную Конференцию по - Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии. - Конференция соберет широкий круг экспертов по вопросам глобального - Интернета и Unicode, локализации и интернационализации, воплощению и - применению Unicode в различных операционных системах и программных - приложениях, шрифтах, верстке и многоязычных компьютерных системах. - -Thai (UCS Level 2): - - Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese - classic 'San Gua'): - - [----------------------------|------------------------] - ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่ - สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา - ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา - โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ - เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ - ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ - พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้ - ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ - - (The above is a two-column text. If combining characters are handled - correctly, the lines of the second column should be aligned with the - | character above.) - -Ethiopian: - - Proverbs in the Amharic language: - - ሰማይ አይታረስ ንጉሥ አይከሰስ። - ብላ ካለኝ እንደአባቴ በቆመጠኝ። - ጌጥ ያለቤቱ ቁምጥና ነው። - ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው። - የአፍ ወለምታ በቅቤ አይታሽም። - አይጥ በበላ ዳዋ ተመታ። - ሲተረጉሙ ይደረግሙ። - ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል። - ድር ቢያብር አንበሳ ያስር። - ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም። - እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም። - የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ። - ሥራ ከመፍታት ልጄን ላፋታት። - ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል። - የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ። - ተንጋሎ ቢተፉ ተመልሶ ባፉ። - ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው። - እግርህን በፍራሽህ ልክ ዘርጋ። - -Runes: - - ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ - - (Old English, which transcribed into Latin reads 'He cwaeth that he - bude thaem lande northweardum with tha Westsae.' and means 'He said - that he lived in the northern land near the Western Sea.') - -Braille: - - ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌ - - ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞ - ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎ - ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂ - ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙ - ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑ - ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲ - - ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ - - ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹ - ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞ - ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕ - ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹ - ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎ - ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎ - ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳ - ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞ - ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ - - (The first couple of paragraphs of "A Christmas Carol" by Dickens) - -Compact font selection example text: - - ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789 - abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ - –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд - ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა - -Greetings in various languages: - - Hello world, Καλημέρα κόσμε, コンニチハ - -Box drawing alignment tests: █ - ▉ - ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳ - ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳ - ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳ - ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳ - ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎ - ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏ - ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█ - ▝▀▘▙▄▟ diff --git a/utf8parse/tests/utf-8-demo.rs b/utf8parse/tests/utf-8-demo.rs deleted file mode 100644 index 51df492..0000000 --- a/utf8parse/tests/utf-8-demo.rs +++ /dev/null @@ -1,31 +0,0 @@ -use utf8parse::{Parser, Receiver}; - -static UTF8_DEMO: &[u8] = include_bytes!("UTF-8-demo.txt"); - -#[derive(Debug, PartialEq)] -struct StringWrapper(String); - -impl Receiver for StringWrapper { - fn codepoint(&mut self, c: char) { - self.0.push(c); - } - - fn invalid_sequence(&mut self) {} -} - -#[test] -fn utf8parse_test() { - let mut parser = Parser::new(); - - // utf8parse implementation - let mut actual = StringWrapper(String::new()); - - for byte in UTF8_DEMO { - parser.advance(&mut actual, *byte) - } - - // standard library implementation - let expected = String::from_utf8_lossy(UTF8_DEMO); - - assert_eq!(actual.0, expected); -} diff --git a/vte_generate_state_changes/src/lib.rs b/vte_generate_state_changes/src/lib.rs index b016518..ff8ea49 100644 --- a/vte_generate_state_changes/src/lib.rs +++ b/vte_generate_state_changes/src/lib.rs @@ -25,8 +25,8 @@ pub fn generate_state_changes(item: proc_macro::TokenStream) -> proc_macro::Toke let assignments_stream = states_stream(&mut iter); quote!( - const fn #fn_name() -> [[u8; 256]; 16] { - let mut state_changes = [[0; 256]; 16]; + const fn #fn_name() -> [[u8; 256]; 13] { + let mut state_changes = [[0; 256]; 13]; #assignments_stream @@ -71,7 +71,8 @@ fn state_entry_stream(iter: &mut Peekable) -> TokenStrea tokens } -/// Generate the array assignment statement for a single byte->target mapping for one state. +/// Generate the array assignment statement for a single byte->target mapping +/// for one state. fn change_stream(iter: &mut Peekable, state: &TokenTree) -> TokenStream { // Start of input byte range let start = next_usize(iter); @@ -101,8 +102,6 @@ fn change_stream(iter: &mut Peekable, state: &TokenTree) // Create a new entry for every byte in the range for byte in start..=end { - // TODO: Force adding `State::` and `Action::`? - // TODO: Should we really use `pack` here without import? tokens.extend(quote!( state_changes[State::#state as usize][#byte] = pack(State::#target_state, Action::#target_action); @@ -148,7 +147,8 @@ fn expect_punct(iter: &mut impl Iterator, c: char) { /// /// # Panics /// -/// Panics if the next token is not a [`usize`] in hex or decimal literal format. +/// Panics if the next token is not a [`usize`] in hex or decimal literal +/// format. fn next_usize(iter: &mut impl Iterator) -> usize { match iter.next() { Some(Literal(literal)) => {