Skip to content

Commit

Permalink
Switch parser to multi-byte processing
Browse files Browse the repository at this point in the history
This patch overhauls the `Parser::advance` API to operate on byte slices
instead of individual bytes, which allows for additional performance
optimizations.

VTE does not support C1 escapes and C0 escapes always start with an
escape character. This makes it possible to simplify processing if a
byte stream is determined to not contain any escapes. The `memchr` crate
provides a battle-tested implementation for SIMD-accelerated byte
searches, which is why this implementation makes use of it.

VTE also only supports UTF-8 characters in the ground state, which means
that the new non-escape parsing path is able to rely completely on the
standard library's `str::from_utf8`, since `memchr` gives us the full
length of the plain text character buffer. This allows us to completely
remove `utf8parse` and all related code.

We also make use of `memchr` in the synchronized escape handling in
`ansi.rs`, since it relies heavily on scanning large amounts of text
for the extension/termination escape sequences.
  • Loading branch information
chrisduerr committed Dec 20, 2024
1 parent ebc4a4d commit fbe3273
Show file tree
Hide file tree
Showing 16 changed files with 774 additions and 1,122 deletions.
21 changes: 10 additions & 11 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,20 @@ name = "vte"
edition = "2021"
rust-version = "1.62.1"

[dependencies]
arrayvec = { version = "0.7.2", default-features = false, optional = true }
bitflags = { version = "2.3.3", default-features = false, optional = true }
cursor-icon = { version = "1.0.0", default-features = false, optional = true }
log = { version = "0.4.17", optional = true }
serde = { version = "1.0.160", features = ["derive"], optional = true }
utf8parse = { version = "0.2.0", path = "utf8parse" }
vte_generate_state_changes = { version = "0.1.0", path = "vte_generate_state_changes" }
[workspace]
members = ["vte_generate_state_changes"]

[features]
ansi = ["log", "cursor-icon", "bitflags"]
default = ["no_std"]
nightly = ["utf8parse/nightly"]
no_std = ["arrayvec"]
serde = ["dep:serde"]

[workspace]
members = ["utf8parse", "vte_generate_state_changes"]
[dependencies]
arrayvec = { version = "0.7.2", default-features = false, optional = true }
bitflags = { version = "2.3.3", default-features = false, optional = true }
cursor-icon = { version = "1.0.0", default-features = false, optional = true }
log = { version = "0.4.17", optional = true }
memchr = "2.7.4"
serde = { version = "1.0.160", features = ["derive"], optional = true }
vte_generate_state_changes = { version = "0.1.0", path = "vte_generate_state_changes" }
6 changes: 1 addition & 5 deletions examples/parselog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,7 @@ fn main() {
loop {
match handle.read(&mut buf) {
Ok(0) => break,
Ok(n) => {
for byte in &buf[..n] {
statemachine.advance(&mut performer, *byte);
}
},
Ok(n) => statemachine.advance(&mut performer, &buf[..n]),
Err(err) => {
println!("err: {}", err);
break;
Expand Down
6 changes: 5 additions & 1 deletion rustfmt.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
format_code_in_doc_comments = true
group_imports = "StdExternalCrate"
match_block_trailing_comma = true
condense_wildcard_suffixes = true
use_field_init_shorthand = true
normalize_doc_attributes = true
overflow_delimited_expr = true
imports_granularity = "Module"
format_macro_matchers = true
use_small_heuristics = "Max"
hex_literal_case = "Upper"
normalize_comments = true
reorder_impl_items = true
use_try_shorthand = true
newline_style = "Unix"
format_strings = true
wrap_comments = true
comment_width = 100
Loading

0 comments on commit fbe3273

Please sign in to comment.