Skip to content

Commit

Permalink
Added more progress bars
Browse files Browse the repository at this point in the history
  • Loading branch information
ryanpeach committed Nov 15, 2024
1 parent 2058a8c commit c76599f
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 46 deletions.
3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,12 @@ opt-level = 3
debug = false

[dependencies]
aho-corasick = "1.1.3"
bon = "2.3.0"
cached = "0.54.0"
clap = { version = "4.5.16", features = ["derive"] }
comrak = "0.29.0"
derive_more = { version = "1.0.0", features = ["full"] }
env_logger = "0.11.5"
fancy-regex = "0.14.0"
fuzzy-matcher = "0.3.7"
getset = "0.1.3"
git2 = "0.19.0"
Expand Down
35 changes: 31 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ pub mod rules;
pub mod sed;
pub mod visitor;

use std::{backtrace::Backtrace, cell::RefCell, rc::Rc};
use std::{backtrace::Backtrace, cell::RefCell, env, rc::Rc};

use file::{get_files, name::ngrams};
use indicatif::ProgressBar;
use log::info;
use miette::{Diagnostic, Result};
use ngrams::MissingSubstringError;
Expand Down Expand Up @@ -179,7 +180,7 @@ fn check(config: &config::Config) -> Result<OutputReport, OutputErrors> {

let mut reports: Vec<Report> = vec![];

// First pass
// Filename pass
// Just over filenames
// NOTE: Always use `filter_by_excludes` and `dedupe_by_code` on the reports
let similar_filenames = SimilarFilename::calculate(
Expand All @@ -194,24 +195,44 @@ fn check(config: &config::Config) -> Result<OutputReport, OutputErrors> {
.map(|x| Report::SimilarFilename(x.clone())),
);

// Second pass
// First pass
// This gives us metadata we need for all other rules from the content of files
// The duplicate alias visitor has to run first to get the table of aliases
let first_pass_bar: Option<ProgressBar> = if env::var("RUNNING_TESTS").is_ok() {
None
} else {
#[allow(clippy::cast_sign_loss)]
#[allow(clippy::cast_possible_truncation)]
Some(ProgressBar::new(all_files.len() as u64).with_prefix("First Pass"))
};
let duplicate_alias_visitor = Rc::new(RefCell::new(DuplicateAliasVisitor::new(
&all_files,
&config.filename_to_alias,
)));
for file in &all_files {
let visitors: Vec<Rc<RefCell<dyn Visitor>>> = vec![duplicate_alias_visitor.clone()];
parse(file, visitors)?;
if let Some(bar) = &first_pass_bar {
bar.inc(1);
}
}
let mut duplicate_alias_visitor: DuplicateAliasVisitor =
Rc::try_unwrap(duplicate_alias_visitor)
.expect("parse is done")
.into_inner();
reports.extend(duplicate_alias_visitor.finalize(&config.exclude)?);
if let Some(bar) = &first_pass_bar {
bar.finish();
}

// Third Pass
// Second Pass
let second_pass_bar: Option<ProgressBar> = if env::var("RUNNING_TESTS").is_ok() {
None
} else {
#[allow(clippy::cast_sign_loss)]
#[allow(clippy::cast_possible_truncation)]
Some(ProgressBar::new(all_files.len() as u64).with_prefix("Second Pass"))
};
let mut visitors: Vec<Rc<RefCell<dyn Visitor>>> = vec![];
for rule in ThirdPassRule::iter() {
visitors.push(match rule {
Expand All @@ -232,12 +253,18 @@ fn check(config: &config::Config) -> Result<OutputReport, OutputErrors> {

for file in &all_files {
parse(file, visitors.clone())?;
if let Some(bar) = &second_pass_bar {
bar.inc(1);
}
}

for visitor in visitors {
let mut visitor_cell = (*visitor).borrow_mut();
reports.extend(visitor_cell.finalize(&config.exclude)?);
}
if let Some(bar) = &second_pass_bar {
bar.finish();
}

Ok(OutputReport { reports })
}
Expand Down
2 changes: 1 addition & 1 deletion src/rules/similar_filename.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ impl SimilarFilename {
} else {
#[allow(clippy::cast_sign_loss)]
#[allow(clippy::cast_possible_truncation)]
Some(ProgressBar::new((n * (n + 1.0) / 2.0) as u64))
Some(ProgressBar::new((n * (n + 1.0) / 2.0) as u64).with_prefix("Crosschecking files"))
};
let matcher = SkimMatcherV2::default();
let mut matches: Vec<SimilarFilename> = Vec::new();
Expand Down
118 changes: 79 additions & 39 deletions src/rules/unlinked_text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@ use crate::{
sed::ReplacePair,
visitor::{FinalizeError, VisitError, Visitor},
};
use aho_corasick::AhoCorasick;
use bon::Builder;
use cached::proc_macro::cached;
use comrak::{
arena_tree::Node,
nodes::{Ast, NodeValue},
};
use fancy_regex::Regex;
use hashbrown::HashMap;
use miette::{Diagnostic, NamedSource, Result, SourceOffset, SourceSpan};
use std::{
Expand Down Expand Up @@ -111,11 +110,45 @@ impl UnlinkedTextVisitor {
}
}

#[cached]
fn get_regex(alias: Alias) -> Regex {
// Compile the regex and cache it based on the alias
let pattern = format!(r"(?i)(?<![\w#]){alias}(?!\w)");
Regex::new(&pattern).expect("The regex is just case insensitive string search")
/// Decides whether the byte range `start..end` of `text` is a standalone word.
///
/// The range qualifies only when all three conditions hold, checked in this
/// order (later checks are skipped once one fails):
/// 1. the character just before `start` is not alphanumeric (or there is none),
/// 2. the character just after `end` is not alphanumeric (or there is none),
/// 3. the character just before `start` is not `#`, so hashtags are rejected.
fn is_whole_word_match(text: &str, start: usize, end: usize) -> bool {
    if !is_start_boundary(text, start) {
        return false;
    }
    if !is_end_boundary(text, end) {
        return false;
    }
    !is_start_hashtag(text, start)
}

/// Reports whether byte offset `start` begins a word in `text`.
///
/// Returns `true` when nothing precedes `start` (it is `0`) or when the
/// character immediately before it is not alphanumeric. Punctuation such as
/// `_` or `#` therefore counts as a boundary here.
///
/// # Panics
///
/// Panics if `start` is past the end of `text` or does not fall on a UTF-8
/// character boundary, because the prefix is obtained by slicing.
fn is_start_boundary(text: &str, start: usize) -> bool {
    // An empty prefix yields `None`, which covers the `start == 0` case.
    match text[..start].chars().next_back() {
        Some(previous) => !previous.is_alphanumeric(),
        None => true,
    }
}

/// Checks if the character immediately before the start index is a `#`.
///
/// Used to recognise hashtags: a match at `start` that is directly preceded
/// by `#` is part of a tag rather than plain text. Returns `false` when
/// `start` is `0`, since nothing precedes the match.
///
/// # Panics
///
/// Panics if `start` is past the end of `text` or does not fall on a UTF-8
/// character boundary, because the prefix is obtained by slicing.
fn is_start_hashtag(text: &str, start: usize) -> bool {
    // The empty prefix (start == 0) never ends with '#', so no special case is needed.
    text[..start].ends_with('#')
}

/// Reports whether byte offset `end` terminates a word in `text`.
///
/// Returns `true` when nothing follows `end` (it equals `text.len()`) or when
/// the character starting at `end` is not alphanumeric. Punctuation such as
/// `_` therefore counts as a boundary here.
///
/// # Panics
///
/// Panics if `end` is past the end of `text` or does not fall on a UTF-8
/// character boundary, because the suffix is obtained by slicing.
fn is_end_boundary(text: &str, end: usize) -> bool {
    // An empty suffix yields `None`, which covers the `end == text.len()` case.
    let following = text[end..].chars().next();
    !matches!(following, Some(c) if c.is_alphanumeric())
}

impl Visitor for UnlinkedTextVisitor {
Expand All @@ -129,40 +162,47 @@ impl Visitor for UnlinkedTextVisitor {
let sourcepos = data_ref.sourcepos;
let parent = node.parent();
if let NodeValue::Text(text) = data {
for alias in self.alias_table.keys() {
// Make sure neither the character before or after is a letter
// This makes sure you aren't matching a part of a word
// This should also handle tags
// Check the character before the match

let re = get_regex(alias.clone());
if let Ok(Some(found)) = re.find(text) {
// Get our span
let span = repair_span_due_to_frontmatter(
SourceSpan::new(
(SourceOffset::from_location(
remove_frontmatter_from_source(source, node),
sourcepos.start.line,
sourcepos.start.column,
)
.offset()
+ found.start())
.into(),
alias.to_string().len(),
),
node,
);

// Dont match inside wikilinks
if let Some(parent) = parent {
if let NodeValue::WikiLink(_) = parent.data.borrow().value {
// If this is already in a link, skip it
continue;
}
let patterns: Vec<String> = self
.alias_table
.keys()
.map(std::string::ToString::to_string)
.collect();
let ac = AhoCorasick::builder()
.ascii_case_insensitive(true)
.build(&patterns)?;
// Make sure neither the character before or after is a letter
// This makes sure you aren't matching a part of a word
// This should also handle tags
// Check the character before the match
for found in ac.find_iter(text) {
if !is_whole_word_match(text, found.start(), found.end()) {
continue;
}
let alias = Alias::new(&patterns[found.pattern().as_usize()]);
let span = repair_span_due_to_frontmatter(
SourceSpan::new(
(SourceOffset::from_location(
remove_frontmatter_from_source(source, node),
sourcepos.start.line,
sourcepos.start.column,
)
.offset()
+ found.start())
.into(),
found.end() - found.start(),
),
node,
);

// Don't match inside wikilinks
if let Some(parent) = parent {
if let NodeValue::WikiLink(_) = parent.data.borrow().value {
// If this is already in a link, skip it
continue;
}

self.new_unlinked_texts.push((alias.clone(), span));
}

self.new_unlinked_texts.push((alias, span));
}
}
Ok(())
Expand Down
8 changes: 8 additions & 0 deletions src/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ pub enum VisitError {
#[backtrace]
backtrace: std::backtrace::Backtrace,
},

#[error("Error making patterns from aliases")]
AhoBuildError {
#[from]
source: aho_corasick::BuildError,
#[backtrace]
backtrace: std::backtrace::Backtrace,
},
}

#[derive(Error, Debug)]
Expand Down

0 comments on commit c76599f

Please sign in to comment.