Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --histogram=NUM argument #2918

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions crates/core/flags/defs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ pub(super) const FLAGS: &[&dyn Flag] = &[
&ContextSeparator,
&Count,
&CountMatches,
&Histogram,
&Crlf,
&Debug,
&DfaSizeLimit,
Expand Down Expand Up @@ -1322,6 +1323,44 @@ given.
}
}

/// --histogram
///
/// Takes a numeric bin size and switches the search mode to
/// `SearchMode::Histogram`.
#[derive(Debug)]
struct Histogram;

impl Flag for Histogram {
    fn is_switch(&self) -> bool {
        false
    }
    fn name_short(&self) -> Option<u8> {
        None
    }
    fn name_long(&self) -> &'static str {
        "histogram"
    }
    fn doc_variable(&self) -> Option<&'static str> {
        Some("NUM")
    }
    fn doc_category(&self) -> Category {
        Category::OutputModes
    }
    fn doc_short(&self) -> &'static str {
        r"Print a histogram of the matches"
    }
    fn doc_long(&self) -> &'static str {
        r"
The offset of the match and the specified bin size
(NUM) of this argument are used to determine which bin gets
incremented for every match. NUM must be greater than zero."
    }

    /// Parses the bin size, stores it in `args.histogram_bin_size` and
    /// selects the histogram search mode.
    ///
    /// # Errors
    ///
    /// Returns an error if the value is not a valid unsigned integer or if
    /// it is zero.
    fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
        let binsize = convert::u64(&v.unwrap_value())?;
        // The bin size is later used as a divisor when mapping a match
        // offset to its bin, so a zero value must be rejected up front
        // rather than panicking with a division by zero during the search.
        if binsize == 0 {
            anyhow::bail!("histogram bin size must be greater than zero");
        }
        args.histogram_bin_size = Some(binsize);
        args.mode.update(Mode::Search(SearchMode::Histogram));
        Ok(())
    }
}

#[cfg(test)]
#[test]
fn test_count_matches() {
Expand Down
7 changes: 7 additions & 0 deletions crates/core/flags/hiargs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ pub(crate) struct HiArgs {
follow: bool,
globs: ignore::overrides::Override,
heading: bool,
histogram_bin_size: Option<u64>,
hidden: bool,
hyperlink_config: grep::printer::HyperlinkConfig,
ignore_file_case_insensitive: bool,
Expand Down Expand Up @@ -203,6 +204,7 @@ impl HiArgs {
SearchMode::FilesWithMatches
| SearchMode::FilesWithoutMatch
| SearchMode::Count
| SearchMode::Histogram
| SearchMode::CountMatches => return false,
SearchMode::JSON => return true,
SearchMode::Standard => {
Expand Down Expand Up @@ -272,6 +274,7 @@ impl HiArgs {
follow: low.follow,
heading,
hidden: low.hidden,
histogram_bin_size: low.histogram_bin_size,
hyperlink_config,
ignore_file: low.ignore_file,
ignore_file_case_insensitive: low.ignore_file_case_insensitive,
Expand Down Expand Up @@ -569,6 +572,10 @@ impl HiArgs {
SearchMode::FilesWithoutMatch => SummaryKind::PathWithoutMatch,
SearchMode::Count => SummaryKind::Count,
SearchMode::CountMatches => SummaryKind::CountMatches,
SearchMode::Histogram => SummaryKind::Histogram(
self.histogram_bin_size
.expect("Histogram bin size must be specified"),
),
SearchMode::JSON => {
return Printer::JSON(self.printer_json(wtr))
}
Expand Down
3 changes: 3 additions & 0 deletions crates/core/flags/lowargs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ pub(crate) struct LowArgs {
pub(crate) globs: Vec<String>,
pub(crate) heading: Option<bool>,
pub(crate) hidden: bool,
pub(crate) histogram_bin_size: Option<u64>,
pub(crate) hostname_bin: Option<PathBuf>,
pub(crate) hyperlink_format: HyperlinkFormat,
pub(crate) iglobs: Vec<String>,
Expand Down Expand Up @@ -209,6 +210,8 @@ pub(crate) enum SearchMode {
/// Show files containing at least one match and the total number of
/// matches.
CountMatches,
/// Show a histogram of the matches.
Histogram,
/// Print matches in a JSON lines format.
JSON,
}
Expand Down
20 changes: 20 additions & 0 deletions crates/printer/src/stats.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::{
collections::HashMap,
ops::{Add, AddAssign},
time::Duration,
};
Expand All @@ -17,6 +18,7 @@ pub struct Stats {
bytes_searched: u64,
bytes_printed: u64,
matched_lines: u64,
histogram: HashMap<u64, u64>,
matches: u64,
}

Expand All @@ -33,6 +35,11 @@ impl Stats {
self.elapsed.0
}

/// Returns a reference to the histogram of matches.
///
/// Each key identifies a bin and each value is the count accumulated for
/// that bin through `increment_histogram`. Bins that were never incremented
/// have no entry in the map.
pub fn histogram(&self) -> &HashMap<u64, u64> {
&self.histogram
}

/// Return the total number of searches executed.
pub fn searches(&self) -> u64 {
self.searches
Expand Down Expand Up @@ -102,6 +109,11 @@ impl Stats {
pub fn add_matches(&mut self, n: u64) {
self.matches += n;
}

/// Increment the histogram count for the given bin by one.
///
/// `entry` is the key of the bin to bump. A bin that has no entry yet
/// starts from zero, so its count becomes one after this call.
pub fn increment_histogram(&mut self, entry: u64) {
    *self.histogram.entry(entry).or_default() += 1;
}
}

impl Add for Stats {
Expand All @@ -125,6 +137,14 @@ impl<'a> Add<&'a Stats> for Stats {
bytes_printed: self.bytes_printed + rhs.bytes_printed,
matched_lines: self.matched_lines + rhs.matched_lines,
matches: self.matches + rhs.matches,
histogram: self
.histogram
.into_iter()
.chain(rhs.histogram.clone())
.fold(std::collections::HashMap::new(), |mut acc, (k, v)| {
acc.entry(k).and_modify(|e| *e += v).or_insert(v);
acc
}),
}
}
}
Expand Down
43 changes: 40 additions & 3 deletions crates/printer/src/summary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ pub enum SummaryKind {
/// If the `path` setting is enabled, then the count is prefixed by the
/// corresponding file path.
CountMatches,
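/// Show a histogram of the matches.
///
/// The value carried by this variant is the bin size: each match is counted
/// in the bin given by its absolute byte offset divided by this value.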
Histogram(u64),
/// Show only the file path if and only if a match was found.
///
/// This ignores the `path` setting and always shows the file path. If no
Expand Down Expand Up @@ -101,7 +103,7 @@ impl SummaryKind {

match *self {
PathWithMatch | PathWithoutMatch => true,
Count | CountMatches | Quiet => false,
Count | CountMatches | Histogram { .. } | Quiet => false,
}
}

Expand All @@ -111,7 +113,7 @@ impl SummaryKind {
use self::SummaryKind::*;

match *self {
CountMatches => true,
Histogram { .. } | CountMatches => true,
Count | PathWithMatch | PathWithoutMatch | Quiet => false,
}
}
Expand All @@ -123,7 +125,9 @@ impl SummaryKind {

match *self {
PathWithMatch | Quiet => true,
Count | CountMatches | PathWithoutMatch => false,
Count | CountMatches | Histogram { .. } | PathWithoutMatch => {
false
}
}
}
}
Expand Down Expand Up @@ -682,6 +686,13 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
if let Some(ref mut stats) = self.stats {
stats.add_matches(sink_match_count);
stats.add_matched_lines(mat.lines().count() as u64);

if let SummaryKind::Histogram(bin_size) = self.summary.config.kind
{
stats.increment_histogram(
mat.absolute_byte_offset() / bin_size,
);
}
} else if self.summary.config.kind.quit_early() {
return Ok(false);
}
Expand Down Expand Up @@ -788,6 +799,32 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
self.write_line_term(searcher)?;
}
}
SummaryKind::Histogram(bin_size) => {
let stats = self
.stats
.as_ref()
.expect("Histogram should enable stats tracking");
if self.match_count > 0 {
let bin_iter = 0..=(stats.bytes_searched() / bin_size);
let terminal_str = bin_iter
.map(|i| {
stats
.histogram()
.get(&i)
.unwrap_or(&0)
.to_string()
.into_bytes()
})
.collect::<Vec<Vec<u8>>>()
.join(searcher.line_terminator().as_bytes());
if self.path.is_some() {
self.write_path_field()?;
self.write_line_term(searcher)?;
}
self.write(&terminal_str)?;
self.write_line_term(searcher)?;
}
}
SummaryKind::PathWithMatch => {
if self.match_count > 0 {
self.write_path_line(searcher)?;
Expand Down
Loading