From 94102a4e2d767f9ce4b81b3dc5ddd003d8a3b38a Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Mon, 22 Jul 2024 19:53:10 -0700 Subject: [PATCH 1/4] Makefile.am: removing lc-approx.cpp from sources and lc.md from docs --- Makefile.am | 2 -- 1 file changed, 2 deletions(-) diff --git a/Makefile.am b/Makefile.am index a0120a3..c79401c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -51,7 +51,6 @@ EXTRA_DIST = \ docs/content/hmr-rep.md \ docs/content/hypermr.md \ docs/content/index.md \ - docs/content/lc.md \ docs/content/levels.md \ docs/content/liftfilter.md \ docs/content/merge-bsrate.md \ @@ -220,7 +219,6 @@ dnmtools_SOURCES += src/utils/guessprotocol.cpp dnmtools_SOURCES += src/utils/uniq.cpp dnmtools_SOURCES += src/utils/merge-bsrate.cpp dnmtools_SOURCES += src/utils/format-reads.cpp -dnmtools_SOURCES += src/utils/lc-approx.cpp dnmtools_SOURCES += src/utils/selectsites.cpp dnmtools_SOURCES += src/utils/symmetric-cpgs.cpp dnmtools_SOURCES += src/utils/merge-methcounts.cpp From 06e216aec1eb9979558bd28f4d61bc69aa6b2d7b Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Mon, 22 Jul 2024 19:53:36 -0700 Subject: [PATCH 2/4] docs/content/lc.md: removing this documentation file as the corresponding command is being removed --- docs/content/lc.md | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 docs/content/lc.md diff --git a/docs/content/lc.md b/docs/content/lc.md deleted file mode 100644 index 703a009..0000000 --- a/docs/content/lc.md +++ /dev/null @@ -1,39 +0,0 @@ -# lc - Count number of lines in a big file - -## Synopsis -```shell -$ dnmtools lc -``` - -## Description - -When working with next-generation sequencing data, researchers often -handle very large files, such as FASTQ files containing raw reads and -\*.sam files containing mapped reads. `lc_approx` is an auxiliary tool -designed to approximate the number of lines in a very large file by -counting the number of lines in a small, randomly chosen chunk from -the big file and scaling the estimate by file size. For example, in -order to estimate the number of reads in a FASTQ file `input.fq`, run -```shell -$ dnmtools lc input.fq -``` -It will return the approximate number of lines in this file and by -dividing the above number by 4, you get the approximate number of -reads in that file. The lc approx can be hundreds of times faster than -the unix tool` wc -l`. - -## Options - -```txt - -v, -verbose -``` -print more run info to STDERR while the program is running. -```txt - -n, -samples -``` -number of samples -```txt - -z, -size -``` -sample size (bytes) - From 4f79e2a943824636a06dbc7d6da9a12367a14776 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Mon, 22 Jul 2024 19:54:10 -0700 Subject: [PATCH 3/4] docs/mkdocs.yml: removing the reference to lc.md which is itself now removed --- docs/mkdocs.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index c38525a..0194111 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -42,7 +42,6 @@ nav: - General-purpose tools: - 'cleanhp': 'cleanhp.md' - 'guessprotocol': 'guessprotocol.md' - - 'lc': 'lc.md' - 'merge-bsrate': 'merge-bsrate.md' - 'merge': 'merge.md' - 'selectsites': 'selectsites.md' From ee7084e2efbde45c1ce462098ca7364664f2df3e Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Mon, 22 Jul 2024 19:54:32 -0700 Subject: [PATCH 4/4] src/dnmtools.cpp: removing the lc command as that functionality is being removed --- src/dnmtools.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/dnmtools.cpp b/src/dnmtools.cpp index 49b8df2..5ba9074 100644 --- a/src/dnmtools.cpp +++ b/src/dnmtools.cpp @@ -114,8 +114,6 @@ main_format(int argc, const char **argv); int main_guessprotocol(int argc, const char **argv); int -main_lc_approx(int argc, const char **argv); -int main_lift_filter(int argc, const char **argv); int main_merge_bsrate(int argc, const char **argv); @@ -202,7 +200,6 @@ main(int argc, const char **argv) { {"utilities", {{{"cleanhp", "fix and stat invdup/hairping reads", main_clean_hairpins}, {"guessprotocol", "guess whether protocol is ordinary, pbat or random", main_guessprotocol}, - {"lc", "approximate line counts in a file", main_lc_approx}, {"merge-bsrate", "merge bisulfite conversion rates files from bsrate", main_merge_bsrate}, {"merge", "merge multiple counts files into a counts file or a table", main_merge_methcounts}, {"covered", "filter a counts file for only covered sites", main_covered},