From 58034e0efc90345db85d1f13191c788a89528ff2 Mon Sep 17 00:00:00 2001
From: Mahmudur <mahmudhera93@gmail.com>
Date: Thu, 9 Nov 2023 14:39:32 -0500
Subject: [PATCH 1/6] added tests

---
 tests/test_utils.py | 103 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index c94a9f1..1db085c 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -8,6 +8,14 @@
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 from srcs import utils
 import sourmash
+import unittest
+import math
+import json
+import pytest
+import tempfile
+import gzip
+import sys
+import shutil
 
 
 def to_testing_data(file):
@@ -53,9 +61,104 @@ def test_load_signature_with_ksize3():
     assert type(sig) == sourmash.signature.FrozenSourmashSignature
     assert sig.jaccard(sig) == 1.0
 
+class TestGetColumnIndices(unittest.TestCase):
+    def test_1(self):
+        column_name_to_index = {
+            "TAXID": 1,
+            "RANK": 0,
+            "PERCENTAGE": 2,
+            "TAXPATH": 3,
+            "TAXPATHSN": 4
+        }
+        indices = utils.get_column_indices(column_name_to_index)
+        assert indices == (0, 1, 2, 3, 4)
+
+    def test_2(self):
+        column_name_to_index = {
+            "RANK": 0,
+            "PERCENTAGE": 2,
+            "TAXPATH": 3,
+            "TAXPATHSN": 4
+        }
+        with self.assertRaises(RuntimeError):
+            indices = utils.get_column_indices(column_name_to_index)
+
+    def test_3(self):
+        column_name_to_index = {
+            "TAXID": 1,
+            "PERCENTAGE": 2,
+            "TAXPATH": 3,
+            "TAXPATHSN": 4
+        }
+        with self.assertRaises(RuntimeError):
+            indices = utils.get_column_indices(column_name_to_index)
+
+    def test_4(self):
+        column_name_to_index = {
+            "TAXID": 1,
+            "RANK": 0,
+            "TAXPATH": 3,
+            "TAXPATHSN": 4
+        }
+        with self.assertRaises(RuntimeError):
+            indices = utils.get_column_indices(column_name_to_index)
+
+    def test_5(self):
+        column_name_to_index = {
+            "TAXID": 1,
+            "RANK": 0,
+            "PERCENTAGE": 2,
+            "TAXPATHSN": 4
+        }
+        with self.assertRaises(RuntimeError):
+            indices = utils.get_column_indices(column_name_to_index)
+
+    def test_6(self):
+        column_name_to_index = {
+            "TAXID": 1,
+            "RANK": 0,
+            "PERCENTAGE": 2,
+            "TAXPATH": 3
+        }
+        indices = utils.get_column_indices(column_name_to_index)
+        assert indices[4] is None
+
+class TestGetCamiProfile(unittest.TestCase):
+    def test_1(self):
+        file_path = os.path.join(os.path.dirname(__file__), 'testdata/sample_cami.txt')
+        with open(file_path, 'r') as file:
+            sample_cami_content = file.readlines()
+
+        profiles = utils.get_cami_profile(sample_cami_content)
 
+        expected_header = {
+            'SAMPLEID': 'CAMI_LOW_S001',
+            'VERSION': '0.9.1',
+            'RANKS': 'superkingdom|phylum|class|order|family|genus|species|strain',
+            'TAXONOMYID': 'ncbi-taxonomy_DATE',
+            '__PROGRAM__': 'unknown'
+        }
 
+        assert len(profiles) == 1
+        sample_id, header, profile = profiles[0]
 
+        assert sample_id == "CAMI_LOW_S001"
+        assert header == expected_header
+        assert len(profile) == 2044
 
+        prediction1 = profile[0]
+        assert prediction1.rank == 'superkingdom'
+        assert prediction1.taxid == '2157'
+        assert math.isclose(prediction1.percentage, 0.029528, abs_tol=1e-6)
+        assert prediction1.taxpath == '2157'
+        assert prediction1.taxpathsn == 'Archaea'
 
+        prediction2 = profile[1]
+        assert prediction2.rank == 'superkingdom'
+        assert prediction2.taxid == '2'
+        assert math.isclose(prediction2.percentage, 29.183763, rel_tol=1e-6)
+        assert prediction2.taxpath == '2'
+        assert prediction2.taxpathsn == 'Bacteria'
 
+if __name__ == '__main__':
+    unittest.main()

From c4c09266c14cc0a73a07f5a552b72778bfe081f8 Mon Sep 17 00:00:00 2001
From: Mahmudur <mahmudhera93@gmail.com>
Date: Thu, 9 Nov 2023 22:22:26 -0500
Subject: [PATCH 2/6] removed duplications, added assertion

---
 tests/test_utils.py    |  8 ++---
 tests/test_workflow.py |  6 ++--
 tests/unittests.py     | 71 +++++++-----------------------------------
 3 files changed, 19 insertions(+), 66 deletions(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 1db085c..862ff05 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -81,7 +81,7 @@ def test_2(self):
             "TAXPATHSN": 4
         }
         with self.assertRaises(RuntimeError):
-            indices = utils.get_column_indices(column_name_to_index)
+            utils.get_column_indices(column_name_to_index)
 
     def test_3(self):
         column_name_to_index = {
@@ -91,7 +91,7 @@ def test_3(self):
             "TAXPATHSN": 4
         }
         with self.assertRaises(RuntimeError):
-            indices = utils.get_column_indices(column_name_to_index)
+            utils.get_column_indices(column_name_to_index)
 
     def test_4(self):
         column_name_to_index = {
@@ -101,7 +101,7 @@ def test_4(self):
             "TAXPATHSN": 4
         }
         with self.assertRaises(RuntimeError):
-            indices = utils.get_column_indices(column_name_to_index)
+            utils.get_column_indices(column_name_to_index)
 
     def test_5(self):
         column_name_to_index = {
@@ -111,7 +111,7 @@ def test_5(self):
             "TAXPATHSN": 4
         }
         with self.assertRaises(RuntimeError):
-            indices = utils.get_column_indices(column_name_to_index)
+            utils.get_column_indices(column_name_to_index)
 
     def test_6(self):
         column_name_to_index = {
diff --git a/tests/test_workflow.py b/tests/test_workflow.py
index 1495198..cb04531 100644
--- a/tests/test_workflow.py
+++ b/tests/test_workflow.py
@@ -82,6 +82,6 @@ def test_demo_workflow():
     cmd = "cd demo; python ../run_YACHT.py --json demo_ani_thresh_0.95_config.json --sample_file sample.sig.zip --significance 0.99 --num_threads 1 --min_coverage_list 1 0.6 0.2 0.1 --out result.xlsx"
     _ = subprocess.run(cmd, shell=True, check=True)
     cmd = "cd demo; python ../srcs/standardize_yacht_output.py --yacht_output result.xlsx --sheet_name min_coverage0.2 --genome_to_taxid toy_genome_to_taxid.tsv --mode cami --sample_name 'MySample' --outfile_prefix cami_result --outdir ./"
-    _ = subprocess.run(cmd, shell=True, check=True)
-
-
+    res = subprocess.run(cmd, shell=True, check=True)
+    assert res.returncode == 0
+    assert exists('./cami_result.cami')
diff --git a/tests/unittests.py b/tests/unittests.py
index ec7bf18..641e373 100644
--- a/tests/unittests.py
+++ b/tests/unittests.py
@@ -33,54 +33,7 @@ def test_check_file_existence():
     non_existing_file = os.path.join(tmp_dir, "non_existing_file.txt")
     with pytest.raises(ValueError, match=dont_exist):
         check_file_existence(non_existing_file, dont_exist)
-        
-def test_get_column_indices():
-    column_name_to_index = {
-        "TAXID": 1,
-        "RANK": 0,
-        "PERCENTAGE": 2,
-        "TAXPATH": 3,
-        "TAXPATHSN": 4
-    }
-    indices = get_column_indices(column_name_to_index)
-    assert indices == (0, 1, 2, 3, 4)
-    
-def test_get_cami_profile():
-    file_path = os.path.join(os.path.dirname(__file__), 'testdata/sample_cami.txt')
-    with open(file_path, 'r') as file:
-        sample_cami_content = file.readlines()
-    
-    profiles = get_cami_profile(sample_cami_content)
-
-    expected_header = {
-        'SAMPLEID': 'CAMI_LOW_S001', 
-        'VERSION': '0.9.1', 
-        'RANKS': 'superkingdom|phylum|class|order|family|genus|species|strain', 
-        'TAXONOMYID': 'ncbi-taxonomy_DATE', 
-        '__PROGRAM__': 'unknown'
-    }
-
-    assert len(profiles) == 1
-    sample_id, header, profile = profiles[0]
-
-    assert sample_id == "CAMI_LOW_S001"
-    assert header == expected_header
-    assert len(profile) == 2044
-
-    prediction1 = profile[0]
-    assert prediction1.rank == 'superkingdom'
-    assert prediction1.taxid == '2157'
-    assert math.isclose(prediction1.percentage, 0.029528, abs_tol=1e-6)
-    assert prediction1.taxpath == '2157'
-    assert prediction1.taxpathsn == 'Archaea'
-
-    prediction2 = profile[1]
-    assert prediction2.rank == 'superkingdom'
-    assert prediction2.taxid == '2'
-    assert math.isclose(prediction2.percentage, 29.183763, rel_tol=1e-6)
-    assert prediction2.taxpath == '2'
-    assert prediction2.taxpathsn == 'Bacteria'
-    
+
 def test_get_alt_mut_rate():
     nu = 10
     thresh = 5
@@ -89,7 +42,7 @@ def test_get_alt_mut_rate():
     result = get_alt_mut_rate(nu, thresh, ksize, significance)
     expected_result = 0.047902071844405425
     assert math.isclose(result, expected_result, rel_tol=1e-6, abs_tol=1e-6)
-    
+
 def test_get_alt_mut_rate_zero_nu():
     nu = 0
     thresh = 5
@@ -107,10 +60,10 @@ def test_get_alt_mut_rate_large_thresh():
     result = get_alt_mut_rate(nu, thresh, ksize, significance)
     expected_result = -1
     assert result == expected_result
-    
+
 def test_get_info_from_single_sig():
     sig_list_file = 'gtdb_ani_thresh_0.95_intermediate_files/training_sig_files.txt'
-    
+
     with open(sig_list_file, 'r') as file:
         lines = file.readlines()
         if lines:
@@ -143,7 +96,7 @@ def test_get_info_from_single_sig():
 def test_collect_signature_info():
     num_threads = 2
     ksize = 0
-    path_to_temp_dir = 'gtdb_ani_thresh_0.95_intermediate_files/' 
+    path_to_temp_dir = 'gtdb_ani_thresh_0.95_intermediate_files/'
 
     result = collect_signature_info(num_threads, ksize, path_to_temp_dir)
 
@@ -167,16 +120,16 @@ def test_run_multisearch():
     result = run_multisearch(num_threads, ani_thresh, ksize, scale, path_to_temp_dir)
 
     for signature_name, expected_related_genomes in expected_results.items():
-        assert signature_name in result 
-        actual_related_genomes = result[signature_name] 
+        assert signature_name in result
+        actual_related_genomes = result[signature_name]
         assert set(actual_related_genomes) == set(expected_related_genomes)
-    
+
 def test_single_hyp_test():
     exclusive_hashes_info_org = (100, 90)
     ksize = 31
-    
+
     result = single_hyp_test(exclusive_hashes_info_org, ksize)
-    
+
     in_sample_est, p_val, num_exclusive_kmers, num_exclusive_kmers_coverage, num_matches, \
     acceptance_threshold_with_coverage, actual_confidence_with_coverage, alt_confidence_mut_rate_with_coverage = result
 
@@ -189,6 +142,6 @@ def test_single_hyp_test():
     assert isinstance(actual_confidence_with_coverage, float)
     assert isinstance(alt_confidence_mut_rate_with_coverage, float)
 
-        
+
 if __name__ == '__main__':
-    pytest.main()
\ No newline at end of file
+    pytest.main()

From 070a6de168c98e27a888adcfa78303588e68be37 Mon Sep 17 00:00:00 2001
From: Mahmudur <mahmudhera93@gmail.com>
Date: Thu, 9 Nov 2023 22:54:01 -0500
Subject: [PATCH 3/6] added assertions, fixed path

---
 tests/integration_tests.py | 28 +++++++++++++++++++++++-----
 tests/test_workflow.py     |  2 +-
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/tests/integration_tests.py b/tests/integration_tests.py
index 4d15e35..14eaa8e 100644
--- a/tests/integration_tests.py
+++ b/tests/integration_tests.py
@@ -22,15 +22,15 @@ def make_train_fasta():
 def test_sourmash_sketch_command():
     with tempfile.TemporaryDirectory() as tmp_dir:
         make_train_fasta()
-        
+
         fasta_file = "example.fasta"
         output_file = os.path.join(tmp_dir, "training_database.sig.zip")
         cmd = [
             "sourmash", "sketch", "dna", "-f", "-p", "k=31,scaled=1000,abund", "--singleton", fasta_file, "-o", output_file
         ]
-        
+
         subprocess.run(cmd, check=True)
-        
+
         assert os.path.isfile(output_file)
 
 def test_make_training_data_from_sketches():
@@ -63,11 +63,29 @@ def test_make_training_data_from_sketches():
         config = json.load(f)
         assert config['ksize'] == int(ksize)
         assert config['ani_thresh'] == float(ani_thresh)
-        
+
 def test_run_yacht():
     cmd = "python run_YACHT.py --json gtdb_ani_thresh_0.95_config.json --sample_file 'tests/testdata/sample.sig.zip' --significance 0.99 --min_coverage_list 1 0.6 0.2 0.1"
-    
+
     res = subprocess.run(cmd, shell=True, check=True)
     assert res.returncode == 0
 
     assert exists('result.xlsx')
+
+def test_run_yacht_and_standardizer():
+    cmd = "cd demo; sourmash sketch dna -f -p k=31,scaled=1000,abund -o sample.sig.zip query_data/query_data.fq"
+    res = subprocess.run(cmd, shell=True, check=True)
+    assert res.returncode == 0
+    cmd = "cd demo; sourmash sketch fromfile ref_paths.csv -p dna,k=31,scaled=1000,abund -o ref.sig.zip --force-output-already-exists"
+    res = subprocess.run(cmd, shell=True, check=True)
+    assert res.returncode == 0
+    cmd = "cd demo; python ../make_training_data_from_sketches.py --force --ref_file ref.sig.zip --ksize 31 --num_threads 1 --ani_thresh 0.95 --prefix 'demo_ani_thresh_0.95' --outdir ./"
+    res = subprocess.run(cmd, shell=True, check=True)
+    assert res.returncode == 0
+    cmd = "cd demo; python ../run_YACHT.py --json demo_ani_thresh_0.95_config.json --sample_file sample.sig.zip --significance 0.99 --num_threads 1 --min_coverage_list 1 0.6 0.2 0.1 --out result.xlsx"
+    res = subprocess.run(cmd, shell=True, check=True)
+    assert res.returncode == 0
+    cmd = "cd demo; python ../srcs/standardize_yacht_output.py --yacht_output result.xlsx --sheet_name min_coverage0.2 --genome_to_taxid toy_genome_to_taxid.tsv --mode cami --sample_name 'MySample' --outfile_prefix cami_result --outdir ./"
+    res = subprocess.run(cmd, shell=True, check=True)
+    assert res.returncode == 0
+    assert exists('demo/cami_result.cami')
diff --git a/tests/test_workflow.py b/tests/test_workflow.py
index cb04531..6e7d927 100644
--- a/tests/test_workflow.py
+++ b/tests/test_workflow.py
@@ -84,4 +84,4 @@ def test_demo_workflow():
     cmd = "cd demo; python ../srcs/standardize_yacht_output.py --yacht_output result.xlsx --sheet_name min_coverage0.2 --genome_to_taxid toy_genome_to_taxid.tsv --mode cami --sample_name 'MySample' --outfile_prefix cami_result --outdir ./"
     res = subprocess.run(cmd, shell=True, check=True)
     assert res.returncode == 0
-    assert exists('./cami_result.cami')
+    assert exists('demo/cami_result.cami')

From 041bd805f052f6a1b0835521b6fb24a7a8ac610f Mon Sep 17 00:00:00 2001
From: Mahmudur <mahmudhera93@gmail.com>
Date: Sun, 12 Nov 2023 15:16:19 -0500
Subject: [PATCH 4/6] added tests for standardize output

---
 tests/test_standardize_output.py              |  95 ++++++++++++++++++
 .../cami_result.cami                          |  21 ++++
 .../standardize_output_testdata/result.xlsx   | Bin 0 -> 8846 bytes
 .../toy_genome_to_taxid.tsv                   |  16 +++
 .../standardize_output_testdata/~$result.xlsx | Bin 0 -> 165 bytes
 5 files changed, 132 insertions(+)
 create mode 100644 tests/test_standardize_output.py
 create mode 100644 tests/testdata/standardize_output_testdata/cami_result.cami
 create mode 100644 tests/testdata/standardize_output_testdata/result.xlsx
 create mode 100644 tests/testdata/standardize_output_testdata/toy_genome_to_taxid.tsv
 create mode 100644 tests/testdata/standardize_output_testdata/~$result.xlsx

diff --git a/tests/test_standardize_output.py b/tests/test_standardize_output.py
new file mode 100644
index 0000000..e0ef8fe
--- /dev/null
+++ b/tests/test_standardize_output.py
@@ -0,0 +1,95 @@
+import unittest 
+import os
+import subprocess
+
+class TestScript(unittest.TestCase):
+    def test_everything_exists(self):
+        script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+        script_dir = os.path.join(script_dir, 'srcs')
+        script_full_path = os.path.join(script_dir, 'standardize_yacht_output.py')
+        assert os.path.exists(script_full_path)
+
+        yacht_output = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/result.xlsx')
+        assert os.path.exists(yacht_output)
+
+        genome_to_taxid = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/toy_genome_to_taxid.tsv')
+        assert os.path.exists(genome_to_taxid)
+
+        outdir = os.path.join(os.path.dirname(__file__), 'testdata')
+        assert os.path.exists(outdir)
+
+        cmd = f"python {script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
+        res = subprocess.run(cmd, shell=True, check=True)
+        assert res.returncode == 0
+        assert os.path.exists(os.path.join(outdir, 'cami_result.cami'))
+
+    def test_wrong_yacht_output(self):
+        script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+        script_dir = os.path.join(script_dir, 'srcs')
+        script_full_path = os.path.join(script_dir, 'standardize_yacht_output.py')
+        assert os.path.exists(script_full_path)
+
+        yacht_output = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/result_nonexisting.xlsx')
+        assert not os.path.exists(yacht_output)
+
+        genome_to_taxid = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/toy_genome_to_taxid.tsv')
+        assert os.path.exists(genome_to_taxid)
+
+        outdir = os.path.join(os.path.dirname(__file__), 'testdata')
+        assert os.path.exists(outdir)
+        
+        cmd = f"python {script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
+        with self.assertRaises(subprocess.CalledProcessError):
+            res = subprocess.run(cmd, shell=True, check=True)
+
+    def test_wrong_genome_to_taxid(self):
+        script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+        script_dir = os.path.join(script_dir, 'srcs')
+        script_full_path = os.path.join(script_dir, 'standardize_yacht_output.py')
+        assert os.path.exists(script_full_path)
+
+        yacht_output = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/result.xlsx')
+        assert os.path.exists(yacht_output)
+
+        genome_to_taxid = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/toy_genome_to_taxid_nonexisting.tsv')
+        assert not os.path.exists(genome_to_taxid)
+
+        outdir = os.path.join(os.path.dirname(__file__), 'testdata')
+        assert os.path.exists(outdir)
+
+        cmd = f"python {script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
+        with self.assertRaises(subprocess.CalledProcessError):
+            res = subprocess.run(cmd, shell=True, check=True)
+
+    def test_wrong_outdir(self):
+        script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+        script_dir = os.path.join(script_dir, 'srcs')
+        script_full_path = os.path.join(script_dir, 'standardize_yacht_output.py')
+        assert os.path.exists(script_full_path)
+
+        yacht_output = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/result.xlsx')
+        assert os.path.exists(yacht_output)
+
+        genome_to_taxid = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/toy_genome_to_taxid.tsv')
+        assert os.path.exists(genome_to_taxid)
+
+        outdir = os.path.join(os.path.dirname(__file__), 'testdata_nonexisting')
+        cmd = 'rm -rf ' + outdir
+        try:
+            subprocess.run(cmd, shell=True, check=True)
+        except:
+            pass
+        assert not os.path.exists(outdir)
+
+        cmd = f"python {script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
+        res = subprocess.run(cmd, shell=True, check=True)
+        assert res.returncode == 0
+        assert os.path.exists(outdir)
+
+        cmd = 'rm -rf ' + outdir
+        res = subprocess.run(cmd, shell=True, check=True)
+        assert res.returncode == 0
+
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/tests/testdata/standardize_output_testdata/cami_result.cami b/tests/testdata/standardize_output_testdata/cami_result.cami
new file mode 100644
index 0000000..a5ca06d
--- /dev/null
+++ b/tests/testdata/standardize_output_testdata/cami_result.cami
@@ -0,0 +1,21 @@
+@SampleID:MySample
+@Version:0.9.1
+@Ranks:superkingdom|phylum|class|order|family|genus|species
+
+@@TAXID	RANK	TAXPATH	TAXPATHSN	PERCENTAGE
+2	superkingdom	2	Bacteria	100.0
+1239	phylum	2|1239	Bacteria|Bacillota	100.0
+186801	class	2|1239|186801	Bacteria|Bacillota|Clostridia	100.0
+186802	order	2|1239|186801|186802	Bacteria|Bacillota|Clostridia|Eubacteriales	100.0
+216572	family	2|1239|186801|186802|216572	Bacteria|Bacillota|Clostridia|Eubacteriales|Oscillospiraceae	40.0
+186803	family	2|1239|186801|186802|186803	Bacteria|Bacillota|Clostridia|Eubacteriales|Lachnospiraceae	20.0
+186806	family	2|1239|186801|186802|186806	Bacteria|Bacillota|Clostridia|Eubacteriales|Eubacteriaceae	40.0
+1017280	genus	2|1239|186801|186802|216572|1017280	Bacteria|Bacillota|Clostridia|Eubacteriales|Oscillospiraceae|Pseudoflavonifractor	20.0
+35829	genus	2|1239|186801|186802|216572|35829	Bacteria|Bacillota|Clostridia|Eubacteriales|Oscillospiraceae|Acetivibrio	20.0
+572511	genus	2|1239|186801|186802|186803|572511	Bacteria|Bacillota|Clostridia|Eubacteriales|Lachnospiraceae|Blautia	20.0
+1730	genus	2|1239|186801|186802|186806|1730	Bacteria|Bacillota|Clostridia|Eubacteriales|Eubacteriaceae|Eubacterium	40.0
+2841525	species	2|1239|186801|186802|216572|1017280|2841525	Bacteria|Bacillota|Clostridia|Eubacteriales|Oscillospiraceae|Pseudoflavonifractor|Pseudoflavonifractor sp. MSJ-30	20.0
+2841523	species	2|1239|186801|186802|216572|35829|2841523	Bacteria|Bacillota|Clostridia|Eubacteriales|Oscillospiraceae|Acetivibrio|Acetivibrio sp. MSJd-27	20.0
+2841517	species	2|1239|186801|186802|186803|572511|2841517	Bacteria|Bacillota|Clostridia|Eubacteriales|Lachnospiraceae|Blautia|Blautia sp. MSJ-19	20.0
+2841519	species	2|1239|186801|186802|186806|1730|2841519	Bacteria|Bacillota|Clostridia|Eubacteriales|Eubacteriaceae|Eubacterium|Eubacterium sp. MSJ-21	20.0
+2841513	species	2|1239|186801|186802|186806|1730|2841513	Bacteria|Bacillota|Clostridia|Eubacteriales|Eubacteriaceae|Eubacterium|Eubacterium sp. MSJ-13	20.0
diff --git a/tests/testdata/standardize_output_testdata/result.xlsx b/tests/testdata/standardize_output_testdata/result.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..44da4cd21a7fd5a5d45bd91de18150228c3a9229
GIT binary patch
literal 8846
zcmZ{K1yEeu()D1$9fJEH0fNimPVnFw9D=(I1kYf>Ex227cXtTE9fFhK?*4Og>wEV-
z{`Y3;oT*bay{2pL?$x_{jiM|JEDitwKmd5vS!hUVccS<|KUF?o7|)l9ow1^$oxKyY
zp}jp5*v48uLLRw`1w;HvW@pJ(?U<aEd@5&k@RJlbSComEYrwsyN@wIE|1wKh6Yh7-
z9x6A^z@7qc%)xhnI%LUivB^S$chrOynxw)b?xEQ8W<9_PqJ9_yTsiiUlzWo>THY<<
z7S8oIJH_<@iK)&BmaKHw307wj0kbV+b#^y~aP6_TaEkxoZz<2!yWM9$hoAre%zyXS
z*v`@PFNgD@I~BTFPy+Q$cUxU1EfFNNn8Z?rMUAk6bvD5|5|ff4O3n4OZ*}1PiTbaf
z?&sBUJVLxO3&uVn{Z#xAUUrZz-aZ)dLl0V<KZd<hN<5*<i!62R90mUP)ylA6CjAhR
zeNP`D7Ym|OJAQTb#CrBQSymd{Yd>wrDF;|3goY})&R!v+-zF6ObBt3gD7}iK9~PuK
z_BOiDT0EDXqveC*EKHS*$h*D9CM6y@>X6!Sr`<0#{PyYPqdZS*2AKwBTU!^G%$n$n
zdGRRmewCH{(jK@tSlqlF_rYyi=DPb$))y+waD8ESi~XMqlk_2#3gZ<3kV*>xU_UPm
z*qYhd+|<VO?>p;XOLL&D<Fo+6@jfaa1DjpiAop?k)zx4-S-39NBtZ@o$O14m!>!#-
zq9H!*@3=6AzM>w)$Ty|%!!qvNjYz!P+6K9w9Wohh2L{KTf(_=G7iXI5nU_~J*npTK
zECdDR#~ny{a(cDe7JzP$*B~}l_fP*&1X(c7(9DNb9YSWRncbSjC3T2}br$|R69X?j
zMswjYRV8gZee#-LWbD_tL#pAXIZ)UdA~cP`HK#Y$z4r^tf~KQ8yd-RXwk$-N<US*I
z1)B=m!E35rJsp;2Lh4ri;}cT8{)1j|a81&eIg_{uY^Z#^X__Oho|iob8DZZSoH=X=
zS2s?;pt_-nP<vw7Ri)<iuC#OO_FNIfnjDKUlex=<vgrJxaFzFyS6(Z7Ddwq2;X3ap
zt=yI-BSQL0$H1s<C+~y0%R>5!$DH>pXlNhGta5i=n~O?r?0I0}*8UQnBnN2PyUzTW
zfcdKT=GG;SaGs=jN#w(^<{?iA=3`$k?2N?(!B-AR$FwLoGyr$cxSXPAxk%lY)vlO{
z4^cCtZ)z0@5t0Y|F5dZ5^!_Y=xJYOo%w_jT^Wq(`iu183hAKE{P8p58ciP^}D2Sbv
z7ag&0VMIK+qtY=9qP6A2cH|{C6ZIUuyo}DHW*a@aLE2N`HzQO3TA@2Sx|ts4NY%V_
zsp)n3{+%t=Y+*+=<!qcJP$FaCZPO+us8$-44O<8yEilnikr+A`k@ag<;B`%33Fa!c
zSA848z)E=VRk2;NfJT<4^aA7>Ly~Yk)#<m>xb+=;p5*wYTFcLz1WjD11pF0_B2LEM
z8E|~ZNoguS2{zVn9P%rXzyu%-Rw0kKycG5$L*Bw;>ZI^-95e7%COXohnySE=W_X*O
zuv4;}$R)^pg5Tp2?k1HJi8z{w1lo+KT9N`{(R@r=4va`H?ZRXb6*MB?B6O^M?N{ee
z2r<yc>Kg|uOw?fBYn5=u3K|UT4@bL<%tWJM#^`ZBe|ID_3bUOw5G0d)knNxe`;e~J
zE#0~8KlJgT!8>r8-I_7t>fIEc%qBWbfzS>c2YgPl7Gf(z-7=#nh(qujDH{^pNh7J?
zFAp#h_t(A_#WMd-P*39v++D@j{vA9%OkSgTM~&wKSrhf#R9<GClc74xlD0ZMcV(;#
zvm8ux#u|>p1O2Myv2W&hGCKAj%mvd=4sPdxkn&`$w^XmAWc{i|fDQMd-;e2g5VuWq
zs1b)$Cr!D(V5N%^+3u)Z$i&b7DsQA~;MdvQDbI2iWE{QW*WaocMrIeQca+_&i)*W;
z0m+<K{;*dd9YWzV<BuV!bIT;*yt~wO-K5H6QArSMAQWwbv1v;C$qPzPjBZ4lRF1Oi
zzS-#o6Rz{j!!~*)YS?VejgQ}Lbt+Zl9Pxi8=?^~5rZ~(BOdRbN3v&OYpJ*1}r^tb0
zV_gDdl&m*sm_1>{Sv4_un+gB9TnYMAS<Qe|rw@i9D%(>oOk&%9E8&Cw#(2!eV!mmu
zZTTBF0vk$Yo-T_-Y_sC;%chl`Y9ym-I;RzFmI5`n3Zsu&r2`o#m<q3vv80US@bYD{
z&j?i7wiu*))IjYL>~&ZP_$?d1`yCWi8n9gJ(4pBHn|TU7Oj*tHPgmZeATsI%YhkU+
z>mE#RrKFa5P`chGxOEzwKG+YwL;uCMrGKs~R_1-Vv~g=2%O@q5>fVb9<tzbB6;J%?
zH6Gih;$ckGD{QFYxIyt_Xn&J@V&BAD<knM^#y7YlrGh*@5{#b%74#;kv2earpukb!
zIc^*s#ag<B&E7Kl*+5j;Os4U^i-mLwuse#)sZyVnEzlqbEUv2#Hxste7|3>e*rvUu
zeOlC;5~#rwkUYytS|zOxa!T=N=#nUT(wrf^VO!y`U*uhe(ff_fD3<Hp>GkD{FJ`e(
zTT}V_8|qZPnolQw<SF+vi@BFet0<mR#oBo$446W)@SbEbuZ+WV&N=iWag0jyD@*$p
zSW(=4HP2A8K7v+)B$5f)r$bA4!jbGmriFhAEl#1DU!rmlN5ko{Ce?A}Y$_n}|9m_`
zt?25BSUU)9hx*R~!i6?;{s0XCI3WQ5_<t7=H#<iwCv#I%XD8-AAAhPyp8A1p3JZ=`
zd@wxSD;U2wAvO8(?I}gmbr}N`+)&efQ(V$lm+U}!UnR3?Z^OH5;uR~hvP1Ks-eNhZ
zsnxXc4-uuRh3hFcLqIsmO#)`}*?}*}l5y!@YTqJ5nP|o0*Xlx`Q=%E`X>vRcq~#=A
zR5^y$q)|);@iE&+{Z0?&97`etwye{NJUY|Ekg5Xl6oruF$5ieztAF5cvTfW5cUW3`
zxwQ>D41aUG|6#yDv>_7z!~bm7(y^LY=i3)VI$%)@`E9k|8%6CmJ5tM+*)u0&U*?H@
z21a-9UBJkchTnI}tqUhAakW2Fmq<xZv#;oN3(`K}ufr*suwi-fVW?HbDk-DxPVQ}9
zTr!rI?Mfhqf4t@A98Zutm8$~LqDa<FK&knr?eA=T^w6tn_)%StwXX3>v`>q)oMH>b
z=mv>i`V+9AfErf=^4Po{pru3u5p9{U>ykjfJ9XwiP9Ia=#S`cQA~JlV$e&NB=FbD0
zs+(w2t`T7@tP1Q~vEP9OrKZK&StmomV&C@Hmb@PvaaV-Ekw2Gw29j*I9&5@TX<gEG
zhfX@>Xr!S7DBYneW#9(lCQVz0RV|UWPvY|FYu&KY%d4nWCkkM<XkiO2iz=0S{poT_
zN@a($mK%aRHoP6Xrk!6?(T1gt^*$y|YFN2W6*bw>Fu7(&w_41EbM3Eld5%L!Ac{n7
z)|D{*cHPx9`#JV=f#urkHv1kyZ*MDzV2<iyZRY}yejIyYlS+yZd6{RYFRlr4DLKCm
zhz_ONZ#rZN_AH3KZD}RA@xulq1c8F1B6hEl-U_XLCOeiEB0s_wTK$a*Zd*JNgFn25
zN_p@d_U`y7Wbm1+q7%KrC8=%eMgiwq3IBRPM6aLx0Je4H$=)pRXpz7f+TA?cXK9J*
zkPs-Uw|&Rt=o!YFvejUPv6)Rirmt@n6iD)UF8pJjWgls<rdcDgqNW9|GuPDH@HrRm
z=8$y>-TK!b-i+)T7Nv+{Ouq^*2WbRJPwSH0{um_t&y@M}fpOjm=K0Kp@;_6C?a!H1
z;~*jh^e1KTQR;5K#Yip6ZSlnxO&>mrlR9FL0rKl&omae3lRj4TCYD}mSVxSWw9W<*
zu?snz&?5sl@3b_+KY#<24`^2s+@UyQHCh5BsrZ;I^UqcHz2=b&7SmNVyn_`tXG^SG
zezo{qsxfZeXBJiYl^fBZyA1aTWn`@v1mJ<&Nj({^0(Qx+hunuQk4dI7N@=zWea7d%
za&lB{4$F1eu03FSE<~k|mtPk{Vu{1dblI3w+g)St4!Xp2<rGEAcoFk`A}aigcH^9Z
zLd)W<SMq$n?eAqD^GEfAuDXCF;0w1#wcak-migDN`-+8j_s;NkJmXgw<w-_EjjIJo
z+cCN$XSQQU%e3VQ3Gy06M>6I$Q+I`VOl(#%T4t!lhKCF!ityJrL=V*|%f!3;V{t-g
zQLQuNChA=Cq=b56H2NaHdV@DDwUr%ui=N8B2kLP=#pH`S>}y7l2UA|s>m&{M!uRI_
zG=sZ!jy=w2i&_eAmYZ;`-bSBMA=n&aUZ$lyv<XRZinhB=F76M|^K^Z>epsh}bX}!#
zjC>LsQA;bZ(0x$Y73BQ7u^|7^9j(+z9xaI;cEDd_abLBl3wP`+E}z%@XtQNLUS~VE
z0G1ouKc28?8a^H|KeejOmNUT{Tklr>x)ZIwI0Xexnh;2rr_rfd&ZE+N=>5S6GSTQm
ze2I$PHw7sVviHScTVoajg+YgUu|wX-7-6@ObZyhKjW@I3Ixa8t9Lm#(8p=6H`6tHE
zp<r3toEuDc_>+YLvBl-C2M7EEX_~ldx_iVkhJ`1A@*BV`2dqG-yqWdzc-(N8L36nr
zx{wX9y<?#CaW4=&0i<~B0D{30pbB}TE{Zxi|0SAE+gO8o>556<oFNt!;}E8L*Nikj
zp=zo&fdJ|nsMbiEXrZU*7keFX)+n|7Bp@$Qvd)2f&QaWdodacqLy{F0gTxKC5#%pd
zk9z7y6&wvYxQ&Nfj3Bpthfz-zyolMegRpK78O()@k%yZni{rpuT0I%$n`dm~L{o+^
zUJw50L^82?-LY_!G$${{2NZ3>z(gyguBC9Dl%j-Cg(+bfii!?VKCY@SE-lQ;?GH(g
z(9b=mDehi#IY|HWD0c7SBl=+gL+_rq=5{$e#u9Y+5lACJ+6}ecGm7!)Mc2smN}odz
z764d&ZgKxdZDaqFsEN8xDZChgPha}4IBzALiAPg;hYL2G7kCO1!u(BB5Xtd28@5)+
z)^gDnI<wQ9a92(B56VcHqjox{Mu*j{_-?gct2wbQYd7EFOUTzv=ULKxEMO=ok=Lc)
zQ`nQA+P5rK@6W6XMi8Z>SJMkOo7|{nzOc5utQ%l>$Rp*vt5Usa&u}(G!!zc8bXl`%
zn_t)nFy~q-41df&SmDE4Rmh1QrDt}T-pnVQ_V8kTME3E-=?~r@dD^@kbB=gt!}`9D
zzn-?fIF!}WX(C|6+A8a32A;sI#5ixGoOer7#0s7bRdav)etE=2^Le^z&Q|_)dinmP
z5o3Q&yk4=FVS93SxOZ_#djYtxBw60d34hwAvf5&XlvqRP^^xspah=+V^}*E7QZuu%
z!J_XeY|oXvAle17Ry3|Pzy~Qv<Bn}f<0i6IAwyP|+d?moNH<BjV7R(&zOLU<(Apwg
zt@nseFOVcf4wqNAS%B5${rT3K=lN*K9JlSe6(>v8Mh0je-3Nzwv4~m2{s}FU-x9NE
z$LZTC_UCuEO_c&`^=rqvx7lCvX%7!aQ>=y;1Fz<mp3s{W-ClRM9dhsdR@DzjUo(g|
z^)OS5-<h(tTC$d}ltFJ?3dGVzb6;pRre&Yz)!&)%7&vz5sHo7bceI<-p3k(|lpMBL
zj+$(tFjH?x*Uv*3f|ZRsMm$X6EYCV{_DNEL#8{k9q!G2rH8ShFnucMV{u<RyLO{tb
zZ(@7LcKK_sER3c{!%lCyU^dU|U?x&LxF;1W(0By4GsZvQP#E3Ub0r=Q+Uji!C1~Ij
zieo?{!8<)9?t`*uQcp~e0WGPBynhd?Xc;h?rIWAq3?qwHe4CsSk}d*qb&EC%4razi
z!pJfdH^;{$l!Uy(%z_ecsR5v88X$tVzsvc0;OTNIz^-<xh*ZiV*#|3$B1TJP&XE8}
z=~g?l<gvplsRSsh$lzVO{7~0@e-RLC*7ITg=DafP1E4X6!60vRbb%_!(HMO{*B?Uq
zD{=4}OU)2pi~4lnPdRKQNOwf8FUzX>vc1;B29a-P71IcRVC#6!&nPKMZUiIfA`cT^
zOc||IU>J}3LgP3(qL4^e5Cw%9`Px$ScdRkrMp`G3GLt(#7HJ?Ph+<7d3JkzvE=8pD
zS4rxr{8Z8-zKec*<q4k*z-T)9P6?&*aShIRAUFXjRZ$j=Jeo4UONl-pn8G%aSa?E^
z#(|N*3_<$mXfTMCGEiS&jzCh}jflY0dBhJTgMso#FakSeS6DDP6=g>-*&u<FGjh6A
zC#Ix*@HI6XNW6(HR#^9%+LxcSiQY3o6A2`4vPdaPGT{In6b$fWIV6>plex-<Ao_^`
zcRlgyS02!dmN(Vd>CYD$Go|<Ki}i(a_1~Awljuihgw^KrliJp5?=-b`?pPV`@8mv2
z2|dUfU%ogTDE?rq6?x{ImH*8-9RK1R5Y8XYLB+bw-2i{w<IuO=SKw>@G)SizkpyrQ
ziD-WE4oKsjAb4kO<`fMmA1QXq4T3SL;y!SyA76;@o>80Bjo_ZK6{3;Wuy!tOLav?;
zSDDE#jdv+HP?@mNDOC^3Y?4uifR&4@2;S2MS?MqC^z2)!aot++?Yg>@F8B!UeBWp8
z2jA~?)t_1zqvOTwm=vd<yV*SbYy!9?r+TWJmZp?(41b&68#*Do-iRTiM5B77zi2y)
zc<03$*~foH>sCC(y6ZF%&tz@)hjk{^ruqJ2o#coWnggnb4gJg6pj%y!$f8Mhl?UCj
z{ojs%vyQH5d(uOxcWFp#9yqTgSwXv+tk$b?QhJ<>_@fZcQDB06yXuO~-qggfS%Gb|
znu@E-@;B}GSbJEz0l8q*CmeJ;SC=-c?__*wLbFW=r&WU)&c>E$ZlepYQ`o;&8_HUh
z6g@U?sQs*DP|Z85UBsSD4|(KIjeOKnZ5}z;)=@|+s#gOR?5&hJ4xG>&YVw74wQSEo
zdDGl-kv?#>AMPg|H-oRae3aDOXbvB*@Me=|QH6}TpU7M*r*GKy_YS3J3r41|G0@c|
zvg*}214a(=r<eLvi#_!h<QcPiqY9RIT#+VAhT;!PgHRG+0^={1cGHG+Ompf&tT+q2
zW7BWtZ%0tLtM`ZvlMF-HT>VBbl8wz*mh|&sv#euUuDz(@5Rrc|==ULHDL(EGz3C^v
zT}U8^p^UThB)FKr+dWY7;#VNCbTb5s78(HaG+Dl@Fd`;yla@e5Z3&}IGmBs{qE5r>
z=Vd-hBz~k}k64P#gdhM-Kww16buAfX4J2AoEIcqF3Aw03aFmF+@+lf+&x(XU1!Yz$
zmM=mGmFRkK6o?Y|1tEkSD;Ew7fcu=52t#nZT1>g&3$IUYQ1}}5)E92oM`g#>pn`44
zo(1cT1fww)1?7iC&?t<+i7>o-$0-TvH~n@X;6{s11qqz#Kx&=}Pq^hL>7I#6>hI2+
zh!JDFd71t((dhxoSh3$r^CQyX7oI36U?(L@N9X5qWsIgw0M;AKH4bb@p2e#wP31Wb
zU>ARPSu=)Qi!G^emqJv9q11#MxuM`T2h1G^+2qxy@3^62UF%U5ARr?WVyG}yu8W`Y
zNbnDc{U~m<%!7!p_+cj{0P<V-RTwONU>GbX^akALFhwTCr<T<@QzU#tUrdpuFW8K^
z7)ipL{8aZn2xQ(@nMW*L4>q~S^E@zYo)yO+D*h3KNcOIruww)&LtxI-_^=iYj~IR@
z4c1o#%;!Y{V0{Igf{)dpNyU)S01nn4<r>687QHIQo>$3n6nKRek9gHbuC?3BN(v?2
zg1+(C{UlQR-gGjYzSWg!TNJdsu#s?n(vy6!MshQ*X|>zECRrSBdk3YUPh|X__95YZ
z$A@sz=S3TTbs33R@Z84RAOirXf4A{Y&hFNxPJeODlKNsq3Jb;qZV!B%yrR-K(Oj+x
znfL|EyrrPFWCMR{{co(rChcwPb=WccXiJxsnUE9Wj7B~afrgyTNRllk9=gIH`h(Ox
zA=CMay#}Lh_(>;JDXS7Aw6CrisI9+B2%>UL>ROr^*m@xf+*SfbpMC|Jm`NTKoioW-
z4L5OmD8HS|^lg<p*-RRwUt)V>$<XyNAHx6YVZzE)k}w4n9H_C=RUDX5VThT4Z&{NE
z;hM3kUZwGzjkXWD?g#Ke;$~0$jH(ZloPO?p(u9qfZR^XvciyLHYc=ifJvH{|JVv)y
zAzHNm^o#Brm(`$75DLR(8eZzoi4xIPm}nuUwH*g?bgH?A^U46d3kio&l}oWicYV|L
zFLf_hY&a>y{yfhf8=*xfQsjY+F`BG1eWDd-#(inXW)A+39ys0x4!kh^Io-#UB|Uzk
zD^o~Vjs6p1_y+pd`t{Y>AXg+`ws!I#pVKc|`MtjymE)q%w=Zsd99C$_Wmj+U<xU+W
zp#z2Ov)3!^E5gVl!SU8q%4|X3IO<M6mdGCqtL>oke><fd_rRGzB+*DSCoI#i2ki5y
zcfaq9hnbr`{u(|Zkk(0`wt3WonL|I?Oj_~UVHW7qt@HtB(&CkwDEECwip_@wuY+D#
zSbL&!mI3au)^>zo6Syc}44_xEVN^*6mDfY0d+Bxyw3K9n2a)!83GOgb3wKs&yR<%R
zF(g!~k@2jBZs63m-Y%$;+Da4WngS&TlQQT$VEM7`lpThJoy#W?wQ8u^SS+8PJ=^3v
zd;GiLZxoK-WAFhJc#Z@4QP=?@7;W}b2<qv^8G{Ie#)2^M)QD?QSd%(9s#!=aA>PqJ
zbfZHt&kl7BIAL|c@WDhI8ju6yw$>y(&IE@PjvB3RKeyhX{%4n6|8DO~$8#jVevazr
zpa$qWnp!(CGyQc>i;=a3W<?P$$1>HIE9`-0Et#tj3r21T*~&@ki8k&zzYL%2AUDU9
zeM(P%TaM0==}PRhm}lL`((_?nowF2BW%b4&-630(o>Nx%Z5Bt=*u@+M<L@)-fw$eZ
zGxc&#_tNJ`(R)o!HKuMzI2TaYW!=eCGGf~E4!-QNPs2Y)k*1Q<%1id|3pkJSf1F*A
zM`gd0uxC7v&Xo&wjVl*SUuw%e%QH5r0k3R7osE6N5x8kjW62Adn3X`Jv8%Xw(WR(?
zmVh6gbuFalkplYPsr%Oi&dAQr>aXMtk6HRl*$U9l`wmkgL=>Aiv%~@wi$Naf8agqh
zTT1N^pG&mABdvl<Pw`5lFtW6~pH?e<fEX!T?7S?u4mq}Zq46VTzxq^WLz*JY!WBK+
z7e4)DFFCaU=oz3k+(NHb%nxIj7(Sqll{Mp@#gpNDAAs2gbv15Z=}XSX;$N%#^WrBB
zl<?ViDh00J(bi1b;hWfavwNYbn<7&K><}51zbUN|d{3ojtl~^N99&JR*>Ge)kxr_v
zvD@ze;RtQon4QJranVKKF)xkqq|lOTW>qrD!3ti{M6L$i6}dQvPWWj#!Mg=6yt)uH
zxF<X@kHqM;mrQ0DNOF6m*+vLFqDX^1YioS-$#?|&omDd2ZQy*Iz#X>&TT&!w9!}vB
zo|ecV9}hGs1lkE^Z|+FCNJ15fSIi1L9mlqrJ)WhxPuYoxQsioudDd8Jl$XH=(n1)_
z?J;EO!@DoEH0kfiwq4H~Q2D$Ognw_u-$D29+wms=hhurrKC@s*lpEau<TOPwq0|T$
z=<q5GG<%GO*p<2tK1)G;Rn8U3s(RG)e!|_@3~XIjkDRIr?6KN@Cu7=e%>+>80MVV+
zbUL&Z){x+gj?WmVju#SAfYOXAbOL|GyjA&0`?(#%MXeT2&*cQ)ev{b%Y`LiPc|iGR
zd}*`_V!1a7He=Ayu*sWKws<aPuQ#hJl77ZG#2s|bVO=oz5)n!@1(+vORHeKb`{x-~
z`&&0}%PwT^QI@8|@eu93%`ad5XRb}gwr|oshoB?e-zuVxn4PV&sjaiVs=K|ZlkQ((
zIIKGK9EKS8@pQBF4sU{CEIpvg#$bdDV&sjdnceHDA3kxRg~QUnRihsdef!DGtdB2d
z%Z?8{u|`sdaUyU)2*J-3M$+hzU$bnA318Y1g<;LK@hbyH_0G9o+z6KD_ir0gcxSZV
zeo3mF{ygQRHw)$P?c;FY_e)ccmN}}Wr*_b_wz&|azEd7(WH+i%BBe!<!gf%k{OYYX
z$K7U20jnlb73ZgyYnW(WE4*OQaS!T;N+>H+LyTwO4%BfxhcXF31rVfN<;G%9^oRv|
zw-VTpPY|n_Y_owPQ?-51E-ovbmy*mV5posy9ivPR$C)8aCrCx@U(O&xA2j>r6nkv$
z6Wh1=k29=HT;EFCmiG=U`4GCh*?R3TXD&KX#gm`}Ur#d$K9^-EXdIaTt+YSK)t{H|
z^K$?Hy8BD(m%8`gwg7-HRK>qr|DOu}()^`5{EzwI^Uwbmt@tItOLgF10Dedae*pfg
zR`3$#<-qhG6oF@Ee|F;~%AfJ+OO%(>oPSV$qy1NuKeL^eC@=f7e^9ia_4EJY%b#xT
zCCba=*1u4o#_|53{B!tvY5KCR{bQQ*OgjHo<z5=TtP}qjh7tYC^Z%<FFO6Sv+dsy*
sB!7(m2h+X8dC3(2;GmNN0RM$I6lLL_|DFH<pgg}Zo{80*?60T)2VOeZr2qf`

literal 0
HcmV?d00001

diff --git a/tests/testdata/standardize_output_testdata/toy_genome_to_taxid.tsv b/tests/testdata/standardize_output_testdata/toy_genome_to_taxid.tsv
new file mode 100644
index 0000000..7c7e53f
--- /dev/null
+++ b/tests/testdata/standardize_output_testdata/toy_genome_to_taxid.tsv
@@ -0,0 +1,16 @@
+genome_id	taxid
+GCF_018918045.1	2841525
+GCF_018918095.1	2841523
+GCF_018918125.1	2841517
+GCF_018918185.1	2841519
+GCF_018918235.1	2841513
+GCF_018918285.1	2841512
+GCF_018918345.1	2841509
+GCF_907163105.1	2004710
+GCF_907163115.1	316
+GCF_907163125.1	2004710
+GCF_907163135.1	28220
+GCF_907164845.1	2822368
+GCF_907164905.1	2822344
+GCF_907165045.1	2823330
+GCF_907165215.1	76633
diff --git a/tests/testdata/standardize_output_testdata/~$result.xlsx b/tests/testdata/standardize_output_testdata/~$result.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..9065a5036ca920e20d106fc411a215177c9c6581
GIT binary patch
literal 165
zcmWd)O3cVj%v114ElSi;@C9;8Q%Z{z2mw)sAcjPS42E1Fna7~O;K7i}Py}S_0NK7!
PnNo%nhEkxY0>uyj#T*xR

literal 0
HcmV?d00001


From da7e05b3db0f479d31bf513e6c242764ca374a77 Mon Sep 17 00:00:00 2001
From: Mahmudur <mahmudhera93@gmail.com>
Date: Sun, 12 Nov 2023 15:21:55 -0500
Subject: [PATCH 5/6] added the test to utils

---
 tests/test_utils.py | 90 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 862ff05..26cdb5e 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -160,5 +160,95 @@ def test_1(self):
         assert prediction2.taxpath == '2'
         assert prediction2.taxpathsn == 'Bacteria'
 
+
+class TestStandardizeOutput(unittest.TestCase):
+    def test_everything_exists(self):
+        script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+        script_dir = os.path.join(script_dir, 'srcs')
+        script_full_path = os.path.join(script_dir, 'standardize_yacht_output.py')
+        assert os.path.exists(script_full_path)
+
+        yacht_output = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/result.xlsx')
+        assert os.path.exists(yacht_output)
+
+        genome_to_taxid = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/toy_genome_to_taxid.tsv')
+        assert os.path.exists(genome_to_taxid)
+
+        outdir = os.path.join(os.path.dirname(__file__), 'testdata')
+        assert os.path.exists(outdir)
+
+        cmd = f"python {script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
+        res = subprocess.run(cmd, shell=True, check=True)
+        assert res.returncode == 0
+        assert os.path.exists(os.path.join(outdir, 'cami_result.cami'))
+
+    def test_wrong_yacht_output(self):
+        script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+        script_dir = os.path.join(script_dir, 'srcs')
+        script_full_path = os.path.join(script_dir, 'standardize_yacht_output.py')
+        assert os.path.exists(script_full_path)
+
+        yacht_output = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/result_nonexisting.xlsx')
+        assert not os.path.exists(yacht_output)
+
+        genome_to_taxid = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/toy_genome_to_taxid.tsv')
+        assert os.path.exists(genome_to_taxid)
+
+        outdir = os.path.join(os.path.dirname(__file__), 'testdata')
+        assert os.path.exists(outdir)
+
+        cmd = f"python {script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
+        with self.assertRaises(subprocess.CalledProcessError):
+            res = subprocess.run(cmd, shell=True, check=True)
+
+    def test_wrong_genome_to_taxid(self):
+        script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+        script_dir = os.path.join(script_dir, 'srcs')
+        script_full_path = os.path.join(script_dir, 'standardize_yacht_output.py')
+        assert os.path.exists(script_full_path)
+
+        yacht_output = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/result.xlsx')
+        assert os.path.exists(yacht_output)
+
+        genome_to_taxid = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/toy_genome_to_taxid_nonexisting.tsv')
+        assert not os.path.exists(genome_to_taxid)
+
+        outdir = os.path.join(os.path.dirname(__file__), 'testdata')
+        assert os.path.exists(outdir)
+
+        cmd = f"python {script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
+        with self.assertRaises(subprocess.CalledProcessError):
+            res = subprocess.run(cmd, shell=True, check=True)
+
+    def test_wrong_outdir(self):
+        script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+        script_dir = os.path.join(script_dir, 'srcs')
+        script_full_path = os.path.join(script_dir, 'standardize_yacht_output.py')
+        assert os.path.exists(script_full_path)
+
+        yacht_output = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/result.xlsx')
+        assert os.path.exists(yacht_output)
+
+        genome_to_taxid = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/toy_genome_to_taxid.tsv')
+        assert os.path.exists(genome_to_taxid)
+
+        outdir = os.path.join(os.path.dirname(__file__), 'testdata_nonexisting')
+        cmd = 'rm -rf ' + outdir
+        try:
+            subprocess.run(cmd, shell=True, check=True)
+        except:
+            pass
+        assert not os.path.exists(outdir)
+
+        cmd = f"python {script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
+        res = subprocess.run(cmd, shell=True, check=True)
+        assert res.returncode == 0
+        assert os.path.exists(outdir)
+
+        cmd = 'rm -rf ' + outdir
+        res = subprocess.run(cmd, shell=True, check=True)
+        assert res.returncode == 0
+
+
 if __name__ == '__main__':
     unittest.main()

From acd913b6eca15fc15ca69a17239412eda96bf387 Mon Sep 17 00:00:00 2001
From: Maksym <maxim.lupey@gmail.com>
Date: Thu, 7 Dec 2023 14:02:00 -0500
Subject: [PATCH 6/6] refactoring

---
 tests/test_standardize_output.py | 112 +++++++++++++++----------------
 1 file changed, 55 insertions(+), 57 deletions(-)

diff --git a/tests/test_standardize_output.py b/tests/test_standardize_output.py
index e0ef8fe..8e30203 100644
--- a/tests/test_standardize_output.py
+++ b/tests/test_standardize_output.py
@@ -1,95 +1,93 @@
-import unittest 
+import unittest
 import os
 import subprocess
 
+def get_script_path():
+    script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+    return os.path.join(script_dir, 'srcs', 'standardize_yacht_output.py')
+
+def assert_file_exists(file_path):
+    assert os.path.exists(file_path)
+
+def assert_file_not_exists(file_path):
+    assert not os.path.exists(file_path)
+
+def create_outdir(outdir):
+    cmd = f'rm -rf {outdir}'
+    try:
+        subprocess.run(cmd, shell=True, check=True)
+    except:
+        pass
+    assert not os.path.exists(outdir)
+
+def cleanup_outdir(outdir):
+    cmd = f'rm -rf {outdir}'
+    res = subprocess.run(cmd, shell=True, check=True)
+    assert res.returncode == 0
+
 class TestScript(unittest.TestCase):
-    def test_everything_exists(self):
-        script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
-        script_dir = os.path.join(script_dir, 'srcs')
-        script_full_path = os.path.join(script_dir, 'standardize_yacht_output.py')
-        assert os.path.exists(script_full_path)
+    def setUp(self):
+        self.script_full_path = get_script_path()
 
+    def test_everything_exists(self):
         yacht_output = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/result.xlsx')
-        assert os.path.exists(yacht_output)
-
         genome_to_taxid = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/toy_genome_to_taxid.tsv')
-        assert os.path.exists(genome_to_taxid)
-
         outdir = os.path.join(os.path.dirname(__file__), 'testdata')
-        assert os.path.exists(outdir)
 
-        cmd = f"python {script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
+        assert_file_exists(self.script_full_path)
+        assert_file_exists(yacht_output)
+        assert_file_exists(genome_to_taxid)
+        assert_file_exists(outdir)
+
+        cmd = f"python {self.script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
         res = subprocess.run(cmd, shell=True, check=True)
         assert res.returncode == 0
-        assert os.path.exists(os.path.join(outdir, 'cami_result.cami'))
+        assert_file_exists(os.path.join(outdir, 'cami_result.cami'))
 
     def test_wrong_yacht_output(self):
-        script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
-        script_dir = os.path.join(script_dir, 'srcs')
-        script_full_path = os.path.join(script_dir, 'standardize_yacht_output.py')
-        assert os.path.exists(script_full_path)
-
         yacht_output = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/result_nonexisting.xlsx')
-        assert not os.path.exists(yacht_output)
-
         genome_to_taxid = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/toy_genome_to_taxid.tsv')
-        assert os.path.exists(genome_to_taxid)
-
         outdir = os.path.join(os.path.dirname(__file__), 'testdata')
-        assert os.path.exists(outdir)
-        
-        cmd = f"python {script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
+
+        assert_file_exists(self.script_full_path)
+        assert_file_not_exists(yacht_output)
+        assert_file_exists(genome_to_taxid)
+        assert_file_exists(outdir)
+
+        cmd = f"python {self.script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
         with self.assertRaises(subprocess.CalledProcessError):
             res = subprocess.run(cmd, shell=True, check=True)
 
     def test_wrong_genome_to_taxid(self):
-        script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
-        script_dir = os.path.join(script_dir, 'srcs')
-        script_full_path = os.path.join(script_dir, 'standardize_yacht_output.py')
-        assert os.path.exists(script_full_path)
-
         yacht_output = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/result.xlsx')
-        assert os.path.exists(yacht_output)
-
         genome_to_taxid = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/toy_genome_to_taxid_nonexisting.tsv')
-        assert not os.path.exists(genome_to_taxid)
-
         outdir = os.path.join(os.path.dirname(__file__), 'testdata')
-        assert os.path.exists(outdir)
 
-        cmd = f"python {script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
+        assert_file_exists(self.script_full_path)
+        assert_file_exists(yacht_output)
+        assert_file_not_exists(genome_to_taxid)
+        assert_file_exists(outdir)
+
+        cmd = f"python {self.script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
         with self.assertRaises(subprocess.CalledProcessError):
             res = subprocess.run(cmd, shell=True, check=True)
 
     def test_wrong_outdir(self):
-        script_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
-        script_dir = os.path.join(script_dir, 'srcs')
-        script_full_path = os.path.join(script_dir, 'standardize_yacht_output.py')
-        assert os.path.exists(script_full_path)
-
         yacht_output = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/result.xlsx')
-        assert os.path.exists(yacht_output)
-
         genome_to_taxid = os.path.join(os.path.dirname(__file__), 'testdata/standardize_output_testdata/toy_genome_to_taxid.tsv')
-        assert os.path.exists(genome_to_taxid)
-
         outdir = os.path.join(os.path.dirname(__file__), 'testdata_nonexisting')
-        cmd = 'rm -rf ' + outdir
-        try:
-            subprocess.run(cmd, shell=True, check=True)
-        except:
-            pass
-        assert not os.path.exists(outdir)
-
-        cmd = f"python {script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
-        res = subprocess.run(cmd, shell=True, check=True)
-        assert res.returncode == 0
-        assert os.path.exists(outdir)
 
-        cmd = 'rm -rf ' + outdir
+        assert_file_exists(self.script_full_path)
+        assert_file_exists(yacht_output)
+        assert_file_exists(genome_to_taxid)
+        create_outdir(outdir)
+
+        cmd = f"python {self.script_full_path} --yacht_output {yacht_output} --sheet_name min_coverage0.2 --genome_to_taxid {genome_to_taxid} --outfile_prefix cami_result --outdir {outdir}"
         res = subprocess.run(cmd, shell=True, check=True)
         assert res.returncode == 0
+        assert_file_exists(outdir)
 
+        cleanup_outdir(outdir)
 
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file