From 4a738a56ca5c7d4798d52f1b37e41d7bc92c7af6 Mon Sep 17 00:00:00 2001 From: Jessy Barrette <30420025+JessyBarrette@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:05:01 -0400 Subject: [PATCH] add input-path-list (#108) * add input-path-list * add os path separator compatibility to both cli and configuration --- CHANGELOG.md | 2 ++ ocean_data_parser/batch/convert.py | 12 +++++++-- .../batch/default-batch-config.yaml | 4 ++- tests/test_batch.py | 27 +++++++++++++++++++ tests/test_cli.py | 27 +++++++++++++++++++ 5 files changed, 69 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index afdd12dc..9805e655 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ability to list multiple file glob expression and associated metadata. - Add Onset.csv timestamp format: "\d+\/\d+\/\d\d\d\d\s+\d+\:\d+\:\d+" = "%m/%d/%Y %H:%M:%S" - Rely on ruff for format and linter testing +- Add option to pass a list of input_path paths via the configuration file or an +os path separator-delimited list via the command line interface or the configuration ### Fixed diff --git a/ocean_data_parser/batch/convert.py b/ocean_data_parser/batch/convert.py index 10da5752..d6d97d35 100644 --- a/ocean_data_parser/batch/convert.py +++ b/ocean_data_parser/batch/convert.py @@ -86,7 +86,11 @@ def validate_parser_kwargs(ctx, _, value): "-i", "--input-path", type=str, - help="Input path to file list. It can be a glob expression (ex: *.cnv)", + help=( + "Input path to file list. It can be a glob expression (ex: *.cnv)" + " or a list of paths separated by a colons [:] (linux,mac) " + "and semi-colons [;] (windows)." 
+ ), ) @click.option( "--exclude", @@ -248,9 +252,13 @@ def get_excluded_files(self) -> list: def get_source_files(self) -> list: excluded_files = self.get_excluded_files() + paths = self.config["input_path"] + paths = paths.split(os.pathsep) if isinstance(paths, str) else paths + return [ Path(file) - for file in glob(self.config["input_path"], recursive=True) + for path in paths + for file in glob(path, recursive=True) if file not in excluded_files ] diff --git a/ocean_data_parser/batch/default-batch-config.yaml b/ocean_data_parser/batch/default-batch-config.yaml index 3919e034..07ecf695 100644 --- a/ocean_data_parser/batch/default-batch-config.yaml +++ b/ocean_data_parser/batch/default-batch-config.yaml @@ -1,7 +1,9 @@ --- name: Batch Conversion Name -input_path: "" # file or glob expression +input_path: "" # glob expression of files to convert. + # It can be a single glob string, an os path separator (: for unix, ; for windows) + # separated string of paths, or a list of paths input_table: # retrieve files to convert from tables (ignore input_path if set) # input_table is used to retrieve files to convert from tables # that are listing different glob expressions to retrieve files and associated metadata diff --git a/tests/test_batch.py b/tests/test_batch.py index 030726a3..5d88212b 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -1,3 +1,4 @@ +import os from pathlib import Path import pandas as pd @@ -389,6 +390,32 @@ def test_batch_input_path(self, input_path): assert len(source_files) == len(list(glob(input_path))) assert set(source_files) == set(Path(file) for file in glob(input_path)) + def test_batch_input_path_with_os_path_seperator(self): + input_path = ( + "tests/parsers_test_files/dfo/odf/bio/CTD/*.ODF" + + os.pathsep + + "tests/parsers_test_files/seabird/**/*.btl" + ) + batch = BatchConversion(input_path=input_path) + source_files = batch.get_source_files() + expected_files = [ + file for path in input_path.split(os.pathsep) for file in glob(path) 
] + assert source_files + assert len(source_files) == len(expected_files) + + def test_batch_input_path_with_list(self): + input_path = [ + "tests/parsers_test_files/dfo/odf/bio/CTD/*.ODF", + "tests/parsers_test_files/seabird/**/*.btl", + ] + batch = BatchConversion(input_path=input_path) + source_files = batch.get_source_files() + expected_files = [file for path in input_path for file in glob(path)] + assert source_files + assert len(source_files) == len(expected_files) + assert set(source_files) == set(expected_files) + @pytest.mark.parametrize( "exclude", ( diff --git a/tests/test_cli.py b/tests/test_cli.py index a23ceca5..7a2b925f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -151,3 +151,30 @@ def test_odpy_convert_parser_kwargs(tmp_path): assert ( "Load weather data from metqa file" in results.output ), "Parser kwargs not passed to parser" + + +def test_multiple_input_paths(tmp_path): + args = ( + "--log-level", + "DEBUG", + "convert", + "--input-path", + "../tests/parsers_test_files/dfo/nafc/pcnv/ctd/cab041_2023_011.pcnv" + + os.pathsep + + "../tests/parsers_test_files/dfo/nafc/pcnv/ctd/cab041_2023_011.pcnv", + "--parser", + "dfo.nafc.pcnv", + "--output-path", + str(tmp_path), + "--multiprocessing", + "1", + ) + results = run_command(cli.main, args) + assert results.exit_code == 0, results.output + assert "ERROR" not in results.output, results.output + assert "Run conversion" in results.output, results.output + assert "cab041_2023_011" in results.output, results.output + assert "Conversion completed" in results.output + assert ( + "2/2 files needs to be converted" in results.output + ), "Failed to process two files input paths"