Skip to content

Commit

Permalink
add input-path-list (#108)
Browse files Browse the repository at this point in the history
* add input-path-list
* add os path separator compatibility to both cli and configuration
  • Loading branch information
JessyBarrette authored Aug 20, 2024
1 parent fcbbc0a commit 4a738a5
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 3 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
ability to list multiple file glob expression and associated metadata.
- Add Onset.csv timestamp format: "\d+\/\d+\/\d\d\d\d\s+\d+\:\d+\:\d+" = "%m/%d/%Y %H:%M:%S"
- Rely on ruff for format and linter testing
- Add option to pass a list of `input_path` paths via the configuration file, or a
  list of paths joined by the OS path separator via the command line interface or the configuration file

### Fixed

Expand Down
12 changes: 10 additions & 2 deletions ocean_data_parser/batch/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,11 @@ def validate_parser_kwargs(ctx, _, value):
"-i",
"--input-path",
type=str,
help="Input path to file list. It can be a glob expression (ex: *.cnv)",
help=(
"Input path to file list. It can be a glob expression (ex: *.cnv)"
" or a list of paths separated by a colons [:] (linux,mac) "
"and semi-colons [;] (windows)."
),
)
@click.option(
"--exclude",
Expand Down Expand Up @@ -248,9 +252,13 @@ def get_excluded_files(self) -> list:

def get_source_files(self) -> list:
    """Return the files matched by the configured ``input_path``.

    ``self.config["input_path"]`` may be a single glob expression, several
    expressions joined by ``os.pathsep`` (``:`` on unix, ``;`` on windows),
    or a list of expressions. Each expression is expanded with a recursive
    glob and files listed by ``self.get_excluded_files()`` are dropped.

    Returns:
        list: One ``Path`` per match, in expression order. A file matched
        by several expressions is listed once per match. An empty or
        missing ``input_path`` value yields an empty list.
    """
    paths = self.config["input_path"]
    if not paths:
        return []
    if isinstance(paths, (str, os.PathLike)):
        # A lone string/path may carry several expressions joined by os.pathsep.
        paths = os.fspath(paths).split(os.pathsep)

    # Build the exclusion lookup once instead of scanning a list per file.
    excluded_files = set(self.get_excluded_files())

    return [
        Path(file)
        for path in paths
        for file in glob(os.fspath(path), recursive=True)
        if file not in excluded_files
    ]

Expand Down
4 changes: 3 additions & 1 deletion ocean_data_parser/batch/default-batch-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
---

name: Batch Conversion Name
input_path: "" # file or glob expression
input_path: "" # glob expression of files to convert.
# It can be a single string, a string of paths joined by the OS path separator
# (: for unix, ; for windows), or a list of paths
input_table: # retrieve files to convert from tables (ignore input_path if set)
# input_table is used to retrieve files to convert from tables
# that are listing different glob expressions to retrieve files and associated metadata
Expand Down
27 changes: 27 additions & 0 deletions tests/test_batch.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from pathlib import Path

import pandas as pd
Expand Down Expand Up @@ -389,6 +390,32 @@ def test_batch_input_path(self, input_path):
assert len(source_files) == len(list(glob(input_path)))
assert set(source_files) == set(Path(file) for file in glob(input_path))

def test_batch_input_path_with_os_path_seperator(self):
    """Check that an ``os.pathsep``-joined ``input_path`` string is split and globbed."""
    patterns = [
        "tests/parsers_test_files/dfo/odf/bio/CTD/*.ODF",
        "tests/parsers_test_files/seabird/**/*.btl",
    ]
    input_path = os.pathsep.join(patterns)
    batch = BatchConversion(input_path=input_path)
    source_files = batch.get_source_files()
    # Expand with recursive=True so "**" is interpreted the same way on both
    # sides of the comparison, and wrap in Path to match the returned type.
    expected_files = [
        Path(file)
        for pattern in patterns
        for file in glob(pattern, recursive=True)
    ]
    assert source_files
    assert len(source_files) == len(expected_files)
    assert set(source_files) == set(expected_files)

def test_batch_input_path_with_list(self):
    """Check that a list of glob expressions is accepted as ``input_path``."""
    input_path = [
        "tests/parsers_test_files/dfo/odf/bio/CTD/*.ODF",
        "tests/parsers_test_files/seabird/**/*.btl",
    ]
    batch = BatchConversion(input_path=input_path)
    source_files = batch.get_source_files()
    # Wrap in Path: a set of Path objects never equals a set of the plain
    # strings returned by glob. recursive=True keeps "**" consistent with
    # the expansion under test.
    expected_files = [
        Path(file)
        for path in input_path
        for file in glob(path, recursive=True)
    ]
    assert source_files
    assert len(source_files) == len(expected_files)
    assert set(source_files) == set(expected_files)

@pytest.mark.parametrize(
"exclude",
(
Expand Down
27 changes: 27 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,30 @@ def test_odpy_convert_parser_kwargs(tmp_path):
assert (
"Load weather data from metqa file" in results.output
), "Parser kwargs not passed to parser"


def test_multiple_input_paths(tmp_path):
    """Run ``convert`` with the same file listed twice in ``--input-path``.

    The two entries are joined with ``os.pathsep``; the command output must
    report both of them as files to convert.
    """
    pcnv_file = "../tests/parsers_test_files/dfo/nafc/pcnv/ctd/cab041_2023_011.pcnv"
    joined_paths = os.pathsep.join((pcnv_file, pcnv_file))
    args = (
        "--log-level",
        "DEBUG",
        "convert",
        "--input-path",
        joined_paths,
        "--parser",
        "dfo.nafc.pcnv",
        "--output-path",
        str(tmp_path),
        "--multiprocessing",
        "1",
    )
    results = run_command(cli.main, args)
    output = results.output

    assert results.exit_code == 0, output
    assert "ERROR" not in output, output
    for expected in ("Run conversion", "cab041_2023_011"):
        assert expected in output, output
    assert "Conversion completed" in output
    assert (
        "2/2 files needs to be converted" in output
    ), "Failed to process two files input paths"

0 comments on commit 4a738a5

Please sign in to comment.