Commit 9cc6588: [V7-2846] Add ndpi support (#338)
* add .ndpi support for upload
* Fixing tricky global state change in test case
simedw authored Feb 7, 2022
1 parent ae23cd1 commit 9cc6588
Showing 6 changed files with 32 additions and 16 deletions.
10 changes: 4 additions & 6 deletions .github/workflows/tests.yml
@@ -18,9 +18,9 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install wheel
pip install --upgrade setuptools
pip install --editable ".[test]"
pip install --editable ".[ml]"
pip install --editable ".[test,ml]"
- name: Run tests
run: pytest
@@ -39,11 +39,9 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest
pip install pytest-describe
pip install wheel
pip install --upgrade setuptools
pip install --editable ".[test]"
pip install --editable ".[ml]"
pip install --editable ".[test,ml]"
- name: Run tests
run: pytest
18 changes: 13 additions & 5 deletions darwin/dataset/split_manager.py
@@ -47,7 +47,7 @@ class Split:
Attributes
----------
random: Optional[Dict[str, Path]], default: None
Stores the type of split (e.g.: ``train``, ``val``, ``test``) and the file path where the
Stores the type of split (e.g.: ``train``, ``val``, ``test``) and the file path where the
split is stored if the split is of type ``random``. Defaults to ``None``.
stratified: Optional[Dict[str, Dict[str, Path]]], default: None
Stores the relation between an annotation type and the partition-filepath key value of the
@@ -61,10 +61,10 @@ def is_valid(self) -> bool:
"""
Returns whether or not this split instance is valid.
Returns
Returns
-------
bool
``True`` if this isntance is valid, ``False`` otherwise.
``True`` if this instance is valid, ``False`` otherwise.
"""
return self.random is not None or self.stratified is not None

@@ -310,7 +310,11 @@ def _stratify_samples(
X_train = np.concatenate((X_train, np.array(single_files)), axis=0)
X_val, X_test, y_val, y_test = _remove_cross_contamination(
*train_test_split(
X_tmp, y_tmp, test_size=(test_size / (val_size + test_size)), random_state=split_seed, stratify=y_tmp,
X_tmp,
y_tmp,
test_size=(test_size / (val_size + test_size)),
random_state=split_seed,
stratify=y_tmp,
),
test_size,
)
@@ -322,7 +326,11 @@ def _stratify_samples(


def _remove_cross_contamination(
X_a: np.ndarray, X_b: np.ndarray, y_a: np.ndarray, y_b: np.ndarray, b_min_size: int,
X_a: np.ndarray,
X_b: np.ndarray,
y_a: np.ndarray,
y_b: np.ndarray,
b_min_size: int,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""
Remove cross contamination present in X_a and X_b by selecting one or the other on a flip coin decision.
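The reformatted call also makes the split arithmetic easier to read: once the training portion is set aside, the remaining pool is split into validation and test, and test_size / (val_size + test_size) re-expresses the overall test fraction relative to that remaining pool. A minimal, self-contained sketch of that ratio (illustrative values only, not darwin's code):

    # Sketch only: reproduce the val/test ratio used in _stratify_samples above.
    # With val_size=0.1 and test_size=0.2, the remaining 30% of files is split
    # roughly 1/3 vs 2/3, so the test partition is still 20% of the whole dataset.
    import numpy as np
    from sklearn.model_selection import train_test_split

    val_size, test_size, split_seed = 0.1, 0.2, 0
    X_tmp = np.arange(30)               # stand-in for the non-training file names
    y_tmp = np.repeat([0, 1, 2], 10)    # stand-in for their annotation classes

    X_val, X_test, y_val, y_test = train_test_split(
        X_tmp,
        y_tmp,
        test_size=test_size / (val_size + test_size),
        random_state=split_seed,
        stratify=y_tmp,
    )
    assert len(X_test) == 20 and len(X_val) == 10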
2 changes: 1 addition & 1 deletion darwin/utils.py
@@ -33,7 +33,7 @@


SUPPORTED_IMAGE_EXTENSIONS = [".png", ".jpeg", ".jpg", ".jfif", ".tif", ".tiff", ".bmp", ".svs"]
SUPPORTED_VIDEO_EXTENSIONS = [".avi", ".bpm", ".dcm", ".mov", ".mp4", ".pdf"]
SUPPORTED_VIDEO_EXTENSIONS = [".avi", ".bpm", ".dcm", ".mov", ".mp4", ".pdf", ".ndpi"]
SUPPORTED_EXTENSIONS = SUPPORTED_IMAGE_EXTENSIONS + SUPPORTED_VIDEO_EXTENSIONS


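Per the commit message and the upload test further down, these extension lists are what uploads are checked against, so adding ".ndpi" here is what actually enables the new format. A hypothetical illustration of how a path could be validated against them (is_supported_file is an invented name for this sketch, not necessarily a helper darwin itself provides):

    # Hypothetical sketch only: validate a file's suffix against the lists above.
    from pathlib import Path

    SUPPORTED_IMAGE_EXTENSIONS = [".png", ".jpeg", ".jpg", ".jfif", ".tif", ".tiff", ".bmp", ".svs"]
    SUPPORTED_VIDEO_EXTENSIONS = [".avi", ".bpm", ".dcm", ".mov", ".mp4", ".pdf", ".ndpi"]
    SUPPORTED_EXTENSIONS = SUPPORTED_IMAGE_EXTENSIONS + SUPPORTED_VIDEO_EXTENSIONS

    def is_supported_file(path: Path) -> bool:
        return path.suffix.lower() in SUPPORTED_EXTENSIONS

    assert is_supported_file(Path("slide_01.ndpi"))   # accepted after this commit
    assert not is_supported_file(Path("notes.txt"))   # still rejected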
2 changes: 1 addition & 1 deletion setup.py
@@ -36,7 +36,7 @@
"pydantic",
],
extras_require={
"test": ["responses", "pytest", "pytest-describe"],
"test": ["responses", "pytest", "pytest-describe", "sklearn"],
"ml": ["sklearn", "torch", "torchvision"],
},
packages=[
6 changes: 5 additions & 1 deletion tests/darwin/dataset/remote_dataset_test.py
@@ -361,7 +361,10 @@ def it_works(darwin_client: Client, dataset_name: str, dataset_slug: str, team_s
)
url = "http://localhost/api/datasets/1/items?page%5Bsize%5D=500"
responses.add(
responses.POST, url, json=files_content, status=200,
responses.POST,
url,
json=files_content,
status=200,
)

actual = remote_dataset.fetch_remote_files()
@@ -424,6 +427,7 @@ def works_with_supported_files(remote_dataset: RemoteDataset):
".mov",
".mp4",
".pdf",
".ndpi",
]
filenames = [f"test{extension}" for extension in supported_extensions]
assert_upload_mocks_are_correctly_called(remote_dataset, filenames)
10 changes: 8 additions & 2 deletions tests/darwin/dataset/split_manager_test.py
@@ -7,10 +7,16 @@


def test_requires_scikit_learn():
sklearn_module = sys.modules.get("sklearn")
sys.modules["sklearn"] = None

with pytest.raises(ImportError):
split_dataset("")
try:
with pytest.raises(ImportError):
split_dataset("")
finally:
del sys.modules["sklearn"]
if sklearn_module:
sys.modules["sklearn"] = sklearn_module


def describe_classification_dataset():
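This test change is the "tricky global state" fix from the commit message: the previous version left sys.modules["sklearn"] set to None, which made any later import of scikit-learn in the same test session raise ImportError, while the new version saves the real module and restores it in a finally block. As a hedged sketch (not part of this commit), the same save/blank/restore idea can be packaged as a reusable context manager:

    # Sketch only: generalise the sys.modules save/blank/restore pattern used in
    # test_requires_scikit_learn so other tests can simulate a missing dependency
    # without leaking global state.
    import sys
    from contextlib import contextmanager

    @contextmanager
    def hide_module(name: str):
        saved = sys.modules.get(name)
        sys.modules[name] = None  # importing `name` now raises ImportError
        try:
            yield
        finally:
            del sys.modules[name]
            if saved is not None:
                sys.modules[name] = saved

    # Usage (illustrative):
    # with hide_module("sklearn"), pytest.raises(ImportError):
    #     split_dataset("")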
