Commit 9cc6588: [V7-2846] Add ndpi support (#338)
* add .ndpi support for upload
* Fixing tricky global state change in test case
simedw authored Feb 7, 2022
1 parent ae23cd1 commit 9cc6588
Showing 6 changed files with 32 additions and 16 deletions.
10 changes: 4 additions & 6 deletions .github/workflows/tests.yml
@@ -18,9 +18,9 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install wheel
pip install --upgrade setuptools
pip install --editable ".[test]"
pip install --editable ".[ml]"
pip install --editable ".[test,ml]"
- name: Run tests
run: pytest
@@ -39,11 +39,9 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest
pip install pytest-describe
pip install wheel
pip install --upgrade setuptools
pip install --editable ".[test]"
pip install --editable ".[ml]"
pip install --editable ".[test,ml]"
- name: Run tests
run: pytest
18 changes: 13 additions & 5 deletions darwin/dataset/split_manager.py
@@ -47,7 +47,7 @@ class Split:
Attributes
----------
random: Optional[Dict[str, Path]], default: None
Stores the type of split (e.g.: ``train``, ``val``, ``test``) and the file path where the
Stores the type of split (e.g.: ``train``, ``val``, ``test``) and the file path where the
split is stored if the split is of type ``random``. Defaults to ``None``.
stratified: Optional[Dict[str, Dict[str, Path]]], default: None
Stores the relation between an annotation type and the partition-filepath key value of the
@@ -61,10 +61,10 @@ def is_valid(self) -> bool:
"""
Returns whether or not this split instance is valid.
Returns
Returns
-------
bool
``True`` if this isntance is valid, ``False`` otherwise.
``True`` if this instance is valid, ``False`` otherwise.
"""
return self.random is not None or self.stratified is not None

@@ -310,7 +310,11 @@ def _stratify_samples(
X_train = np.concatenate((X_train, np.array(single_files)), axis=0)
X_val, X_test, y_val, y_test = _remove_cross_contamination(
*train_test_split(
X_tmp, y_tmp, test_size=(test_size / (val_size + test_size)), random_state=split_seed, stratify=y_tmp,
X_tmp,
y_tmp,
test_size=(test_size / (val_size + test_size)),
random_state=split_seed,
stratify=y_tmp,
),
test_size,
)
@@ -322,7 +326,11 @@ def _stratify_samples(


def _remove_cross_contamination(
X_a: np.ndarray, X_b: np.ndarray, y_a: np.ndarray, y_b: np.ndarray, b_min_size: int,
X_a: np.ndarray,
X_b: np.ndarray,
y_a: np.ndarray,
y_b: np.ndarray,
b_min_size: int,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""
Remove cross contamination present in X_a and X_b by selecting one or the other on a flip coin decision.
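The reformatted call also makes the split arithmetic easier to read: once the training portion is set aside, the remaining pool is split into validation and test, and test_size / (val_size + test_size) re-expresses the overall test fraction relative to that remaining pool. A minimal, self-contained sketch of that ratio (illustrative values only, not darwin's code):

    # Sketch only: reproduce the val/test ratio used in _stratify_samples above.
    # With val_size=0.1 and test_size=0.2, the remaining 30% of files is split
    # roughly 1/3 vs 2/3, so the test partition is still 20% of the whole dataset.
    import numpy as np
    from sklearn.model_selection import train_test_split

    val_size, test_size, split_seed = 0.1, 0.2, 0
    X_tmp = np.arange(30)               # stand-in for the non-training file names
    y_tmp = np.repeat([0, 1, 2], 10)    # stand-in for their annotation classes

    X_val, X_test, y_val, y_test = train_test_split(
        X_tmp,
        y_tmp,
        test_size=test_size / (val_size + test_size),
        random_state=split_seed,
        stratify=y_tmp,
    )
    assert len(X_test) == 20 and len(X_val) == 10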
2 changes: 1 addition & 1 deletion darwin/utils.py
@@ -33,7 +33,7 @@


SUPPORTED_IMAGE_EXTENSIONS = [".png", ".jpeg", ".jpg", ".jfif", ".tif", ".tiff", ".bmp", ".svs"]
SUPPORTED_VIDEO_EXTENSIONS = [".avi", ".bpm", ".dcm", ".mov", ".mp4", ".pdf"]
SUPPORTED_VIDEO_EXTENSIONS = [".avi", ".bpm", ".dcm", ".mov", ".mp4", ".pdf", ".ndpi"]
SUPPORTED_EXTENSIONS = SUPPORTED_IMAGE_EXTENSIONS + SUPPORTED_VIDEO_EXTENSIONS


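Per the commit message and the upload test further down, these extension lists are what uploads are checked against, so adding ".ndpi" here is what actually enables the new format. A hypothetical illustration of how a path could be validated against them (is_supported_file is an invented name for this sketch, not necessarily a helper darwin itself provides):

    # Hypothetical sketch only: validate a file's suffix against the lists above.
    from pathlib import Path

    SUPPORTED_IMAGE_EXTENSIONS = [".png", ".jpeg", ".jpg", ".jfif", ".tif", ".tiff", ".bmp", ".svs"]
    SUPPORTED_VIDEO_EXTENSIONS = [".avi", ".bpm", ".dcm", ".mov", ".mp4", ".pdf", ".ndpi"]
    SUPPORTED_EXTENSIONS = SUPPORTED_IMAGE_EXTENSIONS + SUPPORTED_VIDEO_EXTENSIONS

    def is_supported_file(path: Path) -> bool:
        return path.suffix.lower() in SUPPORTED_EXTENSIONS

    assert is_supported_file(Path("slide_01.ndpi"))   # accepted after this commit
    assert not is_supported_file(Path("notes.txt"))   # still rejected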
2 changes: 1 addition & 1 deletion setup.py
@@ -36,7 +36,7 @@
"pydantic",
],
extras_require={
"test": ["responses", "pytest", "pytest-describe"],
"test": ["responses", "pytest", "pytest-describe", "sklearn"],
"ml": ["sklearn", "torch", "torchvision"],
},
packages=[
6 changes: 5 additions & 1 deletion tests/darwin/dataset/remote_dataset_test.py
@@ -361,7 +361,10 @@ def it_works(darwin_client: Client, dataset_name: str, dataset_slug: str, team_s
)
url = "http://localhost/api/datasets/1/items?page%5Bsize%5D=500"
responses.add(
responses.POST, url, json=files_content, status=200,
responses.POST,
url,
json=files_content,
status=200,
)

actual = remote_dataset.fetch_remote_files()
@@ -424,6 +427,7 @@ def works_with_supported_files(remote_dataset: RemoteDataset):
".mov",
".mp4",
".pdf",
".ndpi",
]
filenames = [f"test{extension}" for extension in supported_extensions]
assert_upload_mocks_are_correctly_called(remote_dataset, filenames)
10 changes: 8 additions & 2 deletions tests/darwin/dataset/split_manager_test.py
@@ -7,10 +7,16 @@


def test_requires_scikit_learn():
sklearn_module = sys.modules.get("sklearn")
sys.modules["sklearn"] = None

with pytest.raises(ImportError):
split_dataset("")
try:
with pytest.raises(ImportError):
split_dataset("")
finally:
del sys.modules["sklearn"]
if sklearn_module:
sys.modules["sklearn"] = sklearn_module


def describe_classification_dataset():
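This test change is the "tricky global state" fix from the commit message: the previous version left sys.modules["sklearn"] set to None, which made any later import of scikit-learn in the same test session raise ImportError, while the new version saves the real module and restores it in a finally block. As a hedged sketch (not part of this commit), the same save/blank/restore idea can be packaged as a reusable context manager:

    # Sketch only: generalise the sys.modules save/blank/restore pattern used in
    # test_requires_scikit_learn so other tests can simulate a missing dependency
    # without leaking global state.
    import sys
    from contextlib import contextmanager

    @contextmanager
    def hide_module(name: str):
        saved = sys.modules.get(name)
        sys.modules[name] = None  # importing `name` now raises ImportError
        try:
            yield
        finally:
            del sys.modules[name]
            if saved is not None:
                sys.modules[name] = saved

    # Usage (illustrative):
    # with hide_module("sklearn"), pytest.raises(ImportError):
    #     split_dataset("")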
