Numpy 2.0 support (#220)

* added support for numpy 2.0
* moved multimanager to its own testfile
* remove python 3.8 support
* update workflow
* update workflow2
* add requirements.txt
* update requirements.txt
* update workflow3
* add debug line to multi test
* workflow update
* workflow - run hstouch after h5pyd setup
* workflow - fix workflow syntax error
* workflow - fix working directory
* fix test_dataset_fancyselect to work with h5py
* adjust numpy version requirements
* fix numpy 2.0 vs 2.1.1 incompat issue
* fix errors on windows
* fix for wincompat in vlen test
HDFGroup · Oct 1, 2024 · 059e7c3 · 059e7c3
1 parent 41ff190
commit 059e7c3
Show file tree

Hide file tree

Showing 11 changed files with 663 additions and 557 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -26,7 +26,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
 
     runs-on: ${{ matrix.os }}
     steps:
@@ -42,19 +42,13 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install flake8 pytest
-          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
 
       - name: Lint with flake8
         shell: bash
         run: |
           flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # stop the build if there are Python syntax errors or undefined names
           flake8 . --count --max-complexity=10 --max-line-length=127 --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
 
-      - name: Install package
-        shell: bash
-        run: |
-          pip install -e .
-
       - name: Checkout HSDS 
         uses: actions/checkout@v4
         with:
@@ -84,10 +78,18 @@ jobs:
       
       - name: HSDS Setup
         shell: bash
+        env:
+          ADMIN_PASSWORD: admin
+          ADMIN_USERNAME: admin
         working-directory: ${{github.workspace}}/hsds
         run: |
           python tests/integ/setup_test.py
 
+      - name: Install h5pyd package
+        shell: bash
+        run: |
+          pip install -e .
+
       - name: Create h5pyd test folder
         env:
           HS_USERNAME: test_user1

diff --git a/h5pyd/_apps/utillib.py b/h5pyd/_apps/utillib.py
@@ -338,7 +338,7 @@ def copy_array(src_arr, ctx):
 
     if has_reference(src_arr.dtype):
         # flatten array to simplify iteration
-        count = np.product(src_arr.shape)
+        count = int(np.prod(src_arr.shape))
         tgt_arr_flat = tgt_arr.reshape((count,))
         src_arr_flat = src_arr.reshape((count,))
         for i in range(count):

diff --git a/h5pyd/_hl/dataset.py b/h5pyd/_hl/dataset.py
@@ -1350,17 +1350,16 @@ def __setitem__(self, args, val):
                 # Attempt to directly convert the input array of vlen data to its base class
                 val = numpy.asarray(val, dtype=vlen_base_class)
 
-            except ValueError as ve:
+            except (ValueError, TypeError):
                 # Failed to convert input array to vlen base class directly, instead create a new array where
                 # each element is an array of the Dataset's dtype
-                self.log.debug(f"asarray ValueError: {ve}")
                 try:
                     # Force output shape
                     tmp = numpy.empty(shape=val.shape, dtype=self.dtype)
                     tmp[:] = [numpy.array(x, dtype=self.dtype) for x in val]
                     val = tmp
-                except ValueError as e:
-                    msg = f"ValueError converting value element by element: {e}"
+                except (ValueError, TypeError):
+                    msg = "ValueError converting value element by element"
                     self.log.debug(msg)
 
             if vlen_base_class == val.dtype:
@@ -1589,19 +1588,21 @@ def write_direct(self, source, source_sel=None, dest_sel=None):
         data = source.__getitem__(slices)
         self.__setitem__(dest_sel, data)
 
-    def __array__(self, dtype=None):
-        """Create a Numpy array containing the whole dataset.  DON'T THINK
-        THIS MEANS DATASETS ARE INTERCHANGABLE WITH ARRAYS.  For one thing,
-        you have to read the whole dataset everytime this method is called.
-        """
-        arr = numpy.empty(self._shape, dtype=self.dtype if dtype is None else dtype)
+    def __array__(self, dtype=None, copy=True):
+        if copy is False:
+            raise ValueError(
+                f"AstypeWrapper.__array__ received {copy=} "
+                f"but memory allocation cannot be avoided on read"
+            )
 
         # Special case for (0,)*-shape datasets
         if self._shape is None or numpy.prod(self._shape) == 0:
-            return arr
+            return numpy.empty(self._shape, dtype=self.dtype if dtype is None else dtype)
 
-        self.read_direct(arr)
-        return arr
+        data = self[:]
+        if dtype is not None:
+            return data.astype(dtype, copy=False)
+        return data
 
     def __repr__(self):
         if not self:

diff --git a/pyproject.toml b/pyproject.toml
@@ -33,7 +33,7 @@ requires-python = ">=3.8"
 version = "0.18.0"
 
 dependencies = [
-    "numpy >= 1.17.3, < 2.0.0",
+    "numpy >=2.0.0rc1; python_version>='3.9'",
     "requests_unixsocket",
     "pytz",
     "pyjwt",

diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,11 @@
+certifi==2024.8.30
+charset-normalizer==3.3.2
+h5py==3.12.1
+idna==3.10
+numpy==2.1.1
+packaging==24.1
+PyJWT==2.9.0
+pytz==2024.2
+requests==2.32.3
+requests-unixsocket==0.3.0
+urllib3==2.2.3
diff --git a/test/hl/test_attribute.py b/test/hl/test_attribute.py
@@ -26,7 +26,7 @@
 class TestAttribute(TestCase):
 
     def test_create(self):
-        filename = self.getFileName("create_attribfute")
+        filename = self.getFileName("create_attribute")
         print("filename:", filename)
         f = h5py.File(filename, 'w')
 
@@ -61,7 +61,7 @@ def test_create(self):
         self.assertEqual(value, "Hello HDF")
 
         # create attribute with as a fixed length string
-        g1.attrs.create('d1', np.string_("This is a numpy string"))
+        g1.attrs.create('d1', np.bytes_("This is a numpy string"))
         value = g1.attrs['d1']
         self.assertEqual(value, b"This is a numpy string")
 
@@ -89,7 +89,7 @@ def test_create(self):
             self.assertEqual(arr[i], 1)
 
         # array of strings
-        g1.attrs['strings'] = [np.string_("Hello"), np.string_("Good-bye")]
+        g1.attrs['strings'] = [np.bytes_("Hello"), np.bytes_("Good-bye")]
         arr = g1.attrs['strings']
         self.assertEqual(arr.shape, (2,))
         self.assertEqual(arr[0], b"Hello")