Skip to content

Commit

Permalink
Change default dtype for indices to 64 bit
Browse files Browse the repository at this point in the history
  • Loading branch information
cmutel committed Aug 18, 2024
1 parent 04f1a0f commit 15ca581
Show file tree
Hide file tree
Showing 17 changed files with 22 additions and 11 deletions.
3 changes: 2 additions & 1 deletion bw_processing/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np

MAX_SIGNED_32BIT_INT = 2147483647
MAX_SIGNED_64BIT_INT = 9223372036854775807

# We could try to save space by not storing the columns
# `row_index` and `col_index`, and add them after loading from
Expand All @@ -20,7 +21,7 @@
("maximum", np.float32),
("negative", bool),
]
INDICES_DTYPE = [("row", np.int32), ("col", np.int32)]
INDICES_DTYPE = [("row", np.int64), ("col", np.int64)]

NAME_RE = re.compile(r"^[\w\-\.]*$")

Expand Down
9 changes: 9 additions & 0 deletions bw_processing/datapackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from .constants import (
DEFAULT_LICENSES,
INDICES_DTYPE,
MAX_SIGNED_32BIT_INT,
MAX_SIGNED_64BIT_INT,
NUMPY_SERIALIZE_FORMAT_EXTENSION,
NUMPY_SERIALIZE_FORMAT_NAME,
PARQUET_SERIALIZE_FORMAT_EXTENSION,
Expand Down Expand Up @@ -257,6 +259,12 @@ def rehydrate_interface(

self.data[index] = resource

def get_max_index_value(self) -> int:
    """Return the maximum index value for this datapackage.

    The result is ``MAX_SIGNED_64BIT_INT`` when the ``"64_bit_indices"``
    metadata entry is truthy. When that entry is falsy or missing, the
    result is ``MAX_SIGNED_32BIT_INT``.
    """
    uses_64_bit = self.metadata.get("64_bit_indices")
    return MAX_SIGNED_64BIT_INT if uses_64_bit else MAX_SIGNED_32BIT_INT


class FilteredDatapackage(DatapackageBase):
"""A subset of a datapackage. Used in matrix construction or other data manipulation operations.
Expand Down Expand Up @@ -360,6 +368,7 @@ def _create(
"combinatorial": combinatorial,
"sequential": sequential,
"seed": seed,
"64_bit_indices": True,
"sum_intra_duplicates": sum_intra_duplicates,
"sum_inter_duplicates": sum_inter_duplicates,
"matrix_serialize_format_type": matrix_serialize_format_type.value,
Expand Down
Binary file modified tests/fixtures/indexing/array.indices.npy
Binary file not shown.
19 changes: 10 additions & 9 deletions tests/fixtures/indexing/datapackage.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@
"profile": "data-resource",
"format": "npy",
"mediatype": "application/octet-stream",
"name": "array.data",
"name": "array.indices",
"matrix": "sa_matrix",
"kind": "data",
"path": "array.data.npy",
"kind": "indices",
"path": "array.indices.npy",
"group": "array",
"category": "array",
"nrows": 3
Expand All @@ -62,10 +62,10 @@
"profile": "data-resource",
"format": "npy",
"mediatype": "application/octet-stream",
"name": "array.indices",
"name": "array.data",
"matrix": "sa_matrix",
"kind": "indices",
"path": "array.indices.npy",
"kind": "data",
"path": "array.data.npy",
"group": "array",
"category": "array",
"nrows": 3
Expand Down Expand Up @@ -167,10 +167,11 @@
]
}
],
"created": "2021-05-18T13:12:19.590666Z",
"created": "2024-08-18T20:58:57.736902Z",
"combinatorial": false,
"sequential": false,
"seed": null,
"sum_intra_duplicates": true,
"sum_inter_duplicates": false
}
"sum_inter_duplicates": false,
"matrix_serialize_format_type": "numpy"
}
Binary file modified tests/fixtures/indexing/vector.data.npy
Binary file not shown.
Binary file modified tests/fixtures/indexing/vector.indices.npy
Binary file not shown.
Binary file modified tests/fixtures/merging/merging_first.zip
Binary file not shown.
Binary file modified tests/fixtures/merging/merging_same_1.zip
Binary file not shown.
Binary file modified tests/fixtures/merging/merging_same_2.zip
Binary file not shown.
Binary file modified tests/fixtures/merging/merging_second.zip
Binary file not shown.
Binary file modified tests/fixtures/test-fixture.zip
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/fixtures/tfd/datapackage.json
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@
"valid_for": "sa-data-array"
}
],
"created": "2024-05-22T12:20:16.587994+00:00Z",
"created": "2024-08-18T20:58:29.941013Z",
"combinatorial": false,
"sequential": false,
"seed": null,
Expand Down
Binary file modified tests/fixtures/tfd/sa-array-interface.indices.npy
Binary file not shown.
Binary file modified tests/fixtures/tfd/sa-data-array.indices.npy
Binary file not shown.
Binary file modified tests/fixtures/tfd/sa-data-vector-from-dict.indices.npy
Binary file not shown.
Binary file modified tests/fixtures/tfd/sa-data-vector.indices.npy
Binary file not shown.
Binary file modified tests/fixtures/tfd/sa-vector-interface.indices.npy
Binary file not shown.

0 comments on commit 15ca581

Please sign in to comment.