Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Mongo Data Migration #804

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Write the date in place of the "Unreleased" in the case a new version is release

- Add adapters for reading back assets with the image/jpeg and
multipart/related;type=image/jpeg mimetypes.
- Refactor CSVAdapter to allow pd.read_csv kwargs

## v0.1.0b10 (2024-10-11)

Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/service.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ or its dask counterpart.
.. autosummary::
:toctree: generated

tiled.adapters.csv.read_csv
tiled.adapters.csv.CSVAdapter
tiled.adapters.excel.ExcelAdapter
tiled.adapters.hdf5.HDF5Adapter
tiled.adapters.netcdf.read_netcdf
Expand Down
4 changes: 2 additions & 2 deletions tiled/_tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import tifffile
import xarray

from ..adapters.csv import read_csv
from ..adapters.csv import CSVAdapter
from ..adapters.dataframe import ArrayAdapter
from ..adapters.tiff import TiffAdapter
from ..catalog import in_memory
Expand Down Expand Up @@ -236,7 +236,7 @@ async def test_write_dataframe_external_direct(a, tmpdir):
filepath = str(tmpdir / "file.csv")
data_uri = ensure_uri(filepath)
df.to_csv(filepath, index=False)
dfa = read_csv(data_uri)
dfa = CSVAdapter.from_uris(data_uri)
structure = asdict(dfa.structure())
await a.create_node(
key="x",
Expand Down
2 changes: 1 addition & 1 deletion tiled/_tests/test_jpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def client(tmpdir_module):

tree = MapAdapter(
{
"color": JPEGAdapter(ensure_uri(path)),
"color": JPEGAdapter.from_uris(ensure_uri(path)),
"sequence": JPEGSequenceAdapter.from_uris(
[ensure_uri(filepath) for filepath in filepaths]
),
Expand Down
6 changes: 5 additions & 1 deletion tiled/_tests/test_writing.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,11 @@ def test_write_with_specified_mimetype(tree):
df = pandas.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
structure = TableStructure.from_pandas(df)

for mimetype in [PARQUET_MIMETYPE, "text/csv", APACHE_ARROW_FILE_MIME_TYPE]:
for mimetype in [
PARQUET_MIMETYPE,
"text/csv",
APACHE_ARROW_FILE_MIME_TYPE,
]:
x = client.new(
"table",
[
Expand Down
19 changes: 1 addition & 18 deletions tiled/adapters/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(
self._structure = structure
self._metadata = metadata or {}
self.specs = specs or []
self.access_policy = access_policy

@classmethod
def from_array(
Expand Down Expand Up @@ -100,30 +101,12 @@ def __repr__(self) -> str:

@property
def dims(self) -> Optional[Tuple[str, ...]]:
    """
    Dimension names taken from the array structure held by this adapter.

    Returns
    -------
    The ``dims`` attribute of the stored structure, passed through
    unchanged; per the return annotation this may be ``None``.
    """
    held_structure = self._structure
    return held_structure.dims

def metadata(self) -> JSON:
    """
    Give back the metadata held by this adapter.

    Returns
    -------
    The object stored in ``self._metadata``, returned as-is (no copy is
    made here).
    """
    return self._metadata

def structure(self) -> ArrayStructure:
    """
    Give back the array structure held by this adapter.

    Returns
    -------
    The object stored in ``self._structure``, returned as-is (no copy is
    made here).
    """
    return self._structure

def read(
Expand Down
13 changes: 13 additions & 0 deletions tiled/adapters/arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,19 @@ def __init__(
self.specs = list(specs or [])
self.access_policy = access_policy

@classmethod
def from_assets(
    cls,
    assets: List[Asset],
    structure: TableStructure,
    metadata: Optional[JSON] = None,
    specs: Optional[List[Spec]] = None,
    access_policy: Optional[AccessPolicy] = None,
    **kwargs: Optional[Union[str, List[str], Dict[str, str]]],
) -> "ArrowAdapter":
    """
    Alternate constructor that builds the adapter from asset records.

    Parameters
    ----------
    assets : assets whose ``data_uri`` attributes are collected, in order
    structure : forwarded positionally to the class constructor
    metadata : forwarded positionally to the class constructor
    specs : forwarded positionally to the class constructor
    access_policy : forwarded positionally to the class constructor
    kwargs : accepted but not used by this method

    Returns
    -------
    A new instance created by calling ``cls`` with the collected URIs.
    """
    uris = []
    for asset in assets:
        uris.append(asset.data_uri)
    return cls(uris, structure, metadata, specs, access_policy)

def metadata(self) -> JSON:
"""

Expand Down
16 changes: 15 additions & 1 deletion tiled/adapters/awkward_buffers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
A directory containing awkward buffers, one file per form key.
"""
from pathlib import Path
from typing import List, Optional
from typing import Dict, List, Optional, Union

import awkward.forms

Expand Down Expand Up @@ -73,3 +73,17 @@ def from_directory(
specs=specs,
access_policy=access_policy,
)

@classmethod
def from_assets(
    cls,
    assets: List[Asset],
    structure: AwkwardStructure,
    metadata: Optional[JSON] = None,
    specs: Optional[List[Spec]] = None,
    access_policy: Optional[AccessPolicy] = None,
    **kwargs: Optional[Union[str, List[str], Dict[str, str]]],
) -> "AwkwardBuffersAdapter":
    """
    Alternate constructor that builds the adapter from asset records.

    Only the first asset is consulted: its ``data_uri`` is handed to
    ``from_directory`` along with the remaining arguments. Any further
    entries in ``assets`` are ignored by this method.

    Parameters
    ----------
    assets : asset records; indexing ``assets[0]`` fails on an empty list
    structure : forwarded positionally to ``from_directory``
    metadata : forwarded positionally to ``from_directory``
    specs : forwarded positionally to ``from_directory``
    access_policy : forwarded positionally to ``from_directory``
    kwargs : accepted but not used by this method

    Returns
    -------
    Whatever ``cls.from_directory`` returns for the first asset's URI.
    """
    first_uri = assets[0].data_uri
    return cls.from_directory(
        first_uri, structure, metadata, specs, access_policy
    )
Loading