From 4f97ce46c52f19fe15fcde859cd7fbf39de43733 Mon Sep 17 00:00:00 2001
From: Konstantin Malanchev
Date: Wed, 6 Nov 2024 17:39:52 -0500
Subject: [PATCH] Change test_dataset band dtype

---
 tests/nested_dask/conftest.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tests/nested_dask/conftest.py b/tests/nested_dask/conftest.py
index bdcfad7..d2029bd 100644
--- a/tests/nested_dask/conftest.py
+++ b/tests/nested_dask/conftest.py
@@ -1,6 +1,8 @@
 import nested_dask as nd
 import nested_pandas as npd
 import numpy as np
+import pandas as pd
+import pyarrow as pa
 import pytest
 
 
@@ -18,7 +20,11 @@ def test_dataset():
     layer_data = {
         "t": randomstate.random(layer_size * n_base) * 20,
         "flux": randomstate.random(layer_size * n_base) * 100,
-        "band": randomstate.choice(["r", "g"], size=layer_size * n_base),
+        # Ensure pyarrow[string] dtype, not large_string
+        # https://github.com/lincc-frameworks/nested-dask/issues/71
+        "band": pd.Series(
+            randomstate.choice(["r", "g"], size=layer_size * n_base), dtype=pd.ArrowDtype(pa.string())
+        ),
         "index": np.arange(layer_size * n_base) % n_base,
     }
     layer_nf = npd.NestedFrame(data=layer_data).set_index("index").sort_index()