Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pin dask to 2024.1.1 #1301

Merged
merged 11 commits into from
Mar 1, 2024
4 changes: 2 additions & 2 deletions continuous_integration/docker/conda.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
python>=3.9
dask>=2022.3.0
dask==2024.1.1
pandas>=1.4.0
jpype1>=1.0.2
openjdk>=8
Expand All @@ -12,7 +12,7 @@ sphinx>=3.2.1
tzlocal>=2.1
fastapi>=0.92.0
httpx>=0.24.1
uvicorn>=0.13.4
uvicorn>=0.14
pyarrow>=14.0.1
prompt_toolkit>=3.0.8
pygments>=2.7.1
Expand Down
4 changes: 2 additions & 2 deletions continuous_integration/docker/main.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ RUN mamba install -y \
# build requirements
"maturin>=1.3,<1.4" \
# core dependencies
"dask>=2022.3.0" \
"dask==2024.1.1" \
"pandas>=1.4.0" \
"fastapi>=0.92.0" \
"httpx>=0.24.1" \
"uvicorn>=0.13.4" \
"uvicorn>=0.14" \
"tzlocal>=2.1" \
"prompt_toolkit>=3.0.8" \
"pygments>=2.7.1" \
Expand Down
5 changes: 3 additions & 2 deletions continuous_integration/environment-3.10.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ channels:
- conda-forge
dependencies:
- c-compiler
- dask>=2022.3.0
- dask==2024.1.1
- fastapi>=0.92.0
- fugue>=0.7.3
- httpx>=0.24.1
Expand All @@ -26,12 +26,13 @@ dependencies:
- pytest-xdist
- pytest
- python=3.10
- py-xgboost>=1.7.0
- scikit-learn>=1.0.0
- sphinx
- sqlalchemy
- tpot>=0.12.0
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- tzlocal>=2.1
- uvicorn>=0.13.4
- uvicorn>=0.14
- zlib
5 changes: 3 additions & 2 deletions continuous_integration/environment-3.11.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ channels:
- conda-forge
dependencies:
- c-compiler
- dask>=2022.3.0
- dask==2024.1.1
- fastapi>=0.92.0
- fugue>=0.7.3
- httpx>=0.24.1
Expand All @@ -26,12 +26,13 @@ dependencies:
- pytest-xdist
- pytest
- python=3.11
- py-xgboost>=1.7.0
- scikit-learn>=1.0.0
- sphinx
- sqlalchemy
- tpot>=0.12.0
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- tzlocal>=2.1
- uvicorn>=0.13.4
- uvicorn>=0.14
- zlib
8 changes: 5 additions & 3 deletions continuous_integration/environment-3.12.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ channels:
- conda-forge
dependencies:
- c-compiler
- dask>=2022.3.0
- dask==2024.1.1
- fastapi>=0.92.0
- fugue>=0.7.3
- httpx>=0.24.1
Expand All @@ -27,12 +27,14 @@ dependencies:
- pytest-xdist
- pytest
- python=3.12
- py-xgboost>=1.7.0
- scikit-learn>=1.0.0
- sphinx
- sqlalchemy
- tpot>=0.12.0
# TODO: add once tpot supports python 3.12
# - tpot>=0.12.0
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- tzlocal>=2.1
- uvicorn>=0.13.4
- uvicorn>=0.14
- zlib
5 changes: 3 additions & 2 deletions continuous_integration/environment-3.9.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ channels:
- conda-forge
dependencies:
- c-compiler
- dask=2022.3.0
- dask=2024.1.1
- fastapi=0.92.0
- fugue=0.7.3
- httpx=0.24.1
Expand All @@ -26,6 +26,7 @@ dependencies:
- pytest-xdist
- pytest
- python=3.9
- py-xgboost=1.7.0
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that we'll need a follow-up PR to dask-build-environment, since this pin will definitely conflict with the RAPIDS xgboost.

- scikit-learn=1.0.0
- sphinx
# TODO: remove this constraint when we require pandas>2
Expand All @@ -34,5 +35,5 @@ dependencies:
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- tzlocal=2.1
- uvicorn=0.13.4
- uvicorn=0.14
- zlib
5 changes: 3 additions & 2 deletions continuous_integration/gpuci/environment-3.10.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ channels:
dependencies:
- c-compiler
- zlib
- dask>=2022.3.0
- dask==2024.1.1
- fastapi>=0.92.0
- fugue>=0.7.3
- httpx>=0.24.1
Expand All @@ -32,14 +32,15 @@ dependencies:
- pytest-xdist
- pytest
- python=3.10
- py-xgboost>=1.7.0
- scikit-learn>=1.0.0
- sphinx
- sqlalchemy
- tpot>=0.12.0
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- tzlocal>=2.1
- uvicorn>=0.13.4
- uvicorn>=0.14
# GPU-specific requirements
- cudatoolkit=11.8
- cudf=24.04
Expand Down
5 changes: 3 additions & 2 deletions continuous_integration/gpuci/environment-3.9.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ channels:
dependencies:
- c-compiler
- zlib
- dask>=2022.3.0
- dask==2024.1.1
- fastapi>=0.92.0
- fugue>=0.7.3
- httpx>=0.24.1
Expand All @@ -32,14 +32,15 @@ dependencies:
- pytest-xdist
- pytest
- python=3.9
- py-xgboost>=1.7.0
- scikit-learn>=1.0.0
- sphinx
- sqlalchemy
- tpot>=0.12.0
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- tzlocal>=2.1
- uvicorn>=0.13.4
- uvicorn>=0.14
# GPU-specific requirements
- cudatoolkit=11.8
- cudf=24.04
Expand Down
4 changes: 2 additions & 2 deletions continuous_integration/recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ requirements:
- xz # [linux64]
run:
- python
- dask >=2022.3.0
- dask ==2024.1.1
- pandas >=1.4.0
- fastapi >=0.92.0
- httpx >=0.24.1
- uvicorn >=0.13.4
- uvicorn >=0.14
- tzlocal >=2.1
- prompt-toolkit >=3.0.8
- pygments >=2.7.1
Expand Down
11 changes: 0 additions & 11 deletions dask_sql/_compat.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,12 @@
import dask
import pandas as pd
import prompt_toolkit
from packaging.version import parse as parseVersion

_pandas_version = parseVersion(pd.__version__)
_prompt_toolkit_version = parseVersion(prompt_toolkit.__version__)
_dask_version = parseVersion(dask.__version__)

INDEXER_WINDOW_STEP_IMPLEMENTED = _pandas_version >= parseVersion("1.5.0")
PANDAS_GT_200 = _pandas_version >= parseVersion("2.0.0")

# TODO: remove if prompt-toolkit min version gets bumped
PIPE_INPUT_CONTEXT_MANAGER = _prompt_toolkit_version >= parseVersion("3.0.29")

# TODO: remove when dask min version gets bumped
BROADCAST_JOIN_SUPPORT_WORKING = _dask_version > parseVersion("2023.1.0")

# Parquet predicate-support version checks
PQ_NOT_IN_SUPPORT = parseVersion(dask.__version__) > parseVersion("2023.5.1")
PQ_IS_SUPPORT = parseVersion(dask.__version__) >= parseVersion("2023.3.1")

DASK_CUDF_TODATETIME_SUPPORT = _dask_version >= parseVersion("2023.5.1")
9 changes: 0 additions & 9 deletions dask_sql/physical/rel/logical/join.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from dask.base import tokenize
from dask.highlevelgraph import HighLevelGraph

from dask_sql._compat import BROADCAST_JOIN_SUPPORT_WORKING
from dask_sql.datacontainer import ColumnContainer, DataContainer
from dask_sql.physical.rel.base import BaseRelPlugin
from dask_sql.physical.rel.logical.filter import filter_or_scalar
Expand Down Expand Up @@ -259,14 +258,6 @@ def _join_on_columns(
added_columns = list(lhs_columns_to_add.keys())

broadcast = dask_config.get("sql.join.broadcast")
if not BROADCAST_JOIN_SUPPORT_WORKING and (
isinstance(broadcast, float) or broadcast
):
warnings.warn(
"Broadcast Joins may not work as expected with dask<2023.1.1"
"For more information refer to https://github.com/dask/dask/issues/9851"
" and https://github.com/dask/dask/issues/9870"
)
if join_type == "leftanti" and not is_cudf_type(df_lhs_with_tmp):
df = df_lhs_with_tmp.merge(
df_rhs_with_tmp,
Expand Down
15 changes: 5 additions & 10 deletions dask_sql/physical/rex/core/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from dask.highlevelgraph import HighLevelGraph
from dask.utils import random_state_data

from dask_sql._compat import DASK_CUDF_TODATETIME_SUPPORT
from dask_sql._datafusion_lib import SqlTypeName
from dask_sql.datacontainer import DataContainer
from dask_sql.mappings import (
Expand Down Expand Up @@ -964,15 +963,11 @@ def date_part(self, what, df: SeriesOrScalar):
elif what in {"YEAR", "YEARS"}:
return df.year
elif what == "DATE":
if isinstance(df, pd.Timestamp):
return df.date()
else:
if is_cudf_type(df) and not DASK_CUDF_TODATETIME_SUPPORT:
raise RuntimeError(
"Dask-cuDF to_datetime support requires Dask version >= 2023.5.1"
)
else:
return dd.to_datetime(df.strftime("%Y-%m-%d"))
return (
df.date()
if isinstance(df, pd.Timestamp)
else dd.to_datetime(df.strftime("%Y-%m-%d"))
)
else:
raise NotImplementedError(f"Extraction of {what} is not (yet) implemented.")

Expand Down
6 changes: 0 additions & 6 deletions dask_sql/physical/utils/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
from dask.layers import DataFrameIOLayer
from dask.utils import M, apply, is_arraylike

from dask_sql._compat import PQ_IS_SUPPORT, PQ_NOT_IN_SUPPORT

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -501,8 +499,6 @@ def _get_blockwise_input(input_index, indices: list, dsk: RegenerableGraph):


def _inv(symbol: str):
if symbol == "in" and not PQ_NOT_IN_SUPPORT:
raise ValueError("This version of dask does not support 'not in'")
return {
">": "<",
"<": ">",
Expand Down Expand Up @@ -568,8 +564,6 @@ def _blockwise_isin_dnf(op, indices: list, dsk: RegenerableGraph) -> DNF:

def _blockwise_isna_dnf(op, indices: list, dsk: RegenerableGraph) -> DNF:
# Return DNF expression pattern for `isna`
if not PQ_IS_SUPPORT:
raise ValueError("This version of dask does not support 'is' predicates.")
left = _get_blockwise_input(0, indices, dsk)
return DNF((left, "is", None))

Expand Down
4 changes: 2 additions & 2 deletions docs/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ dependencies:
- sphinx>=4.0.0
- sphinx-tabs
- dask-sphinx-theme>=2.0.3
- dask>=2022.3.0
- dask==2024.1.1
- pandas>=1.4.0
- fugue>=0.7.3
# FIXME: https://github.com/fugue-project/fugue/issues/526
- triad<0.9.2
- fastapi>=0.92.0
- httpx>=0.24.1
- uvicorn>=0.13.4
- uvicorn>=0.14
- tzlocal>=2.1
- prompt_toolkit>=3.0.8
- pygments>=2.7.1
Expand Down
4 changes: 2 additions & 2 deletions docs/requirements-docs.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
sphinx>=4.0.0
sphinx-tabs
dask-sphinx-theme>=3.0.0
dask>=2022.3.0
dask==2024.1.1
pandas>=1.4.0
fugue>=0.7.3
# FIXME: https://github.com/fugue-project/fugue/issues/526
triad<0.9.2
fastapi>=0.92.0
httpx>=0.24.1
uvicorn>=0.13.4
uvicorn>=0.14
tzlocal>=2.1
prompt_toolkit>=3.0.8
pygments>=2.7.1
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ classifiers = [
readme = "README.md"
requires-python = ">=3.9"
dependencies = [
"dask[dataframe]>=2022.3.0",
"distributed>=2022.3.0",
"dask[dataframe]==2024.1.1",
"distributed==2024.1.1",
"pandas>=1.4.0",
"fastapi>=0.92.0",
"httpx>=0.24.1",
"uvicorn>=0.13.4",
"uvicorn>=0.14",
"tzlocal>=2.1",
"prompt_toolkit>=3.0.8",
"pygments>=2.7.1",
Expand Down
9 changes: 0 additions & 9 deletions tests/integration/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from dask.utils_test import hlg_layer
from packaging.version import parse as parseVersion

from dask_sql._compat import PQ_IS_SUPPORT, PQ_NOT_IN_SUPPORT
from tests.utils import assert_eq

DASK_GT_2022_4_2 = parseVersion(dask.__version__) >= parseVersion("2022.4.2")
Expand Down Expand Up @@ -182,10 +181,6 @@ def test_filter_year(c):
"SELECT * FROM parquet_ddf WHERE b NOT IN (1, 3, 5, 6)",
lambda x: x[~x["b"].isin([1, 3, 5, 6])],
[[("b", "not in", (1, 3, 5, 6))]],
marks=pytest.mark.skipif(
not PQ_NOT_IN_SUPPORT,
reason="Requires https://github.com/dask/dask/pull/10320",
),
),
(
"SELECT a FROM parquet_ddf WHERE (b > 5 AND b < 10) OR a = 1",
Expand Down Expand Up @@ -317,10 +312,6 @@ def test_filter_decimal(c, gpu):
c.drop_table("df")


@pytest.mark.skipif(
not PQ_IS_SUPPORT,
reason="Requires https://github.com/dask/dask/pull/10320",
)
def test_predicate_pushdown_isna(tmpdir):
from dask_sql.context import Context

Expand Down
5 changes: 0 additions & 5 deletions tests/integration/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from dask.utils_test import hlg_layer

from dask_sql import Context
from dask_sql._compat import BROADCAST_JOIN_SUPPORT_WORKING
from dask_sql.datacontainer import Statistics
from tests.utils import assert_eq

Expand Down Expand Up @@ -524,10 +523,6 @@ def test_join_reorder(c):
assert_eq(result_df, expected_df, check_index=False)


@pytest.mark.xfail(
not BROADCAST_JOIN_SUPPORT_WORKING,
reason="Broadcast Joins do not work as expected with dask<2023.1.1",
)
@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
def test_broadcast_join(c, client, gpu):
df1 = dd.from_pandas(
Expand Down
Loading
Loading