Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
goodwanghan committed Aug 10, 2023
1 parent c1ba28c commit 03d9b46
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 23 deletions.
28 changes: 15 additions & 13 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,28 @@
"settings": {
"terminal.integrated.shell.linux": "/bin/bash",
"python.pythonPath": "/usr/local/bin/python",
"python.linting.enabled": true,
"python.linting.pylintEnabled": true,
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint"
"python.defaultInterpreterPath": "/usr/local/bin/python",
"isort.interpreter": [
"/usr/local/bin/python"
],
"flake8.interpreter": [
"/usr/local/bin/python"
],
"pylint.interpreter": [
"/usr/local/bin/python"
]
},
"extensions": [
"ms-python.python",
"ms-python.isort",
"ms-python.flake8",
"ms-python.pylint",
"ms-python.mypy",
"GitHub.copilot",
"njpwerner.autodocstring"
]
}},

}
},
"forwardPorts": [
8888
],
Expand Down
55 changes: 55 additions & 0 deletions .github/workflows/test_dask.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# This workflow installs the development environment and runs the Dask test
# target twice: once against a pinned lower-bound set of dask/pandas/pyarrow
# versions and once against the latest releases of the same packages.
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Dask Tests

on:
  push:
    branches: [ master ]
    paths-ignore:
      - 'docs/**'
      - '**.md'
  pull_request:
    branches: [ master ]
    paths-ignore:
      - 'docs/**'
      - '**.md'

# Only one run per workflow/ref is kept alive; a newer push cancels the older run.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test_dask_lower_bound:
    # Job name matches the dask version pinned in the "Setup Dask" step below.
    name: Dask 2023.5.0
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.8
        uses: actions/setup-python@v1
        with:
          # Quoted so YAML never reinterprets the version as a float.
          python-version: "3.8"
      - name: Install dependencies
        run: make devenv
      - name: Setup Dask
        # pip's exact-pin operator is `==`; a single `=` is rejected as an
        # invalid requirement. Extras are quoted so the shell cannot treat
        # the square brackets as a glob pattern.
        run: pip install "pyarrow==11.0.0" "pandas==2.0.2" "dask[dataframe,distributed]==2023.5.0"
      - name: Test
        run: make testdask

  test_dask_latest:
    name: Dask Latest
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.10
        uses: actions/setup-python@v1
        with:
          # Must stay quoted: unquoted 3.10 would be parsed as the float 3.1.
          python-version: "3.10"
      - name: Install dependencies
        run: make devenv
      - name: Setup Dask
        # -U upgrades to the newest available releases on top of the dev environment.
        run: pip install -U "dask[dataframe,distributed]" pyarrow pandas
      - name: Test
        run: make testdask
12 changes: 10 additions & 2 deletions fugue_dask/_constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
from typing import Any, Dict
import pandas as pd

import dask
import pandas as pd
import pyarrow as pa
from packaging import version

FUGUE_DASK_CONF_DEFAULT_PARTITIONS = "fugue.dask.default.partitions"
FUGUE_DASK_DEFAULT_CONF: Dict[str, Any] = {FUGUE_DASK_CONF_DEFAULT_PARTITIONS: -1}
FUGUE_DASK_USE_ARROW = hasattr(pd, "ArrowDtype") and dask.__version__ >= "2023.7.1"
FUGUE_DASK_USE_ARROW = (
hasattr(pd, "ArrowDtype")
and version.parse(dask.__version__) >= version.parse("2023.2")
and version.parse(pa.__version__) >= version.parse("7")
and version.parse(pd.__version__) >= version.parse("2")
)
6 changes: 5 additions & 1 deletion fugue_dask/execution_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,11 @@ def take(
_presort: IndexedOrderedDict = presort or partition_spec.presort

def _partition_take(partition, n, presort):
if len(presort.keys()) > 0:
assert_or_throw(
partition.shape[1] == len(meta),
FugueBug("hitting the dask bug where partition keys are lost"),
)
if len(presort.keys()) > 0 and len(partition) > 1:
partition = partition.sort_values(
list(presort.keys()),
ascending=list(presort.values()),
Expand Down
15 changes: 8 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def get_version() -> str:
install_requires=[
"triad>=0.9.1",
"adagio>=0.2.4",
"pyarrow>=0.15.1",
"pandas>=1.2.0",
"pyarrow>=6.0.1",
"pandas>=1.3.5",
# sql dependencies
"qpd>=0.4.4",
"fugue-sql-antlr>=0.1.6",
Expand All @@ -52,8 +52,9 @@ def get_version() -> str:
"spark": ["pyspark>=3.1.1"],
"dask": [
"dask[distributed,dataframe]; python_version < '3.8'",
"dask[distributed,dataframe]>=2022.9.0,<2023.7.1; python_version >= '3.8'",
"qpd[dask]>=0.4.4",
"dask[distributed,dataframe]>=2023.5.0; python_version >= '3.8'",
"pyarrow>=11.0.0",
"pandas>=2.0.2",
],
"ray": ["ray[data]>=2.1.0", "duckdb>=0.5.0", "pyarrow>=6.0.1"],
"duckdb": [
Expand All @@ -73,14 +74,14 @@ def get_version() -> str:
"fugue-sql-antlr[cpp]>=0.1.6",
"pyspark>=3.1.1",
"dask[distributed,dataframe]; python_version < '3.8'",
"dask[distributed,dataframe]>=2022.9.0,<2023.7.1; python_version >= '3.8'",
"dask[distributed,dataframe]>=2023.5.0; python_version >= '3.8'",
"ray[data]>=2.1.0",
"qpd[dask]>=0.4.4",
"notebook",
"jupyterlab",
"ipython>=7.10.0",
"duckdb>=0.5.0",
"pyarrow>=6.0.1",
"pyarrow>=11.0.0",
"pandas>=2.0.2",
"ibis-framework>=2.1.1; python_version < '3.8'",
"ibis-framework>=3.2.0,<6; python_version >= '3.8'",
"polars",
Expand Down

0 comments on commit 03d9b46

Please sign in to comment.