Skip to content

Commit

Permalink
GH1033 Add overloads of engine for pd.read_json (#1035)
Browse files Browse the repository at this point in the history
* GH1033 Add overloads of engine for pd.read_json

* GH1033 PR Feedback

* GH1033 PR Feedback

* GH1033 Fix ignore type

* GH1033 PR feedback
  • Loading branch information
loicdiridollou authored Nov 21, 2024
1 parent e610b76 commit 92bd9cb
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 1 deletion.
106 changes: 105 additions & 1 deletion pandas-stubs/io/json/_json.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,61 @@ def read_json(
nrows: int | None = ...,
storage_options: StorageOptions = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
engine: Literal["ujson"] = ...,
) -> JsonReader[Series]: ...
# Overload: chunked read of a Series with the pyarrow engine.
# Matches calls that pass ``typ="series"``, ``lines=True``, an integer
# ``chunksize`` and ``engine="pyarrow"`` explicitly (none of these four has a
# default here), and returns a ``JsonReader[Series]``.
# ``path_or_buf`` is limited to a path or a bytes buffer for this engine; the
# stale ``ReadBuffer[str]`` variant of the parameter was dropped because a
# parameter name may only appear once in a signature.
# NOTE(review): pandas may reject ``chunksize`` combined with
# ``engine="pyarrow"`` at runtime -- confirm this overload is reachable.
@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[bytes],
    *,
    orient: JsonSeriesOrient | None = ...,
    typ: Literal["series"],
    dtype: bool | Mapping[HashableT, DtypeArg] | None = ...,
    convert_axes: bool | None = ...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    precise_float: bool = ...,
    date_unit: TimeUnit | None = ...,
    encoding: str | None = ...,
    encoding_errors: (
        Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"]
        | None
    ) = ...,
    lines: Literal[True],
    chunksize: int,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | NoDefault = ...,
    engine: Literal["pyarrow"],
) -> JsonReader[Series]: ...
# Overload: chunked read of a DataFrame with the default (ujson) engine.
# Matches calls that pass ``lines=True`` and an integer ``chunksize`` while
# leaving ``typ`` at ``"frame"`` and ``engine`` at ``"ujson"`` (both may be
# omitted), and returns a ``JsonReader[DataFrame]``.
# ``path_or_buf`` accepts text buffers as well as bytes buffers: only the
# pyarrow overloads restrict the input to bytes, and narrowing this one would
# leave ``read_json(io.StringIO(...), lines=True, chunksize=n)`` without a
# matching overload.
@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
    *,
    orient: JsonFrameOrient | None = ...,
    typ: Literal["frame"] = ...,
    dtype: bool | Mapping[HashableT, DtypeArg] | None = ...,
    convert_axes: bool | None = ...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    precise_float: bool = ...,
    date_unit: TimeUnit | None = ...,
    encoding: str | None = ...,
    encoding_errors: (
        Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"]
        | None
    ) = ...,
    lines: Literal[True],
    chunksize: int,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | NoDefault = ...,
    engine: Literal["ujson"] = ...,
) -> JsonReader[DataFrame]: ...
@overload
def read_json(
path_or_buf: FilePath | ReadBuffer[bytes],
*,
orient: JsonFrameOrient | None = ...,
typ: Literal["frame"] = ...,
Expand All @@ -72,6 +123,7 @@ def read_json(
nrows: int | None = ...,
storage_options: StorageOptions = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
engine: Literal["pyarrow"],
) -> JsonReader[DataFrame]: ...
@overload
def read_json(
Expand All @@ -96,6 +148,32 @@ def read_json(
nrows: int | None = ...,
storage_options: StorageOptions = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
engine: Literal["ujson"] = ...,
) -> Series: ...
# Overload: whole-input read of a Series with the pyarrow engine.
# Matches calls that pass ``typ="series"``, ``lines=True`` and
# ``engine="pyarrow"`` explicitly while ``chunksize`` is ``None`` (or omitted);
# the result is a ``Series`` rather than a ``JsonReader``.
# ``path_or_buf`` is a path or a bytes buffer only.
@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[bytes],
    *,
    orient: JsonSeriesOrient | None = ...,
    typ: Literal["series"],
    dtype: bool | Mapping[HashableT, DtypeArg] | None = ...,
    convert_axes: bool | None = ...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    precise_float: bool = ...,
    date_unit: TimeUnit | None = ...,
    encoding: str | None = ...,
    encoding_errors: (
        Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"]
        | None
    ) = ...,
    lines: Literal[True],
    chunksize: None = ...,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | NoDefault = ...,
    engine: Literal["pyarrow"],
) -> Series: ...
@overload
def read_json(
Expand All @@ -120,6 +198,32 @@ def read_json(
nrows: int | None = ...,
storage_options: StorageOptions = ...,
dtype_backend: DtypeBackend | NoDefault = ...,
engine: Literal["ujson"] = ...,
) -> DataFrame: ...
# Overload: whole-input read of a DataFrame with the pyarrow engine.
# Matches calls that pass ``lines=True`` and ``engine="pyarrow"`` explicitly,
# with ``typ`` left at ``"frame"`` and ``chunksize`` at ``None`` (both may be
# omitted); the result is a ``DataFrame`` rather than a ``JsonReader``.
# ``path_or_buf`` is a path or a bytes buffer only.
@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[bytes],
    *,
    orient: JsonFrameOrient | None = ...,
    typ: Literal["frame"] = ...,
    dtype: bool | Mapping[HashableT, DtypeArg] | None = ...,
    convert_axes: bool | None = ...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    precise_float: bool = ...,
    date_unit: TimeUnit | None = ...,
    encoding: str | None = ...,
    encoding_errors: (
        Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"]
        | None
    ) = ...,
    lines: Literal[True],
    chunksize: None = ...,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | NoDefault = ...,
    engine: Literal["pyarrow"],
) -> DataFrame: ...

class JsonReader(abc.Iterator, Generic[NDFrameT]):
Expand Down
30 changes: 30 additions & 0 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1714,3 +1714,33 @@ def test_read_excel_index_col() -> None:
),
pd.DataFrame,
)


def test_read_json_engine() -> None:
    """Exercise the ``engine`` keyword of `pd.read_json` against the stubs.

    Two accepted combinations are checked both statically (``assert_type``)
    and at runtime (``check``): a text buffer with ``engine="ujson"`` and a
    bytes buffer with ``lines=True, engine="pyarrow"``.  Three combinations
    that the stubs must reject are listed under ``TYPE_CHECKING_INVALID_USAGE``
    with the expected mypy / pyright diagnostics suppressed.
    """
    json_text = """{"index": {"0": 0, "1": 1},
"a": {"0": 1, "1": null},
"b": {"0": 2.5, "1": 4.5},
"c": {"0": true, "1": false},
"d": {"0": "a", "1": "b"},
"e": {"0": 1577.2, "1": 1577.1}}"""
    # ujson engine reading from a text buffer.
    ujson_frame = pd.read_json(io.StringIO(json_text), engine="ujson")
    check(assert_type(ujson_frame, pd.DataFrame), pd.DataFrame)

    json_lines = b"""{"col 1":"a","col 2":"b"}
{"col 1":"c","col 2":"d"}"""
    byte_stream = io.BytesIO(json_lines)
    # pyarrow engine reading line-delimited JSON from a bytes buffer.
    pyarrow_frame = pd.read_json(byte_stream, lines=True, engine="pyarrow")
    check(assert_type(pyarrow_frame, pd.DataFrame), pd.DataFrame)

    if TYPE_CHECKING_INVALID_USAGE:
        # Each ignore comment must stay on the same line as the call it covers.
        pd.read_json(byte_stream, lines=False, engine="pyarrow")  # type: ignore[call-overload] # pyright: ignore[reportArgumentType, reportCallIssue]
        pd.read_json(io.StringIO(json_text), engine="pyarrow")  # type: ignore[call-overload] # pyright: ignore[reportArgumentType]
        pd.read_json(io.StringIO(json_text), lines=True, engine="pyarrow")  # type: ignore[call-overload] # pyright: ignore[reportArgumentType, reportCallIssue]

0 comments on commit 92bd9cb

Please sign in to comment.