diff --git a/pandas-stubs/io/json/_json.pyi b/pandas-stubs/io/json/_json.pyi index 8151a645..8e023631 100644 --- a/pandas-stubs/io/json/_json.pyi +++ b/pandas-stubs/io/json/_json.pyi @@ -48,10 +48,61 @@ def read_json( nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["ujson"] = ..., ) -> JsonReader[Series]: ... @overload def read_json( - path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + path_or_buf: FilePath | ReadBuffer[bytes], + *, + orient: JsonSeriesOrient | None = ..., + typ: Literal["series"], + dtype: bool | Mapping[HashableT, DtypeArg] | None = ..., + convert_axes: bool | None = ..., + convert_dates: bool | list[str] = ..., + keep_default_dates: bool = ..., + precise_float: bool = ..., + date_unit: TimeUnit | None = ..., + encoding: str | None = ..., + encoding_errors: ( + Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] + | None + ) = ..., + lines: Literal[True], + chunksize: int, + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., + dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["pyarrow"], +) -> JsonReader[Series]: ... 
+@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + *, + orient: JsonFrameOrient | None = ..., + typ: Literal["frame"] = ..., + dtype: bool | Mapping[HashableT, DtypeArg] | None = ..., + convert_axes: bool | None = ..., + convert_dates: bool | list[str] = ..., + keep_default_dates: bool = ..., + precise_float: bool = ..., + date_unit: TimeUnit | None = ..., + encoding: str | None = ..., + encoding_errors: ( + Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] + | None + ) = ..., + lines: Literal[True], + chunksize: int, + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., + dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["ujson"] = ..., +) -> JsonReader[DataFrame]: ... +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[bytes], *, orient: JsonFrameOrient | None = ..., typ: Literal["frame"] = ..., @@ -72,6 +123,7 @@ def read_json( nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["pyarrow"], ) -> JsonReader[DataFrame]: ... @overload def read_json( @@ -96,6 +148,32 @@ def read_json( nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["ujson"] = ..., +) -> Series: ... 
+@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[bytes], + *, + orient: JsonSeriesOrient | None = ..., + typ: Literal["series"], + dtype: bool | Mapping[HashableT, DtypeArg] | None = ..., + convert_axes: bool | None = ..., + convert_dates: bool | list[str] = ..., + keep_default_dates: bool = ..., + precise_float: bool = ..., + date_unit: TimeUnit | None = ..., + encoding: str | None = ..., + encoding_errors: ( + Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] + | None + ) = ..., + lines: Literal[True], + chunksize: None = ..., + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., + dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["pyarrow"], ) -> Series: ... @overload def read_json( @@ -120,6 +198,32 @@ def read_json( nrows: int | None = ..., storage_options: StorageOptions = ..., dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["ujson"] = ..., +) -> DataFrame: ... +@overload +def read_json( + path_or_buf: FilePath | ReadBuffer[bytes], + *, + orient: JsonFrameOrient | None = ..., + typ: Literal["frame"] = ..., + dtype: bool | Mapping[HashableT, DtypeArg] | None = ..., + convert_axes: bool | None = ..., + convert_dates: bool | list[str] = ..., + keep_default_dates: bool = ..., + precise_float: bool = ..., + date_unit: TimeUnit | None = ..., + encoding: str | None = ..., + encoding_errors: ( + Literal["strict", "ignore", "replace", "backslashreplace", "surrogateescape"] + | None + ) = ..., + lines: Literal[True], + chunksize: None = ..., + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., + dtype_backend: DtypeBackend | NoDefault = ..., + engine: Literal["pyarrow"], ) -> DataFrame: ... 
class JsonReader(abc.Iterator, Generic[NDFrameT]): diff --git a/tests/test_io.py b/tests/test_io.py index 95476440..17c702d2 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1714,3 +1714,33 @@ def test_read_excel_index_col() -> None: ), pd.DataFrame, ) + + +def test_read_json_engine() -> None: + """Test the engine argument for `pd.read_json` introduced with pandas 2.0.""" + data = """{"index": {"0": 0, "1": 1}, + "a": {"0": 1, "1": null}, + "b": {"0": 2.5, "1": 4.5}, + "c": {"0": true, "1": false}, + "d": {"0": "a", "1": "b"}, + "e": {"0": 1577.2, "1": 1577.1}}""" + check( + assert_type(pd.read_json(io.StringIO(data), engine="ujson"), pd.DataFrame), + pd.DataFrame, + ) + + data_lines = b"""{"col 1":"a","col 2":"b"} + {"col 1":"c","col 2":"d"}""" + dd = io.BytesIO(data_lines) + check( + assert_type( + pd.read_json(dd, lines=True, engine="pyarrow"), + pd.DataFrame, + ), + pd.DataFrame, + ) + + if TYPE_CHECKING_INVALID_USAGE: + pd.read_json(dd, lines=False, engine="pyarrow") # type: ignore[call-overload] # pyright: ignore[reportArgumentType, reportCallIssue] + pd.read_json(io.StringIO(data), engine="pyarrow") # type: ignore[call-overload] # pyright: ignore[reportArgumentType] + pd.read_json(io.StringIO(data), lines=True, engine="pyarrow") # type: ignore[call-overload] # pyright: ignore[reportArgumentType, reportCallIssue]