From c68449a65cb594f4cc449795ad4cc057eb30856f Mon Sep 17 00:00:00 2001
From: rmhowe425 <45905457+rmhowe425@users.noreply.github.com>
Date: Tue, 11 Jul 2023 16:52:05 -0400
Subject: [PATCH] DEPR: Deprecate literal string input for read_xml (#53809)
* Updating documentation and adding deprecation logic for read_xml.
* Fixing documentation issue and adding unit test
* Updating unit tests and documentation.
* Fixing unit tests and documentation issues
* Fixing unit tests and documentation issues
* Fixing unit tests and documentation issues
* Fixing import error in documentation
* Updated deprecation logic per reviewer recommendations.
* Updating deprecation logic and documentation per reviewer recommendations.
* Fixing logic error
* Fixing implementation per reviewer recommendations.
* Updating implementation per reviewer recommendations.
* Cleaning up the deprecation logic a bit.
* Updating implementation per reviewer recommendations.
* Updating unit tests
* Fixing discrepancy in doc string.
* Updating implementation based on reviewer recommendations.
---
doc/source/user_guide/io.rst | 13 +--
doc/source/whatsnew/v1.5.0.rst | 3 +-
doc/source/whatsnew/v2.1.0.rst | 1 +
pandas/io/xml.py | 30 +++++-
pandas/tests/io/xml/test_xml.py | 128 +++++++++++++++----------
pandas/tests/io/xml/test_xml_dtypes.py | 30 +++---
6 files changed, 133 insertions(+), 72 deletions(-)
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index ec0e7d0636b07..4d4b9e086e9e5 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -2919,6 +2919,7 @@ Read an XML string:
.. ipython:: python
+ from io import StringIO
xml = """
@@ -2941,7 +2942,7 @@ Read an XML string:
"""
- df = pd.read_xml(xml)
+ df = pd.read_xml(StringIO(xml))
df
Read a URL with no options:
@@ -2961,7 +2962,7 @@ as a string:
f.write(xml)
with open(file_path, "r") as f:
- df = pd.read_xml(f.read())
+ df = pd.read_xml(StringIO(f.read()))
df
Read in the content of the "books.xml" as instance of ``StringIO`` or
@@ -3052,7 +3053,7 @@ For example, below XML contains a namespace with prefix, ``doc``, and URI at
"""
- df = pd.read_xml(xml,
+ df = pd.read_xml(StringIO(xml),
xpath="//doc:row",
namespaces={"doc": "https://example.com"})
df
@@ -3082,7 +3083,7 @@ But assigning *any* temporary name to correct URI allows parsing by nodes.
"""
- df = pd.read_xml(xml,
+ df = pd.read_xml(StringIO(xml),
xpath="//pandas:row",
namespaces={"pandas": "https://example.com"})
df
@@ -3117,7 +3118,7 @@ However, if XPath does not reference node names such as default, ``/*``, then
"""
- df = pd.read_xml(xml, xpath="./row")
+ df = pd.read_xml(StringIO(xml), xpath="./row")
df
shows the attribute ``sides`` on ``shape`` element was not parsed as
@@ -3218,7 +3219,7 @@ output (as shown below for demonstration) for easier parse into ``DataFrame``:
"""
- df = pd.read_xml(xml, stylesheet=xsl)
+ df = pd.read_xml(StringIO(xml), stylesheet=xsl)
df
For very large XML files that can range in hundreds of megabytes to gigabytes, :func:`pandas.read_xml`
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 9653226b96196..44728e7e552ab 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -221,6 +221,7 @@ apply converter methods, and parse dates (:issue:`43567`).
.. ipython:: python
+ from io import StringIO
xml_dates = """
@@ -244,7 +245,7 @@ apply converter methods, and parse dates (:issue:`43567`).
"""
df = pd.read_xml(
- xml_dates,
+ StringIO(xml_dates),
dtype={'sides': 'Int64'},
converters={'degrees': str},
parse_dates=['date']
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 6fcddad70f22b..7450fc6fdc1da 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -313,6 +313,7 @@ Deprecations
- Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
- Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`)
- Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. (:issue:`53409`)
+- Deprecated literal string input to :func:`read_xml`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead. (:issue:`53767`)
- Deprecated literal string/bytes input to :func:`read_html`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead. (:issue:`53767`)
- Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`)
- Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`)
diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index bb165c4724022..6421f710f80d6 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -11,6 +11,7 @@
Any,
Callable,
)
+import warnings
from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
@@ -19,6 +20,7 @@
ParserError,
)
from pandas.util._decorators import doc
+from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend
from pandas.core.dtypes.common import is_list_like
@@ -29,6 +31,7 @@
file_exists,
get_handle,
infer_compression,
+ is_file_like,
is_fsspec_url,
is_url,
stringify_path,
@@ -802,6 +805,22 @@ def _parse(
p: _EtreeFrameParser | _LxmlFrameParser
+ if isinstance(path_or_buffer, str) and not any(
+ [
+ is_file_like(path_or_buffer),
+ file_exists(path_or_buffer),
+ is_url(path_or_buffer),
+ is_fsspec_url(path_or_buffer),
+ ]
+ ):
+ warnings.warn(
+ "Passing literal xml to 'read_xml' is deprecated and "
+ "will be removed in a future version. To read from a "
+ "literal string, wrap it in a 'StringIO' object.",
+ FutureWarning,
+ stacklevel=find_stack_level(),
+ )
+
if parser == "lxml":
lxml = import_optional_dependency("lxml.etree", errors="ignore")
@@ -894,6 +913,10 @@ def read_xml(
string or a path. The string can further be a URL. Valid URL schemes
include http, ftp, s3, and file.
+ .. deprecated:: 2.1.0
+ Passing xml literal strings is deprecated.
+ Wrap literal xml input in ``io.StringIO`` or ``io.BytesIO`` instead.
+
xpath : str, optional, default './\*'
The XPath to parse required set of nodes for migration to DataFrame.
XPath should return a collection of elements and not a single
@@ -1049,6 +1072,7 @@ def read_xml(
Examples
--------
+ >>> import io
>>> xml = '''
...
...
@@ -1068,7 +1092,7 @@ def read_xml(
...
... '''
- >>> df = pd.read_xml(xml)
+ >>> df = pd.read_xml(io.StringIO(xml))
>>> df
shape degrees sides
0 square 360 4.0
@@ -1082,7 +1106,7 @@ def read_xml(
...
... '''
- >>> df = pd.read_xml(xml, xpath=".//row")
+ >>> df = pd.read_xml(io.StringIO(xml), xpath=".//row")
>>> df
shape degrees sides
0 square 360 4.0
@@ -1108,7 +1132,7 @@ def read_xml(
...
... '''
- >>> df = pd.read_xml(xml,
+ >>> df = pd.read_xml(io.StringIO(xml),
... xpath="//doc:row",
... namespaces={{"doc": "https://example.com"}})
>>> df
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index a3a1646bc4748..1a64d9910d8bf 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -246,6 +246,19 @@
)
+@td.skip_if_no("lxml")
+def test_literal_xml_deprecation():
+ # GH 53809
+ msg = (
+ "Passing literal xml to 'read_xml' is deprecated and "
+ "will be removed in a future version. To read from a "
+ "literal string, wrap it in a 'StringIO' object."
+ )
+
+ with tm.assert_produces_warning(FutureWarning, match=msg):
+ read_xml(xml_default_nmsp)
+
+
@pytest.fixture(params=["rb", "r"])
def mode(request):
return request.param
@@ -300,7 +313,7 @@ def test_parser_consistency_file(xml_books):
def test_parser_consistency_url(parser, httpserver):
httpserver.serve_content(content=xml_default_nmsp)
- df_xpath = read_xml(xml_default_nmsp, parser=parser)
+ df_xpath = read_xml(StringIO(xml_default_nmsp), parser=parser)
df_iter = read_xml(
BytesIO(xml_default_nmsp.encode()),
parser=parser,
@@ -353,6 +366,11 @@ def test_file_buffered_reader_string(xml_books, parser, mode):
with open(xml_books, mode, encoding="utf-8" if mode == "r" else None) as f:
xml_obj = f.read()
+ if mode == "rb":
+ xml_obj = StringIO(xml_obj.decode())
+ elif mode == "r":
+ xml_obj = StringIO(xml_obj)
+
df_str = read_xml(xml_obj, parser=parser)
df_expected = DataFrame(
@@ -373,6 +391,11 @@ def test_file_buffered_reader_no_xml_declaration(xml_books, parser, mode):
next(f)
xml_obj = f.read()
+ if mode == "rb":
+ xml_obj = StringIO(xml_obj.decode())
+ elif mode == "r":
+ xml_obj = StringIO(xml_obj)
+
df_str = read_xml(xml_obj, parser=parser)
df_expected = DataFrame(
@@ -391,7 +414,7 @@ def test_file_buffered_reader_no_xml_declaration(xml_books, parser, mode):
def test_string_charset(parser):
txt = "<中文標籤>12
中文標籤>"
- df_str = read_xml(txt, parser=parser)
+ df_str = read_xml(StringIO(txt), parser=parser)
df_expected = DataFrame({"c1": 1, "c2": 2}, index=[0])
@@ -449,34 +472,48 @@ def test_empty_string_lxml(val):
]
)
with pytest.raises(XMLSyntaxError, match=msg):
- read_xml(val, parser="lxml")
+ if isinstance(val, str):
+ read_xml(StringIO(val), parser="lxml")
+ else:
+ read_xml(BytesIO(val), parser="lxml")
@pytest.mark.parametrize("val", ["", b""])
def test_empty_string_etree(val):
with pytest.raises(ParseError, match="no element found"):
- read_xml(val, parser="etree")
+ if isinstance(val, str):
+ read_xml(StringIO(val), parser="etree")
+ else:
+ read_xml(BytesIO(val), parser="etree")
@td.skip_if_no("lxml")
def test_wrong_file_path_lxml():
- from lxml.etree import XMLSyntaxError
-
+ msg = (
+ "Passing literal xml to 'read_xml' is deprecated and "
+ "will be removed in a future version. To read from a "
+ "literal string, wrap it in a 'StringIO' object."
+ )
filename = os.path.join("data", "html", "books.xml")
with pytest.raises(
- XMLSyntaxError,
- match=("Start tag expected, '<' not found"),
+ FutureWarning,
+ match=msg,
):
read_xml(filename, parser="lxml")
def test_wrong_file_path_etree():
+ msg = (
+ "Passing literal xml to 'read_xml' is deprecated and "
+ "will be removed in a future version. To read from a "
+ "literal string, wrap it in a 'StringIO' object."
+ )
filename = os.path.join("data", "html", "books.xml")
with pytest.raises(
- ParseError,
- match=("not well-formed"),
+ FutureWarning,
+ match=msg,
):
read_xml(filename, parser="etree")
@@ -539,7 +576,7 @@ def test_bad_xpath_lxml(xml_books):
def test_default_namespace(parser):
df_nmsp = read_xml(
- xml_default_nmsp,
+ StringIO(xml_default_nmsp),
xpath=".//ns:row",
namespaces={"ns": "http://example.com"},
parser=parser,
@@ -565,7 +602,7 @@ def test_default_namespace(parser):
def test_prefix_namespace(parser):
df_nmsp = read_xml(
- xml_prefix_nmsp,
+ StringIO(xml_prefix_nmsp),
xpath=".//doc:row",
namespaces={"doc": "http://example.com"},
parser=parser,
@@ -589,14 +626,14 @@ def test_prefix_namespace(parser):
@td.skip_if_no("lxml")
def test_consistency_default_namespace():
df_lxml = read_xml(
- xml_default_nmsp,
+ StringIO(xml_default_nmsp),
xpath=".//ns:row",
namespaces={"ns": "http://example.com"},
parser="lxml",
)
df_etree = read_xml(
- xml_default_nmsp,
+ StringIO(xml_default_nmsp),
xpath=".//doc:row",
namespaces={"doc": "http://example.com"},
parser="etree",
@@ -608,14 +645,14 @@ def test_consistency_default_namespace():
@td.skip_if_no("lxml")
def test_consistency_prefix_namespace():
df_lxml = read_xml(
- xml_prefix_nmsp,
+ StringIO(xml_prefix_nmsp),
xpath=".//doc:row",
namespaces={"doc": "http://example.com"},
parser="lxml",
)
df_etree = read_xml(
- xml_prefix_nmsp,
+ StringIO(xml_prefix_nmsp),
xpath=".//doc:row",
namespaces={"doc": "http://example.com"},
parser="etree",
@@ -652,7 +689,7 @@ def test_none_namespace_prefix(key):
TypeError, match=("empty namespace prefix is not supported in XPath")
):
read_xml(
- xml_default_nmsp,
+ StringIO(xml_default_nmsp),
xpath=".//kml:Placemark",
namespaces={key: "http://www.opengis.net/kml/2.2"},
parser="lxml",
@@ -741,7 +778,7 @@ def test_empty_attrs_only(parser):
ValueError,
match=("xpath does not return any nodes or attributes"),
):
- read_xml(xml, xpath="./row", attrs_only=True, parser=parser)
+ read_xml(StringIO(xml), xpath="./row", attrs_only=True, parser=parser)
def test_empty_elems_only(parser):
@@ -756,7 +793,7 @@ def test_empty_elems_only(parser):
ValueError,
match=("xpath does not return any nodes or attributes"),
):
- read_xml(xml, xpath="./row", elems_only=True, parser=parser)
+ read_xml(StringIO(xml), xpath="./row", elems_only=True, parser=parser)
@td.skip_if_no("lxml")
@@ -781,8 +818,8 @@ def test_attribute_centric_xml():
"""
- df_lxml = read_xml(xml, xpath=".//station")
- df_etree = read_xml(xml, xpath=".//station", parser="etree")
+ df_lxml = read_xml(StringIO(xml), xpath=".//station")
+ df_etree = read_xml(StringIO(xml), xpath=".//station", parser="etree")
df_iter_lx = read_xml_iterparse(xml, iterparse={"station": ["Name", "coords"]})
df_iter_et = read_xml_iterparse(
@@ -834,7 +871,10 @@ def test_repeat_names(parser):
"""
df_xpath = read_xml(
- xml, xpath=".//shape", parser=parser, names=["type_dim", "shape", "type_edge"]
+ StringIO(xml),
+ xpath=".//shape",
+ parser=parser,
+ names=["type_dim", "shape", "type_edge"],
)
df_iter = read_xml_iterparse(
@@ -876,7 +916,9 @@ def test_repeat_values_new_names(parser):
ellipse
"""
- df_xpath = read_xml(xml, xpath=".//shape", parser=parser, names=["name", "group"])
+ df_xpath = read_xml(
+ StringIO(xml), xpath=".//shape", parser=parser, names=["name", "group"]
+ )
df_iter = read_xml_iterparse(
xml,
@@ -919,7 +961,7 @@ def test_repeat_elements(parser):
"""
df_xpath = read_xml(
- xml,
+ StringIO(xml),
xpath=".//shape",
parser=parser,
names=["name", "family", "degrees", "sides"],
@@ -1154,8 +1196,8 @@ def test_style_charset():
"""
- df_orig = read_xml(xml)
- df_style = read_xml(xml, stylesheet=xsl)
+ df_orig = read_xml(StringIO(xml))
+ df_style = read_xml(StringIO(xml), stylesheet=xsl)
tm.assert_frame_equal(df_orig, df_style)
@@ -1287,30 +1329,18 @@ def test_stylesheet_with_etree(kml_cta_rail_lines, xsl_flatten_doc):
@td.skip_if_no("lxml")
@pytest.mark.parametrize("val", ["", b""])
def test_empty_stylesheet(val):
- from lxml.etree import XMLSyntaxError
-
+ msg = (
+ "Passing literal xml to 'read_xml' is deprecated and "
+ "will be removed in a future version. To read from a "
+ "literal string, wrap it in a 'StringIO' object."
+ )
kml = os.path.join("data", "xml", "cta_rail_lines.kml")
- with pytest.raises(
- XMLSyntaxError, match=("Document is empty|Start tag expected, '<' not found")
- ):
+ with pytest.raises(FutureWarning, match=msg):
read_xml(kml, stylesheet=val)
# ITERPARSE
-
-
-def test_string_error(parser):
- with pytest.raises(
- ParserError, match=("iterparse is designed for large XML files")
- ):
- read_xml(
- xml_default_nmsp,
- parser=parser,
- iterparse={"row": ["shape", "degrees", "sides", "date"]},
- )
-
-
def test_file_like_iterparse(xml_books, parser, mode):
with open(xml_books, mode, encoding="utf-8" if mode == "r" else None) as f:
if mode == "r" and parser == "lxml":
@@ -1492,7 +1522,7 @@ def test_comment(parser):
"""
- df_xpath = read_xml(xml, xpath=".//shape", parser=parser)
+ df_xpath = read_xml(StringIO(xml), xpath=".//shape", parser=parser)
df_iter = read_xml_iterparse(
xml, parser=parser, iterparse={"shape": ["name", "type"]}
@@ -1528,7 +1558,7 @@ def test_dtd(parser):
"""
- df_xpath = read_xml(xml, xpath=".//shape", parser=parser)
+ df_xpath = read_xml(StringIO(xml), xpath=".//shape", parser=parser)
df_iter = read_xml_iterparse(
xml, parser=parser, iterparse={"shape": ["name", "type"]}
@@ -1564,7 +1594,7 @@ def test_processing_instruction(parser):
"""
- df_xpath = read_xml(xml, xpath=".//shape", parser=parser)
+ df_xpath = read_xml(StringIO(xml), xpath=".//shape", parser=parser)
df_iter = read_xml_iterparse(
xml, parser=parser, iterparse={"shape": ["name", "type"]}
@@ -1842,7 +1872,7 @@ def test_online_stylesheet():
"""
df_xsl = read_xml(
- xml,
+ StringIO(xml),
xpath=".//tr[td and position() <= 6]",
names=["title", "artist"],
stylesheet=xsl,
@@ -1982,7 +2012,7 @@ def test_read_xml_nullable_dtypes(parser, string_storage, dtype_backend):
string_array_na = ArrowStringArray(pa.array(["x", None]))
with pd.option_context("mode.string_storage", string_storage):
- result = read_xml(data, parser=parser, dtype_backend=dtype_backend)
+ result = read_xml(StringIO(data), parser=parser, dtype_backend=dtype_backend)
expected = DataFrame(
{
diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py
index 911b540dbc380..fb24902efc0f5 100644
--- a/pandas/tests/io/xml/test_xml_dtypes.py
+++ b/pandas/tests/io/xml/test_xml_dtypes.py
@@ -1,5 +1,7 @@
from __future__ import annotations
+from io import StringIO
+
import pytest
from pandas.errors import ParserWarning
@@ -81,7 +83,7 @@ def read_xml_iterparse(data, **kwargs):
def test_dtype_single_str(parser):
- df_result = read_xml(xml_types, dtype={"degrees": "str"}, parser=parser)
+ df_result = read_xml(StringIO(xml_types), dtype={"degrees": "str"}, parser=parser)
df_iter = read_xml_iterparse(
xml_types,
parser=parser,
@@ -102,7 +104,7 @@ def test_dtype_single_str(parser):
def test_dtypes_all_str(parser):
- df_result = read_xml(xml_dates, dtype="string", parser=parser)
+ df_result = read_xml(StringIO(xml_dates), dtype="string", parser=parser)
df_iter = read_xml_iterparse(
xml_dates,
parser=parser,
@@ -126,7 +128,7 @@ def test_dtypes_all_str(parser):
def test_dtypes_with_names(parser):
df_result = read_xml(
- xml_dates,
+ StringIO(xml_dates),
names=["Col1", "Col2", "Col3", "Col4"],
dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64[ns]"},
parser=parser,
@@ -153,7 +155,7 @@ def test_dtypes_with_names(parser):
def test_dtype_nullable_int(parser):
- df_result = read_xml(xml_types, dtype={"sides": "Int64"}, parser=parser)
+ df_result = read_xml(StringIO(xml_types), dtype={"sides": "Int64"}, parser=parser)
df_iter = read_xml_iterparse(
xml_types,
parser=parser,
@@ -174,7 +176,7 @@ def test_dtype_nullable_int(parser):
def test_dtype_float(parser):
- df_result = read_xml(xml_types, dtype={"degrees": "float"}, parser=parser)
+ df_result = read_xml(StringIO(xml_types), dtype={"degrees": "float"}, parser=parser)
df_iter = read_xml_iterparse(
xml_types,
parser=parser,
@@ -214,7 +216,7 @@ def test_both_dtype_converters(parser):
with tm.assert_produces_warning(ParserWarning, match="Both a converter and dtype"):
df_result = read_xml(
- xml_types,
+ StringIO(xml_types),
dtype={"degrees": "str"},
converters={"degrees": str},
parser=parser,
@@ -235,7 +237,9 @@ def test_both_dtype_converters(parser):
def test_converters_str(parser):
- df_result = read_xml(xml_types, converters={"degrees": str}, parser=parser)
+ df_result = read_xml(
+ StringIO(xml_types), converters={"degrees": str}, parser=parser
+ )
df_iter = read_xml_iterparse(
xml_types,
parser=parser,
@@ -258,7 +262,7 @@ def test_converters_str(parser):
def test_converters_date(parser):
convert_to_datetime = lambda x: to_datetime(x)
df_result = read_xml(
- xml_dates, converters={"date": convert_to_datetime}, parser=parser
+ StringIO(xml_dates), converters={"date": convert_to_datetime}, parser=parser
)
df_iter = read_xml_iterparse(
xml_dates,
@@ -305,7 +309,7 @@ def test_callable_str_converters(xml_books, parser, iterparse):
def test_parse_dates_column_name(parser):
- df_result = read_xml(xml_dates, parse_dates=["date"], parser=parser)
+ df_result = read_xml(StringIO(xml_dates), parse_dates=["date"], parser=parser)
df_iter = read_xml_iterparse(
xml_dates,
parser=parser,
@@ -327,7 +331,7 @@ def test_parse_dates_column_name(parser):
def test_parse_dates_column_index(parser):
- df_result = read_xml(xml_dates, parse_dates=[3], parser=parser)
+ df_result = read_xml(StringIO(xml_dates), parse_dates=[3], parser=parser)
df_iter = read_xml_iterparse(
xml_dates,
parser=parser,
@@ -349,7 +353,7 @@ def test_parse_dates_column_index(parser):
def test_parse_dates_true(parser):
- df_result = read_xml(xml_dates, parse_dates=True, parser=parser)
+ df_result = read_xml(StringIO(xml_dates), parse_dates=True, parser=parser)
df_iter = read_xml_iterparse(
xml_dates,
@@ -401,7 +405,7 @@ def test_parse_dates_dictionary(parser):
"""
df_result = read_xml(
- xml, parse_dates={"date_end": ["year", "month", "day"]}, parser=parser
+ StringIO(xml), parse_dates={"date_end": ["year", "month", "day"]}, parser=parser
)
df_iter = read_xml_iterparse(
xml,
@@ -459,7 +463,7 @@ def test_day_first_parse_dates(parser):
with tm.assert_produces_warning(
UserWarning, match="Parsing dates in %d/%m/%Y format"
):
- df_result = read_xml(xml, parse_dates=["date"], parser=parser)
+ df_result = read_xml(StringIO(xml), parse_dates=["date"], parser=parser)
df_iter = read_xml_iterparse(
xml,
parse_dates=["date"],