From 49660f7a30c1e1d92edebea7805a526cee029582 Mon Sep 17 00:00:00 2001
From: Parfait Gasana <parfait.gasana@gmail.com>
Date: Sun, 12 Feb 2023 19:07:16 -0600
Subject: [PATCH] BUG: iterparse on  ignores repeated elements

---
 doc/source/whatsnew/v2.0.0.rst  |  1 +
 pandas/io/xml.py                |  9 ++++--
 pandas/tests/io/xml/test_xml.py | 49 +++++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 2 deletions(-)
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 29f360e050548..e98ac9bdc2dae 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -1304,6 +1304,7 @@ I/O
 - Bug in :meth:`DataFrame.to_dict` not converting ``NA`` to ``None`` (:issue:`50795`)
 - Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`)
 - Bug in :func:`read_xml` where file-like objects failed when iterparse is used (:issue:`50641`)
+- Bug in :func:`read_xml` ignored repeated elements when iterparse is used (:issue:`51183`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index 64fda2fdb0299..3ee4e64faf75c 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -337,6 +337,10 @@ def _iterparse_nodes(self, iterparse: Callable) -> list[dict[str, str | None]]:
                 "local disk and not as compressed files or online sources."
             )
 
+        iterparse_repeats = len(self.iterparse[row_node]) != len(
+            set(self.iterparse[row_node])
+        )
+
         for event, elem in iterparse(self.path_or_buffer, events=("start", "end")):
             curr_elem = elem.tag.split("}")[1] if "}" in elem.tag else elem.tag
 
@@ -345,12 +349,13 @@ def _iterparse_nodes(self, iterparse: Callable) -> list[dict[str, str | None]]:
                     row = {}
 
             if row is not None:
-                if self.names:
+                if self.names and iterparse_repeats:
                     for col, nm in zip(self.iterparse[row_node], self.names):
                         if curr_elem == col:
                             elem_val = elem.text.strip() if elem.text else None
-                            if row.get(nm) != elem_val and nm not in row:
+                            if elem_val not in row.values() and nm not in row:
                                 row[nm] = elem_val
+
                         if col in elem.attrib:
                             if elem.attrib[col] not in row.values() and nm not in row:
                                 row[nm] = elem.attrib[col]
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index dfa251788ddc3..b73116519178e 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -948,6 +948,55 @@ def test_repeat_values_new_names(parser):
     tm.assert_frame_equal(df_iter, df_expected)
 
 
+def test_repeat_elements(parser):
+    xml = """\
+<shapes>
+  <shape>
+    <value item="name">circle</value>
+    <value item="family">ellipse</value>
+    <value item="degrees">360</value>
+    <value item="sides">0</value>
+  </shape>
+  <shape>
+    <value item="name">triangle</value>
+    <value item="family">polygon</value>
+    <value item="degrees">180</value>
+    <value item="sides">3</value>
+  </shape>
+  <shape>
+    <value item="name">square</value>
+    <value item="family">polygon</value>
+    <value item="degrees">360</value>
+    <value item="sides">4</value>
+  </shape>
+</shapes>"""
+    df_xpath = read_xml(
+        xml,
+        xpath=".//shape",
+        parser=parser,
+        names=["name", "family", "degrees", "sides"],
+    )
+
+    df_iter = read_xml_iterparse(
+        xml,
+        parser=parser,
+        iterparse={"shape": ["value", "value", "value", "value"]},
+        names=["name", "family", "degrees", "sides"],
+    )
+
+    df_expected = DataFrame(
+        {
+            "name": ["circle", "triangle", "square"],
+            "family": ["ellipse", "polygon", "polygon"],
+            "degrees": [360, 180, 360],
+            "sides": [0, 3, 4],
+        }
+    )
+
+    tm.assert_frame_equal(df_xpath, df_expected)
+    tm.assert_frame_equal(df_iter, df_expected)
+
+
 def test_names_option_wrong_length(datapath, parser):
     filename = datapath("io", "data", "xml", "books.xml")