From 7a226638b937b39ecdf497eb8d04ee07bf06c1a3 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 11 Jan 2025 07:32:52 -0500
Subject: [PATCH 1/7] TST(string dtype): Resolve xfails in test_from_dummies

---
 pandas/core/reshape/encoding.py           | 17 +++++++++--
 pandas/tests/reshape/test_from_dummies.py | 35 +++++++++++++++++++----
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py
index 33ff182f5baee..d7d6ada27ba0f 100644
--- a/pandas/core/reshape/encoding.py
+++ b/pandas/core/reshape/encoding.py
@@ -17,12 +17,14 @@
     is_integer_dtype,
     is_list_like,
     is_object_dtype,
+    is_string_dtype,
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import (
     ArrowDtype,
     CategoricalDtype,
 )
+from pandas.core.dtypes.missing import isna
 
 from pandas.core.arrays import SparseArray
 from pandas.core.arrays.categorical import factorize_from_iterable
@@ -554,9 +556,20 @@ def from_dummies(
                 "Dummy DataFrame contains multi-assignment(s); "
                 f"First instance in row: {assigned.idxmax()}"
             )
+        dtype = data.columns.dtype
         if any(assigned == 0):
             if isinstance(default_category, dict):
-                cats.append(default_category[prefix])
+                value = default_category[prefix]
+                if (
+                    is_string_dtype(data.columns.dtype)
+                    and not isinstance(value, str)
+                    and (is_list_like(value) or not isna(value))
+                ):
+                    # GH#???
+                    # `value` is not a string or NA.
+                    # Using data.columns.dtype would coerce `value` into a string.
+                    dtype = "object"
+                cats.append(value)
             else:
                 raise ValueError(
                     "Dummy DataFrame contains unassigned value(s); "
@@ -567,7 +580,7 @@ def from_dummies(
             )
         else:
             data_slice = data_to_decode.loc[:, prefix_slice]
-        cats_array = data._constructor_sliced(cats, dtype=data.columns.dtype)
+        cats_array = data._constructor_sliced(cats, dtype=dtype)
         # get indices of True entries along axis=1
         true_values = data_slice.idxmax(axis=1)
         indexer = data_slice.columns.get_indexer_for(true_values)
diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py
index da1930323f464..4fb48cd21d428 100644
--- a/pandas/tests/reshape/test_from_dummies.py
+++ b/pandas/tests/reshape/test_from_dummies.py
@@ -1,8 +1,7 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
+import pandas as pd
 from pandas import (
     DataFrame,
     Series,
@@ -336,8 +335,6 @@ def test_no_prefix_string_cats_default_category(
     dummies = DataFrame({"a": [1, 0, 0], "b": [0, 1, 0]})
     result = from_dummies(dummies, default_category=default_category)
     expected = DataFrame(expected)
-    if using_infer_string:
-        expected[""] = expected[""].astype("str")
     tm.assert_frame_equal(result, expected)
 
 
@@ -364,7 +361,6 @@ def test_with_prefix_contains_get_dummies_NaN_column():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize(
     "default_category, expected",
     [
@@ -450,3 +446,32 @@ def test_maintain_original_index():
     result = from_dummies(df)
     expected = DataFrame({"": list("abca")}, index=list("abcd"))
     tm.assert_frame_equal(result, expected)
+
+
+def test_int_columns_with_float_default():
+    # GH#???
+    df = DataFrame(
+        {
+            3: [1, 0, 0],
+            4: [0, 1, 0],
+        },
+    )
+    with pytest.raises(ValueError, match="Trying to coerce float values to integers"):
+        from_dummies(df, default_category=0.5)
+
+
+def test_object_dtype_preserved():
+    # GH#???
+    # When the input has object dtype, the result should as
+    # well even when infer_string is True.
+    df = DataFrame(
+        {
+            "x": [1, 0, 0],
+            "y": [0, 1, 0],
+        },
+    )
+    df.columns = df.columns.astype("object")
+    with pd.option_context("future.infer_string", True):
+        result = from_dummies(df, default_category="z")
+        expected = DataFrame({"": ["x", "y", "z"]}, dtype="object")
+        tm.assert_frame_equal(result, expected)

From 40448cc9ef42358768364087f41d74d27b4d5a95 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 11 Jan 2025 12:50:48 -0500
Subject: [PATCH 2/7] Add GH references

---
 pandas/core/reshape/encoding.py           | 2 +-
 pandas/tests/reshape/test_from_dummies.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py
index d7d6ada27ba0f..b6c7dc9d1136e 100644
--- a/pandas/core/reshape/encoding.py
+++ b/pandas/core/reshape/encoding.py
@@ -565,7 +565,7 @@ def from_dummies(
                     and not isinstance(value, str)
                     and (is_list_like(value) or not isna(value))
                 ):
-                    # GH#???
+                    # https://github.com/pandas-dev/pandas/pull/60694
                     # `value` is not a string or NA.
                     # Using data.columns.dtype would coerce `value` into a string.
                     dtype = "object"
diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py
index 4fb48cd21d428..ef928db329b48 100644
--- a/pandas/tests/reshape/test_from_dummies.py
+++ b/pandas/tests/reshape/test_from_dummies.py
@@ -449,7 +449,7 @@ def test_maintain_original_index():
 
 
 def test_int_columns_with_float_default():
-    # GH#???
+    # https://github.com/pandas-dev/pandas/pull/60694
     df = DataFrame(
         {
             3: [1, 0, 0],
@@ -461,7 +461,7 @@ def test_int_columns_with_float_default():
 
 
 def test_object_dtype_preserved():
-    # GH#???
+    # https://github.com/pandas-dev/pandas/pull/60694
     # When the input has object dtype, the result should as
     # well even when infer_string is True.
     df = DataFrame(

From 35598ee1db7f21368daf43e944dc321e25614773 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 25 Jan 2025 07:12:28 -0500
Subject: [PATCH 3/7] Add dtype type hint; comment out pytables infer_string xfails

---
 pandas/core/reshape/encoding.py                | 7 +++++--
 pandas/tests/io/pytables/test_complex.py       | 8 +++-----
 pandas/tests/io/pytables/test_file_handling.py | 6 ++----
 pandas/tests/io/pytables/test_timezones.py     | 8 +++-----
 4 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py
index b6c7dc9d1136e..2d77549dd0955 100644
--- a/pandas/core/reshape/encoding.py
+++ b/pandas/core/reshape/encoding.py
@@ -37,7 +37,10 @@
 from pandas.core.series import Series
 
 if TYPE_CHECKING:
-    from pandas._typing import NpDtype
+    from pandas._typing import (
+        DtypeObj,
+        NpDtype,
+    )
 
 
 def get_dummies(
@@ -556,7 +559,7 @@ def from_dummies(
                 "Dummy DataFrame contains multi-assignment(s); "
                 f"First instance in row: {assigned.idxmax()}"
             )
-        dtype = data.columns.dtype
+        dtype: str | DtypeObj = data.columns.dtype
         if any(assigned == 0):
             if isinstance(default_category, dict):
                 value = default_category[prefix]
diff --git a/pandas/tests/io/pytables/test_complex.py b/pandas/tests/io/pytables/test_complex.py
index d140cfc941e16..c6eb7670f1e73 100644
--- a/pandas/tests/io/pytables/test_complex.py
+++ b/pandas/tests/io/pytables/test_complex.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -13,9 +11,9 @@
 
 from pandas.io.pytables import read_hdf
 
-pytestmark = pytest.mark.xfail(
-    using_string_dtype(), reason="TODO(infer_string)", strict=False
-)
+# pytestmark = pytest.mark.xfail(
+#     using_string_dtype(), reason="TODO(infer_string)", strict=False
+# )
 
 
 def test_complex_fixed(tmp_path, setup_path):
diff --git a/pandas/tests/io/pytables/test_file_handling.py b/pandas/tests/io/pytables/test_file_handling.py
index 16c3c6798ff76..9359a18d162c0 100644
--- a/pandas/tests/io/pytables/test_file_handling.py
+++ b/pandas/tests/io/pytables/test_file_handling.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.compat import (
     PY311,
     is_ci_environment,
@@ -329,7 +327,7 @@ def test_complibs(tmp_path, lvl, lib, request):
                 assert node.filters.complib == lib
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
+# @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.skipif(
     not is_platform_little_endian(), reason="reason platform is not little endian"
 )
@@ -347,7 +345,7 @@ def test_encoding(setup_path):
         tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
+# @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize(
     "val",
     [
diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py
index 8f179f844e4d0..db99f88f0f7ba 100644
--- a/pandas/tests/io/pytables/test_timezones.py
+++ b/pandas/tests/io/pytables/test_timezones.py
@@ -6,8 +6,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas._libs.tslibs.timezones import maybe_get_tz
 import pandas.util._test_decorators as td
 
@@ -25,9 +23,9 @@
     ensure_clean_store,
 )
 
-pytestmark = pytest.mark.xfail(
-    using_string_dtype(), reason="TODO(infer_string)", strict=False
-)
+# pytestmark = pytest.mark.xfail(
+#     using_string_dtype(), reason="TODO(infer_string)", strict=False
+# )
 
 
 def _compare_with_tz(a, b):

From 29295627e1a28aabe925f527b531e2f03fdab2a9 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 13 Jul 2025 08:46:55 -0400
Subject: [PATCH 4/7] Revert to a doc update

---
 pandas/core/reshape/encoding.py           | 23 ++++++-----------------
 pandas/tests/reshape/test_from_dummies.py | 13 +++++++------
 2 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py
index 0129d82cb9e9b..8fe0e48fa9e6b 100644
--- a/pandas/core/reshape/encoding.py
+++ b/pandas/core/reshape/encoding.py
@@ -17,14 +17,12 @@
     is_integer_dtype,
     is_list_like,
     is_object_dtype,
-    is_string_dtype,
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import (
     ArrowDtype,
     CategoricalDtype,
 )
-from pandas.core.dtypes.missing import isna
 
 from pandas.core.arrays import SparseArray
 from pandas.core.arrays.categorical import factorize_from_iterable
@@ -38,7 +36,6 @@
 
 if TYPE_CHECKING:
     from pandas._typing import (
-        DtypeObj,
         NpDtype,
     )
 
@@ -395,7 +392,9 @@ def from_dummies(
         The default category is the implied category when a value has none of the
         listed categories specified with a one, i.e. if all dummies in a row are
         zero. Can be a single value for all variables or a dict directly mapping
-        the default categories to a prefix of a variable.
+        the default categories to a prefix of a variable. The default category
+        will be coerced to the dtype of ``data.columns`` if such coercion is
+        lossless, and will raise otherwise.
 
     Returns
     -------
@@ -560,20 +559,9 @@ def from_dummies(
                 "Dummy DataFrame contains multi-assignment(s); "
                 f"First instance in row: {assigned.idxmax()}"
             )
-        dtype: str | DtypeObj = data.columns.dtype
         if any(assigned == 0):
             if isinstance(default_category, dict):
-                value = default_category[prefix]
-                if (
-                    is_string_dtype(data.columns.dtype)
-                    and not isinstance(value, str)
-                    and (is_list_like(value) or not isna(value))
-                ):
-                    # https://github.com/pandas-dev/pandas/pull/60694
-                    # `value` is not a string or NA.
-                    # Using data.columns.dtype would coerce `value` into a string.
-                    dtype = "object"
-                cats.append(value)
+                cats.append(default_category[prefix])
             else:
                 raise ValueError(
                     "Dummy DataFrame contains unassigned value(s); "
@@ -584,7 +572,8 @@ def from_dummies(
             )
         else:
             data_slice = data_to_decode.loc[:, prefix_slice]
-        cats_array = data._constructor_sliced(cats, dtype=dtype)
+        # cats_array = data._constructor_sliced(cats, dtype=dtype)
+        cats_array = data._constructor_sliced(cats, dtype=data.columns.dtype)
         # get indices of True entries along axis=1
         true_values = data_slice.idxmax(axis=1)
         indexer = data_slice.columns.get_indexer_for(true_values)
diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py
index 776631ab36978..d1e65aa262d72 100644
--- a/pandas/tests/reshape/test_from_dummies.py
+++ b/pandas/tests/reshape/test_from_dummies.py
@@ -1,7 +1,6 @@
 import numpy as np
 import pytest
 
-import pandas as pd
 from pandas import (
     DataFrame,
     Series,
@@ -334,7 +333,7 @@ def test_no_prefix_string_cats_default_category(
 ):
     dummies = DataFrame({"a": [1, 0, 0], "b": [0, 1, 0]})
     result = from_dummies(dummies, default_category=default_category)
-    expected = DataFrame(expected)
+    expected = DataFrame(expected, dtype=dummies.columns.dtype)
     tm.assert_frame_equal(result, expected)
 
 
@@ -466,6 +465,9 @@ def test_object_dtype_preserved():
     # https://github.com/pandas-dev/pandas/pull/60694
     # When the input has object dtype, the result should as
     # well even when infer_string is True.
+    import pandas as pd
+
+    assert pd.get_option("future.infer_string")
     df = DataFrame(
         {
             "x": [1, 0, 0],
@@ -473,7 +475,6 @@ def test_object_dtype_preserved():
         },
     )
     df.columns = df.columns.astype("object")
-    with pd.option_context("future.infer_string", True):
-        result = from_dummies(df, default_category="z")
-        expected = DataFrame({"": ["x", "y", "z"]}, dtype="object")
-        tm.assert_frame_equal(result, expected)
+    result = from_dummies(df, default_category="z")
+    expected = DataFrame({"": ["x", "y", "z"]}, dtype="object")
+    tm.assert_frame_equal(result, expected)

From 770c2f5a7babca5b3d0cffbf29651ac900eebea1 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 13 Jul 2025 08:47:32 -0400
Subject: [PATCH 5/7] Remove commented-out cats_array line in from_dummies

---
 pandas/core/reshape/encoding.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py
index 8fe0e48fa9e6b..ed53bea636a8f 100644
--- a/pandas/core/reshape/encoding.py
+++ b/pandas/core/reshape/encoding.py
@@ -572,7 +572,6 @@ def from_dummies(
             )
         else:
             data_slice = data_to_decode.loc[:, prefix_slice]
-        # cats_array = data._constructor_sliced(cats, dtype=dtype)
         cats_array = data._constructor_sliced(cats, dtype=data.columns.dtype)
         # get indices of True entries along axis=1
         true_values = data_slice.idxmax(axis=1)

From c073d81d7a1c874f4179b89474dd93d42c71dc07 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 13 Jul 2025 08:48:02 -0400
Subject: [PATCH 6/7] Collapse NpDtype import back onto one line

---
 pandas/core/reshape/encoding.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py
index ed53bea636a8f..67fb075110f0d 100644
--- a/pandas/core/reshape/encoding.py
+++ b/pandas/core/reshape/encoding.py
@@ -35,9 +35,7 @@
 from pandas.core.series import Series
 
 if TYPE_CHECKING:
-    from pandas._typing import (
-        NpDtype,
-    )
+    from pandas._typing import NpDtype
 
 
 def get_dummies(

From 7a6c847b1a1385a5e2e74e272aecaefdddf0421d Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 13 Jul 2025 08:50:15 -0400
Subject: [PATCH 7/7] Remove leftover infer_string assert from test_object_dtype_preserved

---
 pandas/tests/reshape/test_from_dummies.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py
index d1e65aa262d72..dfb691c785404 100644
--- a/pandas/tests/reshape/test_from_dummies.py
+++ b/pandas/tests/reshape/test_from_dummies.py
@@ -465,9 +465,6 @@ def test_object_dtype_preserved():
     # https://github.com/pandas-dev/pandas/pull/60694
     # When the input has object dtype, the result should as
     # well even when infer_string is True.
-    import pandas as pd
-
-    assert pd.get_option("future.infer_string")
     df = DataFrame(
         {
             "x": [1, 0, 0],