diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst
index 09d76d71c6e1b..ae96d0f8296f2 100644
--- a/doc/source/whatsnew/index.rst
+++ b/doc/source/whatsnew/index.rst
@@ -10,6 +10,14 @@ This is the list of changes to pandas between each release. For full details,
see the `commit logs <https://github.com/pandas-dev/pandas/commits/>`_. For install and
upgrade instructions, see :ref:`install`.
+Version 2.3
+-----------
+
+.. toctree::
+ :maxdepth: 2
+
+ v2.3.0
+
Version 2.2
-----------
diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
new file mode 100644
index 0000000000000..01c2ed3821d7a
--- /dev/null
+++ b/doc/source/whatsnew/v2.3.0.rst
@@ -0,0 +1,180 @@
+.. _whatsnew_230:
+
+What's new in 2.3.0 (Month XX, 2024)
+------------------------------------
+
+These are the changes in pandas 2.3.0. See :ref:`release` for a full changelog
+including other versions of pandas.
+
+{{ header }}
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_230.upcoming_changes:
+
+Upcoming changes in pandas 3.0
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+.. _whatsnew_230.enhancements:
+
+Enhancements
+~~~~~~~~~~~~
+
+.. _whatsnew_230.enhancements.enhancement1:
+
+enhancement1
+^^^^^^^^^^^^
+
+
+.. _whatsnew_230.enhancements.other:
+
+Other enhancements
+^^^^^^^^^^^^^^^^^^
+
+-
+-
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_230.notable_bug_fixes:
+
+Notable bug fixes
+~~~~~~~~~~~~~~~~~
+
+These are bug fixes that might have notable behavior changes.
+
+.. _whatsnew_230.notable_bug_fixes.notable_bug_fix1:
+
+notable_bug_fix1
+^^^^^^^^^^^^^^^^
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_230.deprecations:
+
+Deprecations
+~~~~~~~~~~~~
+- Deprecated allowing non-``bool`` values for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` for dtypes that do not already disallow these (:issue:`59615`)
+-
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_230.performance:
+
+Performance improvements
+~~~~~~~~~~~~~~~~~~~~~~~~
+-
+-
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_230.bug_fixes:
+
+Bug fixes
+~~~~~~~~~
+
+Categorical
+^^^^^^^^^^^
+-
+-
+
+Datetimelike
+^^^^^^^^^^^^
+-
+-
+
+Timedelta
+^^^^^^^^^
+-
+-
+
+Timezones
+^^^^^^^^^
+-
+-
+
+Numeric
+^^^^^^^
+-
+-
+
+Conversion
+^^^^^^^^^^
+-
+-
+
+Strings
+^^^^^^^
+- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results in case of ``method="average"`` and raising an error if it would truncate results (:issue:`59768`)
+- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
+- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` and :class:`StringDtype` with ``storage="pyarrow"`` giving incorrect results (:issue:`59710`)
+- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the Python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
+-
+
+Interval
+^^^^^^^^
+-
+-
+
+Indexing
+^^^^^^^^
+-
+-
+
+Missing
+^^^^^^^
+-
+-
+
+MultiIndex
+^^^^^^^^^^
+-
+-
+
+I/O
+^^^
+-
+-
+
+Period
+^^^^^^
+-
+-
+
+Plotting
+^^^^^^^^
+-
+-
+
+Groupby/resample/rolling
+^^^^^^^^^^^^^^^^^^^^^^^^
+-
+-
+
+Reshaping
+^^^^^^^^^
+-
+-
+
+Sparse
+^^^^^^
+-
+-
+
+ExtensionArray
+^^^^^^^^^^^^^^
+-
+-
+
+Styler
+^^^^^^
+-
+-
+
+Other
+^^^^^
+-
+-
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_230.contributors:
+
+Contributors
+~~~~~~~~~~~~
diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx
index 9889436a542c1..2932f3ff56396 100644
--- a/pandas/_libs/arrays.pyx
+++ b/pandas/_libs/arrays.pyx
@@ -67,6 +67,10 @@ cdef class NDArrayBacked:
"""
Construct a new ExtensionArray `new_array` with `arr` as its _ndarray.
+ The returned array has the same dtype as self.
+
+ Caller is responsible for ensuring `values.dtype == self._ndarray.dtype`.
+
This should round-trip:
self == self._from_backing_data(self._ndarray)
"""
diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
index ccac3d0b50d45..127b0b845d219 100644
--- a/pandas/_libs/hashtable.pyx
+++ b/pandas/_libs/hashtable.pyx
@@ -33,7 +33,10 @@ from pandas._libs.khash cimport (
kh_python_hash_func,
khiter_t,
)
-from pandas._libs.missing cimport checknull
+from pandas._libs.missing cimport (
+ checknull,
+ is_matching_na,
+)
def get_hashtable_trace_domain():
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index c0723392496c1..c42bccb7f38f7 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -1121,11 +1121,13 @@ cdef class StringHashTable(HashTable):
const char **vecs
khiter_t k
bint use_na_value
+ bint non_null_na_value
if return_inverse:
labels = np.zeros(n, dtype=np.intp)
uindexer = np.empty(n, dtype=np.int64)
use_na_value = na_value is not None
+ non_null_na_value = not checknull(na_value)
# assign pointers and pre-filter out missing (if ignore_na)
vecs = malloc(n * sizeof(char *))
@@ -1134,7 +1136,12 @@ cdef class StringHashTable(HashTable):
if (ignore_na
and (not isinstance(val, str)
- or (use_na_value and val == na_value))):
+ or (use_na_value and (
+ (non_null_na_value and val == na_value) or
+ (not non_null_na_value and is_matching_na(val, na_value)))
+ )
+ )
+ ):
# if missing values do not count as unique values (i.e. if
# ignore_na is True), we can skip the actual value, and
# replace the label with na_sentinel directly
@@ -1400,10 +1407,11 @@ cdef class PyObjectHashTable(HashTable):
object val
khiter_t k
bint use_na_value
-
+ bint non_null_na_value
if return_inverse:
labels = np.empty(n, dtype=np.intp)
use_na_value = na_value is not None
+ non_null_na_value = not checknull(na_value)
for i in range(n):
val = values[i]
@@ -1411,7 +1419,11 @@ cdef class PyObjectHashTable(HashTable):
if ignore_na and (
checknull(val)
- or (use_na_value and val == na_value)
+ or (use_na_value and (
+ (non_null_na_value and val == na_value) or
+ (not non_null_na_value and is_matching_na(val, na_value))
+ )
+ )
):
# if missing values do not count as unique values (i.e. if
# ignore_na is True), skip the hashtable entry for them, and
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 5d8a04664b0e4..c23f907aecfab 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -736,7 +736,9 @@ cpdef ndarray[object] ensure_string_array(
convert_na_value : bool, default True
If False, existing na values will be used unchanged in the new array.
copy : bool, default True
- Whether to ensure that a new array is returned.
+ Whether to ensure that a new array is returned. When True, a new array
+ is always returned. When False, a new array is only returned when needed
+ to avoid mutating the input array.
skipna : bool, default True
Whether or not to coerce nulls to their stringified form
(e.g. if False, NaN becomes 'nan').
@@ -753,7 +755,14 @@ cpdef ndarray[object] ensure_string_array(
if hasattr(arr, "to_numpy"):
- if hasattr(arr, "dtype") and arr.dtype.kind in "mM":
+ if (
+ hasattr(arr, "dtype")
+ and arr.dtype.kind in "mM"
+ # TODO: we should add a custom ArrowExtensionArray.astype implementation
+ # that handles astype(str) specifically, avoiding ending up here and
+ # then we can remove the below check for `_pa_array` (for ArrowEA)
+ and not hasattr(arr, "_pa_array")
+ ):
# dtype check to exclude DataFrame
# GH#41409 TODO: not a great place for this
out = arr.astype(str).astype(object)
@@ -765,10 +774,17 @@ cpdef ndarray[object] ensure_string_array(
result = np.asarray(arr, dtype="object")
- if copy and (result is arr or np.shares_memory(arr, result)):
- # GH#54654
- result = result.copy()
- elif not copy and result is arr:
+ if result is arr or np.may_share_memory(arr, result):
+ # if np.asarray(..) did not make a copy of the input arr, we still need
+ # to do that to avoid mutating the input array
+ # GH#54654: share_memory check is needed for rare cases where np.asarray
+ # returns a new object without making a copy of the actual data
+ if copy:
+ result = result.copy()
+ else:
+ already_copied = False
+ elif not copy and not result.flags.writeable:
+ # Weird edge case where result is a view
already_copied = False
if issubclass(arr.dtype.type, np.str_):
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index 10c1c490551fb..3aa7c64831efe 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -112,7 +112,7 @@
COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"]
if using_string_dtype():
- STRING_DTYPES: list[Dtype] = [str, "U"]
+ STRING_DTYPES: list[Dtype] = ["U"]
else:
STRING_DTYPES: list[Dtype] = [str, "str", "U"] # type: ignore[no-redef]
COMPLEX_FLOAT_DTYPES: list[Dtype] = [*COMPLEX_DTYPES, *FLOAT_NUMPY_DTYPES]
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 433ea7275223d..c6237d0309630 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1228,6 +1228,34 @@ def string_dtype(request):
return request.param
+@pytest.fixture(
+ params=[
+ ("python", pd.NA),
+ pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
+ pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
+ ("python", np.nan),
+ ],
+ ids=[
+ "string=string[python]",
+ "string=string[pyarrow]",
+ "string=str[pyarrow]",
+ "string=str[python]",
+ ],
+)
+def string_dtype_no_object(request):
+ """
+ Parametrized fixture for string dtypes.
+ * 'string[python]' (NA variant)
+ * 'string[pyarrow]' (NA variant)
+ * 'str' (NaN variant, with pyarrow)
+ * 'str' (NaN variant, without pyarrow)
+ """
+ # need to instantiate the StringDtype here instead of in the params
+ # to avoid importing pyarrow during test collection
+ storage, na_value = request.param
+ return pd.StringDtype(storage, na_value)
+
+
@pytest.fixture(
params=[
"string[python]",
@@ -1266,7 +1294,13 @@ def string_storage(request):
pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
("python", np.nan),
- ]
+ ],
+ ids=[
+ "string=string[python]",
+ "string=string[pyarrow]",
+ "string=str[pyarrow]",
+ "string=str[python]",
+ ],
)
def string_dtype_arguments(request):
"""
@@ -1297,6 +1331,7 @@ def dtype_backend(request):
# Alias so we can test with cartesian product of string_storage
string_storage2 = string_storage
+string_dtype_arguments2 = string_dtype_arguments
@pytest.fixture(params=tm.BYTES_DTYPES)
diff --git a/pandas/core/_numba/extensions.py b/pandas/core/_numba/extensions.py
index ee09c9380fb0f..b05f12295a729 100644
--- a/pandas/core/_numba/extensions.py
+++ b/pandas/core/_numba/extensions.py
@@ -49,7 +49,8 @@
@contextmanager
def set_numba_data(index: Index):
numba_data = index._data
- if numba_data.dtype == object:
+ if numba_data.dtype in (object, "string"):
+ numba_data = np.asarray(numba_data)
if not lib.is_string_array(numba_data):
raise ValueError(
"The numba engine only supports using string or numeric column names"
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 25a71ce5b5f4f..fafc9ee1b6928 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -1174,12 +1174,7 @@ def apply_with_numba(self) -> dict[int, Any]:
from pandas.core._numba.extensions import set_numba_data
index = self.obj.index
- if index.dtype == "string":
- index = index.astype(object)
-
columns = self.obj.columns
- if columns.dtype == "string":
- columns = columns.astype(object)
# Convert from numba dict to regular dict
# Our isinstance checks in the df constructor don't pass for numbas typed dict
diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py
index cc41985843574..a39668faf779e 100644
--- a/pandas/core/arrays/_arrow_string_mixins.py
+++ b/pandas/core/arrays/_arrow_string_mixins.py
@@ -1,22 +1,85 @@
from __future__ import annotations
-from typing import Literal
+from functools import partial
+import re
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Literal,
+)
import numpy as np
-from pandas.compat import pa_version_under10p1
+from pandas.compat import (
+ pa_version_under10p1,
+ pa_version_under11p0,
+ pa_version_under13p0,
+ pa_version_under17p0,
+)
+
+from pandas.core.dtypes.missing import isna
if not pa_version_under10p1:
import pyarrow as pa
import pyarrow.compute as pc
+if TYPE_CHECKING:
+ from collections.abc import Callable
+
+ from pandas._typing import (
+ Scalar,
+ Self,
+ )
+
class ArrowStringArrayMixin:
- _pa_array = None
+ _pa_array: pa.ChunkedArray
def __init__(self, *args, **kwargs) -> None:
raise NotImplementedError
+ def _convert_bool_result(self, result):
+ # Convert a bool-dtype result to the appropriate result type
+ raise NotImplementedError
+
+ def _convert_int_result(self, result):
+ # Convert an integer-dtype result to the appropriate result type
+ raise NotImplementedError
+
+ def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
+ raise NotImplementedError
+
+ def _str_len(self):
+ result = pc.utf8_length(self._pa_array)
+ return self._convert_int_result(result)
+
+ def _str_lower(self) -> Self:
+ return type(self)(pc.utf8_lower(self._pa_array))
+
+ def _str_upper(self) -> Self:
+ return type(self)(pc.utf8_upper(self._pa_array))
+
+ def _str_strip(self, to_strip=None) -> Self:
+ if to_strip is None:
+ result = pc.utf8_trim_whitespace(self._pa_array)
+ else:
+ result = pc.utf8_trim(self._pa_array, characters=to_strip)
+ return type(self)(result)
+
+ def _str_lstrip(self, to_strip=None) -> Self:
+ if to_strip is None:
+ result = pc.utf8_ltrim_whitespace(self._pa_array)
+ else:
+ result = pc.utf8_ltrim(self._pa_array, characters=to_strip)
+ return type(self)(result)
+
+ def _str_rstrip(self, to_strip=None) -> Self:
+ if to_strip is None:
+ result = pc.utf8_rtrim_whitespace(self._pa_array)
+ else:
+ result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
+ return type(self)(result)
+
def _str_pad(
self,
width: int,
@@ -28,7 +91,19 @@ def _str_pad(
elif side == "right":
pa_pad = pc.utf8_rpad
elif side == "both":
- pa_pad = pc.utf8_center
+ if pa_version_under17p0:
+ # GH#59624 fall back to object dtype
+ from pandas import array as pd_array
+
+ obj_arr = self.astype(object, copy=False) # type: ignore[attr-defined]
+ obj = pd_array(obj_arr, dtype=object)
+ result = obj._str_pad(width, side, fillchar) # type: ignore[attr-defined]
+ return type(self)._from_sequence(result, dtype=self.dtype) # type: ignore[attr-defined]
+ else:
+ # GH#54792
+ # https://github.com/apache/arrow/issues/15053#issuecomment-2317032347
+ lean_left = (width % 2) == 0
+ pa_pad = partial(pc.utf8_center, lean_left_on_odd_padding=lean_left)
else:
raise ValueError(
f"Invalid side: {side}. Side must be one of 'left', 'right', 'both'"
@@ -51,12 +126,29 @@ def _str_get(self, i: int):
selected = pc.utf8_slice_codeunits(
self._pa_array, start=start, stop=stop, step=step
)
- null_value = pa.scalar(
- None, type=self._pa_array.type # type: ignore[attr-defined]
- )
+ null_value = pa.scalar(None, type=self._pa_array.type)
result = pc.if_else(not_out_of_bounds, selected, null_value)
return type(self)(result)
+ def _str_slice(
+ self, start: int | None = None, stop: int | None = None, step: int | None = None
+ ):
+ if pa_version_under11p0:
+ # GH#59724
+ result = self._apply_elementwise(lambda val: val[start:stop:step])
+ return type(self)(pa.chunked_array(result, type=self._pa_array.type))
+ if start is None:
+ if step is not None and step < 0:
+ # GH#59710
+ start = -1
+ else:
+ start = 0
+ if step is None:
+ step = 1
+ return type(self)(
+ pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
+ )
+
def _str_slice_replace(
self, start: int | None = None, stop: int | None = None, repl: str | None = None
):
@@ -68,7 +160,34 @@ def _str_slice_replace(
stop = np.iinfo(np.int64).max
return type(self)(pc.utf8_replace_slice(self._pa_array, start, stop, repl))
- def _str_capitalize(self):
+ def _str_replace(
+ self,
+ pat: str | re.Pattern,
+ repl: str | Callable,
+ n: int = -1,
+ case: bool = True,
+ flags: int = 0,
+ regex: bool = True,
+ ) -> Self:
+ if isinstance(pat, re.Pattern) or callable(repl) or not case or flags:
+ raise NotImplementedError(
+ "replace is not supported with a re.Pattern, callable repl, "
+ "case=False, or flags!=0"
+ )
+
+ func = pc.replace_substring_regex if regex else pc.replace_substring
+ # https://github.com/apache/arrow/issues/39149
+ # GH 56404, unexpected behavior with negative max_replacements with pyarrow.
+ pa_max_replacements = None if n < 0 else n
+ result = func(
+ self._pa_array,
+ pattern=pat,
+ replacement=repl,
+ max_replacements=pa_max_replacements,
+ )
+ return type(self)(result)
+
+ def _str_capitalize(self) -> Self:
return type(self)(pc.utf8_capitalize(self._pa_array))
def _str_title(self):
@@ -77,8 +196,151 @@ def _str_title(self):
def _str_swapcase(self):
return type(self)(pc.utf8_swapcase(self._pa_array))
+ def _str_removeprefix(self, prefix: str):
+ if not pa_version_under13p0:
+ starts_with = pc.starts_with(self._pa_array, pattern=prefix)
+ removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
+ result = pc.if_else(starts_with, removed, self._pa_array)
+ return type(self)(result)
+ predicate = lambda val: val.removeprefix(prefix)
+ result = self._apply_elementwise(predicate)
+ return type(self)(pa.chunked_array(result))
+
def _str_removesuffix(self, suffix: str):
ends_with = pc.ends_with(self._pa_array, pattern=suffix)
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
result = pc.if_else(ends_with, removed, self._pa_array)
return type(self)(result)
+
+ def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
+ if isinstance(pat, str):
+ result = pc.starts_with(self._pa_array, pattern=pat)
+ else:
+ if len(pat) == 0:
+ # For empty tuple we return null for missing values and False
+ # for valid values.
+ result = pc.if_else(pc.is_null(self._pa_array), None, False)
+ else:
+ result = pc.starts_with(self._pa_array, pattern=pat[0])
+
+ for p in pat[1:]:
+ result = pc.or_(result, pc.starts_with(self._pa_array, pattern=p))
+ if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
+ result = result.fill_null(na)
+ return self._convert_bool_result(result)
+
+ def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
+ if isinstance(pat, str):
+ result = pc.ends_with(self._pa_array, pattern=pat)
+ else:
+ if len(pat) == 0:
+ # For empty tuple we return null for missing values and False
+ # for valid values.
+ result = pc.if_else(pc.is_null(self._pa_array), None, False)
+ else:
+ result = pc.ends_with(self._pa_array, pattern=pat[0])
+
+ for p in pat[1:]:
+ result = pc.or_(result, pc.ends_with(self._pa_array, pattern=p))
+ if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
+ result = result.fill_null(na)
+ return self._convert_bool_result(result)
+
+ def _str_isalnum(self):
+ result = pc.utf8_is_alnum(self._pa_array)
+ return self._convert_bool_result(result)
+
+ def _str_isalpha(self):
+ result = pc.utf8_is_alpha(self._pa_array)
+ return self._convert_bool_result(result)
+
+ def _str_isdecimal(self):
+ result = pc.utf8_is_decimal(self._pa_array)
+ return self._convert_bool_result(result)
+
+ def _str_isdigit(self):
+ result = pc.utf8_is_digit(self._pa_array)
+ return self._convert_bool_result(result)
+
+ def _str_islower(self):
+ result = pc.utf8_is_lower(self._pa_array)
+ return self._convert_bool_result(result)
+
+ def _str_isnumeric(self):
+ result = pc.utf8_is_numeric(self._pa_array)
+ return self._convert_bool_result(result)
+
+ def _str_isspace(self):
+ result = pc.utf8_is_space(self._pa_array)
+ return self._convert_bool_result(result)
+
+ def _str_istitle(self):
+ result = pc.utf8_is_title(self._pa_array)
+ return self._convert_bool_result(result)
+
+ def _str_isupper(self):
+ result = pc.utf8_is_upper(self._pa_array)
+ return self._convert_bool_result(result)
+
+ def _str_contains(
+ self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
+ ):
+ if flags:
+ raise NotImplementedError(f"contains not implemented with {flags=}")
+
+ if regex:
+ pa_contains = pc.match_substring_regex
+ else:
+ pa_contains = pc.match_substring
+ result = pa_contains(self._pa_array, pat, ignore_case=not case)
+ if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
+ result = result.fill_null(na)
+ return self._convert_bool_result(result)
+
+ def _str_match(
+ self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
+ ):
+ if not pat.startswith("^"):
+ pat = f"^{pat}"
+ return self._str_contains(pat, case, flags, na, regex=True)
+
+ def _str_fullmatch(
+ self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
+ ):
+ if not pat.endswith("$") or pat.endswith("\\$"):
+ pat = f"{pat}$"
+ return self._str_match(pat, case, flags, na)
+
+ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
+ if (
+ pa_version_under13p0
+ and not (start != 0 and end is not None)
+ and not (start == 0 and end is None)
+ ):
+ # GH#59562
+ res_list = self._apply_elementwise(lambda val: val.find(sub, start, end))
+ return self._convert_int_result(pa.chunked_array(res_list))
+
+ if (start == 0 or start is None) and end is None:
+ result = pc.find_substring(self._pa_array, sub)
+ else:
+ if sub == "":
+ # GH#56792
+ res_list = self._apply_elementwise(
+ lambda val: val.find(sub, start, end)
+ )
+ return self._convert_int_result(pa.chunked_array(res_list))
+ if start is None:
+ start_offset = 0
+ start = 0
+ elif start < 0:
+ start_offset = pc.add(start, pc.utf8_length(self._pa_array))
+ start_offset = pc.if_else(pc.less(start_offset, 0), 0, start_offset)
+ else:
+ start_offset = start
+ slices = pc.utf8_slice_codeunits(self._pa_array, start, stop=end)
+ result = pc.find_substring(slices, sub)
+ found = pc.not_equal(result, pa.scalar(-1, type=result.type))
+ offset_result = pc.add(result, start_offset)
+ result = pc.if_else(found, offset_result, -1)
+ return self._convert_int_result(result)
diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
index 0da121c36644a..cb6861a8dd00f 100644
--- a/pandas/core/arrays/_mixins.py
+++ b/pandas/core/arrays/_mixins.py
@@ -515,17 +515,14 @@ def _quantile(
fill_value = self._internal_fill_value
res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation)
-
- res_values = self._cast_quantile_result(res_values)
- return self._from_backing_data(res_values)
-
- # TODO: see if we can share this with other dispatch-wrapping methods
- def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray:
- """
- Cast the result of quantile_with_mask to an appropriate dtype
- to pass to _from_backing_data in _quantile.
- """
- return res_values
+ if res_values.dtype == self._ndarray.dtype:
+ return self._from_backing_data(res_values)
+ else:
+ # e.g. test_quantile_empty we are empty integer dtype and res_values
+ # has floating dtype
+ # TODO: technically __init__ isn't defined here.
+ # Should we raise NotImplementedError and handle this on NumpyEA?
+ return type(self)(res_values) # type: ignore[call-arg]
# ------------------------------------------------------------------------
# numpy-like methods
diff --git a/pandas/core/arrays/arrow/_arrow_utils.py b/pandas/core/arrays/arrow/_arrow_utils.py
index 2a053fac2985c..285c3fd465ffc 100644
--- a/pandas/core/arrays/arrow/_arrow_utils.py
+++ b/pandas/core/arrays/arrow/_arrow_utils.py
@@ -1,24 +1,8 @@
from __future__ import annotations
-import warnings
-
import numpy as np
import pyarrow
-from pandas.errors import PerformanceWarning
-from pandas.util._exceptions import find_stack_level
-
-
-def fallback_performancewarning(version: str | None = None) -> None:
- """
- Raise a PerformanceWarning for falling back to ExtensionArray's
- non-pyarrow method
- """
- msg = "Falling back on a non-pyarrow code path which may decrease performance."
- if version is not None:
- msg += f" Upgrade to pyarrow >={version} to possibly suppress this warning."
- warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())
-
def pyarrow_array_to_numpy_and_mask(
arr, dtype: np.dtype
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 46f2cbb2ebeef..e0ccbd6fdc5fd 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -676,7 +676,12 @@ def __invert__(self) -> Self:
return type(self)(pc.invert(self._pa_array))
def __neg__(self) -> Self:
- return type(self)(pc.negate_checked(self._pa_array))
+ try:
+ return type(self)(pc.negate_checked(self._pa_array))
+ except pa.ArrowNotImplementedError as err:
+ raise TypeError(
+ f"unary '-' not supported for dtype '{self.dtype}'"
+ ) from err
def __pos__(self) -> Self:
return type(self)(self._pa_array)
@@ -731,8 +736,19 @@ def _cmp_method(self, other, op):
)
return ArrowExtensionArray(result)
- def _evaluate_op_method(self, other, op, arrow_funcs):
+ def _op_method_error_message(self, other, op) -> str:
+ if hasattr(other, "dtype"):
+ other_type = f"dtype '{other.dtype}'"
+ else:
+ other_type = f"object of type {type(other)}"
+ return (
+ f"operation '{op.__name__}' not supported for "
+ f"dtype '{self.dtype}' with {other_type}"
+ )
+
+ def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
pa_type = self._pa_array.type
+ other_original = other
other = self._box_pa(other)
if (
@@ -742,10 +758,15 @@ def _evaluate_op_method(self, other, op, arrow_funcs):
):
if op in [operator.add, roperator.radd]:
sep = pa.scalar("", type=pa_type)
- if op is operator.add:
- result = pc.binary_join_element_wise(self._pa_array, other, sep)
- elif op is roperator.radd:
- result = pc.binary_join_element_wise(other, self._pa_array, sep)
+ try:
+ if op is operator.add:
+ result = pc.binary_join_element_wise(self._pa_array, other, sep)
+ elif op is roperator.radd:
+ result = pc.binary_join_element_wise(other, self._pa_array, sep)
+ except pa.ArrowNotImplementedError as err:
+ raise TypeError(
+ self._op_method_error_message(other_original, op)
+ ) from err
return type(self)(result)
elif op in [operator.mul, roperator.rmul]:
binary = self._pa_array
@@ -777,9 +798,14 @@ def _evaluate_op_method(self, other, op, arrow_funcs):
pc_func = arrow_funcs[op.__name__]
if pc_func is NotImplemented:
+ if pa.types.is_string(pa_type) or pa.types.is_large_string(pa_type):
+ raise TypeError(self._op_method_error_message(other_original, op))
raise NotImplementedError(f"{op.__name__} not implemented.")
- result = pc_func(self._pa_array, other)
+ try:
+ result = pc_func(self._pa_array, other)
+ except pa.ArrowNotImplementedError as err:
+ raise TypeError(self._op_method_error_message(other_original, op)) from err
return type(self)(result)
def _logical_method(self, other, op):
@@ -1963,7 +1989,7 @@ def _rank(
"""
See Series.rank.__doc__.
"""
- return type(self)(
+ return self._convert_rank_result(
self._rank_calc(
axis=axis,
method=method,
@@ -2259,86 +2285,19 @@ def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
for chunk in self._pa_array.iterchunks()
]
- def _str_count(self, pat: str, flags: int = 0):
- if flags:
- raise NotImplementedError(f"count not implemented with {flags=}")
- return type(self)(pc.count_substring_regex(self._pa_array, pat))
-
- def _str_contains(
- self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
- ):
- if flags:
- raise NotImplementedError(f"contains not implemented with {flags=}")
-
- if regex:
- pa_contains = pc.match_substring_regex
- else:
- pa_contains = pc.match_substring
- result = pa_contains(self._pa_array, pat, ignore_case=not case)
- if not isna(na):
- result = result.fill_null(na)
+ def _convert_bool_result(self, result):
return type(self)(result)
- def _str_startswith(self, pat: str | tuple[str, ...], na=None):
- if isinstance(pat, str):
- result = pc.starts_with(self._pa_array, pattern=pat)
- else:
- if len(pat) == 0:
- # For empty tuple, pd.StringDtype() returns null for missing values
- # and false for valid values.
- result = pc.if_else(pc.is_null(self._pa_array), None, False)
- else:
- result = pc.starts_with(self._pa_array, pattern=pat[0])
-
- for p in pat[1:]:
- result = pc.or_(result, pc.starts_with(self._pa_array, pattern=p))
- if not isna(na):
- result = result.fill_null(na)
+ def _convert_int_result(self, result):
return type(self)(result)
- def _str_endswith(self, pat: str | tuple[str, ...], na=None):
- if isinstance(pat, str):
- result = pc.ends_with(self._pa_array, pattern=pat)
- else:
- if len(pat) == 0:
- # For empty tuple, pd.StringDtype() returns null for missing values
- # and false for valid values.
- result = pc.if_else(pc.is_null(self._pa_array), None, False)
- else:
- result = pc.ends_with(self._pa_array, pattern=pat[0])
-
- for p in pat[1:]:
- result = pc.or_(result, pc.ends_with(self._pa_array, pattern=p))
- if not isna(na):
- result = result.fill_null(na)
+ def _convert_rank_result(self, result):
return type(self)(result)
- def _str_replace(
- self,
- pat: str | re.Pattern,
- repl: str | Callable,
- n: int = -1,
- case: bool = True,
- flags: int = 0,
- regex: bool = True,
- ):
- if isinstance(pat, re.Pattern) or callable(repl) or not case or flags:
- raise NotImplementedError(
- "replace is not supported with a re.Pattern, callable repl, "
- "case=False, or flags!=0"
- )
-
- func = pc.replace_substring_regex if regex else pc.replace_substring
- # https://github.com/apache/arrow/issues/39149
- # GH 56404, unexpected behavior with negative max_replacements with pyarrow.
- pa_max_replacements = None if n < 0 else n
- result = func(
- self._pa_array,
- pattern=pat,
- replacement=repl,
- max_replacements=pa_max_replacements,
- )
- return type(self)(result)
+ def _str_count(self, pat: str, flags: int = 0):
+ if flags:
+ raise NotImplementedError(f"count not implemented with {flags=}")
+ return type(self)(pc.count_substring_regex(self._pa_array, pat))
def _str_repeat(self, repeats: int | Sequence[int]):
if not isinstance(repeats, int):
@@ -2348,37 +2307,6 @@ def _str_repeat(self, repeats: int | Sequence[int]):
else:
return type(self)(pc.binary_repeat(self._pa_array, repeats))
- def _str_match(
- self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
- ):
- if not pat.startswith("^"):
- pat = f"^{pat}"
- return self._str_contains(pat, case, flags, na, regex=True)
-
- def _str_fullmatch(
- self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
- ):
- if not pat.endswith("$") or pat.endswith("\\$"):
- pat = f"{pat}$"
- return self._str_match(pat, case, flags, na)
-
- def _str_find(self, sub: str, start: int = 0, end: int | None = None):
- if start != 0 and end is not None:
- slices = pc.utf8_slice_codeunits(self._pa_array, start, stop=end)
- result = pc.find_substring(slices, sub)
- not_found = pc.equal(result, -1)
- start_offset = max(0, start)
- offset_result = pc.add(result, start_offset)
- result = pc.if_else(not_found, result, offset_result)
- elif start == 0 and end is None:
- slices = self._pa_array
- result = pc.find_substring(slices, sub)
- else:
- raise NotImplementedError(
- f"find not implemented with {sub=}, {start=}, {end=}"
- )
- return type(self)(result)
-
def _str_join(self, sep: str):
if pa.types.is_string(self._pa_array.type) or pa.types.is_large_string(
self._pa_array.type
@@ -2399,84 +2327,6 @@ def _str_rpartition(self, sep: str, expand: bool):
result = self._apply_elementwise(predicate)
return type(self)(pa.chunked_array(result))
- def _str_slice(
- self, start: int | None = None, stop: int | None = None, step: int | None = None
- ):
- if start is None:
- start = 0
- if step is None:
- step = 1
- return type(self)(
- pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
- )
-
- def _str_isalnum(self):
- return type(self)(pc.utf8_is_alnum(self._pa_array))
-
- def _str_isalpha(self):
- return type(self)(pc.utf8_is_alpha(self._pa_array))
-
- def _str_isdecimal(self):
- return type(self)(pc.utf8_is_decimal(self._pa_array))
-
- def _str_isdigit(self):
- return type(self)(pc.utf8_is_digit(self._pa_array))
-
- def _str_islower(self):
- return type(self)(pc.utf8_is_lower(self._pa_array))
-
- def _str_isnumeric(self):
- return type(self)(pc.utf8_is_numeric(self._pa_array))
-
- def _str_isspace(self):
- return type(self)(pc.utf8_is_space(self._pa_array))
-
- def _str_istitle(self):
- return type(self)(pc.utf8_is_title(self._pa_array))
-
- def _str_isupper(self):
- return type(self)(pc.utf8_is_upper(self._pa_array))
-
- def _str_len(self):
- return type(self)(pc.utf8_length(self._pa_array))
-
- def _str_lower(self):
- return type(self)(pc.utf8_lower(self._pa_array))
-
- def _str_upper(self):
- return type(self)(pc.utf8_upper(self._pa_array))
-
- def _str_strip(self, to_strip=None):
- if to_strip is None:
- result = pc.utf8_trim_whitespace(self._pa_array)
- else:
- result = pc.utf8_trim(self._pa_array, characters=to_strip)
- return type(self)(result)
-
- def _str_lstrip(self, to_strip=None):
- if to_strip is None:
- result = pc.utf8_ltrim_whitespace(self._pa_array)
- else:
- result = pc.utf8_ltrim(self._pa_array, characters=to_strip)
- return type(self)(result)
-
- def _str_rstrip(self, to_strip=None):
- if to_strip is None:
- result = pc.utf8_rtrim_whitespace(self._pa_array)
- else:
- result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
- return type(self)(result)
-
- def _str_removeprefix(self, prefix: str):
- if not pa_version_under13p0:
- starts_with = pc.starts_with(self._pa_array, pattern=prefix)
- removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
- result = pc.if_else(starts_with, removed, self._pa_array)
- return type(self)(result)
- predicate = lambda val: val.removeprefix(prefix)
- result = self._apply_elementwise(predicate)
- return type(self)(pa.chunked_array(result))
-
def _str_casefold(self):
predicate = lambda val: val.casefold()
result = self._apply_elementwise(predicate)
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index f191f7277743f..97004474648b2 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2475,11 +2475,6 @@ def unique(self) -> Self:
# pylint: disable=useless-parent-delegation
return super().unique()
- def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray:
- # make sure we have correct itemsize for resulting codes
- assert res_values.dtype == self._ndarray.dtype
- return res_values
-
def equals(self, other: object) -> bool:
"""
Returns True if categorical arrays are equal.
@@ -2696,7 +2691,9 @@ def _str_get_dummies(self, sep: str = "|"):
# sep may not be in categories. Just bail on this.
from pandas.core.arrays import NumpyExtensionArray
- return NumpyExtensionArray(self.astype(str))._str_get_dummies(sep)
+ return NumpyExtensionArray(self.to_numpy(str, na_value="NaN"))._str_get_dummies(
+ sep
+ )
# ------------------------------------------------------------------------
# GroupBy Methods
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index e85c0222bbec3..81e2f04f2ba2e 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -472,10 +472,16 @@ def astype(self, dtype, copy: bool = True):
return self._box_values(self.asi8.ravel()).reshape(self.shape)
+ elif is_string_dtype(dtype):
+ if isinstance(dtype, ExtensionDtype):
+ arr_object = self._format_native_types(na_rep=dtype.na_value) # type: ignore[arg-type]
+ cls = dtype.construct_array_type()
+ return cls._from_sequence(arr_object, dtype=dtype, copy=False)
+ else:
+ return self._format_native_types()
+
elif isinstance(dtype, ExtensionDtype):
return super().astype(dtype, copy=copy)
- elif is_string_dtype(dtype):
- return self._format_native_types()
elif dtype.kind in "iu":
# we deliberately ignore int32 vs. int64 here.
# See https://github.com/pandas-dev/pandas/issues/24381 for more.
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
index 03712f75db0c7..aafcd82114b97 100644
--- a/pandas/core/arrays/numpy_.py
+++ b/pandas/core/arrays/numpy_.py
@@ -137,9 +137,6 @@ def _from_sequence(
result = result.copy()
return cls(result)
- def _from_backing_data(self, arr: np.ndarray) -> NumpyExtensionArray:
- return type(self)(arr)
-
# ------------------------------------------------------------------------
# Data
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 1aa6fb70d250c..0b0fffcb928a3 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -46,6 +46,7 @@
nanops,
ops,
)
+from pandas.core.algorithms import isin
from pandas.core.array_algos import masked_reductions
from pandas.core.arrays.base import ExtensionArray
from pandas.core.arrays.floating import (
@@ -65,6 +66,7 @@
import pyarrow
from pandas._typing import (
+ ArrayLike,
AxisInt,
Dtype,
DtypeObj,
@@ -167,9 +169,9 @@ def __init__(
# a consistent NaN value (and we can use `dtype.na_value is np.nan`)
na_value = np.nan
elif na_value is not libmissing.NA:
- raise ValueError("'na_value' must be np.nan or pd.NA, got {na_value}")
+ raise ValueError(f"'na_value' must be np.nan or pd.NA, got {na_value}")
- self.storage = storage
+ self.storage = cast(str, storage)
self._na_value = na_value
def __repr__(self) -> str:
@@ -280,6 +282,34 @@ def construct_array_type( # type: ignore[override]
else:
return ArrowStringArrayNumpySemantics
+ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
+ storages = set()
+ na_values = set()
+
+ for dtype in dtypes:
+ if isinstance(dtype, StringDtype):
+ storages.add(dtype.storage)
+ na_values.add(dtype.na_value)
+ elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "T"):
+ continue
+ else:
+ return None
+
+ if len(storages) == 2:
+ # if both python and pyarrow storage -> priority to pyarrow
+ storage = "pyarrow"
+ else:
+ storage = next(iter(storages)) # type: ignore[assignment]
+
+ na_value: libmissing.NAType | float
+ if len(na_values) == 2:
+ # if both NaN and NA -> priority to NA
+ na_value = libmissing.NA
+ else:
+ na_value = next(iter(na_values))
+
+ return StringDtype(storage=storage, na_value=na_value)
+
def __from_arrow__(
self, array: pyarrow.Array | pyarrow.ChunkedArray
) -> BaseStringArray:
@@ -657,11 +687,10 @@ def __arrow_array__(self, type=None):
values[self.isna()] = None
return pa.array(values, type=type, from_pandas=True)
- def _values_for_factorize(self):
+ def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]: # type: ignore[override]
arr = self._ndarray.copy()
- mask = self.isna()
- arr[mask] = None
- return arr, None
+
+ return arr, self.dtype.na_value
def __setitem__(self, key, value) -> None:
value = extract_array(value, extract_numpy=True)
@@ -686,6 +715,10 @@ def __setitem__(self, key, value) -> None:
else:
if not is_array_like(value):
value = np.asarray(value, dtype=object)
+ else:
+ # cast categories and friends to arrays to see if values are
+ # compatible, compatibility with arrow backed strings
+ value = np.asarray(value)
if len(value) and not lib.is_string_array(value, skipna=True):
raise TypeError("Must provide strings.")
@@ -702,6 +735,24 @@ def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
# base class implementation that uses __setitem__
ExtensionArray._putmask(self, mask, value)
+ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
+ if isinstance(values, BaseStringArray) or (
+ isinstance(values, ExtensionArray) and is_string_dtype(values.dtype)
+ ):
+ values = values.astype(self.dtype, copy=False)
+ else:
+ if not lib.is_string_array(np.asarray(values), skipna=True):
+ values = np.array(
+ [val for val in values if isinstance(val, str) or isna(val)],
+ dtype=object,
+ )
+ if not len(values):
+ return np.zeros(self.shape, dtype=bool)
+
+ values = self._from_sequence(values, dtype=self.dtype)
+
+ return isin(np.asarray(self), np.asarray(values))
+
def astype(self, dtype, copy: bool = True):
dtype = pandas_dtype(dtype)
@@ -824,8 +875,11 @@ def _cmp_method(self, other, op):
f"Lengths of operands do not match: {len(self)} != {len(other)}"
)
- other = np.asarray(other)
+ # for array-likes, first filter out NAs before converting to numpy
+ if not is_array_like(other):
+ other = np.asarray(other)
other = other[valid]
+ other = np.asarray(other)
if op.__name__ in ops.ARITHMETIC_BINOPS:
result = np.empty_like(self._ndarray, dtype="object")
@@ -871,8 +925,3 @@ def _from_sequence(
if dtype is None:
dtype = StringDtype(storage="python", na_value=np.nan)
return super()._from_sequence(scalars, dtype=dtype, copy=copy)
-
- def _from_backing_data(self, arr: np.ndarray) -> StringArrayNumpySemantics:
- # need to override NumpyExtensionArray._from_backing_data to ensure
- # we always preserve the dtype
- return NDArrayBacked._from_backing_data(self, arr)
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 91c1f20ba93c6..56f7d3aecce20 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -30,6 +30,7 @@
from pandas.core.arrays._arrow_string_mixins import ArrowStringArrayMixin
from pandas.core.arrays.arrow import ArrowExtensionArray
from pandas.core.arrays.boolean import BooleanDtype
+from pandas.core.arrays.floating import Float64Dtype
from pandas.core.arrays.integer import Int64Dtype
from pandas.core.arrays.numeric import NumericDtype
from pandas.core.arrays.string_ import (
@@ -42,17 +43,13 @@
import pyarrow as pa
import pyarrow.compute as pc
- from pandas.core.arrays.arrow._arrow_utils import fallback_performancewarning
-
if TYPE_CHECKING:
from collections.abc import Sequence
from pandas._typing import (
ArrayLike,
- AxisInt,
Dtype,
- Scalar,
npt,
)
@@ -214,10 +211,8 @@ def insert(self, loc: int, item) -> ArrowStringArray:
raise TypeError("Scalar must be NA or str")
return super().insert(loc, item)
- def _result_converter(self, values, na=None):
+ def _convert_bool_result(self, values):
if self.dtype.na_value is np.nan:
- if not isna(na):
- values = values.fill_null(bool(na))
return ArrowExtensionArray(values).to_numpy(na_value=np.nan)
return BooleanDtype().__from_arrow__(values)
@@ -233,7 +228,7 @@ def _maybe_convert_setitem_value(self, value):
value[isna(value)] = None
for v in value:
if not (v is None or isinstance(v, str)):
- raise TypeError("Scalar must be NA or str")
+ raise TypeError("Must provide strings")
return super()._maybe_convert_setitem_value(value)
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
@@ -283,61 +278,54 @@ def _data(self):
# ------------------------------------------------------------------------
# String methods interface
+ _str_isalnum = ArrowStringArrayMixin._str_isalnum
+ _str_isalpha = ArrowStringArrayMixin._str_isalpha
+ _str_isdecimal = ArrowStringArrayMixin._str_isdecimal
+ _str_isdigit = ArrowStringArrayMixin._str_isdigit
+ _str_islower = ArrowStringArrayMixin._str_islower
+ _str_isnumeric = ArrowStringArrayMixin._str_isnumeric
+ _str_isspace = ArrowStringArrayMixin._str_isspace
+ _str_istitle = ArrowStringArrayMixin._str_istitle
+ _str_isupper = ArrowStringArrayMixin._str_isupper
+
_str_map = BaseStringArray._str_map
+ _str_startswith = ArrowStringArrayMixin._str_startswith
+ _str_endswith = ArrowStringArrayMixin._str_endswith
+ _str_pad = ArrowStringArrayMixin._str_pad
+ _str_match = ArrowStringArrayMixin._str_match
+ _str_fullmatch = ArrowStringArrayMixin._str_fullmatch
+ _str_lower = ArrowStringArrayMixin._str_lower
+ _str_upper = ArrowStringArrayMixin._str_upper
+ _str_strip = ArrowStringArrayMixin._str_strip
+ _str_lstrip = ArrowStringArrayMixin._str_lstrip
+ _str_rstrip = ArrowStringArrayMixin._str_rstrip
+ _str_removesuffix = ArrowStringArrayMixin._str_removesuffix
+ _str_get = ArrowStringArrayMixin._str_get
+ _str_capitalize = ArrowStringArrayMixin._str_capitalize
+ _str_title = ArrowStringArrayMixin._str_title
+ _str_swapcase = ArrowStringArrayMixin._str_swapcase
+ _str_slice_replace = ArrowStringArrayMixin._str_slice_replace
+ _str_len = ArrowStringArrayMixin._str_len
+ _str_slice = ArrowStringArrayMixin._str_slice
def _str_contains(
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
):
if flags:
- fallback_performancewarning()
return super()._str_contains(pat, case, flags, na, regex)
- if regex:
- result = pc.match_substring_regex(self._pa_array, pat, ignore_case=not case)
- else:
- result = pc.match_substring(self._pa_array, pat, ignore_case=not case)
- result = self._result_converter(result, na=na)
if not isna(na):
- result[isna(result)] = bool(na)
- return result
-
- def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
- if isinstance(pat, str):
- result = pc.starts_with(self._pa_array, pattern=pat)
- else:
- if len(pat) == 0:
- # mimic existing behaviour of string extension array
- # and python string method
- result = pa.array(
- np.zeros(len(self._pa_array), dtype=bool), mask=isna(self._pa_array)
- )
- else:
- result = pc.starts_with(self._pa_array, pattern=pat[0])
-
- for p in pat[1:]:
- result = pc.or_(result, pc.starts_with(self._pa_array, pattern=p))
- if not isna(na):
- result = result.fill_null(na)
- return self._result_converter(result)
-
- def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
- if isinstance(pat, str):
- result = pc.ends_with(self._pa_array, pattern=pat)
- else:
- if len(pat) == 0:
- # mimic existing behaviour of string extension array
- # and python string method
- result = pa.array(
- np.zeros(len(self._pa_array), dtype=bool), mask=isna(self._pa_array)
+ if not isinstance(na, bool):
+ # GH#59561
+ warnings.warn(
+ "Allowing a non-bool 'na' in obj.str.contains is deprecated "
+ "and will raise in a future version.",
+ FutureWarning,
+ stacklevel=find_stack_level(),
)
- else:
- result = pc.ends_with(self._pa_array, pattern=pat[0])
+ na = bool(na)
- for p in pat[1:]:
- result = pc.or_(result, pc.ends_with(self._pa_array, pattern=p))
- if not isna(na):
- result = result.fill_null(na)
- return self._result_converter(result)
+ return ArrowStringArrayMixin._str_contains(self, pat, case, flags, na, regex)
def _str_replace(
self,
@@ -349,146 +337,38 @@ def _str_replace(
regex: bool = True,
):
if isinstance(pat, re.Pattern) or callable(repl) or not case or flags:
- fallback_performancewarning()
return super()._str_replace(pat, repl, n, case, flags, regex)
- func = pc.replace_substring_regex if regex else pc.replace_substring
- result = func(self._pa_array, pattern=pat, replacement=repl, max_replacements=n)
- return type(self)(result)
+ return ArrowStringArrayMixin._str_replace(
+ self, pat, repl, n, case, flags, regex
+ )
def _str_repeat(self, repeats: int | Sequence[int]):
if not isinstance(repeats, int):
return super()._str_repeat(repeats)
else:
- return type(self)(pc.binary_repeat(self._pa_array, repeats))
-
- def _str_match(
- self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
- ):
- if not pat.startswith("^"):
- pat = f"^{pat}"
- return self._str_contains(pat, case, flags, na, regex=True)
-
- def _str_fullmatch(
- self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
- ):
- if not pat.endswith("$") or pat.endswith("\\$"):
- pat = f"{pat}$"
- return self._str_match(pat, case, flags, na)
-
- def _str_slice(
- self, start: int | None = None, stop: int | None = None, step: int | None = None
- ):
- if stop is None:
- return super()._str_slice(start, stop, step)
- if start is None:
- start = 0
- if step is None:
- step = 1
- return type(self)(
- pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
- )
-
- def _str_isalnum(self):
- result = pc.utf8_is_alnum(self._pa_array)
- return self._result_converter(result)
-
- def _str_isalpha(self):
- result = pc.utf8_is_alpha(self._pa_array)
- return self._result_converter(result)
-
- def _str_isdecimal(self):
- result = pc.utf8_is_decimal(self._pa_array)
- return self._result_converter(result)
-
- def _str_isdigit(self):
- result = pc.utf8_is_digit(self._pa_array)
- return self._result_converter(result)
-
- def _str_islower(self):
- result = pc.utf8_is_lower(self._pa_array)
- return self._result_converter(result)
-
- def _str_isnumeric(self):
- result = pc.utf8_is_numeric(self._pa_array)
- return self._result_converter(result)
-
- def _str_isspace(self):
- result = pc.utf8_is_space(self._pa_array)
- return self._result_converter(result)
-
- def _str_istitle(self):
- result = pc.utf8_is_title(self._pa_array)
- return self._result_converter(result)
-
- def _str_isupper(self):
- result = pc.utf8_is_upper(self._pa_array)
- return self._result_converter(result)
-
- def _str_len(self):
- result = pc.utf8_length(self._pa_array)
- return self._convert_int_dtype(result)
-
- def _str_lower(self):
- return type(self)(pc.utf8_lower(self._pa_array))
-
- def _str_upper(self):
- return type(self)(pc.utf8_upper(self._pa_array))
-
- def _str_strip(self, to_strip=None):
- if to_strip is None:
- result = pc.utf8_trim_whitespace(self._pa_array)
- else:
- result = pc.utf8_trim(self._pa_array, characters=to_strip)
- return type(self)(result)
-
- def _str_lstrip(self, to_strip=None):
- if to_strip is None:
- result = pc.utf8_ltrim_whitespace(self._pa_array)
- else:
- result = pc.utf8_ltrim(self._pa_array, characters=to_strip)
- return type(self)(result)
-
- def _str_rstrip(self, to_strip=None):
- if to_strip is None:
- result = pc.utf8_rtrim_whitespace(self._pa_array)
- else:
- result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
- return type(self)(result)
+ return ArrowExtensionArray._str_repeat(self, repeats=repeats)
def _str_removeprefix(self, prefix: str):
if not pa_version_under13p0:
- starts_with = pc.starts_with(self._pa_array, pattern=prefix)
- removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
- result = pc.if_else(starts_with, removed, self._pa_array)
- return type(self)(result)
+ return ArrowStringArrayMixin._str_removeprefix(self, prefix)
return super()._str_removeprefix(prefix)
- def _str_removesuffix(self, suffix: str):
- ends_with = pc.ends_with(self._pa_array, pattern=suffix)
- removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
- result = pc.if_else(ends_with, removed, self._pa_array)
- return type(self)(result)
-
def _str_count(self, pat: str, flags: int = 0):
if flags:
return super()._str_count(pat, flags)
result = pc.count_substring_regex(self._pa_array, pat)
- return self._convert_int_dtype(result)
+ return self._convert_int_result(result)
def _str_find(self, sub: str, start: int = 0, end: int | None = None):
- if start != 0 and end is not None:
- slices = pc.utf8_slice_codeunits(self._pa_array, start, stop=end)
- result = pc.find_substring(slices, sub)
- not_found = pc.equal(result, -1)
- offset_result = pc.add(result, end - start)
- result = pc.if_else(not_found, result, offset_result)
- elif start == 0 and end is None:
- slices = self._pa_array
- result = pc.find_substring(slices, sub)
- else:
+ if (
+ pa_version_under13p0
+ and not (start != 0 and end is not None)
+ and not (start == 0 and end is None)
+ ):
+ # GH#59562
return super()._str_find(sub, start, end)
- return self._convert_int_dtype(result)
+ return ArrowStringArrayMixin._str_find(self, sub, start, end)
def _str_get_dummies(self, sep: str = "|"):
dummies_pa, labels = ArrowExtensionArray(self._pa_array)._str_get_dummies(sep)
@@ -497,7 +377,7 @@ def _str_get_dummies(self, sep: str = "|"):
dummies = np.vstack(dummies_pa.to_numpy())
return dummies.astype(np.int64, copy=False), labels
- def _convert_int_dtype(self, result):
+ def _convert_int_result(self, result):
if self.dtype.na_value is np.nan:
if isinstance(result, pa.Array):
result = result.to_numpy(zero_copy_only=False)
@@ -509,6 +389,16 @@ def _convert_int_dtype(self, result):
return Int64Dtype().__from_arrow__(result)
+ def _convert_rank_result(self, result):
+ if self.dtype.na_value is np.nan:
+ if isinstance(result, pa.Array):
+ result = result.to_numpy(zero_copy_only=False)
+ else:
+ result = result.to_numpy()
+ return result.astype("float64", copy=False)
+
+ return Float64Dtype().__from_arrow__(result)
+
def _reduce(
self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
):
@@ -524,34 +414,12 @@ def _reduce(
result = self._reduce_calc(name, skipna=skipna, keepdims=keepdims, **kwargs)
if name in ("argmin", "argmax") and isinstance(result, pa.Array):
- return self._convert_int_dtype(result)
+ return self._convert_int_result(result)
elif isinstance(result, pa.Array):
return type(self)(result)
else:
return result
- def _rank(
- self,
- *,
- axis: AxisInt = 0,
- method: str = "average",
- na_option: str = "keep",
- ascending: bool = True,
- pct: bool = False,
- ):
- """
- See Series.rank.__doc__.
- """
- return self._convert_int_dtype(
- self._rank_calc(
- axis=axis,
- method=method,
- na_option=na_option,
- ascending=ascending,
- pct=pct,
- )
- )
-
def value_counts(self, dropna: bool = True) -> Series:
result = super().value_counts(dropna=dropna)
if self.dtype.na_value is np.nan:
@@ -573,10 +441,3 @@ def _cmp_method(self, other, op):
class ArrowStringArrayNumpySemantics(ArrowStringArray):
_na_value = np.nan
- _str_get = ArrowStringArrayMixin._str_get
- _str_removesuffix = ArrowStringArrayMixin._str_removesuffix
- _str_capitalize = ArrowStringArrayMixin._str_capitalize
- _str_pad = ArrowStringArrayMixin._str_pad
- _str_title = ArrowStringArrayMixin._str_title
- _str_swapcase = ArrowStringArrayMixin._str_swapcase
- _str_slice_replace = ArrowStringArrayMixin._str_slice_replace
diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py
index f1fe528de06f8..7bb623cba3755 100644
--- a/pandas/core/computation/eval.py
+++ b/pandas/core/computation/eval.py
@@ -10,7 +10,10 @@
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_bool_kwarg
-from pandas.core.dtypes.common import is_extension_array_dtype
+from pandas.core.dtypes.common import (
+ is_extension_array_dtype,
+ is_string_dtype,
+)
from pandas.core.computation.engines import ENGINES
from pandas.core.computation.expr import (
@@ -336,10 +339,13 @@ def eval(
parsed_expr = Expr(expr, engine=engine, parser=parser, env=env)
if engine == "numexpr" and (
- is_extension_array_dtype(parsed_expr.terms.return_type)
+ (
+ is_extension_array_dtype(parsed_expr.terms.return_type)
+ and not is_string_dtype(parsed_expr.terms.return_type)
+ )
or getattr(parsed_expr.terms, "operand_types", None) is not None
and any(
- is_extension_array_dtype(elem)
+ (is_extension_array_dtype(elem) and not is_string_dtype(elem))
for elem in parsed_expr.terms.operand_types
)
):
diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py
index d642c37cea129..34055d2177626 100644
--- a/pandas/core/computation/expr.py
+++ b/pandas/core/computation/expr.py
@@ -20,6 +20,8 @@
from pandas.errors import UndefinedVariableError
+from pandas.core.dtypes.common import is_string_dtype
+
import pandas.core.common as com
from pandas.core.computation.ops import (
ARITH_OPS_SYMS,
@@ -520,10 +522,12 @@ def _maybe_evaluate_binop(
elif self.engine != "pytables":
if (
getattr(lhs, "return_type", None) == object
+ or is_string_dtype(getattr(lhs, "return_type", None))
or getattr(rhs, "return_type", None) == object
+ or is_string_dtype(getattr(rhs, "return_type", None))
):
# evaluate "==" and "!=" in python if either of our operands
- # has an object return type
+ # has an object or string return type
return self._maybe_eval(res, eval_in_python + maybe_eval_in_python)
return res
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index 5bccca9cfbd47..584a1d417d198 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -609,7 +609,10 @@ def sanitize_array(
dtype = StringDtype(na_value=np.nan)
subarr = dtype.construct_array_type()._from_sequence(data, dtype=dtype)
- if subarr is data and copy:
+ if (
+ subarr is data
+ or (subarr.dtype == "str" and subarr.dtype.storage == "python") # type: ignore[union-attr]
+ ) and copy:
subarr = subarr.copy()
else:
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index df0251d141984..fe705daaad5fa 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -12,6 +12,8 @@
import numpy as np
+from pandas._config import using_string_dtype
+
from pandas._libs import (
Interval,
Period,
@@ -1325,7 +1327,15 @@ def is_extension_array_dtype(arr_or_dtype) -> bool:
elif isinstance(dtype, np.dtype):
return False
else:
- return registry.find(dtype) is not None
+ try:
+ with warnings.catch_warnings():
+ # pandas_dtype(..) can raise UserWarning for class input
+ warnings.simplefilter("ignore", UserWarning)
+ dtype = pandas_dtype(dtype)
+ except (TypeError, ValueError):
+ # np.dtype(..) can raise ValueError
+ return False
+ return isinstance(dtype, ExtensionDtype)
def is_ea_or_datetimelike_dtype(dtype: DtypeObj | None) -> bool:
@@ -1620,6 +1630,12 @@ def pandas_dtype(dtype) -> DtypeObj:
elif isinstance(dtype, (np.dtype, ExtensionDtype)):
return dtype
+ # builtin aliases
+ if dtype is str and using_string_dtype():
+ from pandas.core.arrays.string_ import StringDtype
+
+ return StringDtype(na_value=np.nan)
+
# registered extension types
result = registry.find(dtype)
if result is not None:
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 1c43ef55c11d7..e7efb8598ec61 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -453,7 +453,7 @@ def __eq__(self, other: object) -> bool:
# Because left and right have the same length and are unique,
# `indexer` not having any -1s implies that there is a
# bijection between `left` and `right`.
- return (indexer != -1).all()
+ return bool((indexer != -1).all())
# With object-dtype we need a comparison that identifies
# e.g. int(2) as distinct from float(2)
@@ -1791,7 +1791,7 @@ def _is_na_fill_value(self) -> bool:
@property
def _is_numeric(self) -> bool:
- return not self.subtype == object
+ return self.subtype != object
@property
def _is_boolean(self) -> bool:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 825316585c03c..8e8eb768130fd 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -506,7 +506,8 @@ def __new__(
elif is_ea_or_datetimelike_dtype(dtype):
# non-EA dtype indexes have special casting logic, so we punt here
- pass
+ if isinstance(data, (set, frozenset)):
+ data = list(data)
elif is_ea_or_datetimelike_dtype(data_dtype):
pass
@@ -6414,7 +6415,11 @@ def _should_compare(self, other: Index) -> bool:
return False
dtype = _unpack_nested_dtype(other)
- return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)
+ return (
+ self._is_comparable_dtype(dtype)
+ or is_object_dtype(dtype)
+ or is_string_dtype(dtype)
+ )
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
@@ -6995,6 +7000,9 @@ def insert(self, loc: int, item) -> Index:
# We cannot keep the same dtype, so cast to the (often object)
# minimal shared dtype before doing the insert.
dtype = self._find_common_type_compat(item)
+ if dtype == self.dtype:
+ # EA's might run into recursion errors if loc is invalid
+ raise
return self.astype(dtype).insert(loc, item)
if arr.dtype != object or not isinstance(
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 4fcdb87974511..635924674d9f4 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -50,6 +50,7 @@
is_number,
is_object_dtype,
is_scalar,
+ is_string_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
@@ -699,7 +700,7 @@ def _get_indexer(
# left/right get_indexer, compare elementwise, equality -> match
indexer = self._get_indexer_unique_sides(target)
- elif not is_object_dtype(target.dtype):
+ elif not (is_object_dtype(target.dtype) or is_string_dtype(target.dtype)):
# homogeneous scalar index: use IntervalTree
# we should always have self._should_partial_index(target) here
target = self._maybe_convert_i8(target)
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index 090e27ec58cc3..f376c239a0ce0 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -10,12 +10,14 @@
cast,
)
import unicodedata
+import warnings
import numpy as np
from pandas._libs import lib
import pandas._libs.missing as libmissing
import pandas._libs.ops as libops
+from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.missing import isna
@@ -140,14 +142,38 @@ def _str_contains(
else:
upper_pat = pat.upper()
f = lambda x: upper_pat in x.upper()
+ if not isna(na) and not isinstance(na, bool):
+ # GH#59561
+ warnings.warn(
+ "Allowing a non-bool 'na' in obj.str.contains is deprecated "
+ "and will raise in a future version.",
+ FutureWarning,
+ stacklevel=find_stack_level(),
+ )
return self._str_map(f, na, dtype=np.dtype("bool"))
def _str_startswith(self, pat, na=None):
f = lambda x: x.startswith(pat)
+ if not isna(na) and not isinstance(na, bool):
+ # GH#59561
+ warnings.warn(
+ "Allowing a non-bool 'na' in obj.str.startswith is deprecated "
+ "and will raise in a future version.",
+ FutureWarning,
+ stacklevel=find_stack_level(),
+ )
return self._str_map(f, na_value=na, dtype=np.dtype(bool))
def _str_endswith(self, pat, na=None):
f = lambda x: x.endswith(pat)
+ if not isna(na) and not isinstance(na, bool):
+ # GH#59561
+ warnings.warn(
+ "Allowing a non-bool 'na' in obj.str.endswith is deprecated "
+ "and will raise in a future version.",
+ FutureWarning,
+ stacklevel=find_stack_level(),
+ )
return self._str_map(f, na_value=na, dtype=np.dtype(bool))
def _str_replace(
diff --git a/pandas/io/_util.py b/pandas/io/_util.py
index 68fcfcf65e0c2..50a97f1059b5c 100644
--- a/pandas/io/_util.py
+++ b/pandas/io/_util.py
@@ -24,6 +24,7 @@ def _arrow_dtype_mapping() -> dict:
 pa.string(): pd.StringDtype(),
 pa.float32(): pd.Float32Dtype(),
 pa.float64(): pd.Float64Dtype(),
+ pa.large_string(): pd.StringDtype(),
 }
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 5e0f991d5c406..6a328dfb39be5 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -65,7 +65,6 @@ def test_apply(float_frame, engine, request):
assert result.index is float_frame.index
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("raw", [True, False])
def test_apply_args(float_frame, axis, raw, engine, request):
diff --git a/pandas/tests/apply/test_numba.py b/pandas/tests/apply/test_numba.py
index 6bbe5100e8826..20c067a776f4d 100644
--- a/pandas/tests/apply/test_numba.py
+++ b/pandas/tests/apply/test_numba.py
@@ -1,10 +1,9 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
import pandas.util._test_decorators as td
+import pandas as pd
from pandas import (
DataFrame,
Index,
@@ -19,7 +18,6 @@ def apply_axis(request):
return request.param
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_numba_vs_python_noop(float_frame, apply_axis):
func = lambda x: x
result = float_frame.apply(func, engine="numba", axis=apply_axis)
@@ -29,11 +27,10 @@ def test_numba_vs_python_noop(float_frame, apply_axis):
def test_numba_vs_python_string_index():
# GH#56189
- pytest.importorskip("pyarrow")
df = DataFrame(
1,
- index=Index(["a", "b"], dtype="string[pyarrow_numpy]"),
- columns=Index(["x", "y"], dtype="string[pyarrow_numpy]"),
+ index=Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan)),
+ columns=Index(["x", "y"], dtype=pd.StringDtype(na_value=np.nan)),
)
func = lambda x: x
result = df.apply(func, engine="numba", axis=0)
@@ -43,7 +40,6 @@ def test_numba_vs_python_string_index():
)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_numba_vs_python_indexing():
frame = DataFrame(
{"a": [1, 2, 3], "b": [4, 5, 6], "c": [7.0, 8.0, 9.0]},
diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py
index 899ea1910d055..44e485d40ba53 100644
--- a/pandas/tests/arithmetic/test_object.py
+++ b/pandas/tests/arithmetic/test_object.py
@@ -8,9 +8,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
-from pandas.compat import HAS_PYARROW
import pandas.util._test_decorators as td
import pandas as pd
@@ -186,6 +183,10 @@ def test_objarr_add_invalid(self, op, box_with_array):
"unsupported operand type",
"must be str",
"has no kernel",
+ "operation 'add' not supported",
+ "operation 'radd' not supported",
+ "operation 'sub' not supported",
+ "operation 'rsub' not supported",
]
)
with pytest.raises(Exception, match=msg):
@@ -318,27 +319,17 @@ def test_add(self):
expected = pd.Index(["1a", "1b", "1c"])
tm.assert_index_equal("1" + index, expected)
- @pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
- )
- def test_sub_fail(self, using_infer_string):
+ def test_sub_fail(self):
index = pd.Index([str(i) for i in range(10)])
- if using_infer_string:
- import pyarrow as pa
-
- err = pa.lib.ArrowNotImplementedError
- msg = "has no kernel"
- else:
- err = TypeError
- msg = "unsupported operand type|Cannot broadcast"
- with pytest.raises(err, match=msg):
+ msg = "unsupported operand type|Cannot broadcast|sub' not supported"
+ with pytest.raises(TypeError, match=msg):
index - "a"
- with pytest.raises(err, match=msg):
+ with pytest.raises(TypeError, match=msg):
index - index
- with pytest.raises(err, match=msg):
+ with pytest.raises(TypeError, match=msg):
index - index.tolist()
- with pytest.raises(err, match=msg):
+ with pytest.raises(TypeError, match=msg):
index.tolist() - index
def test_sub_object(self):
diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py
index 4dbd8eb9f5ca7..9ff690cdc914d 100644
--- a/pandas/tests/arrays/boolean/test_arithmetic.py
+++ b/pandas/tests/arrays/boolean/test_arithmetic.py
@@ -3,10 +3,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
-from pandas.compat import HAS_PYARROW
-
import pandas as pd
import pandas._testing as tm
@@ -94,19 +90,8 @@ def test_op_int8(left_array, right_array, opname):
# -----------------------------------------------------------------------------
-@pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
-)
-def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
+def test_error_invalid_values(data, all_arithmetic_operators):
# invalid ops
-
- if using_infer_string:
- import pyarrow as pa
-
- err = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
- else:
- err = TypeError
-
op = all_arithmetic_operators
s = pd.Series(data)
ops = getattr(s, op)
@@ -116,7 +101,8 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
"did not contain a loop with signature matching types|"
"BooleanArray cannot perform the operation|"
"not supported for the input types, and the inputs could not be safely coerced "
- "to any supported types according to the casting rule ''safe''"
+ "to any supported types according to the casting rule ''safe''|"
+ "not supported for dtype"
)
with pytest.raises(TypeError, match=msg):
ops("foo")
@@ -125,9 +111,10 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
r"unsupported operand type\(s\) for",
"Concatenation operation is not implemented for NumPy arrays",
"has no kernel",
+ "not supported for dtype",
]
)
- with pytest.raises(err, match=msg):
+ with pytest.raises(TypeError, match=msg):
ops(pd.Timestamp("20180101"))
# invalid array-likes
@@ -140,7 +127,8 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
"not all arguments converted during string formatting",
"has no kernel",
"not implemented",
+ "not supported for dtype",
]
)
- with pytest.raises(err, match=msg):
+ with pytest.raises(TypeError, match=msg):
ops(pd.Series("foo", index=s.index))
diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py
index 1d948b7495a43..a939ee5f6f53f 100644
--- a/pandas/tests/arrays/categorical/test_api.py
+++ b/pandas/tests/arrays/categorical/test_api.py
@@ -3,8 +3,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
from pandas.compat import PY311
from pandas import (
@@ -158,7 +156,6 @@ def test_reorder_categories_raises(self, new_categories):
with pytest.raises(ValueError, match=msg):
cat.reorder_categories(new_categories)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_add_categories(self):
cat = Categorical(["a", "b", "c", "a"], ordered=True)
old = cat.copy()
diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py
index 768d3c1449fa4..009fac4c2f5ed 100644
--- a/pandas/tests/arrays/floating/test_arithmetic.py
+++ b/pandas/tests/arrays/floating/test_arithmetic.py
@@ -3,8 +3,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import FloatingArray
@@ -124,19 +122,11 @@ def test_arith_zero_dim_ndarray(other):
# -----------------------------------------------------------------------------
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
-def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
+def test_error_invalid_values(data, all_arithmetic_operators):
op = all_arithmetic_operators
s = pd.Series(data)
ops = getattr(s, op)
- if using_infer_string:
- import pyarrow as pa
-
- errs = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
- else:
- errs = TypeError
-
# invalid scalars
msg = "|".join(
[
@@ -152,15 +142,17 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
"Concatenation operation is not implemented for NumPy arrays",
"has no kernel",
"not implemented",
+ "not supported for dtype",
+ "Can only string multiply by an integer",
]
)
- with pytest.raises(errs, match=msg):
+ with pytest.raises(TypeError, match=msg):
ops("foo")
- with pytest.raises(errs, match=msg):
+ with pytest.raises(TypeError, match=msg):
ops(pd.Timestamp("20180101"))
# invalid array-likes
- with pytest.raises(errs, match=msg):
+ with pytest.raises(TypeError, match=msg):
ops(pd.Series("foo", index=s.index))
msg = "|".join(
@@ -181,9 +173,10 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
"cannot subtract DatetimeArray from ndarray",
"has no kernel",
"not implemented",
+ "not supported for dtype",
]
)
- with pytest.raises(errs, match=msg):
+ with pytest.raises(TypeError, match=msg):
ops(pd.Series(pd.date_range("20180101", periods=len(s))))
diff --git a/pandas/tests/arrays/floating/test_astype.py b/pandas/tests/arrays/floating/test_astype.py
index ccf644b34051d..752ebe194ffcf 100644
--- a/pandas/tests/arrays/floating/test_astype.py
+++ b/pandas/tests/arrays/floating/test_astype.py
@@ -68,11 +68,9 @@ def test_astype_str(using_infer_string):
if using_infer_string:
expected = pd.array(["0.1", "0.2", None], dtype=pd.StringDtype(na_value=np.nan))
- tm.assert_extension_array_equal(a.astype("str"), expected)
- # TODO(infer_string) this should also be a string array like above
- expected = np.array(["0.1", "0.2", ""], dtype="U32")
- tm.assert_numpy_array_equal(a.astype(str), expected)
+ tm.assert_extension_array_equal(a.astype(str), expected)
+ tm.assert_extension_array_equal(a.astype("str"), expected)
else:
expected = np.array(["0.1", "0.2", ""], dtype="U32")
diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py
index 8aa8c2db940b4..dee3deeee0f2f 100644
--- a/pandas/tests/arrays/integer/test_arithmetic.py
+++ b/pandas/tests/arrays/integer/test_arithmetic.py
@@ -3,8 +3,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
import pandas as pd
import pandas._testing as tm
from pandas.core import ops
@@ -174,19 +172,11 @@ def test_numpy_zero_dim_ndarray(other):
# -----------------------------------------------------------------------------
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
-def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
+def test_error_invalid_values(data, all_arithmetic_operators):
op = all_arithmetic_operators
s = pd.Series(data)
ops = getattr(s, op)
- if using_infer_string:
- import pyarrow as pa
-
- errs = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
- else:
- errs = TypeError
-
# invalid scalars
msg = "|".join(
[
@@ -201,24 +191,21 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
"has no kernel",
"not implemented",
"The 'out' kwarg is necessary. Use numpy.strings.multiply without it.",
+ "not supported for dtype",
]
)
- with pytest.raises(errs, match=msg):
+ with pytest.raises(TypeError, match=msg):
ops("foo")
- with pytest.raises(errs, match=msg):
+ with pytest.raises(TypeError, match=msg):
ops(pd.Timestamp("20180101"))
# invalid array-likes
str_ser = pd.Series("foo", index=s.index)
# with pytest.raises(TypeError, match=msg):
- if (
- all_arithmetic_operators
- in [
- "__mul__",
- "__rmul__",
- ]
- and not using_infer_string
- ): # (data[~data.isna()] >= 0).all():
+ if all_arithmetic_operators in [
+ "__mul__",
+ "__rmul__",
+ ]: # (data[~data.isna()] >= 0).all():
res = ops(str_ser)
expected = pd.Series(["foo" * x for x in data], index=s.index)
expected = expected.fillna(np.nan)
@@ -227,7 +214,7 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
# more-correct than np.nan here.
tm.assert_series_equal(res, expected)
else:
- with pytest.raises(errs, match=msg):
+ with pytest.raises(TypeError, match=msg):
ops(str_ser)
msg = "|".join(
@@ -242,9 +229,10 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
"cannot subtract DatetimeArray from ndarray",
"has no kernel",
"not implemented",
+ "not supported for dtype",
]
)
- with pytest.raises(errs, match=msg):
+ with pytest.raises(TypeError, match=msg):
ops(pd.Series(pd.date_range("20180101", periods=len(s))))
diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py
index 7be00e569b3fe..90879d8bd3063 100644
--- a/pandas/tests/arrays/integer/test_dtypes.py
+++ b/pandas/tests/arrays/integer/test_dtypes.py
@@ -283,11 +283,9 @@ def test_astype_str(using_infer_string):
if using_infer_string:
expected = pd.array(["1", "2", None], dtype=pd.StringDtype(na_value=np.nan))
- tm.assert_extension_array_equal(a.astype("str"), expected)
- # TODO(infer_string) this should also be a string array like above
- expected = np.array(["1", "2", ""], dtype=f"{tm.ENDIAN}U21")
- tm.assert_numpy_array_equal(a.astype(str), expected)
+ tm.assert_extension_array_equal(a.astype(str), expected)
+ tm.assert_extension_array_equal(a.astype("str"), expected)
else:
expected = np.array(["1", "2", ""], dtype=f"{tm.ENDIAN}U21")
diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py
index 31765165f5f16..293ee4095d02e 100644
--- a/pandas/tests/arrays/masked/test_arrow_compat.py
+++ b/pandas/tests/arrays/masked/test_arrow_compat.py
@@ -1,17 +1,12 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
import pandas as pd
import pandas._testing as tm
-pytestmark = [
- pytest.mark.filterwarnings(
- "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
- ),
- pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
-]
+pytestmark = pytest.mark.filterwarnings(
+ "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
+)
pa = pytest.importorskip("pyarrow")
diff --git a/pandas/tests/arrays/sparse/test_astype.py b/pandas/tests/arrays/sparse/test_astype.py
index 83a507e679d46..e6e4a11a0f5ab 100644
--- a/pandas/tests/arrays/sparse/test_astype.py
+++ b/pandas/tests/arrays/sparse/test_astype.py
@@ -81,8 +81,8 @@ def test_astype_all(self, any_real_numpy_dtype):
),
(
SparseArray([0, 1, 10]),
- str,
- SparseArray(["0", "1", "10"], dtype=SparseDtype(str, "0")),
+ np.str_,
+ SparseArray(["0", "1", "10"], dtype=SparseDtype(np.str_, "0")),
),
(SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])),
(
diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py
index 234f4092421e5..149c28341ba3d 100644
--- a/pandas/tests/arrays/sparse/test_dtype.py
+++ b/pandas/tests/arrays/sparse/test_dtype.py
@@ -177,7 +177,7 @@ def test_construct_from_string_fill_value_raises(string):
[
(SparseDtype(int, 0), float, SparseDtype(float, 0.0)),
(SparseDtype(int, 1), float, SparseDtype(float, 1.0)),
- (SparseDtype(int, 1), str, SparseDtype(object, "1")),
+ (SparseDtype(int, 1), np.str_, SparseDtype(object, "1")),
(SparseDtype(float, 1.5), int, SparseDtype(int, 1)),
],
)
diff --git a/pandas/tests/arrays/string_/test_concat.py b/pandas/tests/arrays/string_/test_concat.py
new file mode 100644
index 0000000000000..320d700b2b6c3
--- /dev/null
+++ b/pandas/tests/arrays/string_/test_concat.py
@@ -0,0 +1,73 @@
+import numpy as np
+import pytest
+
+from pandas.compat import HAS_PYARROW
+
+from pandas.core.dtypes.cast import find_common_type
+
+import pandas as pd
+import pandas._testing as tm
+from pandas.util.version import Version
+
+
+@pytest.mark.parametrize(
+ "to_concat_dtypes, result_dtype",
+ [
+ # same types
+ ([("pyarrow", pd.NA), ("pyarrow", pd.NA)], ("pyarrow", pd.NA)),
+ ([("pyarrow", np.nan), ("pyarrow", np.nan)], ("pyarrow", np.nan)),
+ ([("python", pd.NA), ("python", pd.NA)], ("python", pd.NA)),
+ ([("python", np.nan), ("python", np.nan)], ("python", np.nan)),
+ # pyarrow preference
+ ([("pyarrow", pd.NA), ("python", pd.NA)], ("pyarrow", pd.NA)),
+ # NA preference
+ ([("python", pd.NA), ("python", np.nan)], ("python", pd.NA)),
+ ],
+)
+def test_concat_series(request, to_concat_dtypes, result_dtype):
+ if any(storage == "pyarrow" for storage, _ in to_concat_dtypes) and not HAS_PYARROW:
+ pytest.skip("Could not import 'pyarrow'")
+
+ ser_list = [
+ pd.Series(["a", "b", None], dtype=pd.StringDtype(storage, na_value))
+ for storage, na_value in to_concat_dtypes
+ ]
+
+ result = pd.concat(ser_list, ignore_index=True)
+ expected = pd.Series(
+ ["a", "b", None, "a", "b", None], dtype=pd.StringDtype(*result_dtype)
+ )
+ tm.assert_series_equal(result, expected)
+
+ # order doesn't matter for result
+ result = pd.concat(ser_list[::-1], ignore_index=True)
+ tm.assert_series_equal(result, expected)
+
+
+def test_concat_with_object(string_dtype_arguments):
+ # _get_common_dtype cannot inspect values, so object dtype with strings still
+ # results in object dtype
+ result = pd.concat(
+ [
+ pd.Series(["a", "b", None], dtype=pd.StringDtype(*string_dtype_arguments)),
+ pd.Series(["a", "b", None], dtype=object),
+ ]
+ )
+ assert result.dtype == np.dtype("object")
+
+
+def test_concat_with_numpy(string_dtype_arguments):
+ # common type with a numpy string dtype always preserves the pandas string dtype
+ dtype = pd.StringDtype(*string_dtype_arguments)
+ assert find_common_type([dtype, np.dtype("U")]) == dtype
+ assert find_common_type([np.dtype("U"), dtype]) == dtype
+ assert find_common_type([dtype, np.dtype("U10")]) == dtype
+ assert find_common_type([np.dtype("U10"), dtype]) == dtype
+
+ # with any other numpy dtype -> object
+ assert find_common_type([dtype, np.dtype("S")]) == np.dtype("object")
+ assert find_common_type([dtype, np.dtype("int64")]) == np.dtype("object")
+
+ if Version(np.__version__) >= Version("2"):
+ assert find_common_type([dtype, np.dtypes.StringDType()]) == dtype
+ assert find_common_type([np.dtypes.StringDType(), dtype]) == dtype
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 1296cc3b5a494..265b9fc40629b 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -29,6 +29,12 @@ def dtype(string_dtype_arguments):
return pd.StringDtype(storage=storage, na_value=na_value)
+@pytest.fixture
+def dtype2(string_dtype_arguments2):
+ storage, na_value = string_dtype_arguments2
+ return pd.StringDtype(storage=storage, na_value=na_value)
+
+
@pytest.fixture
def cls(dtype):
"""Fixture giving array type from parametrized 'dtype'"""
@@ -101,10 +107,7 @@ def test_setitem_validates(cls, dtype):
with pytest.raises(TypeError, match=msg):
arr[0] = 10
- if dtype.storage == "python":
- msg = "Must provide strings."
- else:
- msg = "Scalar must be NA or str"
+ msg = "Must provide strings"
with pytest.raises(TypeError, match=msg):
arr[:] = np.array([1, 2])
@@ -524,7 +527,6 @@ def test_arrow_array(dtype):
assert arr.equals(expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
# roundtrip possible from arrow 1.0.0
@@ -543,13 +545,16 @@ def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
assert result["a"].dtype == "object"
else:
assert isinstance(result["a"].dtype, pd.StringDtype)
- expected = df.astype(f"string[{string_storage}]")
+ expected = df.astype(pd.StringDtype(string_storage, na_value=dtype.na_value))
+ if using_infer_string:
+ expected.columns = expected.columns.astype(
+ pd.StringDtype(string_storage, na_value=np.nan)
+ )
tm.assert_frame_equal(result, expected)
# ensure the missing value is represented by NA and not np.nan or None
assert result.loc[2, "a"] is result["a"].dtype.na_value
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
# GH-41040
@@ -571,7 +576,11 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
assert result["a"].dtype == "object"
else:
assert isinstance(result["a"].dtype, pd.StringDtype)
- expected = df.astype(f"string[{string_storage}]")
+ expected = df.astype(pd.StringDtype(string_storage, na_value=dtype.na_value))
+ if using_infer_string:
+ expected.columns = expected.columns.astype(
+ pd.StringDtype(string_storage, na_value=np.nan)
+ )
tm.assert_frame_equal(result, expected)
@@ -686,11 +695,7 @@ def test_isin(dtype, fixed_now_ts):
tm.assert_series_equal(result, expected)
result = s.isin(["a", pd.NA])
- if dtype.storage == "python" and dtype.na_value is np.nan:
- # TODO(infer_string) we should make this consistent
- expected = pd.Series([True, False, False])
- else:
- expected = pd.Series([True, False, True])
+ expected = pd.Series([True, False, True])
tm.assert_series_equal(result, expected)
result = s.isin([])
@@ -701,6 +706,35 @@ def test_isin(dtype, fixed_now_ts):
expected = pd.Series([True, False, False])
tm.assert_series_equal(result, expected)
+ result = s.isin([fixed_now_ts])
+ expected = pd.Series([False, False, False])
+ tm.assert_series_equal(result, expected)
+
+
+def test_isin_string_array(dtype, dtype2):
+ s = pd.Series(["a", "b", None], dtype=dtype)
+
+ result = s.isin(pd.array(["a", "c"], dtype=dtype2))
+ expected = pd.Series([True, False, False])
+ tm.assert_series_equal(result, expected)
+
+ result = s.isin(pd.array(["a", None], dtype=dtype2))
+ expected = pd.Series([True, False, True])
+ tm.assert_series_equal(result, expected)
+
+
+def test_isin_arrow_string_array(dtype):
+ pa = pytest.importorskip("pyarrow")
+ s = pd.Series(["a", "b", None], dtype=dtype)
+
+ result = s.isin(pd.array(["a", "c"], dtype=pd.ArrowDtype(pa.string())))
+ expected = pd.Series([True, False, False])
+ tm.assert_series_equal(result, expected)
+
+ result = s.isin(pd.array(["a", None], dtype=pd.ArrowDtype(pa.string())))
+ expected = pd.Series([True, False, True])
+ tm.assert_series_equal(result, expected)
+
def test_setitem_scalar_with_mask_validation(dtype):
# https://github.com/pandas-dev/pandas/issues/47628
diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py
index e6957feecf4b5..2f3840e92b62a 100644
--- a/pandas/tests/arrays/string_/test_string_arrow.py
+++ b/pandas/tests/arrays/string_/test_string_arrow.py
@@ -241,10 +241,11 @@ def test_setitem_invalid_indexer_raises():
arr[[0, 1]] = ["foo", "bar", "baz"]
-@pytest.mark.parametrize("dtype", ["string[pyarrow]", "string[pyarrow_numpy]"])
-def test_pickle_roundtrip(dtype):
+@pytest.mark.parametrize("na_value", [pd.NA, np.nan])
+def test_pickle_roundtrip(na_value):
# GH 42600
pytest.importorskip("pyarrow")
+ dtype = StringDtype("pyarrow", na_value=na_value)
expected = pd.Series(range(10), dtype=dtype)
expected_sliced = expected.head(2)
full_pickled = pickle.dumps(expected)
diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py
index 3e0d8b1afedc0..b42e01c76335c 100644
--- a/pandas/tests/base/test_misc.py
+++ b/pandas/tests/base/test_misc.py
@@ -180,9 +180,7 @@ def test_access_by_position(index_flat):
assert index[-1] == index[size - 1]
msg = f"index {size} is out of bounds for axis 0 with size {size}"
- if is_dtype_equal(index.dtype, "string[pyarrow]") or is_dtype_equal(
- index.dtype, "string[pyarrow_numpy]"
- ):
+ if isinstance(index.dtype, pd.StringDtype) and index.dtype.storage == "pyarrow":
msg = "index out of bounds"
with pytest.raises(IndexError, match=msg):
index[size]
diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py
index fb82329d5b50d..e0e3f6dc058a4 100644
--- a/pandas/tests/copy_view/test_astype.py
+++ b/pandas/tests/copy_view/test_astype.py
@@ -135,7 +135,8 @@ def test_astype_string_and_object_update_original(
tm.assert_frame_equal(df2, df_orig)
-def test_astype_string_copy_on_pickle_roundrip():
+def test_astype_str_copy_on_pickle_roundrip():
+ # TODO(infer_string) this test can be removed after 3.0 (once str is the default)
# https://github.com/pandas-dev/pandas/issues/54654
# ensure_string_array may alter array inplace
base = Series(np.array([(1, 2), None, 1], dtype="object"))
@@ -144,6 +145,25 @@ def test_astype_string_copy_on_pickle_roundrip():
tm.assert_series_equal(base, base_copy)
+def test_astype_string_copy_on_pickle_roundrip(any_string_dtype):
+ # https://github.com/pandas-dev/pandas/issues/54654
+ # ensure_string_array may alter array inplace
+ base = Series(np.array([(1, 2), None, 1], dtype="object"))
+ base_copy = pickle.loads(pickle.dumps(base))
+ base_copy.astype(any_string_dtype)
+ tm.assert_series_equal(base, base_copy)
+
+
+def test_astype_string_read_only_on_pickle_roundrip(any_string_dtype):
+ # https://github.com/pandas-dev/pandas/issues/54654
+ # ensure_string_array may alter read-only array inplace
+ base = Series(np.array([(1, 2), None, 1], dtype="object"))
+ base_copy = pickle.loads(pickle.dumps(base))
+ base_copy._values.flags.writeable = False
+ base_copy.astype(any_string_dtype)
+ tm.assert_series_equal(base, base_copy)
+
+
def test_astype_dict_dtypes(using_copy_on_write):
df = DataFrame(
{"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")}
diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
index 866b1964a334f..66c9b456f18ad 100644
--- a/pandas/tests/copy_view/test_constructors.py
+++ b/pandas/tests/copy_view/test_constructors.py
@@ -1,8 +1,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
import pandas as pd
from pandas import (
DataFrame,
@@ -285,10 +283,9 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype):
assert np.shares_memory(arr_before, arr_after)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("cons", [Series, Index])
@pytest.mark.parametrize(
- "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)]
+ "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], object)]
)
def test_dataframe_from_series_or_index(
using_copy_on_write, warn_copy_on_write, data, dtype, cons
diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py
index a87baaedb9244..23ed7f9edcd22 100644
--- a/pandas/tests/copy_view/test_functions.py
+++ b/pandas/tests/copy_view/test_functions.py
@@ -16,10 +16,9 @@
from pandas.tests.copy_view.util import get_array
-@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_concat_frames(using_copy_on_write):
- df = DataFrame({"b": ["a"] * 3})
- df2 = DataFrame({"a": ["a"] * 3})
+ df = DataFrame({"b": ["a"] * 3}, dtype=object)
+ df2 = DataFrame({"a": ["a"] * 3}, dtype=object)
df_orig = df.copy()
result = concat([df, df2], axis=1)
@@ -41,10 +40,9 @@ def test_concat_frames(using_copy_on_write):
tm.assert_frame_equal(df, df_orig)
-@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_concat_frames_updating_input(using_copy_on_write):
- df = DataFrame({"b": ["a"] * 3})
- df2 = DataFrame({"a": ["a"] * 3})
+ df = DataFrame({"b": ["a"] * 3}, dtype=object)
+ df2 = DataFrame({"a": ["a"] * 3}, dtype=object)
result = concat([df, df2], axis=1)
if using_copy_on_write:
@@ -203,7 +201,6 @@ def test_concat_copy_keyword(using_copy_on_write, copy):
assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
-@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
@pytest.mark.parametrize(
"func",
[
@@ -212,8 +210,8 @@ def test_concat_copy_keyword(using_copy_on_write, copy):
],
)
def test_merge_on_key(using_copy_on_write, func):
- df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
- df2 = DataFrame({"key": ["a", "b", "c"], "b": [4, 5, 6]})
+ df1 = DataFrame({"key": Series(["a", "b", "c"], dtype=object), "a": [1, 2, 3]})
+ df2 = DataFrame({"key": Series(["a", "b", "c"], dtype=object), "b": [4, 5, 6]})
df1_orig = df1.copy()
df2_orig = df2.copy()
@@ -267,7 +265,6 @@ def test_merge_on_index(using_copy_on_write):
tm.assert_frame_equal(df2, df2_orig)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize(
"func, how",
[
@@ -276,8 +273,8 @@ def test_merge_on_index(using_copy_on_write):
],
)
def test_merge_on_key_enlarging_one(using_copy_on_write, func, how):
- df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
- df2 = DataFrame({"key": ["a", "b"], "b": [4, 5]})
+ df1 = DataFrame({"key": Series(["a", "b", "c"], dtype=object), "a": [1, 2, 3]})
+ df2 = DataFrame({"key": Series(["a", "b"], dtype=object), "b": [4, 5]})
df1_orig = df1.copy()
df2_orig = df2.copy()
@@ -321,9 +318,13 @@ def test_merge_copy_keyword(using_copy_on_write, copy):
assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
-@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
+@pytest.mark.xfail(
+ using_string_dtype() and HAS_PYARROW,
+ reason="TODO(infer_string); result.index infers str dtype while both "
+ "df1 and df2 index are object.",
+)
def test_join_on_key(using_copy_on_write):
- df_index = Index(["a", "b", "c"], name="key")
+ df_index = Index(["a", "b", "c"], name="key", dtype=object)
df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True))
df2 = DataFrame({"b": [4, 5, 6]}, index=df_index.copy(deep=True))
@@ -355,9 +356,8 @@ def test_join_on_key(using_copy_on_write):
tm.assert_frame_equal(df2, df2_orig)
-@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_join_multiple_dataframes_on_key(using_copy_on_write):
- df_index = Index(["a", "b", "c"], name="key")
+ df_index = Index(["a", "b", "c"], name="key", dtype=object)
df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True))
dfs_list = [
diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py
index 6f7198520d22e..8526d38588897 100644
--- a/pandas/tests/copy_view/test_internals.py
+++ b/pandas/tests/copy_view/test_internals.py
@@ -1,12 +1,13 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
import pandas.util._test_decorators as td
import pandas as pd
-from pandas import DataFrame
+from pandas import (
+ DataFrame,
+ Series,
+)
import pandas._testing as tm
from pandas.tests.copy_view.util import get_array
@@ -78,7 +79,6 @@ def test_switch_options():
@td.skip_array_manager_invalid_test
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("dtype", [np.intp, np.int8])
@pytest.mark.parametrize(
"locs, arr",
@@ -105,7 +105,7 @@ def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype):
"c": [7, 8, 9],
"d": [10, 11, 12],
"e": [13, 14, 15],
- "f": ["a", "b", "c"],
+ "f": Series(["a", "b", "c"], dtype=object),
},
)
arr = arr.astype(dtype)
diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
index ccd30caba5dee..f7442cf5d6d3c 100644
--- a/pandas/tests/dtypes/test_common.py
+++ b/pandas/tests/dtypes/test_common.py
@@ -810,11 +810,23 @@ def test_pandas_dtype_string_dtypes(string_storage):
"pyarrow" if HAS_PYARROW else "python", na_value=np.nan
)
+ with pd.option_context("future.infer_string", True):
+ # with the default string_storage setting
+ result = pandas_dtype(str)
+ assert result == pd.StringDtype(
+ "pyarrow" if HAS_PYARROW else "python", na_value=np.nan
+ )
+
with pd.option_context("future.infer_string", True):
with pd.option_context("string_storage", string_storage):
result = pandas_dtype("str")
assert result == pd.StringDtype(string_storage, na_value=np.nan)
+ with pd.option_context("future.infer_string", True):
+ with pd.option_context("string_storage", string_storage):
+ result = pandas_dtype(str)
+ assert result == pd.StringDtype(string_storage, na_value=np.nan)
+
with pd.option_context("future.infer_string", False):
with pd.option_context("string_storage", string_storage):
result = pandas_dtype("str")
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index a4916ed1bbd8a..a5666e169fb4c 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -5,8 +5,6 @@
import pytest
import pytz
-from pandas._config import using_string_dtype
-
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
from pandas.core.dtypes.base import _registry as registry
@@ -961,7 +959,6 @@ def test_same_categories_different_order(self):
c2 = CategoricalDtype(["b", "a"], ordered=True)
assert c1 is not c2
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("ordered1", [True, False, None])
@pytest.mark.parametrize("ordered2", [True, False, None])
def test_categorical_equality(self, ordered1, ordered2):
diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py
index 2bfe801c48a77..56879129c3a28 100644
--- a/pandas/tests/extension/base/casting.py
+++ b/pandas/tests/extension/base/casting.py
@@ -43,8 +43,8 @@ def test_tolist(self, data):
assert result == expected
def test_astype_str(self, data):
- result = pd.Series(data[:5]).astype(str)
- expected = pd.Series([str(x) for x in data[:5]], dtype=str)
+ result = pd.Series(data[:2]).astype(str)
+ expected = pd.Series([str(x) for x in data[:2]], dtype=str)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py
index ff9f3cbed64a2..547114ecfddd0 100644
--- a/pandas/tests/extension/base/ops.py
+++ b/pandas/tests/extension/base/ops.py
@@ -24,7 +24,7 @@ class BaseOpsUtil:
def _get_expected_exception(
self, op_name: str, obj, other
- ) -> type[Exception] | None:
+ ) -> type[Exception] | tuple[type[Exception], ...] | None:
# Find the Exception, if any we expect to raise calling
# obj.__op_name__(other)
@@ -39,14 +39,6 @@ def _get_expected_exception(
else:
result = self.frame_scalar_exc
- if using_string_dtype() and result is not None:
- import pyarrow as pa
-
- result = ( # type: ignore[assignment]
- result,
- pa.lib.ArrowNotImplementedError,
- NotImplementedError,
- )
return result
def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 9907e345ada63..8afb989508e04 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -68,7 +68,7 @@ def data_for_grouping():
class TestDecimalArray(base.ExtensionTests):
def _get_expected_exception(
self, op_name: str, obj, other
- ) -> type[Exception] | None:
+ ) -> type[Exception] | tuple[type[Exception], ...] | None:
return None
def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index e43b50322bb92..5cbd45a99ae5c 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -207,9 +207,8 @@ def astype(self, dtype, copy=True):
return self.copy()
return self
elif isinstance(dtype, StringDtype):
- value = self.astype(str) # numpy doesn't like nested dicts
arr_cls = dtype.construct_array_type()
- return arr_cls._from_sequence(value, dtype=dtype, copy=False)
+ return arr_cls._from_sequence(self, dtype=dtype, copy=False)
elif not copy:
return np.asarray([dict(x) for x in self], dtype=dtype)
else:
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index d9a3033b8380e..60e7bd83432c5 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -41,7 +41,6 @@
pa_version_under13p0,
pa_version_under14p0,
)
-import pandas.util._test_decorators as td
from pandas.core.dtypes.dtypes import (
ArrowDtype,
@@ -286,7 +285,7 @@ def test_map(self, data_missing, na_action):
expected = data_missing.to_numpy()
tm.assert_numpy_array_equal(result, expected)
- def test_astype_str(self, data, request):
+ def test_astype_str(self, data, request, using_infer_string):
pa_dtype = data.dtype.pyarrow_dtype
if pa.types.is_binary(pa_dtype):
request.applymarker(
@@ -294,9 +293,10 @@ def test_astype_str(self, data, request):
reason=f"For {pa_dtype} .astype(str) decodes.",
)
)
- elif (
- pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None
- ) or pa.types.is_duration(pa_dtype):
+ elif not using_infer_string and (
+ (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None)
+ or pa.types.is_duration(pa_dtype)
+ ):
request.applymarker(
pytest.mark.xfail(
reason="pd.Timestamp/pd.Timedelta repr different from numpy repr",
@@ -304,25 +304,6 @@ def test_astype_str(self, data, request):
)
super().test_astype_str(data)
- @pytest.mark.parametrize(
- "nullable_string_dtype",
- [
- "string[python]",
- pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
- ],
- )
- def test_astype_string(self, data, nullable_string_dtype, request):
- pa_dtype = data.dtype.pyarrow_dtype
- if (
- pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None
- ) or pa.types.is_duration(pa_dtype):
- request.applymarker(
- pytest.mark.xfail(
- reason="pd.Timestamp/pd.Timedelta repr different from numpy repr",
- )
- )
- super().test_astype_string(data, nullable_string_dtype)
-
def test_from_dtype(self, data, request):
pa_dtype = data.dtype.pyarrow_dtype
if pa.types.is_string(pa_dtype) or pa.types.is_decimal(pa_dtype):
@@ -800,8 +781,6 @@ def test_value_counts_returns_pyarrow_int64(self, data):
_combine_le_expected_dtype = "bool[pyarrow]"
- divmod_exc = NotImplementedError
-
def get_op_from_name(self, op_name):
short_opname = op_name.strip("_")
if short_opname == "rtruediv":
@@ -935,10 +914,11 @@ def _is_temporal_supported(self, opname, pa_dtype):
def _get_expected_exception(
self, op_name: str, obj, other
- ) -> type[Exception] | None:
+ ) -> type[Exception] | tuple[type[Exception], ...] | None:
if op_name in ("__divmod__", "__rdivmod__"):
- return self.divmod_exc
+ return (NotImplementedError, TypeError)
+ exc: type[Exception] | tuple[type[Exception], ...] | None
dtype = tm.get_dtype(obj)
# error: Item "dtype[Any]" of "dtype[Any] | ExtensionDtype" has no
# attribute "pyarrow_dtype"
@@ -949,7 +929,7 @@ def _get_expected_exception(
"__mod__",
"__rmod__",
}:
- exc = NotImplementedError
+ exc = (NotImplementedError, TypeError)
elif arrow_temporal_supported:
exc = None
elif op_name in ["__add__", "__radd__"] and (
@@ -961,10 +941,7 @@ def _get_expected_exception(
or pa.types.is_integer(pa_dtype)
or pa.types.is_decimal(pa_dtype)
):
- # TODO: in many of these cases, e.g. non-duration temporal,
- # these will *never* be allowed. Would it make more sense to
- # re-raise as TypeError, more consistent with non-pyarrow cases?
- exc = pa.ArrowNotImplementedError
+ exc = TypeError
else:
exc = None
return exc
@@ -1020,14 +997,6 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request)
if all_arithmetic_operators == "__rmod__" and pa.types.is_binary(pa_dtype):
pytest.skip("Skip testing Python string formatting")
- elif all_arithmetic_operators in ("__rmul__", "__mul__") and (
- pa.types.is_binary(pa_dtype) or pa.types.is_string(pa_dtype)
- ):
- request.applymarker(
- pytest.mark.xfail(
- raises=TypeError, reason="Can only string multiply by an integer."
- )
- )
mark = self._get_arith_xfail_marker(all_arithmetic_operators, pa_dtype)
if mark is not None:
@@ -1042,14 +1011,6 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype)
):
pytest.skip("Skip testing Python string formatting")
- elif all_arithmetic_operators in ("__rmul__", "__mul__") and (
- pa.types.is_binary(pa_dtype) or pa.types.is_string(pa_dtype)
- ):
- request.applymarker(
- pytest.mark.xfail(
- raises=TypeError, reason="Can only string multiply by an integer."
- )
- )
mark = self._get_arith_xfail_marker(all_arithmetic_operators, pa_dtype)
if mark is not None:
@@ -1073,14 +1034,6 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators, request):
),
)
)
- elif all_arithmetic_operators in ("__rmul__", "__mul__") and (
- pa.types.is_binary(pa_dtype) or pa.types.is_string(pa_dtype)
- ):
- request.applymarker(
- pytest.mark.xfail(
- raises=TypeError, reason="Can only string multiply by an integer."
- )
- )
mark = self._get_arith_xfail_marker(all_arithmetic_operators, pa_dtype)
if mark is not None:
@@ -1868,6 +1821,17 @@ def test_str_replace_negative_n():
expected = pd.Series(["bc", ""], dtype=ArrowDtype(pa.string()))
tm.assert_series_equal(expected, actual)
+ # Same bug for pyarrow-backed StringArray GH#59628
+ ser2 = ser.astype(pd.StringDtype(storage="pyarrow"))
+ actual2 = ser2.str.replace("a", "", -3, True)
+ expected2 = expected.astype(ser2.dtype)
+ tm.assert_series_equal(expected2, actual2)
+
+ ser3 = ser.astype(pd.StringDtype(storage="pyarrow", na_value=np.nan))
+ actual3 = ser3.str.replace("a", "", -3, True)
+ expected3 = expected.astype(ser3.dtype)
+ tm.assert_series_equal(expected3, actual3)
+
def test_str_repeat_unsupported():
ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string()))
@@ -1942,10 +1906,56 @@ def test_str_find_negative_start():
tm.assert_series_equal(result, expected)
-def test_str_find_notimplemented():
+def test_str_find_no_end():
ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string()))
- with pytest.raises(NotImplementedError, match="find not implemented"):
- ser.str.find("ab", start=1)
+ result = ser.str.find("ab", start=1)
+ expected = pd.Series([-1, None], dtype="int64[pyarrow]")
+ tm.assert_series_equal(result, expected)
+
+
+def test_str_find_negative_start_negative_end():
+ # GH 56791
+ ser = pd.Series(["abcdefg", None], dtype=ArrowDtype(pa.string()))
+ result = ser.str.find(sub="d", start=-6, end=-3)
+ expected = pd.Series([3, None], dtype=ArrowDtype(pa.int64()))
+ tm.assert_series_equal(result, expected)
+
+
+def test_str_find_large_start():
+ # GH 56791
+ ser = pd.Series(["abcdefg", None], dtype=ArrowDtype(pa.string()))
+ result = ser.str.find(sub="d", start=16)
+ expected = pd.Series([-1, None], dtype=ArrowDtype(pa.int64()))
+ tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.skipif(
+ pa_version_under13p0, reason="https://github.com/apache/arrow/issues/36311"
+)
+@pytest.mark.parametrize("start", [-15, -3, 0, 1, 15, None])
+@pytest.mark.parametrize("end", [-15, -1, 0, 3, 15, None])
+@pytest.mark.parametrize("sub", ["", "az", "abce", "a", "caa"])
+def test_str_find_e2e(start, end, sub):
+ s = pd.Series(
+ ["abcaadef", "abc", "abcdeddefgj8292", "ab", "a", ""],
+ dtype=ArrowDtype(pa.string()),
+ )
+ object_series = s.astype(pd.StringDtype(storage="python"))
+ result = s.str.find(sub, start, end)
+ expected = object_series.str.find(sub, start, end).astype(result.dtype)
+ tm.assert_series_equal(result, expected)
+
+ arrow_str_series = s.astype(pd.StringDtype(storage="pyarrow"))
+ result2 = arrow_str_series.str.find(sub, start, end).astype(result.dtype)
+ tm.assert_series_equal(result2, expected)
+
+
+def test_str_find_negative_start_negative_end_no_match():
+ # GH 56791
+ ser = pd.Series(["abcdefg", None], dtype=ArrowDtype(pa.string()))
+ result = ser.str.find(sub="d", start=-3, end=-6)
+ expected = pd.Series([-1, None], dtype=ArrowDtype(pa.int64()))
+ tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
@@ -1989,6 +1999,7 @@ def test_str_join_string_type():
[None, 2, None, ["ab", None]],
[None, 2, 1, ["ab", None]],
[1, 3, 1, ["bc", None]],
+ (None, None, -1, ["dcba", None]),
],
)
def test_str_slice(start, stop, step, exp):
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index f800f734ec9d9..07c3b4224e76f 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -21,7 +21,7 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
+from pandas.compat import HAS_PYARROW
import pandas as pd
import pandas._testing as tm
@@ -30,10 +30,6 @@
from pandas.core.arrays.string_ import StringDtype
from pandas.tests.extension import base
-pytestmark = pytest.mark.xfail(
- using_string_dtype(), reason="TODO(infer_string)", strict=False
-)
-
def maybe_split_array(arr, chunked):
if not chunked:
@@ -168,24 +164,15 @@ def test_fillna_no_op_returns_copy(self, data):
def _get_expected_exception(
self, op_name: str, obj, other
- ) -> type[Exception] | None:
- if op_name in ["__divmod__", "__rdivmod__"]:
- if (
- isinstance(obj, pd.Series)
- and cast(StringDtype, tm.get_dtype(obj)).storage == "pyarrow"
- ):
- # TODO: re-raise as TypeError?
- return NotImplementedError
- elif (
- isinstance(other, pd.Series)
- and cast(StringDtype, tm.get_dtype(other)).storage == "pyarrow"
- ):
- # TODO: re-raise as TypeError?
- return NotImplementedError
- return TypeError
- elif op_name in ["__mod__", "__rmod__", "__pow__", "__rpow__"]:
- if cast(StringDtype, tm.get_dtype(obj)).storage == "pyarrow":
- return NotImplementedError
+ ) -> type[Exception] | tuple[type[Exception], ...] | None:
+ if op_name in [
+ "__mod__",
+ "__rmod__",
+ "__divmod__",
+ "__rdivmod__",
+ "__pow__",
+ "__rpow__",
+ ]:
return TypeError
elif op_name in ["__mul__", "__rmul__"]:
# Can only multiply strings by integers
@@ -198,11 +185,6 @@ def _get_expected_exception(
"__sub__",
"__rsub__",
]:
- if cast(StringDtype, tm.get_dtype(obj)).storage == "pyarrow":
- import pyarrow as pa
-
- # TODO: better to re-raise as TypeError?
- return pa.ArrowNotImplementedError
return TypeError
return None
@@ -230,9 +212,35 @@ def test_compare_scalar(self, data, comparison_op):
ser = pd.Series(data)
self._compare_other(ser, data, comparison_op, "abc")
- @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
- def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
- super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
+ def test_combine_add(self, data_repeated, using_infer_string, request):
+ dtype = next(data_repeated(1)).dtype
+ if using_infer_string and (
+ (dtype.na_value is pd.NA) and dtype.storage == "python"
+ ):
+ mark = pytest.mark.xfail(
+ reason="The pointwise operation result will be inferred to "
+ "string[nan, pyarrow], which does not match the input dtype"
+ )
+ request.applymarker(mark)
+ super().test_combine_add(data_repeated)
+
+ def test_arith_series_with_array(
+ self, data, all_arithmetic_operators, using_infer_string, request
+ ):
+ dtype = data.dtype
+ if (
+ using_infer_string
+ and all_arithmetic_operators == "__radd__"
+ and (
+ (dtype.na_value is pd.NA) or (dtype.storage == "python" and HAS_PYARROW)
+ )
+ ):
+ mark = pytest.mark.xfail(
+ reason="The pointwise operation result will be inferred to "
+ "string[nan, pyarrow], which does not match the input dtype"
+ )
+ request.applymarker(mark)
+ super().test_arith_series_with_array(data, all_arithmetic_operators)
class Test2DCompat(base.Dim2CompatTests):
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 7a7586961deca..04dba325f060f 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -1955,13 +1955,11 @@ def test_adding_new_conditional_column() -> None:
("dtype", "infer_string"),
[
(object, False),
- ("string[pyarrow_numpy]", True),
+ (pd.StringDtype(na_value=np.nan), True),
],
)
def test_adding_new_conditional_column_with_string(dtype, infer_string) -> None:
# https://github.com/pandas-dev/pandas/issues/56204
- pytest.importorskip("pyarrow")
-
df = DataFrame({"a": [1, 2], "b": [3, 4]})
with pd.option_context("future.infer_string", infer_string):
df.loc[df["a"] == 1, "c"] = "1"
@@ -1971,16 +1969,14 @@ def test_adding_new_conditional_column_with_string(dtype, infer_string) -> None:
tm.assert_frame_equal(df, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_add_new_column_infer_string():
# GH#55366
- pytest.importorskip("pyarrow")
df = DataFrame({"x": [1]})
with pd.option_context("future.infer_string", True):
df.loc[df["x"] == 1, "y"] = "1"
expected = DataFrame(
- {"x": [1], "y": Series(["1"], dtype="string[pyarrow_numpy]")},
- columns=Index(["x", "y"], dtype=object),
+ {"x": [1], "y": Series(["1"], dtype=pd.StringDtype(na_value=np.nan))},
+ columns=Index(["x", "y"], dtype="str"),
)
tm.assert_frame_equal(df, expected)
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index 9c27e76de91b2..ca3764ac87e95 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -169,21 +169,21 @@ def test_astype_str(self):
"d": list(map(str, d._values)),
"e": list(map(str, e._values)),
},
- dtype="object",
+ dtype="str",
)
tm.assert_frame_equal(result, expected)
- def test_astype_str_float(self):
+ def test_astype_str_float(self, using_infer_string):
# see GH#11302
result = DataFrame([np.nan]).astype(str)
- expected = DataFrame(["nan"], dtype="object")
+ expected = DataFrame([np.nan if using_infer_string else "nan"], dtype="str")
tm.assert_frame_equal(result, expected)
result = DataFrame([1.12345678901234567890]).astype(str)
val = "1.1234567890123457"
- expected = DataFrame([val], dtype="object")
+ expected = DataFrame([val], dtype="str")
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("dtype_class", [dict, Series])
@@ -285,7 +285,7 @@ def test_astype_duplicate_col_series_arg(self):
result = df.astype(dtypes)
expected = DataFrame(
{
- 0: Series(vals[:, 0].astype(str), dtype=object),
+ 0: Series(vals[:, 0].astype(str), dtype="str"),
1: vals[:, 1],
2: pd.array(vals[:, 2], dtype="Float64"),
3: vals[:, 3],
@@ -666,9 +666,10 @@ def test_astype_dt64tz(self, timezone_frame):
# dt64tz->dt64 deprecated
timezone_frame.astype("datetime64[ns]")
- def test_astype_dt64tz_to_str(self, timezone_frame):
+ def test_astype_dt64tz_to_str(self, timezone_frame, using_infer_string):
# str formatting
result = timezone_frame.astype(str)
+ na_value = np.nan if using_infer_string else "NaT"
expected = DataFrame(
[
[
@@ -676,7 +677,7 @@ def test_astype_dt64tz_to_str(self, timezone_frame):
"2013-01-01 00:00:00-05:00",
"2013-01-01 00:00:00+01:00",
],
- ["2013-01-02", "NaT", "NaT"],
+ ["2013-01-02", na_value, na_value],
[
"2013-01-03",
"2013-01-03 00:00:00-05:00",
@@ -684,7 +685,7 @@ def test_astype_dt64tz_to_str(self, timezone_frame):
],
],
columns=timezone_frame.columns,
- dtype="object",
+ dtype="str",
)
tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
index edba971408d04..37bed2da05743 100644
--- a/pandas/tests/frame/methods/test_rank.py
+++ b/pandas/tests/frame/methods/test_rank.py
@@ -6,13 +6,10 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
from pandas._libs.algos import (
Infinity,
NegInfinity,
)
-from pandas.compat import HAS_PYARROW
from pandas import (
DataFrame,
@@ -473,23 +470,10 @@ def test_rank_inf_nans_na_option(
("top", False, [2.0, 3.0, 1.0, 4.0]),
],
)
- def test_rank_object_first(
- self,
- request,
- frame_or_series,
- na_option,
- ascending,
- expected,
- using_infer_string,
- ):
+ def test_rank_object_first(self, frame_or_series, na_option, ascending, expected):
obj = frame_or_series(["foo", "foo", None, "foo"])
- if using_string_dtype() and not HAS_PYARROW and isinstance(obj, Series):
- request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
-
result = obj.rank(method="first", na_option=na_option, ascending=ascending)
expected = frame_or_series(expected)
- if using_infer_string and isinstance(obj, Series):
- expected = expected.astype("uint64")
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
@@ -509,14 +493,15 @@ def test_rank_mixed_axis_zero(self, data, expected):
result = df.rank(numeric_only=True)
tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize(
- "dtype, exp_dtype",
- [("string[pyarrow]", "Int64"), ("string[pyarrow_numpy]", "float64")],
- )
- def test_rank_string_dtype(self, dtype, exp_dtype):
+ def test_rank_string_dtype(self, string_dtype_no_object):
# GH#55362
- pytest.importorskip("pyarrow")
- obj = Series(["foo", "foo", None, "foo"], dtype=dtype)
+ obj = Series(["foo", "foo", None, "foo"], dtype=string_dtype_no_object)
result = obj.rank(method="first")
+ exp_dtype = (
+ "Float64" if string_dtype_no_object == "string[pyarrow]" else "float64"
+ )
+ if string_dtype_no_object.storage == "python":
+ # TODO nullable string[python] should also return nullable Int64
+ exp_dtype = "float64"
expected = Series([1, 2, None, 3], dtype=exp_dtype)
tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
index 875dca321635f..0354e9df3d168 100644
--- a/pandas/tests/frame/methods/test_select_dtypes.py
+++ b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -99,6 +99,9 @@ def test_select_dtypes_include_using_list_like(self, using_infer_string):
ei = df[["a"]]
tm.assert_frame_equal(ri, ei)
+ ri = df.select_dtypes(include=[str])
+ tm.assert_frame_equal(ri, ei)
+
def test_select_dtypes_exclude_using_list_like(self):
df = DataFrame(
{
@@ -358,7 +361,7 @@ def test_select_dtypes_datetime_with_tz(self):
@pytest.mark.parametrize("dtype", [str, "str", np.bytes_, "S1", np.str_, "U1"])
@pytest.mark.parametrize("arg", ["include", "exclude"])
def test_select_dtypes_str_raises(self, dtype, arg, using_infer_string):
- if using_infer_string and dtype == "str":
+ if using_infer_string and (dtype == "str" or dtype is str):
# this is tested below
pytest.skip("Selecting string columns works with future strings")
df = DataFrame(
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 86d9dc0c7fbdc..fd770b368c9da 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -24,7 +24,6 @@
from pandas._config import using_string_dtype
from pandas._libs import lib
-from pandas.compat import HAS_PYARROW
from pandas.compat.numpy import np_version_gt2
from pandas.errors import IntCastingNaNError
import pandas.util._test_decorators as td
@@ -83,7 +82,7 @@ def test_constructor_from_ndarray_with_str_dtype(self):
# with an array of strings each of which is e.g. "[0 1 2]"
arr = np.arange(12).reshape(4, 3)
df = DataFrame(arr, dtype=str)
- expected = DataFrame(arr.astype(str), dtype=object)
+ expected = DataFrame(arr.astype(str), dtype="str")
tm.assert_frame_equal(df, expected)
def test_constructor_from_2d_datetimearray(self, using_array_manager):
@@ -328,19 +327,39 @@ def test_constructor_dtype_nocast_view_2d_array(
assert df2._mgr.arrays[0].flags.c_contiguous
@td.skip_array_manager_invalid_test
- @pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="conversion copies")
- def test_1d_object_array_does_not_copy(self):
+ def test_1d_object_array_does_not_copy(self, using_infer_string):
# https://github.com/pandas-dev/pandas/issues/39272
arr = np.array(["a", "b"], dtype="object")
df = DataFrame(arr, copy=False)
+ if using_infer_string:
+ if df[0].dtype.storage == "pyarrow":
+ # object dtype strings are converted to arrow memory,
+ # no numpy arrays to compare
+ pass
+ else:
+ assert np.shares_memory(df[0].to_numpy(), arr)
+ else:
+ assert np.shares_memory(df.values, arr)
+
+ df = DataFrame(arr, dtype=object, copy=False)
assert np.shares_memory(df.values, arr)
@td.skip_array_manager_invalid_test
- @pytest.mark.xfail(using_string_dtype(), reason="conversion copies")
- def test_2d_object_array_does_not_copy(self):
+ def test_2d_object_array_does_not_copy(self, using_infer_string):
# https://github.com/pandas-dev/pandas/issues/39272
arr = np.array([["a", "b"], ["c", "d"]], dtype="object")
df = DataFrame(arr, copy=False)
+ if using_infer_string:
+ if df[0].dtype.storage == "pyarrow":
+ # object dtype strings are converted to arrow memory,
+ # no numpy arrays to compare
+ pass
+ else:
+ assert np.shares_memory(df[0].to_numpy(), arr)
+ else:
+ assert np.shares_memory(df.values, arr)
+
+ df = DataFrame(arr, dtype=object, copy=False)
assert np.shares_memory(df.values, arr)
def test_constructor_dtype_list_data(self):
@@ -1793,12 +1812,18 @@ def test_constructor_column_duplicates(self):
tm.assert_frame_equal(idf, edf)
- def test_constructor_empty_with_string_dtype(self):
+ def test_constructor_empty_with_string_dtype(self, using_infer_string):
# GH 9428
expected = DataFrame(index=[0, 1], columns=[0, 1], dtype=object)
+ expected_str = DataFrame(
+ index=[0, 1], columns=[0, 1], dtype=pd.StringDtype(na_value=np.nan)
+ )
df = DataFrame(index=[0, 1], columns=[0, 1], dtype=str)
- tm.assert_frame_equal(df, expected)
+ if using_infer_string:
+ tm.assert_frame_equal(df, expected_str)
+ else:
+ tm.assert_frame_equal(df, expected)
df = DataFrame(index=[0, 1], columns=[0, 1], dtype=np.str_)
tm.assert_frame_equal(df, expected)
df = DataFrame(index=[0, 1], columns=[0, 1], dtype="U5")
@@ -2721,8 +2746,7 @@ def test_construct_with_strings_and_none(self):
def test_frame_string_inference(self):
# GH#54430
- pytest.importorskip("pyarrow")
- dtype = "string[pyarrow_numpy]"
+ dtype = pd.StringDtype(na_value=np.nan)
expected = DataFrame(
{"a": ["a", "b"]}, dtype=dtype, columns=Index(["a"], dtype=dtype)
)
@@ -2756,8 +2780,7 @@ def test_frame_string_inference(self):
def test_frame_string_inference_array_string_dtype(self):
# GH#54496
- pytest.importorskip("pyarrow")
- dtype = "string[pyarrow_numpy]"
+ dtype = pd.StringDtype(na_value=np.nan)
expected = DataFrame(
{"a": ["a", "b"]}, dtype=dtype, columns=Index(["a"], dtype=dtype)
)
@@ -2781,7 +2804,6 @@ def test_frame_string_inference_array_string_dtype(self):
def test_frame_string_inference_block_dim(self):
# GH#55363
- pytest.importorskip("pyarrow")
with pd.option_context("future.infer_string", True):
df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]]))
assert df._mgr.blocks[0].ndim == 2
diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py
index 2684704f86b82..f1163e994557f 100644
--- a/pandas/tests/frame/test_logical_ops.py
+++ b/pandas/tests/frame/test_logical_ops.py
@@ -4,10 +4,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
-from pandas.compat import HAS_PYARROW
-
from pandas import (
CategoricalIndex,
DataFrame,
@@ -100,9 +96,6 @@ def test_logical_ops_int_frame(self):
res_ser = df1a_int["A"] | df1a_bool["A"]
tm.assert_series_equal(res_ser, df1a_bool["A"])
- @pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
- )
def test_logical_ops_invalid(self, using_infer_string):
# GH#5808
@@ -114,15 +107,12 @@ def test_logical_ops_invalid(self, using_infer_string):
df1 = DataFrame("foo", index=[1], columns=["A"])
df2 = DataFrame(True, index=[1], columns=["A"])
- msg = re.escape("unsupported operand type(s) for |: 'str' and 'bool'")
- if using_infer_string:
- import pyarrow as pa
-
- with pytest.raises(pa.lib.ArrowNotImplementedError, match="|has no kernel"):
- df1 | df2
+ if using_infer_string and df1["A"].dtype.storage == "pyarrow":
+ msg = "operation 'or_' not supported for dtype 'str'"
else:
- with pytest.raises(TypeError, match=msg):
- df1 | df2
+ msg = re.escape("unsupported operand type(s) for |: 'str' and 'bool'")
+ with pytest.raises(TypeError, match=msg):
+ df1 | df2
def test_logical_operators(self):
def _check_bin_op(op):
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index 7dde0683aa960..27848e4d18596 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -3,8 +3,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
from pandas.errors import (
NumExprClobberingError,
UndefinedVariableError,
@@ -747,7 +745,6 @@ def test_inf(self, op, f, engine, parser):
result = df.query(q, engine=engine, parser=parser)
tm.assert_frame_equal(result, expected)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_check_tz_aware_index_query(self, tz_aware_fixture):
# https://github.com/pandas-dev/pandas/issues/29463
tz = tz_aware_fixture
@@ -760,6 +757,7 @@ def test_check_tz_aware_index_query(self, tz_aware_fixture):
tm.assert_frame_equal(result, expected)
expected = DataFrame(df_index)
+ expected.columns = expected.columns.astype(object)
result = df.reset_index().query('"2018-01-03 00:00:00+00" < time')
tm.assert_frame_equal(result, expected)
@@ -1057,7 +1055,7 @@ def test_query_with_string_columns(self, parser, engine):
with pytest.raises(NotImplementedError, match=msg):
df.query("a in b and c < d", parser=parser, engine=engine)
- def test_object_array_eq_ne(self, parser, engine, using_infer_string):
+ def test_object_array_eq_ne(self, parser, engine):
df = DataFrame(
{
"a": list("aaaabbbbcccc"),
@@ -1066,14 +1064,11 @@ def test_object_array_eq_ne(self, parser, engine, using_infer_string):
"d": np.random.default_rng(2).integers(9, size=12),
}
)
- warning = RuntimeWarning if using_infer_string and engine == "numexpr" else None
- with tm.assert_produces_warning(warning):
- res = df.query("a == b", parser=parser, engine=engine)
+ res = df.query("a == b", parser=parser, engine=engine)
exp = df[df.a == df.b]
tm.assert_frame_equal(res, exp)
- with tm.assert_produces_warning(warning):
- res = df.query("a != b", parser=parser, engine=engine)
+ res = df.query("a != b", parser=parser, engine=engine)
exp = df[df.a != df.b]
tm.assert_frame_equal(res, exp)
@@ -1112,16 +1107,12 @@ def test_query_with_nested_special_character(self, parser, engine):
[">=", operator.ge],
],
)
- def test_query_lex_compare_strings(
- self, parser, engine, op, func, using_infer_string
- ):
+ def test_query_lex_compare_strings(self, parser, engine, op, func):
a = Series(np.random.default_rng(2).choice(list("abcde"), 20))
b = Series(np.arange(a.size))
df = DataFrame({"X": a, "Y": b})
- warning = RuntimeWarning if using_infer_string and engine == "numexpr" else None
- with tm.assert_produces_warning(warning):
- res = df.query(f'X {op} "d"', engine=engine, parser=parser)
+ res = df.query(f'X {op} "d"', engine=engine, parser=parser)
expected = df[func(df.X, "d")]
tm.assert_frame_equal(res, expected)
diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py
index 8e1df679ee1b4..a76d33e922486 100644
--- a/pandas/tests/frame/test_unary.py
+++ b/pandas/tests/frame/test_unary.py
@@ -44,11 +44,6 @@ def test_neg_object(self, df, expected):
tm.assert_frame_equal(-df, expected)
tm.assert_series_equal(-df["a"], expected["a"])
- @pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW,
- reason="TODO(infer_string)",
- strict=False,
- )
@pytest.mark.parametrize(
"df",
[
@@ -59,22 +54,13 @@ def test_neg_object(self, df, expected):
def test_neg_raises(self, df, using_infer_string):
msg = (
"bad operand type for unary -: 'str'|"
- r"bad operand type for unary -: 'DatetimeArray'"
+ r"bad operand type for unary -: 'DatetimeArray'|"
+ "unary '-' not supported for dtype"
)
- if using_infer_string and df.dtypes.iloc[0] == "string":
- import pyarrow as pa
-
- msg = "has no kernel"
- with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
- (-df)
- with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
- (-df["a"])
-
- else:
- with pytest.raises(TypeError, match=msg):
- (-df)
- with pytest.raises(TypeError, match=msg):
- (-df["a"])
+ with pytest.raises(TypeError, match=msg):
+ (-df)
+ with pytest.raises(TypeError, match=msg):
+ (-df["a"])
def test_invert(self, float_frame):
df = float_frame
diff --git a/pandas/tests/groupby/methods/test_describe.py b/pandas/tests/groupby/methods/test_describe.py
index 34b046bff7c91..c0889ab415e74 100644
--- a/pandas/tests/groupby/methods/test_describe.py
+++ b/pandas/tests/groupby/methods/test_describe.py
@@ -1,8 +1,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
import pandas as pd
from pandas import (
DataFrame,
@@ -73,8 +71,7 @@ def test_series_describe_as_index(as_index, keys):
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-def test_frame_describe_multikey(tsframe):
+def test_frame_describe_multikey(tsframe, using_infer_string):
grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month])
result = grouped.describe()
desc_groups = []
@@ -82,7 +79,7 @@ def test_frame_describe_multikey(tsframe):
group = grouped[col].describe()
# GH 17464 - Remove duplicate MultiIndex levels
group_col = MultiIndex(
- levels=[[col], group.columns],
+ levels=[Index([col], dtype=tsframe.columns.dtype), group.columns],
codes=[[0] * len(group.columns), range(len(group.columns))],
)
group = DataFrame(group.values, columns=group_col, index=group.index)
@@ -90,6 +87,10 @@ def test_frame_describe_multikey(tsframe):
expected = pd.concat(desc_groups, axis=1)
tm.assert_frame_equal(result, expected)
+ # remainder of the tests fails with string dtype but is testing deprecated behaviour
+ if using_infer_string:
+ return
+
msg = "DataFrame.groupby with axis=1 is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
groupedT = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1)
@@ -275,7 +276,6 @@ def test_describe(self, df, gb, gni):
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("dtype", [int, float, object])
@pytest.mark.parametrize(
"kwargs",
@@ -297,5 +297,5 @@ def test_groupby_empty_dataset(dtype, kwargs):
result = df.iloc[:0].groupby("A").B.describe(**kwargs)
expected = df.groupby("A").B.describe(**kwargs).reset_index(drop=True).iloc[:0]
- expected.index = Index([])
+ expected.index = Index([], dtype=df.columns.dtype)
tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py
index 344258257ba80..2722993ee5cdf 100644
--- a/pandas/tests/groupby/methods/test_nth.py
+++ b/pandas/tests/groupby/methods/test_nth.py
@@ -1,8 +1,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
import pandas as pd
from pandas import (
DataFrame,
@@ -706,14 +704,14 @@ def test_first_multi_key_groupby_categorical():
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("method", ["first", "last", "nth"])
def test_groupby_last_first_nth_with_none(method, nulls_fixture):
# GH29645
- expected = Series(["y"])
+ expected = Series(["y"], dtype=object)
data = Series(
[nulls_fixture, nulls_fixture, nulls_fixture, "y", nulls_fixture],
index=[0, 0, 0, 0, 0],
+ dtype=object,
).groupby(level=0)
if method == "nth":
diff --git a/pandas/tests/groupby/methods/test_size.py b/pandas/tests/groupby/methods/test_size.py
index 5b4c08fc24411..fb834ee2a8799 100644
--- a/pandas/tests/groupby/methods/test_size.py
+++ b/pandas/tests/groupby/methods/test_size.py
@@ -3,8 +3,6 @@
from pandas._config import using_string_dtype
-import pandas.util._test_decorators as td
-
from pandas.core.dtypes.common import is_integer_dtype
from pandas import (
@@ -111,16 +109,9 @@ def test_size_series_masked_type_returns_Int64(dtype):
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
-@pytest.mark.parametrize(
- "dtype",
- [
- object,
- pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
- pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
- ],
-)
-def test_size_strings(dtype):
+def test_size_strings(any_string_dtype):
# GH#55627
+ dtype = any_string_dtype
df = DataFrame({"a": ["a", "a", "b"], "b": "a"}, dtype=dtype)
result = df.groupby("a")["b"].size()
exp_dtype = "Int64" if dtype == "string[pyarrow]" else "int64"
diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py
index 51232fac7d6f6..d8c6c7c3fe50c 100644
--- a/pandas/tests/groupby/methods/test_value_counts.py
+++ b/pandas/tests/groupby/methods/test_value_counts.py
@@ -8,11 +8,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
-from pandas.compat import HAS_PYARROW
-import pandas.util._test_decorators as td
-
from pandas import (
Categorical,
CategoricalIndex,
@@ -288,7 +283,6 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
return df[keys].value_counts(normalize=normalize, sort=sort, ascending=ascending)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("groupby", ["column", "array", "function"])
@pytest.mark.parametrize("normalize, name", [(True, "proportion"), (False, "count")])
@pytest.mark.parametrize(
@@ -302,7 +296,16 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize("frame", [True, False])
def test_against_frame_and_seriesgroupby(
- education_df, groupby, normalize, name, sort, ascending, as_index, frame, request
+ education_df,
+ groupby,
+ normalize,
+ name,
+ sort,
+ ascending,
+ as_index,
+ frame,
+ request,
+ using_infer_string,
):
# test all parameters:
# - Use column, array or function as by= parameter
@@ -366,25 +369,24 @@ def test_against_frame_and_seriesgroupby(
index_frame["gender"] = index_frame["both"].str.split("-").str.get(0)
index_frame["education"] = index_frame["both"].str.split("-").str.get(1)
del index_frame["both"]
- index_frame = index_frame.rename({0: None}, axis=1)
- expected.index = MultiIndex.from_frame(index_frame)
+ index_frame2 = index_frame.rename({0: None}, axis=1)
+ expected.index = MultiIndex.from_frame(index_frame2)
+
+ if index_frame2.columns.isna()[0]:
+        # with using_infer_string, the columns in index_frame are string
+ # dtype, which makes the rename({0: None}) above use np.nan
+ # instead of None, so we need to set None more explicitly.
+ expected.index.names = [None] + expected.index.names[1:]
tm.assert_series_equal(result, expected)
else:
expected.insert(1, "gender", expected["both"].str.split("-").str.get(0))
expected.insert(2, "education", expected["both"].str.split("-").str.get(1))
+ if using_infer_string:
+ expected = expected.astype({"gender": "str", "education": "str"})
del expected["both"]
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
-@pytest.mark.parametrize(
- "dtype",
- [
- object,
- pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
- pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
- ],
-)
@pytest.mark.parametrize("normalize", [True, False])
@pytest.mark.parametrize(
"sort, ascending, expected_rows, expected_count, expected_group_size",
@@ -402,8 +404,10 @@ def test_compound(
expected_rows,
expected_count,
expected_group_size,
- dtype,
+ any_string_dtype,
+ using_infer_string,
):
+ dtype = any_string_dtype
education_df = education_df.astype(dtype)
education_df.columns = education_df.columns.astype(dtype)
# Multiple groupby keys and as_index=False
@@ -420,11 +424,17 @@ def test_compound(
expected["proportion"] = expected_count
expected["proportion"] /= expected_group_size
if dtype == "string[pyarrow]":
+ # TODO(nullable) also string[python] should return nullable dtypes
expected["proportion"] = expected["proportion"].convert_dtypes()
else:
expected["count"] = expected_count
if dtype == "string[pyarrow]":
expected["count"] = expected["count"].convert_dtypes()
+ if using_infer_string and dtype == object:
+ expected = expected.astype(
+ {"country": "str", "gender": "str", "education": "str"}
+ )
+
tm.assert_frame_equal(result, expected)
@@ -537,9 +547,6 @@ def names_with_nulls_df(nulls_fixture):
)
-@pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
-)
@pytest.mark.parametrize(
"dropna, expected_data, expected_index",
[
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 015a9db32883b..586ef8a126536 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2832,20 +2832,13 @@ def test_rolling_wrong_param_min_period():
test_df.groupby("name")["val"].rolling(window=2, min_period=1).sum()
-@pytest.mark.parametrize(
- "dtype",
- [
- object,
- pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
- ],
-)
-def test_by_column_values_with_same_starting_value(dtype):
+def test_by_column_values_with_same_starting_value(any_string_dtype):
# GH29635
df = DataFrame(
{
"Name": ["Thomas", "Thomas", "Thomas John"],
"Credit": [1200, 1300, 900],
- "Mood": Series(["sad", "happy", "happy"], dtype=dtype),
+ "Mood": Series(["sad", "happy", "happy"], dtype=any_string_dtype),
}
)
aggregate_details = {"Mood": Series.mode, "Credit": "sum"}
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index d843a992daee0..9c01e017dd29c 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -3,7 +3,6 @@
from pandas._config import using_string_dtype
-from pandas.compat import HAS_PYARROW
from pandas.compat.pyarrow import pa_version_under10p1
from pandas.core.dtypes.missing import na_value_for_dtype
@@ -13,9 +12,6 @@
from pandas.tests.groupby import get_groupby_method_args
-@pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
-)
@pytest.mark.parametrize(
"dropna, tuples, outputs",
[
@@ -59,9 +55,6 @@ def test_groupby_dropna_multi_index_dataframe_nan_in_one_group(
tm.assert_frame_equal(grouped, expected)
-@pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
-)
@pytest.mark.parametrize(
"dropna, tuples, outputs",
[
@@ -138,9 +131,6 @@ def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs):
tm.assert_frame_equal(grouped, expected)
-@pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
-)
@pytest.mark.parametrize(
"dropna, idx, expected",
[
@@ -216,9 +206,6 @@ def test_groupby_dataframe_slice_then_transform(dropna, index):
tm.assert_series_equal(result, expected)
-@pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
-)
@pytest.mark.parametrize(
"dropna, tuples, outputs",
[
@@ -300,9 +287,6 @@ def test_groupby_dropna_datetime_like_data(
tm.assert_frame_equal(grouped, expected)
-@pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
-)
@pytest.mark.parametrize(
"dropna, data, selected_data, levels",
[
@@ -388,9 +372,6 @@ def test_groupby_dropna_with_multiindex_input(input_index, keys, series):
tm.assert_equal(result, expected)
-@pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
-)
def test_groupby_nan_included():
# GH 35646
data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]}
diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py
index b1fa541d42086..3b7614347d181 100644
--- a/pandas/tests/groupby/test_numeric_only.py
+++ b/pandas/tests/groupby/test_numeric_only.py
@@ -273,6 +273,7 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only, using_infer_str
# cumsum, diff, pct_change
"unsupported operand type",
"has no kernel",
+ "operation 'sub' not supported for dtype 'str' with dtype 'float64'",
)
if using_infer_string:
pa = pytest.importorskip("pyarrow")
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index f67051de6e8c7..8e1bbcb43e3f3 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -702,10 +702,9 @@ def test_groupby_min_max_categorical(func):
@pytest.mark.parametrize("func", ["min", "max"])
-def test_min_empty_string_dtype(func):
+def test_min_empty_string_dtype(func, string_dtype_no_object):
# GH#55619
- pytest.importorskip("pyarrow")
- dtype = "string[pyarrow_numpy]"
+ dtype = string_dtype_no_object
df = DataFrame({"a": ["a"], "b": "a", "c": "a"}, dtype=dtype).iloc[:0]
result = getattr(df.groupby("a"), func)()
expected = DataFrame(
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index a5433d5496b0b..395036dd400e5 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -5,6 +5,7 @@
from pandas._config import using_string_dtype
from pandas._libs import lib
+from pandas.compat import HAS_PYARROW
from pandas.core.dtypes.common import ensure_platform_int
@@ -499,8 +500,7 @@ def test_transform_select_columns(df):
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-def test_transform_nuisance_raises(df):
+def test_transform_nuisance_raises(df, using_infer_string):
# case that goes through _transform_item_by_item
df.columns = ["A", "B", "B", "D"]
@@ -510,10 +510,16 @@ def test_transform_nuisance_raises(df):
grouped = df.groupby("A")
gbc = grouped["B"]
- with pytest.raises(TypeError, match="Could not convert"):
+ msg = "Could not convert"
+ if using_infer_string:
+ if df.columns.dtype.storage == "pyarrow":
+ msg = "with dtype str does not support reduction 'mean'"
+ else:
+ msg = "Cannot perform reduction 'mean' with string dtype"
+ with pytest.raises(TypeError, match=msg):
gbc.transform(lambda x: np.mean(x))
- with pytest.raises(TypeError, match="Could not convert"):
+ with pytest.raises(TypeError, match=msg):
df.groupby("A").transform(lambda x: np.mean(x))
@@ -582,8 +588,7 @@ def test_transform_coercion():
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-def test_groupby_transform_with_int():
+def test_groupby_transform_with_int(using_infer_string):
# GH 3740, make sure that we might upcast on item-by-item transform
# floats
@@ -613,8 +618,14 @@ def test_groupby_transform_with_int():
"D": "foo",
}
)
+ msg = "Could not convert"
+ if using_infer_string:
+ if HAS_PYARROW:
+ msg = "with dtype str does not support reduction 'mean'"
+ else:
+ msg = "Cannot perform reduction 'mean' with string dtype"
with np.errstate(all="ignore"):
- with pytest.raises(TypeError, match="Could not convert"):
+ with pytest.raises(TypeError, match=msg):
df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
result = df.groupby("A")[["B", "C"]].transform(
lambda x: (x - x.mean()) / x.std()
@@ -626,7 +637,7 @@ def test_groupby_transform_with_int():
s = Series([2, 3, 4, 10, 5, -1])
df = DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": s, "D": "foo"})
with np.errstate(all="ignore"):
- with pytest.raises(TypeError, match="Could not convert"):
+ with pytest.raises(TypeError, match=msg):
df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
result = df.groupby("A")[["B", "C"]].transform(
lambda x: (x - x.mean()) / x.std()
@@ -850,7 +861,6 @@ def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target):
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.slow
@pytest.mark.parametrize(
"op, args, targop",
@@ -901,6 +911,7 @@ def test_cython_transform_frame_column(
"does not support .* operations",
".* is not supported for object dtype",
"is not implemented for this dtype",
+ ".* is not supported for str dtype",
]
)
with pytest.raises(TypeError, match=msg):
diff --git a/pandas/tests/indexes/base_class/test_constructors.py b/pandas/tests/indexes/base_class/test_constructors.py
index 338509dd239e6..dcf0165ead6c0 100644
--- a/pandas/tests/indexes/base_class/test_constructors.py
+++ b/pandas/tests/indexes/base_class/test_constructors.py
@@ -47,9 +47,7 @@ def test_construct_empty_tuples(self, tuple_list):
def test_index_string_inference(self):
# GH#54430
- pytest.importorskip("pyarrow")
- dtype = "string[pyarrow_numpy]"
- expected = Index(["a", "b"], dtype=dtype)
+ expected = Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan))
with pd.option_context("future.infer_string", True):
ser = Index(["a", "b"])
tm.assert_index_equal(ser, expected)
diff --git a/pandas/tests/indexes/base_class/test_reshape.py b/pandas/tests/indexes/base_class/test_reshape.py
index 6a544e448ebe1..b1a6c30b52f68 100644
--- a/pandas/tests/indexes/base_class/test_reshape.py
+++ b/pandas/tests/indexes/base_class/test_reshape.py
@@ -59,12 +59,11 @@ def test_insert_datetime_into_object(self, loc, val):
tm.assert_index_equal(result, expected)
assert type(expected[2]) is type(val)
- def test_insert_none_into_string_numpy(self):
+ def test_insert_none_into_string_numpy(self, string_dtype_no_object):
# GH#55365
- pytest.importorskip("pyarrow")
- index = Index(["a", "b", "c"], dtype="string[pyarrow_numpy]")
+ index = Index(["a", "b", "c"], dtype=string_dtype_no_object)
result = index.insert(-1, None)
- expected = Index(["a", "b", None, "c"], dtype="string[pyarrow_numpy]")
+ expected = Index(["a", "b", None, "c"], dtype=string_dtype_no_object)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py
index 2176aa52b17f4..a897e5aca058a 100644
--- a/pandas/tests/indexes/base_class/test_setops.py
+++ b/pandas/tests/indexes/base_class/test_setops.py
@@ -3,8 +3,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
import pandas as pd
from pandas import (
Index,
@@ -233,7 +231,6 @@ def test_tuple_union_bug(self, method, expected, sort):
expected = Index(expected)
tm.assert_index_equal(result, expected)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("first_list", [["b", "a"], []])
@pytest.mark.parametrize("second_list", [["a", "b"], []])
@pytest.mark.parametrize(
@@ -243,6 +240,7 @@ def test_tuple_union_bug(self, method, expected, sort):
def test_union_name_preservation(
self, first_list, second_list, first_name, second_name, expected_name, sort
):
+ expected_dtype = object if not first_list or not second_list else "str"
first = Index(first_list, name=first_name)
second = Index(second_list, name=second_name)
union = first.union(second, sort=sort)
@@ -253,7 +251,7 @@ def test_union_name_preservation(
expected = Index(sorted(vals), name=expected_name)
tm.assert_index_equal(union, expected)
else:
- expected = Index(vals, name=expected_name)
+ expected = Index(vals, name=expected_name, dtype=expected_dtype)
tm.assert_index_equal(union.sort_values(), expected.sort_values())
@pytest.mark.parametrize(
diff --git a/pandas/tests/indexes/datetimes/methods/test_astype.py b/pandas/tests/indexes/datetimes/methods/test_astype.py
index c0bc6601769b1..a9bcae625e494 100644
--- a/pandas/tests/indexes/datetimes/methods/test_astype.py
+++ b/pandas/tests/indexes/datetimes/methods/test_astype.py
@@ -102,13 +102,16 @@ def test_astype_tznaive_to_tzaware(self):
# dt64->dt64tz deprecated
idx._data.astype("datetime64[ns, US/Eastern]")
- def test_astype_str_nat(self):
+ def test_astype_str_nat(self, using_infer_string):
# GH 13149, GH 13209
# verify that we are returning NaT as a string (and not unicode)
idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])
result = idx.astype(str)
- expected = Index(["2016-05-16", "NaT", "NaT", "NaT"], dtype=object)
+ if using_infer_string:
+ expected = Index(["2016-05-16", None, None, None], dtype="str")
+ else:
+ expected = Index(["2016-05-16", "NaT", "NaT", "NaT"], dtype=object)
tm.assert_index_equal(result, expected)
def test_astype_str(self):
@@ -118,7 +121,7 @@ def test_astype_str(self):
expected = Index(
["2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04"],
name="test_name",
- dtype=object,
+ dtype="str",
)
tm.assert_index_equal(result, expected)
@@ -133,7 +136,7 @@ def test_astype_str_tz_and_name(self):
"2012-01-03 00:00:00-05:00",
],
name="test_name",
- dtype=object,
+ dtype="str",
)
tm.assert_index_equal(result, expected)
@@ -144,7 +147,7 @@ def test_astype_str_freq_and_name(self):
expected = Index(
["2011-01-01 00:00:00", "2011-01-01 01:00:00", "2011-01-01 02:00:00"],
name="test_name",
- dtype=object,
+ dtype="str",
)
tm.assert_index_equal(result, expected)
@@ -156,7 +159,7 @@ def test_astype_str_freq_and_tz(self):
result = dti.astype(str)
expected = Index(
["2012-03-06 00:00:00+00:00", "2012-03-06 01:00:00+00:00"],
- dtype=object,
+ dtype="str",
name="test_name",
)
tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/indexes/object/test_astype.py b/pandas/tests/indexes/object/test_astype.py
index 9c1ef302c5b51..ce05b5e9f2238 100644
--- a/pandas/tests/indexes/object/test_astype.py
+++ b/pandas/tests/indexes/object/test_astype.py
@@ -15,12 +15,12 @@ def test_astype_str_from_bytes():
# ensure_string_array which does f"{val}"
idx = Index(["あ", b"a"], dtype="object")
result = idx.astype(str)
- expected = Index(["あ", "a"], dtype="object")
+ expected = Index(["あ", "a"], dtype="str")
tm.assert_index_equal(result, expected)
# while we're here, check that Series.astype behaves the same
result = Series(idx).astype(str)
- expected = Series(expected, dtype=object)
+ expected = Series(expected, dtype="str")
tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py
index add2f3f18b348..57e5c5e3b6abb 100644
--- a/pandas/tests/indexes/object/test_indexing.py
+++ b/pandas/tests/indexes/object/test_indexing.py
@@ -3,14 +3,10 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
from pandas._libs.missing import (
NA,
is_matching_na,
)
-from pandas.compat import HAS_PYARROW
-import pandas.util._test_decorators as td
import pandas as pd
from pandas import Index
@@ -31,39 +27,25 @@ def test_get_indexer_strings(self, method, expected):
tm.assert_numpy_array_equal(actual, expected)
- @pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
- )
def test_get_indexer_strings_raises(self, using_infer_string):
index = Index(["b", "c"])
- if using_infer_string:
- import pyarrow as pa
-
- msg = "has no kernel"
- with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
- index.get_indexer(["a", "b", "c", "d"], method="nearest")
-
- with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
- index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2)
-
- with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
- index.get_indexer(
- ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
- )
-
- else:
- msg = r"unsupported operand type\(s\) for -: 'str' and 'str'"
- with pytest.raises(TypeError, match=msg):
- index.get_indexer(["a", "b", "c", "d"], method="nearest")
+ msg = "|".join(
+ [
+ "operation 'sub' not supported for dtype 'str'",
+ r"unsupported operand type\(s\) for -: 'str' and 'str'",
+ ]
+ )
+ with pytest.raises(TypeError, match=msg):
+ index.get_indexer(["a", "b", "c", "d"], method="nearest")
- with pytest.raises(TypeError, match=msg):
- index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2)
+ with pytest.raises(TypeError, match=msg):
+ index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2)
- with pytest.raises(TypeError, match=msg):
- index.get_indexer(
- ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
- )
+ with pytest.raises(TypeError, match=msg):
+ index.get_indexer(
+ ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
+ )
def test_get_indexer_with_NA_values(
self, unique_nulls_fixture, unique_nulls_fixture2
@@ -176,14 +158,6 @@ def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
class TestSliceLocs:
- # TODO(infer_string) parametrize over multiple string dtypes
- @pytest.mark.parametrize(
- "dtype",
- [
- "object",
- pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
- ],
- )
@pytest.mark.parametrize(
"in_slice,expected",
[
@@ -207,24 +181,22 @@ class TestSliceLocs:
(pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc]
],
)
- def test_slice_locs_negative_step(self, in_slice, expected, dtype):
- index = Index(list("bcdxy"), dtype=dtype)
+ def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
+ index = Index(list("bcdxy"), dtype=any_string_dtype)
s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
result = index[s_start : s_stop : in_slice.step]
- expected = Index(list(expected), dtype=dtype)
+ expected = Index(list(expected), dtype=any_string_dtype)
tm.assert_index_equal(result, expected)
- # TODO(infer_string) parametrize over multiple string dtypes
- @td.skip_if_no("pyarrow")
- def test_slice_locs_negative_step_oob(self):
- index = Index(list("bcdxy"), dtype="string[pyarrow_numpy]")
+ def test_slice_locs_negative_step_oob(self, any_string_dtype):
+ index = Index(list("bcdxy"), dtype=any_string_dtype)
result = index[-10:5:1]
tm.assert_index_equal(result, index)
result = index[4:-10:-1]
- expected = Index(list("yxdcb"), dtype="string[pyarrow_numpy]")
+ expected = Index(list("yxdcb"), dtype=any_string_dtype)
tm.assert_index_equal(result, expected)
def test_slice_locs_dup(self):
diff --git a/pandas/tests/indexes/period/methods/test_astype.py b/pandas/tests/indexes/period/methods/test_astype.py
index d545bfd2fae0f..af3c2667f51b4 100644
--- a/pandas/tests/indexes/period/methods/test_astype.py
+++ b/pandas/tests/indexes/period/methods/test_astype.py
@@ -22,7 +22,7 @@ def test_astype_raises(self, dtype):
with pytest.raises(TypeError, match=msg):
idx.astype(dtype)
- def test_astype_conversion(self):
+ def test_astype_conversion(self, using_infer_string):
# GH#13149, GH#13209
idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.nan], freq="D", name="idx")
@@ -41,7 +41,12 @@ def test_astype_conversion(self):
tm.assert_index_equal(result, expected)
result = idx.astype(str)
- expected = Index([str(x) for x in idx], name="idx", dtype=object)
+ if using_infer_string:
+ expected = Index(
+ [str(x) if x is not NaT else None for x in idx], name="idx", dtype="str"
+ )
+ else:
+ expected = Index([str(x) for x in idx], name="idx", dtype=object)
tm.assert_index_equal(result, expected)
idx = period_range("1990", "2009", freq="Y", name="idx")
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index cf75f95d17b0a..3bcc62445f0ac 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -76,9 +76,6 @@ def test_constructor_casting(self, index):
tm.assert_contains_all(arr, new_index)
tm.assert_index_equal(index, new_index)
- @pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
- )
def test_constructor_copy(self, using_infer_string):
index = Index(list("abc"), name="name")
arr = np.array(index)
@@ -346,11 +343,6 @@ def test_constructor_empty_special(self, empty, klass):
def test_view_with_args(self, index):
index.view("i8")
- @pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW,
- reason="TODO(infer_string)",
- strict=False,
- )
@pytest.mark.parametrize(
"index",
[
@@ -367,7 +359,8 @@ def test_view_with_args_object_array_raises(self, index):
msg = "When changing to a larger dtype"
with pytest.raises(ValueError, match=msg):
index.view("i8")
- elif index.dtype == "string":
+ elif index.dtype == "str" and not index.dtype.storage == "python":
+ # TODO(infer_string): Make the errors consistent
with pytest.raises(NotImplementedError, match="i8"):
index.view("i8")
else:
@@ -978,10 +971,9 @@ def test_isin_empty(self, empty):
result = index.isin(empty)
tm.assert_numpy_array_equal(expected, result)
- @td.skip_if_no("pyarrow")
- def test_isin_arrow_string_null(self):
+ def test_isin_string_null(self, string_dtype_no_object):
# GH#55821
- index = Index(["a", "b"], dtype="string[pyarrow_numpy]")
+ index = Index(["a", "b"], dtype=string_dtype_no_object)
result = index.isin([None])
expected = np.array([False, False])
tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py
index 8d859a61a2bd5..176bf893cafa8 100644
--- a/pandas/tests/indexes/test_old_base.py
+++ b/pandas/tests/indexes/test_old_base.py
@@ -6,10 +6,7 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
from pandas._libs.tslibs import Timestamp
-from pandas.compat import HAS_PYARROW
from pandas.core.dtypes.common import (
is_integer_dtype,
@@ -28,6 +25,7 @@
PeriodIndex,
RangeIndex,
Series,
+ StringDtype,
TimedeltaIndex,
isna,
period_range,
@@ -233,7 +231,6 @@ def test_logical_compat(self, simple_index):
with pytest.raises(TypeError, match=msg):
idx.any()
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_repr_roundtrip(self, simple_index):
if isinstance(simple_index, IntervalIndex):
pytest.skip(f"Not a valid repr for {type(simple_index).__name__}")
@@ -250,11 +247,6 @@ def test_repr_max_seq_item_setting(self, simple_index):
repr(idx)
assert "..." not in str(idx)
- @pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW,
- reason="TODO(infer_string)",
- strict=False,
- )
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_ensure_copied_data(self, index):
# Check the "copy" argument of each Index.__new__ is honoured
@@ -302,12 +294,17 @@ def test_ensure_copied_data(self, index):
tm.assert_numpy_array_equal(
index._values._mask, result._values._mask, check_same="same"
)
- elif index.dtype == "string[python]":
+ elif (
+ isinstance(index.dtype, StringDtype) and index.dtype.storage == "python"
+ ):
assert np.shares_memory(index._values._ndarray, result._values._ndarray)
tm.assert_numpy_array_equal(
index._values._ndarray, result._values._ndarray, check_same="same"
)
- elif index.dtype in ("string[pyarrow]", "string[pyarrow_numpy]"):
+ elif (
+ isinstance(index.dtype, StringDtype)
+ and index.dtype.storage == "pyarrow"
+ ):
assert tm.shares_memory(result._values, index._values)
else:
raise NotImplementedError(index.dtype)
@@ -432,11 +429,7 @@ def test_insert_base(self, index):
result = trimmed.insert(0, index[0])
assert index[0:4].equals(result)
- @pytest.mark.skipif(
- using_string_dtype(),
- reason="completely different behavior, tested elsewher",
- )
- def test_insert_out_of_bounds(self, index):
+ def test_insert_out_of_bounds(self, index, using_infer_string):
# TypeError/IndexError matches what np.insert raises in these cases
if len(index) > 0:
@@ -448,6 +441,12 @@ def test_insert_out_of_bounds(self, index):
msg = "index (0|0.5) is out of bounds for axis 0 with size 0"
else:
msg = "slice indices must be integers or None or have an __index__ method"
+
+ if using_infer_string and (
+ index.dtype == "string" or index.dtype == "category" # noqa: PLR1714
+ ):
+ msg = "loc must be an integer between"
+
with pytest.raises(err, match=msg):
index.insert(0.5, "foo")
@@ -853,7 +852,6 @@ def test_append_preserves_dtype(self, simple_index):
alt = index.take(list(range(N)) * 2)
tm.assert_index_equal(result, alt, check_exact=True)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_inv(self, simple_index, using_infer_string):
idx = simple_index
@@ -867,21 +865,14 @@ def test_inv(self, simple_index, using_infer_string):
tm.assert_series_equal(res2, Series(expected))
else:
if idx.dtype.kind == "f":
- err = TypeError
msg = "ufunc 'invert' not supported for the input types"
- elif using_infer_string and idx.dtype == "string":
- import pyarrow as pa
-
- err = pa.lib.ArrowNotImplementedError
- msg = "has no kernel"
else:
- err = TypeError
- msg = "bad operand"
- with pytest.raises(err, match=msg):
+ msg = "bad operand|__invert__ is not supported for string dtype"
+ with pytest.raises(TypeError, match=msg):
~idx
# check that we get the same behavior with Series
- with pytest.raises(err, match=msg):
+ with pytest.raises(TypeError, match=msg):
~Series(idx)
def test_is_boolean_is_deprecated(self, simple_index):
diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
index 4a6982cf98670..72c3396f124b8 100644
--- a/pandas/tests/indexes/test_setops.py
+++ b/pandas/tests/indexes/test_setops.py
@@ -240,9 +240,6 @@ def test_intersection_base(self, index):
with pytest.raises(TypeError, match=msg):
first.intersection([1, 2, 3])
- @pytest.mark.filterwarnings(
- "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
- )
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_union_base(self, index):
index = index.unique()
@@ -270,9 +267,6 @@ def test_union_base(self, index):
first.union([1, 2, 3])
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
- @pytest.mark.filterwarnings(
- "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
- )
def test_difference_base(self, sort, index):
first = index[2:]
second = index[:4]
@@ -299,9 +293,6 @@ def test_difference_base(self, sort, index):
first.difference([1, 2, 3], sort)
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
- @pytest.mark.filterwarnings(
- "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
- )
def test_symmetric_difference(self, index):
if isinstance(index, CategoricalIndex):
pytest.skip(f"Not relevant for {type(index).__name__}")
@@ -523,9 +514,6 @@ def test_intersection_difference_match_empty(self, index, sort):
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
-@pytest.mark.filterwarnings(
- "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
-)
@pytest.mark.parametrize(
"method", ["intersection", "union", "difference", "symmetric_difference"]
)
diff --git a/pandas/tests/indexes/timedeltas/methods/test_astype.py b/pandas/tests/indexes/timedeltas/methods/test_astype.py
index 311f2b5c9aa59..5166cadae499e 100644
--- a/pandas/tests/indexes/timedeltas/methods/test_astype.py
+++ b/pandas/tests/indexes/timedeltas/methods/test_astype.py
@@ -44,7 +44,7 @@ def test_astype_object_with_nat(self):
tm.assert_index_equal(result, expected)
assert idx.tolist() == expected_list
- def test_astype(self):
+ def test_astype(self, using_infer_string):
# GH 13149, GH 13209
idx = TimedeltaIndex([1e14, "NaT", NaT, np.nan], name="idx")
@@ -61,7 +61,12 @@ def test_astype(self):
tm.assert_index_equal(result, expected)
result = idx.astype(str)
- expected = Index([str(x) for x in idx], name="idx", dtype=object)
+ if using_infer_string:
+ expected = Index(
+ [str(x) if x is not NaT else None for x in idx], name="idx", dtype="str"
+ )
+ else:
+ expected = Index([str(x) for x in idx], name="idx", dtype=object)
tm.assert_index_equal(result, expected)
rng = timedelta_range("1 days", periods=10)
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index 3fd9498e21a73..c2742f42e3a92 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -6,8 +6,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
from pandas.errors import IndexingError
import pandas.util._test_decorators as td
@@ -1218,22 +1216,27 @@ def test_iloc_getitem_int_single_ea_block_view(self):
arr[2] = arr[-1]
assert ser[0] == arr[-1]
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
- def test_iloc_setitem_multicolumn_to_datetime(self):
+ def test_iloc_setitem_multicolumn_to_datetime(self, using_infer_string):
# GH#20511
df = DataFrame({"A": ["2022-01-01", "2022-01-02"], "B": ["2021", "2022"]})
- df.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])})
- expected = DataFrame(
- {
- "A": [
- Timestamp("2021-01-01 00:00:00"),
- Timestamp("2022-01-01 00:00:00"),
- ],
- "B": ["2021", "2022"],
- }
- )
- tm.assert_frame_equal(df, expected, check_dtype=False)
+ if using_infer_string:
+ with tm.assert_produces_warning(
+ FutureWarning, match="Setting an item of incompatible dtype"
+ ):
+ df.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])})
+ else:
+ df.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])})
+ expected = DataFrame(
+ {
+ "A": [
+ Timestamp("2021-01-01 00:00:00"),
+ Timestamp("2022-01-01 00:00:00"),
+ ],
+ "B": ["2021", "2022"],
+ }
+ )
+ tm.assert_frame_equal(df, expected, check_dtype=False)
class TestILocErrors:
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index e57598cfc2be1..0ff33ba88b16f 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -8,8 +8,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
from pandas.errors import IndexingError
from pandas.core.dtypes.common import (
@@ -563,12 +561,12 @@ def test_string_slice_empty(self):
with pytest.raises(KeyError, match="^0$"):
df.loc["2011", 0]
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_astype_assignment(self, using_infer_string):
# GH4312 (iloc)
df_orig = DataFrame(
[["1", "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
)
+ df_orig[list("ABCDG")] = df_orig[list("ABCDG")].astype(object)
df = df_orig.copy()
@@ -578,9 +576,9 @@ def test_astype_assignment(self, using_infer_string):
expected = DataFrame(
[[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
)
- if not using_infer_string:
- expected["A"] = expected["A"].astype(object)
- expected["B"] = expected["B"].astype(object)
+ expected[list("CDG")] = expected[list("CDG")].astype(object)
+ expected["A"] = expected["A"].astype(object)
+ expected["B"] = expected["B"].astype(object)
tm.assert_frame_equal(df, expected)
# GH5702 (loc)
@@ -589,18 +587,16 @@ def test_astype_assignment(self, using_infer_string):
expected = DataFrame(
[[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
)
- if not using_infer_string:
- expected["A"] = expected["A"].astype(object)
+ expected[list("ABCDG")] = expected[list("ABCDG")].astype(object)
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
+
df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64)
expected = DataFrame(
[["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
)
- if not using_infer_string:
- expected["B"] = expected["B"].astype(object)
- expected["C"] = expected["C"].astype(object)
+ expected[list("ABCDG")] = expected[list("ABCDG")].astype(object)
tm.assert_frame_equal(df, expected)
def test_astype_assignment_full_replacements(self):
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index d61b2ea642439..bdc6d9aff6f4e 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -1,5 +1,6 @@
""" test label based indexing with loc """
from collections import namedtuple
+import contextlib
from datetime import (
date,
datetime,
@@ -15,7 +16,6 @@
from pandas._config import using_string_dtype
from pandas._libs import index as libindex
-from pandas.compat import HAS_PYARROW
from pandas.compat.numpy import np_version_gt2
from pandas.errors import IndexingError
import pandas.util._test_decorators as td
@@ -648,8 +648,9 @@ def test_loc_setitem_consistency_empty(self):
expected["x"] = expected["x"].astype(np.int64)
tm.assert_frame_equal(df, expected)
+ # incompatible dtype warning
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
- def test_loc_setitem_consistency_slice_column_len(self):
+ def test_loc_setitem_consistency_slice_column_len(self, using_infer_string):
# .loc[:,column] setting with slice == len of the column
# GH10408
levels = [
@@ -673,13 +674,24 @@ def test_loc_setitem_consistency_slice_column_len(self):
]
df = DataFrame(values, index=mi, columns=cols)
- df.loc[:, ("Respondent", "StartDate")] = to_datetime(
- df.loc[:, ("Respondent", "StartDate")]
- )
- df.loc[:, ("Respondent", "EndDate")] = to_datetime(
- df.loc[:, ("Respondent", "EndDate")]
- )
- df = df.infer_objects(copy=False)
+ ctx = contextlib.nullcontext()
+ if using_infer_string:
+ ctx = pytest.raises(TypeError, match="Invalid value")
+
+ with ctx:
+ df.loc[:, ("Respondent", "StartDate")] = to_datetime(
+ df.loc[:, ("Respondent", "StartDate")]
+ )
+ with ctx:
+ df.loc[:, ("Respondent", "EndDate")] = to_datetime(
+ df.loc[:, ("Respondent", "EndDate")]
+ )
+
+ if using_infer_string:
+ # infer-objects won't infer stuff anymore
+ return
+
+ df = df.infer_objects()
# Adding a new key
df.loc[:, ("Respondent", "Duration")] = (
@@ -1269,20 +1281,23 @@ def test_loc_reverse_assignment(self):
tm.assert_series_equal(result, expected)
- @pytest.mark.xfail(using_string_dtype(), reason="can't set int into string")
- def test_loc_setitem_str_to_small_float_conversion_type(self):
+ def test_loc_setitem_str_to_small_float_conversion_type(self, using_infer_string):
# GH#20388
col_data = [str(np.random.default_rng(2).random() * 1e-12) for _ in range(5)]
result = DataFrame(col_data, columns=["A"])
- expected = DataFrame(col_data, columns=["A"], dtype=object)
+ expected = DataFrame(col_data, columns=["A"])
tm.assert_frame_equal(result, expected)
# assigning with loc/iloc attempts to set the values inplace, which
# in this case is successful
- result.loc[result.index, "A"] = [float(x) for x in col_data]
- expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
- tm.assert_frame_equal(result, expected)
+ if using_infer_string:
+ with pytest.raises(TypeError, match="Must provide strings"):
+ result.loc[result.index, "A"] = [float(x) for x in col_data]
+ else:
+ result.loc[result.index, "A"] = [float(x) for x in col_data]
+ expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
+ tm.assert_frame_equal(result, expected)
# assigning the entire column using __setitem__ swaps in the new array
# GH#???
@@ -1443,9 +1458,6 @@ def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected):
tm.assert_frame_equal(expected, df)
- @pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
- )
def test_loc_setitem_categorical_values_partial_column_slice(self):
# Assigning a Category to parts of a int/... column uses the values of
# the Categorical
@@ -1458,9 +1470,6 @@ def test_loc_setitem_categorical_values_partial_column_slice(self):
df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
tm.assert_frame_equal(df, exp)
- @pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
- )
def test_loc_setitem_single_row_categorical(self, using_infer_string):
# GH#25495
df = DataFrame({"Alpha": ["a"], "Numeric": [0]})
diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py
index d1a15dc93f702..ef94c4c7aff2c 100644
--- a/pandas/tests/interchange/test_impl.py
+++ b/pandas/tests/interchange/test_impl.py
@@ -412,6 +412,7 @@ def test_interchange_from_corrected_buffer_dtypes(monkeypatch) -> None:
pd.api.interchange.from_dataframe(df)
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_empty_string_column():
# https://github.com/pandas-dev/pandas/issues/56703
df = pd.DataFrame({"a": []}, dtype=str)
@@ -476,7 +477,7 @@ def test_non_str_names_w_duplicates():
([1.0, 2.25, None], "Float32[pyarrow]", "float32"),
([True, False, None], "boolean", "bool"),
([True, False, None], "boolean[pyarrow]", "bool"),
- (["much ado", "about", None], "string[pyarrow_numpy]", "large_string"),
+ (["much ado", "about", None], pd.StringDtype(na_value=np.nan), "large_string"),
(["much ado", "about", None], "string[pyarrow]", "large_string"),
(
[datetime(2020, 1, 1), datetime(2020, 1, 2), None],
@@ -539,7 +540,11 @@ def test_pandas_nullable_with_missing_values(
([1.0, 2.25, 5.0], "Float32[pyarrow]", "float32"),
([True, False, False], "boolean", "bool"),
([True, False, False], "boolean[pyarrow]", "bool"),
- (["much ado", "about", "nothing"], "string[pyarrow_numpy]", "large_string"),
+ (
+ ["much ado", "about", "nothing"],
+ pd.StringDtype(na_value=np.nan),
+ "large_string",
+ ),
(["much ado", "about", "nothing"], "string[pyarrow]", "large_string"),
(
[datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 3)],
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 8dc76d8f747cb..3c5e1e1cf5afb 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -550,7 +550,7 @@ def test_reader_dtype(self, read_ext):
expected["a"] = expected["a"].astype("float64")
expected["b"] = expected["b"].astype("float32")
- expected["c"] = Series(["001", "002", "003", "004"], dtype=object)
+ expected["c"] = Series(["001", "002", "003", "004"], dtype="str")
tm.assert_frame_equal(actual, expected)
msg = "Unable to convert column d to type int64"
@@ -577,9 +577,9 @@ def test_reader_dtype(self, read_ext):
{
"a": Series([1, 2, 3, 4], dtype="float64"),
"b": Series([2.5, 3.5, 4.5, 5.5], dtype="float32"),
- "c": Series(["001", "002", "003", "004"], dtype=object),
- "d": Series(["1", "2", np.nan, "4"], dtype=object),
- }
+ "c": Series(["001", "002", "003", "004"], dtype="str"),
+ "d": Series(["1", "2", np.nan, "4"], dtype="str"),
+ },
),
),
],
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index de40441fe25dd..a8608434be5ee 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -2139,18 +2139,18 @@ def test_pyarrow_engine_lines_false():
def test_json_roundtrip_string_inference(orient):
- pytest.importorskip("pyarrow")
df = DataFrame(
[["a", "b"], ["c", "d"]], index=["row 1", "row 2"], columns=["col 1", "col 2"]
)
out = df.to_json()
with pd.option_context("future.infer_string", True):
result = read_json(StringIO(out))
+ dtype = pd.StringDtype(na_value=np.nan)
expected = DataFrame(
[["a", "b"], ["c", "d"]],
- dtype="string[pyarrow_numpy]",
- index=Index(["row 1", "row 2"], dtype="string[pyarrow_numpy]"),
- columns=Index(["col 1", "col 2"], dtype="string[pyarrow_numpy]"),
+ dtype=dtype,
+ index=Index(["row 1", "row 2"], dtype=dtype),
+ columns=Index(["col 1", "col 2"], dtype=dtype),
)
tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
index 800ece5a409e1..787941c5d0376 100644
--- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
+++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -28,7 +28,7 @@
@pytest.mark.parametrize("dtype", [str, object])
@pytest.mark.parametrize("check_orig", [True, False])
@pytest.mark.usefixtures("pyarrow_xfail")
-def test_dtype_all_columns(all_parsers, dtype, check_orig):
+def test_dtype_all_columns(all_parsers, dtype, check_orig, using_infer_string):
# see gh-3795, gh-6607
parser = all_parsers
@@ -46,8 +46,10 @@ def test_dtype_all_columns(all_parsers, dtype, check_orig):
if check_orig:
expected = df.copy()
result = result.astype(float)
- else:
+ elif using_infer_string and dtype is str:
expected = df.astype(str)
+ else:
+ expected = df.astype(str).astype(object)
tm.assert_frame_equal(result, expected)
@@ -300,7 +302,6 @@ def test_true_values_cast_to_bool(all_parsers):
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.usefixtures("pyarrow_xfail")
@pytest.mark.parametrize("dtypes, exp_value", [({}, "1"), ({"a.1": "int64"}, 1)])
def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value):
@@ -316,7 +317,6 @@ def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value):
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.usefixtures("pyarrow_xfail")
def test_dtype_mangle_dup_cols_single_dtype(all_parsers):
# GH#42022
@@ -547,8 +547,7 @@ def test_ea_int_avoid_overflow(all_parsers):
def test_string_inference(all_parsers):
# GH#54430
- pytest.importorskip("pyarrow")
- dtype = "string[pyarrow_numpy]"
+ dtype = pd.StringDtype(na_value=np.nan)
data = """a,b
x,1
@@ -566,10 +565,8 @@ def test_string_inference(all_parsers):
@pytest.mark.parametrize("dtype", ["O", object, "object", np.object_, str, np.str_])
-def test_string_inference_object_dtype(all_parsers, dtype):
+def test_string_inference_object_dtype(all_parsers, dtype, using_infer_string):
# GH#56047
- pytest.importorskip("pyarrow")
-
data = """a,b
x,a
y,a
@@ -578,12 +575,13 @@ def test_string_inference_object_dtype(all_parsers, dtype):
with pd.option_context("future.infer_string", True):
result = parser.read_csv(StringIO(data), dtype=dtype)
+ expected_dtype = pd.StringDtype(na_value=np.nan) if dtype is str else object
expected = DataFrame(
{
- "a": pd.Series(["x", "y", "z"], dtype=object),
- "b": pd.Series(["a", "a", "a"], dtype=object),
+ "a": pd.Series(["x", "y", "z"], dtype=expected_dtype),
+ "b": pd.Series(["a", "a", "a"], dtype=expected_dtype),
},
- columns=pd.Index(["a", "b"], dtype="string[pyarrow_numpy]"),
+ columns=pd.Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan)),
)
tm.assert_frame_equal(result, expected)
@@ -592,10 +590,10 @@ def test_string_inference_object_dtype(all_parsers, dtype):
expected = DataFrame(
{
- "a": pd.Series(["x", "y", "z"], dtype=object),
- "b": pd.Series(["a", "a", "a"], dtype="string[pyarrow_numpy]"),
+ "a": pd.Series(["x", "y", "z"], dtype=expected_dtype),
+ "b": pd.Series(["a", "a", "a"], dtype=pd.StringDtype(na_value=np.nan)),
},
- columns=pd.Index(["a", "b"], dtype="string[pyarrow_numpy]"),
+ columns=pd.Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan)),
)
tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py
index 1a3b7b37bf66b..5f9823f7225f9 100644
--- a/pandas/tests/io/parser/test_na_values.py
+++ b/pandas/tests/io/parser/test_na_values.py
@@ -630,7 +630,6 @@ def test_inf_na_values_with_int_index(all_parsers):
tm.assert_frame_equal(out, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@xfail_pyarrow # mismatched shape
@pytest.mark.parametrize("na_filter", [True, False])
def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter):
@@ -682,7 +681,6 @@ def test_cast_NA_to_bool_raises_error(all_parsers, data, na_values):
# TODO: this test isn't about the na_values keyword, it is about the empty entries
# being returned with NaN entries, whereas the pyarrow engine returns "nan"
@xfail_pyarrow # mismatched shapes
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_str_nan_dropped(all_parsers):
# see gh-21131
parser = all_parsers
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index 9e7530906afa3..5f2ddf7de9c6d 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -17,8 +17,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
from pandas.errors import (
ParserError,
ParserWarning,
@@ -498,7 +496,6 @@ def test_header_int_do_not_infer_multiindex_names_on_different_line(python_parse
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.parametrize(
"dtype", [{"a": object}, {"a": str, "b": np.int64, "c": np.int64}]
)
@@ -523,10 +520,11 @@ def test_no_thousand_convert_with_dot_for_non_numeric_cols(python_parser_only, d
"c": [0, 4000, 131],
}
)
+ if dtype["a"] == object:
+ expected["a"] = expected["a"].astype(object)
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.parametrize(
"dtype,expected",
[
diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py
index a04f02f0e052b..28cd8aea1defc 100644
--- a/pandas/tests/io/pytables/test_read.py
+++ b/pandas/tests/io/pytables/test_read.py
@@ -403,7 +403,6 @@ def test_read_py2_hdf_file_in_py3(datapath):
def test_read_infer_string(tmp_path, setup_path):
# GH#54431
- pytest.importorskip("pyarrow")
df = DataFrame({"a": ["a", "b", None]})
path = tmp_path / setup_path
df.to_hdf(path, key="data", format="table")
@@ -411,7 +410,7 @@ def test_read_infer_string(tmp_path, setup_path):
result = read_hdf(path, key="data", mode="r")
expected = DataFrame(
{"a": ["a", "b", None]},
- dtype="string[pyarrow_numpy]",
- columns=Index(["a"], dtype="string[pyarrow_numpy]"),
+ dtype=pd.StringDtype(na_value=np.nan),
+ columns=Index(["a"], dtype=pd.StringDtype(na_value=np.nan)),
)
tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py
index d1201686edefa..3b4484e44e155 100644
--- a/pandas/tests/io/test_feather.py
+++ b/pandas/tests/io/test_feather.py
@@ -2,19 +2,15 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
import pandas as pd
import pandas._testing as tm
from pandas.io.feather_format import read_feather, to_feather # isort:skip
-pytestmark = [
- pytest.mark.filterwarnings(
- "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
- ),
- pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
-]
+pytestmark = pytest.mark.filterwarnings(
+ "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
+)
+
pa = pytest.importorskip("pyarrow")
@@ -150,12 +146,11 @@ def test_path_localpath(self):
result = tm.round_trip_localpath(df.to_feather, read_feather)
tm.assert_frame_equal(df, result)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_passthrough_keywords(self):
df = pd.DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
- columns=pd.Index(list("ABCD"), dtype=object),
- index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
+ columns=pd.Index(list("ABCD")),
+ index=pd.Index([f"i-{i}" for i in range(30)]),
).reset_index()
self.check_round_trip(df, write_kwargs={"version": 1})
@@ -169,7 +164,9 @@ def test_http_path(self, feather_file, httpserver):
res = read_feather(httpserver.url)
tm.assert_frame_equal(expected, res)
- def test_read_feather_dtype_backend(self, string_storage, dtype_backend):
+ def test_read_feather_dtype_backend(
+ self, string_storage, dtype_backend, using_infer_string
+ ):
# GH#50765
df = pd.DataFrame(
{
@@ -191,7 +188,10 @@ def test_read_feather_dtype_backend(self, string_storage, dtype_backend):
if dtype_backend == "pyarrow":
pa = pytest.importorskip("pyarrow")
- string_dtype = pd.ArrowDtype(pa.string())
+ if using_infer_string:
+ string_dtype = pd.ArrowDtype(pa.large_string())
+ else:
+ string_dtype = pd.ArrowDtype(pa.string())
else:
string_dtype = pd.StringDtype(string_storage)
@@ -218,6 +218,10 @@ def test_read_feather_dtype_backend(self, string_storage, dtype_backend):
}
)
+ if using_infer_string:
+ expected.columns = expected.columns.astype(
+ pd.StringDtype(string_storage, na_value=np.nan)
+ )
tm.assert_frame_equal(result, expected)
def test_int_columns_and_index(self):
@@ -242,5 +246,7 @@ def test_string_inference(self, tmp_path):
df.to_feather(path)
with pd.option_context("future.infer_string", True):
result = read_feather(path)
- expected = pd.DataFrame(data={"a": ["x", "y"]}, dtype="string[pyarrow_numpy]")
+ expected = pd.DataFrame(
+ data={"a": ["x", "y"]}, dtype=pd.StringDtype(na_value=np.nan)
+ )
tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py
index 19b60e17d3a92..5ed64e3eb0958 100644
--- a/pandas/tests/io/test_fsspec.py
+++ b/pandas/tests/io/test_fsspec.py
@@ -168,7 +168,7 @@ def test_excel_options(fsspectest):
assert fsspectest.test[0] == "read"
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) fastparquet")
def test_to_parquet_new_file(cleared_fs, df1):
"""Regression test for writing to a not-yet-existent GCS Parquet file."""
pytest.importorskip("fastparquet")
@@ -198,7 +198,7 @@ def test_arrowparquet_options(fsspectest):
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) fastparquet
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) fastparquet")
def test_fastparquet_options(fsspectest):
"""Regression test for writing to a not-yet-existent GCS Parquet file."""
pytest.importorskip("fastparquet")
@@ -256,7 +256,7 @@ def test_s3_protocols(s3_public_bucket_with_data, tips_file, protocol, s3so):
)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) fastparquet")
@pytest.mark.single_cpu
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) fastparquet
def test_s3_parquet(s3_public_bucket, s3so, df1):
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index 96bc0326b23ab..81f951b3958b0 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -197,7 +197,7 @@ def test_to_csv_compression_encoding_gcs(
tm.assert_frame_equal(df, read_df)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) fastparquet")
def test_to_parquet_gcs_new_file(monkeypatch, tmpdir):
"""Regression test for writing to a not-yet-existent GCS Parquet file."""
pytest.importorskip("fastparquet")
diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py
index 52d6850483418..4c4d7461e4ac5 100644
--- a/pandas/tests/io/test_orc.py
+++ b/pandas/tests/io/test_orc.py
@@ -8,8 +8,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
import pandas as pd
from pandas import read_orc
import pandas._testing as tm
@@ -19,12 +17,9 @@
import pyarrow as pa
-pytestmark = [
- pytest.mark.filterwarnings(
- "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
- ),
- pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
-]
+pytestmark = pytest.mark.filterwarnings(
+ "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
+)
@pytest.fixture
@@ -47,7 +42,7 @@ def orc_writer_dtypes_not_supported(request):
return pd.DataFrame({"unimpl": request.param})
-def test_orc_reader_empty(dirpath):
+def test_orc_reader_empty(dirpath, using_infer_string):
columns = [
"boolean1",
"byte1",
@@ -68,11 +63,12 @@ def test_orc_reader_empty(dirpath):
"float32",
"float64",
"object",
- "object",
+ "str" if using_infer_string else "object",
]
expected = pd.DataFrame(index=pd.RangeIndex(0))
for colname, dtype in zip(columns, dtypes):
expected[colname] = pd.Series(dtype=dtype)
+ expected.columns = expected.columns.astype("str")
inputfile = os.path.join(dirpath, "TestOrcFile.emptyFile.orc")
got = read_orc(inputfile, columns=columns)
@@ -309,7 +305,7 @@ def test_orc_writer_dtypes_not_supported(orc_writer_dtypes_not_supported):
orc_writer_dtypes_not_supported.to_orc()
-def test_orc_dtype_backend_pyarrow():
+def test_orc_dtype_backend_pyarrow(using_infer_string):
pytest.importorskip("pyarrow")
df = pd.DataFrame(
{
@@ -340,6 +336,13 @@ def test_orc_dtype_backend_pyarrow():
for col in df.columns
}
)
+ if using_infer_string:
+ # ORC does not preserve distinction between string and large string
+ # -> the default large string comes back as string
+ string_dtype = pd.ArrowDtype(pa.string())
+ expected["string"] = expected["string"].astype(string_dtype)
+ expected["string_with_nan"] = expected["string_with_nan"].astype(string_dtype)
+ expected["string_with_none"] = expected["string_with_none"].astype(string_dtype)
tm.assert_frame_equal(result, expected)
@@ -435,7 +438,7 @@ def test_string_inference(tmp_path):
result = read_orc(path)
expected = pd.DataFrame(
data={"a": ["x", "y"]},
- dtype="string[pyarrow_numpy]",
- columns=pd.Index(["a"], dtype="string[pyarrow_numpy]"),
+ dtype=pd.StringDtype(na_value=np.nan),
+ columns=pd.Index(["a"], dtype=pd.StringDtype(na_value=np.nan)),
)
tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 59662ec77d52f..746ca3cf6534d 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -55,7 +55,6 @@
pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
),
- pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
]
@@ -64,11 +63,18 @@
params=[
pytest.param(
"fastparquet",
- marks=pytest.mark.skipif(
- not _HAVE_FASTPARQUET
- or _get_option("mode.data_manager", silent=True) == "array",
- reason="fastparquet is not installed or ArrayManager is used",
- ),
+ marks=[
+ pytest.mark.skipif(
+ not _HAVE_FASTPARQUET
+ or _get_option("mode.data_manager", silent=True) == "array",
+ reason="fastparquet is not installed or ArrayManager is used",
+ ),
+ pytest.mark.xfail(
+ using_string_dtype(),
+ reason="TODO(infer_string) fastparquet",
+ strict=False,
+ ),
+ ],
),
pytest.param(
"pyarrow",
@@ -90,17 +96,24 @@ def pa():
@pytest.fixture
-def fp():
+def fp(request):
if not _HAVE_FASTPARQUET:
pytest.skip("fastparquet is not installed")
elif _get_option("mode.data_manager", silent=True) == "array":
pytest.skip("ArrayManager is not supported with fastparquet")
+ if using_string_dtype():
+ request.applymarker(
+ pytest.mark.xfail(reason="TODO(infer_string) fastparquet", strict=False)
+ )
return "fastparquet"
@pytest.fixture
def df_compat():
- return pd.DataFrame({"A": [1, 2, 3], "B": "foo"})
+ # TODO(infer_string) should this give str columns?
+ return pd.DataFrame(
+ {"A": [1, 2, 3], "B": "foo"}, columns=pd.Index(["A", "B"], dtype=object)
+ )
@pytest.fixture
@@ -389,16 +402,6 @@ def check_external_error_on_write(self, df, engine, exc):
with tm.external_error_raised(exc):
to_parquet(df, path, engine, compression=None)
- @pytest.mark.network
- @pytest.mark.single_cpu
- def test_parquet_read_from_url(self, httpserver, datapath, df_compat, engine):
- if engine != "auto":
- pytest.importorskip(engine)
- with open(datapath("io", "data", "parquet", "simple.parquet"), mode="rb") as f:
- httpserver.serve_content(content=f.read())
- df = read_parquet(httpserver.url)
- tm.assert_frame_equal(df, df_compat)
-
class TestBasic(Base):
def test_error(self, engine):
@@ -696,6 +699,16 @@ def test_read_empty_array(self, pa, dtype):
df, pa, read_kwargs={"dtype_backend": "numpy_nullable"}, expected=expected
)
+ @pytest.mark.network
+ @pytest.mark.single_cpu
+ def test_parquet_read_from_url(self, httpserver, datapath, df_compat, engine):
+ if engine != "auto":
+ pytest.importorskip(engine)
+ with open(datapath("io", "data", "parquet", "simple.parquet"), mode="rb") as f:
+ httpserver.serve_content(content=f.read())
+ df = read_parquet(httpserver.url, engine=engine)
+ tm.assert_frame_equal(df, df_compat)
+
class TestParquetPyArrow(Base):
def test_basic(self, pa, df_full):
@@ -925,7 +938,7 @@ def test_write_with_schema(self, pa):
out_df = df.astype(bool)
check_round_trip(df, pa, write_kwargs={"schema": schema}, expected=out_df)
- def test_additional_extension_arrays(self, pa):
+ def test_additional_extension_arrays(self, pa, using_infer_string):
# test additional ExtensionArrays that are supported through the
# __arrow_array__ protocol
pytest.importorskip("pyarrow")
@@ -936,17 +949,25 @@ def test_additional_extension_arrays(self, pa):
"c": pd.Series(["a", None, "c"], dtype="string"),
}
)
- check_round_trip(df, pa)
+ if using_infer_string:
+ check_round_trip(df, pa, expected=df.astype({"c": "str"}))
+ else:
+ check_round_trip(df, pa)
df = pd.DataFrame({"a": pd.Series([1, 2, 3, None], dtype="Int64")})
check_round_trip(df, pa)
- def test_pyarrow_backed_string_array(self, pa, string_storage):
+ def test_pyarrow_backed_string_array(self, pa, string_storage, using_infer_string):
# test ArrowStringArray supported through the __arrow_array__ protocol
pytest.importorskip("pyarrow")
df = pd.DataFrame({"a": pd.Series(["a", None, "c"], dtype="string[pyarrow]")})
with pd.option_context("string_storage", string_storage):
- check_round_trip(df, pa, expected=df.astype(f"string[{string_storage}]"))
+ if using_infer_string:
+ expected = df.astype("str")
+ expected.columns = expected.columns.astype("str")
+ else:
+ expected = df.astype(f"string[{string_storage}]")
+ check_round_trip(df, pa, expected=expected)
def test_additional_extension_types(self, pa):
# test additional ExtensionArrays that are supported through the
@@ -1107,8 +1128,8 @@ def test_string_inference(self, tmp_path, pa):
result = read_parquet(path, engine="pyarrow")
expected = pd.DataFrame(
data={"a": ["x", "y"]},
- dtype="string[pyarrow_numpy]",
- index=pd.Index(["a", "b"], dtype="string[pyarrow_numpy]"),
+ dtype=pd.StringDtype(na_value=np.nan),
+ index=pd.Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan)),
)
tm.assert_frame_equal(result, expected)
@@ -1138,8 +1159,8 @@ def test_infer_string_large_string_type(self, tmp_path, pa):
result = read_parquet(path)
expected = pd.DataFrame(
data={"a": [None, "b", "c"]},
- dtype="string[pyarrow_numpy]",
- columns=pd.Index(["a"], dtype="string[pyarrow_numpy]"),
+ dtype=pd.StringDtype(na_value=np.nan),
+ columns=pd.Index(["a"], dtype=pd.StringDtype(na_value=np.nan)),
)
tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index b1557d71f15e4..514eaceaccbe6 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -3840,7 +3840,6 @@ class Test(BaseModel):
def test_read_sql_string_inference(sqlite_engine):
conn = sqlite_engine
# GH#54430
- pytest.importorskip("pyarrow")
table = "test"
df = DataFrame({"a": ["x", "y"]})
df.to_sql(table, con=conn, index=False, if_exists="replace")
@@ -3848,7 +3847,7 @@ def test_read_sql_string_inference(sqlite_engine):
with pd.option_context("future.infer_string", True):
result = read_sql_table(table, conn)
- dtype = "string[pyarrow_numpy]"
+ dtype = pd.StringDtype(na_value=np.nan)
expected = DataFrame(
{"a": ["x", "y"]}, dtype=dtype, columns=Index(["a"], dtype=dtype)
)
diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py
index 8583d8bcc052c..17dae1879f3b8 100644
--- a/pandas/tests/libs/test_lib.py
+++ b/pandas/tests/libs/test_lib.py
@@ -1,3 +1,5 @@
+import pickle
+
import numpy as np
import pytest
@@ -283,3 +285,15 @@ def test_no_default_pickle():
# GH#40397
obj = tm.round_trip_pickle(lib.no_default)
assert obj is lib.no_default
+
+
+def test_ensure_string_array_copy():
+ # ensure the original array is not modified in case of copy=False with
+ # pickle-roundtripped object dtype array
+ # https://github.com/pandas-dev/pandas/issues/54654
+ arr = np.array(["a", None], dtype=object)
+ arr = pickle.loads(pickle.dumps(arr))
+ result = lib.ensure_string_array(arr, copy=False)
+ assert not np.shares_memory(arr, result)
+ assert arr[1] is None
+ assert result[1] is np.nan
diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
index 2a52d3060e4b9..77c45cf36894b 100644
--- a/pandas/tests/reshape/concat/test_concat.py
+++ b/pandas/tests/reshape/concat/test_concat.py
@@ -46,6 +46,7 @@ def test_append_concat(self):
assert isinstance(result.index, PeriodIndex)
assert result.index[0] == s1.index[0]
+ # test is not written to work with string dtype (checks .base)
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_concat_copy(self, using_array_manager, using_copy_on_write):
df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
@@ -80,6 +81,7 @@ def test_concat_copy(self, using_array_manager, using_copy_on_write):
assert arr is df3._mgr.arrays[0]
else:
assert arr.base is not None
+ assert arr.base is not None
# Float block was consolidated.
df4 = DataFrame(np.random.default_rng(2).standard_normal((4, 1)))
diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py
index 11e29f4e10dc4..77a3d64415ace 100644
--- a/pandas/tests/reshape/merge/test_merge_asof.py
+++ b/pandas/tests/reshape/merge/test_merge_asof.py
@@ -4,8 +4,6 @@
import pytest
import pytz
-from pandas._config import using_string_dtype
-
import pandas.util._test_decorators as td
import pandas as pd
@@ -3083,12 +3081,8 @@ def test_on_float_by_int(self):
tm.assert_frame_equal(result, expected)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
- def test_merge_datatype_error_raises(self, using_infer_string):
- if using_infer_string:
- msg = "incompatible merge keys"
- else:
- msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"
+ def test_merge_datatype_error_raises(self):
+ msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"
left = pd.DataFrame({"left_val": [1, 5, 10], "a": ["a", "b", "c"]})
right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7], "a": [1, 2, 3, 6, 7]})
diff --git a/pandas/tests/reshape/test_get_dummies.py b/pandas/tests/reshape/test_get_dummies.py
index 2c17b7f6a5a47..637bce59e9e2c 100644
--- a/pandas/tests/reshape/test_get_dummies.py
+++ b/pandas/tests/reshape/test_get_dummies.py
@@ -4,8 +4,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
import pandas.util._test_decorators as td
from pandas.core.dtypes.common import is_integer_dtype
@@ -216,11 +214,10 @@ def test_dataframe_dummies_all_obj(self, df, sparse):
tm.assert_frame_equal(result, expected)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
- def test_dataframe_dummies_string_dtype(self, df, using_infer_string):
+ def test_dataframe_dummies_string_dtype(self, df, any_string_dtype):
# GH44965
df = df[["A", "B"]]
- df = df.astype({"A": "object", "B": "string"})
+ df = df.astype({"A": "str", "B": any_string_dtype})
result = get_dummies(df)
expected = DataFrame(
{
@@ -231,8 +228,7 @@ def test_dataframe_dummies_string_dtype(self, df, using_infer_string):
},
dtype=bool,
)
- if not using_infer_string:
- # infer_string returns numpy bools
+ if any_string_dtype == "string" and any_string_dtype.na_value is pd.NA:
expected[["B_b", "B_c"]] = expected[["B_b", "B_c"]].astype("boolean")
tm.assert_frame_equal(result, expected)
@@ -711,19 +707,17 @@ def test_get_dummies_ea_dtype_dataframe(self, any_numeric_ea_and_arrow_dtype):
)
tm.assert_frame_equal(result, expected)
- @td.skip_if_no("pyarrow")
- def test_get_dummies_ea_dtype(self):
+ @pytest.mark.parametrize("dtype_type", ["string", "category"])
+ def test_get_dummies_ea_dtype(self, dtype_type, string_dtype_no_object):
# GH#56273
- for dtype, exp_dtype in [
- ("string[pyarrow]", "boolean"),
- ("string[pyarrow_numpy]", "bool"),
- (CategoricalDtype(Index(["a"], dtype="string[pyarrow]")), "boolean"),
- (CategoricalDtype(Index(["a"], dtype="string[pyarrow_numpy]")), "bool"),
- ]:
- df = DataFrame({"name": Series(["a"], dtype=dtype), "x": 1})
- result = get_dummies(df)
- expected = DataFrame({"x": 1, "name_a": Series([True], dtype=exp_dtype)})
- tm.assert_frame_equal(result, expected)
+ dtype = string_dtype_no_object
+ exp_dtype = "boolean" if dtype.na_value is pd.NA else "bool"
+ if dtype_type == "category":
+ dtype = CategoricalDtype(Index(["a"], dtype))
+ df = DataFrame({"name": Series(["a"], dtype=dtype), "x": 1})
+ result = get_dummies(df)
+ expected = DataFrame({"x": 1, "name_a": Series([True], dtype=exp_dtype)})
+ tm.assert_frame_equal(result, expected)
@td.skip_if_no("pyarrow")
def test_get_dummies_arrow_dtype(self):
diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
index cbe2c9b931ee3..72fd72df60761 100644
--- a/pandas/tests/reshape/test_melt.py
+++ b/pandas/tests/reshape/test_melt.py
@@ -3,8 +3,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
import pandas as pd
from pandas import (
DataFrame,
@@ -21,7 +19,7 @@
def df():
res = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
- columns=Index(list("ABCD"), dtype=object),
+ columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)
res["id1"] = (res["A"] > 0).astype(np.int64)
@@ -83,7 +81,6 @@ def test_default_col_names(self, df):
result2 = df.melt(id_vars=["id1", "id2"])
assert result2.columns.tolist() == ["id1", "id2", "variable", "value"]
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_value_vars(self, df):
result3 = df.melt(id_vars=["id1", "id2"], value_vars="A")
assert len(result3) == 10
@@ -100,7 +97,6 @@ def test_value_vars(self, df):
)
tm.assert_frame_equal(result4, expected4)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.parametrize("type_", (tuple, list, np.array))
def test_value_vars_types(self, type_, df):
# GH 15348
@@ -181,7 +177,6 @@ def test_tuple_vars_fail_with_multiindex(self, id_vars, value_vars, df1):
with pytest.raises(ValueError, match=msg):
df1.melt(id_vars=id_vars, value_vars=value_vars)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_custom_var_name(self, df, var_name):
result5 = df.melt(var_name=var_name)
assert result5.columns.tolist() == ["var", "value"]
@@ -209,7 +204,6 @@ def test_custom_var_name(self, df, var_name):
)
tm.assert_frame_equal(result9, expected9)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_custom_value_name(self, df, value_name):
result10 = df.melt(value_name=value_name)
assert result10.columns.tolist() == ["variable", "val"]
@@ -239,7 +233,6 @@ def test_custom_value_name(self, df, value_name):
)
tm.assert_frame_equal(result14, expected14)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_custom_var_and_value_name(self, df, value_name, var_name):
result15 = df.melt(var_name=var_name, value_name=value_name)
assert result15.columns.tolist() == ["var", "val"]
@@ -364,7 +357,6 @@ def test_melt_missing_columns_raises(self):
with pytest.raises(KeyError, match=msg):
multi.melt(["A"], ["F"], col_level=0)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_melt_mixed_int_str_id_vars(self):
# GH 29718
df = DataFrame({0: ["foo"], "a": ["bar"], "b": [1], "d": [2]})
@@ -372,6 +364,8 @@ def test_melt_mixed_int_str_id_vars(self):
expected = DataFrame(
{0: ["foo"] * 2, "a": ["bar"] * 2, "variable": list("bd"), "value": [1, 2]}
)
+ # the df's columns are mixed type and thus object -> preserves object dtype
+ expected["variable"] = expected["variable"].astype(object)
tm.assert_frame_equal(result, expected)
def test_melt_mixed_int_str_value_vars(self):
@@ -1205,12 +1199,13 @@ def test_raise_of_column_name_value(self):
):
df.melt(id_vars="value", value_name="value")
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
- @pytest.mark.parametrize("dtype", ["O", "string"])
- def test_missing_stubname(self, dtype):
+ def test_missing_stubname(self, request, any_string_dtype, using_infer_string):
+ if using_infer_string and any_string_dtype == "object":
+ # triggers object dtype inference warning of dtype=object
+ request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
# GH46044
df = DataFrame({"id": ["1", "2"], "a-1": [100, 200], "a-2": [300, 400]})
- df = df.astype({"id": dtype})
+ df = df.astype({"id": any_string_dtype})
result = wide_to_long(
df,
stubnames=["a", "b"],
@@ -1226,15 +1221,16 @@ def test_missing_stubname(self, dtype):
{"a": [100, 200, 300, 400], "b": [np.nan] * 4},
index=index,
)
- new_level = expected.index.levels[0].astype(dtype)
+ new_level = expected.index.levels[0].astype(any_string_dtype)
+ if any_string_dtype == "object":
+ new_level = expected.index.levels[0].astype("str")
expected.index = expected.index.set_levels(new_level, level=0)
tm.assert_frame_equal(result, expected)
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
-def test_wide_to_long_pyarrow_string_columns():
+def test_wide_to_long_string_columns(string_storage):
# GH 57066
- pytest.importorskip("pyarrow")
+ string_dtype = pd.StringDtype(string_storage, na_value=np.nan)
df = DataFrame(
{
"ID": {0: 1},
@@ -1244,17 +1240,17 @@ def test_wide_to_long_pyarrow_string_columns():
"D": {0: 1},
}
)
- df.columns = df.columns.astype("string[pyarrow_numpy]")
+ df.columns = df.columns.astype(string_dtype)
result = wide_to_long(
df, stubnames="R", i="ID", j="UNPIVOTED", sep="_", suffix=".*"
)
expected = DataFrame(
[[1, 1], [1, 1], [1, 2]],
- columns=Index(["D", "R"], dtype=object),
+ columns=Index(["D", "R"]),
index=pd.MultiIndex.from_arrays(
[
[1, 1, 1],
- Index(["test1", "test2", "test3"], dtype="string[pyarrow_numpy]"),
+ Index(["test1", "test2", "test3"], dtype=string_dtype),
],
names=["ID", "UNPIVOTED"],
),
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 9aa13d59a586b..d0858a0ea5558 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1081,7 +1081,6 @@ def test_margins_dtype_len(self, data):
tm.assert_frame_equal(expected, result)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("cols", [(1, 2), ("a", "b"), (1, "b"), ("a", 1)])
def test_pivot_table_multiindex_only(self, cols):
# GH 17038
@@ -1091,7 +1090,7 @@ def test_pivot_table_multiindex_only(self, cols):
expected = DataFrame(
[[4.0, 5.0, 6.0]],
columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols),
- index=Index(["v"], dtype=object),
+ index=Index(["v"], dtype="str" if cols == ("a", "b") else "object"),
)
tm.assert_frame_equal(result, expected)
@@ -2525,13 +2524,16 @@ def test_pivot_empty(self):
expected = DataFrame(index=[], columns=[])
tm.assert_frame_equal(result, expected, check_names=False)
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
- @pytest.mark.parametrize("dtype", [object, "string"])
- def test_pivot_integer_bug(self, dtype):
- df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=dtype)
+ def test_pivot_integer_bug(self, any_string_dtype):
+ df = DataFrame(
+ data=[("A", "1", "A1"), ("B", "2", "B2")], dtype=any_string_dtype
+ )
result = df.pivot(index=1, columns=0, values=2)
- tm.assert_index_equal(result.columns, Index(["A", "B"], name=0, dtype=dtype))
+ expected_columns = Index(["A", "B"], name=0, dtype=any_string_dtype)
+ if any_string_dtype == "object":
+ expected_columns = expected_columns.astype("str")
+ tm.assert_index_equal(result.columns, expected_columns)
def test_pivot_index_none(self):
# GH#3962
@@ -2613,7 +2615,9 @@ def test_pivot_columns_not_given(self):
with pytest.raises(TypeError, match="missing 1 required keyword-only argument"):
df.pivot() # pylint: disable=missing-kwoa
- @pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
+ @pytest.mark.xfail(
+ using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
+ )
def test_pivot_columns_is_none(self):
# GH#48293
df = DataFrame({None: [1], "b": 2, "c": 3})
@@ -2629,7 +2633,9 @@ def test_pivot_columns_is_none(self):
expected = DataFrame({1: 3}, index=Index([2], name="b"))
tm.assert_frame_equal(result, expected)
- @pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
+ @pytest.mark.xfail(
+ using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
+ )
def test_pivot_index_is_none(self):
# GH#48293
df = DataFrame({None: [1], "b": 2, "c": 3})
@@ -2643,7 +2649,9 @@ def test_pivot_index_is_none(self):
expected = DataFrame(3, index=[1], columns=Index([2], name="b"))
tm.assert_frame_equal(result, expected)
- @pytest.mark.xfail(using_string_dtype(), reason="None is cast to NaN")
+ @pytest.mark.xfail(
+ using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
+ )
def test_pivot_values_is_none(self):
# GH#48293
df = DataFrame({None: [1], "b": 2, "c": 3})
diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py
index ef0757ffe4aa8..b9ba03d1e9f41 100644
--- a/pandas/tests/series/methods/test_astype.py
+++ b/pandas/tests/series/methods/test_astype.py
@@ -76,7 +76,7 @@ def test_astype_dict_like(self, dtype_class):
dt1 = dtype_class({"abc": str})
result = ser.astype(dt1)
- expected = Series(["0", "2", "4", "6", "8"], name="abc", dtype=object)
+ expected = Series(["0", "2", "4", "6", "8"], name="abc", dtype="str")
tm.assert_series_equal(result, expected)
dt2 = dtype_class({"abc": "float64"})
@@ -172,10 +172,14 @@ def test_astype_empty_constructor_equality(self, dtype):
)
def test_astype_str_map(self, dtype, series, using_infer_string):
# see GH#4405
+ using_string_dtype = using_infer_string and dtype is str
result = series.astype(dtype)
- expected = series.map(str)
- if using_infer_string:
- expected = expected.astype(object)
+ if using_string_dtype:
+ expected = series.map(lambda val: str(val) if val is not np.nan else np.nan)
+ else:
+ expected = series.map(str)
+ if using_infer_string:
+ expected = expected.astype(object)
tm.assert_series_equal(result, expected)
def test_astype_float_to_period(self):
@@ -212,7 +216,7 @@ def test_astype_dt64_to_str(self):
# GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
dti = date_range("2012-01-01", periods=3)
result = Series(dti).astype(str)
- expected = Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object)
+ expected = Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype="str")
tm.assert_series_equal(result, expected)
def test_astype_dt64tz_to_str(self):
@@ -225,7 +229,7 @@ def test_astype_dt64tz_to_str(self):
"2012-01-02 00:00:00-05:00",
"2012-01-03 00:00:00-05:00",
],
- dtype=object,
+ dtype="str",
)
tm.assert_series_equal(result, expected)
@@ -285,13 +289,13 @@ def test_astype_str_cast_dt64(self):
ts = Series([Timestamp("2010-01-04 00:00:00")])
res = ts.astype(str)
- expected = Series(["2010-01-04"], dtype=object)
+ expected = Series(["2010-01-04"], dtype="str")
tm.assert_series_equal(res, expected)
ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
res = ts.astype(str)
- expected = Series(["2010-01-04 00:00:00-05:00"], dtype=object)
+ expected = Series(["2010-01-04 00:00:00-05:00"], dtype="str")
tm.assert_series_equal(res, expected)
def test_astype_str_cast_td64(self):
@@ -300,7 +304,7 @@ def test_astype_str_cast_td64(self):
td = Series([Timedelta(1, unit="d")])
ser = td.astype(str)
- expected = Series(["1 days"], dtype=object)
+ expected = Series(["1 days"], dtype="str")
tm.assert_series_equal(ser, expected)
def test_dt64_series_astype_object(self):
@@ -347,7 +351,7 @@ def test_astype_from_float_to_str(self, dtype):
# https://github.com/pandas-dev/pandas/issues/36451
ser = Series([0.1], dtype=dtype)
result = ser.astype(str)
- expected = Series(["0.1"], dtype=object)
+ expected = Series(["0.1"], dtype="str")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
@@ -358,11 +362,13 @@ def test_astype_from_float_to_str(self, dtype):
(NA, ""),
],
)
- def test_astype_to_str_preserves_na(self, value, string_value):
+ def test_astype_to_str_preserves_na(self, value, string_value, using_infer_string):
# https://github.com/pandas-dev/pandas/issues/36904
ser = Series(["a", "b", value], dtype=object)
result = ser.astype(str)
- expected = Series(["a", "b", string_value], dtype=object)
+ expected = Series(
+ ["a", "b", None if using_infer_string else string_value], dtype="str"
+ )
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py
index ac489b2579e05..e5281a18236da 100644
--- a/pandas/tests/series/methods/test_map.py
+++ b/pandas/tests/series/methods/test_map.py
@@ -553,13 +553,11 @@ def f(x):
(list(range(3)), {0: 42}, [42] + [np.nan] * 3),
],
)
-def test_map_missing_mixed(vals, mapping, exp, using_infer_string):
+def test_map_missing_mixed(vals, mapping, exp):
# GH20495
s = Series(vals + [np.nan])
result = s.map(mapping)
exp = Series(exp)
- if using_infer_string and mapping == {np.nan: "not NaN"}:
- exp.iloc[-1] = np.nan
tm.assert_series_equal(result, exp)
diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py
index 24cf97c05c0a8..1c3ebe5653ce3 100644
--- a/pandas/tests/series/methods/test_rank.py
+++ b/pandas/tests/series/methods/test_rank.py
@@ -33,7 +33,8 @@ def ser():
["max", np.array([2, 6, 7, 4, np.nan, 4, 2, 8, np.nan, 6])],
["first", np.array([1, 5, 7, 3, np.nan, 4, 2, 8, np.nan, 6])],
["dense", np.array([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3])],
- ]
+ ],
+ ids=lambda x: x[0],
)
def results(request):
return request.param
@@ -48,12 +49,29 @@ def results(request):
"Int64",
pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow")),
pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")),
+ pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
+ "string[python]",
+ "str",
]
)
def dtype(request):
return request.param
+def expected_dtype(dtype, method, pct=False):
+ exp_dtype = "float64"
+ # elif dtype in ["Int64", "Float64", "string[pyarrow]", "string[python]"]:
+ if dtype in ["string[pyarrow]"]:
+ exp_dtype = "Float64"
+ elif dtype in ["float64[pyarrow]", "int64[pyarrow]"]:
+ if method == "average" or pct:
+ exp_dtype = "double[pyarrow]"
+ else:
+ exp_dtype = "uint64[pyarrow]"
+
+ return exp_dtype
+
+
class TestSeriesRank:
def test_rank(self, datetime_series):
sp_stats = pytest.importorskip("scipy.stats")
@@ -241,12 +259,18 @@ def test_rank_signature(self):
with pytest.raises(ValueError, match=msg):
s.rank("average")
- @pytest.mark.parametrize("dtype", [None, object])
- def test_rank_tie_methods(self, ser, results, dtype):
+ def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
method, exp = results
+ if (
+ dtype == "int64"
+ or dtype == "Int64"
+ or (not using_infer_string and dtype == "str")
+ ):
+ pytest.skip("int64/str does not support NaN")
+
ser = ser if dtype is None else ser.astype(dtype)
result = ser.rank(method=method)
- tm.assert_series_equal(result, Series(exp))
+ tm.assert_series_equal(result, Series(exp, dtype=expected_dtype(dtype, method)))
@pytest.mark.parametrize("ascending", [True, False])
@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
@@ -346,25 +370,35 @@ def test_rank_methods_series(self, method, op, value):
],
)
def test_rank_dense_method(self, dtype, ser, exp):
+ if ser[0] < 0 and dtype.startswith("str"):
+ exp = exp[::-1]
s = Series(ser).astype(dtype)
result = s.rank(method="dense")
- expected = Series(exp).astype(result.dtype)
+ expected = Series(exp).astype(expected_dtype(dtype, "dense"))
tm.assert_series_equal(result, expected)
- def test_rank_descending(self, ser, results, dtype):
+ def test_rank_descending(self, ser, results, dtype, using_infer_string):
method, _ = results
- if "i" in dtype:
+ if dtype == "int64" or (not using_infer_string and dtype == "str"):
s = ser.dropna()
else:
s = ser.astype(dtype)
res = s.rank(ascending=False)
- expected = (s.max() - s).rank()
- tm.assert_series_equal(res, expected)
+ if dtype.startswith("str"):
+ expected = (s.astype("float64").max() - s.astype("float64")).rank()
+ else:
+ expected = (s.max() - s).rank()
+ tm.assert_series_equal(res, expected.astype(expected_dtype(dtype, "average")))
- expected = (s.max() - s).rank(method=method)
+ if dtype.startswith("str"):
+ expected = (s.astype("float64").max() - s.astype("float64")).rank(
+ method=method
+ )
+ else:
+ expected = (s.max() - s).rank(method=method)
res2 = s.rank(method=method, ascending=False)
- tm.assert_series_equal(res2, expected)
+ tm.assert_series_equal(res2, expected.astype(expected_dtype(dtype, method)))
def test_rank_int(self, ser, results):
method, exp = results
@@ -421,9 +455,11 @@ def test_rank_ea_small_values(self):
],
)
def test_rank_dense_pct(dtype, ser, exp):
+ if ser[0] < 0 and dtype.startswith("str"):
+ exp = exp[::-1]
s = Series(ser).astype(dtype)
result = s.rank(method="dense", pct=True)
- expected = Series(exp).astype(result.dtype)
+ expected = Series(exp).astype(expected_dtype(dtype, "dense", pct=True))
tm.assert_series_equal(result, expected)
@@ -442,9 +478,11 @@ def test_rank_dense_pct(dtype, ser, exp):
],
)
def test_rank_min_pct(dtype, ser, exp):
+ if ser[0] < 0 and dtype.startswith("str"):
+ exp = exp[::-1]
s = Series(ser).astype(dtype)
result = s.rank(method="min", pct=True)
- expected = Series(exp).astype(result.dtype)
+ expected = Series(exp).astype(expected_dtype(dtype, "min", pct=True))
tm.assert_series_equal(result, expected)
@@ -463,9 +501,11 @@ def test_rank_min_pct(dtype, ser, exp):
],
)
def test_rank_max_pct(dtype, ser, exp):
+ if ser[0] < 0 and dtype.startswith("str"):
+ exp = exp[::-1]
s = Series(ser).astype(dtype)
result = s.rank(method="max", pct=True)
- expected = Series(exp).astype(result.dtype)
+ expected = Series(exp).astype(expected_dtype(dtype, "max", pct=True))
tm.assert_series_equal(result, expected)
@@ -484,9 +524,11 @@ def test_rank_max_pct(dtype, ser, exp):
],
)
def test_rank_average_pct(dtype, ser, exp):
+ if ser[0] < 0 and dtype.startswith("str"):
+ exp = exp[::-1]
s = Series(ser).astype(dtype)
result = s.rank(method="average", pct=True)
- expected = Series(exp).astype(result.dtype)
+ expected = Series(exp).astype(expected_dtype(dtype, "average", pct=True))
tm.assert_series_equal(result, expected)
@@ -505,9 +547,11 @@ def test_rank_average_pct(dtype, ser, exp):
],
)
def test_rank_first_pct(dtype, ser, exp):
+ if ser[0] < 0 and dtype.startswith("str"):
+ exp = exp[::-1]
s = Series(ser).astype(dtype)
result = s.rank(method="first", pct=True)
- expected = Series(exp).astype(result.dtype)
+ expected = Series(exp).astype(expected_dtype(dtype, "first", pct=True))
tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py
index 1ffc9ddca5adf..a65d7687cfb06 100644
--- a/pandas/tests/series/test_arithmetic.py
+++ b/pandas/tests/series/test_arithmetic.py
@@ -9,8 +9,6 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
from pandas._libs import lib
from pandas._libs.tslibs import IncompatibleFrequency
@@ -214,9 +212,9 @@ def test_series_integer_mod(self, index):
s1 = Series(range(1, 10))
s2 = Series("foo", index=index)
- msg = "not all arguments converted during string formatting|mod not"
+ msg = "not all arguments converted during string formatting|'mod' not supported"
- with pytest.raises((TypeError, NotImplementedError), match=msg):
+ with pytest.raises(TypeError, match=msg):
s2 % s1
def test_add_with_duplicate_index(self):
@@ -501,28 +499,14 @@ def test_ser_cmp_result_names(self, names, comparison_op):
result = op(ser, cidx)
assert result.name == names[2]
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
- def test_comparisons(self, using_infer_string):
+ def test_comparisons(self):
s = Series(["a", "b", "c"])
s2 = Series([False, True, False])
# it works!
exp = Series([False, False, False])
- if using_infer_string:
- import pyarrow as pa
-
- msg = "has no kernel"
- # TODO(3.0) GH56008
- with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
- s == s2
- with tm.assert_produces_warning(
- DeprecationWarning, match="comparison", check_stacklevel=False
- ):
- with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
- s2 == s
- else:
- tm.assert_series_equal(s == s2, exp)
- tm.assert_series_equal(s2 == s, exp)
+ tm.assert_series_equal(s == s2, exp)
+ tm.assert_series_equal(s2 == s, exp)
# -----------------------------------------------------------------
# Categorical Dtype Comparisons
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 6efe0bcb8b45d..60b2ec7b6912d 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -230,7 +230,7 @@ def test_constructor_empty(self, input_class, using_infer_string):
# GH 19853 : with empty string, index and dtype str
empty = Series("", dtype=str, index=range(3))
if using_infer_string:
- empty2 = Series("", index=range(3), dtype=object)
+ empty2 = Series("", index=range(3), dtype="str")
else:
empty2 = Series("", index=range(3))
tm.assert_series_equal(empty, empty2)
diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py
index a9f1726afc942..26bdfcbc6ec56 100644
--- a/pandas/tests/series/test_logical_ops.py
+++ b/pandas/tests/series/test_logical_ops.py
@@ -6,13 +6,12 @@
from pandas._config import using_string_dtype
-from pandas.compat import HAS_PYARROW
-
from pandas import (
ArrowDtype,
DataFrame,
Index,
Series,
+ StringDtype,
bdate_range,
)
import pandas._testing as tm
@@ -151,10 +150,7 @@ def test_logical_operators_int_dtype_with_bool(self):
expected = Series([False, True, True, True])
tm.assert_series_equal(result, expected)
- @pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
- )
- def test_logical_operators_int_dtype_with_object(self, using_infer_string):
+ def test_logical_operators_int_dtype_with_object(self):
# GH#9016: support bitwise op for integer types
s_0123 = Series(range(4), dtype="int64")
@@ -163,14 +159,10 @@ def test_logical_operators_int_dtype_with_object(self, using_infer_string):
tm.assert_series_equal(result, expected)
s_abNd = Series(["a", "b", np.nan, "d"])
- if using_infer_string:
- import pyarrow as pa
-
- with pytest.raises(pa.lib.ArrowNotImplementedError, match="has no kernel"):
- s_0123 & s_abNd
- else:
- with pytest.raises(TypeError, match="unsupported.* 'int' and 'str'"):
- s_0123 & s_abNd
+ with pytest.raises(
+ TypeError, match="unsupported.* 'int' and 'str'|'rand_' not supported"
+ ):
+ s_0123 & s_abNd
def test_logical_operators_bool_dtype_with_int(self):
index = list("bca")
@@ -368,9 +360,7 @@ def test_reverse_ops_with_index(self, op, expected):
result = op(ser, idx)
tm.assert_series_equal(result, expected)
- @pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
- )
+ @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_logical_ops_label_based(self, using_infer_string):
# GH#4947
# logical ops should be label based
@@ -439,15 +429,12 @@ def test_logical_ops_label_based(self, using_infer_string):
tm.assert_series_equal(result, a[a])
for e in [Series(["z"])]:
- warn = FutureWarning if using_infer_string else None
if using_infer_string:
- import pyarrow as pa
-
- with tm.assert_produces_warning(warn, match="Operation between non"):
- with pytest.raises(
- pa.lib.ArrowNotImplementedError, match="has no kernel"
- ):
- result = a[a | e]
+ # TODO(infer_string) should this behave differently?
+ with pytest.raises(
+ TypeError, match="not supported for dtype|unsupported operand type"
+ ):
+ result = a[a | e]
else:
result = a[a | e]
tm.assert_series_equal(result, a[a])
@@ -547,7 +534,7 @@ def test_pyarrow_numpy_string_invalid(self):
# GH#56008
pa = pytest.importorskip("pyarrow")
ser = Series([False, True])
- ser2 = Series(["a", "b"], dtype="string[pyarrow_numpy]")
+ ser2 = Series(["a", "b"], dtype=StringDtype(na_value=np.nan))
result = ser == ser2
expected_eq = Series(False, index=ser.index)
tm.assert_series_equal(result, expected_eq)
diff --git a/pandas/tests/strings/test_case_justify.py b/pandas/tests/strings/test_case_justify.py
index 41aedae90ca76..819556f961fa3 100644
--- a/pandas/tests/strings/test_case_justify.py
+++ b/pandas/tests/strings/test_case_justify.py
@@ -291,11 +291,7 @@ def test_center_ljust_rjust_mixed_object():
def test_center_ljust_rjust_fillchar(any_string_dtype):
- if any_string_dtype == "string[pyarrow_numpy]":
- pytest.skip(
- "Arrow logic is different, "
- "see https://github.com/pandas-dev/pandas/pull/54533/files#r1299808126",
- )
+ # GH#54533, GH#54792
s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)
result = s.str.center(5, fillchar="X")
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index 78ce1d7418886..2742c5b67e57e 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -4,7 +4,6 @@
import numpy as np
import pytest
-from pandas.errors import PerformanceWarning
import pandas.util._test_decorators as td
import pandas as pd
@@ -22,10 +21,6 @@
# --------------------------------------------------------------------------------------
-def using_pyarrow(dtype):
- return dtype in ("string[pyarrow]", "string[pyarrow_numpy]")
-
-
def test_contains(any_string_dtype):
values = np.array(
["foo", np.nan, "fooommm__foo", "mmm_", "foommm[_]+bar"], dtype=np.object_
@@ -167,7 +162,16 @@ def test_contains_na_kwarg_for_nullable_string_dtype(
# https://github.com/pandas-dev/pandas/pull/41025#issuecomment-824062416
values = Series(["a", "b", "c", "a", np.nan], dtype=nullable_string_dtype)
- result = values.str.contains("a", na=na, regex=regex)
+
+ msg = (
+ "Allowing a non-bool 'na' in obj.str.contains is deprecated and "
+ "will raise in a future version"
+ )
+ warn = None
+ if not pd.isna(na) and not isinstance(na, bool):
+ warn = FutureWarning
+ with tm.assert_produces_warning(warn, match=msg):
+ result = values.str.contains("a", na=na, regex=regex)
expected = Series([True, False, False, True, expected], dtype="boolean")
tm.assert_series_equal(result, expected)
@@ -233,6 +237,7 @@ def test_contains_nan(any_string_dtype):
expected = Series([True, True, True], dtype=expected_dtype)
tm.assert_series_equal(result, expected)
+ # TODO(infer_string)
# this particular combination of events is broken on 2.3
# would require cherry picking #58483, which in turn requires #57481
# which introduce many behavioral changes
@@ -241,14 +246,19 @@ def test_contains_nan(any_string_dtype):
and any_string_dtype.storage == "python"
and any_string_dtype.na_value is np.nan
):
- result = s.str.contains("foo", na="foo")
+ msg = (
+ "Allowing a non-bool 'na' in obj.str.contains is deprecated and "
+ "will raise in a future version"
+ )
+ with tm.assert_produces_warning(FutureWarning, match=msg):
+ result = s.str.contains("foo", na="foo")
if any_string_dtype == "object":
expected = Series(["foo", "foo", "foo"], dtype=np.object_)
elif any_string_dtype.na_value is np.nan:
expected = Series([True, True, True], dtype=np.bool_)
else:
expected = Series([True, True, True], dtype="boolean")
- tm.assert_series_equal(result, expected)
+ tm.assert_series_equal(result, expected)
result = s.str.contains("foo")
expected_dtype = (
@@ -263,6 +273,40 @@ def test_contains_nan(any_string_dtype):
# --------------------------------------------------------------------------------------
+def test_startswith_endswith_validate_na(request, any_string_dtype):
+ if (
+ any_string_dtype == "string"
+ and any_string_dtype.na_value is np.nan
+ and any_string_dtype.storage == "python"
+ ):
+ request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
+ # GH#59615
+ ser = Series(
+ ["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"],
+ dtype=any_string_dtype,
+ )
+
+ dtype = ser.dtype
+ if (
+ isinstance(dtype, pd.StringDtype) and dtype.storage == "python"
+ ) or dtype == np.dtype("object"):
+ msg = "Allowing a non-bool 'na' in obj.str.startswith is deprecated"
+ with tm.assert_produces_warning(FutureWarning, match=msg):
+ ser.str.startswith("kapow", na="baz")
+ msg = "Allowing a non-bool 'na' in obj.str.endswith is deprecated"
+ with tm.assert_produces_warning(FutureWarning, match=msg):
+ ser.str.endswith("bar", na="baz")
+ else:
+ # TODO(infer_string): don't surface pyarrow errors
+ import pyarrow as pa
+
+ msg = "Could not convert 'baz' with type str: tried to convert to boolean"
+ with pytest.raises(pa.lib.ArrowInvalid, match=msg):
+ ser.str.startswith("kapow", na="baz")
+ with pytest.raises(pa.lib.ArrowInvalid, match=msg):
+ ser.str.endswith("kapow", na="baz")
+
+
@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")])
@pytest.mark.parametrize("dtype", ["object", "category"])
@pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
@@ -413,8 +457,7 @@ def test_replace_mixed_object():
def test_replace_unicode(any_string_dtype):
ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
- with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
- result = ser.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE, regex=True)
+ result = ser.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE, regex=True)
tm.assert_series_equal(result, expected)
@@ -434,8 +477,7 @@ def test_replace_callable(any_string_dtype):
# test with callable
repl = lambda m: m.group(0).swapcase()
- with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
- result = ser.str.replace("[a-z][A-Z]{2}", repl, n=2, regex=True)
+ result = ser.str.replace("[a-z][A-Z]{2}", repl, n=2, regex=True)
expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)
@@ -453,10 +495,7 @@ def test_replace_callable_raises(any_string_dtype, repl):
r"(?(3)required )positional arguments?"
)
with pytest.raises(TypeError, match=msg):
- with tm.maybe_produces_warning(
- PerformanceWarning, using_pyarrow(any_string_dtype)
- ):
- values.str.replace("a", repl, regex=True)
+ values.str.replace("a", repl, regex=True)
def test_replace_callable_named_groups(any_string_dtype):
@@ -464,8 +503,7 @@ def test_replace_callable_named_groups(any_string_dtype):
ser = Series(["Foo Bar Baz", np.nan], dtype=any_string_dtype)
pat = r"(?P\w+) (?P\w+) (?P\w+)"
repl = lambda m: m.group("middle").swapcase()
- with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
- result = ser.str.replace(pat, repl, regex=True)
+ result = ser.str.replace(pat, repl, regex=True)
expected = Series(["bAR", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)
@@ -476,13 +514,11 @@ def test_replace_compiled_regex(any_string_dtype):
# test with compiled regex
pat = re.compile(r"BAD_*")
- with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
- result = ser.str.replace(pat, "", regex=True)
+ result = ser.str.replace(pat, "", regex=True)
expected = Series(["foobar", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)
- with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
- result = ser.str.replace(pat, "", n=1, regex=True)
+ result = ser.str.replace(pat, "", n=1, regex=True)
expected = Series(["foobarBAD", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)
@@ -503,8 +539,7 @@ def test_replace_compiled_regex_unicode(any_string_dtype):
ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE)
- with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
- result = ser.str.replace(pat, ", ", regex=True)
+ result = ser.str.replace(pat, ", ", regex=True)
tm.assert_series_equal(result, expected)
@@ -531,8 +566,7 @@ def test_replace_compiled_regex_callable(any_string_dtype):
ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
repl = lambda m: m.group(0).swapcase()
pat = re.compile("[a-z][A-Z]{2}")
- with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
- result = ser.str.replace(pat, repl, n=2, regex=True)
+ result = ser.str.replace(pat, repl, n=2, regex=True)
expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)
@@ -580,8 +614,7 @@ def test_replace_moar(any_string_dtype):
)
tm.assert_series_equal(result, expected)
- with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
- result = ser.str.replace("A", "YYY", case=False)
+ result = ser.str.replace("A", "YYY", case=False)
expected = Series(
[
"YYY",
@@ -599,8 +632,7 @@ def test_replace_moar(any_string_dtype):
)
tm.assert_series_equal(result, expected)
- with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
- result = ser.str.replace("^.a|dog", "XX-XX ", case=False, regex=True)
+ result = ser.str.replace("^.a|dog", "XX-XX ", case=False, regex=True)
expected = Series(
[
"A",
@@ -623,13 +655,11 @@ def test_replace_not_case_sensitive_not_regex(any_string_dtype):
# https://github.com/pandas-dev/pandas/issues/41602
ser = Series(["A.", "a.", "Ab", "ab", np.nan], dtype=any_string_dtype)
- with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
- result = ser.str.replace("a", "c", case=False, regex=False)
+ result = ser.str.replace("a", "c", case=False, regex=False)
expected = Series(["c.", "c.", "cb", "cb", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)
- with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
- result = ser.str.replace("a.", "c.", case=False, regex=False)
+ result = ser.str.replace("a.", "c.", case=False, regex=False)
expected = Series(["c.", "c.", "Ab", "ab", np.nan], dtype=any_string_dtype)
tm.assert_series_equal(result, expected)
@@ -801,8 +831,7 @@ def test_fullmatch_case_kwarg(any_string_dtype):
result = ser.str.fullmatch("ab", case=False)
tm.assert_series_equal(result, expected)
- with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)):
- result = ser.str.fullmatch("ab", flags=re.IGNORECASE)
+ result = ser.str.fullmatch("ab", flags=re.IGNORECASE)
tm.assert_series_equal(result, expected)
@@ -987,17 +1016,13 @@ def test_flags_kwarg(any_string_dtype):
pat = r"([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})"
- use_pyarrow = using_pyarrow(any_string_dtype)
-
result = data.str.extract(pat, flags=re.IGNORECASE, expand=True)
assert result.iloc[0].tolist() == ["dave", "google", "com"]
- with tm.maybe_produces_warning(PerformanceWarning, use_pyarrow):
- result = data.str.match(pat, flags=re.IGNORECASE)
+ result = data.str.match(pat, flags=re.IGNORECASE)
assert result.iloc[0]
- with tm.maybe_produces_warning(PerformanceWarning, use_pyarrow):
- result = data.str.fullmatch(pat, flags=re.IGNORECASE)
+ result = data.str.fullmatch(pat, flags=re.IGNORECASE)
assert result.iloc[0]
result = data.str.findall(pat, flags=re.IGNORECASE)
@@ -1007,8 +1032,6 @@ def test_flags_kwarg(any_string_dtype):
assert result.iloc[0] == 1
msg = "has match groups"
- with tm.assert_produces_warning(
- UserWarning, match=msg, raise_on_extra_warnings=not use_pyarrow
- ):
+ with tm.assert_produces_warning(UserWarning, match=msg):
result = data.str.contains(pat, flags=re.IGNORECASE)
assert result.iloc[0]
diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py
index 0b3f368afea5e..517ddb164985c 100644
--- a/pandas/tests/strings/test_string_array.py
+++ b/pandas/tests/strings/test_string_array.py
@@ -12,7 +12,6 @@
)
-@pytest.mark.filterwarnings("ignore:Falling back")
def test_string_array(nullable_string_dtype, any_string_method):
method_name, args, kwargs = any_string_method
diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index 015df18221b40..40b6c69dc8025 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -393,6 +393,7 @@ def test_pipe_failures(any_string_dtype):
(2, 5, None, ["foo", "bar", np.nan, "baz"]),
(0, 3, -1, ["", "", np.nan, ""]),
(None, None, -1, ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"]),
+ (None, 2, -1, ["owtoo", "owtra", np.nan, "xuqza"]),
(3, 10, 2, ["oto", "ato", np.nan, "aqx"]),
(3, 0, -1, ["ofa", "aba", np.nan, "aba"]),
],
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index f3a7ba2607f4a..a7c2ec5acb7c2 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -1900,13 +1900,16 @@ def test_strobj_mode(self):
tm.assert_series_equal(ser.mode(), exp)
@pytest.mark.parametrize("dt", [str, object])
- def test_strobj_multi_char(self, dt):
+ def test_strobj_multi_char(self, dt, using_infer_string):
exp = ["bar"]
data = ["foo"] * 2 + ["bar"] * 3
ser = Series(data, dtype=dt)
exp = Series(exp, dtype=dt)
- tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
+ if using_infer_string and dt is str:
+ tm.assert_extension_array_equal(algos.mode(ser.values), exp.values)
+ else:
+ tm.assert_numpy_array_equal(algos.mode(ser.values), exp.values)
tm.assert_series_equal(ser.mode(), exp)
def test_datelike_mode(self):
diff --git a/pandas/tests/util/test_shares_memory.py b/pandas/tests/util/test_shares_memory.py
index 00a897d574a07..8f1ac93b40247 100644
--- a/pandas/tests/util/test_shares_memory.py
+++ b/pandas/tests/util/test_shares_memory.py
@@ -1,3 +1,5 @@
+import numpy as np
+
import pandas.util._test_decorators as td
import pandas as pd
@@ -20,10 +22,10 @@ def test_shares_memory_string():
# GH#55823
import pyarrow as pa
- obj = pd.array(["a", "b"], dtype="string[pyarrow]")
+ obj = pd.array(["a", "b"], dtype=pd.StringDtype("pyarrow", na_value=pd.NA))
assert tm.shares_memory(obj, obj)
- obj = pd.array(["a", "b"], dtype="string[pyarrow_numpy]")
+ obj = pd.array(["a", "b"], dtype=pd.StringDtype("pyarrow", na_value=np.nan))
assert tm.shares_memory(obj, obj)
obj = pd.array(["a", "b"], dtype=pd.ArrowDtype(pa.string()))
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
index acf636616421f..f353a7fa2f0fe 100644
--- a/pandas/tests/window/test_rolling.py
+++ b/pandas/tests/window/test_rolling.py
@@ -6,10 +6,7 @@
import numpy as np
import pytest
-from pandas._config import using_string_dtype
-
from pandas.compat import (
- HAS_PYARROW,
IS64,
is_platform_arm,
is_platform_power,
@@ -1423,9 +1420,6 @@ def test_rolling_corr_timedelta_index(index, window):
tm.assert_almost_equal(result, expected)
-@pytest.mark.xfail(
- using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
-)
def test_groupby_rolling_nan_included():
# GH 35542
data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]}