diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index 994b351acf42c..10c1c490551fb 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -14,6 +14,7 @@
 
 import numpy as np
 
+from pandas._config import using_string_dtype
 from pandas._config.localization import (
     can_set_locale,
     get_locales,
@@ -110,7 +111,10 @@
 ALL_FLOAT_DTYPES: list[Dtype] = [*FLOAT_NUMPY_DTYPES, *FLOAT_EA_DTYPES]
 
 COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"]
-STRING_DTYPES: list[Dtype] = [str, "str", "U"]
+STRING_DTYPES: list[Dtype] = [str, "U"]
+if not using_string_dtype():
+    # "str" is only listed while the future string dtype is disabled
+    STRING_DTYPES.insert(1, "str")
 COMPLEX_FLOAT_DTYPES: list[Dtype] = [*COMPLEX_DTYPES, *FLOAT_NUMPY_DTYPES]
 
 DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"]
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index a156042ac0c0e..6c44b7759f0e2 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -570,7 +570,10 @@ def __getitem__(self, item: PositionalIndexer):
         if isinstance(item, np.ndarray):
             if not len(item):
                 # Removable once we migrate StringDtype[pyarrow] to ArrowDtype[string]
-                if self._dtype.name == "string" and self._dtype.storage == "pyarrow":
+                if (
+                    isinstance(self._dtype, StringDtype)
+                    and self._dtype.storage == "pyarrow"
+                ):
                     # TODO(infer_string) should this be large_string?
                     pa_dtype = pa.string()
                 else:
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 3cbacec9d411d..0929791ded58c 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -4,7 +4,6 @@
 from typing import (
     TYPE_CHECKING,
     Any,
-    ClassVar,
     Literal,
     cast,
 )
@@ -114,9 +113,12 @@ class StringDtype(StorageExtensionDtype):
     string[pyarrow]
     """
 
-    # error: Cannot override instance variable (previously declared on
-    # base class "StorageExtensionDtype") with class variable
-    name: ClassVar[str] = "string"  # type: ignore[misc]
+    @property
+    def name(self) -> str:  # type: ignore[override]
+        # the NaN-backed variant is exposed as "str", the pd.NA one as "string"
+        if self._na_value is not libmissing.NA:
+            return "str"
+        return "string"
 
     #: StringDtype().na_value uses pandas.NA except the implementation that
     # follows NumPy semantics, which uses nan.
@@ -133,7 +135,7 @@ def __init__(
     ) -> None:
         # infer defaults
         if storage is None:
-            if using_string_dtype() and na_value is not libmissing.NA:
+            if na_value is not libmissing.NA:
                 if HAS_PYARROW:
                     storage = "pyarrow"
                 else:
@@ -166,11 +168,19 @@ def __init__(
         self.storage = storage
         self._na_value = na_value
 
+    def __repr__(self) -> str:
+        if self._na_value is not libmissing.NA:
+            # TODO add more informative repr
+            return self.name
+        # NA variant: name followed by the storage in brackets
+        return f"{self.name}[{self.storage}]"
+
     def __eq__(self, other: object) -> bool:
         # we need to override the base class __eq__ because na_value (NA or NaN)
         # cannot be checked with normal `==`
         if isinstance(other, str):
-            if other == self.name:
+            # TODO should dtype == "string" work for the NaN variant?
+            if other == "string" or other == self.name:  # noqa: PLR1714
                 return True
             try:
                 other = self.construct_from_string(other)
@@ -227,6 +237,8 @@ def construct_from_string(cls, string) -> Self:
             )
         if string == "string":
             return cls()
+        elif string == "str" and using_string_dtype():
+            return cls(na_value=np.nan)
         elif string == "string[python]":
             return cls(storage="python")
         elif string == "string[pyarrow]":
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index afcd4d014316e..1403fc2ceaaf8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4979,7 +4979,9 @@ def select_dtypes(self, include=None, exclude=None) -> Self:
         -----
         * To select all *numeric* types, use ``np.number`` or ``'number'``
         * To select strings you must use the ``object`` dtype, but note that
-          this will return *all* object dtype columns
+          this will return *all* object dtype columns. With
+          ``pd.options.future.infer_string`` enabled, using ``"str"`` will
+          work to select all string columns.
         * See the `numpy dtype hierarchy
           <https://numpy.org/doc/stable/reference/arrays.scalars.html>`__
         * To select datetimes, use ``np.datetime64``, ``'datetime'`` or
diff --git a/pandas/core/interchange/utils.py b/pandas/core/interchange/utils.py
index fd1c7c9639242..035a1f8abdbc5 100644
--- a/pandas/core/interchange/utils.py
+++ b/pandas/core/interchange/utils.py
@@ -135,7 +135,12 @@ def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str:
     if format_str is not None:
         return format_str
 
-    if lib.is_np_dtype(dtype, "M"):
+    if isinstance(dtype, pd.StringDtype):
+        # TODO(infer_string) this should be LARGE_STRING for pyarrow storage,
+        # but current tests don't cover this distinction
+        return ArrowCTypes.STRING
+
+    elif lib.is_np_dtype(dtype, "M"):
         # Selecting the first char of resolution string:
         # dtype.str -> '<M8[ns]' -> 'n'
         resolution = np.datetime_data(dtype)[0][0]
diff --git a/pandas/tests/apply/test_numba.py b/pandas/tests/apply/test_numba.py
index aee9100702350..6ac0b49f0e4e7 100644
--- a/pandas/tests/apply/test_numba.py
+++ b/pandas/tests/apply/test_numba.py
@@ -110,7 +110,7 @@ def test_numba_unsupported_dtypes(apply_axis):
 
     with pytest.raises(
         ValueError,
-        match="Column b must have a numeric dtype. Found 'object|string' instead",
+        match="Column b must have a numeric dtype. Found '(object|str)' instead",
     ):
         df.apply(f, engine="numba", axis=apply_axis)
 
diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py
index df24fa08f48e1..69f84ca74ab0b 100644
--- a/pandas/tests/apply/test_series_apply.py
+++ b/pandas/tests/apply/test_series_apply.py
@@ -244,7 +244,7 @@ def test_apply_categorical(by_row, using_infer_string):
     result = ser.apply(lambda x: "A")
     exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
     tm.assert_series_equal(result, exp)
-    assert result.dtype == object if not using_infer_string else "string[pyarrow_numpy]"
+    assert result.dtype == (object if not using_infer_string else "str")
 
 
 @pytest.mark.parametrize("series", [["1-1", "1-1", np.nan], ["1-1", "1-2", np.nan]])
diff --git a/pandas/tests/arrays/boolean/test_astype.py b/pandas/tests/arrays/boolean/test_astype.py
index 932e903c0e448..8c2672218f273 100644
--- a/pandas/tests/arrays/boolean/test_astype.py
+++ b/pandas/tests/arrays/boolean/test_astype.py
@@ -5,7 +5,7 @@
 import pandas._testing as tm
 
 
-def test_astype():
+def test_astype(using_infer_string):
     # with missing values
     arr = pd.array([True, False, None], dtype="boolean")
 
@@ -20,8 +20,14 @@ def test_astype():
     tm.assert_numpy_array_equal(result, expected)
 
     result = arr.astype("str")
-    expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5")
-    tm.assert_numpy_array_equal(result, expected)
+    if using_infer_string:
+        expected = pd.array(
+            ["True", "False", None], dtype=pd.StringDtype(na_value=np.nan)
+        )
+        tm.assert_extension_array_equal(result, expected)
+    else:
+        expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5")
+        tm.assert_numpy_array_equal(result, expected)
 
     # no missing values
     arr = pd.array([True, False, True], dtype="boolean")
diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py
index a2a53af6ab1ad..ee930ac84aaf2 100644
--- a/pandas/tests/arrays/categorical/test_astype.py
+++ b/pandas/tests/arrays/categorical/test_astype.py
@@ -89,7 +89,7 @@ def test_astype(self, ordered):
         expected = np.array(cat)
         tm.assert_numpy_array_equal(result, expected)
 
-        msg = r"Cannot cast object|string dtype to float64"
+        msg = r"Cannot cast (object|str) dtype to float64"
         with pytest.raises(ValueError, match=msg):
             cat.astype(float)
 
diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py
index e2e5d47f50209..3a2c489920eb0 100644
--- a/pandas/tests/arrays/categorical/test_repr.py
+++ b/pandas/tests/arrays/categorical/test_repr.py
@@ -22,7 +22,7 @@ def test_print(self, using_infer_string):
         if using_infer_string:
             expected = [
                 "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
-                "Categories (3, string): [a < b < c]",
+                "Categories (3, str): [a < b < c]",
             ]
         else:
             expected = [
diff --git a/pandas/tests/arrays/floating/test_astype.py b/pandas/tests/arrays/floating/test_astype.py
index ade3dbd2c99da..ccf644b34051d 100644
--- a/pandas/tests/arrays/floating/test_astype.py
+++ b/pandas/tests/arrays/floating/test_astype.py
@@ -63,12 +63,21 @@ def test_astype_to_integer_array():
     tm.assert_extension_array_equal(result, expected)
 
 
-def test_astype_str():
+def test_astype_str(using_infer_string):
     a = pd.array([0.1, 0.2, None], dtype="Float64")
-    expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")
 
-    tm.assert_numpy_array_equal(a.astype(str), expected)
-    tm.assert_numpy_array_equal(a.astype("str"), expected)
+    if using_infer_string:
+        expected = pd.array(["0.1", "0.2", None], dtype=pd.StringDtype(na_value=np.nan))
+        tm.assert_extension_array_equal(a.astype("str"), expected)
+
+        # TODO(infer_string) this should also be a string array like above
+        expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")
+        tm.assert_numpy_array_equal(a.astype(str), expected)
+    else:
+        expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")
+
+        tm.assert_numpy_array_equal(a.astype(str), expected)
+        tm.assert_numpy_array_equal(a.astype("str"), expected)
 
 
 def test_astype_copy():
diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py
index 8620763988e06..7be00e569b3fe 100644
--- a/pandas/tests/arrays/integer/test_dtypes.py
+++ b/pandas/tests/arrays/integer/test_dtypes.py
@@ -278,12 +278,21 @@ def test_to_numpy_na_raises(dtype):
         a.to_numpy(dtype=dtype)
 
 
-def test_astype_str():
+def test_astype_str(using_infer_string):
     a = pd.array([1, 2, None], dtype="Int64")
-    expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")
 
-    tm.assert_numpy_array_equal(a.astype(str), expected)
-    tm.assert_numpy_array_equal(a.astype("str"), expected)
+    if using_infer_string:
+        expected = pd.array(["1", "2", None], dtype=pd.StringDtype(na_value=np.nan))
+        tm.assert_extension_array_equal(a.astype("str"), expected)
+
+        # TODO(infer_string) this should also be a string array like above
+        expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")
+        tm.assert_numpy_array_equal(a.astype(str), expected)
+    else:
+        expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")
+
+        tm.assert_numpy_array_equal(a.astype(str), expected)
+        tm.assert_numpy_array_equal(a.astype("str"), expected)
 
 
 def test_astype_boolean():
diff --git a/pandas/tests/arrays/interval/test_interval_pyarrow.py b/pandas/tests/arrays/interval/test_interval_pyarrow.py
index be87d5d3ef7ba..ef8701be81e2b 100644
--- a/pandas/tests/arrays/interval/test_interval_pyarrow.py
+++ b/pandas/tests/arrays/interval/test_interval_pyarrow.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 import pandas._testing as tm
 from pandas.core.arrays import IntervalArray
@@ -82,7 +80,6 @@ def test_arrow_array_missing():
     assert result.storage.equals(expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 @pytest.mark.filterwarnings(
     "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
 )
diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py
index ff86b696c8403..431309aca0df2 100644
--- a/pandas/tests/arrays/period/test_arrow_compat.py
+++ b/pandas/tests/arrays/period/test_arrow_compat.py
@@ -1,7 +1,5 @@
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.compat.pyarrow import pa_version_under10p1
 
 from pandas.core.dtypes.dtypes import PeriodDtype
@@ -79,7 +77,6 @@ def test_arrow_array_missing():
     assert result.storage.equals(expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_arrow_table_roundtrip():
     from pandas.core.arrays.arrow.extension_types import ArrowPeriodType
 
@@ -99,7 +96,6 @@ def test_arrow_table_roundtrip():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_arrow_load_from_zero_chunks():
     # GH-41040
 
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index b51b01c2b5168..1296cc3b5a494 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -65,7 +65,7 @@ def test_repr(dtype):
     assert repr(df) == expected
 
     if dtype.na_value is np.nan:
-        expected = "0      a\n1    NaN\n2      b\nName: A, dtype: string"
+        expected = "0      a\n1    NaN\n2      b\nName: A, dtype: str"
     else:
         expected = "0       a\n1    <NA>\n2       b\nName: A, dtype: string"
     assert repr(df.A) == expected
@@ -75,10 +75,10 @@ def test_repr(dtype):
         expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
     elif dtype.storage == "pyarrow" and dtype.na_value is np.nan:
         arr_name = "ArrowStringArrayNumpySemantics"
-        expected = f"<{arr_name}>\n['a', nan, 'b']\nLength: 3, dtype: string"
+        expected = f"<{arr_name}>\n['a', nan, 'b']\nLength: 3, dtype: str"
     elif dtype.storage == "python" and dtype.na_value is np.nan:
         arr_name = "StringArrayNumpySemantics"
-        expected = f"<{arr_name}>\n['a', nan, 'b']\nLength: 3, dtype: string"
+        expected = f"<{arr_name}>\n['a', nan, 'b']\nLength: 3, dtype: str"
     else:
         arr_name = "StringArray"
         expected = f"<{arr_name}>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
@@ -502,7 +502,7 @@ def test_fillna_args(dtype):
     tm.assert_extension_array_equal(res, expected)
 
     if dtype.storage == "pyarrow":
-        msg = "Invalid value '1' for dtype string"
+        msg = "Invalid value '1' for dtype str"
     else:
         msg = "Cannot set non-string value '1' into a StringArray."
     with pytest.raises(TypeError, match=msg):
@@ -524,7 +524,7 @@ def test_arrow_array(dtype):
     assert arr.equals(expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
 def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
     # roundtrip possible from arrow 1.0.0
@@ -539,14 +539,17 @@ def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
         assert table.field("a").type == "large_string"
     with pd.option_context("string_storage", string_storage):
         result = table.to_pandas()
-    assert isinstance(result["a"].dtype, pd.StringDtype)
-    expected = df.astype(f"string[{string_storage}]")
-    tm.assert_frame_equal(result, expected)
-    # ensure the missing value is represented by NA and not np.nan or None
-    assert result.loc[2, "a"] is result["a"].dtype.na_value
+    if dtype.na_value is np.nan and not using_string_dtype():
+        assert result["a"].dtype == "object"
+    else:
+        assert isinstance(result["a"].dtype, pd.StringDtype)
+        expected = df.astype(f"string[{string_storage}]")
+        tm.assert_frame_equal(result, expected)
+        # ensure the missing value is represented by NA and not np.nan or None
+        assert result.loc[2, "a"] is result["a"].dtype.na_value
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
 def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
     # GH-41040
@@ -563,9 +566,13 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
     table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema)
     with pd.option_context("string_storage", string_storage):
         result = table.to_pandas()
-    assert isinstance(result["a"].dtype, pd.StringDtype)
-    expected = df.astype(f"string[{string_storage}]")
-    tm.assert_frame_equal(result, expected)
+
+    if dtype.na_value is np.nan and not using_string_dtype():
+        assert result["a"].dtype == "object"
+    else:
+        assert isinstance(result["a"].dtype, pd.StringDtype)
+        expected = df.astype(f"string[{string_storage}]")
+        tm.assert_frame_equal(result, expected)
 
 
 def test_value_counts_na(dtype):
diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py
index d38b728aaf120..8d5c16e448cee 100644
--- a/pandas/tests/arrays/string_/test_string_arrow.py
+++ b/pandas/tests/arrays/string_/test_string_arrow.py
@@ -4,6 +4,7 @@
 import numpy as np
 import pytest
 
+from pandas.compat import HAS_PYARROW
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -27,8 +28,9 @@ def test_eq_all_na():
 
 
 def test_config(string_storage, request, using_infer_string):
-    if using_infer_string and string_storage == "python":
-        # python string storage with na_value=NaN is not yet implemented
+    if using_infer_string and string_storage == "python" and HAS_PYARROW:
+        # string storage with na_value=NaN always uses pyarrow if available
+        # -> does not yet honor the option
         request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
 
     with pd.option_context("string_storage", string_storage):
diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
index 4961123a7ca07..360ab960088ed 100644
--- a/pandas/tests/arrays/test_datetimelike.py
+++ b/pandas/tests/arrays/test_datetimelike.py
@@ -295,7 +295,9 @@ def test_searchsorted(self):
         assert result == 10
 
     @pytest.mark.parametrize("box", [None, "index", "series"])
-    def test_searchsorted_castable_strings(self, arr1d, box, string_storage):
+    def test_searchsorted_castable_strings(
+        self, arr1d, box, string_storage, using_infer_string
+    ):
         arr = arr1d
         if box is None:
             pass
@@ -331,7 +333,8 @@ def test_searchsorted_castable_strings(self, arr1d, box, string_storage):
                 TypeError,
                 match=re.escape(
                     f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', "
-                    "or array of those. Got string array instead."
+                    "or array of those. Got "
+                    f"{'str' if using_infer_string else 'string'} array instead."
                 ),
             ):
                 arr.searchsorted([str(arr[1]), "baz"])
diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
index c34c97b6e4f04..e0232bb292d6e 100644
--- a/pandas/tests/dtypes/test_common.py
+++ b/pandas/tests/dtypes/test_common.py
@@ -799,3 +799,22 @@ def test_pandas_dtype_ea_not_instance():
     # GH 31356 GH 54592
     with tm.assert_produces_warning(UserWarning):
         assert pandas_dtype(CategoricalDtype) == CategoricalDtype()
+
+
+def test_pandas_dtype_string_dtypes(string_storage):
+    # TODO(infer_string) remove skip if "python" is supported
+    pytest.importorskip("pyarrow")
+    storage_option = ("string_storage", string_storage)
+
+    with pd.option_context("future.infer_string", True, *storage_option):
+        result = pandas_dtype("str")
+    # TODO(infer_string) hardcoded to pyarrow until python is supported
+    assert result == pd.StringDtype("pyarrow", na_value=np.nan)
+
+    with pd.option_context("future.infer_string", False, *storage_option):
+        result = pandas_dtype("str")
+    assert result == np.dtype("U")
+
+    with pd.option_context(*storage_option):
+        result = pandas_dtype("string")
+    assert result == pd.StringDtype(string_storage, na_value=pd.NA)
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index e522d2666a2dc..a4916ed1bbd8a 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -1062,7 +1062,7 @@ def test_str_vs_repr(self, ordered, using_infer_string):
         c1 = CategoricalDtype(["a", "b"], ordered=ordered)
         assert str(c1) == "category"
         # Py2 will have unicode prefixes
-        dtype = "string" if using_infer_string else "object"
+        dtype = "str" if using_infer_string else "object"
         pat = (
             r"CategoricalDtype\(categories=\[.*\], ordered={ordered}, "
             rf"categories_dtype={dtype}\)"
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index 1102d9d941663..f800f734ec9d9 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -116,6 +116,20 @@ def test_is_not_string_type(self, dtype):
         # because StringDtype is a string type
         assert is_string_dtype(dtype)
 
+    def test_is_dtype_from_name(self, dtype, using_infer_string):
+        if using_infer_string or dtype.na_value is not np.nan:
+            super().test_is_dtype_from_name(dtype)
+            return
+        # without infer_string the NaN variant's name is not recognised
+        assert type(dtype).is_dtype(dtype.name) is False
+
+    def test_construct_from_string_own_name(self, dtype, using_infer_string):
+        if using_infer_string or dtype.na_value is not np.nan:
+            super().test_construct_from_string_own_name(dtype)
+            return
+        with pytest.raises(TypeError, match="Cannot construct a 'StringDtype'"):
+            dtype.construct_from_string(dtype.name)
+
     def test_view(self, data):
         if data.dtype.storage == "pyarrow":
             pytest.skip(reason="2D support not implemented for ArrowStringArray")
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 09f359df37dd1..ec00044b84c49 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -337,7 +337,7 @@ def test_setitem(
                 smaller["col10"] = ["1", "2"]
 
         if using_infer_string:
-            assert smaller["col10"].dtype == "string"
+            assert smaller["col10"].dtype == "str"
         else:
             assert smaller["col10"].dtype == np.object_
         assert (smaller["col10"] == ["1", "2"]).all()
@@ -472,13 +472,13 @@ def test_setitem_corner(self, float_frame, using_infer_string):
         del dm["foo"]
         dm["foo"] = "bar"
         if using_infer_string:
-            assert dm["foo"].dtype == "string"
+            assert dm["foo"].dtype == "str"
         else:
             assert dm["foo"].dtype == np.object_
 
         dm["coercible"] = ["1", "2", "3"]
         if using_infer_string:
-            assert dm["coercible"].dtype == "string"
+            assert dm["coercible"].dtype == "str"
         else:
             assert dm["coercible"].dtype == np.object_
 
@@ -514,7 +514,7 @@ def test_setitem_ambig(self, using_infer_string):
         dm[2] = uncoercable_series
         assert len(dm.columns) == 3
         if using_infer_string:
-            assert dm[2].dtype == "string"
+            assert dm[2].dtype == "str"
         else:
             assert dm[2].dtype == np.object_
 
diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py
index ce771280bc264..3d23e13264911 100644
--- a/pandas/tests/frame/indexing/test_set_value.py
+++ b/pandas/tests/frame/indexing/test_set_value.py
@@ -28,7 +28,7 @@ def test_set_value_resize(self, float_frame, using_infer_string):
         res = float_frame.copy()
         res._set_value("foobar", "baz", "sam")
         if using_infer_string:
-            assert res["baz"].dtype == "string"
+            assert res["baz"].dtype == "str"
         else:
             assert res["baz"].dtype == np.object_
         res = float_frame.copy()
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index ea9cc22d93758..9c27e76de91b2 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -202,7 +202,7 @@ def test_astype_dict_like(self, dtype_class):
         expected = DataFrame(
             {
                 "a": a,
-                "b": Series(["0", "1", "2", "3", "4"], dtype="object"),
+                "b": Series(["0", "1", "2", "3", "4"], dtype="str"),
                 "c": c,
                 "d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float32"),
             }
@@ -263,9 +263,9 @@ def test_astype_duplicate_col(self):
         a2 = Series([0, 1, 2, 3, 4], name="a")
         df = concat([a1, b, a2], axis=1)
 
-        result = df.astype(str)
+        result = df.astype("str")
         a1_str = Series(["1", "2", "3", "4", "5"], dtype="str", name="a")
-        b_str = Series(["0.1", "0.2", "0.4", "0.6", "0.8"], dtype=str, name="b")
+        b_str = Series(["0.1", "0.2", "0.4", "0.6", "0.8"], dtype="str", name="b")
         a2_str = Series(["0", "1", "2", "3", "4"], dtype="str", name="a")
         expected = concat([a1_str, b_str, a2_str], axis=1)
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py
index c5d32d56d03c1..6d097e75f6703 100644
--- a/pandas/tests/frame/methods/test_get_numeric_data.py
+++ b/pandas/tests/frame/methods/test_get_numeric_data.py
@@ -33,7 +33,9 @@ def test_get_numeric_data(self, using_infer_string):
             [
                 np.dtype("float64"),
                 np.dtype("int64"),
-                np.dtype(objectname) if not using_infer_string else "string",
+                np.dtype(objectname)
+                if not using_infer_string
+                else pd.StringDtype(na_value=np.nan),
                 np.dtype(datetime64name),
             ],
             index=["a", "b", "c", "f"],
diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
index 3ba893501914a..54f2e45488b78 100644
--- a/pandas/tests/frame/methods/test_nlargest.py
+++ b/pandas/tests/frame/methods/test_nlargest.py
@@ -86,7 +86,7 @@ def test_nlargest_n(self, df_strings, nselect_method, n, order):
         df = df_strings
         if "b" in order:
             error_msg = (
-                f"Column 'b' has dtype (object|string), "
+                f"Column 'b' has dtype (object|str), "
                 f"cannot use method '{nselect_method}' with this dtype"
             )
             with pytest.raises(TypeError, match=error_msg):
diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py
index 44d7bbf57fe0a..8d93c97b6b68a 100644
--- a/pandas/tests/frame/methods/test_reset_index.py
+++ b/pandas/tests/frame/methods/test_reset_index.py
@@ -664,7 +664,7 @@ def test_reset_index_dtypes_on_empty_frame_with_multiindex(
     idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
     result = DataFrame(index=idx)[:0].reset_index().dtypes
     if using_infer_string and dtype == object:
-        dtype = "string"
+        dtype = pd.StringDtype(na_value=np.nan)
     expected = Series({"level_0": np.int64, "level_1": np.float64, "level_2": dtype})
     tm.assert_series_equal(result, expected)
 
@@ -697,7 +697,7 @@ def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby(
     expected["c3"] = expected["c3"].astype("datetime64[ns]")
     expected["c1"] = expected["c1"].astype("float64")
     if using_infer_string:
-        expected["c2"] = expected["c2"].astype("string[pyarrow_numpy]")
+        expected["c2"] = expected["c2"].astype("str")
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
index d1bee6a3de613..875dca321635f 100644
--- a/pandas/tests/frame/methods/test_select_dtypes.py
+++ b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -50,7 +50,7 @@ def copy(self):
 
 
 class TestSelectDtypes:
-    def test_select_dtypes_include_using_list_like(self):
+    def test_select_dtypes_include_using_list_like(self, using_infer_string):
         df = DataFrame(
             {
                 "a": list("abc"),
@@ -94,6 +94,11 @@ def test_select_dtypes_include_using_list_like(self):
         with pytest.raises(NotImplementedError, match=r"^$"):
             df.select_dtypes(include=["period"])
 
+        if using_infer_string:
+            ri = df.select_dtypes(include=["str"])
+            ei = df[["a"]]
+            tm.assert_frame_equal(ri, ei)
+
     def test_select_dtypes_exclude_using_list_like(self):
         df = DataFrame(
             {
@@ -151,7 +156,7 @@ def test_select_dtypes_exclude_include_int(self, include):
         expected = df[["b", "c", "e"]]
         tm.assert_frame_equal(result, expected)
 
-    def test_select_dtypes_include_using_scalars(self):
+    def test_select_dtypes_include_using_scalars(self, using_infer_string):
         df = DataFrame(
             {
                 "a": list("abc"),
@@ -187,6 +192,11 @@ def test_select_dtypes_include_using_scalars(self):
         with pytest.raises(NotImplementedError, match=r"^$"):
             df.select_dtypes(include="period")
 
+        if using_infer_string:
+            ri = df.select_dtypes(include="str")
+            ei = df[["a"]]
+            tm.assert_frame_equal(ri, ei)
+
     def test_select_dtypes_exclude_using_scalars(self):
         df = DataFrame(
             {
@@ -347,7 +357,10 @@ def test_select_dtypes_datetime_with_tz(self):
 
     @pytest.mark.parametrize("dtype", [str, "str", np.bytes_, "S1", np.str_, "U1"])
     @pytest.mark.parametrize("arg", ["include", "exclude"])
-    def test_select_dtypes_str_raises(self, dtype, arg):
+    def test_select_dtypes_str_raises(self, dtype, arg, using_infer_string):
+        if using_infer_string and dtype == "str":
+            # this is tested below
+            pytest.skip("Selecting string columns works with future strings")
         df = DataFrame(
             {
                 "a": list("abc"),
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
index bed8b030bc72a..20a8e95f990ec 100644
--- a/pandas/tests/frame/methods/test_to_csv.py
+++ b/pandas/tests/frame/methods/test_to_csv.py
@@ -697,10 +697,7 @@ def test_to_csv_interval_index(self, using_infer_string):
 
             # can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
             expected = df.copy()
-            if using_infer_string:
-                expected.index = expected.index.astype("string[pyarrow_numpy]")
-            else:
-                expected.index = expected.index.astype(str)
+            expected.index = expected.index.astype("str")
 
             tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
index 9bd61736624ca..0766e927a64a9 100644
--- a/pandas/tests/frame/test_block_internals.py
+++ b/pandas/tests/frame/test_block_internals.py
@@ -209,7 +209,9 @@ def test_construction_with_mixed(self, float_string_frame, using_infer_string):
         expected = Series(
             [np.dtype("float64")] * 4
             + [
-                np.dtype("object") if not using_infer_string else "string",
+                np.dtype("object")
+                if not using_infer_string
+                else pd.StringDtype(na_value=np.nan),
                 np.dtype("datetime64[us]"),
                 np.dtype("timedelta64[us]"),
             ],
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index aab900f6eef47..c9eb2d5ca7be4 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -265,7 +265,7 @@ def test_emptylike_constructor(self, emptylike, expected_index, expected_columns
         tm.assert_frame_equal(result, expected)
 
     def test_constructor_mixed(self, float_string_frame, using_infer_string):
-        dtype = "string" if using_infer_string else np.object_
+        dtype = "str" if using_infer_string else np.object_
         assert float_string_frame["foo"].dtype == dtype
 
     def test_constructor_cast_failure(self):
@@ -789,7 +789,7 @@ def test_constructor_dict_cast(self, using_infer_string):
 
         frame = DataFrame(test_data)
         assert len(frame) == 3
-        assert frame["B"].dtype == np.object_ if not using_infer_string else "string"
+        assert frame["B"].dtype == (np.object_ if not using_infer_string else "str")
         assert frame["A"].dtype == np.float64
 
     def test_constructor_dict_cast2(self):
@@ -1209,7 +1209,7 @@ def test_constructor_scalar_inference(self, using_infer_string):
         assert df["bool"].dtype == np.bool_
         assert df["float"].dtype == np.float64
         assert df["complex"].dtype == np.complex128
-        assert df["object"].dtype == np.object_ if not using_infer_string else "string"
+        assert df["object"].dtype == (np.object_ if not using_infer_string else "str")
 
     def test_constructor_arrays_and_scalars(self):
         df = DataFrame({"a": np.random.default_rng(2).standard_normal(10), "b": True})
@@ -1292,7 +1292,7 @@ def test_constructor_list_of_lists(self, using_infer_string):
         # GH #484
         df = DataFrame(data=[[1, "a"], [2, "b"]], columns=["num", "str"])
         assert is_integer_dtype(df["num"])
-        assert df["str"].dtype == np.object_ if not using_infer_string else "string"
+        assert df["str"].dtype == (np.object_ if not using_infer_string else "str")
 
         # GH 4851
         # list of 0-dim ndarrays
@@ -1860,7 +1860,12 @@ def test_constructor_with_datetimes(self, using_infer_string):
         result = df.dtypes
         expected = Series(
             [np.dtype("int64")]
-            + [np.dtype(objectname) if not using_infer_string else "string"] * 2
+            + [
+                np.dtype(objectname)
+                if not using_infer_string
+                else pd.StringDtype(na_value=np.nan)
+            ]
+            * 2
             + [np.dtype("M8[s]"), np.dtype("M8[us]")],
             index=list("ABCDE"),
         )
@@ -1882,7 +1887,11 @@ def test_constructor_with_datetimes(self, using_infer_string):
         expected = Series(
             [np.dtype("float64")]
             + [np.dtype("int64")]
-            + [np.dtype("object") if not using_infer_string else "string"]
+            + [
+                np.dtype("object")
+                if not using_infer_string
+                else pd.StringDtype(na_value=np.nan)
+            ]
             + [np.dtype("float64")]
             + [np.dtype(intname)],
             index=["a", "b", "c", floatname, intname],
@@ -1904,7 +1913,11 @@ def test_constructor_with_datetimes(self, using_infer_string):
         expected = Series(
             [np.dtype("float64")]
             + [np.dtype("int64")]
-            + [np.dtype("object") if not using_infer_string else "string"]
+            + [
+                np.dtype("object")
+                if not using_infer_string
+                else pd.StringDtype(na_value=np.nan)
+            ]
             + [np.dtype("float64")]
             + [np.dtype(intname)],
             index=["a", "b", "c", floatname, intname],
@@ -2124,7 +2137,9 @@ def test_constructor_for_list_with_dtypes(self, using_infer_string):
             [
                 np.dtype("int64"),
                 np.dtype("float64"),
-                np.dtype("object") if not using_infer_string else "string",
+                np.dtype("object")
+                if not using_infer_string
+                else pd.StringDtype(na_value=np.nan),
                 np.dtype("datetime64[ns]"),
                 np.dtype("float64"),
             ],
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
index 75ef348b75deb..2c3e9c1d5e327 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -657,7 +657,11 @@ def test_unstack_dtypes(self, using_infer_string):
         df2["D"] = "foo"
         df3 = df2.unstack("B")
         result = df3.dtypes
-        dtype = "string" if using_infer_string else np.dtype("object")
+        dtype = (
+            pd.StringDtype(na_value=np.nan)
+            if using_infer_string
+            else np.dtype("object")
+        )
         expected = Series(
             [np.dtype("float64")] * 2 + [dtype] * 2,
             index=MultiIndex.from_arrays(
diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py
index d8401a8b2ae3f..9fe9bca8abdc9 100644
--- a/pandas/tests/generic/test_to_xarray.py
+++ b/pandas/tests/generic/test_to_xarray.py
@@ -52,7 +52,7 @@ def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
         # column names are lost
         expected = df.copy()
         expected["f"] = expected["f"].astype(
-            object if not using_infer_string else "string[pyarrow_numpy]"
+            object if not using_infer_string else "str"
         )
         expected.columns.name = None
         tm.assert_frame_equal(result.to_dataframe(), expected)
@@ -81,7 +81,7 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string):
         result = result.to_dataframe()
         expected = df.copy()
         expected["f"] = expected["f"].astype(
-            object if not using_infer_string else "string[pyarrow_numpy]"
+            object if not using_infer_string else "str"
         )
         expected.columns.name = None
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 4972a6b3afa17..d91510d834e6c 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -6,8 +6,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -79,7 +77,7 @@ def test_apply_index_date(using_infer_string):
     tm.assert_frame_equal(result, expected)
 
 
-def test_apply_index_date_object(using_infer_string):
+def test_apply_index_date_object():
     # GH 5789
     # don't auto coerce dates
     ts = [
@@ -111,10 +109,7 @@ def test_apply_index_date_object(using_infer_string):
         1.40750,
         1.40649,
     ]
-    dtype = "string[pyarrow_numpy]" if using_infer_string else object
-    exp_idx = Index(
-        ["2011-05-16", "2011-05-17", "2011-05-18"], dtype=dtype, name="date"
-    )
+    exp_idx = Index(["2011-05-16", "2011-05-17", "2011-05-18"], name="date")
     expected = Series(["00:00", "02:00", "02:00"], index=exp_idx)
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
     with tm.assert_produces_warning(DeprecationWarning, match=msg):
@@ -942,12 +937,11 @@ def test_func_returns_object():
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 @pytest.mark.parametrize(
     "group_column_dtlike",
     [datetime.today(), datetime.today().date(), datetime.today().time()],
 )
-def test_apply_datetime_issue(group_column_dtlike, using_infer_string):
+def test_apply_datetime_issue(group_column_dtlike):
     # GH-28247
     # groupby-apply throws an error if one of the columns in the DataFrame
     #   is a datetime object and the column labels are different from
@@ -958,8 +952,7 @@ def test_apply_datetime_issue(group_column_dtlike, using_infer_string):
     with tm.assert_produces_warning(DeprecationWarning, match=msg):
         result = df.groupby("a").apply(lambda x: Series(["spam"], index=[42]))
 
-    dtype = "string" if using_infer_string else "object"
-    expected = DataFrame(["spam"], Index(["foo"], dtype=dtype, name="a"), columns=[42])
+    expected = DataFrame(["spam"], Index(["foo"], dtype="str", name="a"), columns=[42])
     tm.assert_frame_equal(result, expected)
 
 
@@ -1040,7 +1033,7 @@ def test_groupby_apply_datetime_result_dtypes(using_infer_string):
     msg = "DataFrameGroupBy.apply operated on the grouping columns"
     with tm.assert_produces_warning(DeprecationWarning, match=msg):
         result = data.groupby("color").apply(lambda g: g.iloc[0]).dtypes
-    dtype = "string" if using_infer_string else object
+    dtype = pd.StringDtype(na_value=np.nan) if using_infer_string else object
     expected = Series(
         [np.dtype("datetime64[ns]"), dtype, dtype, np.int64, dtype],
         index=["observation", "color", "mood", "intensity", "score"],
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 1073dda954563..c70995de7b3b2 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -131,7 +131,7 @@ def f(x):
         result = g.apply(f)
     expected = x.iloc[[0, 1]].copy()
     expected.index = Index([1, 2], name="person_id")
-    dtype = "string[pyarrow_numpy]" if using_infer_string else object
+    dtype = "str" if using_infer_string else object
     expected["person_name"] = expected["person_name"].astype(dtype)
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 4381b36b0b73a..dc1658e9acf3b 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1216,7 +1216,7 @@ def test_groupby_complex_mean():
     tm.assert_frame_equal(result, expected)
 
 
-def test_groupby_complex_numbers(using_infer_string):
+def test_groupby_complex_numbers():
     # GH 17927
     df = DataFrame(
         [
@@ -1225,11 +1225,10 @@ def test_groupby_complex_numbers(using_infer_string):
             {"a": 4, "b": 1},
         ]
     )
-    dtype = "string[pyarrow_numpy]" if using_infer_string else object
     expected = DataFrame(
         np.array([1, 1, 1], dtype=np.int64),
         index=Index([(1 + 1j), (1 + 2j), (1 + 0j)], name="b"),
-        columns=Index(["a"], dtype=dtype),
+        columns=Index(["a"]),
     )
     result = df.groupby("b", sort=False).count()
     tm.assert_frame_equal(result, expected)
@@ -2097,7 +2096,7 @@ def get_categorical_invalid_expected():
             idx = Index(lev, name=keys[0])
 
         if using_infer_string:
-            columns = Index([], dtype="string[pyarrow_numpy]")
+            columns = Index([], dtype="str")
         else:
             columns = []
         expected = DataFrame([], columns=columns, index=idx)
diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py
index ff4685b1e412d..029d322e4fdc3 100644
--- a/pandas/tests/groupby/test_numeric_only.py
+++ b/pandas/tests/groupby/test_numeric_only.py
@@ -181,6 +181,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     "category type does not support sum operations",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
                     re.escape(f"agg function failed [how->{method},dtype->string]"),
+                    re.escape(f"agg function failed [how->{method},dtype->str]"),
                 ]
             )
             with pytest.raises(exception, match=msg):
@@ -198,6 +199,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     f"Cannot perform {method} with non-ordered Categorical",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
                     re.escape(f"agg function failed [how->{method},dtype->string]"),
+                    re.escape(f"agg function failed [how->{method},dtype->str]"),
                 ]
             )
             with pytest.raises(exception, match=msg):
diff --git a/pandas/tests/indexes/base_class/test_formats.py b/pandas/tests/indexes/base_class/test_formats.py
index b2f345e5e6f77..955e3be107f75 100644
--- a/pandas/tests/indexes/base_class/test_formats.py
+++ b/pandas/tests/indexes/base_class/test_formats.py
@@ -9,7 +9,6 @@
 
 
 class TestIndexRendering:
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_repr_is_valid_construction_code(self):
         # for the case of Index, where the repr is traditional rather than
         # stylized
diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py
index 8456e6a7acba5..b1180f2d7af14 100644
--- a/pandas/tests/indexes/multi/test_constructors.py
+++ b/pandas/tests/indexes/multi/test_constructors.py
@@ -851,7 +851,7 @@ def test_dtype_representation(using_infer_string):
     # GH#46900
     pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")])
     result = pmidx.dtypes
-    exp = "object" if not using_infer_string else "string"
+    exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
     expected = Series(
         ["int64", exp],
         index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]),
diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py
index 6eeaeb6711d03..17ca876487330 100644
--- a/pandas/tests/indexes/multi/test_get_set.py
+++ b/pandas/tests/indexes/multi/test_get_set.py
@@ -41,7 +41,7 @@ def test_get_dtypes(using_infer_string):
         names=["int", "string", "dt"],
     )
 
-    exp = "object" if not using_infer_string else "string"
+    exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
     expected = pd.Series(
         {
             "int": np.dtype("int64"),
@@ -61,7 +61,7 @@ def test_get_dtypes_no_level_name(using_infer_string):
             pd.date_range("20200101", periods=2, tz="UTC"),
         ],
     )
-    exp = "object" if not using_infer_string else "string"
+    exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
     expected = pd.Series(
         {
             "level_0": np.dtype("int64"),
@@ -82,7 +82,7 @@ def test_get_dtypes_duplicate_level_names(using_infer_string):
         ],
         names=["A", "A", "A"],
     ).dtypes
-    exp = "object" if not using_infer_string else "string"
+    exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
     expected = pd.Series(
         [np.dtype("int64"), exp, DatetimeTZDtype(tz="utc")],
         index=["A", "A", "A"],
diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py
index ebf9dac715f8d..493a5be735d1a 100644
--- a/pandas/tests/indexes/object/test_indexing.py
+++ b/pandas/tests/indexes/object/test_indexing.py
@@ -170,6 +170,7 @@ def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
 
 
 class TestSliceLocs:
+    # TODO(infer_string) parametrize over multiple string dtypes
     @pytest.mark.parametrize(
         "dtype",
         [
@@ -208,6 +209,7 @@ def test_slice_locs_negative_step(self, in_slice, expected, dtype):
         expected = Index(list(expected), dtype=dtype)
         tm.assert_index_equal(result, expected)
 
+    # TODO(infer_string) parametrize over multiple string dtypes
     @td.skip_if_no("pyarrow")
     def test_slice_locs_negative_step_oob(self):
         index = Index(list("bcdxy"), dtype="string[pyarrow_numpy]")
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 7eeb626d91dc8..4d02ec853e0da 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -79,7 +79,7 @@ def test_constructor_copy(self, using_infer_string):
         assert new_index.name == "name"
         if using_infer_string:
             tm.assert_extension_array_equal(
-                new_index.values, pd.array(arr, dtype="string[pyarrow_numpy]")
+                new_index.values, pd.array(arr, dtype="str")
             )
         else:
             tm.assert_numpy_array_equal(arr, new_index.values)
@@ -160,7 +160,7 @@ def test_constructor_from_frame_series_freq(self, using_infer_string):
         df = DataFrame(np.random.default_rng(2).random((5, 3)))
         df["date"] = dts
         result = DatetimeIndex(df["date"], freq="MS")
-        dtype = object if not using_infer_string else "string"
+        dtype = object if not using_infer_string else "str"
         assert df["date"].dtype == dtype
         expected.name = "date"
         tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py
index 5508153322adb..fa5ec63dd32fe 100644
--- a/pandas/tests/indexing/multiindex/test_loc.py
+++ b/pandas/tests/indexing/multiindex/test_loc.py
@@ -588,7 +588,7 @@ def test_loc_nan_multiindex(using_infer_string):
         np.ones((1, 4)),
         index=Index(
             [np.nan],
-            dtype="object" if not using_infer_string else "string[pyarrow_numpy]",
+            dtype="object" if not using_infer_string else "str",
             name="u3",
         ),
         columns=Index(["d1", "d2", "d3", "d4"]),
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index d2c8454019a5e..908e95accfb0f 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -294,7 +294,7 @@ def test_dups_fancy_indexing_only_missing_label(self, using_infer_string):
             with pytest.raises(
                 KeyError,
                 match=re.escape(
-                    "\"None of [Index(['E'], dtype='string')] are in the [index]\""
+                    "\"None of [Index(['E'], dtype='str')] are in the [index]\""
                 ),
             ):
                 dfnu.loc[["E"]]
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 41431c0e2813b..34d827a209dae 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -63,12 +63,17 @@ def test_not_change_nan_loc(series, new_series, expected_ser):
 
 
 class TestLoc:
-    def test_none_values_on_string_columns(self):
+    def test_none_values_on_string_columns(self, using_infer_string):
         # Issue #32218
-        df = DataFrame(["1", "2", None], columns=["a"], dtype="str")
-
+        df = DataFrame(["1", "2", None], columns=["a"], dtype=object)
         assert df.loc[2, "a"] is None
 
+        df = DataFrame(["1", "2", None], columns=["a"], dtype="str")
+        if using_infer_string:
+            assert np.isnan(df.loc[2, "a"])
+        else:
+            assert df.loc[2, "a"] is None
+
     @pytest.mark.parametrize("kind", ["series", "frame"])
     def test_loc_getitem_int(self, kind, request):
         # int label
@@ -1460,7 +1465,7 @@ def test_loc_setitem_single_row_categorical(self, using_infer_string):
 
         result = df["Alpha"]
         expected = Series(categories, index=df.index, name="Alpha").astype(
-            object if not using_infer_string else "string[pyarrow_numpy]"
+            object if not using_infer_string else "str"
         )
         tm.assert_series_equal(result, expected)
 
@@ -1635,7 +1640,7 @@ def test_loc_setitem_single_column_mixed(self, using_infer_string):
         df.loc[df.index[::2], "str"] = np.nan
         expected = Series(
             [np.nan, "qux", np.nan, "qux", np.nan],
-            dtype=object if not using_infer_string else "string[pyarrow_numpy]",
+            dtype=object if not using_infer_string else "str",
         ).values
         tm.assert_almost_equal(df["str"].values, expected)
 
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index ca551024b4c1f..5fcb71d0186a6 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -227,7 +227,7 @@ def test_partial_set_empty_frame_empty_consistencies(self, using_infer_string):
             {
                 "x": Series(
                     ["1", "2"],
-                    dtype=object if not using_infer_string else "string[pyarrow_numpy]",
+                    dtype=object if not using_infer_string else "str",
                 ),
                 "y": Series([np.nan, np.nan], dtype=object),
             }
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index ce88bae6e02f2..30c5d3177c5a5 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -626,7 +626,7 @@ def _compare(old_mgr, new_mgr):
         mgr.iset(1, np.array(["2."] * N, dtype=np.object_))
         mgr.iset(2, np.array(["foo."] * N, dtype=np.object_))
         new_mgr = mgr.convert(copy=True)
-        dtype = "string[pyarrow_numpy]" if using_infer_string else np.object_
+        dtype = "str" if using_infer_string else np.object_
         assert new_mgr.iget(0).dtype == dtype
         assert new_mgr.iget(1).dtype == dtype
         assert new_mgr.iget(2).dtype == dtype
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
index 7ecddb18a61ec..57091b268a9db 100644
--- a/pandas/tests/io/excel/test_writers.py
+++ b/pandas/tests/io/excel/test_writers.py
@@ -766,7 +766,7 @@ def test_to_excel_interval_no_labels(self, path, using_infer_string):
 
         df["new"] = pd.cut(df[0], 10)
         expected["new"] = pd.cut(expected[0], 10).astype(
-            str if not using_infer_string else "string[pyarrow_numpy]"
+            str if not using_infer_string else "str"
         )
 
         df.to_excel(path, sheet_name="test1")
diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py
index 53e819ac5eaff..1e47b3bc38737 100644
--- a/pandas/tests/io/json/test_json_table_schema.py
+++ b/pandas/tests/io/json/test_json_table_schema.py
@@ -75,7 +75,7 @@ def test_build_table_schema(self, df_schema, using_infer_string):
             "primaryKey": ["idx"],
         }
         if using_infer_string:
-            expected["fields"][2] = {"name": "B", "type": "any", "extDtype": "string"}
+            expected["fields"][2] = {"name": "B", "type": "any", "extDtype": "str"}
         assert result == expected
         result = build_table_schema(df_schema)
         assert "pandas_version" in result
@@ -128,7 +128,7 @@ def test_multiindex(self, df_schema, using_infer_string):
                 "type": "any",
                 "extDtype": "string",
             }
-            expected["fields"][3] = {"name": "B", "type": "any", "extDtype": "string"}
+            expected["fields"][3] = {"name": "B", "type": "any", "extDtype": "str"}
         assert result == expected
 
         df.index.names = ["idx0", None]
@@ -311,7 +311,7 @@ def test_to_json(self, df_table, using_infer_string):
         ]
 
         if using_infer_string:
-            fields[2] = {"name": "B", "type": "any", "extDtype": "string"}
+            fields[2] = {"name": "B", "type": "any", "extDtype": "str"}
 
         schema = {"fields": fields, "primaryKey": ["idx"]}
         data = [
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index a1d2e93e7c523..cb94111aedffd 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -266,7 +266,7 @@ def test_roundtrip_categorical(
 
         expected = categorical_frame.copy()
         expected.index = expected.index.astype(
-            str if not using_infer_string else "string[pyarrow_numpy]"
+            str if not using_infer_string else "str"
         )  # Categorical not preserved
         expected.index.name = None  # index names aren't preserved in JSON
         assert_json_roundtrip_equal(result, expected, orient)
@@ -621,7 +621,7 @@ def test_blocks_compat_GH9037(self, using_infer_string):
 
         # JSON deserialisation always creates unicode strings
         df_mixed.columns = df_mixed.columns.astype(
-            np.str_ if not using_infer_string else "string[pyarrow_numpy]"
+            np.str_ if not using_infer_string else "str"
         )
         data = StringIO(df_mixed.to_json(orient="split"))
         df_roundtrip = read_json(data, orient="split")
@@ -706,7 +706,7 @@ def test_series_roundtrip_simple(self, orient, string_series, using_infer_string
         expected = string_series
         if using_infer_string and orient in ("split", "index", "columns"):
             # These schemas don't contain dtypes, so we infer string
-            expected.index = expected.index.astype("string[pyarrow_numpy]")
+            expected.index = expected.index.astype("str")
         if orient in ("values", "records"):
             expected = expected.reset_index(drop=True)
         if orient != "split":
@@ -1492,7 +1492,6 @@ def test_from_json_to_json_table_index_and_columns(self, index, columns):
         result = read_json(StringIO(dfjson), orient="table")
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_from_json_to_json_table_dtypes(self):
         # GH21345
         expected = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
diff --git a/pandas/tests/io/pytables/test_keys.py b/pandas/tests/io/pytables/test_keys.py
index 55bd3f0d5a03a..7d0802dcf2e47 100644
--- a/pandas/tests/io/pytables/test_keys.py
+++ b/pandas/tests/io/pytables/test_keys.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
 from pandas import (
     DataFrame,
     HDFStore,
@@ -13,7 +15,10 @@
     tables,
 )
 
-pytestmark = pytest.mark.single_cpu
+pytestmark = [
+    pytest.mark.single_cpu,
+    pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
+]
 
 
 def test_keys(setup_path):
diff --git a/pandas/tests/io/pytables/test_subclass.py b/pandas/tests/io/pytables/test_subclass.py
index 03622faa2b5a8..bbe1cd77e0d9f 100644
--- a/pandas/tests/io/pytables/test_subclass.py
+++ b/pandas/tests/io/pytables/test_subclass.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
 from pandas import (
     DataFrame,
     Series,
@@ -17,6 +19,7 @@
 
 class TestHDFStoreSubclass:
     # GH 33748
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_supported_for_subclass_dataframe(self, tmp_path):
         data = {"a": [1, 2], "b": [3, 4]}
         sdf = tm.SubclassedDataFrame(data, dtype=np.intp)
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index 56707560c2fda..75ecd1d929d58 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -377,6 +377,7 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
                     expected = f_path.read()
                     assert result == expected
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_write_fspath_hdf5(self):
         # Same test as write_fspath_all, except HDF5 files aren't
         # necessarily byte-for-byte identical for a given dataframe, so we'll
diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py
index 65f4156cedf49..19b60e17d3a92 100644
--- a/pandas/tests/io/test_fsspec.py
+++ b/pandas/tests/io/test_fsspec.py
@@ -168,6 +168,7 @@ def test_excel_options(fsspectest):
     assert fsspectest.test[0] == "read"
 
 
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_to_parquet_new_file(cleared_fs, df1):
     """Regression test for writing to a not-yet-existent GCS Parquet file."""
     pytest.importorskip("fastparquet")
@@ -277,7 +278,6 @@ def test_not_present_exception():
         read_csv("memory://test/test.csv")
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_feather_options(fsspectest):
     pytest.importorskip("pyarrow")
     df = DataFrame({"a": [0]})
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index a7ae9c7049702..96bc0326b23ab 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -197,6 +197,7 @@ def test_to_csv_compression_encoding_gcs(
     tm.assert_frame_equal(df, read_df)
 
 
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_to_parquet_gcs_new_file(monkeypatch, tmpdir):
     """Regression test for writing to a not-yet-existent GCS Parquet file."""
     pytest.importorskip("fastparquet")
diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py
index b2d96cb1d9133..a85576ff13f5c 100644
--- a/pandas/tests/io/xml/test_xml_dtypes.py
+++ b/pandas/tests/io/xml/test_xml_dtypes.py
@@ -4,8 +4,6 @@
 
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.errors import ParserWarning
 import pandas.util._test_decorators as td
 
@@ -85,7 +83,6 @@ def read_xml_iterparse(data, **kwargs):
 # DTYPE
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_dtype_single_str(parser):
     df_result = read_xml(StringIO(xml_types), dtype={"degrees": "str"}, parser=parser)
     df_iter = read_xml_iterparse(
@@ -211,7 +208,6 @@ def test_wrong_dtype(xml_books, parser, iterparse):
         )
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_both_dtype_converters(parser):
     df_expected = DataFrame(
         {
diff --git a/pandas/tests/reshape/concat/test_categorical.py b/pandas/tests/reshape/concat/test_categorical.py
index bbaaf0abecfbd..8e6a14e6bfb8f 100644
--- a/pandas/tests/reshape/concat/test_categorical.py
+++ b/pandas/tests/reshape/concat/test_categorical.py
@@ -59,9 +59,7 @@ def test_categorical_concat_dtypes(self, using_infer_string):
         num = Series([1, 2, 3])
         df = pd.concat([Series(cat), obj, num], axis=1, keys=index)
 
-        result = df.dtypes == (
-            object if not using_infer_string else "string[pyarrow_numpy]"
-        )
+        result = df.dtypes == (object if not using_infer_string else "str")
         expected = Series([False, True, False], index=index)
         tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py
index 30ef0a934157b..9560087615123 100644
--- a/pandas/tests/reshape/concat/test_empty.py
+++ b/pandas/tests/reshape/concat/test_empty.py
@@ -27,7 +27,7 @@ def test_handle_empty_objects(self, sort, using_infer_string):
 
         expected = df.reindex(columns=["a", "b", "c", "d", "foo"])
         expected["foo"] = expected["foo"].astype(
-            object if not using_infer_string else "string[pyarrow_numpy]"
+            object if not using_infer_string else "str"
         )
         expected.loc[0:4, "foo"] = "bar"
 
@@ -284,7 +284,7 @@ def test_concat_empty_dataframe_different_dtypes(self, using_infer_string):
 
         result = concat([df1[:0], df2[:0]])
         assert result["a"].dtype == np.int64
-        assert result["b"].dtype == np.object_ if not using_infer_string else "string"
+        assert result["b"].dtype == (np.object_ if not using_infer_string else "str")
 
     def test_concat_to_empty_ea(self):
         """48510 `concat` to an empty EA should maintain type EA dtype."""
diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py
index 52bb9fa0f151b..49c94168d203e 100644
--- a/pandas/tests/reshape/concat/test_index.py
+++ b/pandas/tests/reshape/concat/test_index.py
@@ -452,9 +452,7 @@ def test_concat_axis_1_sort_false_rangeindex(self, using_infer_string):
         s1 = Series(["a", "b", "c"])
         s2 = Series(["a", "b"])
         s3 = Series(["a", "b", "c", "d"])
-        s4 = Series(
-            [], dtype=object if not using_infer_string else "string[pyarrow_numpy]"
-        )
+        s4 = Series([], dtype=object if not using_infer_string else "str")
         result = concat(
             [s1, s2, s3, s4], sort=False, join="outer", ignore_index=False, axis=1
         )
@@ -465,7 +463,7 @@ def test_concat_axis_1_sort_false_rangeindex(self, using_infer_string):
                 ["c", np.nan] * 2,
                 [np.nan] * 2 + ["d"] + [np.nan],
             ],
-            dtype=object if not using_infer_string else "string[pyarrow_numpy]",
+            dtype=object if not using_infer_string else "str",
         )
         tm.assert_frame_equal(
             result, expected, check_index_type=True, check_column_type=True
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
index db5a0437a14f0..91f0cf6c31085 100644
--- a/pandas/tests/reshape/merge/test_join.py
+++ b/pandas/tests/reshape/merge/test_join.py
@@ -156,7 +156,7 @@ def test_join_on(self, target_source, infer_string):
         # overlap
         source_copy = source.copy()
         msg = (
-            "You are trying to merge on float64 and object|string columns for key "
+            "You are trying to merge on float64 and (object|str) columns for key "
             "'A'. If you wish to proceed you should use pd.concat"
         )
         with pytest.raises(ValueError, match=msg):
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index ed49f3b758cc5..8a9fe9f3e2cfd 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -826,7 +826,7 @@ def test_overlapping_columns_error_message(self):
 
         # #2649, #10639
         df2.columns = ["key1", "foo", "foo"]
-        msg = r"Data columns not unique: Index\(\['foo'\], dtype='object|string'\)"
+        msg = r"Data columns not unique: Index\(\['foo'\], dtype='(object|str)'\)"
         with pytest.raises(MergeError, match=msg):
             merge(df, df2)
 
@@ -1877,7 +1877,7 @@ def test_identical(self, left, using_infer_string):
         # merging on the same, should preserve dtypes
         merged = merge(left, left, on="X")
         result = merged.dtypes.sort_index()
-        dtype = np.dtype("O") if not using_infer_string else "string"
+        dtype = np.dtype("O") if not using_infer_string else "str"
         expected = Series(
             [CategoricalDtype(categories=["foo", "bar"]), dtype, dtype],
             index=["X", "Y_x", "Y_y"],
@@ -1889,7 +1889,7 @@ def test_basic(self, left, right, using_infer_string):
         # so should preserve the merged column
         merged = merge(left, right, on="X")
         result = merged.dtypes.sort_index()
-        dtype = np.dtype("O") if not using_infer_string else "string"
+        dtype = np.dtype("O") if not using_infer_string else "str"
         expected = Series(
             [
                 CategoricalDtype(categories=["foo", "bar"]),
@@ -2003,7 +2003,7 @@ def test_other_columns(self, left, right, using_infer_string):
 
         merged = merge(left, right, on="X")
         result = merged.dtypes.sort_index()
-        dtype = np.dtype("O") if not using_infer_string else "string"
+        dtype = np.dtype("O") if not using_infer_string else "str"
         expected = Series(
             [
                 CategoricalDtype(categories=["foo", "bar"]),
@@ -2040,7 +2040,7 @@ def test_dtype_on_merged_different(
         merged = merge(left, right, on="X", how=join_type)
 
         result = merged.dtypes.sort_index()
-        dtype = np.dtype("O") if not using_infer_string else "string"
+        dtype = np.dtype("O") if not using_infer_string else "str"
         expected = Series([dtype, dtype, np.dtype("int64")], index=["X", "Y", "Z"])
         tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py
index 0865e3cfa8149..11e29f4e10dc4 100644
--- a/pandas/tests/reshape/merge/test_merge_asof.py
+++ b/pandas/tests/reshape/merge/test_merge_asof.py
@@ -3183,7 +3183,7 @@ def test_by_nullable(self, any_numeric_ea_dtype, using_infer_string):
         )
         expected["value_y"] = np.array([np.nan, np.nan, np.nan], dtype=object)
         if using_infer_string:
-            expected["value_y"] = expected["value_y"].astype("string[pyarrow_numpy]")
+            expected["value_y"] = expected["value_y"].astype("str")
         tm.assert_frame_equal(result, expected)
 
     def test_merge_by_col_tz_aware(self):
@@ -3234,7 +3234,7 @@ def test_by_mixed_tz_aware(self, using_infer_string):
         )
         expected["value_y"] = np.array([np.nan], dtype=object)
         if using_infer_string:
-            expected["value_y"] = expected["value_y"].astype("string[pyarrow_numpy]")
+            expected["value_y"] = expected["value_y"].astype("str")
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("dtype", ["float64", "int16", "m8[ns]", "M8[us]"])
diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py
index b12438b6327ad..6009b263a83c5 100644
--- a/pandas/tests/reshape/test_from_dummies.py
+++ b/pandas/tests/reshape/test_from_dummies.py
@@ -336,7 +336,7 @@ def test_no_prefix_string_cats_default_category(
     dummies = DataFrame({"a": [1, 0, 0], "b": [0, 1, 0]})
     result = from_dummies(dummies, default_category=default_category)
     if using_infer_string:
-        expected[""] = expected[""].astype("string[pyarrow_numpy]")
+        expected[""] = expected[""].astype("str")
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/reshape/test_get_dummies.py b/pandas/tests/reshape/test_get_dummies.py
index 3d9b3a6d1c7a2..2c17b7f6a5a47 100644
--- a/pandas/tests/reshape/test_get_dummies.py
+++ b/pandas/tests/reshape/test_get_dummies.py
@@ -122,7 +122,7 @@ def test_get_dummies_basic_types(self, sparse, dtype, using_infer_string):
 
         result = get_dummies(s_df, columns=["a"], sparse=sparse, dtype=dtype)
 
-        key = "string" if using_infer_string else "object"
+        key = "str" if using_infer_string else "object"
         expected_counts = {"int64": 1, key: 1}
         expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0)
 
diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py
index f0803ac2f2a30..0dd2c227d6aa7 100644
--- a/pandas/tests/series/accessors/test_dt_accessor.py
+++ b/pandas/tests/series/accessors/test_dt_accessor.py
@@ -599,7 +599,7 @@ def test_strftime_period_days(self, using_infer_string):
             dtype="=U10",
         )
         if using_infer_string:
-            expected = expected.astype("string[pyarrow_numpy]")
+            expected = expected.astype("str")
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
diff --git a/pandas/tests/series/indexing/test_delitem.py b/pandas/tests/series/indexing/test_delitem.py
index 3d1082c3d040b..7440ef2692c47 100644
--- a/pandas/tests/series/indexing/test_delitem.py
+++ b/pandas/tests/series/indexing/test_delitem.py
@@ -31,16 +31,15 @@ def test_delitem(self):
         del s[0]
         tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))
 
-    def test_delitem_object_index(self, using_infer_string):
+    def test_delitem_object_index(self):
         # Index(dtype=object)
-        dtype = "string[pyarrow_numpy]" if using_infer_string else object
-        s = Series(1, index=Index(["a"], dtype=dtype))
+        s = Series(1, index=Index(["a"], dtype="str"))
         del s["a"]
-        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))
+        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="str")))
         s["a"] = 1
-        tm.assert_series_equal(s, Series(1, index=Index(["a"], dtype=dtype)))
+        tm.assert_series_equal(s, Series(1, index=Index(["a"], dtype="str")))
         del s["a"]
-        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))
+        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="str")))
 
     def test_delitem_missing_key(self):
         # empty
diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py
index 596a225c288b8..9783dcd2fea07 100644
--- a/pandas/tests/series/indexing/test_getitem.py
+++ b/pandas/tests/series/indexing/test_getitem.py
@@ -363,9 +363,7 @@ def test_getitem_no_matches(self, box):
         key = Series(["C"], dtype=object)
         key = box(key)
 
-        msg = (
-            r"None of \[Index\(\['C'\], dtype='object|string'\)\] are in the \[index\]"
-        )
+        msg = r"None of \[Index\(\['C'\], dtype='object|str'\)\] are in the \[index\]"
         with pytest.raises(KeyError, match=msg):
             ser[key]
 
diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
index fb8e5c31929b2..e2c27fe5575db 100644
--- a/pandas/tests/series/indexing/test_setitem.py
+++ b/pandas/tests/series/indexing/test_setitem.py
@@ -624,7 +624,7 @@ def test_setitem_enlargement_object_none(self, nulls_fixture, using_infer_string
         ser = Series(["a", "b"])
         ser[3] = nulls_fixture
         dtype = (
-            "string[pyarrow_numpy]"
+            "str"
             if using_infer_string and not isinstance(nulls_fixture, Decimal)
             else object
         )
diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py
index 4c8028e74ee55..ef0757ffe4aa8 100644
--- a/pandas/tests/series/methods/test_astype.py
+++ b/pandas/tests/series/methods/test_astype.py
@@ -538,12 +538,12 @@ def test_astype_categorical_to_other(self):
         expected = ser
         tm.assert_series_equal(ser.astype("category"), expected)
         tm.assert_series_equal(ser.astype(CategoricalDtype()), expected)
-        msg = r"Cannot cast object|string dtype to float64"
+        msg = r"Cannot cast (object|str) dtype to float64"
         with pytest.raises(ValueError, match=msg):
             ser.astype("float64")
 
         cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
-        exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"], dtype=object)
+        exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"], dtype="str")
         tm.assert_series_equal(cat.astype("str"), exp)
         s2 = Series(Categorical(["1", "2", "3", "4"]))
         exp2 = Series([1, 2, 3, 4]).astype("int")
diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py
index 251d4063008b9..ac489b2579e05 100644
--- a/pandas/tests/series/methods/test_map.py
+++ b/pandas/tests/series/methods/test_map.py
@@ -101,16 +101,16 @@ def test_map_series_stringdtype(any_string_dtype, using_infer_string):
 
     expected = Series(data=["rabbit", "dog", "cat", item], dtype=any_string_dtype)
     if using_infer_string and any_string_dtype == "object":
-        expected = expected.astype("string[pyarrow_numpy]")
+        expected = expected.astype("str")
 
     tm.assert_series_equal(result, expected)
 
 
 @pytest.mark.parametrize(
     "data, expected_dtype",
-    [(["1-1", "1-1", np.nan], "category"), (["1-1", "1-2", np.nan], object)],
+    [(["1-1", "1-1", np.nan], "category"), (["1-1", "1-2", np.nan], "str")],
 )
-def test_map_categorical_with_nan_values(data, expected_dtype, using_infer_string):
+def test_map_categorical_with_nan_values(data, expected_dtype):
     # GH 20714 bug fixed in: GH 24275
     def func(val):
         return val.split("-")[0]
@@ -118,8 +118,6 @@ def func(val):
     s = Series(data, dtype="category")
 
     result = s.map(func, na_action="ignore")
-    if using_infer_string and expected_dtype == object:
-        expected_dtype = "string[pyarrow_numpy]"
     expected = Series(["1", "1", np.nan], dtype=expected_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -145,9 +143,7 @@ def test_map_simple_str_callables_same_as_astype(
     # test that we are evaluating row-by-row first
     # before vectorized evaluation
     result = string_series.map(func)
-    expected = string_series.astype(
-        str if not using_infer_string else "string[pyarrow_numpy]"
-    )
+    expected = string_series.astype(str if not using_infer_string else "str")
     tm.assert_series_equal(result, expected)
 
 
@@ -497,7 +493,7 @@ def test_map_categorical(na_action, using_infer_string):
     result = s.map(lambda x: "A", na_action=na_action)
     exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
     tm.assert_series_equal(result, exp)
-    assert result.dtype == object if not using_infer_string else "string"
+    assert result.dtype == (object if not using_infer_string else "str")
 
 
 @pytest.mark.parametrize(
diff --git a/pandas/tests/series/methods/test_rename.py b/pandas/tests/series/methods/test_rename.py
index 119654bd19b3f..a8f3862d39f07 100644
--- a/pandas/tests/series/methods/test_rename.py
+++ b/pandas/tests/series/methods/test_rename.py
@@ -64,7 +64,7 @@ def test_rename_set_name_inplace(self, using_infer_string):
             assert ser.name == name
             exp = np.array(["a", "b", "c"], dtype=np.object_)
             if using_infer_string:
-                exp = array(exp, dtype="string[pyarrow_numpy]")
+                exp = array(exp, dtype="str")
                 tm.assert_extension_array_equal(ser.index.values, exp)
             else:
                 tm.assert_numpy_array_equal(ser.index.values, exp)
diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py
index 48e2608a1032a..fa571fa126b38 100644
--- a/pandas/tests/series/methods/test_reset_index.py
+++ b/pandas/tests/series/methods/test_reset_index.py
@@ -193,7 +193,7 @@ def test_reset_index_dtypes_on_empty_series_with_multiindex(
     # GH 19602 - Preserve dtype on empty Series with MultiIndex
     idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
     result = Series(dtype=object, index=idx)[:0].reset_index().dtypes
-    exp = "string" if using_infer_string else object
+    exp = "str" if using_infer_string else object
     expected = Series(
         {
             "level_0": np.int64,
diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py
index 999dd90d337d9..efb249fdedf3d 100644
--- a/pandas/tests/series/methods/test_to_csv.py
+++ b/pandas/tests/series/methods/test_to_csv.py
@@ -177,9 +177,6 @@ def test_to_csv_interval_index(self, using_infer_string):
             result = self.read_csv(path, index_col=0)
 
             # can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
-            expected = s.copy()
-            if using_infer_string:
-                expected.index = expected.index.astype("string[pyarrow_numpy]")
-            else:
-                expected.index = expected.index.astype(str)
+            expected = s
+            expected.index = expected.index.astype("str")
             tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 0aaa8ddcfda0c..6efe0bcb8b45d 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -167,7 +167,7 @@ def test_constructor(self, datetime_series, using_infer_string):
 
         # Mixed type Series
         mixed = Series(["hello", np.nan], index=[0, 1])
-        assert mixed.dtype == np.object_ if not using_infer_string else "string"
+        assert mixed.dtype == (np.object_ if not using_infer_string else "str")
         assert np.isnan(mixed[1])
 
         assert not empty_series.index._is_all_dates
@@ -1469,7 +1469,7 @@ def test_fromDict(self, using_infer_string):
 
         data = {"a": 0, "b": "1", "c": "2", "d": "3"}
         series = Series(data)
-        assert series.dtype == np.object_ if not using_infer_string else "string"
+        assert series.dtype == np.object_  # mixed int/str values stay object
 
         data = {"a": "0", "b": "1"}
         series = Series(data, dtype=float)
@@ -1481,7 +1481,7 @@ def test_fromValue(self, datetime_series, using_infer_string):
         assert len(nans) == len(datetime_series)
 
         strings = Series("foo", index=datetime_series.index)
-        assert strings.dtype == np.object_ if not using_infer_string else "string"
+        assert strings.dtype == (np.object_ if not using_infer_string else "str")
         assert len(strings) == len(datetime_series)
 
         d = datetime.now()
@@ -2141,6 +2141,11 @@ def test_series_string_inference_storage_definition(self):
             result = Series(["a", "b"], dtype="string")
         tm.assert_series_equal(result, expected)
 
+        expected = Series(["a", "b"], dtype=pd.StringDtype(na_value=np.nan))
+        with pd.option_context("future.infer_string", True):
+            result = Series(["a", "b"], dtype="str")
+        tm.assert_series_equal(result, expected)
+
     def test_series_constructor_infer_string_scalar(self):
         # GH#55537
         with pd.option_context("future.infer_string", True):
diff --git a/pandas/tests/series/test_formats.py b/pandas/tests/series/test_formats.py
index 4939f3221d268..77e77a9337d63 100644
--- a/pandas/tests/series/test_formats.py
+++ b/pandas/tests/series/test_formats.py
@@ -323,7 +323,7 @@ def test_categorical_repr(self, using_infer_string):
                 "0     a\n1     b\n"
                 "     ..\n"
                 "48    a\n49    b\n"
-                "Length: 50, dtype: category\nCategories (2, string): [a, b]"
+                "Length: 50, dtype: category\nCategories (2, str): [a, b]"
             )
         else:
             exp = (
@@ -341,7 +341,7 @@ def test_categorical_repr(self, using_infer_string):
             exp = (
                 "0    a\n1    b\n"
                 "dtype: category\n"
-                "Categories (26, string): [a < b < c < d ... w < x < y < z]"
+                "Categories (26, str): [a < b < c < d ... w < x < y < z]"
             )
         else:
             exp = (
diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py
index bf88da04b73ff..51ce73ef54300 100644
--- a/pandas/tests/test_downstream.py
+++ b/pandas/tests/test_downstream.py
@@ -8,8 +8,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.errors import IntCastingNaNError
 import pandas.util._test_decorators as td
 
@@ -167,7 +165,6 @@ def test_pandas_datareader():
     pytest.importorskip("pandas_datareader")
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 @pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
 def test_pyarrow(df):
     pyarrow = pytest.importorskip("pyarrow")
diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py
index 79132591b15b3..dd5218ab9404f 100644
--- a/pandas/tests/util/test_assert_frame_equal.py
+++ b/pandas/tests/util/test_assert_frame_equal.py
@@ -111,7 +111,7 @@ def test_empty_dtypes(check_dtype):
 @pytest.mark.parametrize("check_like", [True, False])
 def test_frame_equal_index_mismatch(check_like, obj_fixture, using_infer_string):
     if using_infer_string:
-        dtype = "string"
+        dtype = "str"
     else:
         dtype = "object"
     msg = f"""{obj_fixture}\\.index are different
@@ -131,7 +131,7 @@ def test_frame_equal_index_mismatch(check_like, obj_fixture, using_infer_string)
 @pytest.mark.parametrize("check_like", [True, False])
 def test_frame_equal_columns_mismatch(check_like, obj_fixture, using_infer_string):
     if using_infer_string:
-        dtype = "string"
+        dtype = "str"
     else:
         dtype = "object"
     msg = f"""{obj_fixture}\\.columns are different
diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py
index dc6efdcec380e..ab52d6c8e9f39 100644
--- a/pandas/tests/util/test_assert_index_equal.py
+++ b/pandas/tests/util/test_assert_index_equal.py
@@ -207,7 +207,7 @@ def test_index_equal_names(name1, name2):
 
 def test_index_equal_category_mismatch(check_categorical, using_infer_string):
     if using_infer_string:
-        dtype = "string"
+        dtype = "str"
     else:
         dtype = "object"
     msg = f"""Index are different
diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py
index 1878e7d838064..0d56885a1cb84 100644
--- a/pandas/tests/util/test_assert_series_equal.py
+++ b/pandas/tests/util/test_assert_series_equal.py
@@ -221,9 +221,9 @@ def test_series_equal_categorical_values_mismatch(rtol, using_infer_string):
 Series values are different \\(66\\.66667 %\\)
 \\[index\\]: \\[0, 1, 2\\]
 \\[left\\]:  \\['a', 'b', 'c'\\]
-Categories \\(3, string\\): \\[a, b, c\\]
+Categories \\(3, str\\): \\[a, b, c\\]
 \\[right\\]: \\['a', 'c', 'b'\\]
-Categories \\(3, string\\): \\[a, b, c\\]"""
+Categories \\(3, str\\): \\[a, b, c\\]"""
     else:
         msg = """Series are different
 
@@ -258,7 +258,7 @@ def test_series_equal_datetime_values_mismatch(rtol):
 
 def test_series_equal_categorical_mismatch(check_categorical, using_infer_string):
     if using_infer_string:
-        dtype = "string"
+        dtype = "str"
     else:
         dtype = "object"
     msg = f"""Attributes of Series are different
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
index fe2da210c6fe9..948565be36b5b 100644
--- a/pandas/tests/window/test_api.py
+++ b/pandas/tests/window/test_api.py
@@ -71,7 +71,7 @@ def test_sum_object_str_raises(step):
     df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
     r = df.rolling(window=3, step=step)
     with pytest.raises(
-        DataError, match="Cannot aggregate non-numeric type: object|string"
+        DataError, match="Cannot aggregate non-numeric type: (object|str)"
     ):
         # GH#42738, enforced in 2.0
         r.sum()