pandas-dev · jorisvandenbossche · Oct 19, 2025 · Oct 16, 2025 · Oct 16, 2025 · Oct 16, 2025
diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst
@@ -2372,11 +2372,15 @@ integers:
 
    df.select_dtypes(include=["number", "bool"], exclude=["unsignedinteger"])
 
-To select string columns you must use the ``object`` dtype:
+To select string columns include ``str``:
 
 .. ipython:: python
 
-   df.select_dtypes(include=["object"])
+   df.select_dtypes(include=[str])
+
+.. note::
+
+    This is a change in pandas 3.0. Previously strings were stored in ``object`` dtype columns, so would be selected with ``include=[object]``.  See :ref:`string_migration.object`.
 
 To see all the child dtypes of a generic ``dtype`` like ``numpy.number`` you
 can define a function that returns a tree of child dtypes:

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -717,6 +717,7 @@ Other Deprecations
 - Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`)
 - Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.unstack` and :meth:`DataFrame.unstack` (:issue:`12189`, :issue:`53868`)
 - Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`)
+- Deprecated backward-compatibility behavior for :meth:`DataFrame.select_dtypes` matching "str" dtype when ``np.object_`` is specified (:issue:`61916`)
 - Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`)
 - Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`)
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -5237,6 +5237,27 @@ def predicate(arr: ArrayLike) -> bool:
 
             return True
 
+        blk_dtypes = [blk.dtype for blk in self._mgr.blocks]
+        if (
+            np.object_ in include
+            and str not in include
+            and str not in exclude
+            and any(
+                isinstance(dtype, StringDtype) and dtype.na_value is np.nan
+                for dtype in blk_dtypes
+            )
+        ):
+            # GH#61916
+            warnings.warn(
+                "For backward compatibility, 'str' dtypes are included by "
+                "select_dtypes when 'object' dtype is specified. "
+                "This behavior is deprecated and will be removed in a future "
+                "version. Explicitly pass 'str' to `include` to select them, "
+                "or to `exclude` to remove them and silence this warning.",
+                Pandas4Warning,
+                stacklevel=find_stack_level(),
+            )
+
         mgr = self._mgr._get_data_subset(predicate).copy(deep=False)
         return self._constructor_from_mgr(mgr, axes=mgr.axes).__finalize__(self)
 

diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas.errors import Pandas4Warning
+
 from pandas.core.dtypes.dtypes import ExtensionDtype
 
 import pandas as pd
@@ -102,7 +104,12 @@ def test_select_dtypes_include_using_list_like(self, using_infer_string):
             ri = df.select_dtypes(include=[str])
             tm.assert_frame_equal(ri, ei)
 
-        ri = df.select_dtypes(include=["object"])
+        msg = "For backward compatibility, 'str' dtypes are included"
+        warn = None
+        if using_infer_string:
+            warn = Pandas4Warning
+        with tm.assert_produces_warning(warn, match=msg):
+            ri = df.select_dtypes(include=["object"])
         ei = df[["a"]]
         tm.assert_frame_equal(ri, ei)
 
@@ -312,15 +319,18 @@ def test_select_dtypes_not_an_attr_but_still_valid_dtype(self, using_infer_strin
         )
         df["g"] = df.f.diff()
         assert not hasattr(np, "u8")
-        r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"])
-        # if using_infer_string:
-        #     TODO warn
+
+        msg = "For backward compatibility, 'str' dtypes are included"
+        warn = None
+        if using_infer_string:
+            warn = Pandas4Warning
+        with tm.assert_produces_warning(warn, match=msg):
+            r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"])
         e = df[["a", "b"]]
         tm.assert_frame_equal(r, e)
 
-        r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"])
-        # if using_infer_string:
-        #     TODO warn
+        with tm.assert_produces_warning(warn, match=msg):
+            r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"])
         e = df[["a", "b", "g"]]
         tm.assert_frame_equal(r, e)
 
@@ -497,7 +507,12 @@ def test_select_dtype_object_and_str(self, using_infer_string):
         )
 
         # with "object" -> only select the object or default str dtype column
-        result = df.select_dtypes(include=["object"])
+        msg = "For backward compatibility, 'str' dtypes are included"
+        warn = None
+        if using_infer_string:
+            warn = Pandas4Warning
+        with tm.assert_produces_warning(warn, match=msg):
+            result = df.select_dtypes(include=["object"])
         expected = df[["a"]]
         tm.assert_frame_equal(result, expected)