From 4094088e4b993c388db15a82ba590c654b2db756 Mon Sep 17 00:00:00 2001 From: Santhosh Kumar Bethi Date: Sun, 12 Oct 2025 20:35:31 -0400 Subject: [PATCH 1/9] fix assert frame eual for NA and dtype is false-61473 --- pandas/_testing/asserters.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index c8f3bb6bd77d2..b8c25ff50af3f 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -882,6 +882,9 @@ def assert_series_equal( Second Series to compare. check_dtype : bool, default True Whether to check the Series dtype is identical. + check_dtype : bool, if False + Whether to check the series is equal or not if dtype is + False. (#GH #61473) check_index_type : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. @@ -1025,6 +1028,21 @@ def assert_series_equal( pass else: assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") + + if not check_dtype: + # checks only when check_dtype is False + lv, rv = left._values, right._values + lv = np.asarray(lv, dtype=object) + rv = np.asarray(rv, dtype=object) + assert_numpy_array_equal( + lv, + rv, + check_dtype=check_dtype, + obj=str(obj), + index_values=left.index, + ) + check_exact = False + if check_exact: left_values = left._values right_values = right._values From 12759b4bb4dbcfbb0e5fc1bd1083c77e72f7f6e2 Mon Sep 17 00:00:00 2001 From: Santhosh Kumar Bethi Date: Sun, 12 Oct 2025 20:35:52 -0400 Subject: [PATCH 2/9] added test cases for 61473 fix --- pandas/tests/util/test_assert_frame_equal.py | 31 ++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 8711365a19214..4d7425dffd676 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -413,3 +413,34 @@ def test_datetimelike_compat_deprecated(): tm.assert_series_equal(df["a"], df["a"], check_datetimelike_compat=True) with tm.assert_produces_warning(Pandas4Warning, match=msg): tm.assert_series_equal(df["a"], df["a"], check_datetimelike_compat=False) + + +def test_assert_frame_equal_na_object_vs_int32_check_dtype_false(): + #GH# 61473 + df1 = pd.DataFrame({"a": pd.Series([pd.NA], dtype="Int32")}) + df2 = pd.DataFrame({"a": pd.Series([pd.NA], dtype="object")}) + tm.assert_frame_equal(df1, df2, check_dtype=False) + + +def test_assert_frame_equal_object_vs_int32_check_dtype_false(): + #GH# 61473 + df1 = pd.DataFrame({"a": pd.Series([pd.NA,0], dtype="Int32")}) + df2 = pd.DataFrame({"a": pd.Series([pd.NA,0], dtype="object")}) + tm.assert_frame_equal(df1, df2, check_dtype=False) + + +def test_assert_frame_not_equal_object_vs_int32_check_dtype_false(): + #GH# 61473 + df1 = pd.DataFrame({"a": pd.Series([pd.NA,0], dtype="Int32")}) + df2 = pd.DataFrame({"a": pd.Series([pd.NA,1], dtype="object")}) + msg = ( + r"""DataFrame\.iloc\[:, 0\] \(column name="a"\) are different + +DataFrame\.iloc\[:, 0\] \(column name="a"\) values are different \(50\.0 %\) +\[index\]: \[0, 1\] +\[left\]: \[, 0\] +\[right\]: \[, 1\]""" +) + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, check_dtype=False) + From 890af41aa6fe2a45e6128fc16c12407f959113a7 Mon Sep 17 00:00:00 2001 From: Santhosh Kumar Bethi Date: Sun, 12 Oct 2025 20:51:11 -0400 Subject: [PATCH 3/9] adding entry in rst file --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 448ceffdaa1eb..56d8248b8f511 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1194,6 +1194,7 @@ Other - Bug in :func:`to_numeric` raising ``TypeError`` when ``arg`` is a :class:`Timedelta` or :class:`Timestamp` scalar. (:issue:`59944`) - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) - Bug in :meth:`DataFrame.apply` raising ``RecursionError`` when passing ``func=list[int]``. (:issue:`61565`) +- Bug in :func:`assert_frame_equal` fails when comparing two DFs containing pd.NA that only differ in dtype and dtype is false (:issue:`61473`) - Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`) From 7f8411828e7d1464fd5919711753ece81546d07f Mon Sep 17 00:00:00 2001 From: Santhosh Kumar Bethi Date: Sun, 12 Oct 2025 21:03:16 -0400 Subject: [PATCH 4/9] updating pd.dataframe to DataFreame in test cases --- pandas/tests/util/test_assert_frame_equal.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 4d7425dffd676..011b0587304ef 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -417,22 +417,22 @@ def test_datetimelike_compat_deprecated(): def test_assert_frame_equal_na_object_vs_int32_check_dtype_false(): #GH# 61473 - df1 = pd.DataFrame({"a": pd.Series([pd.NA], dtype="Int32")}) - df2 = pd.DataFrame({"a": pd.Series([pd.NA], dtype="object")}) + df1 = DataFrame({"a": pd.Series([pd.NA], dtype="Int32")}) + df2 = DataFrame({"a": pd.Series([pd.NA], dtype="object")}) tm.assert_frame_equal(df1, df2, check_dtype=False) def test_assert_frame_equal_object_vs_int32_check_dtype_false(): #GH# 61473 - df1 = pd.DataFrame({"a": pd.Series([pd.NA,0], dtype="Int32")}) - df2 = pd.DataFrame({"a": pd.Series([pd.NA,0], dtype="object")}) + df1 = DataFrame({"a": pd.Series([pd.NA,0], dtype="Int32")}) + df2 = DataFrame({"a": pd.Series([pd.NA,0], dtype="object")}) tm.assert_frame_equal(df1, df2, check_dtype=False) def test_assert_frame_not_equal_object_vs_int32_check_dtype_false(): #GH# 61473 - df1 = pd.DataFrame({"a": pd.Series([pd.NA,0], dtype="Int32")}) - df2 = pd.DataFrame({"a": pd.Series([pd.NA,1], dtype="object")}) + df1 = DataFrame({"a": pd.Series([pd.NA,0], dtype="Int32")}) + df2 = DataFrame({"a": pd.Series([pd.NA,1], dtype="object")}) msg = ( r"""DataFrame\.iloc\[:, 0\] \(column name="a"\) are different From 484c1b415412ef9fbb14fea17ad7f0332620c514 Mon Sep 17 00:00:00 2001 From: Santhosh Kumar Bethi Date: Sun, 12 Oct 2025 21:22:45 -0400 Subject: [PATCH 5/9] making alphabatical order in rst file --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 56d8248b8f511..024ce8dd7f96b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1187,6 +1187,7 @@ Other ^^^^^ - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`) +- Bug in :func:`assert_frame_equal` fails when comparing two DFs containing pd.NA that only differ in dtype and dtype is false (:issue:`61473`) - Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`) - Bug in :func:`eval` where method calls on binary operations like ``(x + y).dropna()`` would raise ``AttributeError: 'BinOp' object has no attribute 'value'`` (:issue:`61175`) - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) @@ -1194,7 +1195,6 @@ Other - Bug in :func:`to_numeric` raising ``TypeError`` when ``arg`` is a :class:`Timedelta` or :class:`Timestamp` scalar. (:issue:`59944`) - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) - Bug in :meth:`DataFrame.apply` raising ``RecursionError`` when passing ``func=list[int]``. (:issue:`61565`) -- Bug in :func:`assert_frame_equal` fails when comparing two DFs containing pd.NA that only differ in dtype and dtype is false (:issue:`61473`) - Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`) From 193a2a12cd1ac0502cd5dfd29f5514de33cb62c0 Mon Sep 17 00:00:00 2001 From: Santhosh Kumar Bethi Date: Mon, 13 Oct 2025 09:06:29 -0400 Subject: [PATCH 6/9] check dtype condition missmatch --- pandas/_testing/asserters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index b8c25ff50af3f..50bdcfe522f33 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1029,7 +1029,7 @@ def assert_series_equal( else: assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") - if not check_dtype: + if not check_dtype and (left.dtype != right.dtype): # checks only when check_dtype is False lv, rv = left._values, right._values lv = np.asarray(lv, dtype=object) From 99513c9506a44f73fea13fcc187353e0a5427421 Mon Sep 17 00:00:00 2001 From: Santhosh Kumar Bethi Date: Mon, 13 Oct 2025 09:54:28 -0400 Subject: [PATCH 7/9] fixing the formatting issue --- pandas/tests/frame/test_query_eval.py | 20 +++++--------------- pandas/tests/util/test_assert_frame_equal.py | 19 ++++++++----------- 2 files changed, 13 insertions(+), 26 deletions(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index b599be5d042fe..b31e8529b238b 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -160,21 +160,13 @@ def test_query_empty_string(self): df.query("") def test_query_duplicate_column_name(self, engine, parser): - df = DataFrame( - { - "A": range(3), - "B": range(3), - "C": range(3) - } - ).rename(columns={"B": "A"}) + df = DataFrame({"A": range(3), "B": range(3), "C": range(3)}).rename( + columns={"B": "A"} + ) res = df.query("C == 1", engine=engine, parser=parser) - expect = DataFrame( - [[1, 1, 1]], - columns=["A", "A", "C"], - index=[1] - ) + expect = DataFrame([[1, 1, 1]], columns=["A", "A", "C"], index=[1]) tm.assert_frame_equal(res, expect) @@ -1140,9 +1132,7 @@ def test_query_with_nested_special_character(self, parser, engine): [">=", operator.ge], ], ) - def test_query_lex_compare_strings( - self, parser, engine, op, func - ): + def test_query_lex_compare_strings(self, parser, engine, op, func): a = Series(np.random.default_rng(2).choice(list("abcde"), 20)) b = Series(np.arange(a.size)) df = DataFrame({"X": a, "Y": b}) diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 011b0587304ef..3622ed0c7f4aa 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -416,31 +416,28 @@ def test_datetimelike_compat_deprecated(): def test_assert_frame_equal_na_object_vs_int32_check_dtype_false(): - #GH# 61473 + # GH# 61473 df1 = DataFrame({"a": pd.Series([pd.NA], dtype="Int32")}) df2 = DataFrame({"a": pd.Series([pd.NA], dtype="object")}) tm.assert_frame_equal(df1, df2, check_dtype=False) def test_assert_frame_equal_object_vs_int32_check_dtype_false(): - #GH# 61473 - df1 = DataFrame({"a": pd.Series([pd.NA,0], dtype="Int32")}) - df2 = DataFrame({"a": pd.Series([pd.NA,0], dtype="object")}) + # GH# 61473 + df1 = DataFrame({"a": pd.Series([pd.NA, 0], dtype="Int32")}) + df2 = DataFrame({"a": pd.Series([pd.NA, 0], dtype="object")}) tm.assert_frame_equal(df1, df2, check_dtype=False) def test_assert_frame_not_equal_object_vs_int32_check_dtype_false(): - #GH# 61473 - df1 = DataFrame({"a": pd.Series([pd.NA,0], dtype="Int32")}) - df2 = DataFrame({"a": pd.Series([pd.NA,1], dtype="object")}) - msg = ( - r"""DataFrame\.iloc\[:, 0\] \(column name="a"\) are different + # GH# 61473 + df1 = DataFrame({"a": pd.Series([pd.NA, 0], dtype="Int32")}) + df2 = DataFrame({"a": pd.Series([pd.NA, 1], dtype="object")}) + msg = r"""DataFrame\.iloc\[:, 0\] \(column name="a"\) are different DataFrame\.iloc\[:, 0\] \(column name="a"\) values are different \(50\.0 %\) \[index\]: \[0, 1\] \[left\]: \[, 0\] \[right\]: \[, 1\]""" -) with pytest.raises(AssertionError, match=msg): tm.assert_frame_equal(df1, df2, check_dtype=False) - From ce0d1f1b5c0abb529265d59ab69cc40f7e367efa Mon Sep 17 00:00:00 2001 From: Santhosh Kumar Bethi Date: Mon, 13 Oct 2025 10:35:57 -0400 Subject: [PATCH 8/9] fixing doc string validation error --- pandas/_testing/asserters.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 50bdcfe522f33..e6d16286c9f1a 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -884,7 +884,6 @@ def assert_series_equal( Whether to check the Series dtype is identical. check_dtype : bool, if False Whether to check the series is equal or not if dtype is - False. (#GH #61473) check_index_type : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. @@ -1030,7 +1029,7 @@ def assert_series_equal( assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") if not check_dtype and (left.dtype != right.dtype): - # checks only when check_dtype is False + # checks only when check_dtype is False #GH #61473 lv, rv = left._values, right._values lv = np.asarray(lv, dtype=object) rv = np.asarray(rv, dtype=object) From 1bcc168a544ff7107d1082195979aeda39b49019 Mon Sep 17 00:00:00 2001 From: Santhosh Kumar Bethi Date: Mon, 13 Oct 2025 13:27:30 -0400 Subject: [PATCH 9/9] fixing docstring issue --- pandas/_testing/asserters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index e6d16286c9f1a..644c217caf5dc 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -884,6 +884,7 @@ def assert_series_equal( Whether to check the Series dtype is identical. check_dtype : bool, if False Whether to check the series is equal or not if dtype is + False. check_index_type : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical.