Skip to content

[DOC] Incorrect typing in astype for the copy argument #63144

@Matt711

Description

@Matt711

From https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.astype.html

copy : bool, default True

But from the source, copy can be None

copy: bool_t | None = None

pandas/pandas/core/generic.py

Lines 6485 to 6682 in 9c8bc3e

@final
def astype(
self, dtype, copy: bool_t | None = None, errors: IgnoreRaise = "raise"
) -> Self:
"""
Cast a pandas object to a specified dtype ``dtype``.
Parameters
----------
dtype : str, data type, Series or Mapping of column name -> data type
Use a str, numpy.dtype, pandas.ExtensionDtype or Python type to
cast entire pandas object to the same type. Alternatively, use a
mapping, e.g. {col: dtype, ...}, where col is a column label and dtype is
a numpy.dtype or Python type to cast one or more of the DataFrame's
columns to column-specific types.
copy : bool, default True
Return a copy when ``copy=True`` (be very careful setting
``copy=False`` as changes to values then may propagate to other
pandas objects).
.. note::
The `copy` keyword will change behavior in pandas 3.0.
`Copy-on-Write
<https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
will be enabled by default, which means that all methods with a
`copy` keyword will use a lazy copy mechanism to defer the copy and
ignore the `copy` keyword. The `copy` keyword will be removed in a
future version of pandas.
You can already get the future behavior and improvements through
enabling copy on write ``pd.options.mode.copy_on_write = True``
errors : {'raise', 'ignore'}, default 'raise'
Control raising of exceptions on invalid data for provided dtype.
- ``raise`` : allow exceptions to be raised
- ``ignore`` : suppress exceptions. On error return original object.
Returns
-------
same type as caller
See Also
--------
to_datetime : Convert argument to datetime.
to_timedelta : Convert argument to timedelta.
to_numeric : Convert argument to a numeric type.
numpy.ndarray.astype : Cast a numpy array to a specified type.
Notes
-----
.. versionchanged:: 2.0.0
Using ``astype`` to convert from timezone-naive dtype to
timezone-aware dtype will raise an exception.
Use :meth:`Series.dt.tz_localize` instead.
Examples
--------
Create a DataFrame:
>>> d = {'col1': [1, 2], 'col2': [3, 4]}
>>> df = pd.DataFrame(data=d)
>>> df.dtypes
col1 int64
col2 int64
dtype: object
Cast all columns to int32:
>>> df.astype('int32').dtypes
col1 int32
col2 int32
dtype: object
Cast col1 to int32 using a dictionary:
>>> df.astype({'col1': 'int32'}).dtypes
col1 int32
col2 int64
dtype: object
Create a series:
>>> ser = pd.Series([1, 2], dtype='int32')
>>> ser
0 1
1 2
dtype: int32
>>> ser.astype('int64')
0 1
1 2
dtype: int64
Convert to categorical type:
>>> ser.astype('category')
0 1
1 2
dtype: category
Categories (2, int32): [1, 2]
Convert to ordered categorical type with custom ordering:
>>> from pandas.api.types import CategoricalDtype
>>> cat_dtype = CategoricalDtype(
... categories=[2, 1], ordered=True)
>>> ser.astype(cat_dtype)
0 1
1 2
dtype: category
Categories (2, int64): [2 < 1]
Create a series of dates:
>>> ser_date = pd.Series(pd.date_range('20200101', periods=3))
>>> ser_date
0 2020-01-01
1 2020-01-02
2 2020-01-03
dtype: datetime64[ns]
"""
if copy and using_copy_on_write():
copy = False
if is_dict_like(dtype):
if self.ndim == 1: # i.e. Series
if len(dtype) > 1 or self.name not in dtype:
raise KeyError(
"Only the Series name can be used for "
"the key in Series dtype mappings."
)
new_type = dtype[self.name]
return self.astype(new_type, copy, errors)
# GH#44417 cast to Series so we can use .iat below, which will be
# robust in case we
from pandas import Series
dtype_ser = Series(dtype, dtype=object)
for col_name in dtype_ser.index:
if col_name not in self:
raise KeyError(
"Only a column name can be used for the "
"key in a dtype mappings argument. "
f"'{col_name}' not found in columns."
)
dtype_ser = dtype_ser.reindex(self.columns, fill_value=None, copy=False)
results = []
for i, (col_name, col) in enumerate(self.items()):
cdt = dtype_ser.iat[i]
if isna(cdt):
res_col = col.copy(deep=copy)
else:
try:
res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
except ValueError as ex:
ex.args = (
f"{ex}: Error while type casting for column '{col_name}'",
)
raise
results.append(res_col)
elif is_extension_array_dtype(dtype) and self.ndim > 1:
# TODO(EA2D): special case not needed with 2D EAs
dtype = pandas_dtype(dtype)
if isinstance(dtype, ExtensionDtype) and all(
arr.dtype == dtype for arr in self._mgr.arrays
):
return self.copy(deep=copy)
# GH 18099/22869: columnwise conversion to extension dtype
# GH 24704: self.items handles duplicate column names
results = [
ser.astype(dtype, copy=copy, errors=errors) for _, ser in self.items()
]
else:
# else, only a single dtype is given
new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
res = self._constructor_from_mgr(new_data, axes=new_data.axes)
return res.__finalize__(self, method="astype")
# GH 33113: handle empty frame or series
if not results:
return self.copy(deep=None)
# GH 19920: retain column metadata after concat
result = concat(results, axis=1, copy=False)
# GH#40810 retain subclass
# error: Incompatible types in assignment
# (expression has type "Self", variable has type "DataFrame")
result = self._constructor(result) # type: ignore[assignment]
result.columns = self.columns
result = result.__finalize__(self, method="astype")
# https://github.com/python/mypy/issues/8354
return cast(Self, result)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions