|
29 | 29 | from pandas.util._decorators import doc |
30 | 30 | from pandas.util._validators import validate_fillna_kwargs |
31 | 31 |
|
| 32 | +from pandas.core.dtypes.cast import can_hold_element |
32 | 33 | from pandas.core.dtypes.common import ( |
33 | 34 | is_array_like, |
34 | 35 | is_bool_dtype, |
35 | 36 | is_integer, |
36 | 37 | is_list_like, |
37 | | - is_object_dtype, |
38 | 38 | is_scalar, |
39 | 39 | ) |
40 | 40 | from pandas.core.dtypes.dtypes import DatetimeTZDtype |
@@ -1240,46 +1240,50 @@ def to_numpy( |
1240 | 1240 | ) -> np.ndarray: |
1241 | 1241 | if dtype is not None: |
1242 | 1242 | dtype = np.dtype(dtype) |
1243 | | - elif self._hasna: |
1244 | | - dtype = np.dtype(object) |
1245 | 1243 |
|
1246 | 1244 | if na_value is lib.no_default: |
1247 | 1245 | na_value = self.dtype.na_value |
1248 | 1246 |
|
1249 | 1247 | pa_type = self._pa_array.type |
| 1248 | + if not self._hasna or isna(na_value) or pa.types.is_null(pa_type): |
| 1249 | + data = self |
| 1250 | + else: |
| 1251 | + data = self.fillna(na_value) |
| 1252 | + copy = False |
| 1253 | + |
1250 | 1254 | if pa.types.is_timestamp(pa_type) or pa.types.is_duration(pa_type): |
1251 | | - result = self._maybe_convert_datelike_array() |
| 1255 | + result = data._maybe_convert_datelike_array() |
1252 | 1256 | if dtype is None or dtype.kind == "O": |
1253 | 1257 | result = result.to_numpy(dtype=object, na_value=na_value) |
1254 | 1258 | else: |
1255 | 1259 | result = result.to_numpy(dtype=dtype) |
1256 | | - return result |
1257 | 1260 | elif pa.types.is_time(pa_type) or pa.types.is_date(pa_type): |
1258 | 1261 | # convert to list of python datetime.time objects before |
1259 | 1262 | # wrapping in ndarray |
1260 | | - result = np.array(list(self), dtype=dtype) |
1261 | | - elif is_object_dtype(dtype) and self._hasna: |
1262 | | - result = np.empty(len(self), dtype=object) |
1263 | | - mask = ~self.isna() |
1264 | | - result[mask] = np.asarray(self[mask]._pa_array) |
1265 | | - elif pa.types.is_null(self._pa_array.type): |
1266 | | - fill_value = None if isna(na_value) else na_value |
1267 | | - return np.full(len(self), fill_value=fill_value, dtype=dtype) |
1268 | | - elif self._hasna: |
1269 | | - data = self.fillna(na_value) |
| 1263 | + result = np.array(list(data), dtype=dtype) |
| 1264 | + if data._hasna: |
| 1265 | + result[data.isna()] = na_value |
| 1266 | + elif pa.types.is_null(pa_type): |
| 1267 | + if dtype is not None and isna(na_value): |
| 1268 | + na_value = None |
| 1269 | + result = np.full(len(data), fill_value=na_value, dtype=dtype) |
| 1270 | + elif not data._hasna or (pa.types.is_floating(pa_type) and na_value is np.nan): |
1270 | 1271 | result = data._pa_array.to_numpy() |
1271 | | - if dtype is not None: |
1272 | | - result = result.astype(dtype, copy=False) |
1273 | | - return result |
1274 | | - else: |
1275 | | - result = self._pa_array.to_numpy() |
1276 | 1272 | if dtype is not None: |
1277 | 1273 | result = result.astype(dtype, copy=False) |
1278 | 1274 | if copy: |
1279 | 1275 | result = result.copy() |
1280 | | - return result |
1281 | | - if self._hasna: |
1282 | | - result[self.isna()] = na_value |
| 1276 | + else: |
| 1277 | + if dtype is None: |
| 1278 | + empty = pa.array([], type=pa_type).to_numpy(zero_copy_only=False) |
| 1279 | + if can_hold_element(empty, na_value): |
| 1280 | + dtype = empty.dtype |
| 1281 | + else: |
| 1282 | + dtype = np.object_ |
| 1283 | + result = np.empty(len(data), dtype=dtype) |
| 1284 | + mask = data.isna() |
| 1285 | + result[mask] = na_value |
| 1286 | + result[~mask] = data[~mask]._pa_array.to_numpy() |
1283 | 1287 | return result |
1284 | 1288 |
|
1285 | 1289 | def unique(self) -> Self: |
|
0 commit comments