From acc47b9360a2a471a83c41c8143f505b70d3f1e1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Apr 2019 10:58:33 +0200 Subject: [PATCH 1/8] Use class_without_autosummary template for all Dtype docstring pages --- doc/source/reference/arrays.rst | 39 +++++++++++++++++++++++++++++++++ pandas/core/arrays/integer.py | 14 +++++++++++- pandas/core/arrays/sparse.py | 8 +++++++ pandas/core/dtypes/dtypes.py | 27 ++++++++++++++++++++++- 4 files changed, 86 insertions(+), 2 deletions(-) diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index 4cf8db895f0ac..f2987971acea7 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -146,6 +146,11 @@ If the data are tz-aware, then every value in the array must have the same timez :toctree: api/ arrays.DatetimeArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + DatetimeTZDtype .. _api.arrays.timedelta: @@ -260,6 +265,11 @@ Every period in a ``PeriodArray`` must have the same ``freq``. :toctree: api/ arrays.PeriodArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + PeriodDtype .. _api.arrays.interval: @@ -296,6 +306,11 @@ A collection of intervals may be stored in an :class:`arrays.IntervalArray`. :toctree: api/ arrays.IntervalArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + IntervalDtype .. _api.arrays.integer_na: @@ -310,6 +325,11 @@ Pandas provides this through :class:`arrays.IntegerArray`. :toctree: api/ arrays.IntegerArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + Int8Dtype Int16Dtype Int32Dtype @@ -396,8 +416,27 @@ be stored efficiently as a :class:`SparseArray`. :toctree: api/ SparseArray + +.. 
autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + SparseDtype The ``Series.sparse`` accessor may be used to access sparse-specific attributes and methods if the :class:`Series` contains sparse values. See :ref:`api.series.sparse` for more. + + + +.. Dtype attributes which are manually listed in their docstrings: including +.. it here to make sure a docstring page is built for them + +.. + .. autosummary:: + :toctree: api/ + + DatetimeTZDtype.unit + DatetimeTZDtype.tz + PeriodDtype.freq + IntervalDtype.subdtype \ No newline at end of file diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index bbacfa3077054..9b53c8fa977bc 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -682,6 +682,17 @@ def integer_arithmetic_method(self, other): module = sys.modules[__name__] +_dtype_docstring = """ +An ExtensionDtype to hold a single size & kind of integer dtype. + +Attributes +---------- +None + +Methods +------- +None +""" # create the Dtype _dtypes = {} @@ -695,7 +706,8 @@ def integer_arithmetic_method(self, other): classname = "{}Dtype".format(name) numpy_dtype = getattr(np, dtype) attributes_dict = {'type': numpy_dtype, - 'name': name} + 'name': name, + '__doc__': _dtype_docstring} dtype_type = register_extension_dtype( type(classname, (_IntegerDtype, ), attributes_dict) ) diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 1184cf1ee71f2..49d774cd316c6 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -72,6 +72,14 @@ class SparseDtype(ExtensionDtype): =========== ========== The default value may be overridden by specifying a `fill_value`. + + Attributes + ---------- + None + + Methods + ------- + None """ # We include `_is_na_fill_value` in the metadata to avoid hash collisions # between SparseDtype(float, 0.0) and SparseDtype(float, nan). 
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index f0dd70886dc06..ed7667fcf8933 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -175,7 +175,7 @@ class CategoricalDtypeType(type): @register_extension_dtype class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): """ - Type for categorical data with the categories and orderedness + Type for categorical data with the categories and orderedness. .. versionchanged:: 0.21.0 @@ -583,6 +583,15 @@ class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype): THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.datetime64[ns] + + Attributes + ---------- + unit + tz + + Methods + ------- + None """ type = Timestamp kind = 'M' @@ -735,6 +744,14 @@ class PeriodDtype(ExtensionDtype, PandasExtensionDtype): A Period duck-typed class, suitable for holding a period with freq dtype. THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.int64. + + Attributes + ---------- + freq + + Methods + ------- + None """ type = Period kind = 'O' @@ -858,6 +875,14 @@ class IntervalDtype(PandasExtensionDtype, ExtensionDtype): A Interval duck-typed class, suitable for holding an interval THIS IS NOT A REAL NUMPY DTYPE + + Attributes + ---------- + subdtype + + Methods + ------- + None """ name = 'interval' kind = None From e79d793071ecbc2c245158ebd0155cc5d31bff86 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Apr 2019 11:40:54 +0200 Subject: [PATCH 2/8] make PeriodDtype.freq and IntervalDtype.subdtype into properties --- pandas/core/dtypes/dtypes.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index ed7667fcf8933..8e3b5f29ed041 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -783,10 +783,15 @@ def __new__(cls, freq=None): return cls._cache[freq.freqstr] except KeyError: u = object.__new__(cls) - u.freq = freq + 
u._freq = freq cls._cache[freq.freqstr] = u return u + @property + def freq(self): + """The frequency object of this PeriodDtype.""" + return self._freq + @classmethod def _parse_dtype_strict(cls, freq): if isinstance(freq, str): @@ -934,10 +939,15 @@ def __new__(cls, subtype=None): return cls._cache[str(subtype)] except KeyError: u = object.__new__(cls) - u.subtype = subtype + u._subtype = subtype cls._cache[str(subtype)] = u return u + @property + def subdtype(self): + """The dtype of the Interval bounds.""" + return self._subtype + @classmethod def construct_array_type(cls): """ From 6620d84adcdcaa1c0f0315a839776c738509f8eb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Apr 2019 20:43:39 +0200 Subject: [PATCH 3/8] update docstrings --- pandas/core/arrays/integer.py | 4 +- pandas/core/dtypes/dtypes.py | 97 +++++++++++++++++++++-------------- 2 files changed, 60 insertions(+), 41 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 9b53c8fa977bc..29c146cb55a23 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -683,7 +683,7 @@ def integer_arithmetic_method(self, other): module = sys.modules[__name__] _dtype_docstring = """ -An ExtensionDtype to hold a single size & kind of integer dtype. +An ExtensionDtype for {dtype} integer data. 
Attributes ---------- @@ -707,7 +707,7 @@ def integer_arithmetic_method(self, other): numpy_dtype = getattr(np, dtype) attributes_dict = {'type': numpy_dtype, 'name': name, - '__doc__': _dtype_docstring} + '__doc__': _dtype_docstring.format(dtype=dtype)} dtype_type = register_extension_dtype( type(classname, (_IntegerDtype, ), attributes_dict) ) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 8e3b5f29ed041..e7b2ae89ff1a7 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -576,13 +576,31 @@ def _is_boolean(self): @register_extension_dtype class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype): - """ - A np.dtype duck-typed class, suitable for holding a custom datetime with tz - dtype. + An ExtensionDtype for timezone-aware datetime data. + + **This is not an actual numpy dtype**, but a duck type. + + Parameters + ---------- + unit : str, default "ns" + The precision of the datetime data. Currently limited + to ``"ns"``. + tz : str, int, or datetime.tzinfo + The timezone. + + Raises + ------ + pytz.UnknownTimeZoneError + When the requested timezone cannot be found. - THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of - np.datetime64[ns] + Examples + -------- + >>> pd.DatetimeTZDtype(tz='UTC') + datetime64[ns, UTC] + + >>> pd.DatetimeTZDtype(tz='dateutil/US/Central') + datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')] Attributes ---------- @@ -604,30 +622,6 @@ class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype): _cache = {} def __init__(self, unit="ns", tz=None): - """ - An ExtensionDtype for timezone-aware datetime data. - - Parameters - ---------- - unit : str, default "ns" - The precision of the datetime data. Currently limited - to ``"ns"``. - tz : str, int, or datetime.tzinfo - The timezone. - - Raises - ------ - pytz.UnknownTimeZoneError - When the requested timezone cannot be found. 
- - Examples - -------- - >>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='UTC') - datetime64[ns, UTC] - - >>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='dateutil/US/Central') - datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')] - """ if isinstance(unit, DatetimeTZDtype): unit, tz = unit.unit, unit.tz @@ -741,9 +735,22 @@ def __setstate__(self, state): @register_extension_dtype class PeriodDtype(ExtensionDtype, PandasExtensionDtype): """ - A Period duck-typed class, suitable for holding a period with freq dtype. + An ExtensionDtype for Period data. + + **This is not an actual numpy dtype**, but a duck type. + + Parameters + ---------- + freq : str or DateOffset + The frequency of this PeriodDtype + + Examples + -------- + >>> pd.PeriodDtype(freq='D') + period[D] - THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.int64. + >>> pd.PeriodDtype(freq=pd.offsets.MonthEnd()) + period[M] Attributes ---------- @@ -877,9 +884,22 @@ def construct_array_type(cls): @register_extension_dtype class IntervalDtype(PandasExtensionDtype, ExtensionDtype): """ - A Interval duck-typed class, suitable for holding an interval + An ExtensionDtype for Interval data. - THIS IS NOT A REAL NUMPY DTYPE + **This is not an actual numpy dtype**, but a duck type. + + Parameters + ---------- + subtype : str, np.dtype + The dtype of the Interval bounds. 
+ + Examples + -------- + >>> pd.PeriodDtype(freq='D') + period[D] + + >>> pd.PeriodDtype(freq=pd.offsets.MonthEnd()) + period[M] Attributes ---------- @@ -899,11 +919,6 @@ class IntervalDtype(PandasExtensionDtype, ExtensionDtype): _cache = {} def __new__(cls, subtype=None): - """ - Parameters - ---------- - subtype : the dtype of the Interval - """ from pandas.core.dtypes.common import ( is_categorical_dtype, is_string_dtype, pandas_dtype) @@ -978,7 +993,11 @@ def construct_from_string(cls, string): 'Valid formats include Interval or Interval[dtype] ' 'where dtype is numeric, datetime, or timedelta') raise TypeError(msg) - + """ + Parameters + ---------- + subtype : the dtype of the Interval + """ @property def type(self): return Interval From 44b8d095186c653d2a91ae71661adf7ad9352e8b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Apr 2019 20:49:39 +0200 Subject: [PATCH 4/8] fix subdtype -> subtype --- doc/source/reference/arrays.rst | 2 +- pandas/core/dtypes/dtypes.py | 19 ++++++------------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index f2987971acea7..fb9a95b6736d5 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -439,4 +439,4 @@ and methods if the :class:`Series` contains sparse values. 
See DatetimeTZDtype.unit DatetimeTZDtype.tz PeriodDtype.freq - IntervalDtype.subdtype \ No newline at end of file + IntervalDtype.subtype \ No newline at end of file diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index e7b2ae89ff1a7..d277d3a9df5ca 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -895,15 +895,12 @@ class IntervalDtype(PandasExtensionDtype, ExtensionDtype): Examples -------- - >>> pd.PeriodDtype(freq='D') - period[D] - - >>> pd.PeriodDtype(freq=pd.offsets.MonthEnd()) - period[M] + >>> pd.IntervalDtype(subtype='int64') + interval[int64] Attributes ---------- - subdtype + subtype Methods ------- @@ -928,7 +925,7 @@ def __new__(cls, subtype=None): # we are called as an empty constructor # generally for pickle compat u = object.__new__(cls) - u.subtype = None + u._subtype = None return u elif (isinstance(subtype, str) and subtype.lower() == 'interval'): @@ -959,7 +956,7 @@ def __new__(cls, subtype=None): return u @property - def subdtype(self): + def subtype(self): """The dtype of the Interval bounds.""" return self._subtype @@ -993,11 +990,7 @@ def construct_from_string(cls, string): 'Valid formats include Interval or Interval[dtype] ' 'where dtype is numeric, datetime, or timedelta') raise TypeError(msg) - """ - Parameters - ---------- - subtype : the dtype of the Interval - """ + @property def type(self): return Interval From f61ebc0c83ddfbb7f1642559d46b062bed7a01da Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Apr 2019 09:12:55 +0200 Subject: [PATCH 5/8] linter (order of sections) --- pandas/core/dtypes/dtypes.py | 44 ++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index d2794803f5dce..0ecc586ee83c8 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -583,6 +583,15 @@ class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype): tz : 
str, int, or datetime.tzinfo The timezone. + Attributes + ---------- + unit + tz + + Methods + ------- + None + Raises ------ pytz.UnknownTimeZoneError @@ -595,15 +604,6 @@ class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype): >>> pd.DatetimeTZDtype(tz='dateutil/US/Central') datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')] - - Attributes - ---------- - unit - tz - - Methods - ------- - None """ type = Timestamp # type: Type[Timestamp] kind = 'M' # type: str_type @@ -738,14 +738,6 @@ class PeriodDtype(ExtensionDtype, PandasExtensionDtype): freq : str or DateOffset The frequency of this PeriodDtype - Examples - -------- - >>> pd.PeriodDtype(freq='D') - period[D] - - >>> pd.PeriodDtype(freq=pd.offsets.MonthEnd()) - period[M] - Attributes ---------- freq @@ -753,6 +745,14 @@ class PeriodDtype(ExtensionDtype, PandasExtensionDtype): Methods ------- None + + Examples + -------- + >>> pd.PeriodDtype(freq='D') + period[D] + + >>> pd.PeriodDtype(freq=pd.offsets.MonthEnd()) + period[M] """ type = Period # type: Type[Period] kind = 'O' # type: str_type @@ -887,11 +887,6 @@ class IntervalDtype(PandasExtensionDtype, ExtensionDtype): subtype : str, np.dtype The dtype of the Interval bounds. 
- Examples - -------- - >>> pd.IntervalDtype(subtype='int64') - interval[int64] - Attributes ---------- subtype @@ -899,6 +894,11 @@ class IntervalDtype(PandasExtensionDtype, ExtensionDtype): Methods ------- None + + Examples + -------- + >>> pd.IntervalDtype(subtype='int64') + interval[int64] """ name = 'interval' kind = None # type: Optional[str_type] From d43ee0f9be3b718e2831797f8c2b92f7b7426b6b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Apr 2019 11:23:53 +0200 Subject: [PATCH 6/8] add pickle compat --- pandas/core/dtypes/dtypes.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 0ecc586ee83c8..533c591426a54 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -775,7 +775,9 @@ def __new__(cls, freq=None): elif freq is None: # empty constructor for pickle compat - return object.__new__(cls) + u = object.__new__(cls) + u._freq = None + return u if not isinstance(freq, ABCDateOffset): freq = cls._parse_dtype_strict(freq) @@ -846,6 +848,10 @@ def __eq__(self, other): return isinstance(other, PeriodDtype) and self.freq == other.freq + def __setstate__(self, state): + # for pickle compat. + self._freq = state['freq'] + @classmethod def is_dtype(cls, dtype): """ @@ -1010,6 +1016,10 @@ def __eq__(self, other): from pandas.core.dtypes.common import is_dtype_equal return is_dtype_equal(self.subtype, other.subtype) + def __setstate__(self, state): + # for pickle compat. 
+ self._subtype = state['subtype'] + @classmethod def is_dtype(cls, dtype): """ From 82e53a5bb18c5e60b41a7afc662a1c60ea982a91 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 19 Apr 2019 15:57:26 +0200 Subject: [PATCH 7/8] add extra comments --- pandas/core/dtypes/dtypes.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 533c591426a54..dd3d4d784d34d 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -334,6 +334,9 @@ def _finalize(self, categories, ordered, fastpath=False): self._ordered = ordered def __setstate__(self, state): + # for pickle compat. __getstate__ is defined in the + # PandasExtensionDtype superclass and uses the public properties to + # pickle -> need to set the settable private ones here (see GH26067) self._categories = state.pop('categories', None) self._ordered = state.pop('ordered', False) @@ -721,7 +724,9 @@ def __eq__(self, other): str(self.tz) == str(other.tz)) def __setstate__(self, state): - # for pickle compat. + # for pickle compat. __getstate__ is defined in the + # PandasExtensionDtype superclass and uses the public properties to + # pickle -> need to set the settable private ones here (see GH26067) self._tz = state['tz'] self._unit = state['unit'] @@ -849,7 +854,9 @@ def __eq__(self, other): return isinstance(other, PeriodDtype) and self.freq == other.freq def __setstate__(self, state): - # for pickle compat. + # for pickle compat. __getstate__ is defined in the + # PandasExtensionDtype superclass and uses the public properties to + # pickle -> need to set the settable private ones here (see GH26067) self._freq = state['freq'] @classmethod @@ -1017,7 +1024,9 @@ def __eq__(self, other): return is_dtype_equal(self.subtype, other.subtype) def __setstate__(self, state): - # for pickle compat. + # for pickle compat.
__getstate__ is defined in the + # PandasExtensionDtype superclass and uses the public properties to + # pickle -> need to set the settable private ones here (see GH26067) self._subtype = state['subtype'] @classmethod From c8ae0c94c44d3458ec401a4c2d22005d0ad1eff0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 19 Apr 2019 16:27:01 +0200 Subject: [PATCH 8/8] empty