pandas-dev · simonjayhawkins · Apr 1, 2020 · Mar 31, 2020 · Mar 31, 2020 · Apr 1, 2020
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -288,26 +288,17 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Doctests interval classes' ; echo $MSG
-    pytest -q --doctest-modules \
-        pandas/core/indexes/interval.py \
-        pandas/core/arrays/interval.py
+    pytest -q --doctest-modules pandas/core/indexes/interval.py
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Doctests arrays'; echo $MSG
-    pytest -q --doctest-modules \
-        pandas/core/arrays/string_.py \
-        pandas/core/arrays/integer.py \
-        pandas/core/arrays/boolean.py
+    pytest -q --doctest-modules pandas/core/arrays/
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Doctests dtypes'; echo $MSG
     pytest -q --doctest-modules pandas/core/dtypes/
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    MSG='Doctests arrays/boolean.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/arrays/boolean.py
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
     MSG='Doctests base.py' ; echo $MSG
     pytest -q --doctest-modules pandas/core/base.py
     RET=$(($RET + $?)) ; echo $MSG "DONE"

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -1161,7 +1161,7 @@ def _create_method(cls, op, coerce_to_dtype=True):
         --------
         Given an ExtensionArray subclass called MyExtensionArray, use
 
-        >>> __add__ = cls._create_method(operator.add)
+        >>> __add__ = cls._create_method(operator.add)  # doctest: +SKIP
 
         in the class definition of MyExtensionArray to create the operator
         for addition, that will be based on the operator implementation

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1598,19 +1598,19 @@ def sort_values(self, inplace=False, ascending=True, na_position="last"):
 
         >>> c = pd.Categorical([np.nan, 2, 2, np.nan, 5])
         >>> c
-        [NaN, 2.0, 2.0, NaN, 5.0]
+        [NaN, 2, 2, NaN, 5]
         Categories (2, int64): [2, 5]
         >>> c.sort_values()
-        [2.0, 2.0, 5.0, NaN, NaN]
+        [2, 2, 5, NaN, NaN]
         Categories (2, int64): [2, 5]
         >>> c.sort_values(ascending=False)
-        [5.0, 2.0, 2.0, NaN, NaN]
+        [5, 2, 2, NaN, NaN]
         Categories (2, int64): [2, 5]
         >>> c.sort_values(na_position='first')
-        [NaN, NaN, 2.0, 2.0, 5.0]
+        [NaN, NaN, 2, 2, 5]
         Categories (2, int64): [2, 5]
         >>> c.sort_values(ascending=False, na_position='first')
-        [NaN, NaN, 5.0, 2.0, 2.0]
+        [NaN, NaN, 5, 2, 2]
         Categories (2, int64): [2, 5]
         """
         inplace = validate_bool_kwarg(inplace, "inplace")
@@ -1835,7 +1835,7 @@ def take(self, indexer, allow_fill: bool = False, fill_value=None):
 
         >>> cat.take([0, -1, -1], allow_fill=True, fill_value='a')
         [a, a, a]
-        Categories (3, object): [a, b]
+        Categories (2, object): [a, b]
 
         Specifying a fill value that's not in ``self.categories``
         will raise a ``TypeError``.
@@ -2237,21 +2237,20 @@ def unique(self):
         order of appearance.
 
         >>> pd.Categorical(list('baabc'))
-        [b, a, c]
-        Categories (3, object): [b, a, c]
+        [b, a, a, b, c]
+        Categories (3, object): [a, b, c]
 
         >>> pd.Categorical(list('baabc'), categories=list('abc'))
-        [b, a, c]
-        Categories (3, object): [b, a, c]
+        [b, a, a, b, c]
+        Categories (3, object): [a, b, c]
 
         An ordered Categorical preserves the category ordering.
 
-        >>> pd.Categorical(list('baabc'),
-        ...                categories=list('abc'),
-        ...                ordered=True)
-        [b, a, c]
+        >>> pd.Categorical(list('baabc'), categories=list('abc'), ordered=True)
+        [b, a, a, b, c]
         Categories (3, object): [a < b < c]
 
+
         See Also
         --------
         unique
@@ -2438,7 +2437,7 @@ def replace(self, to_replace, value, inplace: bool = False):
         --------
         >>> s = pd.Categorical([1, 2, 1, 3])
         >>> s.replace(1, 3)
-        [3, 3, 2, 3]
+        [3, 2, 3, 3]
         Categories (2, int64): [2, 3]
         """
         inplace = validate_bool_kwarg(inplace, "inplace")
@@ -2506,16 +2505,90 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
 
     Examples
     --------
+    >>> s = pd.Series(list("aabc")).astype("category")
+    >>> s
+    0    a
+    1    a
+    2    b
+    3    c
+    dtype: category
+    Categories (3, object): [a, b, c]
+
     >>> s.cat.categories
-    >>> s.cat.categories = list('abc')
-    >>> s.cat.rename_categories(list('cab'))
-    >>> s.cat.reorder_categories(list('cab'))
-    >>> s.cat.add_categories(['d','e'])
+    Index(['a', 'b', 'c'], dtype='object')
+
+    >>> s.cat.categories = list("bcd")
+    >>> s
+    0    b
+    1    b
+    2    c
+    3    d
+    dtype: category
+    Categories (3, object): [b, c, d]
+
+    >>> s.cat.rename_categories(list("abc"))
+    0    a
+    1    a
+    2    b
+    3    c
+    dtype: category
+    Categories (3, object): [a, b, c]
+
+    >>> s.cat.reorder_categories(list("cdb"))
+    0    b
+    1    b
+    2    c
+    3    d
+    dtype: category
+    Categories (3, object): [c, d, b]
+
+    >>> s.cat.add_categories(["e", "f"])
+    0    b
+    1    b
+    2    c
+    3    d
+    dtype: category
+    Categories (5, object): [b, c, d, e, f]
+
     >>> s.cat.remove_categories(['d'])
+    0      b
+    1      b
+    2      c
+    3    NaN
+    dtype: category
+    Categories (2, object): [b, c]
+
     >>> s.cat.remove_unused_categories()
-    >>> s.cat.set_categories(list('abcde'))
+    0    b
+    1    b
+    2    c
+    3    d
+    dtype: category
+    Categories (3, object): [b, c, d]
+
+    >>> s.cat.set_categories(list("abcde"))
+    0    b
+    1    b
+    2    c
+    3    d
+    dtype: category
+    Categories (5, object): [a, b, c, d, e]
+
     >>> s.cat.as_ordered()
+    0    b
+    1    b
+    2    c
+    3    d
+    dtype: category
+    Categories (3, object): [b < c < d]
+
     >>> s.cat.as_unordered()
+    0    b
+    1    b
+    2    c
+    3    d
+    dtype: category
+    Categories (3, object): [b, c, d]
     """
 
     def __init__(self, data):
@@ -2603,7 +2676,7 @@ def _recode_for_categories(codes: np.ndarray, old_categories, new_categories):
     >>> new_cat = pd.Index(['a', 'b'])
     >>> codes = np.array([0, 1, 1, 2])
     >>> _recode_for_categories(codes, old_cat, new_cat)
-    array([ 1,  0,  0, -1])
+    array([ 1,  0,  0, -1], dtype=int8)
     """
     if len(old_categories) == 0:
         # All null anyway, so just retain the nulls

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -181,7 +181,7 @@ def _unbox_scalar(self, value: Union[Period, Timestamp, Timedelta, NaTType]) ->
 
         Examples
         --------
-        >>> self._unbox_scalar(Timedelta('10s'))  # DOCTEST: +SKIP
+        >>> self._unbox_scalar(Timedelta('10s'))  # doctest: +SKIP
         10000000000
         """
         raise AbstractMethodError(self)

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -922,9 +922,10 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"):
         ...                               '2018-10-28 02:36:00',
         ...                               '2018-10-28 03:46:00']))
         >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False]))
-        0   2015-03-29 03:00:00+02:00
-        1   2015-03-29 03:30:00+02:00
-        dtype: datetime64[ns, Europe/Warsaw]
+        0   2018-10-28 01:20:00+02:00
+        1   2018-10-28 02:36:00+02:00
+        2   2018-10-28 03:46:00+01:00
+        dtype: datetime64[ns, CET]
 
         If the DST transition causes nonexistent times, you can shift these
         dates forward or backwards with a timedelta object or `'shift_forward'`
@@ -935,15 +936,17 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"):
         >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
         0   2015-03-29 03:00:00+02:00
         1   2015-03-29 03:30:00+02:00
-        dtype: datetime64[ns, 'Europe/Warsaw']
+        dtype: datetime64[ns, Europe/Warsaw]
+
         >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
         0   2015-03-29 01:59:59.999999999+01:00
         1   2015-03-29 03:30:00+02:00
-        dtype: datetime64[ns, 'Europe/Warsaw']
+        dtype: datetime64[ns, Europe/Warsaw]
+
         >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H'))
         0   2015-03-29 03:30:00+02:00
         1   2015-03-29 03:30:00+02:00
-        dtype: datetime64[ns, 'Europe/Warsaw']
+        dtype: datetime64[ns, Europe/Warsaw]
         """
         nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
         if nonexistent not in nonexistent_options and not isinstance(
@@ -1604,9 +1607,9 @@ def date(self):
         DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
                       dtype='datetime64[ns]', freq='A-DEC')
         >>> idx.is_leap_year
-        array([ True, False, False], dtype=bool)
+        array([ True, False, False])
 
-        >>> dates = pd.Series(idx)
+        >>> dates_series = pd.Series(idx)
         >>> dates_series
         0   2012-12-31
         1   2013-12-31

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -94,7 +94,7 @@ def to_numpy(
 
         >>> a = pd.array([True, False, pd.NA], dtype="boolean")
         >>> a.to_numpy()
-        array([True, False, NA], dtype=object)
+        array([True, False, <NA>], dtype=object)
 
         When no missing values are present, an equivalent dtype can be used.
 
@@ -110,7 +110,7 @@ def to_numpy(
         >>> a = pd.array([True, False, pd.NA], dtype="boolean")
         >>> a
         <BooleanArray>
-        [True, False, NA]
+        [True, False, <NA>]
         Length: 3, dtype: boolean
 
         >>> a.to_numpy(dtype="bool")

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -818,6 +818,7 @@ def period_array(
     Integers that look like years are handled
 
     >>> period_array([2000, 2001, 2002], freq='D')
+    <PeriodArray>
     ['2000-01-01', '2001-01-01', '2002-01-01']
     Length: 3, dtype: period[D]