
Commit 31f7201

Merge remote-tracking branch 'upstream/main' into bisect
2 parents: 8853ad2 + 11462d6

24 files changed: +481 / -102 lines

doc/source/whatsnew/v1.5.0.rst

Lines changed: 83 additions & 4 deletions
@@ -120,7 +120,7 @@ Other enhancements
 - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`)
 - :meth:`pd.concat` now raises when ``levels`` is given but ``keys`` is None (:issue:`46653`)
 - :meth:`pd.concat` now raises when ``levels`` contains duplicate values (:issue:`46653`)
-- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, :meth:`DataFrame.cov`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.var`, :meth:`.GroupBy.std`, :meth:`.GroupBy.sem`, and :meth:`.GroupBy.quantile` (:issue:`46560`)
+- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, :meth:`DataFrame.cov`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax`, :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.GroupBy.var`, :meth:`.GroupBy.std`, :meth:`.GroupBy.sem`, and :meth:`.DataFrameGroupBy.quantile` (:issue:`46560`)
 - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`, :issue:`46725`)
 - Added ``validate`` argument to :meth:`DataFrame.join` (:issue:`46622`)
 - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`)
@@ -194,10 +194,47 @@ did not have the same index as the input.
     df.groupby('a', dropna=True).transform('ffill')
     df.groupby('a', dropna=True).transform(lambda x: x)
 
-.. _whatsnew_150.notable_bug_fixes.notable_bug_fix2:
+.. _whatsnew_150.notable_bug_fixes.to_json_incorrectly_localizing_naive_timestamps:
 
-notable_bug_fix2
-^^^^^^^^^^^^^^^^
+Serializing tz-naive Timestamps with to_json() with ``iso_dates=True``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:meth:`DataFrame.to_json`, :meth:`Series.to_json`, and :meth:`Index.to_json`
+would incorrectly localize DatetimeArrays/DatetimeIndexes with tz-naive Timestamps
+to UTC. (:issue:`38760`)
+
+Note that this patch does not fix the localization of tz-aware Timestamps to UTC
+upon serialization. (Related issue :issue:`12997`)
+
+*Old Behavior*
+
+.. ipython:: python
+
+    index = pd.date_range(
+        start='2020-12-28 00:00:00',
+        end='2020-12-28 02:00:00',
+        freq='1H',
+    )
+    a = pd.Series(
+        data=range(3),
+        index=index,
+    )
+
+.. code-block:: ipython
+
+    In [4]: a.to_json(date_format='iso')
+    Out[4]: '{"2020-12-28T00:00:00.000Z":0,"2020-12-28T01:00:00.000Z":1,"2020-12-28T02:00:00.000Z":2}'
+
+    In [5]: pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index
+    Out[5]: array([False, False, False])
+
+*New Behavior*
+
+.. ipython:: python
+
+    a.to_json(date_format='iso')
+    # Roundtripping now works
+    pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_150.api_breaking:
@@ -426,6 +463,48 @@ As ``group_keys=True`` is the default value of :meth:`DataFrame.groupby` and
 raise a ``FutureWarning``. This can be silenced and the previous behavior
 retained by specifying ``group_keys=False``.
 
+.. _whatsnew_150.deprecations.numeric_only_default:
+
+``numeric_only`` default value
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Across DataFrame operations such as ``min``, ``sum``, and ``idxmax``, the default
+value of the ``numeric_only`` argument, if it exists at all, was inconsistent.
+Furthermore, operations with the default value ``None`` can lead to surprising
+results. (:issue:`46560`)
+
+.. code-block:: ipython
+
+    In [1]: df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
+
+    In [2]: # Reading the next line without knowing the contents of df, one would
+            # expect the result to contain the products for both columns a and b.
+            df[["a", "b"]].prod()
+    Out[2]:
+    a    2
+    dtype: int64
+
+To avoid this behavior, specifying the value ``numeric_only=None`` has been
+deprecated, and will be removed in a future version of pandas. In the future,
+all operations with a ``numeric_only`` argument will default to ``False``. Users
+should either call the operation only with columns that can be operated on, or
+specify ``numeric_only=True`` to operate only on Boolean, integer, and float columns.
+
+In order to support the transition to the new behavior, the following methods have
+gained the ``numeric_only`` argument.
+
+- :meth:`DataFrame.corr`
+- :meth:`DataFrame.corrwith`
+- :meth:`DataFrame.cov`
+- :meth:`DataFrame.idxmin`
+- :meth:`DataFrame.idxmax`
+- :meth:`.DataFrameGroupBy.idxmin`
+- :meth:`.DataFrameGroupBy.idxmax`
+- :meth:`.GroupBy.var`
+- :meth:`.GroupBy.std`
+- :meth:`.GroupBy.sem`
+- :meth:`.DataFrameGroupBy.quantile`
+
 .. _whatsnew_150.deprecations.other:
 
 Other Deprecations
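
To illustrate the ``numeric_only`` transition described in the hunk above: a minimal sketch of the deprecated implicit default versus the explicit argument (illustrative, not part of the diff):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})

    # Deprecated: numeric_only=None silently drops column "b"
    df.prod()

    # Explicit opt-in: operate only on Boolean, integer, and float columns
    df.prod(numeric_only=True)

    # Or restrict the call to columns the operation supports
    df[["a"]].prod()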

pandas/_libs/algos.pyi

Lines changed: 1 addition & 0 deletions
@@ -109,6 +109,7 @@ def rank_1d(
     ascending: bool = ...,
     pct: bool = ...,
     na_option=...,
+    mask: npt.NDArray[np.bool_] | None = ...,
 ) -> np.ndarray: ...  # np.ndarray[float64_t, ndim=1]
 def rank_2d(
     in_arr: np.ndarray,  # ndarray[numeric_object_t, ndim=2]

pandas/_libs/algos.pyx

Lines changed: 7 additions & 2 deletions
@@ -889,6 +889,7 @@ def rank_1d(
     bint ascending=True,
     bint pct=False,
     na_option="keep",
+    const uint8_t[:] mask=None,
 ):
     """
     Fast NaN-friendly version of ``scipy.stats.rankdata``.
@@ -918,6 +919,8 @@ def rank_1d(
         * keep: leave NA values where they are
         * top: smallest rank if ascending
         * bottom: smallest rank if descending
+    mask : np.ndarray[bool], optional, default None
+        Specify locations to be treated as NA, for e.g. Categorical.
     """
     cdef:
         TiebreakEnumType tiebreak
@@ -927,7 +930,6 @@
         float64_t[::1] out
         ndarray[numeric_object_t, ndim=1] masked_vals
         numeric_object_t[:] masked_vals_memview
-        uint8_t[:] mask
         bint keep_na, nans_rank_highest, check_labels, check_mask
         numeric_object_t nan_fill_val
 
@@ -956,6 +958,7 @@
         or numeric_object_t is object
        or (numeric_object_t is int64_t and is_datetimelike)
     )
+    check_mask = check_mask or mask is not None
 
     # Copy values into new array in order to fill missing data
     # with mask, without obfuscating location of missing data
@@ -965,7 +968,9 @@
     else:
         masked_vals = values.copy()
 
-    if numeric_object_t is object:
+    if mask is not None:
+        pass
+    elif numeric_object_t is object:
         mask = missing.isnaobj(masked_vals)
     elif numeric_object_t is int64_t and is_datetimelike:
         mask = (masked_vals == NPY_NAT).astype(np.uint8)
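
The new ``mask`` argument lets a caller flag NA positions that are not encoded in ``values`` itself (e.g. Categorical codes or masked arrays). A minimal sketch against this private function, assuming ``labels`` is accepted as a keyword here and that a ``uint8`` view of a boolean array satisfies the ``const uint8_t[:]`` memoryview:

    import numpy as np
    from pandas._libs import algos

    vals = np.array([3.0, 1.0, 2.0, 4.0])
    labels = np.zeros(len(vals), dtype=np.intp)  # one group covering the array
    # Mark the last position as NA without mutating vals
    mask = np.array([False, False, False, True]).view(np.uint8)
    algos.rank_1d(vals, labels=labels, mask=mask)  # expected: array([3., 1., 2., nan])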

pandas/_libs/groupby.pyi

Lines changed: 1 addition & 0 deletions
@@ -128,6 +128,7 @@ def group_rank(
     ascending: bool = ...,
     pct: bool = ...,
     na_option: Literal["keep", "top", "bottom"] = ...,
+    mask: npt.NDArray[np.bool_] | None = ...,
 ) -> None: ...
 def group_max(
     out: np.ndarray,  # groupby_t[:, ::1]

pandas/_libs/groupby.pyx

Lines changed: 10 additions & 1 deletion
@@ -1262,6 +1262,7 @@ def group_rank(
     bint ascending=True,
     bint pct=False,
     str na_option="keep",
+    const uint8_t[:, :] mask=None,
 ) -> None:
     """
     Provides the rank of values within each group.
@@ -1294,6 +1295,7 @@ def group_rank(
         * keep: leave NA values where they are
        * top: smallest rank if ascending
         * bottom: smallest rank if descending
+    mask : np.ndarray[bool] or None, default None
 
     Notes
     -----
@@ -1302,18 +1304,25 @@
     cdef:
         Py_ssize_t i, k, N
         ndarray[float64_t, ndim=1] result
+        const uint8_t[:] sub_mask
 
     N = values.shape[1]
 
     for k in range(N):
+        if mask is None:
+            sub_mask = None
+        else:
+            sub_mask = mask[:, k]
+
         result = rank_1d(
             values=values[:, k],
             labels=labels,
             is_datetimelike=is_datetimelike,
             ties_method=ties_method,
             ascending=ascending,
             pct=pct,
-            na_option=na_option
+            na_option=na_option,
+            mask=sub_mask,
         )
         for i in range(len(result)):
             # TODO: why can't we do out[:, k] = result?
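
Slicing a per-column ``sub_mask`` means ``group_rank`` can receive NA locations for dtypes that keep missing values in a separate mask (e.g. nullable integers) rather than in the values array. A user-level sketch of the behavior this plumbing serves (assumed, not shown in the diff):

    import pandas as pd

    df = pd.DataFrame(
        {
            "key": ["a", "a", "b", "b"],
            "val": pd.array([2, None, 1, 3], dtype="Int64"),  # NA lives in a mask
        }
    )
    # With the default na_option="keep", the masked NA stays NA within its group
    df.groupby("key")["val"].rank()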

pandas/_libs/src/ujson/python/date_conversions.c

Lines changed: 15 additions & 3 deletions
@@ -54,8 +54,8 @@ char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) {
         PyErr_NoMemory();
         return NULL;
     }
-
-    ret_code = make_iso_8601_datetime(&dts, result, *len, base);
+    // datetime64 is always naive
+    ret_code = make_iso_8601_datetime(&dts, result, *len, 0, base);
     if (ret_code != 0) {
         PyErr_SetString(PyExc_ValueError,
                         "Could not convert datetime value to string");
@@ -90,7 +90,19 @@ char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base,
 
     *len = (size_t)get_datetime_iso_8601_strlen(0, base);
     char *result = PyObject_Malloc(*len);
-    ret = make_iso_8601_datetime(&dts, result, *len, base);
+    // Check to see if PyDateTime has a timezone.
+    // Don't convert to UTC if it doesn't.
+    int is_tz_aware = 0;
+    if (PyObject_HasAttrString(obj, "tzinfo")) {
+        PyObject *offset = extract_utc_offset(obj);
+        if (offset == NULL) {
+            PyObject_Free(result);
+            return NULL;
+        }
+        is_tz_aware = offset != Py_None;
+        Py_DECREF(offset);
+    }
+    ret = make_iso_8601_datetime(&dts, result, *len, is_tz_aware, base);
 
     if (ret != 0) {
         PyErr_SetString(PyExc_ValueError,
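
With the ``utc`` flag threaded through ``make_iso_8601_datetime``, only tz-aware datetimes should pick up the trailing ``Z``. Roughly, at the Python level (a sketch; the exact output format follows the whatsnew entry above):

    import pandas as pd

    naive = pd.Series([0], index=[pd.Timestamp("2020-12-28")])
    aware = pd.Series([0], index=[pd.Timestamp("2020-12-28", tz="UTC")])

    naive.to_json(date_format="iso")  # no "Z" suffix after this change
    aware.to_json(date_format="iso")  # still UTC-converted and "Z"-suffixed (GH 12997)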

pandas/_libs/src/ujson/python/objToJSON.c

Lines changed: 11 additions & 1 deletion
@@ -221,8 +221,18 @@ static PyObject *get_values(PyObject *obj) {
         // The special cases to worry about are dt64tz and category[dt64tz].
         // In both cases we want the UTC-localized datetime64 ndarray,
         // without going through and object array of Timestamps.
+        if (PyObject_HasAttrString(obj, "tz")) {
+            PyObject *tz = PyObject_GetAttrString(obj, "tz");
+            if (tz != Py_None) {
+                // Go through object array if we have dt64tz, since tz info will
+                // be lost if values is used directly.
+                Py_DECREF(tz);
+                values = PyObject_CallMethod(obj, "__array__", NULL);
+                return values;
+            }
+            Py_DECREF(tz);
+        }
         values = PyObject_GetAttrString(obj, "values");
-
         if (values == NULL) {
             // Clear so we can subsequently try another method
             PyErr_Clear();
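
Context for the new branch: on a tz-aware container, ``__array__`` yields an object array of tz-aware Timestamps, while the ``values`` path hands back a bare ``datetime64[ns]`` array with the tz dropped, which is exactly the information loss the comment describes. A quick check (sketch):

    import numpy as np
    import pandas as pd

    idx = pd.date_range("2020-12-28", periods=2, tz="US/Eastern")
    np.asarray(idx)  # object array of tz-aware Timestamps
    idx.values       # datetime64[ns] array; tz info is lost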

pandas/_libs/tslibs/src/datetime/np_datetime.c

Lines changed: 34 additions & 19 deletions
@@ -331,6 +331,31 @@ int cmp_npy_datetimestruct(const npy_datetimestruct *a,
 
     return 0;
 }
+/*
+ * Returns the offset from utc of the timezone as a timedelta.
+ * The caller is responsible for ensuring that the tzinfo
+ * attribute exists on the datetime object.
+ *
+ * If the passed object is timezone naive, Py_None is returned.
+ * If extraction of the offset fails, NULL is returned.
+ *
+ * NOTE: This function is not vendored from numpy.
+ */
+PyObject *extract_utc_offset(PyObject *obj) {
+    PyObject *tmp = PyObject_GetAttrString(obj, "tzinfo");
+    if (tmp == NULL) {
+        return NULL;
+    }
+    if (tmp != Py_None) {
+        PyObject *offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj);
+        if (offset == NULL) {
+            Py_DECREF(tmp);
+            return NULL;
+        }
+        return offset;
+    }
+    return tmp;
+}
 
 /*
  *
@@ -376,32 +401,22 @@ int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
     out->sec = PyLong_AsLong(PyObject_GetAttrString(obj, "second"));
     out->us = PyLong_AsLong(PyObject_GetAttrString(obj, "microsecond"));
 
-    /* Apply the time zone offset if datetime obj is tz-aware */
-    if (PyObject_HasAttrString((PyObject*)obj, "tzinfo")) {
-        tmp = PyObject_GetAttrString(obj, "tzinfo");
-        if (tmp == NULL) {
-            return -1;
-        }
-        if (tmp == Py_None) {
-            Py_DECREF(tmp);
-        } else {
-            PyObject *offset;
+    if (PyObject_HasAttrString(obj, "tzinfo")) {
+        PyObject *offset = extract_utc_offset(obj);
+        /* Apply the time zone offset if datetime obj is tz-aware */
+        if (offset != NULL) {
+            if (offset == Py_None) {
+                Py_DECREF(offset);
+                return 0;
+            }
             PyObject *tmp_int;
             int seconds_offset, minutes_offset;
-
-            /* The utcoffset function should return a timedelta */
-            offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj);
-            if (offset == NULL) {
-                Py_DECREF(tmp);
-                return -1;
-            }
-            Py_DECREF(tmp);
-
             /*
              * The timedelta should have a function "total_seconds"
              * which contains the value we want.
             */
             tmp = PyObject_CallMethod(offset, "total_seconds", "");
+            Py_DECREF(offset);
             if (tmp == NULL) {
                 return -1;
             }
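
A Python rendering of the new helper's contract (illustrative only; the implementation is the C above):

    from datetime import datetime, timezone

    def extract_utc_offset_py(dt):
        # Caller guarantees tzinfo exists: naive -> None,
        # aware -> the timedelta returned by dt.utcoffset()
        if dt.tzinfo is None:
            return None
        return dt.utcoffset()

    extract_utc_offset_py(datetime(2020, 12, 28))                       # None
    extract_utc_offset_py(datetime(2020, 12, 28, tzinfo=timezone.utc))  # timedelta(0)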

pandas/_libs/tslibs/src/datetime/np_datetime.h

Lines changed: 2 additions & 0 deletions
@@ -48,6 +48,8 @@ extern const npy_datetimestruct _M_MAX_DTS;
 // stuff pandas needs
 // ----------------------------------------------------------------------------
 
+PyObject *extract_utc_offset(PyObject *obj);
+
 int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
                                          npy_datetimestruct *out);
pandas/_libs/tslibs/src/datetime/np_datetime_strings.c

Lines changed: 8 additions & 7 deletions
@@ -632,7 +632,7 @@ int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) {
  *  string was too short).
  */
 int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
-                           NPY_DATETIMEUNIT base) {
+                           int utc, NPY_DATETIMEUNIT base) {
     char *substr = outstr;
     int sublen = outlen;
     int tmplen;
@@ -911,13 +911,14 @@ int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
 
 add_time_zone:
     /* UTC "Zulu" time */
-    if (sublen < 1) {
-        goto string_too_short;
+    if (utc) {
+        if (sublen < 1) {
+            goto string_too_short;
+        }
+        substr[0] = 'Z';
+        substr += 1;
+        sublen -= 1;
     }
-    substr[0] = 'Z';
-    substr += 1;
-    sublen -= 1;
-
     /* Add a NULL terminator, and return */
     if (sublen > 0) {
         substr[0] = '\0';
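
The suffix logic is now gated on the new ``utc`` parameter; in pseudocode terms (a sketch of the control flow only):

    def add_time_zone(iso, utc):
        # Only UTC ("Zulu") timestamps get the trailing "Z";
        # tz-naive output is left unsuffixed
        return iso + "Z" if utc else iso

    add_time_zone("2020-12-28T00:00:00.000", utc=False)  # unchanged
    add_time_zone("2020-12-28T00:00:00.000", utc=True)   # gets "...Z"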
