From 68cfbd8d2f41a793ec226369834e714b35a667d8 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 30 Aug 2020 12:05:16 +0100 Subject: [PATCH 001/409] MAINT: Use utf8 strings in more of datetime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes an omission in a previous patch that did not allow μs in datetime_as_string. This also will very slightly speed up most uses, since all but very unusual code will be passing in `str` not `bytes`. --- numpy/core/src/multiarray/datetime_busday.c | 20 ++++----- .../core/src/multiarray/datetime_busdaycal.c | 25 ++++++----- numpy/core/src/multiarray/datetime_strings.c | 43 +++++++++++-------- numpy/core/tests/test_datetime.py | 5 ++- 4 files changed, 49 insertions(+), 44 deletions(-) diff --git a/numpy/core/src/multiarray/datetime_busday.c b/numpy/core/src/multiarray/datetime_busday.c index d3cce8a370da..5acba600afa6 100644 --- a/numpy/core/src/multiarray/datetime_busday.c +++ b/numpy/core/src/multiarray/datetime_busday.c @@ -834,24 +834,24 @@ static int PyArray_BusDayRollConverter(PyObject *roll_in, NPY_BUSDAY_ROLL *roll) { PyObject *obj = roll_in; - char *str; - Py_ssize_t len; - /* Make obj into an ASCII string */ - Py_INCREF(obj); - if (PyUnicode_Check(obj)) { - /* accept unicode input */ - PyObject *obj_str; - obj_str = PyUnicode_AsASCIIString(obj); + /* Make obj into an UTF8 string */ + if (PyBytes_Check(obj)) { + /* accept bytes input */ + PyObject *obj_str = PyUnicode_FromEncodedObject(obj, NULL, NULL); if (obj_str == NULL) { Py_DECREF(obj); return 0; } - Py_DECREF(obj); obj = obj_str; } + else { + Py_INCREF(obj); + } - if (PyBytes_AsStringAndSize(obj, &str, &len) < 0) { + Py_ssize_t len; + char const *str = PyUnicode_AsUTF8AndSize(obj, &len); + if (str == NULL) { Py_DECREF(obj); return 0; } diff --git a/numpy/core/src/multiarray/datetime_busdaycal.c b/numpy/core/src/multiarray/datetime_busdaycal.c index 2374eaa63453..906e3b539d4a 100644 --- a/numpy/core/src/multiarray/datetime_busdaycal.c +++ b/numpy/core/src/multiarray/datetime_busdaycal.c @@ -30,33 +30,32 @@ PyArray_WeekMaskConverter(PyObject *weekmask_in, npy_bool *weekmask) { PyObject *obj = weekmask_in; - /* Make obj into an ASCII string if it is UNICODE */ - Py_INCREF(obj); - if (PyUnicode_Check(obj)) { - /* accept unicode input */ - PyObject *obj_str; - obj_str = PyUnicode_AsASCIIString(obj); + /* Make obj into an UTF8 string */ + if (PyBytes_Check(obj)) { + /* accept bytes input */ + PyObject *obj_str = PyUnicode_FromEncodedObject(obj, NULL, NULL); if (obj_str == NULL) { Py_DECREF(obj); return 0; } - Py_DECREF(obj); obj = obj_str; } + else { + Py_INCREF(obj); + } + if (PyBytes_Check(obj)) { - char *str; Py_ssize_t len; - int i; - - if (PyBytes_AsStringAndSize(obj, &str, &len) < 0) { + char const *str = PyUnicode_AsUTF8AndSize(obj, &len); + if (str == NULL) { Py_DECREF(obj); return 0; } /* Length 7 is a string like "1111100" */ if (len == 7) { - for (i = 0; i < 7; ++i) { + for (int i = 0; i < 7; ++i) { switch(str[i]) { case '0': weekmask[i] = 0; @@ -75,7 +74,7 @@ PyArray_WeekMaskConverter(PyObject *weekmask_in, npy_bool *weekmask) general_weekmask_string: /* a string like "SatSun" or "Mon Tue Wed" */ memset(weekmask, 0, 7); - for (i = 0; i < len; i += 3) { + for (int i = 0; i < len; i += 3) { while (isspace(str[i])) ++i; diff --git a/numpy/core/src/multiarray/datetime_strings.c b/numpy/core/src/multiarray/datetime_strings.c index f847c7ea8e9c..49c7e81eb377 100644 --- a/numpy/core/src/multiarray/datetime_strings.c +++ 
b/numpy/core/src/multiarray/datetime_strings.c @@ -1385,21 +1385,23 @@ array_datetime_as_string(PyObject *NPY_UNUSED(self), PyObject *args, /* Parse the input unit if provided */ if (unit_in != NULL && unit_in != Py_None) { PyObject *strobj; - char *str = NULL; - Py_ssize_t len = 0; - if (PyUnicode_Check(unit_in)) { - strobj = PyUnicode_AsASCIIString(unit_in); - if (strobj == NULL) { - goto fail; + if (PyBytes_Check(unit_in)) { + /* accept bytes input */ + PyObject *obj_str = PyUnicode_FromEncodedObject(unit_in, NULL, NULL); + if (obj_str == NULL) { + return 0; } + strobj = obj_str; } else { + Py_INCREF(unit_in); strobj = unit_in; - Py_INCREF(strobj); } - if (PyBytes_AsStringAndSize(strobj, &str, &len) < 0) { + Py_ssize_t len = 0; + char *str = PyUnicode_AsUTF8AndSize(strobj, &len); + if (str == NULL) { Py_DECREF(strobj); goto fail; } @@ -1434,24 +1436,27 @@ array_datetime_as_string(PyObject *NPY_UNUSED(self), PyObject *args, /* Get the input time zone */ if (timezone_obj != NULL) { - /* Convert to ASCII if it's unicode */ - if (PyUnicode_Check(timezone_obj)) { - /* accept unicode input */ - PyObject *obj_str; - obj_str = PyUnicode_AsASCIIString(timezone_obj); + PyObject *strobj; + if (PyBytes_Check(timezone_obj)) { + /* accept bytes input */ + PyObject *obj_str = PyUnicode_FromEncodedObject(timezone_obj, NULL, NULL); if (obj_str == NULL) { goto fail; } - Py_DECREF(timezone_obj); - timezone_obj = obj_str; + strobj = obj_str; } + else { + Py_INCREF(timezone_obj); + strobj = unit_in; + } + + Py_SETREF(timezone_obj, strobj); /* Check for the supported string inputs */ - if (PyBytes_Check(timezone_obj)) { - char *str; + if (PyUnicode_Check(timezone_obj)) { Py_ssize_t len; - - if (PyBytes_AsStringAndSize(timezone_obj, &str, &len) < 0) { + char const *str = PyUnicode_AsUTF8AndSize(timezone_obj, &len); + if (str == NULL) { goto fail; } diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py index f725091c5067..f58cf307a0de 100644 --- a/numpy/core/tests/test_datetime.py +++ b/numpy/core/tests/test_datetime.py @@ -1654,8 +1654,9 @@ def test_datetime_as_string(self): '1959-10-13T12:34:56') assert_equal(np.datetime_as_string(np.datetime64(datetime, 'ms')), '1959-10-13T12:34:56.789') - assert_equal(np.datetime_as_string(np.datetime64(datetime, 'us')), - '1959-10-13T12:34:56.789012') + for us in ['us', 'μs', b'us']: # check non-ascii and bytes too + assert_equal(np.datetime_as_string(np.datetime64(datetime, us)), + '1959-10-13T12:34:56.789012') datetime = '1969-12-31T23:34:56.789012345678901234' From a7e1d210cdf5cb9b24d3f0fed218e8afab914282 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 30 Aug 2020 13:31:36 +0100 Subject: [PATCH 002/409] Update numpy/core/src/multiarray/datetime_busday.c --- numpy/core/src/multiarray/datetime_busday.c | 1 - 1 file changed, 1 deletion(-) diff --git a/numpy/core/src/multiarray/datetime_busday.c b/numpy/core/src/multiarray/datetime_busday.c index 5acba600afa6..2cf1575515bc 100644 --- a/numpy/core/src/multiarray/datetime_busday.c +++ b/numpy/core/src/multiarray/datetime_busday.c @@ -840,7 +840,6 @@ PyArray_BusDayRollConverter(PyObject *roll_in, NPY_BUSDAY_ROLL *roll) /* accept bytes input */ PyObject *obj_str = PyUnicode_FromEncodedObject(obj, NULL, NULL); if (obj_str == NULL) { - Py_DECREF(obj); return 0; } obj = obj_str; From cd7acfd8d8a6d43dc4f64f4552a10a589cb30e97 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 30 Aug 2020 18:59:26 +0100 Subject: [PATCH 003/409] Update numpy/core/src/multiarray/datetime_busdaycal.c --- 
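Note: as in the previous commit, the `Py_DECREF(obj)` removed here sat on an
error path where `obj` is still the caller's borrowed reference (the
`Py_INCREF` now happens only in the `else` branch), so it would have
over-released the input bytes object. The failing branch can be reached from
Python with non-UTF8 bytes; an illustrative sketch using only public API
(the exact exception comes from the UTF8 decode):

```python
import numpy as np

# b"\xff" is not valid UTF-8, so the bytes -> str conversion fails
# inside PyArray_WeekMaskConverter and takes the error path patched here.
np.busdaycalendar(weekmask=b"\xff")  # should raise UnicodeDecodeError
```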
numpy/core/src/multiarray/datetime_busdaycal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/numpy/core/src/multiarray/datetime_busdaycal.c b/numpy/core/src/multiarray/datetime_busdaycal.c index 906e3b539d4a..341b80a86f3e 100644 --- a/numpy/core/src/multiarray/datetime_busdaycal.c +++ b/numpy/core/src/multiarray/datetime_busdaycal.c @@ -35,7 +35,6 @@ PyArray_WeekMaskConverter(PyObject *weekmask_in, npy_bool *weekmask) /* accept bytes input */ PyObject *obj_str = PyUnicode_FromEncodedObject(obj, NULL, NULL); if (obj_str == NULL) { - Py_DECREF(obj); return 0; } obj = obj_str; From 911fa12539ca438142409eaabcf3404557aca650 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Mon, 31 Aug 2020 09:52:01 +0100 Subject: [PATCH 004/409] Apply suggestions from code review --- numpy/core/src/multiarray/datetime_strings.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/core/src/multiarray/datetime_strings.c b/numpy/core/src/multiarray/datetime_strings.c index 49c7e81eb377..8665d329579f 100644 --- a/numpy/core/src/multiarray/datetime_strings.c +++ b/numpy/core/src/multiarray/datetime_strings.c @@ -1400,7 +1400,7 @@ array_datetime_as_string(PyObject *NPY_UNUSED(self), PyObject *args, } Py_ssize_t len = 0; - char *str = PyUnicode_AsUTF8AndSize(strobj, &len); + char const *str = PyUnicode_AsUTF8AndSize(strobj, &len); if (str == NULL) { Py_DECREF(strobj); goto fail; @@ -1447,7 +1447,7 @@ array_datetime_as_string(PyObject *NPY_UNUSED(self), PyObject *args, } else { Py_INCREF(timezone_obj); - strobj = unit_in; + strobj = timezone_obj; } Py_SETREF(timezone_obj, strobj); From cdd5becba9a39d054f0ae34a493dd16d613f8cb9 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Mon, 31 Aug 2020 10:57:06 +0100 Subject: [PATCH 005/409] Update numpy/core/src/multiarray/datetime_busdaycal.c --- numpy/core/src/multiarray/datetime_busdaycal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/core/src/multiarray/datetime_busdaycal.c b/numpy/core/src/multiarray/datetime_busdaycal.c index 341b80a86f3e..cec7135ffea3 100644 --- a/numpy/core/src/multiarray/datetime_busdaycal.c +++ b/numpy/core/src/multiarray/datetime_busdaycal.c @@ -44,7 +44,7 @@ PyArray_WeekMaskConverter(PyObject *weekmask_in, npy_bool *weekmask) } - if (PyBytes_Check(obj)) { + if (PyUnicode_Check(obj)) { Py_ssize_t len; char const *str = PyUnicode_AsUTF8AndSize(obj, &len); if (str == NULL) { From b0dd380451085c250f6239dabb30d4e7235ac41b Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Fri, 21 Aug 2020 16:40:22 -0500 Subject: [PATCH 006/409] MAINT: Move dtype instance to DType class cast This function is better housed in convert_datatype.c and was only in array_coercion, because we did not use it anywhere else before. This also somewhat modifies the logic and cleans up use-cases of it in array_coercion.c --- numpy/core/src/multiarray/array_coercion.c | 61 ++++---------------- numpy/core/src/multiarray/convert_datatype.c | 49 ++++++++++++++++ numpy/core/src/multiarray/convert_datatype.h | 3 + 3 files changed, 62 insertions(+), 51 deletions(-) diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c index ffb5bd632111..4e2354991034 100644 --- a/numpy/core/src/multiarray/array_coercion.c +++ b/numpy/core/src/multiarray/array_coercion.c @@ -306,51 +306,6 @@ discover_dtype_from_pyobject( } -/* - * This function should probably become public API eventually. At this - * time it is implemented by falling back to `PyArray_AdaptFlexibleDType`. 
- * We will use `CastingImpl[from, to].adjust_descriptors(...)` to implement - * this logic. - */ -static NPY_INLINE PyArray_Descr * -cast_descriptor_to_fixed_dtype( - PyArray_Descr *descr, PyArray_DTypeMeta *fixed_DType) -{ - if (fixed_DType == NULL) { - /* Nothing to do, we only need to promote the new dtype */ - Py_INCREF(descr); - return descr; - } - - if (!fixed_DType->parametric) { - /* - * Don't actually do anything, the default is always the result - * of any cast. - */ - return fixed_DType->default_descr(fixed_DType); - } - if (PyObject_TypeCheck((PyObject *)descr, (PyTypeObject *)fixed_DType)) { - Py_INCREF(descr); - return descr; - } - /* - * TODO: When this is implemented for all dtypes, the special cases - * can be removed... - */ - if (fixed_DType->legacy && fixed_DType->parametric && - NPY_DTYPE(descr)->legacy) { - PyArray_Descr *flex_dtype = PyArray_DescrFromType(fixed_DType->type_num); - return PyArray_AdaptFlexibleDType(descr, flex_dtype); - } - - PyErr_SetString(PyExc_NotImplementedError, - "Must use casting to find the correct dtype, this is " - "not yet implemented! " - "(It should not be possible to hit this code currently!)"); - return NULL; -} - - /** * Discover the correct descriptor from a known DType class and scalar. * If the fixed DType can discover a dtype instance/descr all is fine, @@ -392,7 +347,7 @@ find_scalar_descriptor( return descr; } - Py_SETREF(descr, cast_descriptor_to_fixed_dtype(descr, fixed_DType)); + Py_SETREF(descr, PyArray_CastDescrToDType(descr, fixed_DType)); return descr; } @@ -727,8 +682,13 @@ find_descriptor_from_array( enum _dtype_discovery_flags flags = 0; *out_descr = NULL; - if (NPY_UNLIKELY(DType != NULL && DType->parametric && - PyArray_ISOBJECT(arr))) { + if (DType == NULL) { + *out_descr = PyArray_DESCR(arr); + Py_INCREF(*out_descr); + return 0; + } + + if (NPY_UNLIKELY(DType->parametric && PyArray_ISOBJECT(arr))) { /* * We have one special case, if (and only if) the input array is of * object DType and the dtype is not fixed already but parametric. @@ -777,7 +737,7 @@ find_descriptor_from_array( } Py_DECREF(iter); } - else if (DType != NULL && NPY_UNLIKELY(DType->type_num == NPY_DATETIME) && + else if (NPY_UNLIKELY(DType->type_num == NPY_DATETIME) && PyArray_ISSTRING(arr)) { /* * TODO: This branch should be deprecated IMO, the workaround is @@ -806,8 +766,7 @@ find_descriptor_from_array( * If this is not an object array figure out the dtype cast, * or simply use the returned DType. */ - *out_descr = cast_descriptor_to_fixed_dtype( - PyArray_DESCR(arr), DType); + *out_descr = PyArray_CastDescrToDType(PyArray_DESCR(arr), DType); if (*out_descr == NULL) { return -1; } diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index d9121707b21a..0ccc0b538e8f 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -1031,6 +1031,55 @@ ensure_dtype_nbo(PyArray_Descr *type) } } + +/** + * This function should possibly become public API eventually. At this + * time it is implemented by falling back to `PyArray_AdaptFlexibleDType`. + * We will use `CastingImpl[from, to].adjust_descriptors(...)` to implement + * this logic. + * Before that, the API needs to be reviewed though. + * + * WARNING: This function currently does not guarantee that `descr` can + * actually be cast to the given DType. 
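+ * For example, adapting an int64 descriptor to the builtin string DType
+ * yields an "S21" descriptor (via the `PyArray_AdaptFlexibleDType`
+ * fallback). An illustrative sketch, where `String_DType` stands for
+ * however the string DType class is obtained:
+ *
+ *     PyArray_Descr *descr = PyArray_DescrFromType(NPY_INT64);
+ *     PyArray_Descr *sres = PyArray_CastDescrToDType(descr, String_DType);
+ *     Py_DECREF(descr);
+ *
+ * On success `sres` is an "S21" instance; on failure it is NULL with an
+ * error set.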
+ * + * @param descr The dtype instance to adapt "cast" + * @param given_DType The DType class for which we wish to find an instance able + * to represent `descr`. + * @returns Instance of `given_DType`. If `given_DType` is parametric the + * descr may be adapted to hold it. + */ +NPY_NO_EXPORT PyArray_Descr * +PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType) +{ + if (NPY_DTYPE(descr) == given_DType) { + Py_INCREF(descr); + return descr; + } + if (!given_DType->parametric) { + /* + * Don't actually do anything, the default is always the result + * of any cast. + */ + return given_DType->default_descr(given_DType); + } + if (PyObject_TypeCheck((PyObject *)descr, (PyTypeObject *)given_DType)) { + Py_INCREF(descr); + return descr; + } + + if (!given_DType->legacy) { + PyErr_SetString(PyExc_NotImplementedError, + "Must use casting to find the correct DType for a parametric " + "user DType. This is not yet implemented (this error should be " + "unreachable)."); + return NULL; + } + + PyArray_Descr *flex_dtype = PyArray_DescrNew(given_DType->singleton); + return PyArray_AdaptFlexibleDType(descr, flex_dtype); +} + + /*NUMPY_API * Produces the smallest size and lowest kind type to which both * input types can be cast. diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h index 9b7f39db2e33..eef2f7313390 100644 --- a/numpy/core/src/multiarray/convert_datatype.h +++ b/numpy/core/src/multiarray/convert_datatype.h @@ -49,4 +49,7 @@ npy_set_invalid_cast_error( NPY_NO_EXPORT PyArray_Descr * PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype); +NPY_NO_EXPORT PyArray_Descr * +PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType); + #endif From 6b1c643d82e5cb39498f791c0dc5020890229f6e Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Fri, 21 Aug 2020 16:51:42 -0500 Subject: [PATCH 007/409] MAINT: Use existing `ensure_dtype_nbo` in ufunc resolution There were two versions of this, since the merger of umath and multiarraymodule, this is unnecessary. --- numpy/core/src/multiarray/convert_datatype.c | 2 +- numpy/core/src/multiarray/convert_datatype.h | 3 +++ numpy/core/src/umath/ufunc_type_resolution.c | 15 --------------- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index 0ccc0b538e8f..3d81edc172ce 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -1019,7 +1019,7 @@ promote_types(PyArray_Descr *type1, PyArray_Descr *type2, * Returns a new reference to type if it is already NBO, otherwise * returns a copy converted to NBO. 
*/ -static PyArray_Descr * +NPY_NO_EXPORT PyArray_Descr * ensure_dtype_nbo(PyArray_Descr *type) { if (PyArray_ISNBO(type->byteorder)) { diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h index eef2f7313390..507a72266977 100644 --- a/numpy/core/src/multiarray/convert_datatype.h +++ b/numpy/core/src/multiarray/convert_datatype.h @@ -18,6 +18,9 @@ NPY_NO_EXPORT npy_bool can_cast_scalar_to(PyArray_Descr *scal_type, char *scal_data, PyArray_Descr *to, NPY_CASTING casting); +NPY_NO_EXPORT PyArray_Descr * +ensure_dtype_nbo(PyArray_Descr *type); + NPY_NO_EXPORT int should_use_min_scalar(npy_intp narrs, PyArrayObject **arr, npy_intp ndtypes, PyArray_Descr **dtypes); diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c index aa6f34d59119..3abeb2c5a202 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.c +++ b/numpy/core/src/umath/ufunc_type_resolution.c @@ -236,21 +236,6 @@ PyUFunc_ValidateCasting(PyUFuncObject *ufunc, return 0; } -/* - * Returns a new reference to type if it is already NBO, otherwise - * returns a copy converted to NBO. - */ -static PyArray_Descr * -ensure_dtype_nbo(PyArray_Descr *type) -{ - if (PyArray_ISNBO(type->byteorder)) { - Py_INCREF(type); - return type; - } - else { - return PyArray_DescrNewByteorder(type, NPY_NATIVE); - } -} /*UFUNC_API * From d9075b77586e0c7b536d5ec684bfd93c5bcd9439 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Fri, 21 Aug 2020 17:03:59 -0500 Subject: [PATCH 008/409] MAINT: Always define `default_descr()` and simplify code --- numpy/core/src/multiarray/array_coercion.c | 12 +++--------- numpy/core/src/multiarray/dtypemeta.c | 10 ++++++++++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c index 4e2354991034..4f8456fb9499 100644 --- a/numpy/core/src/multiarray/array_coercion.c +++ b/numpy/core/src/multiarray/array_coercion.c @@ -1284,15 +1284,9 @@ PyArray_DiscoverDTypeAndShape( * the correct default. */ if (fixed_DType != NULL) { - if (fixed_DType->default_descr == NULL) { - Py_INCREF(fixed_DType->singleton); - *out_descr = fixed_DType->singleton; - } - else { - *out_descr = fixed_DType->default_descr(fixed_DType); - if (*out_descr == NULL) { - goto fail; - } + *out_descr = fixed_DType->default_descr(fixed_DType); + if (*out_descr == NULL) { + goto fail; } } } diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c index 3026e68e9af0..6e5bf840e018 100644 --- a/numpy/core/src/multiarray/dtypemeta.c +++ b/numpy/core/src/multiarray/dtypemeta.c @@ -193,6 +193,14 @@ discover_datetime_and_timedelta_from_pyobject( } +static PyArray_Descr * +nonparametric_default_descr(PyArray_DTypeMeta *cls) +{ + Py_INCREF(cls->singleton); + return cls->singleton; +} + + static PyArray_Descr * flexible_default_descr(PyArray_DTypeMeta *cls) { @@ -399,6 +407,8 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr) dtype_class->kind = descr->kind; /* Strings and voids have (strange) logic around scalars. 
*/
+    dtype_class->default_descr = nonparametric_default_descr;
+
     dtype_class->is_known_scalar_type = python_builtins_are_known_scalar_types;
 
     if (PyTypeNum_ISDATETIME(descr->type_num)) {

From 76253895fbfbd2b6f5a5d4a5d2c6b96ff2dc5a0c Mon Sep 17 00:00:00 2001
From: Sebastian Berg
Date: Fri, 21 Aug 2020 17:04:41 -0500
Subject: [PATCH 009/409] API,MAINT: Rewrite promotion using common DType and
 common instance

This defines `common_dtype` and `common_instance` (only for parametric
DTypes), and uses them to implement the `PyArray_CommonDType` operation.

`PyArray_CommonDType()` together with the `common_instance` method then
defines the existing PromoteTypes.

This does not (yet) affect "value based promotion" as defined by
`PyArray_ResultType()`.

We also require the step of casting to the common DType to support
examples such as:

```
np.promote_types("S1", "i8") == np.dtype('S21')
```

This step requires finding the string length corresponding to the integer
(21 characters). This is handled here by the `PyArray_CastDescrToDType`
function. However, that function still relies on
`PyArray_AdaptFlexibleDType` and thus does not generalize to arbitrary
DTypes.

See NEP 42 (currently the "Common DType Operations" section):
https://numpy.org/neps/nep-0042-new-dtypes.html#common-dtype-operations
---
 numpy/core/include/numpy/ndarraytypes.h      |   6 +
 numpy/core/src/multiarray/convert_datatype.c | 370 ++++---------------
 numpy/core/src/multiarray/convert_datatype.h |   3 +
 numpy/core/src/multiarray/dtypemeta.c        | 137 ++++++-
 numpy/core/src/multiarray/dtypemeta.h        |  16 +
 numpy/core/src/multiarray/usertypes.c        | 121 ++++++
 numpy/core/src/multiarray/usertypes.h        |   4 +
 7 files changed, 350 insertions(+), 307 deletions(-)

diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index bbcf468c1f35..df480f96dae5 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -1839,6 +1839,10 @@ typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size,
         PyArray_DTypeMeta *cls, PyTypeObject *obj);
 
     typedef PyArray_Descr *(default_descr_function)(PyArray_DTypeMeta *cls);
+    typedef PyArray_DTypeMeta *(common_dtype_function)(
+            PyArray_DTypeMeta *dtype1, PyArray_DTypeMeta *dtype2);
+    typedef PyArray_Descr *(common_instance_function)(
+            PyArray_Descr *dtype1, PyArray_Descr *dtype2);
 
     /*
      * While NumPy DTypes would not need to be heap types the plan is to
@@ -1894,6 +1898,8 @@
     discover_descr_from_pyobject_function *discover_descr_from_pyobject;
     is_known_scalar_type_function *is_known_scalar_type;
     default_descr_function *default_descr;
+    common_dtype_function *common_dtype;
+    common_instance_function *common_instance;
 };
 
 #endif  /* NPY_INTERNAL_BUILD */
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index 3d81edc172ce..f700bdc990df 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -1080,6 +1080,50 @@ PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType)
 }
 
 
+/**
+ * This function defines the common DType operator.
+ *
+ * Note that the common DType will not be "object" (unless one of the dtypes
+ * is object), even though object can technically represent all values
+ * correctly.
+ *
+ * TODO: Before exposure, we should review the return value (e.g. no error
+ * when no common DType is found).
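+ * As a concrete illustration: the common DType of the int64 and float64
+ * DTypes is the float64 DType, while e.g. datetime64 and float64 have no
+ * common DType at all and the error branch below is taken.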
+ * + * @param dtype1 DType class to find the common type for. + * @param dtype2 Second DType class. + * @return The common DType or NULL with an error set + */ +NPY_NO_EXPORT PyArray_DTypeMeta * +PyArray_CommonDType(PyArray_DTypeMeta *dtype1, PyArray_DTypeMeta *dtype2) +{ + if (dtype1 == dtype2) { + Py_INCREF(dtype1); + return dtype1; + } + + PyArray_DTypeMeta *common_dtype; + + common_dtype = dtype1->common_dtype(dtype1, dtype2); + if (common_dtype == (PyArray_DTypeMeta *)Py_NotImplemented) { + Py_DECREF(common_dtype); + common_dtype = dtype2->common_dtype(dtype2, dtype1); + } + if (common_dtype == NULL) { + return NULL; + } + if (common_dtype == (PyArray_DTypeMeta *)Py_NotImplemented) { + Py_DECREF(Py_NotImplemented); + PyErr_Format(PyExc_TypeError, + "The DTypes %S and %S do not have a common DType. " + "For example they cannot be stored in a single array unless " + "the dtype is `object`.", dtype1, dtype2); + return NULL; + } + return common_dtype; +} + + /*NUMPY_API * Produces the smallest size and lowest kind type to which both * input types can be cast. @@ -1087,320 +1131,48 @@ PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType) NPY_NO_EXPORT PyArray_Descr * PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) { - int type_num1, type_num2, ret_type_num; + PyArray_DTypeMeta *common_dtype; + PyArray_Descr *res; - /* - * Fast path for identical dtypes. - * - * Non-native-byte-order types are converted to native ones below, so we - * can't quit early. - */ + /* Fast path for identical inputs (NOTE: This path preserves metadata!) */ if (type1 == type2 && PyArray_ISNBO(type1->byteorder)) { Py_INCREF(type1); return type1; } - type_num1 = type1->type_num; - type_num2 = type2->type_num; - - /* If they're built-in types, use the promotion table */ - if (type_num1 < NPY_NTYPES && type_num2 < NPY_NTYPES) { - ret_type_num = _npy_type_promotion_table[type_num1][type_num2]; - /* - * The table doesn't handle string/unicode/void/datetime/timedelta, - * so check the result - */ - if (ret_type_num >= 0) { - return PyArray_DescrFromType(ret_type_num); - } - } - /* If one or both are user defined, calculate it */ - else { - int skind1 = NPY_NOSCALAR, skind2 = NPY_NOSCALAR, skind; - - if (PyArray_CanCastTo(type2, type1)) { - /* Promoted types are always native byte order */ - return ensure_dtype_nbo(type1); - } - else if (PyArray_CanCastTo(type1, type2)) { - /* Promoted types are always native byte order */ - return ensure_dtype_nbo(type2); - } - - /* Convert the 'kind' char into a scalar kind */ - switch (type1->kind) { - case 'b': - skind1 = NPY_BOOL_SCALAR; - break; - case 'u': - skind1 = NPY_INTPOS_SCALAR; - break; - case 'i': - skind1 = NPY_INTNEG_SCALAR; - break; - case 'f': - skind1 = NPY_FLOAT_SCALAR; - break; - case 'c': - skind1 = NPY_COMPLEX_SCALAR; - break; - } - switch (type2->kind) { - case 'b': - skind2 = NPY_BOOL_SCALAR; - break; - case 'u': - skind2 = NPY_INTPOS_SCALAR; - break; - case 'i': - skind2 = NPY_INTNEG_SCALAR; - break; - case 'f': - skind2 = NPY_FLOAT_SCALAR; - break; - case 'c': - skind2 = NPY_COMPLEX_SCALAR; - break; - } - - /* If both are scalars, there may be a promotion possible */ - if (skind1 != NPY_NOSCALAR && skind2 != NPY_NOSCALAR) { - - /* Start with the larger scalar kind */ - skind = (skind1 > skind2) ? 
skind1 : skind2; - ret_type_num = _npy_smallest_type_of_kind_table[skind]; - - for (;;) { - - /* If there is no larger type of this kind, try a larger kind */ - if (ret_type_num < 0) { - ++skind; - /* Use -1 to signal no promoted type found */ - if (skind < NPY_NSCALARKINDS) { - ret_type_num = _npy_smallest_type_of_kind_table[skind]; - } - else { - break; - } - } - - /* If we found a type to which we can promote both, done! */ - if (PyArray_CanCastSafely(type_num1, ret_type_num) && - PyArray_CanCastSafely(type_num2, ret_type_num)) { - return PyArray_DescrFromType(ret_type_num); - } - - /* Try the next larger type of this kind */ - ret_type_num = _npy_next_larger_type_table[ret_type_num]; - } - - } - - PyErr_SetString(PyExc_TypeError, - "invalid type promotion with custom data type"); + common_dtype = PyArray_CommonDType(NPY_DTYPE(type1), NPY_DTYPE(type2)); + if (common_dtype == NULL) { return NULL; } - switch (type_num1) { - /* BOOL can convert to anything except datetime/void */ - case NPY_BOOL: - if (type_num2 == NPY_STRING || type_num2 == NPY_UNICODE) { - int char_size = 1; - if (type_num2 == NPY_UNICODE) { - char_size = 4; - } - if (type2->elsize < 5 * char_size) { - PyArray_Descr *ret = NULL; - PyArray_Descr *temp = PyArray_DescrNew(type2); - ret = ensure_dtype_nbo(temp); - ret->elsize = 5 * char_size; - Py_DECREF(temp); - return ret; - } - return ensure_dtype_nbo(type2); - } - else if (type_num2 != NPY_DATETIME && type_num2 != NPY_VOID) { - return ensure_dtype_nbo(type2); - } - break; - /* For strings and unicodes, take the larger size */ - case NPY_STRING: - if (type_num2 == NPY_STRING) { - if (type1->elsize > type2->elsize) { - return ensure_dtype_nbo(type1); - } - else { - return ensure_dtype_nbo(type2); - } - } - else if (type_num2 == NPY_UNICODE) { - if (type2->elsize >= type1->elsize * 4) { - return ensure_dtype_nbo(type2); - } - else { - PyArray_Descr *d = PyArray_DescrNewFromType(NPY_UNICODE); - if (d == NULL) { - return NULL; - } - d->elsize = type1->elsize * 4; - return d; - } - } - /* Allow NUMBER -> STRING */ - else if (PyTypeNum_ISNUMBER(type_num2)) { - PyArray_Descr *ret = NULL; - PyArray_Descr *temp = PyArray_DescrNew(type1); - PyDataType_MAKEUNSIZED(temp); - - temp = PyArray_AdaptFlexibleDType(type2, temp); - if (temp == NULL) { - return NULL; - } - if (temp->elsize > type1->elsize) { - ret = ensure_dtype_nbo(temp); - } - else { - ret = ensure_dtype_nbo(type1); - } - Py_DECREF(temp); - return ret; - } - break; - case NPY_UNICODE: - if (type_num2 == NPY_UNICODE) { - if (type1->elsize > type2->elsize) { - return ensure_dtype_nbo(type1); - } - else { - return ensure_dtype_nbo(type2); - } - } - else if (type_num2 == NPY_STRING) { - if (type1->elsize >= type2->elsize * 4) { - return ensure_dtype_nbo(type1); - } - else { - PyArray_Descr *d = PyArray_DescrNewFromType(NPY_UNICODE); - if (d == NULL) { - return NULL; - } - d->elsize = type2->elsize * 4; - return d; - } - } - /* Allow NUMBER -> UNICODE */ - else if (PyTypeNum_ISNUMBER(type_num2)) { - PyArray_Descr *ret = NULL; - PyArray_Descr *temp = PyArray_DescrNew(type1); - PyDataType_MAKEUNSIZED(temp); - temp = PyArray_AdaptFlexibleDType(type2, temp); - if (temp == NULL) { - return NULL; - } - if (temp->elsize > type1->elsize) { - ret = ensure_dtype_nbo(temp); - } - else { - ret = ensure_dtype_nbo(type1); - } - Py_DECREF(temp); - return ret; - } - break; - case NPY_DATETIME: - case NPY_TIMEDELTA: - if (type_num2 == NPY_DATETIME || type_num2 == NPY_TIMEDELTA) { - return datetime_type_promotion(type1, type2); - } - break; + if 
(!common_dtype->parametric) { + res = common_dtype->default_descr(common_dtype); + Py_DECREF(common_dtype); + return res; } - switch (type_num2) { - /* BOOL can convert to almost anything */ - case NPY_BOOL: - if (type_num2 == NPY_STRING || type_num2 == NPY_UNICODE) { - int char_size = 1; - if (type_num2 == NPY_UNICODE) { - char_size = 4; - } - if (type2->elsize < 5 * char_size) { - PyArray_Descr *ret = NULL; - PyArray_Descr *temp = PyArray_DescrNew(type2); - ret = ensure_dtype_nbo(temp); - ret->elsize = 5 * char_size; - Py_DECREF(temp); - return ret; - } - return ensure_dtype_nbo(type2); - } - else if (type_num1 != NPY_DATETIME && type_num1 != NPY_TIMEDELTA && - type_num1 != NPY_VOID) { - return ensure_dtype_nbo(type1); - } - break; - case NPY_STRING: - /* Allow NUMBER -> STRING */ - if (PyTypeNum_ISNUMBER(type_num1)) { - PyArray_Descr *ret = NULL; - PyArray_Descr *temp = PyArray_DescrNew(type2); - PyDataType_MAKEUNSIZED(temp); - temp = PyArray_AdaptFlexibleDType(type1, temp); - if (temp == NULL) { - return NULL; - } - if (temp->elsize > type2->elsize) { - ret = ensure_dtype_nbo(temp); - } - else { - ret = ensure_dtype_nbo(type2); - } - Py_DECREF(temp); - return ret; - } - break; - case NPY_UNICODE: - /* Allow NUMBER -> UNICODE */ - if (PyTypeNum_ISNUMBER(type_num1)) { - PyArray_Descr *ret = NULL; - PyArray_Descr *temp = PyArray_DescrNew(type2); - PyDataType_MAKEUNSIZED(temp); - temp = PyArray_AdaptFlexibleDType(type1, temp); - if (temp == NULL) { - return NULL; - } - if (temp->elsize > type2->elsize) { - ret = ensure_dtype_nbo(temp); - } - else { - ret = ensure_dtype_nbo(type2); - } - Py_DECREF(temp); - return ret; - } - break; - case NPY_TIMEDELTA: - if (PyTypeNum_ISSIGNED(type_num1)) { - return ensure_dtype_nbo(type2); - } - break; + /* Cast the input types to the common DType if necessary */ + type1 = PyArray_CastDescrToDType(type1, common_dtype); + if (type1 == NULL) { + Py_DECREF(common_dtype); + return NULL; } - - /* For types equivalent up to endianness, can return either */ - if (PyArray_CanCastTypeTo(type1, type2, NPY_EQUIV_CASTING)) { - return ensure_dtype_nbo(type1); + type2 = PyArray_CastDescrToDType(type2, common_dtype); + if (type2 == NULL) { + Py_DECREF(type1); + Py_DECREF(common_dtype); + return NULL; } - /* TODO: Also combine fields, subarrays, strings, etc */ - /* - printf("invalid type promotion: "); - PyObject_Print(type1, stdout, 0); - printf(" "); - PyObject_Print(type2, stdout, 0); - printf("\n"); - */ - PyErr_SetString(PyExc_TypeError, "invalid type promotion"); - return NULL; + * And find the common instance of the two inputs + * NOTE: Common instance preserves metadata (normally and of one input) + */ + res = common_dtype->common_instance(type1, type2); + Py_DECREF(type1); + Py_DECREF(type2); + Py_DECREF(common_dtype); + return res; } /* diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h index 507a72266977..a2b36b497fbe 100644 --- a/numpy/core/src/multiarray/convert_datatype.h +++ b/numpy/core/src/multiarray/convert_datatype.h @@ -10,6 +10,9 @@ PyArray_ObjectType(PyObject *op, int minimum_type); NPY_NO_EXPORT PyArrayObject ** PyArray_ConvertToCommonType(PyObject *op, int *retn); +NPY_NO_EXPORT PyArray_DTypeMeta * +PyArray_CommonDType(PyArray_DTypeMeta *dtype1, PyArray_DTypeMeta *dtype2); + NPY_NO_EXPORT int PyArray_ValidType(int type); diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c index 6e5bf840e018..92f50247e28a 100644 --- a/numpy/core/src/multiarray/dtypemeta.c 
+++ b/numpy/core/src/multiarray/dtypemeta.c @@ -15,6 +15,9 @@ #include "dtypemeta.h" #include "_datetime.h" #include "array_coercion.h" +#include "scalartypes.h" +#include "convert_datatype.h" +#include "usertypes.h" static void @@ -216,6 +219,34 @@ flexible_default_descr(PyArray_DTypeMeta *cls) } +static PyArray_Descr * +string_unicode_common_instance(PyArray_Descr *descr1, PyArray_Descr *descr2) +{ + if (descr1->elsize >= descr2->elsize) { + return ensure_dtype_nbo(descr1); + } + else { + return ensure_dtype_nbo(descr2); + } +} + + +static PyArray_Descr * +void_common_instance(PyArray_Descr *descr1, PyArray_Descr *descr2) +{ + /* + * We currently do not support promotion of void types unless they + * are equivalent. + */ + if (!PyArray_CanCastTypeTo(descr1, descr2, NPY_EQUIV_CASTING)) { + PyErr_SetString(PyExc_TypeError, + "invalid type promotion with structured or void datatype(s)."); + return NULL; + } + Py_INCREF(descr1); + return descr1; +} + static int python_builtins_are_known_scalar_types( PyArray_DTypeMeta *NPY_UNUSED(cls), PyTypeObject *pytype) @@ -289,6 +320,86 @@ string_known_scalar_types( } +/* + * The following set of functions define the common dtype operator for + * the builtin types. + */ +static PyArray_DTypeMeta * +default_builtin_common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) +{ + assert(cls->type_num < NPY_NTYPES); + if (!other->legacy || other->type_num > cls->type_num) { + /* Let the more generic (larger type number) DType handle this */ + Py_INCREF(Py_NotImplemented); + return (PyArray_DTypeMeta *)Py_NotImplemented; + } + + /* + * Note: The use of the promotion table should probably be revised at + * some point. It may be most useful to remove it entirely and then + * consider adding a fast path/cache `PyArray_CommonDType()` itself. + */ + int common_num = _npy_type_promotion_table[cls->type_num][other->type_num]; + if (common_num < 0) { + Py_INCREF(Py_NotImplemented); + return (PyArray_DTypeMeta *)Py_NotImplemented; + } + return PyArray_DTypeFromTypeNum(common_num); +} + + +static PyArray_DTypeMeta * +string_unicode_common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) +{ + assert(cls->type_num < NPY_NTYPES); + if (!other->legacy || other->type_num > cls->type_num || + other->type_num == NPY_OBJECT) { + /* Let the more generic (larger type number) DType handle this */ + Py_INCREF(Py_NotImplemented); + return (PyArray_DTypeMeta *)Py_NotImplemented; + } + /* + * The builtin types are ordered by complexity (aside from object) here. + * Arguably, we should not consider numbers and strings "common", but + * we currently do. + */ + Py_INCREF(cls); + return cls; +} + +static PyArray_DTypeMeta * +datetime_common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) +{ + if (cls->type_num == NPY_DATETIME && other->type_num == NPY_TIMEDELTA) { + /* + * TODO: We actually currently do allow promotion here. This is + * currently relied on within `np.add(datetime, timedelta)`, + * while for concatenation the cast step will fail. + */ + Py_INCREF(cls); + return cls; + } + return default_builtin_common_dtype(cls, other); +} + + + +static PyArray_DTypeMeta * +object_common_dtype( + PyArray_DTypeMeta *cls, PyArray_DTypeMeta *NPY_UNUSED(other)) +{ + /* + * The object DType is special in that it can represent everything, + * including all potential user DTypes. + * One reason to defer (or error) here might be if the other DType + * does not support scalars so that e.g. `arr1d[0]` returns a 0-D array + * and `arr.astype(object)` would fail. 
But object casts are special. + */ + Py_INCREF(cls); + return cls; +} + + /** * This function takes a PyArray_Descr and replaces its base class with * a newly created dtype subclass (DTypeMeta instances). @@ -406,16 +517,28 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr) dtype_class->f = descr->f; dtype_class->kind = descr->kind; - /* Strings and voids have (strange) logic around scalars. */ + /* Set default functions (correct for most dtypes, override below) */ dtype_class->default_descr = nonparametric_default_descr; - + dtype_class->discover_descr_from_pyobject = ( + nonparametric_discover_descr_from_pyobject); dtype_class->is_known_scalar_type = python_builtins_are_known_scalar_types; + dtype_class->common_dtype = default_builtin_common_dtype; + dtype_class->common_instance = NULL; - if (PyTypeNum_ISDATETIME(descr->type_num)) { + if (PyTypeNum_ISUSERDEF(descr->type_num)) { + dtype_class->common_dtype = legacy_userdtype_common_dtype_function; + } + else if (descr->type_num == NPY_OBJECT) { + dtype_class->common_dtype = object_common_dtype; + } + else if (PyTypeNum_ISDATETIME(descr->type_num)) { /* Datetimes are flexible, but were not considered previously */ dtype_class->parametric = NPY_TRUE; + dtype_class->default_descr = flexible_default_descr; dtype_class->discover_descr_from_pyobject = ( discover_datetime_and_timedelta_from_pyobject); + dtype_class->common_dtype = datetime_common_dtype; + dtype_class->common_instance = datetime_type_promotion; if (descr->type_num == NPY_DATETIME) { dtype_class->is_known_scalar_type = datetime_known_scalar_types; } @@ -426,18 +549,16 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr) if (descr->type_num == NPY_VOID) { dtype_class->discover_descr_from_pyobject = ( void_discover_descr_from_pyobject); + dtype_class->common_instance = void_common_instance; } else { dtype_class->is_known_scalar_type = string_known_scalar_types; dtype_class->discover_descr_from_pyobject = ( string_discover_descr_from_pyobject); + dtype_class->common_dtype = string_unicode_common_dtype; + dtype_class->common_instance = string_unicode_common_instance; } } - else { - /* nonparametric case */ - dtype_class->discover_descr_from_pyobject = ( - nonparametric_discover_descr_from_pyobject); - } if (_PyArray_MapPyTypeToDType(dtype_class, descr->typeobj, PyTypeNum_ISUSERDEF(dtype_class->type_num)) < 0) { diff --git a/numpy/core/src/multiarray/dtypemeta.h b/numpy/core/src/multiarray/dtypemeta.h index e0909a7eb4b2..83cf7c07e944 100644 --- a/numpy/core/src/multiarray/dtypemeta.h +++ b/numpy/core/src/multiarray/dtypemeta.h @@ -2,6 +2,22 @@ #define _NPY_DTYPEMETA_H #define NPY_DTYPE(descr) ((PyArray_DTypeMeta *)Py_TYPE(descr)) +/* + * This function will hopefully be phased out or replaced, but was convenient + * for incremental implementation of new DTypes based on DTypeMeta. + * (Error checking is not required for DescrFromType, assuming that the + * type is valid.) 
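+ *
+ * A minimal usage sketch (the return value is a new reference):
+ *
+ *     PyArray_DTypeMeta *DType = PyArray_DTypeFromTypeNum(NPY_DOUBLE);
+ *     ...use DType...
+ *     Py_DECREF(DType);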
+ */ +static NPY_INLINE PyArray_DTypeMeta * +PyArray_DTypeFromTypeNum(int typenum) +{ + PyArray_Descr *descr = PyArray_DescrFromType(typenum); + PyArray_DTypeMeta *dtype = NPY_DTYPE(descr); + Py_INCREF(dtype); + Py_DECREF(descr); + return dtype; +} + NPY_NO_EXPORT int dtypemeta_wrap_legacy_descriptor(PyArray_Descr *dtypem); diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c index 6b6c6bd9d057..265ec4be42e3 100644 --- a/numpy/core/src/multiarray/usertypes.c +++ b/numpy/core/src/multiarray/usertypes.c @@ -38,6 +38,7 @@ maintainer email: oliphant.travis@ieee.org #include "usertypes.h" #include "dtypemeta.h" +#include "scalartypes.h" NPY_NO_EXPORT PyArray_Descr **userdescrs=NULL; @@ -347,3 +348,123 @@ PyArray_RegisterCanCast(PyArray_Descr *descr, int totype, return _append_new(&descr->f->cancastscalarkindto[scalar], totype); } } + + +/* + * Legacy user DTypes implemented the common DType operation + * (as used in type promotion/result_type, and e.g. the type for + * concatenation), by using "safe cast" logic. + * + * New DTypes do have this behaviour generally, but we use can-cast + * when legacy user dtypes are involved. + */ +NPY_NO_EXPORT PyArray_DTypeMeta * +legacy_userdtype_common_dtype_function( + PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) +{ + int skind1 = NPY_NOSCALAR, skind2 = NPY_NOSCALAR, skind; + + if (!other->legacy) { + /* legacy DTypes can always defer to new style ones */ + Py_INCREF(Py_NotImplemented); + return (PyArray_DTypeMeta *)Py_NotImplemented; + } + /* Defer so that only one of the types handles the cast */ + if (cls->type_num < other->type_num) { + Py_INCREF(Py_NotImplemented); + return (PyArray_DTypeMeta *)Py_NotImplemented; + } + + /* Check whether casting is possible from one type to the other */ + if (PyArray_CanCastSafely(cls->type_num, other->type_num)) { + Py_INCREF(other); + return other; + } + if (PyArray_CanCastSafely(other->type_num, cls->type_num)) { + Py_INCREF(cls); + return cls; + } + + /* + * The following code used to be part of PyArray_PromoteTypes(). + * We can expect that this code is never used. + * In principle, it allows for promotion of two different user dtypes + * to a single NumPy dtype of the same "kind". In practice + * using the same `kind` as NumPy was never possible due to an + * simplification where `PyArray_EquivTypes(descr1, descr2)` will + * return True if both kind and element size match (e.g. bfloat16 and + * float16 would be equivalent). + * The option is also very obscure and not used in the examples. + */ + + /* Convert the 'kind' char into a scalar kind */ + switch (cls->kind) { + case 'b': + skind1 = NPY_BOOL_SCALAR; + break; + case 'u': + skind1 = NPY_INTPOS_SCALAR; + break; + case 'i': + skind1 = NPY_INTNEG_SCALAR; + break; + case 'f': + skind1 = NPY_FLOAT_SCALAR; + break; + case 'c': + skind1 = NPY_COMPLEX_SCALAR; + break; + } + switch (other->kind) { + case 'b': + skind2 = NPY_BOOL_SCALAR; + break; + case 'u': + skind2 = NPY_INTPOS_SCALAR; + break; + case 'i': + skind2 = NPY_INTNEG_SCALAR; + break; + case 'f': + skind2 = NPY_FLOAT_SCALAR; + break; + case 'c': + skind2 = NPY_COMPLEX_SCALAR; + break; + } + + /* If both are scalars, there may be a promotion possible */ + if (skind1 != NPY_NOSCALAR && skind2 != NPY_NOSCALAR) { + + /* Start with the larger scalar kind */ + skind = (skind1 > skind2) ? 
skind1 : skind2; + int ret_type_num = _npy_smallest_type_of_kind_table[skind]; + + for (;;) { + + /* If there is no larger type of this kind, try a larger kind */ + if (ret_type_num < 0) { + ++skind; + /* Use -1 to signal no promoted type found */ + if (skind < NPY_NSCALARKINDS) { + ret_type_num = _npy_smallest_type_of_kind_table[skind]; + } + else { + break; + } + } + + /* If we found a type to which we can promote both, done! */ + if (PyArray_CanCastSafely(cls->type_num, ret_type_num) && + PyArray_CanCastSafely(other->type_num, ret_type_num)) { + return PyArray_DTypeFromTypeNum(ret_type_num); + } + + /* Try the next larger type of this kind */ + ret_type_num = _npy_next_larger_type_table[ret_type_num]; + } + } + + Py_INCREF(Py_NotImplemented); + return (PyArray_DTypeMeta *)Py_NotImplemented; +} diff --git a/numpy/core/src/multiarray/usertypes.h b/numpy/core/src/multiarray/usertypes.h index b3e386c5c671..1b323d458e88 100644 --- a/numpy/core/src/multiarray/usertypes.h +++ b/numpy/core/src/multiarray/usertypes.h @@ -17,4 +17,8 @@ NPY_NO_EXPORT int PyArray_RegisterCastFunc(PyArray_Descr *descr, int totype, PyArray_VectorUnaryFunc *castfunc); +NPY_NO_EXPORT PyArray_DTypeMeta * +legacy_userdtype_common_dtype_function( + PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other); + #endif From 07c2e66efbf8d98af0da573f0e7a90765b6b979c Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 25 Aug 2020 19:14:51 -0500 Subject: [PATCH 010/409] TST: Test void promotion uses equivalent casting --- numpy/core/tests/test_numeric.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index ae5ee4c88340..89a82d3b6fcf 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -1025,6 +1025,24 @@ def test_promote_types_metadata(self, dtype1, dtype2): assert res_bs == res assert res_bs.metadata == res.metadata + @pytest.mark.parametrize(["dtype1", "dtype2"], + [[np.dtype("V6"), np.dtype("V10")], + [np.dtype([("name1", "i8")]), np.dtype([("name2", "i8")])], + [np.dtype("i8,i8"), np.dtype("i4,i4")], + ]) + def test_invalid_void_promotion(self, dtype1, dtype2): + # Mainly test structured void promotion, which currently allows + # byte-swapping, but nothing else: + with pytest.raises(TypeError): + np.promote_types(dtype1, dtype2) + + @pytest.mark.parametrize(["dtype1", "dtype2"], + [[np.dtype("V10"), np.dtype("V10")], + [np.dtype([("name1", "i8")])], + [np.dtype("i8,i8"), np.dtype("i8,>i8")], + ]) + def test_valid_void_promotion(self, dtype1, dtype2): + assert np.promote_types(dtype1, dtype2) is dtype1 def test_can_cast(self): assert_(np.can_cast(np.int32, np.int64)) From b40f6bb22d7e71533e0b450493530e8fdd08afa5 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 2 Sep 2020 13:42:35 -0500 Subject: [PATCH 011/409] TST: Adapt metadata-promotion tests to new implementation --- numpy/core/tests/test_numeric.py | 39 ++++++++------------------------ 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 89a82d3b6fcf..f5428f98cb54 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -941,7 +941,7 @@ def test_promote_identical_types_metadata(self, dtype): return res = np.promote_types(dtype, dtype) - if res.char in "?bhilqpBHILQPefdgFDGOmM": + if res.char in "?bhilqpBHILQPefdgFDGOmM" or dtype.type is rational: # Metadata is lost for simple promotions (they create a new dtype) assert res.metadata 
is None
         else:
@@ -976,41 +976,20 @@ def test_promote_types_metadata(self, dtype1, dtype2):
             # Promotion failed, this test only checks metadata
             return
 
-        # The rules for when metadata is preserved and which dtypes metadta
-        # will be used are very confusing and depend on multiple paths.
-        # This long if statement attempts to reproduce this:
-        if dtype1.type is rational or dtype2.type is rational:
-            # User dtype promotion preserves byte-order here:
-            if np.can_cast(res, dtype1):
-                assert res.metadata == dtype1.metadata
-            else:
-                assert res.metadata == dtype2.metadata
-
-        elif res.char in "?bhilqpBHILQPefdgFDGOmM":
+        if res.char in "?bhilqpBHILQPefdgFDGOmM" or res.type is rational:
             # All simple types lose metadata (due to using promotion table):
             assert res.metadata is None
-        elif res.kind in "SU" and dtype1 == dtype2:
-            # Strings give precedence to the second dtype:
-            assert res is dtype2
         elif res == dtype1:
             # If one result is the result, it is usually returned unchanged:
             assert res is dtype1
         elif res == dtype2:
-            # If one result is the result, it is usually returned unchanged:
-            assert res is dtype2
-        elif dtype1.kind == "S" and dtype2.kind == "U":
-            # Promotion creates a new unicode dtype from scratch
-            assert res.metadata is None
-        elif dtype1.kind == "U" and dtype2.kind == "S":
-            # Promotion creates a new unicode dtype from scratch
-            assert res.metadata is None
-        elif res.kind in "SU" and dtype2.kind != res.kind:
-            # We build on top of dtype1:
-            assert res.metadata == dtype1.metadata
-        elif res.kind in "SU" and res.kind == dtype1.kind:
-            assert res.metadata == dtype1.metadata
-        elif res.kind in "SU" and res.kind == dtype2.kind:
-            assert res.metadata == dtype2.metadata
+            # dtype1 may have been cast to the same type/kind as dtype2.
+            # If the resulting dtype is identical we currently pick the cast
+            # version of dtype1, which lost the metadata:
+            if np.promote_types(dtype1, dtype2.kind) == dtype2:
+                assert res.metadata is None
+            else:
+                assert res.metadata == metadata2
         else:
             assert res.metadata is None
 
From 0a921a8d282e41127c85d504e2569bfea05a7ec0 Mon Sep 17 00:00:00 2001
From: MelissaWM
Date: Wed, 2 Sep 2020 16:12:04 -0300
Subject: [PATCH 012/409] DOC: Added templates for different types of issues.
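
GitHub builds the "New issue" chooser from the YAML front matter at the
top of each template file; for example, quoting from bug-report.md below:

```
---
name: "Bug Report"
about: Submit a bug report to help us improve NumPy

---
```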
--- .github/ISSUE_TEMPLATE/bug-report.md | 30 +++++++++++++++++++ .github/ISSUE_TEMPLATE/documentation.md | 21 +++++++++++++ .github/ISSUE_TEMPLATE/feature-request.md | 15 ++++++++++ .../ISSUE_TEMPLATE/questions-help-support.md | 16 ++++++++++ .github/PULL_REQUEST_TEMPLATE.md | 4 +++ 5 files changed, 86 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug-report.md create mode 100644 .github/ISSUE_TEMPLATE/documentation.md create mode 100644 .github/ISSUE_TEMPLATE/feature-request.md create mode 100644 .github/ISSUE_TEMPLATE/questions-help-support.md diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100644 index 000000000000..123498224627 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,30 @@ +--- +name: "Bug Report" +about: Submit a bug report to help us improve NumPy + +--- + + + +### Reproducing code example: + + + +```python +import numpy as np +<< your code here >> +``` + +### Error message: + + + + + +### Numpy/Python version information: + + + diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md new file mode 100644 index 000000000000..cc249e33fab2 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -0,0 +1,21 @@ +--- +name: "Documentation" +about: Report an issue related to the NumPy documentation + +--- + +## Documentation + + + + + + + diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 000000000000..96faf6aab148 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,15 @@ +--- +name: "Feature Request" +about: Check instructions for submitting your idea on the mailing list first. + +--- + +## Feature + +If you're looking to request a new feature or change in functionality, please +post your idea on the [numpy-discussion mailing list] +(https://mail.python.org/mailman/listinfo/numpy-discussion) to explain your +reasoning before you open an issue or pull request. You can also check out our +[Contributor Guide] +(https://github.com/numpy/numpy/blob/master/doc/source/dev/index.rst) if you +need more information. diff --git a/.github/ISSUE_TEMPLATE/questions-help-support.md b/.github/ISSUE_TEMPLATE/questions-help-support.md new file mode 100644 index 000000000000..c170ed3740e3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/questions-help-support.md @@ -0,0 +1,16 @@ +--- +name: "Questions/Help/Support" +about: Resources to get help and support about NumPy. + +--- + +## Questions and Help + +### Please note that this issue tracker is not a help form and this issue will +be closed. + +If you need help, we have a set of [listed resources available on the website] +(https://numpy.org/gethelp/). + +For NumPy development-related matters, please see [Community] +(https://numpy.org/community/). diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e12eea7bd763..1f313fd75027 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -5,3 +5,7 @@ http://www.numpy.org/devdocs/dev/development_workflow.html + + From 199448294c937e82127cb648fd4dd380a46d8d58 Mon Sep 17 00:00:00 2001 From: MelissaWM Date: Wed, 2 Sep 2020 19:22:02 -0300 Subject: [PATCH 013/409] Responding to PR suggestions. 
--- .github/ISSUE_TEMPLATE/config.yml | 8 ++++++++ .github/ISSUE_TEMPLATE/documentation.md | 7 +++---- .github/ISSUE_TEMPLATE/feature-request.md | 3 ++- .github/ISSUE_TEMPLATE/questions-help-support.md | 16 ---------------- 4 files changed, 13 insertions(+), 21 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/config.yml delete mode 100644 .github/ISSUE_TEMPLATE/questions-help-support.md diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000000..3027d66506a7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: Question/Help/Support + url: https://numpy.org/gethelp/ + about: "If you have a question, please look at the listed resources available on the website." + - name: Development-related matters + url: https://numpy.org/community/ + about: "If you would like to discuss development-related matters or need help from the NumPy team, see our community's communication channels." diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md index cc249e33fab2..b0435a5654d5 100644 --- a/.github/ISSUE_TEMPLATE/documentation.md +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -9,13 +9,12 @@ about: Report an issue related to the NumPy documentation +referring to. You can also check the development version of the documentation +and see if this issue has already been addressed: https://numpy.org/devdocs/ +--> - - diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md index 96faf6aab148..38d64710bbb2 100644 --- a/.github/ISSUE_TEMPLATE/feature-request.md +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -6,7 +6,8 @@ about: Check instructions for submitting your idea on the mailing list first. ## Feature -If you're looking to request a new feature or change in functionality, please +If you're looking to request a new feature or change in functionality, including +adding or changing the meaning of arguments to an existing function, please post your idea on the [numpy-discussion mailing list] (https://mail.python.org/mailman/listinfo/numpy-discussion) to explain your reasoning before you open an issue or pull request. You can also check out our diff --git a/.github/ISSUE_TEMPLATE/questions-help-support.md b/.github/ISSUE_TEMPLATE/questions-help-support.md deleted file mode 100644 index c170ed3740e3..000000000000 --- a/.github/ISSUE_TEMPLATE/questions-help-support.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -name: "Questions/Help/Support" -about: Resources to get help and support about NumPy. - ---- - -## Questions and Help - -### Please note that this issue tracker is not a help form and this issue will -be closed. - -If you need help, we have a set of [listed resources available on the website] -(https://numpy.org/gethelp/). - -For NumPy development-related matters, please see [Community] -(https://numpy.org/community/). From e9e89e073e46f8af801a0940a276d3ed55c818c2 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Mon, 3 Aug 2020 13:20:33 -0400 Subject: [PATCH 014/409] DOC: Revise glossary page Entries added, explanations revised, Python-only entries now point to Python glossary. 
--- doc/source/glossary.rst | 780 ++++++++++++++++++++++------------------ 1 file changed, 429 insertions(+), 351 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index fb4e0137add2..8a7ebde827d1 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -2,474 +2,552 @@ Glossary ******** -.. toctree:: +.. glossary:: -.. glossary:: + (`n`,) - along an axis - Axes are defined for arrays with more than one dimension. A - 2-dimensional array has two corresponding axes: the first running - vertically downwards across rows (axis 0), and the second running - horizontally across columns (axis 1). + A tuple with one element.The trailing comma distinguishes a one-element + tuple from a parenthesized ``n``. - Many operations can take place along one of these axes. For example, - we can sum each row of an array, in which case we operate along - columns, or axis 1:: - >>> x = np.arange(12).reshape((3,4)) + -1 - >>> x - array([[ 0, 1, 2, 3], - [ 4, 5, 6, 7], - [ 8, 9, 10, 11]]) + Used as a dimension entry, ``-1`` instructs NumPy to choose the length + that will keep the total number of elements the same. - >>> x.sum(axis=1) - array([ 6, 22, 38]) - array - A homogeneous container of numerical elements. Each element in the - array occupies a fixed amount of memory (hence homogeneous), and - can be a numerical element of a single type (such as float, int - or complex) or a combination (such as ``(float, int, float)``). Each - array has an associated data-type (or ``dtype``), which describes - the numerical type of its elements:: + ``...`` - >>> x = np.array([1, 2, 3], float) + **When indexing an array**, shorthand that the missing axes, if they + exist, are full slices. - >>> x - array([ 1., 2., 3.]) + .. code:: python - >>> x.dtype # floating point number, 64 bits of memory per element - dtype('float64') + >>> a.shape + (2, 3, 4) + >>> a[...].shape + (2, 3, 4) - # More complicated data type: each array element is a combination of - # and integer and a floating point number - >>> np.array([(1, 2.0), (3, 4.0)], dtype=[('x', np.int64), ('y', float)]) - array([(1, 2.), (3, 4.)], dtype=[('x', '>> a[...,0].shape + (2, 3) - Fast element-wise operations, called a :term:`ufunc`, operate on arrays. + >>> a[0,...].shape + (3, 4) - array_like - Any sequence that can be interpreted as an ndarray. This includes - nested lists, tuples, scalars and existing arrays. + >>> a[0,...,0].shape + (3,) - attribute - A property of an object that can be accessed using ``obj.attribute``, - e.g., ``shape`` is an attribute of an array:: + It can be used at most once: :: - >>> x = np.array([1, 2, 3]) - >>> x.shape - (3,) + >>> a[0,...,0,...].shape + --------------------------------------------------------------------------- + IndexError Traceback (most recent call last) + in + ----> 1 a[0,...,0,...].shape - big-endian - When storing a multi-byte value in memory as a sequence of bytes, the - sequence addresses/sends/stores the most significant byte first (lowest - address) and the least significant byte last (highest address). Common in - micro-processors and used for transmission of data over network protocols. + IndexError: an index can only have a single ellipsis ('...') - BLAS - `Basic Linear Algebra Subprograms `_ + For more, see the article on + :doc:`Indexing. ` - broadcast - NumPy can do operations on arrays whose shapes are mismatched:: + **In printouts**, NumPy substitutes ``...`` for the middle elements of + large arrays. To see the entire array, use + :doc:`numpy.printoptions. 
` - >>> x = np.array([1, 2]) - >>> y = np.array([[3], [4]]) - >>> x - array([1, 2]) + ``:`` - >>> y - array([[3], - [4]]) + The Python + `slice `_ + operator. In ndarrays, slicing can be applied to every + axis: - >>> x + y - array([[4, 5], - [5, 6]]) + .. code:: python - See `basics.broadcasting` for more information. + >>> a = np.arange(24).reshape(2,3,4) + a + array([[[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]], - C order - See `row-major` + [[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23]]]) - column-major - A way to represent items in a N-dimensional array in the 1-dimensional - computer memory. In column-major order, the leftmost index "varies the - fastest": for example the array:: + >>> a[1:,-2:,:-1] + array([[[16, 17, 18], + [20, 21, 22]]]) - [[1, 2, 3], - [4, 5, 6]] + Trailing slices can be omitted: - is represented in the column-major order as:: + .. code:: python - [1, 4, 2, 5, 3, 6] + >>> a[1] == a[1,:,:] + array([[ True, True, True, True], + [ True, True, True, True], + [ True, True, True, True]]) - Column-major order is also known as the Fortran order, as the Fortran - programming language uses it. + In contrast to Python, where slicing creates a copy, in NumPy slicing + creates a :term:`view`. - decorator - An operator that transforms a function. For example, a ``log`` - decorator may be defined to print debugging information upon - function execution:: + For more, see + `Indexing. `_ - >>> def log(f): - ... def new_logging_func(*args, **kwargs): - ... print("Logging call with parameters:", args, kwargs) - ... return f(*args, **kwargs) - ... - ... return new_logging_func + ``<`` - Now, when we define a function, we can "decorate" it using ``log``:: + In a dtype declaration, indicates that the data is + `little-endian `_ + (the bracket is big on the right). :: - >>> @log - ... def add(a, b): - ... return a + b + >>> dt = np.dtype('>> add(1, 2) - Logging call with parameters: (1, 2) {} - 3 + ``>`` - dictionary - Resembling a language dictionary, which provides a mapping between - words and descriptions thereof, a Python dictionary is a mapping - between two objects:: + In a dtype declaration, indicates that the data is + `big-endian `_ + (the bracket is big on the left). :: - >>> x = {1: 'one', 'two': [1, 2]} + >>> dt = np.dtype('>H') # big-endian unsigned short - Here, `x` is a dictionary mapping keys to values, in this case - the integer 1 to the string "one", and the string "two" to - the list ``[1, 2]``. The values may be accessed using their - corresponding keys:: - >>> x[1] - 'one' + advanced indexing - >>> x['two'] - [1, 2] + Rather than using a scalar or slice as an index, an axis can be + indexed with an array, providing fine-grained selection. This is + known as + `advanced indexing `_ + or ``fancy indexing``. - Note that dictionaries are not stored in any specific order. Also, - most mutable (see *immutable* below) objects, such as lists, may not - be used as keys. - For more information on dictionaries, read the - `Python tutorial `_. + axis - field - In a :term:`structured data type`, each sub-type is called a `field`. + Another term for an array dimension. Axes are numbered left to right; + axis 0 is the first element in the shape tuple. + + In a two-dimensional vector, the elements of axis 0 are rows and the + elements of axis 1 are columns. + + In higher dimensions the picture changes. NumPy prints + higher-dimensional vectors as replications of row-by-column building + blocks, as in this three-dimensional vector: + + .. 
code:: python + + >>> a + array([[[ 0, 1, 2], + [ 3, 4, 5]], + + [[ 6, 7, 8], + [ 9, 10, 11]]]) + + >>> a.shape + (2, 2, 3) + + ``a`` is depicted as a two-element array whose elements are 2x3 vectors. + From this point of view, rows and columns are the final two axes, + respectively, in any shape. + + This rule helps you anticipate how a vector will be printed, and + conversely how to find the index of any of the printed elements. For + instance, in the example, the last two values of 8's index must be 0 and + 2. Since 8 appears in the second of the two 2x3's, the first index must + be 1: + + >>> a[1,0,2] + 8 + + A convenient way to count dimensions in a printed vector is to + count ``[`` symbols after the open-parenthesis. This is + useful in distinguishing, say, a (1,2,3) shape from a (2,3) shape: + + .. code:: python + + >>> a.shape + (2, 3) + >>> a + array([[0, 1, 2], + [3, 4, 5]]) + + >>> a.shape + (1, 2, 3) + >>> a + array([[[0, 1, 2], + [3, 4, 5]]]) + + + along an axis + + The result of an operation along an axis X is a vector in which X + disappears. This can surprise new users expecting the opposite. + + The operation can be visualized this way: + + Imagine a slice of array ``a`` where axis X has a fixed index + and the other dimensions are left full (``:``). + + >>> a.shape + (2,3,4) + >>> a[:,0,:].shape + (2,4) + + The slice has ``a``'s shape with the X dimension deleted. Saying an + operation ``op`` is ``performed along X`` means that ``op`` takes as its + operands slices having every value of X: + + >>> np.sum(a,axis=1) == a[:,0,:] + a[:,1,:] + a[:,2,:] + array([[ True, True, True, True], + [ True, True, True, True]]) + + + array + + Used synonymously in the NumPy docs with `ndarray`, NumPy's basic structure. + See :doc:`Array objects. ` + + + array_like + + Any `sequence `_ + that can be interpreted as an ndarray. In addition to ndarrays + this category includes lists (possibly nested and with + different element types), tuples, and scalars. Any argument accepted by + :doc:`numpy.array ` + is array_like. + + .. code:: + + >>> x = np.array([[1,2.0],[0,0],(1+1j,3.)]) + + >>> x + array([[1.+0.j, 2.+0.j], + [0.+0.j, 0.+0.j], + [1.+1.j, 3.+0.j]]) + + + array scalar + + For uniformity in handling operands, NumPy treats + a :doc:`scalar ` as an array of zero dimension. + + + `attribute `_ + \ + + + .base + + If an array does not own its memory, then its ``.base`` attribute + returns the object whose memory the array is referencing. That object + may may be borrowing the memory from still another object, so the + owning object may be ``a.base.base.base...``. Despite advice to the + contrary, testing ``base`` is not a surefire way to determine if two + arrays are `views. <#term-view>`_ + + + `big-endian `_ + \ + + + `BLAS `_ + \ + + + broadcast + + ``broadcasting`` is NumPy's ability to process ndarrays of + different sizes as if all were the same size. + + When NumPy operates on two arrays, it works element by + element -- for instance, ``c = a * b`` is + + .. code:: + + c[0,0,0] = a[0,0,0] * b[0,0,0] + c[0,0,1] = a[0,0,1] * b[0,0,1] + ... + + Ordinarily this means the shapes of a and b must be identical. But in + some cases, NumPy can fill "missing" axes or "too-short" dimensions + with duplicate data so shapes will match. The duplication costs + no memory or time. See :doc:`Broadcasting. ` + + + C order + + Same as `row-major. `_ + + + `column-major `_ + \ + + + copy + + See :term:`view`. + + + `decorator `_ + \ + + + `dictionary `_ + \ + + + dimension + + See :term:`axis`. 
+ + + dtype + + The datatype describing the (identically typed) elements in an ndarray. + It can be changed to reinterpret the array contents. See + :doc:`Data type objects (dtype). ` + + + fancy indexing + + Another term for :term:`advanced indexing`. + + + field + + In a :term:`structured data type`, each subtype is called a `field`. The `field` has a name (a string), a type (any valid dtype), and - an optional `title`. See :ref:`arrays.dtypes` + an optional :term:`title`. See :ref:`arrays.dtypes`. + + + Fortran order - Fortran order - See `column-major` + Same as `column-major `_ - flattened - Collapsed to a one-dimensional array. See `numpy.ndarray.flatten` - for details. - homogeneous - Describes a block of memory comprised of blocks, each block comprised of - items and of the same size, and blocks are interpreted in exactly the - same way. In the simplest case each block contains a single item, for - instance int32 or float64. + flattened - immutable - An object that cannot be modified after execution is called - immutable. Two common examples are strings and tuples. + See :term:`ravel`. - instance - A class definition gives the blueprint for constructing an object:: - >>> class House: - ... wall_colour = 'white' + homogeneous - Yet, we have to *build* a house before it exists:: + All elements of a homogeneous array have the same type. ndarrays, in + contrast to Python lists, are homogeneous. The type can be complicated, + as in a :term:`structured array`, but all elements have that type. - >>> h = House() # build a house + NumPy `object arrays <#term-object-array>`_, which contain references to + Python objects, fill the role of heterogeneous arrays. - Now, ``h`` is called a ``House`` instance. An instance is therefore - a specific realisation of a class. - iterable - A sequence that allows "walking" (iterating) over items, typically - using a loop such as:: + `immutable `_ + \ - >>> x = [1, 2, 3] - >>> [item**2 for item in x] - [1, 4, 9] - It is often used in combination with ``enumerate``:: - >>> keys = ['a','b','c'] - >>> for n, k in enumerate(keys): - ... print("Key %d: %s" % (n, k)) - ... - Key 0: a - Key 1: b - Key 2: c + `iterable `_ + \ + + + itemsize - itemsize The size of the dtype element in bytes. - list - A Python container that can hold any number of objects or items. - The items do not have to be of the same type, and can even be - lists themselves:: - >>> x = [2, 2.0, "two", [2, 2.0]] + `list `_ + \ + - The list `x` contains 4 items, each which can be accessed individually:: + `little-endian `_ + \ - >>> x[2] # the string 'two' - 'two' - >>> x[3] # a list, containing an integer 2 and a float 2.0 - [2, 2.0] + mask - It is also possible to select more than one item at a time, - using *slicing*:: + The boolean array used to select elements in a :term:`masked array`. - >>> x[0:2] # or, equivalently, x[:2] - [2, 2.0] - In code, arrays are often conveniently expressed as nested lists:: + masked array + Bad or missing data can be cleanly ignored by putting it in a masked + array, which has an internal boolean array indicating invalid + entries. Operations with masked arrays ignore these entries. - >>> np.array([[1, 2], [3, 4]]) - array([[1, 2], - [3, 4]]) + >>> a = np.ma.masked_array([np.nan, 2, np.nan], [True, False, True]) + >>> a + masked_array(data = [-- 2.0 --], + mask = [ True False True], + fill_value = 1e+20) - For more information, read the section on lists in the `Python - tutorial `_. For a mapping - type (key-value), see *dictionary*. 
+ >>> a + [1, 2, 3] + masked_array(data = [-- 4.0 --], + mask = [ True False True], + fill_value = 1e+20) - little-endian - When storing a multi-byte value in memory as a sequence of bytes, the - sequence addresses/sends/stores the least significant byte first (lowest - address) and the most significant byte last (highest address). Common in - x86 processors. + See :doc:`Masked arrays. ` - mask - A boolean array, used to select only certain elements for an operation:: - >>> x = np.arange(5) - >>> x - array([0, 1, 2, 3, 4]) + matrix - >>> mask = (x > 2) - >>> mask - array([False, False, False, True, True]) + NumPy's two-dimensional + :doc:`matrix class ` + should no longer be used; use regular ndarrays. - >>> x[mask] = -1 - >>> x - array([ 0, 1, 2, -1, -1]) - masked array - Array that suppressed values indicated by a mask:: + ndarray - >>> x = np.ma.masked_array([np.nan, 2, np.nan], [True, False, True]) - >>> x - masked_array(data=[--, 2.0, --], - mask=[ True, False, True], - fill_value=1e+20) + See :term:`array`. - >>> x + [1, 2, 3] - masked_array(data=[--, 4.0, --], - mask=[ True, False, True], - fill_value=1e+20) + object array - Masked arrays are often used when operating on arrays containing - missing or invalid entries. + An array whose dtype is ``object``; that is, it contains references to + Python objects. Indexing the array dereferences the Python objects, so + unlike other ndarrays, an object array has the ability to hold + heterogeneous objects. - matrix - A 2-dimensional ndarray that preserves its two-dimensional nature - throughout operations. It has certain special operations, such as ``*`` - (matrix multiplication) and ``**`` (matrix power), defined:: - >>> x = np.mat([[1, 2], [3, 4]]) - >>> x - matrix([[1, 2], - [3, 4]]) + ravel - >>> x**2 - matrix([[ 7, 10], - [15, 22]]) + :doc:`numpy.ravel \ + ` + and :doc:`numpy.flatten \ + ` + both flatten an ndarray. ``ravel`` will return a view if possible; + ``flatten`` always returns a copy. - method - A function associated with an object. For example, each ndarray has a - method called ``repeat``:: + Flattening collapses a multimdimensional array to a single dimension; + details of how this is done (for instance, whether ``a[n+1]`` should be + the next row or next column) are parameters. - >>> x = np.array([1, 2, 3]) - >>> x.repeat(2) - array([1, 1, 2, 2, 3, 3]) - ndarray - See *array*. + record array - record array - An :term:`ndarray` with :term:`structured data type` which has been - subclassed as ``np.recarray`` and whose dtype is of type ``np.record``, - making the fields of its data type to be accessible by attribute. + A :term:`structured array` with an additional way to access + fields -- ``a.field`` in addition to ``a['field']``. See + :doc:`numpy.recarray. ` - reference - If ``a`` is a reference to ``b``, then ``(a is b) == True``. Therefore, - ``a`` and ``b`` are different names for the same Python object. - row-major - A way to represent items in a N-dimensional array in the 1-dimensional - computer memory. In row-major order, the rightmost index "varies - the fastest": for example the array:: + `row-major `_ + \ - [[1, 2, 3], - [4, 5, 6]] + shape - is represented in the row-major order as:: + A tuple showing the length of each dimension of an ndarray. The + length of the tuple itself is the number of dimensions + (:doc:`numpy.ndim `). + The product of the tuple elements is the number of elements in the + array. - [1, 2, 3, 4, 5, 6] - Row-major order is also known as the C order, as the C programming - language uses it. 
New NumPy arrays are by default in row-major order. + :term:`slice <:>` + \ - self - Often seen in method signatures, ``self`` refers to the instance - of the associated class. For example: - >>> class Paintbrush: - ... color = 'blue' - ... - ... def paint(self): - ... print("Painting the city %s!" % self.color) - ... - >>> p = Paintbrush() - >>> p.color = 'red' - >>> p.paint() # self refers to 'p' - Painting the city red! + stride - slice - Used to select only certain elements from a sequence: + Tuple of bytes to step in each dimension when traversing an array. + The byte offset of element :math:`(i_0, i_1, ..., i_n)` in an array `a` is: :: - >>> x = range(5) - >>> x - [0, 1, 2, 3, 4] + offset = sum(np.array(i) * a.strides) - >>> x[1:3] # slice from 1 to 3 (excluding 3 itself) - [1, 2] + Stride is computed automatically from an array's dtype and + shape, but can be directly specified using + :doc:`as_strided. ` - >>> x[1:5:2] # slice from 1 to 5, but skipping every second element - [1, 3] + To see how striding underlies the power of NumPy views, see + `The NumPy array: a structure for efficient numerical computation. \ + `_ - >>> x[::-1] # slice a sequence in reverse - [4, 3, 2, 1, 0] - Arrays may have more than one dimension, each which can be sliced - individually: + structured array - >>> x = np.array([[1, 2], [3, 4]]) - >>> x - array([[1, 2], - [3, 4]]) + Array whose :term:`dtype` is a :term:`structured data type`. - >>> x[:, 1] - array([2, 4]) - structure - See :term:`structured data type` + structured data type - structured data type - A data type composed of other datatypes + Users can create arbitrarily complex :term:`dtypes ` + that can include other arrays and dtypes. These composite dtypes are called + :doc:`structured data types. ` - subarray data type - A :term:`structured data type` may contain a :term:`ndarray` with its - own dtype and shape: - >>> dt = np.dtype([('a', np.int32), ('b', np.float32, (3,))]) - >>> np.zeros(3, dtype=dt) - array([(0, [0., 0., 0.]), (0, [0., 0., 0.]), (0, [0., 0., 0.])], - dtype=[('a', '` which is an alias to the name and is - commonly used for plotting. + An array nested in a :term:`structured data type`: - tuple - A sequence that may contain a variable number of types of any - kind. A tuple is immutable, i.e., once constructed it cannot be - changed. Similar to a list, it can be indexed and sliced:: + .. code:: python - >>> x = (1, 'one', [1, 2]) - >>> x - (1, 'one', [1, 2]) + >>> dt = np.dtype([('a', np.int32), ('b', np.float32, (3,))]) + >>> np.zeros(3, dtype=dt) + array([(0, [0., 0., 0.]), (0, [0., 0., 0.]), (0, [0., 0., 0.])], + dtype=[('a', '>> x[0] - 1 - >>> x[:2] - (1, 'one') + subarray data type - A useful concept is "tuple unpacking", which allows variables to - be assigned to the contents of a tuple:: + An element of a strctured datatype that behaves like an ndarray. - >>> x, y = (1, 2) - >>> x, y = 1, 2 + .. + The entry is in numpy.doc.structured_arrays:51 and + so can't be deleted. - This is often used when a function returns multiple values: - >>> def return_many(): - ... return 1, 'alpha', None + title - >>> a, b, c = return_many() - >>> a, b, c - (1, 'alpha', None) + An alias for a field name in a structured datatype. - >>> a - 1 - >>> b - 'alpha' - - ufunc - Universal function. A fast element-wise, :term:`vectorized - ` array operation. Examples include ``add``, ``sin`` and - ``logical_or``. - - vectorization - Optimizing a looping block by specialized code. 
In a traditional sense, - vectorization performs the same operation on multiple elements with - fixed strides between them via specialized hardware. Compilers know how - to take advantage of well-constructed loops to implement such - optimizations. NumPy uses :ref:`vectorization ` - to mean any optimization via specialized code performing the same - operations on multiple elements, typically achieving speedups by - avoiding some of the overhead in looking up and converting the elements. - - view - An array that does not own its data, but refers to another array's - data instead. For example, we may create a view that only shows - every second element of another array:: - - >>> x = np.arange(5) - >>> x - array([0, 1, 2, 3, 4]) - - >>> y = x[::2] - >>> y - array([0, 2, 4]) - - >>> x[0] = 3 # changing x changes y as well, since y is a view on x - >>> y - array([3, 2, 4]) - - wrapper - Python is a high-level (highly abstracted, or English-like) language. - This abstraction comes at a price in execution speed, and sometimes - it becomes necessary to use lower level languages to do fast - computations. A wrapper is code that provides a bridge between - high and the low level languages, allowing, e.g., Python to execute - code written in C or Fortran. - - Examples include ctypes, SWIG and Cython (which wraps C and C++) - and f2py (which wraps Fortran). + .. + The entry is referenced in numpy.doc.structured_arrays:242 + and so can't be deleted. + + + `tuple `_ + \ + + + type + + In NumPy, a synonym for :term:`dtype`. For the more general Python + meaning, + `see here. `_ + + + ufunc + + NumPy's fast element-by-element computation (:term:`vectorization`) is + structured so as to leave the choice of function open. A function used + in vectorization is called a ``ufunc``, short for ``universal + function``. NumPy routines have built-in ufuncs, but users can also + :doc:`write their own. ` + + + vectorization + + NumPy hands off array processing to C, where looping and computation are + much faster than in Python. To exploit this, programmers using NumPy + eliminate Python loops in favor of array-to-array operations. + :term:`vectorization` can refer both to the C offloading and to + structuring NumPy code to leverage it. + + + view + + Without changing underlying data, NumPy can make one array masquerade as + any number of other arrays with different types, shapes, and even + content. This is much faster than creating those arrays. + + An array created this way is a ``view``, and the performance gain often + makes an array created as a view preferable to one created as a new + array. + But because a view shares data with the original array, a write in one + array can affect the other, even though they appear to be different + arrays. If this is an problem, a view can't be used; the second array + needs to be physically distinct -- a ``copy``. + Some NumPy routines always return views, some always return copies, some + may return one or the other, and for some the choice can be specified. + Responsiblity for managing views and copies falls to the programmer. + NumPy reports whether arrays share memory wih + :doc:`numpy.shares_memory `, + but an exact answer isn't always possible; see the link. 
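The broadcasting behavior described in the glossary entry above can be checked interactively. In this minimal sketch (the array values are illustrative), a shape-``(3,)`` operand combines with a shape-``(2, 3)`` operand by being conceptually repeated along the missing axis, at no extra memory cost:

    >>> import numpy as np
    >>> a = np.arange(6).reshape(2, 3)
    >>> b = np.array([10, 20, 30])   # shape (3,)
    >>> a + b                        # b is broadcast against shape (2, 3)
    array([[10, 21, 32],
           [13, 24, 35]])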
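The stride formula stated above (the byte offset of an element is its index multiplied elementwise by ``a.strides`` and summed) can be verified the same way. A small sketch, assuming a C-contiguous ``int64`` array so the stride values are deterministic:

    >>> a = np.arange(24, dtype=np.int64).reshape(2, 3, 4)
    >>> a.strides                    # bytes to step along each axis
    (96, 32, 8)
    >>> sum(np.array((1, 2, 3)) * a.strides) // a.itemsize   # flat position of a[1, 2, 3]
    23
    >>> a[1, 2, 3]                   # the same element, reached via indexing
    23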
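Likewise, the view/copy distinction drawn in the ``view`` entry can be probed with ``np.shares_memory``, mentioned there. A minimal sketch:

    >>> x = np.arange(5)
    >>> y = x[::2]                   # basic slicing returns a view
    >>> np.shares_memory(x, y)
    True
    >>> np.shares_memory(x, x.copy())
    False
    >>> x[0] = 99
    >>> y                            # the view reflects the write to x
    array([99,  2,  4])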
From d7db94fa85be50a1ae840be5a6b2b5a50d56c557 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Mon, 3 Aug 2020 13:35:31 -0400 Subject: [PATCH 015/409] DOC: Wording improvement in #16996 More clearly describe `shares_memory` as being not always *feasible* rather than *possible* --- doc/source/glossary.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 8a7ebde827d1..4f264e0f3636 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -550,4 +550,4 @@ Glossary Responsiblity for managing views and copies falls to the programmer. NumPy reports whether arrays share memory wih :doc:`numpy.shares_memory `, - but an exact answer isn't always possible; see the link. + but an exact answer isn't always feasible; see the link. From d434a2e7332fa5cccc89a030268317a6225105aa Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Tue, 4 Aug 2020 12:30:04 -0400 Subject: [PATCH 016/409] DOC: Incorporate @BvB93's suggestions in PR #16996 Also reworded `stride` entry for clarity, moved 'axis' entry to correct alphabetical position. Also added anchors to reference/arrays.indexing for glossary references to point to. --- doc/source/glossary.rst | 177 ++++++++++++----------- doc/source/reference/arrays.indexing.rst | 3 + 2 files changed, 96 insertions(+), 84 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 4f264e0f3636..90e8ccd9164d 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -6,7 +6,7 @@ Glossary (`n`,) - A tuple with one element.The trailing comma distinguishes a one-element + A tuple with one element. The trailing comma distinguishes a one-element tuple from a parenthesized ``n``. @@ -48,8 +48,7 @@ Glossary IndexError: an index can only have a single ellipsis ('...') - For more, see the article on - :doc:`Indexing. ` + For details, see :doc:`Indexing. ` **In printouts**, NumPy substitutes ``...`` for the middle elements of large arrays. To see the entire array, use @@ -91,8 +90,8 @@ Glossary In contrast to Python, where slicing creates a copy, in NumPy slicing creates a :term:`view`. - For more, see - `Indexing. `_ + For details, see :ref:`Combining advanced and basic indexing `. + ``<`` @@ -114,13 +113,72 @@ Glossary advanced indexing - Rather than using a scalar or slice as an index, an axis can be - indexed with an array, providing fine-grained selection. This is - known as - `advanced indexing `_ + Rather than using a :doc:`scalar ` or slice as + an index, an axis can be indexed with an array, providing fine-grained + selection. This is known as :ref:`advanced indexing` or ``fancy indexing``. + along an axis + + The result of an operation along an :term:`axis` X is an array in which X + disappears. This can surprise new users expecting the opposite. + + The operation can be visualized this way: + + Imagine a slice of array ``a`` where axis X has a fixed index + and the other dimensions are left full (``:``). + + >>> a.shape + (2,3,4) + >>> a[:,0,:].shape + (2,4) + + The slice has ``a``'s shape with the X dimension deleted. Saying an + operation ``op`` is ``performed along X`` means that ``op`` takes as its + operands slices having every value of X: + + >>> np.sum(a,axis=1) == a[:,0,:] + a[:,1,:] + a[:,2,:] + array([[ True, True, True, True], + [ True, True, True, True]]) + + + array + + Used synonymously in the NumPy docs with + :doc:`ndarray `, NumPy's basic structure. 
+ + + array_like + + Any :doc:`scalar ` or + `sequence `_ + that can be interpreted as an ndarray. In addition to ndarrays + and scalars this category includes lists (possibly nested and with + different element types) and tuples. Any argument accepted by + :doc:`numpy.array ` + is array_like. + + .. code:: + + >>> x = np.array([[1,2.0],[0,0],(1+1j,3.)]) + + >>> x + array([[1.+0.j, 2.+0.j], + [0.+0.j, 0.+0.j], + [1.+1.j, 3.+0.j]]) + + + array scalar + + For uniformity in handling operands, NumPy treats + a :doc:`scalar ` as an array of zero dimension. + + + `attribute `_ + \ + + axis Another term for an array dimension. Axes are numbered left to right; @@ -177,68 +235,10 @@ Glossary [3, 4, 5]]]) - along an axis - - The result of an operation along an axis X is a vector in which X - disappears. This can surprise new users expecting the opposite. - - The operation can be visualized this way: - - Imagine a slice of array ``a`` where axis X has a fixed index - and the other dimensions are left full (``:``). - - >>> a.shape - (2,3,4) - >>> a[:,0,:].shape - (2,4) - - The slice has ``a``'s shape with the X dimension deleted. Saying an - operation ``op`` is ``performed along X`` means that ``op`` takes as its - operands slices having every value of X: - - >>> np.sum(a,axis=1) == a[:,0,:] + a[:,1,:] + a[:,2,:] - array([[ True, True, True, True], - [ True, True, True, True]]) - - - array - - Used synonymously in the NumPy docs with `ndarray`, NumPy's basic structure. - See :doc:`Array objects. ` - - - array_like - - Any `sequence `_ - that can be interpreted as an ndarray. In addition to ndarrays - this category includes lists (possibly nested and with - different element types), tuples, and scalars. Any argument accepted by - :doc:`numpy.array ` - is array_like. - - .. code:: - - >>> x = np.array([[1,2.0],[0,0],(1+1j,3.)]) - - >>> x - array([[1.+0.j, 2.+0.j], - [0.+0.j, 0.+0.j], - [1.+1.j, 3.+0.j]]) - - - array scalar - - For uniformity in handling operands, NumPy treats - a :doc:`scalar ` as an array of zero dimension. - - - `attribute `_ - \ - - .base - If an array does not own its memory, then its ``.base`` attribute + If an array does not own its memory, then its + :doc:`base ` attribute returns the object whose memory the array is referencing. That object may may be borrowing the memory from still another object, so the owning object may be ``a.base.base.base...``. Despite advice to the @@ -271,7 +271,7 @@ Glossary Ordinarily this means the shapes of a and b must be identical. But in some cases, NumPy can fill "missing" axes or "too-short" dimensions with duplicate data so shapes will match. The duplication costs - no memory or time. See :doc:`Broadcasting. ` + no memory or time. For details, see :doc:`Broadcasting. ` C order @@ -304,7 +304,7 @@ Glossary dtype The datatype describing the (identically typed) elements in an ndarray. - It can be changed to reinterpret the array contents. See + It can be changed to reinterpret the array contents. For details, see :doc:`Data type objects (dtype). ` @@ -315,9 +315,10 @@ Glossary field - In a :term:`structured data type`, each subtype is called a `field`. - The `field` has a name (a string), a type (any valid dtype), and - an optional :term:`title`. See :ref:`arrays.dtypes`. + In a :term:`structured data type`, each subtype is called a + :doc:`field `. + A field has a name (a string), a type (any valid dtype), and + an optional :term:`title`. For details, see :ref:`arrays.dtypes`. 
Fortran order @@ -383,7 +384,7 @@ Glossary mask = [ True False True], fill_value = 1e+20) - See :doc:`Masked arrays. ` + For details, see :doc:`Masked arrays. ` matrix @@ -423,12 +424,15 @@ Glossary record array A :term:`structured array` with an additional way to access - fields -- ``a.field`` in addition to ``a['field']``. See + fields -- ``a.field`` in addition to ``a['field']``. For details, see :doc:`numpy.recarray. ` `row-major `_ - \ + \ + + :doc:`scalar ` + \ shape @@ -436,7 +440,8 @@ Glossary length of the tuple itself is the number of dimensions (:doc:`numpy.ndim `). The product of the tuple elements is the number of elements in the - array. + array. For details, see + :doc:`numpy.ndarray.shape `. :term:`slice <:>` @@ -445,14 +450,18 @@ Glossary stride - Tuple of bytes to step in each dimension when traversing an array. - The byte offset of element :math:`(i_0, i_1, ..., i_n)` in an array `a` is: :: - - offset = sum(np.array(i) * a.strides) + Physical memory is one-dimensional; ``stride`` maps an index in an + N-dimensional ndarray to an address in memory. For an N-dimensional + array, stride is an N-element tuple; advancing from index ``i`` to index + ``i+1`` on axis ``n`` means adding ``a.strides[n]`` bytes to the + address. Stride is computed automatically from an array's dtype and shape, but can be directly specified using - :doc:`as_strided. ` + :doc:`as_strided. ` + + For details, see + :doc:`numpy.ndarray.strides `. To see how striding underlies the power of NumPy views, see `The NumPy array: a structure for efficient numerical computation. \ @@ -548,6 +557,6 @@ Glossary Some NumPy routines always return views, some always return copies, some may return one or the other, and for some the choice can be specified. Responsiblity for managing views and copies falls to the programmer. - NumPy reports whether arrays share memory wih + NumPy reports whether arrays share memory :doc:`numpy.shares_memory `, but an exact answer isn't always feasible; see the link. diff --git a/doc/source/reference/arrays.indexing.rst b/doc/source/reference/arrays.indexing.rst index 3e600b7c456e..180a79dae18c 100644 --- a/doc/source/reference/arrays.indexing.rst +++ b/doc/source/reference/arrays.indexing.rst @@ -198,6 +198,7 @@ concepts to remember include: create an axis of length one. :const:`newaxis` is an alias for 'None', and 'None' can be used in place of this with the same result. +.. _advanced-indexing: Advanced Indexing ----------------- @@ -304,6 +305,8 @@ understood with an example. most important thing to remember about indexing with multiple advanced indexes. +.. _combining-advanced-and-basic-indexing: + Combining advanced and basic indexing """"""""""""""""""""""""""""""""""""" From ef3b71e7c2bc6ae42c41d31a9bd83f57f604b496 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Tue, 4 Aug 2020 13:54:01 -0400 Subject: [PATCH 017/409] DOC: Avoid unwanted doctest on PR #14979 Failing CI because it's running doctest on the example strings. Also, deleting glossary.py, source file of the original glossary. --- doc/source/glossary.rst | 117 +++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 69 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 90e8ccd9164d..ea2a9a6f25bc 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -19,34 +19,32 @@ Glossary ``...`` **When indexing an array**, shorthand that the missing axes, if they - exist, are full slices. + exist, are full slices. :: - .. 
code:: python - - >>> a.shape - (2, 3, 4) + >>> a.shape + (2, 3, 4) - >>> a[...].shape - (2, 3, 4) + >>> a[...].shape + (2, 3, 4) - >>> a[...,0].shape - (2, 3) + >>> a[...,0].shape + (2, 3) - >>> a[0,...].shape - (3, 4) + >>> a[0,...].shape + (3, 4) - >>> a[0,...,0].shape - (3,) + >>> a[0,...,0].shape + (3,) - It can be used at most once: :: + It can be used at most once: :: - >>> a[0,...,0,...].shape - --------------------------------------------------------------------------- - IndexError Traceback (most recent call last) - in - ----> 1 a[0,...,0,...].shape + >>> a[0,...,0,...].shape + --------------------------------------------------------------------------- + IndexError Traceback (most recent call last) + in + ----> 1 a[0,...,0,...].shape - IndexError: an index can only have a single ellipsis ('...') + IndexError: an index can only have a single ellipsis ('...') For details, see :doc:`Indexing. ` @@ -60,9 +58,7 @@ Glossary The Python `slice `_ operator. In ndarrays, slicing can be applied to every - axis: - - .. code:: python + axis: :: >>> a = np.arange(24).reshape(2,3,4) a @@ -78,9 +74,7 @@ Glossary array([[[16, 17, 18], [20, 21, 22]]]) - Trailing slices can be omitted: - - .. code:: python + Trailing slices can be omitted: :: >>> a[1] == a[1,:,:] array([[ True, True, True, True], @@ -127,7 +121,7 @@ Glossary The operation can be visualized this way: Imagine a slice of array ``a`` where axis X has a fixed index - and the other dimensions are left full (``:``). + and the other dimensions are left full (``:``). :: >>> a.shape (2,3,4) @@ -136,7 +130,7 @@ Glossary The slice has ``a``'s shape with the X dimension deleted. Saying an operation ``op`` is ``performed along X`` means that ``op`` takes as its - operands slices having every value of X: + operands slices having every value of X: :: >>> np.sum(a,axis=1) == a[:,0,:] + a[:,1,:] + a[:,2,:] array([[ True, True, True, True], @@ -157,9 +151,7 @@ Glossary and scalars this category includes lists (possibly nested and with different element types) and tuples. Any argument accepted by :doc:`numpy.array ` - is array_like. - - .. code:: + is array_like. :: >>> x = np.array([[1,2.0],[0,0],(1+1j,3.)]) @@ -172,7 +164,8 @@ Glossary array scalar For uniformity in handling operands, NumPy treats - a :doc:`scalar ` as an array of zero dimension. + a :doc:`scalar ` as an array of zero + dimension. `attribute `_ @@ -189,19 +182,17 @@ Glossary In higher dimensions the picture changes. NumPy prints higher-dimensional vectors as replications of row-by-column building - blocks, as in this three-dimensional vector: - - .. code:: python + blocks, as in this three-dimensional vector: :: - >>> a - array([[[ 0, 1, 2], - [ 3, 4, 5]], + >>> a + array([[[ 0, 1, 2], + [ 3, 4, 5]], - [[ 6, 7, 8], - [ 9, 10, 11]]]) + [[ 6, 7, 8], + [ 9, 10, 11]]]) - >>> a.shape - (2, 2, 3) + >>> a.shape + (2, 2, 3) ``a`` is depicted as a two-element array whose elements are 2x3 vectors. From this point of view, rows and columns are the final two axes, @@ -218,21 +209,19 @@ Glossary A convenient way to count dimensions in a printed vector is to count ``[`` symbols after the open-parenthesis. This is - useful in distinguishing, say, a (1,2,3) shape from a (2,3) shape: + useful in distinguishing, say, a (1,2,3) shape from a (2,3) shape: :: - .. 
code:: python - - >>> a.shape - (2, 3) - >>> a - array([[0, 1, 2], - [3, 4, 5]]) + >>> a.shape + (2, 3) + >>> a + array([[0, 1, 2], + [3, 4, 5]]) - >>> a.shape - (1, 2, 3) - >>> a - array([[[0, 1, 2], - [3, 4, 5]]]) + >>> a.shape + (1, 2, 3) + >>> a + array([[[0, 1, 2], + [3, 4, 5]]]) .base @@ -243,7 +232,7 @@ Glossary may may be borrowing the memory from still another object, so the owning object may be ``a.base.base.base...``. Despite advice to the contrary, testing ``base`` is not a surefire way to determine if two - arrays are `views. <#term-view>`_ + arrays are :term:`view`\ s. `big-endian `_ @@ -260,9 +249,7 @@ Glossary different sizes as if all were the same size. When NumPy operates on two arrays, it works element by - element -- for instance, ``c = a * b`` is - - .. code:: + element -- for instance, ``c = a * b`` is :: c[0,0,0] = a[0,0,0] * b[0,0,0] c[0,0,1] = a[0,0,1] * b[0,0,1] @@ -371,7 +358,7 @@ Glossary Bad or missing data can be cleanly ignored by putting it in a masked array, which has an internal boolean array indicating invalid - entries. Operations with masked arrays ignore these entries. + entries. Operations with masked arrays ignore these entries. :: >>> a = np.ma.masked_array([np.nan, 2, np.nan], [True, False, True]) >>> a @@ -482,9 +469,7 @@ Glossary subarray - An array nested in a :term:`structured data type`: - - .. code:: python + An array nested in a :term:`structured data type`: :: >>> dt = np.dtype([('a', np.int32), ('b', np.float32, (3,))]) >>> np.zeros(3, dtype=dt) @@ -497,18 +482,12 @@ Glossary An element of a strctured datatype that behaves like an ndarray. .. - The entry is in numpy.doc.structured_arrays:51 and - so can't be deleted. title An alias for a field name in a structured datatype. - .. - The entry is referenced in numpy.doc.structured_arrays:242 - and so can't be deleted. - `tuple `_ \ From 348a32ef50d11ab206c0d58764728f8c7792b68a Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Wed, 5 Aug 2020 06:44:26 -0400 Subject: [PATCH 018/409] DOC: 2nd try: Avoid unwanted doctest on PR #14979 --- doc/source/glossary.rst | 72 +++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index ea2a9a6f25bc..ce6781ce9db8 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -19,9 +19,11 @@ Glossary ``...`` **When indexing an array**, shorthand that the missing axes, if they - exist, are full slices. :: + exist, are full slices. + + .. doctest:: + :skipif: True - >>> a.shape (2, 3, 4) >>> a[...].shape @@ -36,7 +38,10 @@ Glossary >>> a[0,...,0].shape (3,) - It can be used at most once: :: + It can be used at most once: + + .. doctest:: + :skipif: True >>> a[0,...,0,...].shape --------------------------------------------------------------------------- @@ -121,20 +126,27 @@ Glossary The operation can be visualized this way: Imagine a slice of array ``a`` where axis X has a fixed index - and the other dimensions are left full (``:``). :: + and the other dimensions are left full (``:``). - >>> a.shape - (2,3,4) - >>> a[:,0,:].shape - (2,4) + .. doctest:: + :skipif: True + + >>> a.shape + >>> a.shape + (2,3,4) + >>> a[:,0,:].shape + (2,4) The slice has ``a``'s shape with the X dimension deleted. 
Saying an operation ``op`` is ``performed along X`` means that ``op`` takes as its - operands slices having every value of X: :: + operands slices having every value of X: - >>> np.sum(a,axis=1) == a[:,0,:] + a[:,1,:] + a[:,2,:] - array([[ True, True, True, True], - [ True, True, True, True]]) + .. doctest:: + :skipif: True + + >>> np.sum(a,axis=1) == a[:,0,:] + a[:,1,:] + a[:,2,:] + array([[ True, True, True, True], + [ True, True, True, True]]) array @@ -153,9 +165,9 @@ Glossary :doc:`numpy.array ` is array_like. :: - >>> x = np.array([[1,2.0],[0,0],(1+1j,3.)]) + >>> a = np.array([[1,2.0],[0,0],(1+1j,3.)]) - >>> x + >>> a array([[1.+0.j, 2.+0.j], [0.+0.j, 0.+0.j], [1.+1.j, 3.+0.j]]) @@ -182,17 +194,20 @@ Glossary In higher dimensions the picture changes. NumPy prints higher-dimensional vectors as replications of row-by-column building - blocks, as in this three-dimensional vector: :: + blocks, as in this three-dimensional vector: - >>> a - array([[[ 0, 1, 2], - [ 3, 4, 5]], + .. doctest:: + :skipif: True - [[ 6, 7, 8], - [ 9, 10, 11]]]) + >>> a + array([[[ 0, 1, 2], + [ 3, 4, 5]], - >>> a.shape - (2, 2, 3) + [[ 6, 7, 8], + [ 9, 10, 11]]]) + + >>> a.shape + (2, 2, 3) ``a`` is depicted as a two-element array whose elements are 2x3 vectors. From this point of view, rows and columns are the final two axes, @@ -204,12 +219,18 @@ Glossary 2. Since 8 appears in the second of the two 2x3's, the first index must be 1: + .. doctest:: + :skipif: True + >>> a[1,0,2] 8 A convenient way to count dimensions in a printed vector is to count ``[`` symbols after the open-parenthesis. This is - useful in distinguishing, say, a (1,2,3) shape from a (2,3) shape: :: + useful in distinguishing, say, a (1,2,3) shape from a (2,3) shape: + + .. doctest:: + :skipif: True >>> a.shape (2, 3) @@ -249,7 +270,10 @@ Glossary different sizes as if all were the same size. When NumPy operates on two arrays, it works element by - element -- for instance, ``c = a * b`` is :: + element -- for instance, ``c = a * b`` is + + .. doctest:: + :skipif: True c[0,0,0] = a[0,0,0] * b[0,0,0] c[0,0,1] = a[0,0,1] * b[0,0,1] From 4d4ba091492cbe149210447c580039c744e4d5b9 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Wed, 5 Aug 2020 07:12:18 -0400 Subject: [PATCH 019/409] DOC: 3rd try: Avoid unwanted doctest on PR #14979 --- doc/source/glossary.rst | 48 ++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index ce6781ce9db8..776701814ae3 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -21,22 +21,19 @@ Glossary **When indexing an array**, shorthand that the missing axes, if they exist, are full slices. - .. doctest:: - :skipif: True - - (2, 3, 4) + >>> a = np.arange(24).reshape(2,3,4) - >>> a[...].shape - (2, 3, 4) + >>> a[...].shape + (2, 3, 4) - >>> a[...,0].shape - (2, 3) + >>> a[...,0].shape + (2, 3) - >>> a[0,...].shape - (3, 4) + >>> a[0,...].shape + (3, 4) - >>> a[0,...,0].shape - (3,) + >>> a[0,...,0].shape + (3,) It can be used at most once: @@ -63,7 +60,7 @@ Glossary The Python `slice `_ operator. In ndarrays, slicing can be applied to every - axis: :: + axis: >>> a = np.arange(24).reshape(2,3,4) a @@ -128,12 +125,11 @@ Glossary Imagine a slice of array ``a`` where axis X has a fixed index and the other dimensions are left full (``:``). - .. 
doctest:: - :skipif: True + >>> a = np.arange(24).reshape(2,3,4) - >>> a.shape >>> a.shape (2,3,4) + >>> a[:,0,:].shape (2,4) @@ -141,9 +137,6 @@ Glossary operation ``op`` is ``performed along X`` means that ``op`` takes as its operands slices having every value of X: - .. doctest:: - :skipif: True - >>> np.sum(a,axis=1) == a[:,0,:] + a[:,1,:] + a[:,2,:] array([[ True, True, True, True], [ True, True, True, True]]) @@ -196,9 +189,7 @@ Glossary higher-dimensional vectors as replications of row-by-column building blocks, as in this three-dimensional vector: - .. doctest:: - :skipif: True - + >>> a = np.arange(12).reshape(2,2,3) >>> a array([[[ 0, 1, 2], [ 3, 4, 5]], @@ -219,9 +210,6 @@ Glossary 2. Since 8 appears in the second of the two 2x3's, the first index must be 1: - .. doctest:: - :skipif: True - >>> a[1,0,2] 8 @@ -229,15 +217,14 @@ Glossary count ``[`` symbols after the open-parenthesis. This is useful in distinguishing, say, a (1,2,3) shape from a (2,3) shape: - .. doctest:: - :skipif: True - + >>> a = np.arange(6).reshape(2,3) >>> a.shape (2, 3) >>> a array([[0, 1, 2], [3, 4, 5]]) + >>> a = np.arange(6).reshape(1,2,3) >>> a.shape (1, 2, 3) >>> a @@ -270,10 +257,7 @@ Glossary different sizes as if all were the same size. When NumPy operates on two arrays, it works element by - element -- for instance, ``c = a * b`` is - - .. doctest:: - :skipif: True + element -- for instance, ``c = a * b`` is :: c[0,0,0] = a[0,0,0] * b[0,0,0] c[0,0,1] = a[0,0,1] * b[0,0,1] From 181446848f1d26bd387e4606de588980144ab91c Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Wed, 5 Aug 2020 07:31:27 -0400 Subject: [PATCH 020/409] DOC: 4th try: Avoid unwanted doctest on PR #14979 --- doc/source/glossary.rst | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 776701814ae3..a1822b6e793e 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -37,16 +37,13 @@ Glossary It can be used at most once: - .. doctest:: - :skipif: True + >>> a[0,...,0,...].shape + --------------------------------------------------------------------------- + IndexError Traceback (most recent call last) + in + ----> 1 a[0,...,0,...].shape - >>> a[0,...,0,...].shape - --------------------------------------------------------------------------- - IndexError Traceback (most recent call last) - in - ----> 1 a[0,...,0,...].shape - - IndexError: an index can only have a single ellipsis ('...') + IndexError: an index can only have a single ellipsis ('...') For details, see :doc:`Indexing. ` @@ -63,7 +60,7 @@ Glossary axis: >>> a = np.arange(24).reshape(2,3,4) - a + >>> a array([[[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]], @@ -189,7 +186,7 @@ Glossary higher-dimensional vectors as replications of row-by-column building blocks, as in this three-dimensional vector: - >>> a = np.arange(12).reshape(2,2,3) + >>> a = np.arange(12).reshape(2,2,3) >>> a array([[[ 0, 1, 2], [ 3, 4, 5]], @@ -368,16 +365,17 @@ Glossary array, which has an internal boolean array indicating invalid entries. Operations with masked arrays ignore these entries. 
:: - >>> a = np.ma.masked_array([np.nan, 2, np.nan], [True, False, True]) - >>> a - masked_array(data = [-- 2.0 --], - mask = [ True False True], - fill_value = 1e+20) + >>> a = np.ma.masked_array([np.nan, 2, np.nan], [True, False, True]) + >>> a + masked_array(data=[--, 2.0, --], + mask=[ True, False, True], + fill_value=1e+20) + >>> a + [1, 2, 3] - masked_array(data = [-- 4.0 --], - mask = [ True False True], - fill_value = 1e+20) + masked_array(data=[--, 4.0, --], + mask=[ True, False, True], + fill_value=1e+20) For details, see :doc:`Masked arrays. ` From c26d56933627cf08b3cfffcaa320ef77fdd348b3 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Wed, 5 Aug 2020 07:47:50 -0400 Subject: [PATCH 021/409] DOC: 5th try: Avoid unwanted doctest on PR #14979 --- doc/source/glossary.rst | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index a1822b6e793e..0af59c021449 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -42,7 +42,7 @@ Glossary IndexError Traceback (most recent call last) in ----> 1 a[0,...,0,...].shape - + IndexError: an index can only have a single ellipsis ('...') For details, see :doc:`Indexing. ` @@ -64,7 +64,7 @@ Glossary array([[[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]], - + [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]]) @@ -190,13 +190,10 @@ Glossary >>> a array([[[ 0, 1, 2], [ 3, 4, 5]], - + [[ 6, 7, 8], [ 9, 10, 11]]]) - >>> a.shape - (2, 2, 3) - ``a`` is depicted as a two-element array whose elements are 2x3 vectors. From this point of view, rows and columns are the final two axes, respectively, in any shape. @@ -371,7 +368,6 @@ Glossary mask=[ True, False, True], fill_value=1e+20) - >>> a + [1, 2, 3] masked_array(data=[--, 4.0, --], mask=[ True, False, True], From 03e116da14d8413ebc9eda1ed7aa115a70d5feef Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Wed, 5 Aug 2020 08:05:28 -0400 Subject: [PATCH 022/409] DOC: 6th try: Fix doctest on PR #16996 --- doc/source/glossary.rst | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 0af59c021449..7c1ba64ee569 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -35,17 +35,7 @@ Glossary >>> a[0,...,0].shape (3,) - It can be used at most once: - - >>> a[0,...,0,...].shape - --------------------------------------------------------------------------- - IndexError Traceback (most recent call last) - in - ----> 1 a[0,...,0,...].shape - - IndexError: an index can only have a single ellipsis ('...') - - For details, see :doc:`Indexing. ` + It can be used at most once; ``a[...,0,...]`` raises an ``IndexError``. **In printouts**, NumPy substitutes ``...`` for the middle elements of large arrays. To see the entire array, use From 15292c9186a8b139ceaaa56aa4cafca42e787fef Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Wed, 5 Aug 2020 10:03:50 -0400 Subject: [PATCH 023/409] DOC: Fix typos in #16996 strctured -> structured, caught by @WarrenWeckesser. Fixed missing language in `view` entry. --- doc/source/glossary.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 7c1ba64ee569..c20d98b67eec 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -471,7 +471,7 @@ Glossary subarray data type - An element of a strctured datatype that behaves like an ndarray. + An element of a structured datatype that behaves like an ndarray. .. 
@@ -528,6 +528,7 @@ Glossary Some NumPy routines always return views, some always return copies, some may return one or the other, and for some the choice can be specified. Responsiblity for managing views and copies falls to the programmer. - NumPy reports whether arrays share memory - :doc:`numpy.shares_memory `, - but an exact answer isn't always feasible; see the link. + :doc:`numpy.shares_memory ` + will check whether ``b`` is a view of ``a``, + but an exact answer isn't always feasible, as the documentation page + explains. From 55cd824ac52b573273eab0f85712aead8f9a5588 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Wed, 5 Aug 2020 10:12:26 -0400 Subject: [PATCH 024/409] DOC: Fix repeated word in PR #16996 Also adding s to some examples for clarity. --- doc/source/glossary.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index c20d98b67eec..9bfd0d76c020 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -58,7 +58,7 @@ Glossary [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]]) - + >>> a[1:,-2:,:-1] array([[[16, 17, 18], [20, 21, 22]]]) @@ -113,10 +113,10 @@ Glossary and the other dimensions are left full (``:``). >>> a = np.arange(24).reshape(2,3,4) - + >>> a.shape (2,3,4) - + >>> a[:,0,:].shape (2,4) @@ -221,7 +221,7 @@ Glossary If an array does not own its memory, then its :doc:`base ` attribute returns the object whose memory the array is referencing. That object - may may be borrowing the memory from still another object, so the + may be borrowing the memory from still another object, so the owning object may be ``a.base.base.base...``. Despite advice to the contrary, testing ``base`` is not a surefire way to determine if two arrays are :term:`view`\ s. From cef9b98cc1fd9ccd981a6d4295a7c0607669baba Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Wed, 5 Aug 2020 12:28:12 -0400 Subject: [PATCH 025/409] DOC: Fix example formatting in PR #16996 --- doc/source/glossary.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 9bfd0d76c020..8bf7bc5feb03 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -115,10 +115,10 @@ Glossary >>> a = np.arange(24).reshape(2,3,4) >>> a.shape - (2,3,4) + (2, 3, 4) >>> a[:,0,:].shape - (2,4) + (2, 4) The slice has ``a``'s shape with the X dimension deleted. Saying an operation ``op`` is ``performed along X`` means that ``op`` takes as its From 0dd96fba418b6ffab1df813ff2ba02a2e99709b8 Mon Sep 17 00:00:00 2001 From: Ross Barnowski Date: Wed, 5 Aug 2020 12:18:20 -0700 Subject: [PATCH 026/409] DOC: Use intersphinx links. --- doc/source/glossary.rst | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 8bf7bc5feb03..22b1e38ffc80 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -44,8 +44,7 @@ Glossary ``:`` - The Python - `slice `_ + The Python :term:`python:slice` operator. In ndarrays, slicing can be applied to every axis: @@ -138,7 +137,7 @@ Glossary array_like Any :doc:`scalar ` or - `sequence `_ + :term:`python:sequence` that can be interpreted as an ndarray. In addition to ndarrays and scalars this category includes lists (possibly nested and with different element types) and tuples. Any argument accepted by @@ -488,8 +487,7 @@ Glossary type In NumPy, a synonym for :term:`dtype`. For the more general Python - meaning, - `see here. 
`_ + meaning, :term:`see here. `. ufunc From 5e0159cc322b84ea73c10c149267e3d6b810a95a Mon Sep 17 00:00:00 2001 From: Ross Barnowski Date: Wed, 5 Aug 2020 12:39:59 -0700 Subject: [PATCH 027/409] DOC: intersphinx standalone glossary entries --- doc/source/glossary.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 22b1e38ffc80..725a132fb75b 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -159,7 +159,7 @@ Glossary dimension. - `attribute `_ + :term:`attribute ` \ @@ -266,11 +266,11 @@ Glossary See :term:`view`. - `decorator `_ + :term:`decorator ` \ - `dictionary `_ + :term:`dictionary ` \ @@ -319,11 +319,11 @@ Glossary Python objects, fill the role of heterogeneous arrays. - `immutable `_ + :term:`immutable ` \ - `iterable `_ + :term:`iterable ` \ @@ -332,7 +332,7 @@ Glossary The size of the dtype element in bytes. - `list `_ + :term:`list ` \ From 40dc5982f2df98aa153d3834a4a39e3df78b33c8 Mon Sep 17 00:00:00 2001 From: Ross Barnowski Date: Wed, 5 Aug 2020 12:40:43 -0700 Subject: [PATCH 028/409] DOC: rm tuple from glossary --- doc/source/glossary.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 725a132fb75b..11439187a01b 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -480,10 +480,6 @@ Glossary An alias for a field name in a structured datatype. - `tuple `_ - \ - - type In NumPy, a synonym for :term:`dtype`. For the more general Python From 66de9eb30017c2950cecf046c0626fa983e347b6 Mon Sep 17 00:00:00 2001 From: MelissaWM Date: Tue, 8 Sep 2020 19:41:52 -0300 Subject: [PATCH 029/409] Added post-install issue template and responses to PR comments. --- .github/ISSUE_TEMPLATE.md | 27 ----------------------- .github/ISSUE_TEMPLATE/bug-report.md | 2 +- .github/ISSUE_TEMPLATE/feature-request.md | 4 ++-- .github/ISSUE_TEMPLATE/post-install.md | 20 +++++++++++++++++ .github/PULL_REQUEST_TEMPLATE.md | 2 +- 5 files changed, 24 insertions(+), 31 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE.md create mode 100644 .github/ISSUE_TEMPLATE/post-install.md diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md deleted file mode 100644 index 3a25eeb1e9a3..000000000000 --- a/.github/ISSUE_TEMPLATE.md +++ /dev/null @@ -1,27 +0,0 @@ - - - -### Reproducing code example: - - - -```python -import numpy as np -<< your code here >> -``` - - - -### Error message: - - - - - -### Numpy/Python version information: - - - diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md index 123498224627..d2df08689da5 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -24,7 +24,7 @@ https://github.com/numpy/numpy/blob/master/doc/source/dev/development_environmen -### Numpy/Python version information: +### NumPy/Python version information: diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md index 38d64710bbb2..a6a3ca4234af 100644 --- a/.github/ISSUE_TEMPLATE/feature-request.md +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -10,7 +10,7 @@ If you're looking to request a new feature or change in functionality, including adding or changing the meaning of arguments to an existing function, please post your idea on the [numpy-discussion mailing list] (https://mail.python.org/mailman/listinfo/numpy-discussion) to explain your -reasoning before you open an issue or pull request. 
You can also check out our -[Contributor Guide] +reasoning in addition to opening an issue or pull request. You can also check +out our [Contributor Guide] (https://github.com/numpy/numpy/blob/master/doc/source/dev/index.rst) if you need more information. diff --git a/.github/ISSUE_TEMPLATE/post-install.md b/.github/ISSUE_TEMPLATE/post-install.md new file mode 100644 index 000000000000..fb8b3507678d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/post-install.md @@ -0,0 +1,20 @@ +--- +name: "Post-install/importing issue" +about: If you have trouble importing or using NumPy after installation + +--- + + + +### Steps to reproduce: + + + +### Error message: + + + + + diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 1f313fd75027..528580a8e6df 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -6,6 +6,6 @@ http://www.numpy.org/devdocs/dev/development_workflow.html http://www.numpy.org/devdocs/dev/development_workflow.html#writing-the-commit-message --> - From f25a376cbd89512516f7fc458815f0c4159d7ff9 Mon Sep 17 00:00:00 2001 From: MelissaWM Date: Wed, 9 Sep 2020 16:32:42 -0300 Subject: [PATCH 030/409] Added labels to doc and post-install templates. --- .github/ISSUE_TEMPLATE/config.yml | 1 - .github/ISSUE_TEMPLATE/documentation.md | 1 + .github/ISSUE_TEMPLATE/post-install.md | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 3027d66506a7..adfff81bd004 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,4 +1,3 @@ -blank_issues_enabled: false contact_links: - name: Question/Help/Support url: https://numpy.org/gethelp/ diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md index b0435a5654d5..67e04c8af0cf 100644 --- a/.github/ISSUE_TEMPLATE/documentation.md +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -1,6 +1,7 @@ --- name: "Documentation" about: Report an issue related to the NumPy documentation +labels: 04 - Documentation --- diff --git a/.github/ISSUE_TEMPLATE/post-install.md b/.github/ISSUE_TEMPLATE/post-install.md index fb8b3507678d..c0ec7896a40d 100644 --- a/.github/ISSUE_TEMPLATE/post-install.md +++ b/.github/ISSUE_TEMPLATE/post-install.md @@ -1,6 +1,7 @@ --- name: "Post-install/importing issue" about: If you have trouble importing or using NumPy after installation +labels: 32 - Installation --- From 2e3de29722cc42970a31fe6843c5aa0dbcf0ee7d Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 10 Sep 2020 16:02:55 -0500 Subject: [PATCH 031/409] MAINT: Simplify ufunc pickling This also allows at least in principle numba dynamically generated ufuncs to be pickled (with some hacking), see: https://github.com/dask/distributed/issues/3450 If the name of the ufunc is set to a qualname, using this method, pickle should be able to unpickle the ufunc correctly. We may want to allow setting the module and qualname explicitly on the ufunc object to remove the need for the custom pickler completely. 
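Concretely, since pickling now stores only the ufunc's ``__name__`` and pickle resolves that name in the ufunc's module, the round trip for a built-in ufunc returns the module-level singleton. A minimal sketch:

    >>> import pickle
    >>> import numpy as np
    >>> pickle.loads(pickle.dumps(np.add)) is np.add
    True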
--- numpy/core/__init__.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/numpy/core/__init__.py b/numpy/core/__init__.py index c77885954ef1..a0769cc89112 100644 --- a/numpy/core/__init__.py +++ b/numpy/core/__init__.py @@ -113,10 +113,9 @@ __all__ += shape_base.__all__ __all__ += einsumfunc.__all__ -# Make it possible so that ufuncs can be pickled -# Here are the loading and unloading functions -# The name numpy.core._ufunc_reconstruct must be -# available for unpickling to work. +# We used to use `np.core._ufunc_reconstruct` to unpickle. This is unnecessary, +# but old pickles saved before 1.20 will be using it, and there is no reason +# to break loading them. def _ufunc_reconstruct(module, name): # The `fromlist` kwarg is required to ensure that `mod` points to the # inner-most module rather than the parent package when module name is @@ -126,14 +125,17 @@ def _ufunc_reconstruct(module, name): return getattr(mod, name) def _ufunc_reduce(func): - from pickle import whichmodule - name = func.__name__ - return _ufunc_reconstruct, (whichmodule(func, name), name) + # Report the `__name__`. pickle will try to find the module. Note that + # pickle supports this `__name__` being a `__qualname__`. It may + # make sense to add a `__qualname__` to ufuncs, to allow this more + # explicitly (Numba has ufuncs as attributes). + # See also: https://github.com/dask/distributed/issues/3450 + return func.__name__ import copyreg -copyreg.pickle(ufunc, _ufunc_reduce, _ufunc_reconstruct) +copyreg.pickle(ufunc, _ufunc_reduce) # Unclutter namespace (must keep _ufunc_reconstruct for unpickling) del copyreg del _ufunc_reduce From 2ec7ea418e6f6e054e9aad2b18adfd9827c90249 Mon Sep 17 00:00:00 2001 From: MelissaWM Date: Fri, 11 Sep 2020 10:19:30 -0300 Subject: [PATCH 032/409] Put feature request template as comments. --- .github/ISSUE_TEMPLATE/feature-request.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md index a6a3ca4234af..0be94f92893d 100644 --- a/.github/ISSUE_TEMPLATE/feature-request.md +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -6,11 +6,11 @@ about: Check instructions for submitting your idea on the mailing list first. ## Feature -If you're looking to request a new feature or change in functionality, including + From 96be6959f970f56fda1c4dfe38db3eabc807f7a6 Mon Sep 17 00:00:00 2001 From: Sayed Adel Date: Thu, 10 Sep 2020 15:31:27 +0200 Subject: [PATCH 033/409] ENH: Pass optimization arguments to asv build This patch allows passing `-j`, `--cpu-baseline`, `--cpu-dispatch` and `--disable-optimization` to the ASV build when the argument `--bench-compare` is used.
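Under the hood this works by token substitution in a config template; a minimal sketch of the idea (the template line and option values here are illustrative)::

    >>> template = '"python setup.py build {numpy_build_options}"'
    >>> opts = ["-j", "4", "--cpu-baseline", "native"]
    >>> template.replace("{numpy_build_options}", " ".join(opts))
    '"python setup.py build -j 4 --cpu-baseline native"'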
--- .gitignore | 1 + benchmarks/asv_compare.conf.json.tpl | 95 ++++++++++++++++++++++++ runtests.py | 105 ++++++++++++++++++++++++++- 3 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 benchmarks/asv_compare.conf.json.tpl diff --git a/.gitignore b/.gitignore index c58b0e62feb9..85318467e849 100644 --- a/.gitignore +++ b/.gitignore @@ -182,6 +182,7 @@ benchmarks/results benchmarks/html benchmarks/env benchmarks/numpy +benchmarks/_asv_compare.conf.json # cythonized files cythonize.dat numpy/random/_mtrand/_mtrand.c diff --git a/benchmarks/asv_compare.conf.json.tpl b/benchmarks/asv_compare.conf.json.tpl new file mode 100644 index 000000000000..1f339077c66d --- /dev/null +++ b/benchmarks/asv_compare.conf.json.tpl @@ -0,0 +1,95 @@ +// This config file is similar to 'asv.conf.json' except it contains +// custom tokens that can be substituted by 'runtests.py' and ASV, +// due to the necessity to add custom build options when `--bench-compare` +// is used. +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "numpy", + + // The project's homepage + "project_url": "https://www.numpy.org/", + + // The URL or local path of the source code repository for the + // project being benchmarked + "repo": "..", + + // List of branches to benchmark. If not provided, defaults to "master" + // (for git) or "tip" (for mercurial). + "branches": ["HEAD"], + + // The DVCS being used. If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + "dvcs": "git", + + // The tool to use to create environments. May be "conda", + // "virtualenv" or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "virtualenv", + + // the base URL to show a commit for the project. + "show_commit_url": "https://github.com/numpy/numpy/commit/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + "pythons": ["3.7"], + + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty + // list indicates to just test against the default (latest) + // version. + "matrix": { + "Cython": [], + }, + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + // NOTE: changing the dir name will require updating `generate_asv_config()` in + // runtests.py + "env_dir": "env", + + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + "results_dir": "results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + "html_dir": "html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache wheels of the recent builds in each + // environment, making them faster to install next time.
This is + // the number of builds to keep, per environment. + "build_cache_size": 8, + + "build_command" : [ + "python setup.py build {numpy_build_options}", + "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}" + ], + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching to benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark. + // + // "regressions_first_commits": { + // "some_benchmark": "352cdf", // Consider regressions only after this commit + // "another_benchmark": null, // Skip regression detection altogether + // } +} diff --git a/runtests.py b/runtests.py index 2f07749f8147..f8b70d9360df 100755 --- a/runtests.py +++ b/runtests.py @@ -52,7 +52,7 @@ import sys -import os +import os, glob # In case we are run from the source directory, we don't want to import the # project from there: @@ -310,8 +310,16 @@ def main(argv): out = subprocess.check_output(['git', 'rev-parse', commit_a]) commit_a = out.strip().decode('ascii') + # generate config file with the required build options + asv_cfpath = [ + '--config', asv_compare_config( + os.path.join(ROOT_DIR, 'benchmarks'), args, + # to clear the cache if the user changed build options + (commit_a, commit_b) + ) + ] cmd = ['asv', 'continuous', '-e', '-f', '1.05', - commit_a, commit_b] + bench_args + commit_a, commit_b] + asv_cfpath + bench_args ret = subprocess.call(cmd, cwd=os.path.join(ROOT_DIR, 'benchmarks')) sys.exit(ret) @@ -361,7 +369,6 @@ def main(argv): else: sys.exit(1) - def build_project(args): """ Build a dev version of the project. @@ -491,6 +498,98 @@ def build_project(args): return site_dir, site_dir_noarch +def asv_compare_config(bench_path, args, h_commits): + """ + Fill in the required build options through the custom variable + 'numpy_build_options' and return the generated config path. + """ + conf_path = os.path.join(bench_path, "asv_compare.conf.json.tpl") + nconf_path = os.path.join(bench_path, "_asv_compare.conf.json") + + # add custom build options + build = [] + if args.parallel > 1: + build += ["-j", str(args.parallel)] + if args.cpu_baseline: + build += ["--cpu-baseline", args.cpu_baseline] + if args.cpu_dispatch: + build += ["--cpu-dispatch", args.cpu_dispatch] + if args.disable_optimization: + build += ["--disable-optimization"] + + is_cached = asv_substitute_config(conf_path, nconf_path, + numpy_build_options = ' '.join([f'\\"{v}\\"' for v in build]), + ) + if not is_cached: + asv_clear_cache(bench_path, h_commits) + return nconf_path + +def asv_clear_cache(bench_path, h_commits, env_dir="env"): + """ + Force ASV to clear the cache according to the specified commit hashes. + """ + # FIXME: only clear the cache from the current environment dir + asv_build_pattern = os.path.join(bench_path, env_dir, "*", "asv-build-cache") + for asv_build_cache in glob.glob(asv_build_pattern, recursive=True): + for c in h_commits: + try: shutil.rmtree(os.path.join(asv_build_cache, c)) + except OSError: pass + +def asv_substitute_config(in_config, out_config, **custom_vars): + """ + A workaround to allow substituting custom tokens within the + ASV configuration file since there's no official way to add custom + variables (e.g. env vars).
+ + Parameters + ---------- + in_config : str + The path of the ASV configuration file, e.g. '/path/to/asv.conf.json' + out_config : str + The path of the generated configuration file, + e.g. '/path/to/asv_substituted.conf.json'. + + The other keyword arguments represent the custom variables. + + Returns + ------- + True (is cached) if 'out_config' is already generated with + the same '**custom_vars' and updated with the latest 'in_config', + False otherwise. + + Examples + -------- + See asv_compare_config(). + """ + assert in_config != out_config + assert len(custom_vars) > 0 + + def sdbm_hash(*factors): + chash = 0 + for f in factors: + for char in str(f): + chash = ord(char) + (chash << 6) + (chash << 16) - chash + chash &= 0xFFFFFFFF + return chash + + vars_hash = sdbm_hash(custom_vars, os.path.getmtime(in_config)) + try: + with open(out_config, "r") as wfd: + hash_line = wfd.readline().split('hash:') + if len(hash_line) > 1 and int(hash_line[1]) == vars_hash: + return True + except IOError: + pass + + custom_vars = {f'{{{k}}}':v for k, v in custom_vars.items()} + with open(in_config, "r") as rfd, open(out_config, "w") as wfd: + wfd.write(f"// hash:{vars_hash}\n") + wfd.write("// This file is automatically generated by runtests.py\n") + for line in rfd: + for key, val in custom_vars.items(): + line = line.replace(key, val) + wfd.write(line) + return False # # GCOV support From c70fbb6d7206155f8f9d75af585f32c7b057051a Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Sat, 12 Sep 2020 16:39:50 -0500 Subject: [PATCH 034/409] MAINT: (dtype-transfer) make copyswapn and legacy cast wrapper method This is just a small preparation to make the code slightly more modular, since I definitely will need to retain the wrapping for legacy dtype `dtype->f->castfuncs` and the `dtype->f->copyswapn`, but want more freedom to see whether I can split out the specialized casts we use internally (since NumPy has those cast functions, but normally never uses them). Splitting these two out should make it easier to write a function to support current "legacy" dtype cast functions without worrying as much about the casts used internally for NumPy dtypes.
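In outline, the resulting lookup tries the specialized transfer functions first and only then falls back to the legacy wrapper; a rough Python rendering of that control flow (a sketch of the C dispatch, not a usable API)::

    >>> def pick_cast_path(src_is_number, dst_is_number, involves_datetime):
    ...     # mirrors get_nbo_cast_transfer_function after this patch:
    ...     # numeric fast path, then datetime/string special cases,
    ...     # then get_legacy_dtype_cast_function as the fallback
    ...     if src_is_number and dst_is_number:
    ...         return "numeric"
    ...     if involves_datetime:
    ...         return "datetime/string special case"
    ...     return "legacy dtype->f->castfuncs"
    >>> pick_cast_path(True, True, False)
    'numeric'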
--- numpy/core/src/multiarray/dtype_transfer.c | 321 +++++++++++---------- 1 file changed, 176 insertions(+), 145 deletions(-) diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index 42c66ee7f6e7..af4e6c22e601 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -1322,95 +1322,21 @@ get_unicode_to_datetime_transfer_function(int aligned, return NPY_SUCCEED; } + static int -get_nbo_cast_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, - int *out_needs_api, - int *out_needs_wrap) +get_legacy_dtype_cast_function( + int aligned, npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata, + int *out_needs_api, int *out_needs_wrap) { _strided_cast_data *data; PyArray_VectorUnaryFunc *castfunc; PyArray_Descr *tmp_dtype; - npy_intp shape = 1, src_itemsize = src_dtype->elsize, - dst_itemsize = dst_dtype->elsize; - - if (PyTypeNum_ISNUMBER(src_dtype->type_num) && - PyTypeNum_ISNUMBER(dst_dtype->type_num)) { - *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) || - !PyArray_ISNBO(dst_dtype->byteorder); - return get_nbo_cast_numeric_transfer_function(aligned, - src_stride, dst_stride, - src_dtype->type_num, dst_dtype->type_num, - out_stransfer, out_transferdata); - } - - if (src_dtype->type_num == NPY_DATETIME || - src_dtype->type_num == NPY_TIMEDELTA || - dst_dtype->type_num == NPY_DATETIME || - dst_dtype->type_num == NPY_TIMEDELTA) { - /* A parameterized type, datetime->datetime sometimes needs casting */ - if ((src_dtype->type_num == NPY_DATETIME && - dst_dtype->type_num == NPY_DATETIME) || - (src_dtype->type_num == NPY_TIMEDELTA && - dst_dtype->type_num == NPY_TIMEDELTA)) { - *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) || - !PyArray_ISNBO(dst_dtype->byteorder); - return get_nbo_cast_datetime_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, out_transferdata); - } - - /* - * Datetime <-> string conversions can be handled specially. - * The functions may raise an error if the strings have no - * space, or can't be parsed properly. 
- */ - if (src_dtype->type_num == NPY_DATETIME) { - switch (dst_dtype->type_num) { - case NPY_STRING: - *out_needs_api = 1; - *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder); - return get_nbo_datetime_to_string_transfer_function( - aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, out_transferdata); - - case NPY_UNICODE: - return get_datetime_to_unicode_transfer_function( - aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, out_transferdata, - out_needs_api); - } - } - else if (dst_dtype->type_num == NPY_DATETIME) { - switch (src_dtype->type_num) { - case NPY_STRING: - *out_needs_api = 1; - *out_needs_wrap = !PyArray_ISNBO(dst_dtype->byteorder); - return get_nbo_string_to_datetime_transfer_function( - aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, out_transferdata); - - case NPY_UNICODE: - return get_unicode_to_datetime_transfer_function( - aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, out_transferdata, - out_needs_api); - } - } - } + npy_intp shape = 1; + npy_intp src_itemsize = src_dtype->elsize; + npy_intp dst_itemsize = dst_dtype->elsize; *out_needs_wrap = !aligned || !PyArray_ISNBO(src_dtype->byteorder) || @@ -1543,6 +1469,167 @@ get_nbo_cast_transfer_function(int aligned, return NPY_SUCCEED; } + +static int +get_nbo_cast_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedUnaryOp **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api, + int *out_needs_wrap) +{ + if (PyTypeNum_ISNUMBER(src_dtype->type_num) && + PyTypeNum_ISNUMBER(dst_dtype->type_num)) { + *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) || + !PyArray_ISNBO(dst_dtype->byteorder); + return get_nbo_cast_numeric_transfer_function(aligned, + src_stride, dst_stride, + src_dtype->type_num, dst_dtype->type_num, + out_stransfer, out_transferdata); + } + + if (src_dtype->type_num == NPY_DATETIME || + src_dtype->type_num == NPY_TIMEDELTA || + dst_dtype->type_num == NPY_DATETIME || + dst_dtype->type_num == NPY_TIMEDELTA) { + /* A parameterized type, datetime->datetime sometimes needs casting */ + if ((src_dtype->type_num == NPY_DATETIME && + dst_dtype->type_num == NPY_DATETIME) || + (src_dtype->type_num == NPY_TIMEDELTA && + dst_dtype->type_num == NPY_TIMEDELTA)) { + *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) || + !PyArray_ISNBO(dst_dtype->byteorder); + return get_nbo_cast_datetime_transfer_function(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + out_stransfer, out_transferdata); + } + + /* + * Datetime <-> string conversions can be handled specially. + * The functions may raise an error if the strings have no + * space, or can't be parsed properly. 
+ */ + if (src_dtype->type_num == NPY_DATETIME) { + switch (dst_dtype->type_num) { + case NPY_STRING: + *out_needs_api = 1; + *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder); + return get_nbo_datetime_to_string_transfer_function( + aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + out_stransfer, out_transferdata); + + case NPY_UNICODE: + return get_datetime_to_unicode_transfer_function( + aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + out_stransfer, out_transferdata, + out_needs_api); + } + } + else if (dst_dtype->type_num == NPY_DATETIME) { + switch (src_dtype->type_num) { + case NPY_STRING: + *out_needs_api = 1; + *out_needs_wrap = !PyArray_ISNBO(dst_dtype->byteorder); + return get_nbo_string_to_datetime_transfer_function( + aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + out_stransfer, out_transferdata); + + case NPY_UNICODE: + return get_unicode_to_datetime_transfer_function( + aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + out_stransfer, out_transferdata, + out_needs_api); + } + } + } + + return get_legacy_dtype_cast_function( + aligned, src_stride, dst_stride, src_dtype, dst_dtype, + move_references, out_stransfer, out_transferdata, + out_needs_api, out_needs_wrap); +} + + +static int +wrap_aligned_contig_transfer_function_with_copyswapn( + int aligned, npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata, + int *out_needs_api, + PyArray_StridedUnaryOp *caststransfer, NpyAuxData *castdata) +{ + NpyAuxData *todata = NULL, *fromdata = NULL; + PyArray_StridedUnaryOp *tobuffer, *frombuffer; + npy_intp src_itemsize = src_dtype->elsize; + npy_intp dst_itemsize = dst_dtype->elsize; + + /* Get the copy/swap operation from src */ + PyArray_GetDTypeCopySwapFn( + aligned, src_stride, src_itemsize, src_dtype, &tobuffer, &todata); + + if (!PyDataType_REFCHK(dst_dtype)) { + /* Copying from buffer is a simple copy/swap operation */ + PyArray_GetDTypeCopySwapFn( + aligned, dst_itemsize, dst_stride, dst_dtype, + &frombuffer, &fromdata); + } + else { + /* + * Since the buffer is initialized to NULL, need to move the + * references in order to DECREF the existing data. 
+ */ + /* Object types cannot be byte swapped */ + assert(PyDataType_ISNOTSWAPPED(dst_dtype)); + /* The loop already needs the python api if this is reached */ + assert(*out_needs_api); + + if (PyArray_GetDTypeTransferFunction( + aligned, dst_itemsize, dst_stride, + dst_dtype, dst_dtype, 1, + &frombuffer, &fromdata, out_needs_api) != NPY_SUCCEED) { + return NPY_FAIL; + } + } + + if (frombuffer == NULL || tobuffer == NULL) { + NPY_AUXDATA_FREE(castdata); + NPY_AUXDATA_FREE(todata); + NPY_AUXDATA_FREE(fromdata); + return NPY_FAIL; + } + + *out_stransfer = caststransfer; + + /* Wrap it all up in a new transfer function + data */ + if (wrap_aligned_contig_transfer_function( + src_itemsize, dst_itemsize, + tobuffer, todata, + frombuffer, fromdata, + caststransfer, castdata, + PyDataType_FLAGCHK(dst_dtype, NPY_NEEDS_INIT), + *out_needs_api, + out_stransfer, out_transferdata) != NPY_SUCCEED) { + NPY_AUXDATA_FREE(castdata); + NPY_AUXDATA_FREE(todata); + NPY_AUXDATA_FREE(fromdata); + return NPY_FAIL; + } + + return NPY_SUCCEED; +} + + static int get_cast_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, @@ -1553,10 +1640,8 @@ get_cast_transfer_function(int aligned, int *out_needs_api) { PyArray_StridedUnaryOp *caststransfer; - NpyAuxData *castdata, *todata = NULL, *fromdata = NULL; + NpyAuxData *castdata; int needs_wrap = 0; - npy_intp src_itemsize = src_dtype->elsize, - dst_itemsize = dst_dtype->elsize; if (get_nbo_cast_transfer_function(aligned, src_stride, dst_stride, @@ -1581,64 +1666,10 @@ get_cast_transfer_function(int aligned, } /* Otherwise, we have to copy and/or swap to aligned temporaries */ else { - PyArray_StridedUnaryOp *tobuffer, *frombuffer; - - /* Get the copy/swap operation from src */ - PyArray_GetDTypeCopySwapFn(aligned, - src_stride, src_itemsize, - src_dtype, - &tobuffer, &todata); - - if (!PyDataType_REFCHK(dst_dtype)) { - /* Copying from buffer is a simple copy/swap operation */ - PyArray_GetDTypeCopySwapFn(aligned, - dst_itemsize, dst_stride, - dst_dtype, - &frombuffer, &fromdata); - } - else { - /* - * Since the buffer is initialized to NULL, need to move the - * references in order to DECREF the existing data. 
- */ - /* Object types cannot be byte swapped */ - assert(PyDataType_ISNOTSWAPPED(dst_dtype)); - /* The loop already needs the python api if this is reached */ - assert(*out_needs_api); - - if (PyArray_GetDTypeTransferFunction( - aligned, dst_itemsize, dst_stride, - dst_dtype, dst_dtype, 1, - &frombuffer, &fromdata, out_needs_api) != NPY_SUCCEED) { - return NPY_FAIL; - } - } - - if (frombuffer == NULL || tobuffer == NULL) { - NPY_AUXDATA_FREE(castdata); - NPY_AUXDATA_FREE(todata); - NPY_AUXDATA_FREE(fromdata); - return NPY_FAIL; - } - - *out_stransfer = caststransfer; - - /* Wrap it all up in a new transfer function + data */ - if (wrap_aligned_contig_transfer_function( - src_itemsize, dst_itemsize, - tobuffer, todata, - frombuffer, fromdata, - caststransfer, castdata, - PyDataType_FLAGCHK(dst_dtype, NPY_NEEDS_INIT), - *out_needs_api, - out_stransfer, out_transferdata) != NPY_SUCCEED) { - NPY_AUXDATA_FREE(castdata); - NPY_AUXDATA_FREE(todata); - NPY_AUXDATA_FREE(fromdata); - return NPY_FAIL; - } - - return NPY_SUCCEED; + return wrap_aligned_contig_transfer_function_with_copyswapn( + aligned, src_stride, dst_stride, src_dtype, dst_dtype, + out_stransfer, out_transferdata, out_needs_api, + caststransfer, castdata); } } From bae33831e42a6bf07b66284aec618cc49b13d030 Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 13 Sep 2020 16:12:02 +0300 Subject: [PATCH 035/409] WIP, BUILD: start to debug circleCI failures --- .circleci/config.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f4ffb5223bdf..4222895df651 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -28,10 +28,10 @@ jobs: name: build numpy command: | . venv/bin/activate - pip install --upgrade pip 'setuptools<49.2.0' - pip install -r test_requirements.txt + pip install --progress-bar=off --upgrade pip 'setuptools<49.2.0' + pip install --progress-bar=off -r test_requirements.txt pip install . - pip install -r doc_requirements.txt + pip install --progress-bar=off -r doc_requirements.txt - run: name: create release notes From de0cf43c44b315b70addd920722d1668a9a0c77c Mon Sep 17 00:00:00 2001 From: mattip Date: Sun, 13 Sep 2020 21:17:22 +0300 Subject: [PATCH 036/409] BUILD: try python 3.8.4 --- .circleci/config.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4222895df651..f4536cca5525 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -8,7 +8,7 @@ jobs: docker: # CircleCI maintains a library of pre-built images # documented at https://circleci.com/docs/2.0/circleci-images/ - - image: circleci/python:3.6.6 + - image: circleci/python:3.8.4 working_directory: ~/repo @@ -18,11 +18,10 @@ jobs: - run: name: create virtual environment, install dependencies command: | - python3 -m venv venv - ln -s $(which python3) venv/bin/python3.6 - . venv/bin/activate sudo apt-get update sudo apt-get install -y graphviz texlive-fonts-recommended texlive-latex-recommended texlive-latex-extra texlive-generic-extra latexmk texlive-xetex + python3.8 -m venv venv + . venv/bin/activate - run: name: build numpy From 52746a756992a32ad46cc7ab69c0f1e5e6e88ed6 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 14 Sep 2020 07:26:01 +0000 Subject: [PATCH 037/409] MAINT: Bump hypothesis from 5.33.0 to 5.35.1 Bumps [hypothesis](https://github.com/HypothesisWorks/hypothesis) from 5.33.0 to 5.35.1. 
- [Release notes](https://github.com/HypothesisWorks/hypothesis/releases) - [Commits](https://github.com/HypothesisWorks/hypothesis/compare/hypothesis-python-5.33.0...hypothesis-python-5.35.1) Signed-off-by: dependabot-preview[bot] --- test_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_requirements.txt b/test_requirements.txt index 7ef91125c4e4..0349b711be58 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,7 +1,7 @@ cython==0.29.21 wheel setuptools<49.2.0 -hypothesis==5.33.0 +hypothesis==5.35.1 pytest==6.0.1 pytz==2020.1 pytest-cov==2.10.1 From f64d9afed7fb473914dbe88037c665c501e4bf68 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Mon, 14 Sep 2020 12:48:22 +0200 Subject: [PATCH 038/409] MAINT: Move the `fromnumeric` annotations to their own stub file --- numpy/__init__.pyi | 560 ++++++------------------------------- numpy/core/fromnumeric.pyi | 492 ++++++++++++++++++++++++++++++++ 2 files changed, 576 insertions(+), 476 deletions(-) create mode 100644 numpy/core/fromnumeric.pyi diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index bf54207a4637..d4eda6b3150f 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -66,10 +66,93 @@ from numpy.core.function_base import ( geomspace, ) +from numpy.core.fromnumeric import ( + take, + reshape, + choose, + repeat, + put, + swapaxes, + transpose, + partition, + argpartition, + sort, + argsort, + argmax, + argmin, + searchsorted, + resize, + squeeze, + diagonal, + trace, + ravel, + nonzero, + shape, + compress, + clip, + sum, + all, + any, + cumsum, + ptp, + amax, + amin, + prod, + cumprod, + ndim, + size, + around, + mean, + std, + var, +) + # Add an object to `__all__` if their stubs are defined in an external file; # their stubs will not be recognized otherwise. # NOTE: This is redundant for objects defined within this file. -__all__ = ["linspace", "logspace", "geomspace"] +__all__ = [ + "linspace", + "logspace", + "geomspace", + "take", + "reshape", + "choose", + "repeat", + "put", + "swapaxes", + "transpose", + "partition", + "argpartition", + "sort", + "argsort", + "argmax", + "argmin", + "searchsorted", + "resize", + "squeeze", + "diagonal", + "trace", + "ravel", + "nonzero", + "shape", + "compress", + "clip", + "sum", + "all", + "any", + "cumsum", + "ptp", + "amax", + "amin", + "prod", + "cumprod", + "ndim", + "size", + "around", + "mean", + "std", + "var", +] # TODO: remove when the full numpy namespace is defined def __getattr__(name: str) -> Any: ... @@ -998,481 +1081,6 @@ def find_common_type( array_types: Sequence[DtypeLike], scalar_types: Sequence[DtypeLike] ) -> dtype: ... -# Functions from np.core.fromnumeric -_Mode = Literal["raise", "wrap", "clip"] -_PartitionKind = Literal["introselect"] -_SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"] -_Side = Literal["left", "right"] - -# Various annotations for scalars - -# While dt.datetime and dt.timedelta are not technically part of NumPy, -# they are one of the rare few builtin scalars which serve as valid return types. -# See https://github.com/numpy/numpy-stubs/pull/67#discussion_r412604113. 
-_ScalarNumpy = Union[generic, dt.datetime, dt.timedelta] -_ScalarBuiltin = Union[str, bytes, dt.date, dt.timedelta, bool, int, float, complex] -_Scalar = Union[_ScalarBuiltin, _ScalarNumpy] - -# Integers and booleans can generally be used interchangeably -_ScalarIntOrBool = TypeVar("_ScalarIntOrBool", bound=Union[integer, bool_]) -_ScalarGeneric = TypeVar("_ScalarGeneric", bound=generic) -_ScalarGenericDT = TypeVar( - "_ScalarGenericDT", bound=Union[dt.datetime, dt.timedelta, generic] -) - -_Number = TypeVar('_Number', bound=number) _NumberLike = Union[int, float, complex, number, bool_] - -# An array-like object consisting of integers _IntLike = Union[int, integer] _BoolLike = Union[bool, bool_] -_IntOrBool = Union[_IntLike, _BoolLike] -_ArrayLikeIntNested = ArrayLike # TODO: wait for support for recursive types -_ArrayLikeBoolNested = ArrayLike # TODO: wait for support for recursive types - -# Integers and booleans can generally be used interchangeably -_ArrayLikeIntOrBool = Union[ - _IntOrBool, - ndarray, - Sequence[_IntOrBool], - Sequence[_ArrayLikeIntNested], - Sequence[_ArrayLikeBoolNested], -] -_ArrayLikeBool = Union[ - _BoolLike, - Sequence[_BoolLike], - ndarray -] - -# The signature of take() follows a common theme with its overloads: -# 1. A generic comes in; the same generic comes out -# 2. A scalar comes in; a generic comes out -# 3. An array-like object comes in; some keyword ensures that a generic comes out -# 4. An array-like object comes in; an ndarray or generic comes out -@overload -def take( - a: _ScalarGenericDT, - indices: int, - axis: Optional[int] = ..., - out: Optional[ndarray] = ..., - mode: _Mode = ..., -) -> _ScalarGenericDT: ... -@overload -def take( - a: _Scalar, - indices: int, - axis: Optional[int] = ..., - out: Optional[ndarray] = ..., - mode: _Mode = ..., -) -> _ScalarNumpy: ... -@overload -def take( - a: ArrayLike, - indices: int, - axis: Optional[int] = ..., - out: Optional[ndarray] = ..., - mode: _Mode = ..., -) -> _ScalarNumpy: ... -@overload -def take( - a: ArrayLike, - indices: _ArrayLikeIntOrBool, - axis: Optional[int] = ..., - out: Optional[ndarray] = ..., - mode: _Mode = ..., -) -> Union[_ScalarNumpy, ndarray]: ... -def reshape(a: ArrayLike, newshape: _ShapeLike, order: _OrderACF = ...) -> ndarray: ... -@overload -def choose( - a: _ScalarIntOrBool, - choices: ArrayLike, - out: Optional[ndarray] = ..., - mode: _Mode = ..., -) -> _ScalarIntOrBool: ... -@overload -def choose( - a: _IntOrBool, choices: ArrayLike, out: Optional[ndarray] = ..., mode: _Mode = ... -) -> Union[integer, bool_]: ... -@overload -def choose( - a: _ArrayLikeIntOrBool, - choices: ArrayLike, - out: Optional[ndarray] = ..., - mode: _Mode = ..., -) -> ndarray: ... -def repeat( - a: ArrayLike, repeats: _ArrayLikeIntOrBool, axis: Optional[int] = ... -) -> ndarray: ... -def put( - a: ndarray, ind: _ArrayLikeIntOrBool, v: ArrayLike, mode: _Mode = ... -) -> None: ... -def swapaxes(a: ArrayLike, axis1: int, axis2: int) -> ndarray: ... -def transpose( - a: ArrayLike, axes: Union[None, Sequence[int], ndarray] = ... -) -> ndarray: ... -def partition( - a: ArrayLike, - kth: _ArrayLikeIntOrBool, - axis: Optional[int] = ..., - kind: _PartitionKind = ..., - order: Union[None, str, Sequence[str]] = ..., -) -> ndarray: ... -@overload -def argpartition( - a: generic, - kth: _ArrayLikeIntOrBool, - axis: Optional[int] = ..., - kind: _PartitionKind = ..., - order: Union[None, str, Sequence[str]] = ..., -) -> integer: ... 
-@overload -def argpartition( - a: _ScalarBuiltin, - kth: _ArrayLikeIntOrBool, - axis: Optional[int] = ..., - kind: _PartitionKind = ..., - order: Union[None, str, Sequence[str]] = ..., -) -> ndarray: ... -@overload -def argpartition( - a: ArrayLike, - kth: _ArrayLikeIntOrBool, - axis: Optional[int] = ..., - kind: _PartitionKind = ..., - order: Union[None, str, Sequence[str]] = ..., -) -> ndarray: ... -def sort( - a: ArrayLike, - axis: Optional[int] = ..., - kind: Optional[_SortKind] = ..., - order: Union[None, str, Sequence[str]] = ..., -) -> ndarray: ... -def argsort( - a: ArrayLike, - axis: Optional[int] = ..., - kind: Optional[_SortKind] = ..., - order: Union[None, str, Sequence[str]] = ..., -) -> ndarray: ... -@overload -def argmax(a: ArrayLike, axis: None = ..., out: Optional[ndarray] = ...) -> integer: ... -@overload -def argmax( - a: ArrayLike, axis: int = ..., out: Optional[ndarray] = ... -) -> Union[integer, ndarray]: ... -@overload -def argmin(a: ArrayLike, axis: None = ..., out: Optional[ndarray] = ...) -> integer: ... -@overload -def argmin( - a: ArrayLike, axis: int = ..., out: Optional[ndarray] = ... -) -> Union[integer, ndarray]: ... -@overload -def searchsorted( - a: ArrayLike, - v: _Scalar, - side: _Side = ..., - sorter: Optional[_ArrayLikeIntOrBool] = ..., # 1D int array -) -> integer: ... -@overload -def searchsorted( - a: ArrayLike, - v: ArrayLike, - side: _Side = ..., - sorter: Optional[_ArrayLikeIntOrBool] = ..., # 1D int array -) -> ndarray: ... -def resize(a: ArrayLike, new_shape: _ShapeLike) -> ndarray: ... -@overload -def squeeze(a: _ScalarGeneric, axis: Optional[_ShapeLike] = ...) -> _ScalarGeneric: ... -@overload -def squeeze(a: ArrayLike, axis: Optional[_ShapeLike] = ...) -> ndarray: ... -def diagonal( - a: ArrayLike, offset: int = ..., axis1: int = ..., axis2: int = ... # >= 2D array -) -> ndarray: ... -def trace( - a: ArrayLike, # >= 2D array - offset: int = ..., - axis1: int = ..., - axis2: int = ..., - dtype: DtypeLike = ..., - out: Optional[ndarray] = ..., -) -> Union[number, ndarray]: ... -def ravel(a: ArrayLike, order: _OrderKACF = ...) -> ndarray: ... -def nonzero(a: ArrayLike) -> Tuple[ndarray, ...]: ... -def shape(a: ArrayLike) -> _Shape: ... -def compress( - condition: ArrayLike, # 1D bool array - a: ArrayLike, - axis: Optional[int] = ..., - out: Optional[ndarray] = ..., -) -> ndarray: ... -@overload -def clip( - a: _Number, - a_min: ArrayLike, - a_max: Optional[ArrayLike], - out: Optional[ndarray] = ..., - **kwargs: Any, -) -> _Number: ... -@overload -def clip( - a: _Number, - a_min: None, - a_max: ArrayLike, - out: Optional[ndarray] = ..., - **kwargs: Any, -) -> _Number: ... -@overload -def clip( - a: ArrayLike, - a_min: ArrayLike, - a_max: Optional[ArrayLike], - out: Optional[ndarray] = ..., - **kwargs: Any, -) -> Union[number, ndarray]: ... -@overload -def clip( - a: ArrayLike, - a_min: None, - a_max: ArrayLike, - out: Optional[ndarray] = ..., - **kwargs: Any, -) -> Union[number, ndarray]: ... -@overload -def sum( - a: _Number, - axis: Optional[_ShapeLike] = ..., - dtype: DtypeLike = ..., - out: Optional[ndarray] = ..., - keepdims: bool = ..., - initial: _NumberLike = ..., - where: _ArrayLikeBool = ..., -) -> _Number: ... -@overload -def sum( - a: ArrayLike, - axis: _ShapeLike = ..., - dtype: DtypeLike = ..., - out: Optional[ndarray] = ..., - keepdims: bool = ..., - initial: _NumberLike = ..., - where: _ArrayLikeBool = ..., -) -> Union[number, ndarray]: ... 
-@overload -def all( - a: ArrayLike, - axis: None = ..., - out: Optional[ndarray] = ..., - keepdims: Literal[False] = ..., -) -> bool_: ... -@overload -def all( - a: ArrayLike, - axis: Optional[_ShapeLike] = ..., - out: Optional[ndarray] = ..., - keepdims: bool = ..., -) -> Union[bool_, ndarray]: ... -@overload -def any( - a: ArrayLike, - axis: None = ..., - out: Optional[ndarray] = ..., - keepdims: Literal[False] = ..., -) -> bool_: ... -@overload -def any( - a: ArrayLike, - axis: Optional[_ShapeLike] = ..., - out: Optional[ndarray] = ..., - keepdims: bool = ..., -) -> Union[bool_, ndarray]: ... -def cumsum( - a: ArrayLike, - axis: Optional[int] = ..., - dtype: DtypeLike = ..., - out: Optional[ndarray] = ..., -) -> ndarray: ... -@overload -def ptp( - a: _Number, - axis: Optional[_ShapeLike] = ..., - out: Optional[ndarray] = ..., - keepdims: bool = ..., -) -> _Number: ... -@overload -def ptp( - a: ArrayLike, - axis: None = ..., - out: Optional[ndarray] = ..., - keepdims: Literal[False] = ..., -) -> number: ... -@overload -def ptp( - a: ArrayLike, - axis: Optional[_ShapeLike] = ..., - out: Optional[ndarray] = ..., - keepdims: bool = ..., -) -> Union[number, ndarray]: ... -@overload -def amax( - a: _Number, - axis: Optional[_ShapeLike] = ..., - out: Optional[ndarray] = ..., - keepdims: bool = ..., - initial: _NumberLike = ..., - where: _ArrayLikeBool = ..., -) -> _Number: ... -@overload -def amax( - a: ArrayLike, - axis: None = ..., - out: Optional[ndarray] = ..., - keepdims: Literal[False] = ..., - initial: _NumberLike = ..., - where: _ArrayLikeBool = ..., -) -> number: ... -@overload -def amax( - a: ArrayLike, - axis: Optional[_ShapeLike] = ..., - out: Optional[ndarray] = ..., - keepdims: bool = ..., - initial: _NumberLike = ..., - where: _ArrayLikeBool = ..., -) -> Union[number, ndarray]: ... -@overload -def amin( - a: _Number, - axis: Optional[_ShapeLike] = ..., - out: Optional[ndarray] = ..., - keepdims: bool = ..., - initial: _NumberLike = ..., - where: _ArrayLikeBool = ..., -) -> _Number: ... -@overload -def amin( - a: ArrayLike, - axis: None = ..., - out: Optional[ndarray] = ..., - keepdims: Literal[False] = ..., - initial: _NumberLike = ..., - where: _ArrayLikeBool = ..., -) -> number: ... -@overload -def amin( - a: ArrayLike, - axis: Optional[_ShapeLike] = ..., - out: Optional[ndarray] = ..., - keepdims: bool = ..., - initial: _NumberLike = ..., - where: _ArrayLikeBool = ..., -) -> Union[number, ndarray]: ... - -# TODO: `np.prod()``: For object arrays `initial` does not necessarily -# have to be a numerical scalar. -# The only requirement is that it is compatible -# with the `.__mul__()` method(s) of the passed array's elements. - -# Note that the same situation holds for all wrappers around -# `np.ufunc.reduce`, e.g. `np.sum()` (`.__add__()`). - -@overload -def prod( - a: _Number, - axis: Optional[_ShapeLike] = ..., - dtype: DtypeLike = ..., - out: None = ..., - keepdims: bool = ..., - initial: _NumberLike = ..., - where: _ArrayLikeBool = ..., -) -> _Number: ... -@overload -def prod( - a: ArrayLike, - axis: None = ..., - dtype: DtypeLike = ..., - out: None = ..., - keepdims: Literal[False] = ..., - initial: _NumberLike = ..., - where: _ArrayLikeBool = ..., -) -> number: ... -@overload -def prod( - a: ArrayLike, - axis: Optional[_ShapeLike] = ..., - dtype: DtypeLike = ..., - out: Optional[ndarray] = ..., - keepdims: bool = ..., - initial: _NumberLike = ..., - where: _ArrayLikeBool = ..., -) -> Union[number, ndarray]: ... 
-def cumprod( - a: ArrayLike, - axis: Optional[int] = ..., - dtype: DtypeLike = ..., - out: Optional[ndarray] = ..., -) -> ndarray: ... -def ndim(a: ArrayLike) -> int: ... -def size(a: ArrayLike, axis: Optional[int] = ...) -> int: ... -@overload -def around( - a: _Number, decimals: int = ..., out: Optional[ndarray] = ... -) -> _Number: ... -@overload -def around( - a: _NumberLike, decimals: int = ..., out: Optional[ndarray] = ... -) -> number: ... -@overload -def around( - a: ArrayLike, decimals: int = ..., out: Optional[ndarray] = ... -) -> ndarray: ... -@overload -def mean( - a: ArrayLike, - axis: None = ..., - dtype: DtypeLike = ..., - out: None = ..., - keepdims: Literal[False] = ..., -) -> number: ... -@overload -def mean( - a: ArrayLike, - axis: Optional[_ShapeLike] = ..., - dtype: DtypeLike = ..., - out: Optional[ndarray] = ..., - keepdims: bool = ..., -) -> Union[number, ndarray]: ... -@overload -def std( - a: ArrayLike, - axis: None = ..., - dtype: DtypeLike = ..., - out: None = ..., - ddof: int = ..., - keepdims: Literal[False] = ..., -) -> number: ... -@overload -def std( - a: ArrayLike, - axis: Optional[_ShapeLike] = ..., - dtype: DtypeLike = ..., - out: Optional[ndarray] = ..., - ddof: int = ..., - keepdims: bool = ..., -) -> Union[number, ndarray]: ... -@overload -def var( - a: ArrayLike, - axis: None = ..., - dtype: DtypeLike = ..., - out: None = ..., - ddof: int = ..., - keepdims: Literal[False] = ..., -) -> number: ... -@overload -def var( - a: ArrayLike, - axis: Optional[_ShapeLike] = ..., - dtype: DtypeLike = ..., - out: Optional[ndarray] = ..., - ddof: int = ..., - keepdims: bool = ..., -) -> Union[number, ndarray]: ... diff --git a/numpy/core/fromnumeric.pyi b/numpy/core/fromnumeric.pyi new file mode 100644 index 000000000000..7ad772b07e54 --- /dev/null +++ b/numpy/core/fromnumeric.pyi @@ -0,0 +1,492 @@ +import sys +import datetime as dt +from typing import Optional, Union, Sequence, Tuple, Any, overload, TypeVar + +from numpy import ( + ndarray, + number, + integer, + bool_, + generic, + _OrderKACF, + _OrderACF, + _IntLike, + _BoolLike, + _NumberLike, +) +from numpy.typing import DtypeLike, ArrayLike, _ShapeLike, _Shape + +if sys.version_info >= (3, 8): + from typing import Literal +else: + from typing_extensions import Literal + +_Mode = Literal["raise", "wrap", "clip"] +_PartitionKind = Literal["introselect"] +_SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"] +_Side = Literal["left", "right"] + +# Various annotations for scalars + +# While dt.datetime and dt.timedelta are not technically part of NumPy, +# they are one of the rare few builtin scalars which serve as valid return types. +# See https://github.com/numpy/numpy-stubs/pull/67#discussion_r412604113. 
+_ScalarNumpy = Union[generic, dt.datetime, dt.timedelta] +_ScalarBuiltin = Union[str, bytes, dt.date, dt.timedelta, bool, int, float, complex] +_Scalar = Union[_ScalarBuiltin, _ScalarNumpy] + +# Integers and booleans can generally be used interchangeably +_ScalarIntOrBool = TypeVar("_ScalarIntOrBool", bound=Union[integer, bool_]) +_ScalarGeneric = TypeVar("_ScalarGeneric", bound=generic) +_ScalarGenericDT = TypeVar( + "_ScalarGenericDT", bound=Union[dt.datetime, dt.timedelta, generic] +) + +_Number = TypeVar("_Number", bound=number) + +# An array-like object consisting of integers +_IntOrBool = Union[_IntLike, _BoolLike] +_ArrayLikeIntNested = ArrayLike # TODO: wait for support for recursive types +_ArrayLikeBoolNested = ArrayLike # TODO: wait for support for recursive types + +# Integers and booleans can generally be used interchangeably +_ArrayLikeIntOrBool = Union[ + _IntOrBool, + ndarray, + Sequence[_IntOrBool], + Sequence[_ArrayLikeIntNested], + Sequence[_ArrayLikeBoolNested], +] +_ArrayLikeBool = Union[_BoolLike, Sequence[_BoolLike], ndarray] + +# The signature of take() follows a common theme with its overloads: +# 1. A generic comes in; the same generic comes out +# 2. A scalar comes in; a generic comes out +# 3. An array-like object comes in; some keyword ensures that a generic comes out +# 4. An array-like object comes in; an ndarray or generic comes out +@overload +def take( + a: _ScalarGenericDT, + indices: int, + axis: Optional[int] = ..., + out: Optional[ndarray] = ..., + mode: _Mode = ..., +) -> _ScalarGenericDT: ... +@overload +def take( + a: _Scalar, + indices: int, + axis: Optional[int] = ..., + out: Optional[ndarray] = ..., + mode: _Mode = ..., +) -> _ScalarNumpy: ... +@overload +def take( + a: ArrayLike, + indices: int, + axis: Optional[int] = ..., + out: Optional[ndarray] = ..., + mode: _Mode = ..., +) -> _ScalarNumpy: ... +@overload +def take( + a: ArrayLike, + indices: _ArrayLikeIntOrBool, + axis: Optional[int] = ..., + out: Optional[ndarray] = ..., + mode: _Mode = ..., +) -> Union[_ScalarNumpy, ndarray]: ... +def reshape(a: ArrayLike, newshape: _ShapeLike, order: _OrderACF = ...) -> ndarray: ... +@overload +def choose( + a: _ScalarIntOrBool, + choices: ArrayLike, + out: Optional[ndarray] = ..., + mode: _Mode = ..., +) -> _ScalarIntOrBool: ... +@overload +def choose( + a: _IntOrBool, choices: ArrayLike, out: Optional[ndarray] = ..., mode: _Mode = ... +) -> Union[integer, bool_]: ... +@overload +def choose( + a: _ArrayLikeIntOrBool, + choices: ArrayLike, + out: Optional[ndarray] = ..., + mode: _Mode = ..., +) -> ndarray: ... +def repeat( + a: ArrayLike, repeats: _ArrayLikeIntOrBool, axis: Optional[int] = ... +) -> ndarray: ... +def put( + a: ndarray, ind: _ArrayLikeIntOrBool, v: ArrayLike, mode: _Mode = ... +) -> None: ... +def swapaxes(a: ArrayLike, axis1: int, axis2: int) -> ndarray: ... +def transpose( + a: ArrayLike, axes: Union[None, Sequence[int], ndarray] = ... +) -> ndarray: ... +def partition( + a: ArrayLike, + kth: _ArrayLikeIntOrBool, + axis: Optional[int] = ..., + kind: _PartitionKind = ..., + order: Union[None, str, Sequence[str]] = ..., +) -> ndarray: ... +@overload +def argpartition( + a: generic, + kth: _ArrayLikeIntOrBool, + axis: Optional[int] = ..., + kind: _PartitionKind = ..., + order: Union[None, str, Sequence[str]] = ..., +) -> integer: ... +@overload +def argpartition( + a: _ScalarBuiltin, + kth: _ArrayLikeIntOrBool, + axis: Optional[int] = ..., + kind: _PartitionKind = ..., + order: Union[None, str, Sequence[str]] = ..., +) -> ndarray: ... 
+@overload +def argpartition( + a: ArrayLike, + kth: _ArrayLikeIntOrBool, + axis: Optional[int] = ..., + kind: _PartitionKind = ..., + order: Union[None, str, Sequence[str]] = ..., +) -> ndarray: ... +def sort( + a: ArrayLike, + axis: Optional[int] = ..., + kind: Optional[_SortKind] = ..., + order: Union[None, str, Sequence[str]] = ..., +) -> ndarray: ... +def argsort( + a: ArrayLike, + axis: Optional[int] = ..., + kind: Optional[_SortKind] = ..., + order: Union[None, str, Sequence[str]] = ..., +) -> ndarray: ... +@overload +def argmax(a: ArrayLike, axis: None = ..., out: Optional[ndarray] = ...) -> integer: ... +@overload +def argmax( + a: ArrayLike, axis: int = ..., out: Optional[ndarray] = ... +) -> Union[integer, ndarray]: ... +@overload +def argmin(a: ArrayLike, axis: None = ..., out: Optional[ndarray] = ...) -> integer: ... +@overload +def argmin( + a: ArrayLike, axis: int = ..., out: Optional[ndarray] = ... +) -> Union[integer, ndarray]: ... +@overload +def searchsorted( + a: ArrayLike, + v: _Scalar, + side: _Side = ..., + sorter: Optional[_ArrayLikeIntOrBool] = ..., # 1D int array +) -> integer: ... +@overload +def searchsorted( + a: ArrayLike, + v: ArrayLike, + side: _Side = ..., + sorter: Optional[_ArrayLikeIntOrBool] = ..., # 1D int array +) -> ndarray: ... +def resize(a: ArrayLike, new_shape: _ShapeLike) -> ndarray: ... +@overload +def squeeze(a: _ScalarGeneric, axis: Optional[_ShapeLike] = ...) -> _ScalarGeneric: ... +@overload +def squeeze(a: ArrayLike, axis: Optional[_ShapeLike] = ...) -> ndarray: ... +def diagonal( + a: ArrayLike, offset: int = ..., axis1: int = ..., axis2: int = ... # >= 2D array +) -> ndarray: ... +def trace( + a: ArrayLike, # >= 2D array + offset: int = ..., + axis1: int = ..., + axis2: int = ..., + dtype: DtypeLike = ..., + out: Optional[ndarray] = ..., +) -> Union[number, ndarray]: ... +def ravel(a: ArrayLike, order: _OrderKACF = ...) -> ndarray: ... +def nonzero(a: ArrayLike) -> Tuple[ndarray, ...]: ... +def shape(a: ArrayLike) -> _Shape: ... +def compress( + condition: ArrayLike, # 1D bool array + a: ArrayLike, + axis: Optional[int] = ..., + out: Optional[ndarray] = ..., +) -> ndarray: ... +@overload +def clip( + a: _Number, + a_min: ArrayLike, + a_max: Optional[ArrayLike], + out: Optional[ndarray] = ..., + **kwargs: Any, +) -> _Number: ... +@overload +def clip( + a: _Number, + a_min: None, + a_max: ArrayLike, + out: Optional[ndarray] = ..., + **kwargs: Any, +) -> _Number: ... +@overload +def clip( + a: ArrayLike, + a_min: ArrayLike, + a_max: Optional[ArrayLike], + out: Optional[ndarray] = ..., + **kwargs: Any, +) -> Union[number, ndarray]: ... +@overload +def clip( + a: ArrayLike, + a_min: None, + a_max: ArrayLike, + out: Optional[ndarray] = ..., + **kwargs: Any, +) -> Union[number, ndarray]: ... +@overload +def sum( + a: _Number, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., +) -> _Number: ... +@overload +def sum( + a: ArrayLike, + axis: _ShapeLike = ..., + dtype: DtypeLike = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., +) -> Union[number, ndarray]: ... +@overload +def all( + a: ArrayLike, + axis: None = ..., + out: Optional[ndarray] = ..., + keepdims: Literal[False] = ..., +) -> bool_: ... 
+@overload +def all( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., +) -> Union[bool_, ndarray]: ... +@overload +def any( + a: ArrayLike, + axis: None = ..., + out: Optional[ndarray] = ..., + keepdims: Literal[False] = ..., +) -> bool_: ... +@overload +def any( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., +) -> Union[bool_, ndarray]: ... +def cumsum( + a: ArrayLike, + axis: Optional[int] = ..., + dtype: DtypeLike = ..., + out: Optional[ndarray] = ..., +) -> ndarray: ... +@overload +def ptp( + a: _Number, + axis: Optional[_ShapeLike] = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., +) -> _Number: ... +@overload +def ptp( + a: ArrayLike, + axis: None = ..., + out: Optional[ndarray] = ..., + keepdims: Literal[False] = ..., +) -> number: ... +@overload +def ptp( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., +) -> Union[number, ndarray]: ... +@overload +def amax( + a: _Number, + axis: Optional[_ShapeLike] = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., +) -> _Number: ... +@overload +def amax( + a: ArrayLike, + axis: None = ..., + out: Optional[ndarray] = ..., + keepdims: Literal[False] = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., +) -> number: ... +@overload +def amax( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., +) -> Union[number, ndarray]: ... +@overload +def amin( + a: _Number, + axis: Optional[_ShapeLike] = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., +) -> _Number: ... +@overload +def amin( + a: ArrayLike, + axis: None = ..., + out: Optional[ndarray] = ..., + keepdims: Literal[False] = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., +) -> number: ... +@overload +def amin( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., +) -> Union[number, ndarray]: ... + +# TODO: `np.prod()``: For object arrays `initial` does not necessarily +# have to be a numerical scalar. +# The only requirement is that it is compatible +# with the `.__mul__()` method(s) of the passed array's elements. + +# Note that the same situation holds for all wrappers around +# `np.ufunc.reduce`, e.g. `np.sum()` (`.__add__()`). +@overload +def prod( + a: _Number, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: None = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., +) -> _Number: ... +@overload +def prod( + a: ArrayLike, + axis: None = ..., + dtype: DtypeLike = ..., + out: None = ..., + keepdims: Literal[False] = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., +) -> number: ... +@overload +def prod( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., +) -> Union[number, ndarray]: ... +def cumprod( + a: ArrayLike, + axis: Optional[int] = ..., + dtype: DtypeLike = ..., + out: Optional[ndarray] = ..., +) -> ndarray: ... +def ndim(a: ArrayLike) -> int: ... 
+def size(a: ArrayLike, axis: Optional[int] = ...) -> int: ... +@overload +def around( + a: _Number, decimals: int = ..., out: Optional[ndarray] = ... +) -> _Number: ... +@overload +def around( + a: _NumberLike, decimals: int = ..., out: Optional[ndarray] = ... +) -> number: ... +@overload +def around( + a: ArrayLike, decimals: int = ..., out: Optional[ndarray] = ... +) -> ndarray: ... +@overload +def mean( + a: ArrayLike, + axis: None = ..., + dtype: DtypeLike = ..., + out: None = ..., + keepdims: Literal[False] = ..., +) -> number: ... +@overload +def mean( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: Optional[ndarray] = ..., + keepdims: bool = ..., +) -> Union[number, ndarray]: ... +@overload +def std( + a: ArrayLike, + axis: None = ..., + dtype: DtypeLike = ..., + out: None = ..., + ddof: int = ..., + keepdims: Literal[False] = ..., +) -> number: ... +@overload +def std( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: Optional[ndarray] = ..., + ddof: int = ..., + keepdims: bool = ..., +) -> Union[number, ndarray]: ... +@overload +def var( + a: ArrayLike, + axis: None = ..., + dtype: DtypeLike = ..., + out: None = ..., + ddof: int = ..., + keepdims: Literal[False] = ..., +) -> number: ... +@overload +def var( + a: ArrayLike, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: Optional[ndarray] = ..., + ddof: int = ..., + keepdims: bool = ..., +) -> Union[number, ndarray]: ... From 34719a111253e36f463aa4629eb75716e1c5c13c Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Mon, 14 Sep 2020 15:14:58 +0100 Subject: [PATCH 039/409] MAINT: Syntax-highlight .src files on github --- .gitattributes | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitattributes b/.gitattributes index dad6dde37cd0..81b8037d4d71 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3,3 +3,8 @@ numpy/lib/tests/data/*.npy binary # Release notes, reduce number of conflicts. doc/release/*.rst merge=union + +# Highlight our custom templating language as C, since it's hopefully better +# than nothing. This also affects repo statistics. +*.c.src linguist-language=C +*.h.src linguist-language=C From 5abb267691a44e908bc4084d1b83ad7d73c34796 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 14 Sep 2020 14:22:24 +0000 Subject: [PATCH 040/409] MAINT: Bump pytest from 6.0.1 to 6.0.2 Bumps [pytest](https://github.com/pytest-dev/pytest) from 6.0.1 to 6.0.2. 
- [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/master/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/6.0.1...6.0.2) Signed-off-by: dependabot-preview[bot] --- test_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_requirements.txt b/test_requirements.txt index 0349b711be58..ca39de622bc0 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -2,7 +2,7 @@ cython==0.29.21 wheel setuptools<49.2.0 hypothesis==5.35.1 -pytest==6.0.1 +pytest==6.0.2 pytz==2020.1 pytest-cov==2.10.1 pickle5; python_version == '3.7' From 4cdd3606160de923fb4054cf93f4ea02a356def0 Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Mon, 14 Sep 2020 18:55:12 +0300 Subject: [PATCH 041/409] DOC: add new glossary terms (#17263) * DOC: add new glossary terms * DOC: link to python Ellipsis * DOC: fixes from review * DOC: fixes from review * DOC: remove glossary items that belong to python --- doc/source/glossary.rst | 279 +++++++++++++++++++++-- doc/source/reference/arrays.indexing.rst | 3 + 2 files changed, 262 insertions(+), 20 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index d375349603df..4a59c990bdd4 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -2,10 +2,98 @@ Glossary ******** -.. toctree:: - .. glossary:: + + (`n`,) + A tuple with one element. The trailing comma distinguishes a one-element + tuple from a parenthesized ``n``. + + + -1 + Used as a dimension entry, ``-1`` instructs NumPy to choose the length + that will keep the total number of elements the same. + + + ``...`` + An :py:data:`Ellipsis` + + **When indexing an array**, shorthand that the missing axes, if they + exist, are full slices. + + >>> a = np.arange(24).reshape(2,3,4) + + >>> a[...].shape + (2, 3, 4) + + >>> a[...,0].shape + (2, 3) + + >>> a[0,...].shape + (3, 4) + + >>> a[0,...,0].shape + (3,) + + It can be used at most once; ``a[...,0,...]`` raises an :exc:`IndexError`. + + **In printouts**, NumPy substitutes ``...`` for the middle elements of + large arrays. To see the entire array, use `numpy.printoptions` + + + ``:`` + The Python :term:`python:slice` + operator. In ndarrays, slicing can be applied to every + axis: + + >>> a = np.arange(24).reshape(2,3,4) + >>> a + array([[[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]], + + [[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23]]]) + + >>> a[1:,-2:,:-1] + array([[[16, 17, 18], + [20, 21, 22]]]) + + Trailing slices can be omitted: :: + + >>> a[1] == a[1,:,:] + array([[ True, True, True, True], + [ True, True, True, True], + [ True, True, True, True]]) + + In contrast to Python, where slicing creates a copy, in NumPy slicing + creates a :term:`view`. + + For details, see :ref:`combining-advanced-and-basic-indexing`. + + + ``<`` + In a dtype declaration, indicates that the data is + :term:`little-endian` (the bracket is big on the right). :: + + >>> dt = np.dtype('`` + In a dtype declaration, indicates that the data is + :term:`big-endian` (the bracket is big on the left). :: + + >>> dt = np.dtype('>H') # big-endian unsigned short + + + advanced indexing + Rather than using a :doc:`scalar ` or slice as + an index, an axis can be indexed with an array, providing fine-grained + selection. This is known as :ref:`advanced indexing` + or "fancy indexing". + + along an axis Axes are defined for arrays with more than one dimension. 
A 2-dimensional array has two corresponding axes: the first running @@ -26,6 +114,7 @@ Glossary >>> x.sum(axis=1) array([ 6, 22, 38]) + array A homogeneous container of numerical elements. Each element in the array occupies a fixed amount of memory (hence homogeneous), and @@ -50,19 +139,92 @@ Glossary Fast element-wise operations, called a :term:`ufunc`, operate on arrays. + array_like Any sequence that can be interpreted as an ndarray. This includes nested lists, tuples, scalars and existing arrays. + + array scalar + For uniformity in handling operands, NumPy treats + a :doc:`scalar ` as an array of zero + dimension. + + + axis + + Another term for an array dimension. Axes are numbered left to right; + axis 0 is the first element in the shape tuple. + + In a two-dimensional vector, the elements of axis 0 are rows and the + elements of axis 1 are columns. + + In higher dimensions, the picture changes. NumPy prints + higher-dimensional vectors as replications of row-by-column building + blocks, as in this three-dimensional vector: + + >>> a = np.arange(12).reshape(2,2,3) + >>> a + array([[[ 0, 1, 2], + [ 3, 4, 5]], + + [[ 6, 7, 8], + [ 9, 10, 11]]]) + + ``a`` is depicted as a two-element array whose elements are 2x3 vectors. + From this point of view, rows and columns are the final two axes, + respectively, in any shape. + + This rule helps you anticipate how a vector will be printed, and + conversely how to find the index of any of the printed elements. For + instance, in the example, the last two values of 8's index must be 0 and + 2. Since 8 appears in the second of the two 2x3's, the first index must + be 1: + + >>> a[1,0,2] + 8 + + A convenient way to count dimensions in a printed vector is to + count ``[`` symbols after the open-parenthesis. This is + useful in distinguishing, say, a (1,2,3) shape from a (2,3) shape: + + >>> a = np.arange(6).reshape(2,3) + >>> a.ndim + 2 + >>> a + array([[0, 1, 2], + [3, 4, 5]]) + + >>> a = np.arange(6).reshape(1,2,3) + >>> a.ndim + 3 + >>> a + array([[[0, 1, 2], + [3, 4, 5]]]) + + + .base + + If an array does not own its memory, then its + :doc:`base ` attribute + returns the object whose memory the array is referencing. That object + may be borrowing the memory from still another object, so the + owning object may be ``a.base.base.base...``. Despite advice to the + contrary, testing ``base`` is not a surefire way to determine if two + arrays are :term:`view`\ s. + + big-endian When storing a multi-byte value in memory as a sequence of bytes, the sequence addresses/sends/stores the most significant byte first (lowest address) and the least significant byte last (highest address). Common in micro-processors and used for transmission of data over network protocols. + BLAS `Basic Linear Algebra Subprograms `_ + broadcast NumPy can do operations on arrays whose shapes are mismatched:: @@ -82,9 +244,11 @@ Glossary See `basics.broadcasting` for more information. + C order See `row-major` + column-major A way to represent items in a N-dimensional array in the 1-dimensional computer memory. In column-major order, the leftmost index "varies the @@ -100,6 +264,11 @@ Glossary Column-major order is also known as the Fortran order, as the Fortran programming language uses it. + copy + + See :term:`view`. + + decorator An operator that transforms a function. 
For example, a ``log`` decorator may be defined to print debugging information upon @@ -124,6 +293,7 @@ Glossary Logging call with parameters: (1, 2) {} 3 + dictionary Resembling a language dictionary, which provides a mapping between words and descriptions thereof, a Python dictionary is a mapping @@ -149,48 +319,55 @@ Glossary For more information on dictionaries, read the `Python tutorial `_. + + dimension + + See :term:`axis`. + + + dtype + + The datatype describing the (identically typed) elements in an ndarray. + It can be changed to reinterpret the array contents. For details, see + :doc:`Data type objects (dtype). ` + + + fancy indexing + + Another term for :term:`advanced indexing`. + + field In a :term:`structured data type`, each sub-type is called a `field`. The `field` has a name (a string), a type (any valid dtype), and an optional `title`. See :ref:`arrays.dtypes` + Fortran order See `column-major` + flattened Collapsed to a one-dimensional array. See `numpy.ndarray.flatten` for details. + homogeneous - Describes a block of memory comprised of blocks, each block comprised of + Describes a block of memory comprised of blocks, each block comprised of items and of the same size, and blocks are interpreted in exactly the same way. In the simplest case each block contains a single item, for instance int32 or float64. + immutable An object that cannot be modified after execution is called immutable. Two common examples are strings and tuples. - iterable - A sequence that allows "walking" (iterating) over items, typically - using a loop such as:: - - >>> x = [1, 2, 3] - >>> [item**2 for item in x] - [1, 4, 9] - - It is often used in combination with ``enumerate``:: - >>> keys = ['a','b','c'] - >>> for n, k in enumerate(keys): - ... print("Key %d: %s" % (n, k)) - ... - Key 0: a - Key 1: b - Key 2: c itemsize The size of the dtype element in bytes. + list A Python container that can hold any number of objects or items. The items do not have to be of the same type, and can even be @@ -223,12 +400,14 @@ Glossary tutorial `_. For a mapping type (key-value), see *dictionary*. + little-endian When storing a multi-byte value in memory as a sequence of bytes, the sequence addresses/sends/stores the least significant byte first (lowest address) and the most significant byte last (highest address). Common in x86 processors. + mask A boolean array, used to select only certain elements for an operation:: @@ -244,6 +423,7 @@ Glossary >>> x array([ 0, 1, 2, -1, -1]) + masked array Array that suppressed values indicated by a mask:: @@ -262,6 +442,7 @@ Glossary Masked arrays are often used when operating on arrays containing missing or invalid entries. + matrix A 2-dimensional ndarray that preserves its two-dimensional nature throughout operations. It has certain special operations, such as ``*`` @@ -276,18 +457,40 @@ Glossary matrix([[ 7, 10], [15, 22]]) + ndarray See *array*. + + object array + + An array whose dtype is ``object``; that is, it contains references to + Python objects. Indexing the array dereferences the Python objects, so + unlike other ndarrays, an object array has the ability to hold + heterogeneous objects. + + + ravel + + `numpy.ravel` and `numpy.ndarray.flatten` both flatten an ndarray. ``ravel`` + will return a view if possible; ``flatten`` always returns a copy. + + Flattening collapses a multi-dimensional array to a single dimension; + details of how this is done (for instance, whether ``a[n+1]`` should be + the next row or next column) are parameters. 
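A minimal sketch of the view-versus-copy distinction described in the ``ravel`` entry above (the array values here are arbitrary, chosen only for illustration): for a contiguous array, ``ravel`` returns a view that writes through to the original, while ``flatten`` always returns an independent copy. ::

    >>> a = np.arange(6).reshape(2, 3)
    >>> r = a.ravel()       # a view when possible: shares memory with a
    >>> f = a.flatten()     # always a copy: independent of a
    >>> r[0] = 99           # visible through a
    >>> a[0, 0]
    99
    >>> f[1] = 77           # does not touch a
    >>> a[0, 1]
    1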
+
+
    record array
       An :term:`ndarray` with :term:`structured data type` which has been
       subclassed as ``np.recarray`` and whose dtype is of type ``np.record``,
       making the fields of its data type to be accessible by attribute.
+
    reference
       If ``a`` is a reference to ``b``, then ``(a is b) == True``. Therefore,
       ``a`` and ``b`` are different names for the same Python object.
+
    row-major
       A way to represent items in a N-dimensional array in the 1-dimensional
       computer memory. In row-major order, the rightmost index "varies
@@ -303,6 +506,7 @@ Glossary
       Row-major order is also known as the C order, as the C programming
       language uses it. New NumPy arrays are by default in row-major order.
+
    slice
       Used to select only certain elements from a sequence:
@@ -330,9 +534,40 @@ Glossary
       >>> x[:, 1]
       array([2, 4])
+
+   stride
+
+      Physical memory is one-dimensional; strides provide a mechanism to map
+      a given index to an address in memory. For an N-dimensional array, its
+      ``strides`` attribute is an N-element tuple; advancing from index
+      ``i`` to index ``i+1`` on axis ``n`` means adding ``a.strides[n]`` bytes
+      to the address.
+
+      Strides are computed automatically from an array's dtype and
+      shape, but can be directly specified using
+      :doc:`as_strided. `
+
+      For details, see
+      :doc:`numpy.ndarray.strides `.
+
+      To see how striding underlies the power of NumPy views, see
+      `The NumPy array: a structure for efficient numerical computation. \
+      `_
+
+
+   structure
+      See :term:`structured data type`
+
+
+   structured array
+
+      Array whose :term:`dtype` is a :term:`structured data type`.
+
+
    structured data type
       A data type composed of other datatypes
+
+
    subarray data type
       A :term:`structured data type` may contain a :term:`ndarray` with its
       own dtype and shape:
@@ -342,16 +577,19 @@ Glossary
       array([(0, [0., 0., 0.]), (0, [0., 0., 0.]), (0, [0., 0., 0.])],
             dtype=[('a', '<i4'), ('b', '<f4', (3,))])
+
+
+   title
+      A :ref:`field title <titles>` which is an alias to the name and is
+      commonly used for plotting.
+
+
    ufunc
       Universal function. A fast element-wise, :term:`vectorized `
       array operation. Examples include ``add``, ``sin`` and
       ``logical_or``.
+
    vectorization
       Optimizing a looping block by specialized code. In a traditional sense,
       vectorization performs the same operation on multiple elements with
@@ -362,6 +600,7 @@ Glossary
       operations on multiple elements, typically achieving speedups by
       avoiding some of the overhead in looking up and converting the elements.
+
    view
       An array that does not own its data, but refers to another array's
       data instead. For example, we may create a view that only shows
       some elements of another array.
@@ -379,6 +618,7 @@ Glossary
       >>> y
       array([3, 2, 4])
+
    wrapper
       Python is a high-level (highly abstracted, or English-like) language.
       This abstraction comes at a price in execution speed, and sometimes
       it's necessary to use lower level languages to do fast computation.
       A wrapper is code that provides a bridge between high and the low level
       languages, allowing, e.g., Python to execute code written in C or
       Fortran. Examples include ctypes, SWIG and Cython (which wraps C and
       C++) and f2py (which wraps Fortran).
-

diff --git a/doc/source/reference/arrays.indexing.rst b/doc/source/reference/arrays.indexing.rst
index 3e600b7c456e..180a79dae18c 100644
--- a/doc/source/reference/arrays.indexing.rst
+++ b/doc/source/reference/arrays.indexing.rst
@@ -198,6 +198,7 @@ concepts to remember include:
    create an axis of length one. :const:`newaxis` is an alias for
    'None', and 'None' can be used in place of this with the same result.
 
+.. _advanced-indexing:
 
 Advanced Indexing
 -----------------
@@ -304,6 +305,8 @@ understood with an example.
    most important thing to remember about indexing with multiple advanced
    indexes.
 
+.. _combining-advanced-and-basic-indexing:
+
 Combining advanced and basic indexing
 """""""""""""""""""""""""""""""""""""
 

From 365cd3dba3487802d5a2d4e046a5c16a31bd1590 Mon Sep 17 00:00:00 2001
From: Sebastian Berg
Date: Mon, 14 Sep 2020 11:20:00 -0500
Subject: [PATCH 042/409] MAINT: Mark vendored/generated files in
 .gitattributes

This marks some files as vendored or generated. There may be more files
around. This should mostly change our language statistics on github, so
it probably isn't worth much trouble, but maybe it makes the stats
slightly more representative.
---
 .gitattributes | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/.gitattributes b/.gitattributes
index 81b8037d4d71..ad7d3b227108 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -8,3 +8,13 @@ doc/release/*.rst merge=union
 # than nothing. This also affects repo statistics.
 *.c.src linguist-language=C
 *.h.src linguist-language=C
+
+# Mark some files as vendored
+numpy/linalg/lapack_lite/f2c.c linguist-vendored
+numpy/linalg/lapack_lite/f2c.h linguist-vendored
+tools/npy_tempita/* linguist-vendored
+
+# Mark some files as generated
+numpy/linalg/lapack_lite/f2c_*.c linguist-generated
+numpy/linalg/lapack_lite/lapack_lite_names.h linguist-generated
+

From 01996b8439892a12d511652db7d8882d11e2a09f Mon Sep 17 00:00:00 2001
From: mattip
Date: Mon, 14 Sep 2020 22:56:46 +0300
Subject: [PATCH 043/409] BUILD: pin pygments to 2.6.1

---
 doc_requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc_requirements.txt b/doc_requirements.txt
index e2694ba12808..815aac3079ec 100644
--- a/doc_requirements.txt
+++ b/doc_requirements.txt
@@ -1,3 +1,4 @@
+pygments==2.6.1
 sphinx>=2.2.0,<3.0
 ipython
 scipy

From ff44f113101814b6321906a312fe9f0db1b2f1e6 Mon Sep 17 00:00:00 2001
From: Chris Brown
Date: Tue, 15 Sep 2020 14:35:14 +0100
Subject: [PATCH 044/409] BUG: Set deprecated fields to null in
 PyArray_InitArrFuncs

Initializing the deprecated fields to null ensures that if a user sets
them to their own function pointers, this can be detected and the
warning about using deprecated fields can be printed.
---
 numpy/core/src/multiarray/usertypes.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c
index 6b6c6bd9d057..d75fd313090e 100644
--- a/numpy/core/src/multiarray/usertypes.c
+++ b/numpy/core/src/multiarray/usertypes.c
@@ -127,6 +127,9 @@ PyArray_InitArrFuncs(PyArray_ArrFuncs *f)
     f->scalarkind = NULL;
     f->cancastscalarkindto = NULL;
     f->cancastto = NULL;
+    f->fastclip = NULL;
+    f->fastputmask = NULL;
+    f->fasttake = NULL;
 }

From a50d3006b1c0c526dd91ffa6cbe36b1120683b65 Mon Sep 17 00:00:00 2001
From: Sebastian Berg
Date: Tue, 15 Sep 2020 12:55:55 -0500
Subject: [PATCH 045/409] BUG: Relax registration of user dtypes with
 `NPY_ITEM_REFCOUNT`

This also includes `NPY_ITEM_IS_POINTER`. A previous change raised
an error when this happened, but some downstream libraries use it
to create a custom dtype with a single object field.
It seems acceptable to create such a dtype if (and only if) that dtype
hardcodes names and fields at creation time, so this change allows
that, but otherwise keeps the error intact.
This should work fine, although some care may be required
---
 numpy/core/src/multiarray/usertypes.c | 33 +++++++++++++++++++--------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c
index d75fd313090e..a816a701d944 100644
--- a/numpy/core/src/multiarray/usertypes.c
+++ b/numpy/core/src/multiarray/usertypes.c
@@ -195,7 +195,7 @@ PyArray_RegisterDataType(PyArray_Descr *descr)
         }
     }
     typenum = NPY_USERDEF + NPY_NUMUSERTYPES;
-    descr->type_num = typenum;
+    descr->type_num = -1;
     if (PyDataType_ISUNSIZED(descr)) {
         PyErr_SetString(PyExc_ValueError, "cannot register a" \
                         "flexible data-type");
@@ -214,18 +214,31 @@ PyArray_RegisterDataType(PyArray_Descr *descr)
                         " is missing.");
         return -1;
     }
-    if (descr->flags & (NPY_ITEM_IS_POINTER | NPY_ITEM_REFCOUNT)) {
-        PyErr_SetString(PyExc_ValueError,
-                "Legacy user dtypes referencing python objects or generally "
-                "allocated memory are unsupported. "
-                "If you see this error in an existing, working code base, "
-                "please contact the NumPy developers.");
-        return -1;
-    }
     if (descr->typeobj == NULL) {
         PyErr_SetString(PyExc_ValueError, "missing typeobject");
         return -1;
     }
+    if (descr->flags & (NPY_ITEM_IS_POINTER | NPY_ITEM_REFCOUNT)) {
+        /*
+         * User dtype can't actually do reference counting, however, there
+         * are existing hacks (e.g. xpress), which use a structured one:
+         *     dtype((xpress.var, [('variable', 'O')]))
+         * so we have to support this. But such a structure must be constant
+         * (i.e. fixed at registration time, this is the case for `xpress`).
+         */
+        if (descr->names == NULL || descr->fields == NULL ||
+                !PyDict_CheckExact(descr->fields)) {
+            PyErr_Format(PyExc_ValueError,
+                    "Failed to register dtype for %S: Legacy user dtypes "
+                    "using `NPY_ITEM_IS_POINTER` or `NPY_ITEM_REFCOUNT` are "
+                    "unsupported. It is possible to create such a dtype only "
+                    "if it is a structured dtype with names and fields "
+                    "hardcoded at registration time.\n"
+                    "Please contact the NumPy developers if this used to work "
+                    "but now fails.", descr->typeobj);
+            return -1;
+        }
+    }
 
     if (test_deprecated_arrfuncs_members(f) < 0) {
         return -1;
@@ -242,7 +255,7 @@ PyArray_RegisterDataType(PyArray_Descr *descr)
     if (dtypemeta_wrap_legacy_descriptor(descr) < 0) {
        return -1;
    }
-
+    descr->type_num = typenum;
     return typenum;
 }

From 4e9c37d0555fe4d58632d47fb5de98a40a427050 Mon Sep 17 00:00:00 2001
From: Sebastian Berg
Date: Tue, 15 Sep 2020 13:01:57 -0500
Subject: [PATCH 046/409] ENH: Make dtype registration time error messages
 more clear

---
 numpy/core/src/multiarray/dtypemeta.c | 6 +++++-
 numpy/core/src/multiarray/usertypes.c | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index d07dc700dccd..c483f993781a 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -315,7 +315,11 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
     if (Py_TYPE(descr) != &PyArrayDescr_Type) {
         PyErr_Format(PyExc_RuntimeError,
                 "During creation/wrapping of legacy DType, the original class "
-                "was not PyArrayDescr_Type (it is replaced in this step).");
+                "was not of PyArrayDescr_Type (it is replaced in this step). 
" + "The extension creating a custom DType for type %S must be " + "modified to ensure `Py_TYPE(descr) == &PyArrayDescr_Type` at " + "registration time.", + descr->typeobj); return -1; } diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c index a816a701d944..4174e9c0891d 100644 --- a/numpy/core/src/multiarray/usertypes.c +++ b/numpy/core/src/multiarray/usertypes.c @@ -315,7 +315,7 @@ PyArray_RegisterCanCast(PyArray_Descr *descr, int totype, if (!PyTypeNum_ISUSERDEF(descr->type_num) && !PyTypeNum_ISUSERDEF(totype)) { PyErr_SetString(PyExc_ValueError, - "At least one of the types provided to" + "At least one of the types provided to " "RegisterCanCast must be user-defined."); return -1; } From fc0297e3311de5f47a46767f6d6c5181c9a7b07d Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Tue, 15 Sep 2020 13:43:14 -0500 Subject: [PATCH 047/409] MAINT: Relax check that a correct type is already set This allows running `xpress` on current master. `xpress` copies the type from `np.dtype(object)` rather than using `&PyArrayDescr_Type`. That seems overall fine, we can just replace it. The only reason for this check is to ensure that the user does not override our updated `type(user_dtype)` and thus corrupting it. --- numpy/core/src/multiarray/dtypemeta.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c index c483f993781a..ae1dfdd81a4b 100644 --- a/numpy/core/src/multiarray/dtypemeta.c +++ b/numpy/core/src/multiarray/dtypemeta.c @@ -312,14 +312,28 @@ string_known_scalar_types( NPY_NO_EXPORT int dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr) { - if (Py_TYPE(descr) != &PyArrayDescr_Type) { + int has_type_set = Py_TYPE(descr) == &PyArrayDescr_Type; + + if (!has_type_set) { + /* Accept if the type was filled in from an existing builtin dtype */ + for (int i = 0; i < NPY_NTYPES; i++) { + PyArray_Descr *builtin = PyArray_DescrFromType(i); + has_type_set = Py_TYPE(descr) == Py_TYPE(builtin); + Py_DECREF(builtin); + if (has_type_set) { + break; + } + } + } + if (!has_type_set) { PyErr_Format(PyExc_RuntimeError, "During creation/wrapping of legacy DType, the original class " "was not of PyArrayDescr_Type (it is replaced in this step). 
" "The extension creating a custom DType for type %S must be " - "modified to ensure `Py_TYPE(descr) == &PyArrayDescr_Type` at " - "registration time.", - descr->typeobj); + "modified to ensure `Py_TYPE(descr) == &PyArrayDescr_Type` or " + "that of an existing dtype (with the assumption it is just " + "copied over and can be replaced).", + descr->typeobj, Py_TYPE(descr)); return -1; } From bd263912e11f7f79310c5516fab6bd0a7b7c8eb9 Mon Sep 17 00:00:00 2001 From: Ryan C Cooper Date: Tue, 15 Sep 2020 16:35:06 -0400 Subject: [PATCH 048/409] DOC: Update numpy4matlab (#17159) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update doc/source/user/numpy-for-matlab-users.rst Co-authored-by: Eric Wieser Co-authored-by: Ben Nathanson Co-authored-by: Melissa Weber Mendonça --- doc/source/user/numpy-for-matlab-users.rst | 737 ++++++++++++--------- 1 file changed, 420 insertions(+), 317 deletions(-) diff --git a/doc/source/user/numpy-for-matlab-users.rst b/doc/source/user/numpy-for-matlab-users.rst index 547d5b2a0767..eb011de633c4 100644 --- a/doc/source/user/numpy-for-matlab-users.rst +++ b/doc/source/user/numpy-for-matlab-users.rst @@ -7,12 +7,9 @@ NumPy for MATLAB users Introduction ============ -MATLAB® and NumPy/SciPy have a lot in common. But there are many -differences. NumPy and SciPy were created to do numerical and scientific -computing in the most natural way with Python, not to be MATLAB clones. -This page is intended to be a place to collect wisdom about the -differences, mostly for the purpose of helping proficient MATLAB users -become proficient NumPy and SciPy users. +MATLAB® and NumPy have a lot in common, but NumPy was created to work with +Python, not to be a MATLAB clone. This guide will help MATLAB users get started +with NumPy. .. raw:: html @@ -20,234 +17,184 @@ become proficient NumPy and SciPy users. table.docutils td { border: solid 1px #ccc; } -Some Key Differences +Some key differences ==================== .. list-table:: - - * - In MATLAB, the basic data type is a multidimensional array of - double precision floating point numbers. Most expressions take such - arrays and return such arrays. Operations on the 2-D instances of - these arrays are designed to act more or less like matrix operations - in linear algebra. - - In NumPy the basic type is a multidimensional ``array``. Operations - on these arrays in all dimensionalities including 2D are element-wise - operations. One needs to use specific functions for linear algebra - (though for matrix multiplication, one can use the ``@`` operator - in python 3.5 and above). - - * - MATLAB uses 1 (one) based indexing. The initial element of a - sequence is found using a(1). + :class: docutils + + * - In MATLAB, the basic type, even for scalars, is a + multidimensional array. Array assignments in MATLAB are stored as + 2D arrays of double precision floating point numbers, unless you + specify the number of dimensions and type. Operations on the 2D + instances of these arrays are modeled on matrix operations in + linear algebra. + + - In NumPy, the basic type is a multidimensional ``array``. Array + assignments in NumPy are usually stored as :ref:`n-dimensional arrays` with the + minimum type required to hold the objects in sequence, unless you + specify the number of dimensions and type. NumPy performs + operations element-by-element, so multiplying 2D arrays with + ``*`` is not a matrix multiplication -- it's an + element-by-element multiplication. 
(The ``@`` operator, available + since Python 3.5, can be used for conventional matrix + multiplication.) + + * - MATLAB numbers indices from 1; ``a(1)`` is the first element. :ref:`See note INDEXING ` - - Python uses 0 (zero) based indexing. The initial element of a - sequence is found using a[0]. - - * - MATLAB's scripting language was created for doing linear algebra. - The syntax for basic matrix operations is nice and clean, but the API - for adding GUIs and making full-fledged applications is more or less - an afterthought. - - NumPy is based on Python, which was designed from the outset to be - an excellent general-purpose programming language. While MATLAB's - syntax for some array manipulations is more compact than - NumPy's, NumPy (by virtue of being an add-on to Python) can do many - things that MATLAB just cannot, for instance dealing properly with - stacks of matrices. - - * - In MATLAB, arrays have pass-by-value semantics, with a lazy - copy-on-write scheme to prevent actually creating copies until they - are actually needed. Slice operations copy parts of the array. - - In NumPy arrays have pass-by-reference semantics. Slice operations - are views into an array. - - -'array' or 'matrix'? Which should I use? -======================================== - -Historically, NumPy has provided a special matrix type, `np.matrix`, which -is a subclass of ndarray which makes binary operations linear algebra -operations. You may see it used in some existing code instead of `np.array`. -So, which one to use? - -Short answer ------------- - -**Use arrays**. - -- They are the standard vector/matrix/tensor type of numpy. Many numpy - functions return arrays, not matrices. -- There is a clear distinction between element-wise operations and - linear algebra operations. -- You can have standard vectors or row/column vectors if you like. - -Until Python 3.5 the only disadvantage of using the array type was that you -had to use ``dot`` instead of ``*`` to multiply (reduce) two tensors -(scalar product, matrix vector multiplication etc.). Since Python 3.5 you -can use the matrix multiplication ``@`` operator. - -Given the above, we intend to deprecate ``matrix`` eventually. - -Long answer ------------ - -NumPy contains both an ``array`` class and a ``matrix`` class. The -``array`` class is intended to be a general-purpose n-dimensional array -for many kinds of numerical computing, while ``matrix`` is intended to -facilitate linear algebra computations specifically. In practice there -are only a handful of key differences between the two. - -- Operators ``*`` and ``@``, functions ``dot()``, and ``multiply()``: - - - For ``array``, **``*`` means element-wise multiplication**, while - **``@`` means matrix multiplication**; they have associated functions - ``multiply()`` and ``dot()``. (Before python 3.5, ``@`` did not exist - and one had to use ``dot()`` for matrix multiplication). - - For ``matrix``, **``*`` means matrix multiplication**, and for - element-wise multiplication one has to use the ``multiply()`` function. - -- Handling of vectors (one-dimensional arrays) - - - For ``array``, the **vector shapes 1xN, Nx1, and N are all different - things**. Operations like ``A[:,1]`` return a one-dimensional array of - shape N, not a two-dimensional array of shape Nx1. Transpose on a - one-dimensional ``array`` does nothing. - - For ``matrix``, **one-dimensional arrays are always upconverted to 1xN - or Nx1 matrices** (row or column vectors). ``A[:,1]`` returns a - two-dimensional matrix of shape Nx1. 
- -- Handling of higher-dimensional arrays (ndim > 2) - - - ``array`` objects **can have number of dimensions > 2**; - - ``matrix`` objects **always have exactly two dimensions**. - -- Convenience attributes - - - ``array`` **has a .T attribute**, which returns the transpose of - the data. - - ``matrix`` **also has .H, .I, and .A attributes**, which return - the conjugate transpose, inverse, and ``asarray()`` of the matrix, - respectively. - -- Convenience constructor + - NumPy, like Python, numbers indices from 0; ``a[0]`` is the first + element. - - The ``array`` constructor **takes (nested) Python sequences as - initializers**. As in, ``array([[1,2,3],[4,5,6]])``. - - The ``matrix`` constructor additionally **takes a convenient - string initializer**. As in ``matrix("[1 2 3; 4 5 6]")``. - -There are pros and cons to using both: - -- ``array`` - - - ``:)`` Element-wise multiplication is easy: ``A*B``. - - ``:(`` You have to remember that matrix multiplication has its own - operator, ``@``. - - ``:)`` You can treat one-dimensional arrays as *either* row or column - vectors. ``A @ v`` treats ``v`` as a column vector, while - ``v @ A`` treats ``v`` as a row vector. This can save you having to - type a lot of transposes. - - ``:)`` ``array`` is the "default" NumPy type, so it gets the most - testing, and is the type most likely to be returned by 3rd party - code that uses NumPy. - - ``:)`` Is quite at home handling data of any number of dimensions. - - ``:)`` Closer in semantics to tensor algebra, if you are familiar - with that. - - ``:)`` *All* operations (``*``, ``/``, ``+``, ``-`` etc.) are - element-wise. - - ``:(`` Sparse matrices from ``scipy.sparse`` do not interact as well - with arrays. - -- ``matrix`` - - - ``:\\`` Behavior is more like that of MATLAB matrices. - - ``<:(`` Maximum of two-dimensional. To hold three-dimensional data you - need ``array`` or perhaps a Python list of ``matrix``. - - ``<:(`` Minimum of two-dimensional. You cannot have vectors. They must be - cast as single-column or single-row matrices. - - ``<:(`` Since ``array`` is the default in NumPy, some functions may - return an ``array`` even if you give them a ``matrix`` as an - argument. This shouldn't happen with NumPy functions (if it does - it's a bug), but 3rd party code based on NumPy may not honor type - preservation like NumPy does. - - ``:)`` ``A*B`` is matrix multiplication, so it looks just like you write - it in linear algebra (For Python >= 3.5 plain arrays have the same - convenience with the ``@`` operator). - - ``<:(`` Element-wise multiplication requires calling a function, - ``multiply(A,B)``. - - ``<:(`` The use of operator overloading is a bit illogical: ``*`` - does not work element-wise but ``/`` does. - - Interaction with ``scipy.sparse`` is a bit cleaner. - -The ``array`` is thus much more advisable to use. Indeed, we intend to -deprecate ``matrix`` eventually. - -Table of Rough MATLAB-NumPy Equivalents + * - MATLAB's scripting language was created for linear algebra so the + syntax for some array manipulations is more compact than + NumPy's. On the other hand, the API for adding GUIs and creating + full-fledged applications is more or less an afterthought. + - NumPy is based on Python, a + general-purpose language. The advantage to NumPy + is access to Python libraries including: `SciPy + `_, `Matplotlib `_, + `Pandas `_, `OpenCV `_, + and more. In addition, Python is often `embedded as a scripting language + `_ + in other software, allowing NumPy to be used there too. 
+ + * - MATLAB array slicing uses pass-by-value semantics, with a lazy + copy-on-write scheme to prevent creating copies until they are + needed. Slicing operations copy parts of the array. + - NumPy array slicing uses pass-by-reference, that does not copy + the arguments. Slicing operations are views into an array. + + +Rough equivalents ======================================= The table below gives rough equivalents for some common MATLAB -expressions. **These are not exact equivalents**, but rather should be -taken as hints to get you going in the right direction. For more detail -read the built-in documentation on the NumPy functions. +expressions. These are similar expressions, not equivalents. For +details, see the :ref:`documentation`. In the table below, it is assumed that you have executed the following commands in Python: :: - from numpy import * - import scipy.linalg + import numpy as np + from scipy import io, integrate, linalg, signal + from scipy.sparse.linalg import eigs Also assume below that if the Notes talk about "matrix" that the arguments are two-dimensional entities. -General Purpose Equivalents +General purpose equivalents --------------------------- .. list-table:: :header-rows: 1 - * - **MATLAB** - - **numpy** - - **Notes** + * - MATLAB + - NumPy + - Notes * - ``help func`` - - ``info(func)`` or ``help(func)`` or ``func?`` (in Ipython) + - ``info(func)`` or ``help(func)`` or ``func?`` (in IPython) - get help on the function *func* * - ``which func`` - - `see note HELP `__ + - :ref:`see note HELP ` - find out where *func* is defined * - ``type func`` - - ``source(func)`` or ``func??`` (in Ipython) + - ``np.source(func)`` or ``func??`` (in IPython) - print source for *func* (if not a native function) + * - ``% comment`` + - ``# comment`` + - comment a line of code with the text ``comment`` + + * - :: + + for i=1:3 + fprintf('%i\n',i) + end + + - :: + + for i in range(1, 4): + print(i) + + - use a for-loop to print the numbers 1, 2, and 3 using :py:class:`range ` + * - ``a && b`` - ``a and b`` - - short-circuiting logical AND operator (Python native operator); + - short-circuiting logical AND operator (:ref:`Python native operator `); scalar arguments only * - ``a || b`` - ``a or b`` - - short-circuiting logical OR operator (Python native operator); + - short-circuiting logical OR operator (:ref:`Python native operator `); scalar arguments only + * - .. code:: matlab + + >> 4 == 4 + ans = 1 + >> 4 == 5 + ans = 0 + + - :: + + >>> 4 == 4 + True + >>> 4 == 5 + False + + - The :ref:`boolean objects ` + in Python are ``True`` and ``False``, as opposed to MATLAB + logical types of ``1`` and ``0``. + + * - .. code:: matlab + + a=4 + if a==4 + fprintf('a = 4\n') + elseif a==5 + fprintf('a = 5\n') + end + + - :: + + a = 4 + if a == 4: + print('a = 4') + elif a == 5: + print('a = 5') + + - create an if-else statement to check if ``a`` is 4 or 5 and print result + * - ``1*i``, ``1*j``, ``1i``, ``1j`` - ``1j`` - complex numbers * - ``eps`` - - ``np.spacing(1)`` - - Distance between 1 and the nearest floating point number. + - ``np.finfo(float).eps`` or ``np.spacing(1)`` + - Upper bound to relative error due to rounding in 64-bit floating point + arithmetic. + + * - ``load data.mat`` + - ``io.loadmat('data.mat')`` + - Load MATLAB variables saved to the file ``data.mat``. (Note: When saving arrays to + ``data.mat`` in MATLAB/Octave, use a recent binary format. :func:`scipy.io.loadmat` + will create a dictionary with the saved arrays and further information.) 
* - ``ode45`` - - ``scipy.integrate.solve_ivp(f)`` + - ``integrate.solve_ivp(f)`` - integrate an ODE with Runge-Kutta 4,5 * - ``ode15s`` - - ``scipy.integrate.solve_ivp(f, method='BDF')`` + - ``integrate.solve_ivp(f, method='BDF')`` - integrate an ODE with BDF method -Linear Algebra Equivalents + +Linear algebra equivalents -------------------------- .. list-table:: @@ -258,16 +205,16 @@ Linear Algebra Equivalents - Notes * - ``ndims(a)`` - - ``ndim(a)`` or ``a.ndim`` - - get the number of dimensions of an array + - ``np.ndim(a)`` or ``a.ndim`` + - number of dimensions of array ``a`` * - ``numel(a)`` - - ``size(a)`` or ``a.size`` - - get the number of elements of an array + - ``np.size(a)`` or ``a.size`` + - number of elements of array ``a`` * - ``size(a)`` - - ``shape(a)`` or ``a.shape`` - - get the "size" of the matrix + - ``np.shape(a)`` or ``a.shape`` + - "size" of array ``a`` * - ``size(a,n)`` - ``a.shape[n-1]`` @@ -276,45 +223,45 @@ Linear Algebra Equivalents See note :ref:`INDEXING `) * - ``[ 1 2 3; 4 5 6 ]`` - - ``array([[1.,2.,3.], [4.,5.,6.]])`` - - 2x3 matrix literal + - ``np.array([[1. ,2. ,3.], [4. ,5. ,6.]])`` + - define a 2x3 2D array * - ``[ a b; c d ]`` - - ``block([[a,b], [c,d]])`` + - ``np.block([[a, b], [c, d]])`` - construct a matrix from blocks ``a``, ``b``, ``c``, and ``d`` * - ``a(end)`` - ``a[-1]`` - - access last element in the 1xn matrix ``a`` + - access last element in MATLAB vector (1xn or nx1) or 1D NumPy array + ``a`` (length n) * - ``a(2,5)`` - - ``a[1,4]`` - - access element in second row, fifth column + - ``a[1, 4]`` + - access element in second row, fifth column in 2D array ``a`` * - ``a(2,:)`` - - ``a[1]`` or ``a[1,:]`` - - entire second row of ``a`` + - ``a[1]`` or ``a[1, :]`` + - entire second row of 2D array ``a`` * - ``a(1:5,:)`` - - ``a[0:5]`` or ``a[:5]`` or ``a[0:5,:]`` - - the first five rows of ``a`` + - ``a[0:5]`` or ``a[:5]`` or ``a[0:5, :]`` + - first 5 rows of 2D array ``a`` * - ``a(end-4:end,:)`` - ``a[-5:]`` - - the last five rows of ``a`` + - last 5 rows of 2D array ``a`` * - ``a(1:3,5:9)`` - - ``a[0:3][:,4:9]`` - - rows one to three and columns five to nine of ``a``. This gives - read-only access. + - ``a[0:3, 4:9]`` + - The first through third rows and fifth through ninth columns of a 2D array, ``a``. * - ``a([2,4,5],[1,3])`` - - ``a[ix_([1,3,4],[0,2])]`` + - ``a[np.ix_([1, 3, 4], [0, 2])]`` - rows 2,4 and 5 and columns 1 and 3. This allows the matrix to be modified, and doesn't require a regular slice. * - ``a(3:2:21,:)`` - - ``a[ 2:21:2,:]`` + - ``a[2:21:2,:]`` - every other row of ``a``, starting with the third and going to the twenty-first @@ -323,11 +270,11 @@ Linear Algebra Equivalents - every other row of ``a``, starting with the first * - ``a(end:-1:1,:)`` or ``flipud(a)`` - - ``a[ ::-1,:]`` + - ``a[::-1,:]`` - ``a`` with rows in reverse order * - ``a([1:end 1],:)`` - - ``a[r_[:len(a),0]]`` + - ``a[np.r_[:len(a),0]]`` - ``a`` with copy of the first row appended to the end * - ``a.'`` @@ -354,30 +301,30 @@ Linear Algebra Equivalents - ``a**3`` - element-wise exponentiation - * - ``(a>0.5)`` - - ``(a>0.5)`` + * - ``(a > 0.5)`` + - ``(a > 0.5)`` - matrix whose i,jth element is (a_ij > 0.5). The MATLAB result is an - array of 0s and 1s. The NumPy result is an array of the boolean + array of logical values 0 and 1. The NumPy result is an array of the boolean values ``False`` and ``True``. 
- * - ``find(a>0.5)`` - - ``nonzero(a>0.5)`` + * - ``find(a > 0.5)`` + - ``np.nonzero(a > 0.5)`` - find the indices where (``a`` > 0.5) - * - ``a(:,find(v>0.5))`` - - ``a[:,nonzero(v>0.5)[0]]`` + * - ``a(:,find(v > 0.5))`` + - ``a[:,np.nonzero(v > 0.5)[0]]`` - extract the columms of ``a`` where vector v > 0.5 * - ``a(:,find(v>0.5))`` - - ``a[:,v.T>0.5]`` + - ``a[:, v.T > 0.5]`` - extract the columms of ``a`` where column vector v > 0.5 * - ``a(a<0.5)=0`` - - ``a[a<0.5]=0`` + - ``a[a < 0.5]=0`` - ``a`` with elements less than 0.5 zeroed out * - ``a .* (a>0.5)`` - - ``a * (a>0.5)`` + - ``a * (a > 0.5)`` - ``a`` with elements less than 0.5 zeroed out * - ``a(:) = 3`` @@ -386,11 +333,11 @@ Linear Algebra Equivalents * - ``y=x`` - ``y = x.copy()`` - - numpy assigns by reference + - NumPy assigns by reference * - ``y=x(2,:)`` - - ``y = x[1,:].copy()`` - - numpy slices are by reference + - ``y = x[1, :].copy()`` + - NumPy slices are by reference * - ``y=x(:)`` - ``y = x.flatten()`` @@ -398,62 +345,74 @@ Linear Algebra Equivalents same data ordering as in MATLAB, use ``x.flatten('F')``. * - ``1:10`` - - ``arange(1.,11.)`` or ``r_[1.:11.]`` or ``r_[1:10:10j]`` + - ``np.arange(1., 11.)`` or ``np.r_[1.:11.]`` or ``np.r_[1:10:10j]`` - create an increasing vector (see note :ref:`RANGES `) * - ``0:9`` - - ``arange(10.)`` or ``r_[:10.]`` or ``r_[:9:10j]`` + - ``np.arange(10.)`` or ``np.r_[:10.]`` or ``np.r_[:9:10j]`` - create an increasing vector (see note :ref:`RANGES `) * - ``[1:10]'`` - - ``arange(1.,11.)[:, newaxis]`` + - ``np.arange(1.,11.)[:, np.newaxis]`` - create a column vector * - ``zeros(3,4)`` - - ``zeros((3,4))`` + - ``np.zeros((3, 4))`` - 3x4 two-dimensional array full of 64-bit floating point zeros * - ``zeros(3,4,5)`` - - ``zeros((3,4,5))`` + - ``np.zeros((3, 4, 5))`` - 3x4x5 three-dimensional array full of 64-bit floating point zeros * - ``ones(3,4)`` - - ``ones((3,4))`` + - ``np.ones((3, 4))`` - 3x4 two-dimensional array full of 64-bit floating point ones * - ``eye(3)`` - - ``eye(3)`` + - ``np.eye(3)`` - 3x3 identity matrix * - ``diag(a)`` - - ``diag(a)`` - - vector of diagonal elements of ``a`` + - ``np.diag(a)`` + - returns a vector of the diagonal elements of 2D array, ``a`` + + * - ``diag(v,0)`` + - ``np.diag(v, 0)`` + - returns a square diagonal matrix whose nonzero values are the elements of + vector, ``v`` - * - ``diag(a,0)`` - - ``diag(a,0)`` - - square diagonal matrix whose nonzero values are the elements of - ``a`` + * - .. 
code:: matlab
+
+        rng(42,'twister')
+        rand(3,4)
+
+    - ::
+
+        from numpy.random import default_rng
+        rng = default_rng(42)
+        rng.random((3, 4))
+
+      or older version: ``random.rand(3, 4)``
+
+    - generate a random 3x4 array with default random number generator and
+      seed = 42
 
   * - ``linspace(1,3,4)``
-    - ``linspace(1,3,4)``
+    - ``np.linspace(1,3,4)``
     - 4 equally spaced samples between 1 and 3, inclusive
 
   * - ``[x,y]=meshgrid(0:8,0:5)``
-    - ``mgrid[0:9.,0:6.]`` or ``meshgrid(r_[0:9.],r_[0:6.]``
+    - ``np.mgrid[0:9.,0:6.]`` or ``np.meshgrid(r_[0:9.],r_[0:6.]``
     - two 2D arrays: one of x values, the other of y values
 
   * -
-    - ``ogrid[0:9.,0:6.]`` or ``ix_(r_[0:9.],r_[0:6.]``
+    - ``ogrid[0:9.,0:6.]`` or ``np.ix_(np.r_[0:9.],np.r_[0:6.]``
     - the best way to eval functions on a grid
 
   * - ``[x,y]=meshgrid([1,2,4],[2,4,5])``
-    - ``meshgrid([1,2,4],[2,4,5])``
+    - ``np.meshgrid([1,2,4],[2,4,5])``
     -
 
   * -
@@ -461,37 +420,38 @@ Linear Algebra Equivalents
     - the best way to eval functions on a grid
 
   * - ``repmat(a, m, n)``
-    - ``tile(a, (m, n))``
+    - ``np.tile(a, (m, n))``
     - create m by n copies of ``a``
 
   * - ``[a b]``
-    - ``concatenate((a,b),1)`` or ``hstack((a,b))`` or
-      ``column_stack((a,b))`` or ``c_[a,b]``
+    - ``np.concatenate((a,b),1)`` or ``np.hstack((a,b))`` or
+      ``np.column_stack((a,b))`` or ``np.c_[a,b]``
     - concatenate columns of ``a`` and ``b``
 
   * - ``[a; b]``
-    - ``concatenate((a,b))`` or ``vstack((a,b))`` or ``r_[a,b]``
+    - ``np.concatenate((a,b))`` or ``np.vstack((a,b))`` or ``np.r_[a,b]``
     - concatenate rows of ``a`` and ``b``
 
   * - ``max(max(a))``
-    - ``a.max()``
-    - maximum element of ``a`` (with ndims(a)<=2 for MATLAB)
+    - ``a.max()`` or ``np.nanmax(a)``
+    - maximum element of ``a`` (with ndims(a)<=2 for MATLAB, if there are
+      NaN's, ``nanmax`` will ignore these and return largest value)
 
   * - ``max(a)``
     - ``a.max(0)``
-    - maximum element of each column of matrix ``a``
+    - maximum element of each column of array ``a``
 
   * - ``max(a,[],2)``
     - ``a.max(1)``
-    - maximum element of each row of matrix ``a``
+    - maximum element of each row of array ``a``
 
   * - ``max(a,b)``
-    - ``maximum(a, b)``
+    - ``np.maximum(a, b)``
     - compares ``a`` and ``b`` element-wise, and returns the maximum value
      from each pair
 
   * - ``norm(v)``
-    - ``sqrt(v @ v)`` or ``np.linalg.norm(v)``
+    - ``np.sqrt(v @ v)`` or ``np.linalg.norm(v)``
     - L2 norm of vector ``v``
 
   * - ``a & b``
     - ``logical_and(a,b)``
     - element-by-element AND operator (NumPy ufunc) :ref:`See note
       LOGICOPS `
 
   * - ``a | b``
-    - ``logical_or(a,b)``
+    - ``np.logical_or(a,b)``
     - element-by-element OR operator (NumPy ufunc) :ref:`See note
       LOGICOPS `
 
@@ -514,90 +474,99 @@ Linear Algebra Equivalents
 
   * - ``inv(a)``
     - ``linalg.inv(a)``
-    - inverse of square matrix ``a``
+    - inverse of square 2D array ``a``
 
   * - ``pinv(a)``
     - ``linalg.pinv(a)``
-    - pseudo-inverse of matrix ``a``
+    - pseudo-inverse of 2D array ``a``
 
   * - ``rank(a)``
     - ``linalg.matrix_rank(a)``
-    - matrix rank of a 2D array / matrix ``a``
+    - matrix rank of a 2D array ``a``
 
   * - ``a\b``
-    - ``linalg.solve(a,b)`` if ``a`` is square; ``linalg.lstsq(a,b)``
+    - ``linalg.solve(a, b)`` if ``a`` is square; ``linalg.lstsq(a, b)``
       otherwise
     - solution of a x = b for x
 
   * - ``b/a``
-    - Solve a.T x.T = b.T instead
+    - Solve ``a.T x.T = b.T`` instead
     - solution of x a = b for x
 
   * - ``[U,S,V]=svd(a)``
     - ``U, S, Vh = linalg.svd(a), V = Vh.T``
     - singular value decomposition of ``a``
 
-  * - ``chol(a)``
-    - ``linalg.cholesky(a).T``
-    - cholesky factorization of a matrix (``chol(a)`` in
MATLAB returns an
-      upper triangular matrix, but ``linalg.cholesky(a)`` returns a lower
-      triangular matrix)
+  * - ``c=chol(a)`` where ``a==c'*c``
+    - ``c = linalg.cholesky(a)`` where ``a == c@c.T``
+    - Cholesky factorization of a 2D array (``chol(a)`` in MATLAB returns an
+      upper triangular 2D array, but :func:`~scipy.linalg.cholesky` returns a lower
+      triangular 2D array)
 
   * - ``[V,D]=eig(a)``
     - ``D,V = linalg.eig(a)``
-    - eigenvalues and eigenvectors of ``a``
+    - eigenvalues :math:`\lambda` and eigenvectors :math:`\bar{v}` of ``a``,
+      where :math:`\lambda\bar{v}=\mathbf{a}\bar{v}`
 
   * - ``[V,D]=eig(a,b)``
-    - ``D,V = scipy.linalg.eig(a,b)``
-    - eigenvalues and eigenvectors of ``a``, ``b``
+    - ``D,V = linalg.eig(a, b)``
+    - eigenvalues :math:`\lambda` and eigenvectors :math:`\bar{v}` of
+      ``a``, ``b``
+      where :math:`\lambda\mathbf{b}\bar{v}=\mathbf{a}\bar{v}`
 
-  * - ``[V,D]=eigs(a,k)``
-    -
-    - find the ``k`` largest eigenvalues and eigenvectors of ``a``
+  * - ``[V,D]=eigs(a,3)``
+    - ``D,V = eigs(a, k = 3)``
+    - find the ``k=3`` largest eigenvalues and eigenvectors of 2D array, ``a``
 
   * - ``[Q,R,P]=qr(a,0)``
-    - ``Q,R = scipy.linalg.qr(a)``
+    - ``Q,R = linalg.qr(a)``
     - QR decomposition
 
-  * - ``[L,U,P]=lu(a)``
-    - ``L,U = scipy.linalg.lu(a)`` or ``LU,P=scipy.linalg.lu_factor(a)``
-    - LU decomposition (note: P(MATLAB) == transpose(P(numpy)) )
+  * - ``[L,U,P]=lu(a)`` where ``a==P'*L*U``
+    - ``P,L,U = linalg.lu(a)`` where ``a == P@L@U``
+    - LU decomposition (note: P(MATLAB) == transpose(P(NumPy)))
 
   * - ``conjgrad``
-    - ``scipy.sparse.linalg.cg``
+    - ``cg``
     - Conjugate gradients solver
 
   * - ``fft(a)``
-    - ``fft(a)``
+    - ``np.fft.fft(a)``
     - Fourier transform of ``a``
 
   * - ``ifft(a)``
-    - ``ifft(a)``
+    - ``np.fft.ifft(a)``
     - inverse Fourier transform of ``a``
 
   * - ``sort(a)``
-    - ``sort(a)`` or ``a.sort()``
-    - sort the matrix
+    - ``np.sort(a)`` or ``a.sort(axis=0)``
+    - sort each column of a 2D array, ``a``
 
-  * - ``[b,I] = sortrows(a,i)``
-    - ``I = argsort(a[:,i]), b=a[I,:]``
-    - sort the rows of the matrix
+  * - ``sort(a, 2)``
+    - ``np.sort(a, axis = 1)`` or ``a.sort(axis = 1)``
+    - sort each row of 2D array, ``a``
+
+  * - ``[b,I]=sortrows(a,1)``
+    - ``I = np.argsort(a[:, 0]); b = a[I,:]``
+    - save the array ``a`` as array ``b`` with rows sorted by the first column
 
-  * - ``regress(y,X)``
-    - ``linalg.lstsq(X,y)``
-    - multilinear regression
+  * - ``x = Z\y``
+    - ``x = linalg.lstsq(Z, y)``
+    - perform a linear regression of the form :math:`\mathbf{Zx}=\mathbf{y}`
 
   * - ``decimate(x, q)``
-    - ``scipy.signal.resample(x, len(x)/q)``
+    - ``signal.resample(x, np.ceil(len(x)/q))``
     - downsample with low-pass filtering
 
   * - ``unique(a)``
-    - ``unique(a)``
-    -
+    - ``np.unique(a)``
+    - a vector of unique values in array ``a``
 
   * - ``squeeze(a)``
     - ``a.squeeze()``
-    -
+    - remove singleton dimensions of array ``a``. Note that MATLAB will always
+      return arrays of 2D or higher while NumPy will return arrays of 0D or
+      higher
 
 .. _numpy-for-matlab-users.notes:
 
 Notes
 =====
 
 \ **Submatrix**: Assignment to a submatrix can be done with lists of
-indexes using the ``ix_`` command. E.g., for 2d array ``a``, one might
-do: ``ind=[1,3]; a[np.ix_(ind,ind)]+=100``.
+indices using the ``ix_`` command. E.g., for 2D array ``a``, one might
+do: ``ind=[1, 3]; a[np.ix_(ind, ind)] += 100``.
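A short sketch of the ``ix_`` assignment above, assuming a 4x4 array (the values are only for illustration); ``np.ix_`` forms the cross product of the row and column index lists: ::

    >>> a = np.arange(16).reshape(4, 4)
    >>> ind = [1, 3]
    >>> a[np.ix_(ind, ind)] += 100    # rows 1 and 3 crossed with columns 1 and 3
    >>> a[1, 1], a[1, 3], a[3, 1], a[3, 3]
    (105, 107, 113, 115)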
\ **HELP**: There is no direct equivalent of MATLAB's ``which`` command, -but the commands ``help`` and ``source`` will usually list the filename +but the commands :func:`help`` and :func:`numpy.source` will usually list the filename where the function is located. Python also has an ``inspect`` module (do ``import inspect``) which provides a ``getfile`` that often works. @@ -627,35 +596,35 @@ Dijkstra ; MATLAB's is the reverse. + operators like ``<`` and ``>``; MATLAB's is the reverse. If you know you have boolean arguments, you can get away with using -NumPy's bitwise operators, but be careful with parentheses, like this: z -= (x > 1) & (x < 2). The absence of NumPy operator forms of logical\_and -and logical\_or is an unfortunate consequence of Python's design. +NumPy's bitwise operators, but be careful with parentheses, like this: ``z += (x > 1) & (x < 2)``. The absence of NumPy operator forms of ``logical_and`` +and ``logical_or`` is an unfortunate consequence of Python's design. **RESHAPE and LINEAR INDEXING**: MATLAB always allows multi-dimensional arrays to be accessed using scalar or linear indices, NumPy does not. -Linear indices are common in MATLAB programs, e.g. find() on a matrix +Linear indices are common in MATLAB programs, e.g. ``find()`` on a matrix returns them, whereas NumPy's find behaves differently. When converting MATLAB code it might be necessary to first reshape a matrix to a linear sequence, perform some indexing operations and then reshape back. As @@ -664,11 +633,132 @@ possible to do this fairly efficiently. Note that the scan order used by reshape in NumPy defaults to the 'C' order, whereas MATLAB uses the Fortran order. If you are simply converting to a linear sequence and back this doesn't matter. But if you are converting reshapes from MATLAB -code which relies on the scan order, then this MATLAB code: z = -reshape(x,3,4); should become z = x.reshape(3,4,order='F').copy() in +code which relies on the scan order, then this MATLAB code: ``z = +reshape(x,3,4);`` should become ``z = x.reshape(3,4,order='F').copy()`` in NumPy. -Customizing Your Environment +'array' or 'matrix'? Which should I use? +======================================== + +Historically, NumPy has provided a special matrix type, `np.matrix`, which +is a subclass of ndarray which makes binary operations linear algebra +operations. You may see it used in some existing code instead of `np.array`. +So, which one to use? + +Short answer +------------ + +**Use arrays**. + +- They support multidimensional array algebra that is supported in MATLAB +- They are the standard vector/matrix/tensor type of NumPy. Many NumPy + functions return arrays, not matrices. +- There is a clear distinction between element-wise operations and + linear algebra operations. +- You can have standard vectors or row/column vectors if you like. + +Until Python 3.5 the only disadvantage of using the array type was that you +had to use ``dot`` instead of ``*`` to multiply (reduce) two tensors +(scalar product, matrix vector multiplication etc.). Since Python 3.5 you +can use the matrix multiplication ``@`` operator. + +Given the above, we intend to deprecate ``matrix`` eventually. + +Long answer +----------- + +NumPy contains both an ``array`` class and a ``matrix`` class. The +``array`` class is intended to be a general-purpose n-dimensional array +for many kinds of numerical computing, while ``matrix`` is intended to +facilitate linear algebra computations specifically. 
In practice there +are only a handful of key differences between the two. + +- Operators ``*`` and ``@``, functions ``dot()``, and ``multiply()``: + + - For ``array``, **``*`` means element-wise multiplication**, while + **``@`` means matrix multiplication**; they have associated functions + ``multiply()`` and ``dot()``. (Before Python 3.5, ``@`` did not exist + and one had to use ``dot()`` for matrix multiplication). + - For ``matrix``, **``*`` means matrix multiplication**, and for + element-wise multiplication one has to use the ``multiply()`` function. + +- Handling of vectors (one-dimensional arrays) + + - For ``array``, the **vector shapes 1xN, Nx1, and N are all different + things**. Operations like ``A[:,1]`` return a one-dimensional array of + shape N, not a two-dimensional array of shape Nx1. Transpose on a + one-dimensional ``array`` does nothing. + - For ``matrix``, **one-dimensional arrays are always upconverted to 1xN + or Nx1 matrices** (row or column vectors). ``A[:,1]`` returns a + two-dimensional matrix of shape Nx1. + +- Handling of higher-dimensional arrays (ndim > 2) + + - ``array`` objects **can have number of dimensions > 2**; + - ``matrix`` objects **always have exactly two dimensions**. + +- Convenience attributes + + - ``array`` **has a .T attribute**, which returns the transpose of + the data. + - ``matrix`` **also has .H, .I, and .A attributes**, which return + the conjugate transpose, inverse, and ``asarray()`` of the matrix, + respectively. + +- Convenience constructor + + - The ``array`` constructor **takes (nested) Python sequences as + initializers**. As in, ``array([[1,2,3],[4,5,6]])``. + - The ``matrix`` constructor additionally **takes a convenient + string initializer**. As in ``matrix("[1 2 3; 4 5 6]")``. + +There are pros and cons to using both: + +- ``array`` + + - ``:)`` Element-wise multiplication is easy: ``A*B``. + - ``:(`` You have to remember that matrix multiplication has its own + operator, ``@``. + - ``:)`` You can treat one-dimensional arrays as *either* row or column + vectors. ``A @ v`` treats ``v`` as a column vector, while + ``v @ A`` treats ``v`` as a row vector. This can save you having to + type a lot of transposes. + - ``:)`` ``array`` is the "default" NumPy type, so it gets the most + testing, and is the type most likely to be returned by 3rd party + code that uses NumPy. + - ``:)`` Is quite at home handling data of any number of dimensions. + - ``:)`` Closer in semantics to tensor algebra, if you are familiar + with that. + - ``:)`` *All* operations (``*``, ``/``, ``+``, ``-`` etc.) are + element-wise. + - ``:(`` Sparse matrices from ``scipy.sparse`` do not interact as well + with arrays. + +- ``matrix`` + + - ``:\\`` Behavior is more like that of MATLAB matrices. + - ``<:(`` Maximum of two-dimensional. To hold three-dimensional data you + need ``array`` or perhaps a Python list of ``matrix``. + - ``<:(`` Minimum of two-dimensional. You cannot have vectors. They must be + cast as single-column or single-row matrices. + - ``<:(`` Since ``array`` is the default in NumPy, some functions may + return an ``array`` even if you give them a ``matrix`` as an + argument. This shouldn't happen with NumPy functions (if it does + it's a bug), but 3rd party code based on NumPy may not honor type + preservation like NumPy does. + - ``:)`` ``A*B`` is matrix multiplication, so it looks just like you write + it in linear algebra (For Python >= 3.5 plain arrays have the same + convenience with the ``@`` operator). 
+ - ``<:(`` Element-wise multiplication requires calling a function, + ``multiply(A,B)``. + - ``<:(`` The use of operator overloading is a bit illogical: ``*`` + does not work element-wise but ``/`` does. + - Interaction with ``scipy.sparse`` is a bit cleaner. + +The ``array`` is thus much more advisable to use. Indeed, we intend to +deprecate ``matrix`` eventually. + +Customizing your environment ============================ In MATLAB the main tool available to you for customizing the @@ -696,26 +786,39 @@ this is just an example, not a statement of "best practices"): # Make all numpy available via shorter 'np' prefix import numpy as np - # Make all matlib functions accessible at the top level via M.func() - import numpy.matlib as M - # Make some matlib functions accessible directly at the top level via, e.g. rand(3,3) - from numpy.matlib import rand,zeros,ones,empty,eye + # + # Make the SciPy linear algebra functions available as linalg.func() + # e.g. linalg.lu, linalg.eig (for general l*B@u==A@u solution) + from scipy import linalg + # # Define a Hermitian function def hermitian(A, **kwargs): - return np.transpose(A,**kwargs).conj() - # Make some shortcuts for transpose,hermitian: - # np.transpose(A) --> T(A) + return np.conj(A,**kwargs).T + # Make a shortcut for hermitian: # hermitian(A) --> H(A) - T = np.transpose H = hermitian +To use the deprecated `matrix` and other `matlib` functions: + +:: + + # Make all matlib functions accessible at the top level via M.func() + import numpy.matlib as M + # Make some matlib functions accessible directly at the top level via, e.g. rand(3,3) + from numpy.matlib import matrix,rand,zeros,ones,empty,eye + Links ===== -See http://mathesaurus.sf.net/ for another MATLAB/NumPy -cross-reference. +Another somewhat outdated MATLAB/NumPy cross-reference can be found at +http://mathesaurus.sf.net/ -An extensive list of tools for scientific work with python can be +An extensive list of tools for scientific work with Python can be found in the `topical software page `__. -MATLAB® and SimuLink® are registered trademarks of The MathWorks. +See +`List of Python software: scripting +`_ +for a list of softwares that use Python as a scripting language + +MATLAB® and SimuLink® are registered trademarks of The MathWorks, Inc. From d5718145fff9bc70d3e3143a85ef42bd910209ce Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 16 Sep 2020 11:25:15 +0300 Subject: [PATCH 049/409] adjust whitespace to minimize the diff --- doc/source/glossary.rst | 602 +++++++++++++++++++--------------------- 1 file changed, 288 insertions(+), 314 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 11439187a01b..978589996a5b 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -2,463 +2,443 @@ Glossary ******** -.. glossary:: +.. glossary:: - (`n`,) + (`n`,) + A tuple with one element. The trailing comma distinguishes a one-element + tuple from a parenthesized ``n``. - A tuple with one element. The trailing comma distinguishes a one-element - tuple from a parenthesized ``n``. + -1 + Used as a dimension entry, ``-1`` instructs NumPy to choose the length + that will keep the total number of elements the same. - -1 - Used as a dimension entry, ``-1`` instructs NumPy to choose the length - that will keep the total number of elements the same. + ``...`` + **When indexing an array**, shorthand that the missing axes, if they + exist, are full slices. 
- ``...`` + >>> a = np.arange(24).reshape(2,3,4) - **When indexing an array**, shorthand that the missing axes, if they - exist, are full slices. + >>> a[...].shape + (2, 3, 4) - >>> a = np.arange(24).reshape(2,3,4) + >>> a[...,0].shape + (2, 3) - >>> a[...].shape - (2, 3, 4) + >>> a[0,...].shape + (3, 4) - >>> a[...,0].shape - (2, 3) + >>> a[0,...,0].shape + (3,) - >>> a[0,...].shape - (3, 4) + It can be used at most once; ``a[...,0,...]`` raises an ``IndexError``. - >>> a[0,...,0].shape - (3,) + **In printouts**, NumPy substitutes ``...`` for the middle elements of + large arrays. To see the entire array, use + :doc:`numpy.printoptions. ` - It can be used at most once; ``a[...,0,...]`` raises an ``IndexError``. - **In printouts**, NumPy substitutes ``...`` for the middle elements of - large arrays. To see the entire array, use - :doc:`numpy.printoptions. ` + ``:`` + The Python :term:`python:slice` + operator. In ndarrays, slicing can be applied to every + axis: - ``:`` + >>> a = np.arange(24).reshape(2,3,4) + >>> a + array([[[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]], + + [[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23]]]) + + >>> a[1:,-2:,:-1] + array([[[16, 17, 18], + [20, 21, 22]]]) - The Python :term:`python:slice` - operator. In ndarrays, slicing can be applied to every - axis: + Trailing slices can be omitted: :: - >>> a = np.arange(24).reshape(2,3,4) - >>> a - array([[[ 0, 1, 2, 3], - [ 4, 5, 6, 7], - [ 8, 9, 10, 11]], - - [[12, 13, 14, 15], - [16, 17, 18, 19], - [20, 21, 22, 23]]]) - - >>> a[1:,-2:,:-1] - array([[[16, 17, 18], - [20, 21, 22]]]) + >>> a[1] == a[1,:,:] + array([[ True, True, True, True], + [ True, True, True, True], + [ True, True, True, True]]) - Trailing slices can be omitted: :: + In contrast to Python, where slicing creates a copy, in NumPy slicing + creates a :term:`view`. - >>> a[1] == a[1,:,:] - array([[ True, True, True, True], - [ True, True, True, True], - [ True, True, True, True]]) + For details, see :ref:`Combining advanced and basic indexing `. - In contrast to Python, where slicing creates a copy, in NumPy slicing - creates a :term:`view`. - For details, see :ref:`Combining advanced and basic indexing `. + ``<`` + In a dtype declaration, indicates that the data is + `little-endian `_ + (the bracket is big on the right). :: - ``<`` + >>> dt = np.dtype('`_ - (the bracket is big on the right). :: - >>> dt = np.dtype('`` + In a dtype declaration, indicates that the data is + `big-endian `_ + (the bracket is big on the left). :: - ``>`` + >>> dt = np.dtype('>H') # big-endian unsigned short - In a dtype declaration, indicates that the data is - `big-endian `_ - (the bracket is big on the left). :: - >>> dt = np.dtype('>H') # big-endian unsigned short + advanced indexing + Rather than using a :doc:`scalar ` or slice as + an index, an axis can be indexed with an array, providing fine-grained + selection. This is known as :ref:`advanced indexing` + or ``fancy indexing``. - advanced indexing - Rather than using a :doc:`scalar ` or slice as - an index, an axis can be indexed with an array, providing fine-grained - selection. This is known as :ref:`advanced indexing` - or ``fancy indexing``. + along an axis + The result of an operation along an :term:`axis` X is an array in which X + disappears. This can surprise new users expecting the opposite. - along an axis + The operation can be visualized this way: - The result of an operation along an :term:`axis` X is an array in which X - disappears. This can surprise new users expecting the opposite. 
+ Imagine a slice of array ``a`` where axis X has a fixed index + and the other dimensions are left full (``:``). - The operation can be visualized this way: + >>> a = np.arange(24).reshape(2,3,4) + + >>> a.shape + (2, 3, 4) + + >>> a[:,0,:].shape + (2, 4) - Imagine a slice of array ``a`` where axis X has a fixed index - and the other dimensions are left full (``:``). + The slice has ``a``'s shape with the X dimension deleted. Saying an + operation ``op`` is ``performed along X`` means that ``op`` takes as its + operands slices having every value of X: - >>> a = np.arange(24).reshape(2,3,4) - - >>> a.shape - (2, 3, 4) - - >>> a[:,0,:].shape - (2, 4) + >>> np.sum(a,axis=1) == a[:,0,:] + a[:,1,:] + a[:,2,:] + array([[ True, True, True, True], + [ True, True, True, True]]) - The slice has ``a``'s shape with the X dimension deleted. Saying an - operation ``op`` is ``performed along X`` means that ``op`` takes as its - operands slices having every value of X: - >>> np.sum(a,axis=1) == a[:,0,:] + a[:,1,:] + a[:,2,:] - array([[ True, True, True, True], - [ True, True, True, True]]) - - - array + array Used synonymously in the NumPy docs with :doc:`ndarray `, NumPy's basic structure. - array_like - - Any :doc:`scalar ` or - :term:`python:sequence` - that can be interpreted as an ndarray. In addition to ndarrays - and scalars this category includes lists (possibly nested and with - different element types) and tuples. Any argument accepted by - :doc:`numpy.array ` - is array_like. :: - - >>> a = np.array([[1,2.0],[0,0],(1+1j,3.)]) + array_like + Any :doc:`scalar ` or + :term:`python:sequence` + that can be interpreted as an ndarray. In addition to ndarrays + and scalars this category includes lists (possibly nested and with + different element types) and tuples. Any argument accepted by + :doc:`numpy.array ` + is array_like. :: - >>> a - array([[1.+0.j, 2.+0.j], - [0.+0.j, 0.+0.j], - [1.+1.j, 3.+0.j]]) + >>> a = np.array([[1,2.0],[0,0],(1+1j,3.)]) + >>> a + array([[1.+0.j, 2.+0.j], + [0.+0.j, 0.+0.j], + [1.+1.j, 3.+0.j]]) - array scalar - - For uniformity in handling operands, NumPy treats - a :doc:`scalar ` as an array of zero - dimension. - - - :term:`attribute ` - \ + array scalar + For uniformity in handling operands, NumPy treats + a :doc:`scalar ` as an array of zero + dimension. - axis - Another term for an array dimension. Axes are numbered left to right; - axis 0 is the first element in the shape tuple. + axis + Another term for an array dimension. Axes are numbered left to right; + axis 0 is the first element in the shape tuple. - In a two-dimensional vector, the elements of axis 0 are rows and the - elements of axis 1 are columns. + In a two-dimensional vector, the elements of axis 0 are rows and the + elements of axis 1 are columns. - In higher dimensions the picture changes. NumPy prints - higher-dimensional vectors as replications of row-by-column building - blocks, as in this three-dimensional vector: + In higher dimensions the picture changes. NumPy prints + higher-dimensional vectors as replications of row-by-column building + blocks, as in this three-dimensional vector: - >>> a = np.arange(12).reshape(2,2,3) - >>> a - array([[[ 0, 1, 2], - [ 3, 4, 5]], - - [[ 6, 7, 8], - [ 9, 10, 11]]]) + >>> a = np.arange(12).reshape(2,2,3) + >>> a + array([[[ 0, 1, 2], + [ 3, 4, 5]], + + [[ 6, 7, 8], + [ 9, 10, 11]]]) - ``a`` is depicted as a two-element array whose elements are 2x3 vectors. - From this point of view, rows and columns are the final two axes, - respectively, in any shape. 
+ ``a`` is depicted as a two-element array whose elements are 2x3 vectors. + From this point of view, rows and columns are the final two axes, + respectively, in any shape. - This rule helps you anticipate how a vector will be printed, and - conversely how to find the index of any of the printed elements. For - instance, in the example, the last two values of 8's index must be 0 and - 2. Since 8 appears in the second of the two 2x3's, the first index must - be 1: + This rule helps you anticipate how a vector will be printed, and + conversely how to find the index of any of the printed elements. For + instance, in the example, the last two values of 8's index must be 0 and + 2. Since 8 appears in the second of the two 2x3's, the first index must + be 1: - >>> a[1,0,2] - 8 + >>> a[1,0,2] + 8 - A convenient way to count dimensions in a printed vector is to - count ``[`` symbols after the open-parenthesis. This is - useful in distinguishing, say, a (1,2,3) shape from a (2,3) shape: + A convenient way to count dimensions in a printed vector is to + count ``[`` symbols after the open-parenthesis. This is + useful in distinguishing, say, a (1,2,3) shape from a (2,3) shape: - >>> a = np.arange(6).reshape(2,3) - >>> a.shape - (2, 3) - >>> a - array([[0, 1, 2], - [3, 4, 5]]) + >>> a = np.arange(6).reshape(2,3) + >>> a.shape + (2, 3) + >>> a + array([[0, 1, 2], + [3, 4, 5]]) - >>> a = np.arange(6).reshape(1,2,3) - >>> a.shape - (1, 2, 3) - >>> a - array([[[0, 1, 2], - [3, 4, 5]]]) + >>> a = np.arange(6).reshape(1,2,3) + >>> a.shape + (1, 2, 3) + >>> a + array([[[0, 1, 2], + [3, 4, 5]]]) - .base + .base - If an array does not own its memory, then its - :doc:`base ` attribute - returns the object whose memory the array is referencing. That object - may be borrowing the memory from still another object, so the - owning object may be ``a.base.base.base...``. Despite advice to the - contrary, testing ``base`` is not a surefire way to determine if two - arrays are :term:`view`\ s. + If an array does not own its memory, then its + :doc:`base ` attribute + returns the object whose memory the array is referencing. That object + may be borrowing the memory from still another object, so the + owning object may be ``a.base.base.base...``. Despite advice to the + contrary, testing ``base`` is not a surefire way to determine if two + arrays are :term:`view`\ s. - `big-endian `_ + `big-endian `_ \ - `BLAS `_ + `BLAS `_ \ - broadcast + broadcast + ``broadcasting`` is NumPy's ability to process ndarrays of + different sizes as if all were the same size. - ``broadcasting`` is NumPy's ability to process ndarrays of - different sizes as if all were the same size. + When NumPy operates on two arrays, it works element by + element -- for instance, ``c = a * b`` is :: - When NumPy operates on two arrays, it works element by - element -- for instance, ``c = a * b`` is :: - - c[0,0,0] = a[0,0,0] * b[0,0,0] - c[0,0,1] = a[0,0,1] * b[0,0,1] + c[0,0,0] = a[0,0,0] * b[0,0,0] + c[0,0,1] = a[0,0,1] * b[0,0,1] ... - Ordinarily this means the shapes of a and b must be identical. But in - some cases, NumPy can fill "missing" axes or "too-short" dimensions - with duplicate data so shapes will match. The duplication costs - no memory or time. For details, see :doc:`Broadcasting. ` - + Ordinarily this means the shapes of a and b must be identical. But in + some cases, NumPy can fill "missing" axes or "too-short" dimensions + with duplicate data so shapes will match. The duplication costs + no memory or time. 
For details, see :doc:`Broadcasting. ` - C order - Same as `row-major. `_ + C order + Same as `row-major. `_ - `column-major `_ - \ + `column-major `_ + \ - copy + copy - See :term:`view`. + See :term:`view`. - :term:`decorator ` + :term:`decorator ` \ - :term:`dictionary ` + :term:`dictionary ` \ - dimension + dimension - See :term:`axis`. + See :term:`axis`. - dtype + dtype - The datatype describing the (identically typed) elements in an ndarray. - It can be changed to reinterpret the array contents. For details, see - :doc:`Data type objects (dtype). ` + The datatype describing the (identically typed) elements in an ndarray. + It can be changed to reinterpret the array contents. For details, see + :doc:`Data type objects (dtype). ` - fancy indexing + fancy indexing Another term for :term:`advanced indexing`. - field - + field In a :term:`structured data type`, each subtype is called a :doc:`field `. A field has a name (a string), a type (any valid dtype), and an optional :term:`title`. For details, see :ref:`arrays.dtypes`. - Fortran order - + Fortran order Same as `column-major `_ - flattened - + flattened See :term:`ravel`. - homogeneous - - All elements of a homogeneous array have the same type. ndarrays, in - contrast to Python lists, are homogeneous. The type can be complicated, - as in a :term:`structured array`, but all elements have that type. + homogeneous + All elements of a homogeneous array have the same type. ndarrays, in + contrast to Python lists, are homogeneous. The type can be complicated, + as in a :term:`structured array`, but all elements have that type. NumPy `object arrays <#term-object-array>`_, which contain references to Python objects, fill the role of heterogeneous arrays. - :term:`immutable ` + :term:`immutable ` \ - :term:`iterable ` + :term:`iterable ` \ - itemsize - - The size of the dtype element in bytes. + itemsize + The size of the dtype element in bytes. - :term:`list ` + :term:`list ` \ - `little-endian `_ + `little-endian `_ \ - mask - + mask The boolean array used to select elements in a :term:`masked array`. - masked array - - Bad or missing data can be cleanly ignored by putting it in a masked - array, which has an internal boolean array indicating invalid - entries. Operations with masked arrays ignore these entries. :: + masked array + Bad or missing data can be cleanly ignored by putting it in a masked + array, which has an internal boolean array indicating invalid + entries. Operations with masked arrays ignore these entries. :: - >>> a = np.ma.masked_array([np.nan, 2, np.nan], [True, False, True]) - >>> a - masked_array(data=[--, 2.0, --], - mask=[ True, False, True], - fill_value=1e+20) + >>> a = np.ma.masked_array([np.nan, 2, np.nan], [True, False, True]) + >>> a + masked_array(data=[--, 2.0, --], + mask=[ True, False, True], + fill_value=1e+20) >>> a + [1, 2, 3] masked_array(data=[--, 4.0, --], mask=[ True, False, True], fill_value=1e+20) - For details, see :doc:`Masked arrays. ` - + For details, see :doc:`Masked arrays. ` - matrix - NumPy's two-dimensional - :doc:`matrix class ` - should no longer be used; use regular ndarrays. + matrix + NumPy's two-dimensional + :doc:`matrix class ` + should no longer be used; use regular ndarrays. - ndarray + ndarray See :term:`array`. - object array + object array - An array whose dtype is ``object``; that is, it contains references to - Python objects. Indexing the array dereferences the Python objects, so - unlike other ndarrays, an object array has the ability to hold - heterogeneous objects. 
+ An array whose dtype is ``object``; that is, it contains references to + Python objects. Indexing the array dereferences the Python objects, so + unlike other ndarrays, an object array has the ability to hold + heterogeneous objects. - ravel + ravel - :doc:`numpy.ravel \ - ` - and :doc:`numpy.flatten \ - ` - both flatten an ndarray. ``ravel`` will return a view if possible; - ``flatten`` always returns a copy. + :doc:`numpy.ravel \ + ` + and :doc:`numpy.flatten \ + ` + both flatten an ndarray. ``ravel`` will return a view if possible; + ``flatten`` always returns a copy. - Flattening collapses a multimdimensional array to a single dimension; - details of how this is done (for instance, whether ``a[n+1]`` should be - the next row or next column) are parameters. + Flattening collapses a multidimensional array to a single dimension; + details of how this is done (for instance, whether ``a[n+1]`` should be + the next row or next column) are parameters. - record array - + record array A :term:`structured array` with an additional way to access fields -- ``a.field`` in addition to ``a['field']``. For details, see :doc:`numpy.recarray. ` - `row-major `_ + `row-major `_ \ - :doc:`scalar ` + :doc:`scalar ` \ - shape + shape - A tuple showing the length of each dimension of an ndarray. The - length of the tuple itself is the number of dimensions - (:doc:`numpy.ndim `). - The product of the tuple elements is the number of elements in the - array. For details, see - :doc:`numpy.ndarray.shape `. + A tuple showing the length of each dimension of an ndarray. The + length of the tuple itself is the number of dimensions + (:doc:`numpy.ndim `). + The product of the tuple elements is the number of elements in the + array. For details, see + :doc:`numpy.ndarray.shape `. - :term:`slice <:>` + :term:`slice <:>` \ - stride + stride - Physical memory is one-dimensional; ``stride`` maps an index in an - N-dimensional ndarray to an address in memory. For an N-dimensional - array, stride is an N-element tuple; advancing from index ``i`` to index - ``i+1`` on axis ``n`` means adding ``a.strides[n]`` bytes to the - address. + Physical memory is one-dimensional; ``stride`` maps an index in an + N-dimensional ndarray to an address in memory. For an N-dimensional + array, stride is an N-element tuple; advancing from index ``i`` to index + ``i+1`` on axis ``n`` means adding ``a.strides[n]`` bytes to the + address. - Stride is computed automatically from an array's dtype and - shape, but can be directly specified using - :doc:`as_strided. ` + Stride is computed automatically from an array's dtype and + shape, but can be directly specified using + :doc:`as_strided. ` - For details, see - :doc:`numpy.ndarray.strides `. + For details, see + :doc:`numpy.ndarray.strides `. - To see how striding underlies the power of NumPy views, see - `The NumPy array: a structure for efficient numerical computation. \ - `_ + To see how striding underlies the power of NumPy views, see + `The NumPy array: a structure for efficient numerical computation. \ + `_ - structured array - Array whose :term:`dtype` is a :term:`structured data type`. + structured array + Array whose :term:`dtype` is a :term:`structured data type`. - structured data type - Users can create arbitrarily complex :term:`dtypes ` - that can include other arrays and dtypes. These composite dtypes are called - :doc:`structured data types. ` + structured data type + Users can create arbitrarily complex :term:`dtypes ` + that can include other arrays and dtypes. 
These composite dtypes are called
+      :doc:`structured data types. `

-   subarray
+   subarray

       An array nested in a :term:`structured data type`: ::

@@ -468,61 +448,55 @@ Glossary
            dtype=[('a', '`.
-
+   title
+      An alias for a field name in a structured datatype.

-   ufunc
-      NumPy's fast element-by-element computation (:term:`vectorization`) is
-      structured so as to leave the choice of function open. A function used
-      in vectorization is called a ``ufunc``, short for ``universal
-      function``. NumPy routines have built-in ufuncs, but users can also
-      :doc:`write their own. `
+   type
+      In NumPy, a synonym for :term:`dtype`. For the more general Python
+      meaning, :term:`see here. `

-   vectorization
+   ufunc
+      NumPy's fast element-by-element computation (:term:`vectorization`) is
+      structured so as to leave the choice of function open. A function used
+      in vectorization is called a ``ufunc``, short for ``universal
+      function``. NumPy routines have built-in ufuncs, but users can also
+      :doc:`write their own. `

-      NumPy hands off array processing to C, where looping and computation are
-      much faster than in Python. To exploit this, programmers using NumPy
-      eliminate Python loops in favor of array-to-array operations.
-      :term:`vectorization` can refer both to the C offloading and to
-      structuring NumPy code to leverage it.
+   vectorization
+      NumPy hands off array processing to C, where looping and computation are
+      much faster than in Python. To exploit this, programmers using NumPy
+      eliminate Python loops in favor of array-to-array operations.
+      :term:`vectorization` can refer both to the C offloading and to
+      structuring NumPy code to leverage it.

-   view
-      Without changing underlying data, NumPy can make one array masquerade as
-      any number of other arrays with different types, shapes, and even
-      content. This is much faster than creating those arrays.
+   view
+      Without changing underlying data, NumPy can make one array masquerade as
+      any number of other arrays with different types, shapes, and even
+      content. This is much faster than creating those arrays.

-      An array created this way is a ``view``, and the performance gain often
-      makes an array created as a view preferable to one created as a new
-      array.
+      An array created this way is a ``view``, and the performance gain often
+      makes an array created as a view preferable to one created as a new
+      array.

-      But because a view shares data with the original array, a write in one
-      array can affect the other, even though they appear to be different
-      arrays. If this is a problem, a view can't be used; the second array
-      needs to be physically distinct -- a ``copy``.
+      But because a view shares data with the original array, a write in one
+      array can affect the other, even though they appear to be different
+      arrays. If this is a problem, a view can't be used; the second array
+      needs to be physically distinct -- a ``copy``.

-      Some NumPy routines always return views, some always return copies, some
-      may return one or the other, and for some the choice can be specified.
-      Responsibility for managing views and copies falls to the programmer.
-      :doc:`numpy.shares_memory `
-      will check whether ``b`` is a view of ``a``,
-      but an exact answer isn't always feasible, as the documentation page
-      explains.
+      Some NumPy routines always return views, some always return copies, some
+      may return one or the other, and for some the choice can be specified.
+      Responsibility for managing views and copies falls to the programmer.
+ :doc:`numpy.shares_memory ` + will check whether ``b`` is a view of ``a``, + but an exact answer isn't always feasible, as the documentation page + explains. From 23d69b316edf60a87f6b8a782b85d8f0f3f33e6f Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 16 Sep 2020 11:52:49 +0300 Subject: [PATCH 050/409] DOC: manually merge some changes done in the other PRs --- doc/source/glossary.rst | 101 +++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 44 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 978589996a5b..b32c2ef2986d 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -4,6 +4,7 @@ Glossary .. glossary:: + (`n`,) A tuple with one element. The trailing comma distinguishes a one-element tuple from a parenthesized ``n``. @@ -15,6 +16,7 @@ Glossary ``...`` + An :py:data:`Ellipsis` **When indexing an array**, shorthand that the missing axes, if they exist, are full slices. @@ -33,15 +35,13 @@ Glossary >>> a[0,...,0].shape (3,) - It can be used at most once; ``a[...,0,...]`` raises an ``IndexError``. + It can be used at most once; ``a[...,0,...]`` raises an :exc:`IndexError`. **In printouts**, NumPy substitutes ``...`` for the middle elements of - large arrays. To see the entire array, use - :doc:`numpy.printoptions. ` + large arrays. To see the entire array, use `numpy.printoptions` ``:`` - The Python :term:`python:slice` operator. In ndarrays, slicing can be applied to every axis: @@ -70,33 +70,28 @@ Glossary In contrast to Python, where slicing creates a copy, in NumPy slicing creates a :term:`view`. - For details, see :ref:`Combining advanced and basic indexing `. + For details, see :ref:`combining-advanced-and-basic-indexing`. ``<`` - In a dtype declaration, indicates that the data is - `little-endian `_ - (the bracket is big on the right). :: + :term:`little-endian` (the bracket is big on the right). :: - >>> dt = np.dtype('>> dt = np.dtype('`` - In a dtype declaration, indicates that the data is - `big-endian `_ - (the bracket is big on the left). :: + :term:`big-endian` (the bracket is big on the left). :: - >>> dt = np.dtype('>H') # big-endian unsigned short + >>> dt = np.dtype('>H') # big-endian unsigned short advanced indexing - Rather than using a :doc:`scalar ` or slice as an index, an axis can be indexed with an array, providing fine-grained selection. This is known as :ref:`advanced indexing` - or ``fancy indexing``. + or "fancy indexing". along an axis @@ -162,7 +157,7 @@ Glossary In a two-dimensional vector, the elements of axis 0 are rows and the elements of axis 1 are columns. - In higher dimensions the picture changes. NumPy prints + In higher dimensions, the picture changes. NumPy prints higher-dimensional vectors as replications of row-by-column building blocks, as in this three-dimensional vector: @@ -192,15 +187,15 @@ Glossary useful in distinguishing, say, a (1,2,3) shape from a (2,3) shape: >>> a = np.arange(6).reshape(2,3) - >>> a.shape - (2, 3) + >>> a.ndim + 2 >>> a array([[0, 1, 2], [3, 4, 5]]) >>> a = np.arange(6).reshape(1,2,3) - >>> a.shape - (1, 2, 3) + >>> a.ndim + 3 >>> a array([[[0, 1, 2], [3, 4, 5]]]) @@ -217,12 +212,15 @@ Glossary arrays are :term:`view`\ s. - `big-endian `_ - \ + big-endian + When storing a multi-byte value in memory as a sequence of bytes, the + sequence addresses/sends/stores the most significant byte first (lowest + address) and the least significant byte last (highest address). 
Common in + micro-processors and used for transmission of data over network protocols. - `BLAS `_ - \ + BLAS + `Basic Linear Algebra Subprograms `_ broadcast @@ -243,7 +241,16 @@ Glossary C order - Same as `row-major. `_ + See `row-major` + + + column-major + A way to represent items in a N-dimensional array in the 1-dimensional + computer memory. In column-major order, the leftmost index "varies the + fastest": for example the array:: + + [[1, 2, 3], + [4, 5, 6]] `column-major `_ @@ -281,14 +288,13 @@ Glossary field - In a :term:`structured data type`, each subtype is called a - :doc:`field `. - A field has a name (a string), a type (any valid dtype), and - an optional :term:`title`. For details, see :ref:`arrays.dtypes`. + In a :term:`structured data type`, each sub-type is called a `field`. + The `field` has a name (a string), a type (any valid dtype), and + an optional `title`. See :ref:`arrays.dtypes` Fortran order - Same as `column-major `_ + Same as `column-major` flattened @@ -321,7 +327,10 @@ Glossary `little-endian `_ - \ + When storing a multi-byte value in memory as a sequence of bytes, the + sequence addresses/sends/stores the least significant byte first (lowest + address) and the most significant byte last (highest address). Common in + x86 processors. mask @@ -348,7 +357,6 @@ Glossary matrix - NumPy's two-dimensional :doc:`matrix class ` should no longer be used; use regular ndarrays. @@ -368,12 +376,8 @@ Glossary ravel - :doc:`numpy.ravel \ - ` - and :doc:`numpy.flatten \ - ` - both flatten an ndarray. ``ravel`` will return a view if possible; - ``flatten`` always returns a copy. + `numpy.ravel` and `numpy.ndarray.flatten` both flatten an ndarray. ``ravel`` + will return a view if possible; ``flatten`` always returns a copy. Flattening collapses a multidimensional array to a single dimension; details of how this is done (for instance, whether ``a[n+1]`` should be @@ -390,7 +394,20 @@ Glossary \ :doc:`scalar ` - \ + A way to represent items in a N-dimensional array in the 1-dimensional + computer memory. In row-major order, the rightmost index "varies + the fastest": for example the array:: + + [[1, 2, 3], + [4, 5, 6]] + + is represented in the row-major order as:: + + [1, 2, 3, 4, 5, 6] + + Row-major order is also known as the C order, as the C programming + language uses it. New NumPy arrays are by default in row-major order. + shape @@ -414,7 +431,7 @@ Glossary ``i+1`` on axis ``n`` means adding ``a.strides[n]`` bytes to the address. - Stride is computed automatically from an array's dtype and + Strides are computed automatically from an array's dtype and shape, but can be directly specified using :doc:`as_strided. ` @@ -458,10 +475,6 @@ Glossary An alias for a field name in a structured datatype. - type - In NumPy, a synonym for :term:`dtype`. For the more general Python - meaning, :term:`see here. 
` - ufunc NumPy's fast element-by-element computation (:term:`vectorization`) is From a182f32c77e557cadb0cd70f788e17c14671d171 Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 16 Sep 2020 16:14:14 +0300 Subject: [PATCH 051/409] DOC, BLD: fix templated C highlighting --- doc/source/conf.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index e34be7f5c12a..032e828ce4a2 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -367,15 +367,15 @@ def linkcode_resolve(domain, info): numpy.__version__, fn, linespec) from pygments.lexers import CLexer -import copy +from pygments.lexer import inherit, bygroups +from pygments.token import Comment class NumPyLexer(CLexer): name = 'NUMPYLEXER' - tokens = copy.deepcopy(CLexer.tokens) - # Extend the regex for valid identifiers with @ - for k, val in tokens.items(): - for i, v in enumerate(val): - if isinstance(v, tuple): - if isinstance(v[0], str): - val[i] = (v[0].replace('a-zA-Z', 'a-zA-Z@'),) + v[1:] + tokens = { + 'statements': [ + (r'@[a-zA-Z_]*@', Comment.Preproc, 'macro'), + inherit, + ], + } From 11d67d46931887f4f4e05da080cf99414bff329d Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 16 Sep 2020 16:15:03 +0300 Subject: [PATCH 052/409] DOC, BLD: update doc build dependencies, remove as numpydoc submodule --- doc/source/docs/howto_document.rst | 26 ++------------------------ doc_requirements.txt | 2 +- 2 files changed, 3 insertions(+), 25 deletions(-) diff --git a/doc/source/docs/howto_document.rst b/doc/source/docs/howto_document.rst index cf86b7e99870..9f9068ab32a7 100644 --- a/doc/source/docs/howto_document.rst +++ b/doc/source/docs/howto_document.rst @@ -40,29 +40,7 @@ after which you may use it:: np.fft.fft2(...) -.. rubric:: - **For convenience the** `formatting standard`_ **is included below with an - example** - -.. include:: ../../sphinxext/doc/format.rst - -.. _example: - -Example Source -============== - -.. literalinclude:: ../../sphinxext/doc/example.py - - - -Example Rendered -================ - -.. ifconfig:: python_version_major < '3' - - The example is rendered only when sphinx is run with python3 and above - -.. automodule:: doc.example - :members: +Please use the numpydoc `formatting standard`_ as shown in their example_ .. _`formatting standard`: https://numpydoc.readthedocs.io/en/latest/format.html +.. _example: https://numpydoc.readthedocs.io/en/latest/example.html diff --git a/doc_requirements.txt b/doc_requirements.txt index 815aac3079ec..36b651c6465f 100644 --- a/doc_requirements.txt +++ b/doc_requirements.txt @@ -1,5 +1,5 @@ -pygments==2.6.1 sphinx>=2.2.0,<3.0 +numpydoc==1.1.0 ipython scipy matplotlib From 3a06142f5e26c6294ffd5fa24cd450addef45e13 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Wed, 16 Sep 2020 12:49:41 +0100 Subject: [PATCH 053/409] DOC: Add more examples to the `issubdtype` docstring --- numpy/core/numerictypes.py | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py index 2a015f48fc98..632f2b8cd17c 100644 --- a/numpy/core/numerictypes.py +++ b/numpy/core/numerictypes.py @@ -358,13 +358,15 @@ def issubsctype(arg1, arg2): @set_module('numpy') def issubdtype(arg1, arg2): - """ + r""" Returns True if first argument is a typecode lower/equal in type hierarchy. + This is like the builtin :func:`issubclass`, but for `dtype`\ s. + Parameters ---------- arg1, arg2 : dtype_like - dtype or string representing a typecode. 
+ `dtype` or object coercible to one Returns ------- @@ -377,10 +379,40 @@ def issubdtype(arg1, arg2): Examples -------- - >>> np.issubdtype('S1', np.string_) + `issubdtype` can be used to check the type of arrays: + + >>> ints = np.array([1, 2, 3], dtype=np.int32) + >>> np.issubdtype(ints.dtype, np.integer) + True + >>> np.issubdtype(ints.dtype, np.floating) + False + + >>> floats = np.array([1, 2, 3], dtype=np.float32) + >>> np.issubdtype(floats.dtype, np.integer) + False + >>> np.issubdtype(floats.dtype, np.floating) True + + Similar types of different sizes are not subdtypes of each other: + >>> np.issubdtype(np.float64, np.float32) False + >>> np.issubdtype(np.float32, np.float64) + False + + but both are subtypes of `floating`: + + >>> np.issubdtype(np.float64, np.floating) + True + >>> np.issubdtype(np.float32, np.floating) + True + + For convenience, dtype-like objects are allowed too: + + >>> np.issubdtype('S1', np.string_) + True + >>> np.issubdtype('i4', np.signedinteger) + True """ if not issubclass_(arg1, generic): From 5c3f10061f39a7858a5f2c6cd48773abe3252e48 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Wed, 16 Sep 2020 13:08:11 +0100 Subject: [PATCH 054/409] DOC: Fix broken references and make the tables a little clearer `types.BooleanType` and similar names were python2 things that no longer exist `unicode` also no longer exists, and is now `str` --- doc/source/reference/arrays.scalars.rst | 55 +++++++++++++------------ numpy/core/numerictypes.py | 2 +- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/doc/source/reference/arrays.scalars.rst b/doc/source/reference/arrays.scalars.rst index f57a117244aa..46d2bb8faeef 100644 --- a/doc/source/reference/arrays.scalars.rst +++ b/doc/source/reference/arrays.scalars.rst @@ -29,7 +29,7 @@ an array scalar object. Alternatively, what kind of array scalar is present can be determined using other members of the data type hierarchy. Thus, for example ``isinstance(val, np.complexfloating)`` will return :py:data:`True` if *val* is a complex valued type, while -:const:`isinstance(val, np.flexible)` will return true if *val* is one +``isinstance(val, np.flexible)`` will return true if *val* is one of the flexible itemsize array types (:class:`string`, :class:`unicode`, :class:`void`). @@ -65,19 +65,22 @@ Some of the scalar types are essentially equivalent to fundamental Python types and therefore inherit from them as well as from the generic array scalar type: -==================== ================================ -Array scalar type Related Python type -==================== ================================ -:class:`int_` :class:`IntType` (Python 2 only) -:class:`float_` :class:`FloatType` -:class:`complex_` :class:`ComplexType` -:class:`bytes_` :class:`BytesType` -:class:`unicode_` :class:`UnicodeType` -==================== ================================ +==================== =========================== ============= +Array scalar type Related Python type Inherits? 
+==================== =========================== ============= +:class:`int_` :class:`int` Python 2 only +:class:`float_` :class:`float` yes +:class:`complex_` :class:`complex` yes +:class:`bytes_` :class:`bytes` yes +:class:`str_` :class:`str` yes +:class:`bool_` :class:`bool` no +:class:`datetime64` :class:`datetime.datetime` no +:class:`timedelta64` :class:`datetime.timedelta` no +==================== =========================== ============= The :class:`bool_` data type is very similar to the Python -:class:`BooleanType` but does not inherit from it because Python's -:class:`BooleanType` does not allow itself to be inherited from, and +:class:`bool` but does not inherit from it because Python's +:class:`bool` does not allow itself to be inherited from, and on the C-level the size of the actual bool data is not the same as a Python Boolean scalar. @@ -86,7 +89,7 @@ Python Boolean scalar. The :class:`bool_` type is not a subclass of the :class:`int_` type (the :class:`bool_` is not even a number type). This is different than Python's default implementation of :class:`bool` as a - sub-class of int. + sub-class of :class:`int`. .. warning:: @@ -113,11 +116,11 @@ Type Remarks Character code Integers: =================== ============================= =============== -:class:`byte` compatible: C char ``'b'`` -:class:`short` compatible: C short ``'h'`` -:class:`intc` compatible: C int ``'i'`` -:class:`int_` compatible: Python int ``'l'`` -:class:`longlong` compatible: C long long ``'q'`` +:class:`byte` compatible: C ``char`` ``'b'`` +:class:`short` compatible: C ``short`` ``'h'`` +:class:`intc` compatible: C ``int`` ``'i'`` +:class:`int_` compatible: C ``long`` ``'l'`` +:class:`longlong` compatible: C ``long long`` ``'q'`` :class:`intp` large enough to fit a pointer ``'p'`` :class:`int8` 8 bits :class:`int16` 16 bits @@ -127,18 +130,18 @@ Integers: Unsigned integers: -=================== ============================= =============== -:class:`ubyte` compatible: C unsigned char ``'B'`` -:class:`ushort` compatible: C unsigned short ``'H'`` -:class:`uintc` compatible: C unsigned int ``'I'`` -:class:`uint` compatible: Python int ``'L'`` -:class:`ulonglong` compatible: C long long ``'Q'`` -:class:`uintp` large enough to fit a pointer ``'P'`` +=================== ================================= =============== +:class:`ubyte` compatible: C ``unsigned char`` ``'B'`` +:class:`ushort` compatible: C ``unsigned short`` ``'H'`` +:class:`uintc` compatible: C ``unsigned int`` ``'I'`` +:class:`uint` compatible: C ``long`` ``'L'`` +:class:`ulonglong` compatible: C ``long long`` ``'Q'`` +:class:`uintp` large enough to fit a pointer ``'P'`` :class:`uint8` 8 bits :class:`uint16` 16 bits :class:`uint32` 32 bits :class:`uint64` 64 bits -=================== ============================= =============== +=================== ================================= =============== Floating-point numbers: diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py index 632f2b8cd17c..e705dd3ea855 100644 --- a/numpy/core/numerictypes.py +++ b/numpy/core/numerictypes.py @@ -374,8 +374,8 @@ def issubdtype(arg1, arg2): See Also -------- + :ref:`arrays.scalars` : Overview of the numpy type hierarchy. issubsctype, issubclass_ - numpy.core.numerictypes : Overview of numpy type hierarchy. 
Examples -------- From 9c0febeec5e6464d873e03ec74900e08989d75b7 Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 16 Sep 2020 17:29:02 +0300 Subject: [PATCH 055/409] BLD: remove submodule doc/sphinxext that pointed to numpydocs --- .gitmodules | 3 --- doc/sphinxext | 1 - 2 files changed, 4 deletions(-) delete mode 160000 doc/sphinxext diff --git a/.gitmodules b/.gitmodules index b1e13c3bc120..e69de29bb2d1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "doc/sphinxext"] - path = doc/sphinxext - url = https://github.com/numpy/numpydoc.git diff --git a/doc/sphinxext b/doc/sphinxext deleted file mode 160000 index b4c5fd17e2b8..000000000000 --- a/doc/sphinxext +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b4c5fd17e2b85c2416a5e586933eee353b58bf7c From 260f2f144018f1c4c3de171160dc53b5c642b205 Mon Sep 17 00:00:00 2001 From: Bijesh Mohan Date: Wed, 16 Sep 2020 20:04:09 +0530 Subject: [PATCH 056/409] MAINT: Chaining exceptions in npyio.py --- numpy/lib/npyio.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 90e16643c013..805e59bc1ff4 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -86,7 +86,7 @@ def __getattribute__(self, key): try: return object.__getattribute__(self, '_obj')[key] except KeyError: - raise AttributeError(key) + raise AttributeError(key) from None def __dir__(self): """ @@ -446,9 +446,9 @@ def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, "when allow_pickle=False") try: return pickle.load(fid, **pickle_kwargs) - except Exception: + except Exception as e: raise IOError( - "Failed to interpret file %s as a pickle" % repr(file)) + "Failed to interpret file %s as a pickle" % repr(file)) from e def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None): @@ -1435,10 +1435,10 @@ def first_write(self, v): for row in X: try: v = format % tuple(row) + newline - except TypeError: + except TypeError as e: raise TypeError("Mismatch between array dtype ('%s') and " "format specifier ('%s')" - % (str(X.dtype), format)) + % (str(X.dtype), format)) from e fh.write(v) if len(footer) > 0: From 26a407f701782f45d3165f92861bd6555d042596 Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Wed, 16 Sep 2020 18:01:45 +0300 Subject: [PATCH 057/409] Update doc/source/conf.py Co-authored-by: Eric Wieser --- doc/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 032e828ce4a2..fe7ea096740d 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -376,6 +376,6 @@ class NumPyLexer(CLexer): tokens = { 'statements': [ (r'@[a-zA-Z_]*@', Comment.Preproc, 'macro'), - inherit, + inherit, ], } From 60a1e10c4593736b188b38e7d7c51aefb213af6a Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 17 Sep 2020 03:06:41 -0500 Subject: [PATCH 058/409] DOC: Fix syntax errors in docstrings for versionchanged, versionadded (#17338) * DOC: Fix typos in versionchanged. --- numpy/core/multiarray.py | 4 ++-- numpy/lib/function_base.py | 2 +- numpy/lib/recfunctions.py | 2 +- numpy/ma/extras.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py index 225c9554cec2..540d1ea9b5f6 100644 --- a/numpy/core/multiarray.py +++ b/numpy/core/multiarray.py @@ -163,12 +163,12 @@ def concatenate(arrays, axis=None, out=None, *, dtype=None, casting=None): If provided, the destination array will have this dtype. Cannot be provided together with `out`. 
- ..versionadded:: 1.20.0
+ .. versionadded:: 1.20.0

 casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
     Controls what kind of data casting may occur. Defaults to 'same_kind'.

- ..versionadded:: 1.20.0
+ .. versionadded:: 1.20.0

 Returns
 -------
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index c43b2fb531f3..c7ddbdb8de67 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1450,7 +1450,7 @@ def angle(z, deg=False):
     The counterclockwise angle from the positive real axis on the
     complex plane in the range ``(-pi, pi]``, with dtype as numpy.float64.

-    ..versionchanged:: 1.16.0
+    .. versionchanged:: 1.16.0
         This function works on subclasses of ndarray like `ma.array`.

     See Also
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index cfc5dc9cae99..fbfbca73d442 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -513,7 +513,7 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False):

     Nested fields are supported.

-    ..versionchanged: 1.18.0
+    .. versionchanged:: 1.18.0
         `drop_fields` returns an array with 0 fields if all fields are dropped,
         rather than returning ``None`` as it did previously.

diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index 8ede29da18af..613bcb550dfd 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -1641,7 +1641,7 @@ def flatnotmasked_contiguous(a):
     slice_list : list
         A sorted sequence of `slice` objects (start index, end index).

-        ..versionchanged:: 1.15.0
+        .. versionchanged:: 1.15.0
            Now returns an empty list instead of None for a fully masked array

     See Also

From 233c63a56974de22b846ac989cef1fabe45e7296 Mon Sep 17 00:00:00 2001
From: "Nick R. Papior"
Date: Thu, 17 Sep 2020 12:29:38 +0200
Subject: [PATCH 059/409] BLD: enabled negation of library choices in
 NPY_*_ORDER (#17219)

BLD: enabled negation of library choices in NPY_*_ORDER

When users build for a particular order it may be beneficial to disallow
certain libraries.

In particular a user may not care about which accelerated BLAS library is
used, so long as the NetLIB or ATLAS library isn't used.

This is now possible with:

NPY_BLAS_ORDER='^blas,atlas'

or

NPY_BLAS_ORDER='!blas,atlas'

Since we may envision more BLAS/LAPACK libraries joining the pool, this
will provide greater flexibility as they enter.

A new (local) method is added in system_info.py which removes duplicate
code and allows for easier usage across libraries.
---
 .../upcoming_changes/17219.new_feature.rst |  12 ++
 doc/source/user/building.rst               |  21 ++++
 numpy/distutils/system_info.py             | 119 +++++++++++++-----
 numpy/distutils/tests/test_system_info.py  |  27 ++++
 4 files changed, 145 insertions(+), 34 deletions(-)
 create mode 100644 doc/release/upcoming_changes/17219.new_feature.rst

diff --git a/doc/release/upcoming_changes/17219.new_feature.rst b/doc/release/upcoming_changes/17219.new_feature.rst
new file mode 100644
index 000000000000..a6985ef0d2ad
--- /dev/null
+++ b/doc/release/upcoming_changes/17219.new_feature.rst
@@ -0,0 +1,12 @@
+Negation of user-defined BLAS/LAPACK detection order
+----------------------------------------------------
+`distutils` allows negation of libraries when determining BLAS/LAPACK
+libraries.
+This may be used to remove an item from the library resolution phase, i.e.
+to disallow NetLIB libraries one could do:
+
+.. code:: bash
+
+    NPY_BLAS_ORDER='^blas' NPY_LAPACK_ORDER='^lapack' python setup.py build
+
+which will use any of the accelerated libraries instead.
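To make the new rules concrete, here is a minimal standalone sketch of the
negation semantics the note above describes (illustrative only -- the actual
logic added by this patch lives in
`numpy.distutils.system_info._parse_env_order`, and `resolve_order` is a name
invented for this sketch)::

    def resolve_order(base_order, value):
        """Return the libraries to try, given an NPY_*_ORDER value."""
        if value.startswith(('^', '!')):
            # Negation: keep base_order, minus the named libraries.
            dropped = value[1:].lower().split(',')
            return [lib for lib in base_order if lib not in dropped]
        # Positive list: keep only the named libraries, in the order given.
        wanted = value.lower().split(',')
        return [lib for lib in wanted if lib in base_order]

    print(resolve_order(['mkl', 'openblas', 'atlas', 'blas'], '^blas,atlas'))
    # -> ['mkl', 'openblas']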
diff --git a/doc/source/user/building.rst b/doc/source/user/building.rst
index 54ece3da3749..47399139e6e8 100644
--- a/doc/source/user/building.rst
+++ b/doc/source/user/building.rst
@@ -142,6 +142,16 @@
 will prefer to use ATLAS, then BLIS, then OpenBLAS and as a last resort MKL.
 If neither of these exists the build will fail (names are compared
 lower case).

+Alternatively one may use ``!`` or ``^`` to negate all items::
+
+    NPY_BLAS_ORDER='^blas,atlas' python setup.py build
+
+will allow using anything **but** NetLIB BLAS and ATLAS libraries; the order
+of the above list is retained.
+
+One cannot mix negation and positives, nor have multiple negations; such
+cases will raise an error.
+
 LAPACK
 ~~~~~~
@@ -165,6 +175,17 @@
 will prefer to use ATLAS, then OpenBLAS and as a last resort MKL.
 If neither of these exists the build will fail (names are compared
 lower case).

+Alternatively one may use ``!`` or ``^`` to negate all items::
+
+    NPY_LAPACK_ORDER='^lapack' python setup.py build
+
+will allow using anything **but** the NetLIB LAPACK library; the order of
+the above list is retained.
+
+One cannot mix negation and positives, nor have multiple negations; such
+cases will raise an error.
+
+
 .. deprecated:: 1.20
    The native libraries on macOS, provided by Accelerate, are not fit for use
    in NumPy since they have bugs that cause wrong output under easily reproducible
diff --git a/numpy/distutils/system_info.py b/numpy/distutils/system_info.py
index 19f7482f2a86..b4513825d04d 100644
--- a/numpy/distutils/system_info.py
+++ b/numpy/distutils/system_info.py
@@ -415,6 +415,83 @@ def get_standard_file(fname):
     return filenames


+def _parse_env_order(base_order, env):
+    """ Parse an environment variable `env` by splitting with "," and only returning elements from `base_order`
+
+    This method will sequence the environment variable and check for its individual elements in `base_order`.
+
+    The items in the environment variable may be negated via '^item' or '!itema,itemb'.
+    It must start with ^/! to negate all options.
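+
+    For example, with ``base_order = ['mkl', 'blis', 'openblas']``, the value
+    "openblas,mkl" selects ['openblas', 'mkl'] (in the order given), while the
+    negated value "^openblas" (or "!openblas") selects ['mkl', 'blis'].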
+
+    Raises
+    ------
+    ValueError: for mixed negated and non-negated orders or multiple negated orders
+
+    Parameters
+    ----------
+    base_order : list of str
+       the base list of orders
+    env : str
+       the environment variable to be parsed; if none is found, `base_order` is returned
+
+    Returns
+    -------
+    allow_order : list of str
+        allowed orders in lower-case
+    unknown_order : list of str
+        for values not overlapping with `base_order`
+    """
+    order_str = os.environ.get(env, None)
+
+    # ensure all base-orders are lower-case (for easier comparison)
+    base_order = [order.lower() for order in base_order]
+    if order_str is None:
+        return base_order, []
+
+    neg = order_str.startswith('^') or order_str.startswith('!')
+    # Check format
+    order_str_l = list(order_str)
+    sum_neg = order_str_l.count('^') + order_str_l.count('!')
+    if neg:
+        if sum_neg > 1:
+            raise ValueError(f"Environment variable '{env}' may only contain a single (prefixed) negation: {order_str}")
+        # remove prefix
+        order_str = order_str[1:]
+    elif sum_neg > 0:
+        raise ValueError(f"Environment variable '{env}' may not mix negated and non-negated items: {order_str}")
+
+    # Split and lower case
+    orders = order_str.lower().split(',')
+
+    # to inform callee about non-overlapping elements
+    unknown_order = []
+
+    # if negated, we have to remove from the order
+    if neg:
+        allow_order = base_order.copy()
+
+        for order in orders:
+            if order not in base_order:
+                unknown_order.append(order)
+                continue
+
+            if order in allow_order:
+                allow_order.remove(order)
+
+    else:
+        allow_order = []
+
+        for order in orders:
+            if order not in base_order:
+                unknown_order.append(order)
+                continue
+
+            if order not in allow_order:
+                allow_order.append(order)
+
+    return allow_order, unknown_order
+
+
 def get_info(name, notfound_action=0):
     """
     notfound_action:
@@ -1766,24 +1843,11 @@ def _calc_info(self, name):
         return getattr(self, '_calc_info_{}'.format(name))()

     def calc_info(self):
-        user_order = os.environ.get(self.order_env_var_name, None)
-        if user_order is None:
-            lapack_order = self.lapack_order
-        else:
-            # the user has requested the order of the
-            # check they are all in the available list, a COMMA SEPARATED list
-            user_order = user_order.lower().split(',')
-            non_existing = []
-            lapack_order = []
-            for order in user_order:
-                if order in self.lapack_order:
-                    lapack_order.append(order)
-                elif len(order) > 0:
-                    non_existing.append(order)
-            if len(non_existing) > 0:
-                raise ValueError("lapack_opt_info user defined "
-                                 "LAPACK order has unacceptable "
-                                 "values: {}".format(non_existing))
+        lapack_order, unknown_order = _parse_env_order(self.lapack_order, self.order_env_var_name)
+        if len(unknown_order) > 0:
+            raise ValueError("lapack_opt_info user defined "
+                             "LAPACK order has unacceptable "
+                             "values: {}".format(unknown_order))

         for lapack in lapack_order:
             if self._calc_info(lapack):
@@ -1911,22 +1975,9 @@ def _calc_info(self, name):
         return getattr(self, '_calc_info_{}'.format(name))()

     def calc_info(self):
-        user_order = os.environ.get(self.order_env_var_name, None)
-        if user_order is None:
-            blas_order = self.blas_order
-        else:
-            # the user has requested the order of the
-            # check they are all in the available list
-            user_order = user_order.lower().split(',')
-            non_existing = []
-            blas_order = []
-            for order in user_order:
-                if order in self.blas_order:
-                    blas_order.append(order)
-                elif len(order) > 0:
-                    non_existing.append(order)
-            if len(non_existing) > 0:
-                raise ValueError("blas_opt_info user defined BLAS order has unacceptable values: {}".format(non_existing))
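+        # As in lapack_opt_info above, the NPY_*_ORDER parsing is delegated to
+        # the shared _parse_env_order helper; unknown library names are rejected.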
{}".format(non_existing)) + blas_order, unknown_order = _parse_env_order(self.blas_order, self.order_env_var_name) + if len(unknown_order) > 0: + raise ValueError("blas_opt_info user defined BLAS order has unacceptable values: {}".format(unknown_order)) for blas in blas_order: if self._calc_info(blas): diff --git a/numpy/distutils/tests/test_system_info.py b/numpy/distutils/tests/test_system_info.py index 0768ffdde55a..46ad9b1033e0 100644 --- a/numpy/distutils/tests/test_system_info.py +++ b/numpy/distutils/tests/test_system_info.py @@ -285,3 +285,30 @@ def test_overrides(self): finally: os.chdir(previousDir) + +def test_distutils_parse_env_order(monkeypatch): + from numpy.distutils.system_info import _parse_env_order + env = 'NPY_TESTS_DISTUTILS_PARSE_ENV_ORDER' + + base_order = list('abcdef') + + monkeypatch.setenv(env, 'b,i,e,f') + order, unknown = _parse_env_order(base_order, env) + assert len(order) == 3 + assert order == list('bef') + assert len(unknown) == 1 + + for prefix in '^!': + monkeypatch.setenv(env, f'{prefix}b,i,e') + order, unknown = _parse_env_order(base_order, env) + assert len(order) == 4 + assert order == list('acdf') + assert len(unknown) == 1 + + with pytest.raises(ValueError): + monkeypatch.setenv(env, 'b,^e,i') + _parse_env_order(base_order, env) + + with pytest.raises(ValueError): + monkeypatch.setenv(env, '!b,^e,i') + _parse_env_order(base_order, env) From 58b5b3dce56c91489308f0c631ee067b4aa26ae8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Melissa=20Weber=20Mendon=C3=A7a?= Date: Thu, 17 Sep 2020 12:02:20 -0300 Subject: [PATCH 060/409] Fix typos Co-authored-by: Bradley Dice --- .github/ISSUE_TEMPLATE/documentation.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md index 67e04c8af0cf..cdb7cde2ee2f 100644 --- a/.github/ISSUE_TEMPLATE/documentation.md +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -7,8 +7,8 @@ labels: 04 - Documentation ## Documentation - - From e5f2ce3c6465222f3dfdc186c930b3e049a4d597 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 17 Sep 2020 11:47:31 -0500 Subject: [PATCH 061/409] TST: Add test for creating an xpress like user-dtype This includes a few error paths. Note that the test creates a new dtype ever time it is run, and dtypes in NumPy are persistent so that this leaks references. --- .../src/multiarray/_multiarray_tests.c.src | 68 +++++++++++++++++++ numpy/core/tests/test_dtype.py | 37 ++++++++++ 2 files changed, 105 insertions(+) diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src index ea04c82bdf2e..cbfb1e17954d 100644 --- a/numpy/core/src/multiarray/_multiarray_tests.c.src +++ b/numpy/core/src/multiarray/_multiarray_tests.c.src @@ -610,6 +610,71 @@ fromstring_null_term_c_api(PyObject *dummy, PyObject *byte_obj) } +/* + * Create a custom field dtype from an existing void one (and test some errors). + * The dtypes created by this function may be not be usable (or even crash + * while using). 
+ */ +static PyObject * +create_custom_field_dtype(PyObject *NPY_UNUSED(mod), PyObject *args) +{ + PyArray_Descr *dtype; + PyTypeObject *scalar_type; + PyTypeObject *original_type = NULL; + int error_path; + + if (!PyArg_ParseTuple(args, "O!O!i", + &PyArrayDescr_Type, &dtype, + &PyType_Type, &scalar_type, + &error_path)) { + return NULL; + } + /* check that the result should be more or less valid */ + if (dtype->type_num != NPY_VOID || dtype->fields == NULL || + !PyDict_CheckExact(dtype->fields) || + PyTuple_Size(dtype->names) != 1 || + !PyDataType_REFCHK(dtype) || + dtype->elsize != sizeof(PyObject *)) { + PyErr_SetString(PyExc_ValueError, + "Bad dtype passed to test function, must be an object " + "containing void with a single field."); + return NULL; + } + + /* Copy and then appropriate this dtype */ + original_type = Py_TYPE(dtype); + dtype = PyArray_DescrNew(dtype); + if (dtype == NULL) { + return NULL; + } + + Py_INCREF(scalar_type); + Py_SETREF(dtype->typeobj, scalar_type); + if (error_path == 1) { + /* Test that we reject this, if fields was not already set */ + Py_SETREF(dtype->fields, NULL); + } + else if (error_path == 2) { + /* + * Test that we reject this if the type is not set to something that + * we are pretty sure can be safely replaced. + */ + Py_SET_TYPE(dtype, scalar_type); + } + else if (error_path != 0) { + PyErr_SetString(PyExc_ValueError, + "invalid error argument to test function."); + } + if (PyArray_RegisterDataType(dtype) < 0) { + /* Fix original type in the error_path == 2 case. */ + Py_SET_TYPE(dtype, original_type); + return NULL; + } + Py_INCREF(dtype); + return (PyObject *)dtype; +} + + /* check no elison for avoided increfs */ static PyObject * incref_elide(PyObject *dummy, PyObject *args) @@ -2081,6 +2146,9 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"fromstring_null_term_c_api", fromstring_null_term_c_api, METH_O, NULL}, + {"create_custom_field_dtype", + create_custom_field_dtype, + METH_VARARGS, NULL}, {"incref_elide", incref_elide, METH_VARARGS, NULL}, diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index 2e2b0dbe2a34..898ceebcd45b 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -6,6 +6,7 @@ import numpy as np from numpy.core._rational_tests import rational +from numpy.core._multiarray_tests import create_custom_field_dtype from numpy.testing import ( assert_, assert_equal, assert_array_equal, assert_raises, HAS_REFCOUNT) from numpy.compat import pickle @@ -1338,3 +1339,39 @@ def test_pairs(self, pair): pair_type = np.dtype('{},{}'.format(*pair)) expected = np.dtype([('f0', pair[0]), ('f1', pair[1])]) assert_equal(pair_type, expected) + + +class TestUserDType: + @pytest.mark.leaks_references(reason="dynamically creates custom dtype.") + def test_custom_structured_dtype(self): + class mytype: + pass + + blueprint = np.dtype([("field", object)]) + dt = create_custom_field_dtype(blueprint, mytype, 0) + assert dt.type == mytype + # We cannot (currently) *create* this dtype with `np.dtype` because + # mytype does not inherit from `np.generic`. 
This seems like an + # unnecessary restriction, but one that has been around forever: + assert np.dtype(mytype) == np.dtype("O") + + with pytest.raises(RuntimeError): + # Registering a second time should fail + create_custom_field_dtype(blueprint, mytype, 0) + + def test_custom_structured_dtype_errors(self): + class mytype: + pass + + blueprint = np.dtype([("field", object)]) + + with pytest.raises(ValueError): + # Tests what happens if fields are unset during creation + # which is currently rejected due to the containing object + # (see PyArray_RegisterDataType). + create_custom_field_dtype(blueprint, mytype, 1) + + with pytest.raises(RuntimeError): + # Tests that a dtype must have its type field set up to np.dtype + # or in this case a builtin instance. + create_custom_field_dtype(blueprint, mytype, 2) From c6853c7c27bb8352ab498848439d4fee9eb79a33 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Thu, 17 Sep 2020 12:20:56 -0700 Subject: [PATCH 062/409] NEP: Regenerate table in NEP 29 (add numpy 1.18 and 1.19 to list) (#17337) * Regenerate Table for NEP 29. Numpy 1.18 and 1.19 have been released, which add two new recommended stop date for recommended numpy support in 2021 and 2022. * Infer next version as max-known-minor-version+1 Do not print the first 4 entries to be dropped; as they are pre-nep-29 --- doc/neps/nep-0029-deprecation_policy.rst | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/doc/neps/nep-0029-deprecation_policy.rst b/doc/neps/nep-0029-deprecation_policy.rst index dbead1b9b550..4674d24ec663 100644 --- a/doc/neps/nep-0029-deprecation_policy.rst +++ b/doc/neps/nep-0029-deprecation_policy.rst @@ -111,8 +111,10 @@ Jun 23, 2020 3.7+ 1.15+ Jul 23, 2020 3.7+ 1.16+ Jan 13, 2021 3.7+ 1.17+ Jul 26, 2021 3.7+ 1.18+ -Dec 26, 2021 3.8+ 1.18+ -Apr 14, 2023 3.9+ 1.18+ +Dec 22, 2021 3.7+ 1.19+ +Dec 26, 2021 3.8+ 1.19+ +Jun 21, 2022 3.8+ 1.20+ +Apr 14, 2023 3.9+ 1.20+ ============ ====== ===== @@ -127,7 +129,9 @@ Drop Schedule On Jul 23, 2020 drop support for Numpy 1.15 (initially released on Jul 23, 2018) On Jan 13, 2021 drop support for Numpy 1.16 (initially released on Jan 13, 2019) On Jul 26, 2021 drop support for Numpy 1.17 (initially released on Jul 26, 2019) + On Dec 22, 2021 drop support for Numpy 1.18 (initially released on Dec 22, 2019) On Dec 26, 2021 drop support for Python 3.7 (initially released on Jun 27, 2018) + On Jun 21, 2022 drop support for Numpy 1.19 (initially released on Jun 20, 2020) On Apr 14, 2023 drop support for Python 3.8 (initially released on Oct 14, 2019) @@ -255,6 +259,8 @@ Code to generate support and drop schedule tables :: Jan 13, 2019: Numpy 1.16 Jul 26, 2019: Numpy 1.17 Oct 14, 2019: Python 3.8 + Dec 22, 2019: Numpy 1.18 + Jun 20, 2020: Numpy 1.19 """ releases = [] @@ -274,8 +280,12 @@ Code to generate support and drop schedule tables :: releases = sorted(releases, key=lambda x: x[0]) - minpy = '3.9+' - minnum = '1.18+' + + py_major,py_minor = sorted([int(x) for x in r[2].split('.')] for r in releases if r[1] == 'Python')[-1] + minpy = f"{py_major}.{py_minor+1}+" + + num_major,num_minor = sorted([int(x) for x in r[2].split('.')] for r in releases if r[1] == 'Numpy')[-1] + minnum = f"{num_major}.{num_minor+1}+" toprint_drop_dates = [''] toprint_support_table = [] @@ -289,14 +299,14 @@ Code to generate support and drop schedule tables :: minnum = v+'+' else: minpy = v+'+' - - for e in toprint_drop_dates[::-1]: + print("On next release, drop support for Python 3.5 (initially released on Sep 13, 2015)") + 
for e in toprint_drop_dates[-4::-1]: print(e) print('============ ====== =====') print('Date Python NumPy') print('------------ ------ -----') - for e in toprint_support_table[::-1]: + for e in toprint_support_table[-4::-1]: print(e) print('============ ====== =====') From 511d7694075fc06aee1091a67b830a7338f8c9a5 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Thu, 17 Sep 2020 15:54:17 -0400 Subject: [PATCH 063/409] DOC: Updated for reviewer comments Also added "contiguous," which is referenced in docs but hadn't been defined. --- doc/source/glossary.rst | 220 ++++++++++++---------------------------- 1 file changed, 67 insertions(+), 153 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 6decf76f9de7..e6c8bbd17270 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -14,12 +14,16 @@ Glossary Used as a dimension entry, ``-1`` instructs NumPy to choose the length that will keep the total number of elements the same. + >>> np.arange(12).reshape(4,-1).shape + (4, 3) + + ``...`` - An :py:data:`Ellipsis` + An :py:data:`Ellipsis`. - **When indexing an array**, shorthand that the missing axes, if they - exist, are full slices. + - **When indexing an array**, shorthand that the missing axes, if they + exist, are full slices. >>> a = np.arange(24).reshape(2,3,4) @@ -37,8 +41,8 @@ Glossary It can be used at most once; ``a[...,0,...]`` raises an :exc:`IndexError`. - **In printouts**, NumPy substitutes ``...`` for the middle elements of - large arrays. To see the entire array, use `numpy.printoptions` + - **In printouts**, NumPy substitutes ``...`` for the middle elements of + large arrays. To see the entire array, use `numpy.printoptions` ``:`` @@ -95,40 +99,47 @@ Glossary along an axis + Reversing along axis 0 (the row axis) below reverses the columns, and + reversing along axis 1 (the column axis) reverses the rows. This might + be the opposite of what a new user would expect. - The result of an operation along an :term:`axis` X is an array in which X - disappears. This can surprise new users expecting the opposite. - - The operation can be visualized this way: - - Imagine a slice of array ``a`` where axis X has a fixed index - and the other dimensions are left full (``:``). + >>> a = np.arange(12).reshape(3,4) + >>> a + array([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) + + >>> np.flip(a,axis=0) + array([[ 8, 9, 10, 11], + [ 4, 5, 6, 7], + [ 0, 1, 2, 3]]) + + >>> np.flip(a,axis=1) + array([[ 3, 2, 1, 0], + [ 7, 6, 5, 4], + [11, 10, 9, 8]]) - >>> a = np.arange(24).reshape(2,3,4) - - >>> a.shape - (2, 3, 4) - - >>> a[:,0,:].shape - (2, 4) + An operation "along axis 0" behaves as if the argument were an array of + ``a`` slices taking successive indexes of axis 0: - The slice has ``a``'s shape with the X dimension deleted. Saying an - operation ``op`` is ``performed along X`` means that ``op`` takes as its - operands slices having every value of X: + >>> np.array((a[0,:], a[1,:], a[2,:])) + array([[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]]) - >>> np.sum(a,axis=1) == a[:,0,:] + a[:,1,:] + a[:,2,:] - array([[ True, True, True, True], - [ True, True, True, True]]) + Reversing the slices results in ``np.flip(a,axis=0)``: + >>> np.array((a[2,:],a[1,:],a[0,:])) + array([[ 8, 9, 10, 11], + [ 4, 5, 6, 7], + [ 0, 1, 2, 3]]) array - Used synonymously in the NumPy docs with :doc:`ndarray `, NumPy's basic structure. - array_like Any :doc:`scalar ` or :term:`python:sequence` @@ -138,7 +149,7 @@ Glossary :doc:`numpy.array ` is array_like. 
:: - >>> a = np.array([[1, 2.0],[0, 0],(1+1j, 3.)]) + >>> a = np.array([[1, 2.0], [0, 0], (1+1j, 3.)]) >>> a array([[1.+0.j, 2.+0.j], @@ -214,11 +225,8 @@ Glossary arrays are :term:`view`\ s. - big-endian - When storing a multi-byte value in memory as a sequence of bytes, the - sequence addresses/sends/stores the most significant byte first (lowest - address) and the least significant byte last (highest address). Common in - micro-processors and used for transmission of data over network protocols. + `big-endian `_ + \ BLAS @@ -242,69 +250,33 @@ Glossary no memory or time. For details, see :doc:`Broadcasting. ` - C order - See `row-major` - - - column-major - A way to represent items in a N-dimensional array in the 1-dimensional - computer memory. In column-major order, the leftmost index "varies the - fastest": for example the array:: - - [[1, 2, 3], - [4, 5, 6]] + See :term:`row-major`. `column-major `_ \ - copy - - See :term:`view`. - - - :term:`decorator ` - \ - - - :term:`dictionary ` - \ + contiguous + An array is contiguous if it occupies a single unbroken block of memory. - dimension - - See :term:`axis`. - - - dtype - - The datatype describing the (identically typed) elements in an ndarray. - It can be changed to reinterpret the array contents. For details, see - :doc:`Data type objects (dtype). ` - - - fancy indexing - - Another term for :term:`advanced indexing`. - + copy + See :term:`view`. dimension - See :term:`axis`. dtype - The datatype describing the (identically typed) elements in an ndarray. It can be changed to reinterpret the array contents. For details, see :doc:`Data type objects (dtype). ` fancy indexing - Another term for :term:`advanced indexing`. @@ -315,15 +287,13 @@ Glossary Fortran order - Same as `column-major` - + Same as `column-major `_ flattened See :term:`ravel`. - homogeneous All elements of a homogeneous array have the same type. ndarrays, in contrast to Python lists, are homogeneous. The type can be complicated, @@ -333,34 +303,18 @@ Glossary Python objects, fill the role of heterogeneous arrays. - :term:`immutable ` - \ - - - :term:`iterable ` - \ - - itemsize The size of the dtype element in bytes. - :term:`list ` - \ - - `little-endian `_ - When storing a multi-byte value in memory as a sequence of bytes, the - sequence addresses/sends/stores the least significant byte first (lowest - address) and the most significant byte last (highest address). Common in - x86 processors. + \ mask The boolean array used to select elements in a :term:`masked array`. - masked array Bad or missing data can be cleanly ignored by putting it in a masked array, which has an internal boolean array indicating invalid @@ -391,26 +345,6 @@ Glossary object array - - An array whose dtype is ``object``; that is, it contains references to - Python objects. Indexing the array dereferences the Python objects, so - unlike other ndarrays, an object array has the ability to hold - heterogeneous objects. - - - ravel - - `numpy.ravel` and `numpy.ndarray.flatten` both flatten an ndarray. ``ravel`` - will return a view if possible; ``flatten`` always returns a copy. - - Flattening collapses a multidimensional array to a single dimension; - details of how this is done (for instance, whether ``a[n+1]`` should be - the next row or next column) are parameters. - - - - object array - An array whose dtype is ``object``; that is, it contains references to Python objects. 
Indexing the array dereferences the Python objects, so
-       unlike other ndarrays, an object array has the ability to hold
@@ -418,11 +352,14 @@ Glossary

    ravel
-
-       `numpy.ravel` and `numpy.ndarray.flatten` both flatten an ndarray. ``ravel``
-       will return a view if possible; ``flatten`` always returns a copy.
-
-       Flattening collapses a multi-dimensional array to a single dimension;
+       :doc:`numpy.ravel \
+       <reference/generated/numpy.ravel>`
+       and :doc:`numpy.flatten \
+       <reference/generated/numpy.ndarray.flatten>`
+       both flatten an ndarray. ``ravel`` will return a view if possible;
+       ``flatten`` always returns a copy.
+
+       Flattening collapses a multidimensional array to a single dimension;
        details of how this is done (for instance, whether ``a[n+1]`` should be
        the next row or next column) are parameters.

@@ -434,23 +371,16 @@ Glossary

    record array
        A :term:`structured array` with an additional way to access
        fields -- ``a.field`` in addition to ``a['field']``. For details, see
        :doc:`numpy.recarray. <reference/generated/numpy.recarray>`


    row-major
-       A way to represent items in a N-dimensional array in the 1-dimensional
-       computer memory. In row-major order, the rightmost index "varies
-       the fastest": for example the array::
+       `row-major <https://en.wikipedia.org/wiki/Row-_and_column-major_order>`_
+       order is also known as C order, as the C programming language uses it.
+       New NumPy arrays are by default in row-major order.

-       [[1, 2, 3],
-        [4, 5, 6]]
-
-       is represented in the row-major order as::
-
-       [1, 2, 3, 4, 5, 6]
-
-       Row-major order is also known as the C order, as the C programming
-       language uses it. New NumPy arrays are by default in row-major order.

+   :doc:`scalar <reference/arrays.scalars>`
+       \


    shape
-
        A tuple showing the length of each dimension of an ndarray. The
        length of the tuple itself is the number of dimensions
        (:doc:`numpy.ndim <reference/generated/numpy.ndarray.ndim>`).

@@ -464,7 +394,6 @@ Glossary


    stride
-
        Physical memory is one-dimensional; strides provide a mechanism to map
        a given index to an address in memory. For an N-dimensional array, its
        ``strides`` attribute is an N-element tuple; advancing from index

@@ -485,7 +414,6 @@ Glossary


    structured array
-
        Array whose :term:`dtype` is a :term:`structured data type`.

@@ -496,7 +424,6 @@ Glossary


    subarray
-
        An array nested in a :term:`structured data type`: ::

            >>> dt = np.dtype([('a', np.int32), ('b', np.float32, (3,))])
            >>> np.zeros(3, dtype=dt)

@@ -505,18 +432,19 @@ Glossary
            dtype=[('a', '<i4'), ('b', '<f4', (3,))])


    subarray data type
-
        An element of a structured datatype that behaves like an ndarray.


    title
-
        An alias for a field name in a structured datatype.


-   :term:`tuple <python:tuple>`
+
+
    ufunc
        NumPy's fast element-by-element computation (:term:`vectorization`) is
        structured so as to leave the choice of function open. A function used

@@ -525,7 +453,6 @@ Glossary
        :doc:`write their own. <reference/ufuncs>`


-
    vectorization
        NumPy hands off array processing to C, where looping and computation
        are much faster than in Python. To exploit this, programmers using NumPy

@@ -533,14 +460,12 @@ Glossary
        :term:`vectorization` can refer both to the C offloading and to
        structuring NumPy code to leverage it.

-
-
    view
        Without changing underlying data, NumPy can make one array masquerade as
        any number of other arrays with different types, shapes, and even
        content. This is much faster than creating those arrays.

-       An array created this way is a ``view``, and the performance gain often
+       An array created this way is a `view`, and the performance gain often
        makes an array created as a view preferable to one created as a new
        array.

@@ -569,14 +494,3 @@ Glossary

        >>> y
        array([3, 2, 4])
-
-   wrapper
-       Python is a high-level (highly abstracted, or English-like) language.
-       This abstraction comes at a price in execution speed, and sometimes
-       it becomes necessary to use lower level languages to do fast
-       computations. A wrapper is code that provides a bridge between
-       high and the low level languages, allowing, e.g., Python to execute
-       code written in C or Fortran.
-
-       Examples include ctypes, SWIG and Cython (which wraps C and C++)
-       and f2py (which wraps Fortran).
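The view-versus-copy contrast drawn in the revised ``ravel`` glossary entry
above is easy to check interactively. A minimal sketch (assuming a
C-contiguous input, the case in which ``ravel`` can return a view)::

    >>> import numpy as np
    >>> a = np.arange(6).reshape(2, 3)
    >>> r = a.ravel()      # view when possible
    >>> f = a.flatten()    # always a copy
    >>> a[0, 0] = 99       # mutate the original
    >>> r[0]               # the view sees the write
    99
    >>> f[0]               # the copy does not
    0
    >>> np.shares_memory(a, r)
    True
    >>> np.shares_memory(a, f)
    False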
From 5bec8df317ffbda8f97486195b758f2261495749 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Fri, 18 Sep 2020 02:09:02 +0200 Subject: [PATCH 064/409] BLD: Fix a macOS build failure when `NPY_BLAS_ORDER=""` --- numpy/distutils/system_info.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/numpy/distutils/system_info.py b/numpy/distutils/system_info.py index b4513825d04d..c3bd6347c172 100644 --- a/numpy/distutils/system_info.py +++ b/numpy/distutils/system_info.py @@ -471,6 +471,9 @@ def _parse_env_order(base_order, env): allow_order = base_order.copy() for order in orders: + if not order: + continue + if order not in base_order: unknown_order.append(order) continue @@ -482,6 +485,9 @@ def _parse_env_order(base_order, env): allow_order = [] for order in orders: + if not order: + continue + if order not in base_order: unknown_order.append(order) continue From 6d3a9d7f97c1033de02a3a611ee3e8111f81738c Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Fri, 18 Sep 2020 11:38:08 +0200 Subject: [PATCH 065/409] TST: Expanded the `_parse_env_order` tests Relevant for when LAPACK/BLAS optimization is disabled. Addresses https://github.com/numpy/numpy/pull/17346#issuecomment-694744389. --- numpy/distutils/tests/test_system_info.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/numpy/distutils/tests/test_system_info.py b/numpy/distutils/tests/test_system_info.py index 46ad9b1033e0..ec15126f7f7b 100644 --- a/numpy/distutils/tests/test_system_info.py +++ b/numpy/distutils/tests/test_system_info.py @@ -284,7 +284,7 @@ def test_overrides(self): assert info.get_lib_dirs() == lib_dirs finally: os.chdir(previousDir) - + def test_distutils_parse_env_order(monkeypatch): from numpy.distutils.system_info import _parse_env_order @@ -298,6 +298,12 @@ def test_distutils_parse_env_order(monkeypatch): assert order == list('bef') assert len(unknown) == 1 + # For when LAPACK/BLAS optimization is disabled + monkeypatch.setenv(env, '') + order, unknown = _parse_env_order(base_order, env) + assert len(order) == 0 + assert len(unknown) == 0 + for prefix in '^!': monkeypatch.setenv(env, f'{prefix}b,i,e') order, unknown = _parse_env_order(base_order, env) From 90111e2dd99b7f29e2134c4e326ac68d86cda393 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Fri, 18 Sep 2020 10:02:54 -0400 Subject: [PATCH 066/409] DOC: 'Along an axis' rewrite and other reviewer changes --- doc/source/glossary.rst | 168 ++++++++++++++++++++++++---------------- 1 file changed, 100 insertions(+), 68 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index e6c8bbd17270..20d3227d2a6b 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -6,20 +6,23 @@ Glossary (`n`,) - A tuple with one element. The trailing comma distinguishes a one-element - tuple from a parenthesized ``n``. + A parenthesized number followed by a comma denotes a tuple with one + element. The trailing comma distinguishes a one-element tuple from a + parenthesized ``n``. -1 - Used as a dimension entry, ``-1`` instructs NumPy to choose the length - that will keep the total number of elements the same. + - **In a dimension entry**, instructs NumPy to choose the length + that will keep the total number of array elements the same. - >>> np.arange(12).reshape(4,-1).shape + >>> np.arange(12).reshape(4, -1).shape (4, 3) + - **In an index**, any negative value + `denotes `_ + indexing from the right. - - ``...`` + . . . An :py:data:`Ellipsis`. 
- **When indexing an array**, shorthand that the missing axes, if they @@ -39,13 +42,13 @@ Glossary >>> a[0,...,0].shape (3,) - It can be used at most once; ``a[...,0,...]`` raises an :exc:`IndexError`. + It can be used at most once; ``a[...,0,...]`` raises an :exc:`IndexError`. - **In printouts**, NumPy substitutes ``...`` for the middle elements of large arrays. To see the entire array, use `numpy.printoptions` - ``:`` + : The Python :term:`python:slice` operator. In ndarrays, slicing can be applied to every axis: @@ -77,14 +80,14 @@ Glossary For details, see :ref:`combining-advanced-and-basic-indexing`. - ``<`` + < In a dtype declaration, indicates that the data is :term:`little-endian` (the bracket is big on the right). :: >>> dt = np.dtype('`` + > In a dtype declaration, indicates that the data is :term:`big-endian` (the bracket is big on the left). :: @@ -99,35 +102,41 @@ Glossary along an axis - Reversing along axis 0 (the row axis) below reverses the columns, and - reversing along axis 1 (the column axis) reverses the rows. This might - be the opposite of what a new user would expect. + An operation `along axis n` of array ``a`` behaves as if its argument + were an array of slices of ``a`` where each slice has a successive + index of axis `n`. + + For example, if ``a`` is a 3 x `N` array, an operation along axis 0 + behaves as if its argument were an array containing slices of each row: + + >>> np.array((a[0,:], a[1,:], a[2,:])) + + To make it concrete, we can pick the operation to be the array-reversal + function :func:`numpy.flip`, which accepts an ``axis`` argument. We + construct a 3 x 4 array ``a``: >>> a = np.arange(12).reshape(3,4) >>> a array([[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]]) - + + Reversing along axis 0 (the row axis) yields + >>> np.flip(a,axis=0) array([[ 8, 9, 10, 11], [ 4, 5, 6, 7], [ 0, 1, 2, 3]]) - - >>> np.flip(a,axis=1) - array([[ 3, 2, 1, 0], - [ 7, 6, 5, 4], - [11, 10, 9, 8]]) - An operation "along axis 0" behaves as if the argument were an array of - ``a`` slices taking successive indexes of axis 0: + Recalling the definition of `along an axis`, ``flip`` along axis 0 is + treating its argument as if it were >>> np.array((a[0,:], a[1,:], a[2,:])) array([[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]]) - Reversing the slices results in ``np.flip(a,axis=0)``: + and the result of ``np.flip(a,axis=0)`` is to reverse the slices: >>> np.array((a[2,:],a[1,:],a[0,:])) array([[ 8, 9, 10, 11], @@ -136,8 +145,7 @@ Glossary array - Used synonymously in the NumPy docs with - :doc:`ndarray `, NumPy's basic structure. + Used synonymously in the NumPy docs with :term:`ndarray`. array_like @@ -178,7 +186,6 @@ Glossary >>> a array([[[ 0, 1, 2], [ 3, 4, 5]], - [[ 6, 7, 8], [ 9, 10, 11]]]) @@ -217,12 +224,12 @@ Glossary .base If an array does not own its memory, then its - :doc:`base ` attribute - returns the object whose memory the array is referencing. That object - may be borrowing the memory from still another object, so the - owning object may be ``a.base.base.base...``. Despite advice to the - contrary, testing ``base`` is not a surefire way to determine if two - arrays are :term:`view`\ s. + :doc:`base ` attribute returns + the object whose memory the array is referencing. That object may be + referencing the memory from still another object, so the owning object + may be ``a.base.base.base...``. Some writers erroneously claim that + testing ``base`` determines if arrays are :term:`view`\ s. For the + correct way, see :func:`numpy.shares_memory`. 
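To make the distinction concrete, a short sketch of the behavior the entry
above describes (illustrative only; ``base`` chains can differ depending on
how each view was constructed, which is exactly why ``shares_memory`` is the
dependable test)::

    >>> import numpy as np
    >>> a = np.arange(10)
    >>> v = a[::2]                # a view into a
    >>> v.base is a
    True
    >>> c = a[::2].copy()         # a new array that owns its memory
    >>> c.base is None
    True
    >>> np.shares_memory(a, v)
    True
    >>> np.shares_memory(a, c)
    False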
`big-endian `_ @@ -237,17 +244,30 @@ Glossary ``broadcasting`` is NumPy's ability to process ndarrays of different sizes as if all were the same size. - When NumPy operates on two arrays, it works element by - element -- for instance, ``c = a * b`` is :: + It permits an elegant do-what-I-mean behavior where, for instance, + adding a scalar to a vector adds the scalar value to every element. + + >>> a = np.arange(3) + >>> a + array([0, 1, 2]) + + >>> a + [3, 3, 3] + array([3, 4, 5]) + + >>> a + 3 + array([3, 4, 5]) + + Ordinarly, vector operands must all be the same size, because NumPy + works element by element -- for instance, ``c = a * b`` is :: c[0,0,0] = a[0,0,0] * b[0,0,0] c[0,0,1] = a[0,0,1] * b[0,0,1] ... - Ordinarily this means the shapes of a and b must be identical. But in - some cases, NumPy can fill "missing" axes or "too-short" dimensions - with duplicate data so shapes will match. The duplication costs - no memory or time. For details, see :doc:`Broadcasting. ` + But in certain useful cases, NumPy can duplicate data along "missing" + axes or "too-short" dimensions so shapes will match. The duplication + costs no memory or time. For details, see + :doc:`Broadcasting. ` C order @@ -259,7 +279,10 @@ Glossary contiguous - An array is contiguous if it occupies a single unbroken block of memory. + An array is contiguous if + * it occupies an unbroken block of memory, and + * array elements with higher indexes occupy higher addresses (that + is, the :term:`stride` is not negative). copy @@ -299,12 +322,12 @@ Glossary contrast to Python lists, are homogeneous. The type can be complicated, as in a :term:`structured array`, but all elements have that type. - NumPy `object arrays <#term-object-array>`_, which contain references to - Python objects, fill the role of heterogeneous arrays. + NumPy `object arrays <#term-object-array>`_, which contain references to + Python objects, fill the role of heterogeneous arrays. itemsize - The size of the dtype element in bytes. + The size of the dtype element in bytes. `little-endian `_ @@ -312,7 +335,19 @@ Glossary mask - The boolean array used to select elements in a :term:`masked array`. + A boolean array used to select only certain elements for an operation: + + >>> x = np.arange(5) + >>> x + array([0, 1, 2, 3, 4]) + + >>> mask = (x > 2) + >>> mask + array([False, False, False, True, True]) + + >>> x[mask] = -1 + >>> x + array([ 0, 1, 2, -1, -1]) masked array @@ -341,7 +376,7 @@ Glossary ndarray - See :term:`array`. + :doc:`NumPy's basic structure `. object array @@ -365,15 +400,15 @@ Glossary record array - A :term:`structured array` with an additional way to access - fields -- ``a.field`` in addition to ``a['field']``. For details, see + A :term:`structured array` with allowing access in an attribute style + (``a.field``) in addition to ``a['field']``. For details, see :doc:`numpy.recarray. ` row-major - `row-major `_ - order is also known as C order, as the C programming language uses it. - New NumPy arrays are by default in row-major order. + `row-major `_ + order is also known as C order, as the C programming language uses it. + New NumPy arrays are by default in row-major order. 
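The contiguity and memory-order definitions above can be read straight off an
array's ``flags``; a brief sketch::

    >>> import numpy as np
    >>> a = np.arange(12).reshape(3, 4)      # row-major (C order) by default
    >>> a.flags['C_CONTIGUOUS']
    True
    >>> a[:, ::2].flags['C_CONTIGUOUS']      # strided view: no longer contiguous
    False
    >>> np.asfortranarray(a).flags['F_CONTIGUOUS']
    True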
:doc:`scalar ` @@ -390,7 +425,7 @@ Glossary :term:`slice <:>` - \ + \ stride @@ -424,7 +459,7 @@ Glossary subarray - An array nested in a :term:`structured data type`: :: + An array nested in a :term:`structured data type`, as ``b`` is here: >>> dt = np.dtype([('a', np.int32), ('b', np.float32, (3,))]) >>> np.zeros(3, dtype=dt) @@ -433,7 +468,7 @@ Glossary subarray data type - An element of a structured datatype that behaves like an ndarray. + An element of a structured datatype that behaves like an ndarray. title @@ -446,10 +481,10 @@ Glossary ufunc - NumPy's fast element-by-element computation (:term:`vectorization`) is - structured so as to leave the choice of function open. A function used - in vectorization is called a ``ufunc``, short for ``universal - function``. NumPy routines have built-in ufuncs, but users can also + NumPy's fast element-by-element computation (:term:`vectorization`) + gives a choice which function gets applied. The general term for the + function is ``ufunc``, short for ``universal function``. NumPy routines + have built-in ufuncs, but users can also :doc:`write their own. ` @@ -461,18 +496,15 @@ Glossary structuring NumPy code to leverage it. view - Without changing underlying data, NumPy can make one array masquerade as - any number of other arrays with different types, shapes, and even - content. This is much faster than creating those arrays. - - An array created this way is a `view`, and the performance gain often - makes an array created as a view preferable to one created as a new - array. - - But because a view shares data with the original array, a write in one - array can affect the other, even though they appear to be different - arrays. If this is an problem, a view can't be used; the second array - needs to be physically distinct -- a ``copy``. + Without touching underlying data, NumPy can make one array appear + to change its datatype and shape. + + An array created this way is a `view`, and NumPy often exploits the + performance gain of using a view versus making a new array. + + A potential drawback is that writing to a view can alter the original + as well. If this is a problem, the NumPy instead needs to create a + physically distinct array -- a `copy`. Some NumPy routines always return views, some always return copies, some may return one or the other, and for some the choice can be specified. From 563f5c27ab365204d72b436654a2dad54af0658d Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Fri, 18 Sep 2020 10:55:34 -0400 Subject: [PATCH 067/409] DOC: Replace linked headings with explicit 'See' --- doc/source/glossary.rst | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 20d3227d2a6b..0ca3becb161d 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -109,7 +109,7 @@ Glossary For example, if ``a`` is a 3 x `N` array, an operation along axis 0 behaves as if its argument were an array containing slices of each row: - >>> np.array((a[0,:], a[1,:], a[2,:])) + >>> np.array((a[0,:], a[1,:], a[2,:])) #doctest: +SKIP To make it concrete, we can pick the operation to be the array-reversal function :func:`numpy.flip`, which accepts an ``axis`` argument. We @@ -232,8 +232,8 @@ Glossary correct way, see :func:`numpy.shares_memory`. - `big-endian `_ - \ + big-endian + See `Endianness `_. BLAS @@ -271,11 +271,11 @@ Glossary C order - See :term:`row-major`. + Same as :term:`row-major`. 
`column-major <https://en.wikipedia.org/wiki/Row-_and_column-major_order>`_
-        \
+    column-major
+        See `Row- and column-major order <https://en.wikipedia.org/wiki/Row-_and_column-major_order>`_.


     contiguous

@@ -310,7 +310,7 @@ Glossary


     Fortran order
-        Same as `column-major <https://en.wikipedia.org/wiki/Row-_and_column-major_order>`_
+        Same as :term:`column-major`.


     flattened

@@ -330,8 +330,8 @@ Glossary
         The size of the dtype element in bytes.


-    `little-endian <https://en.wikipedia.org/wiki/Endianness>`_
-        \
+    little-endian
+        See `Endianness <https://en.wikipedia.org/wiki/Endianness>`_.


     mask

@@ -406,13 +406,12 @@ Glossary


     row-major
-        `row-major <https://en.wikipedia.org/wiki/Row-_and_column-major_order>`_
-        order is also known as C order, as the C programming language uses it.
-        New NumPy arrays are by default in row-major order.
+        See `Row- and column-major order <https://en.wikipedia.org/wiki/Row-_and_column-major_order>`_.
+        NumPy creates arrays in row-major order by default.


-    :doc:`scalar <reference/arrays.scalars>`
-        \
+    scalar
+        In NumPy, usually a synonym for :term:`array scalar`.


     shape

@@ -424,10 +423,6 @@ Glossary
         :doc:`numpy.ndarray.shape <reference/generated/numpy.ndarray.shape>`.


-    :term:`slice <:>`
-        \
-
-
     stride

@@ -447,7 +442,6 @@ Glossary
         `_

-
     structured array
         Array whose :term:`dtype` is a :term:`structured data type`.

@@ -509,10 +503,9 @@ Glossary
     Some NumPy routines always return views, some always return copies, some
     may return one or the other, and for some the choice can be specified.
     Responsibility for managing views and copies falls to the programmer.
-    :doc:`numpy.shares_memory <reference/generated/numpy.shares_memory>`
-    will check whether ``b`` is a view of ``a``,
-    but an exact answer isn't always feasible, as the documentation page
-    explains.
+    :func:`numpy.shares_memory` will check whether ``b`` is a view of
+    ``a``, but an exact answer isn't always feasible, as the documentation
+    page explains.

     >>> x = np.arange(5)
     >>> x

From 85f364a0c9dd5ac91dfae6d9ce05193a5f2f7eb0 Mon Sep 17 00:00:00 2001
From: Ben Nathanson
Date: Fri, 18 Sep 2020 12:08:09 -0400
Subject: [PATCH 068/409] DOC: Fix minor typos

---
 doc/source/glossary.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst
index 0ca3becb161d..d4db87723d3d 100644
--- a/doc/source/glossary.rst
+++ b/doc/source/glossary.rst
@@ -304,9 +304,9 @@ Glossary

     field
-        In a :term:`structured data type`, each sub-type is called a `field`.
+        In a :term:`structured data type`, each subtype is called a `field`.
         The `field` has a name (a string), a type (any valid dtype), and
-        an optional `title`. See :ref:`arrays.dtypes`
+        an optional `title`. See :ref:`arrays.dtypes`.


     Fortran order
@@ -497,7 +497,7 @@ Glossary
         performance gain of using a view versus making a new array.

         A potential drawback is that writing to a view can alter the original
-        as well. If this is a problem, the NumPy instead needs to create a
+        as well. If this is a problem, NumPy instead needs to create a
         physically distinct array -- a `copy`.

         Some NumPy routines always return views, some always return copies, some

From a368e00114df2e5e3b74f050d4cf7aa04e58a228 Mon Sep 17 00:00:00 2001
From: Eric Wieser
Date: Wed, 16 Sep 2020 12:53:26 +0100
Subject: [PATCH 069/409] DOC: Fix incorrect `.. deprecated::` syntax that led
 to this note being invisible.

---
 doc/source/reference/arrays.dtypes.rst | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/doc/source/reference/arrays.dtypes.rst b/doc/source/reference/arrays.dtypes.rst
index 575984707ee3..301e26c55cb3 100644
--- a/doc/source/reference/arrays.dtypes.rst
+++ b/doc/source/reference/arrays.dtypes.rst
@@ -152,14 +152,6 @@ Array-scalar types
    >>> dt = np.dtype(np.complex128)  # 128-bit complex floating-point number

 Generic types
-    ..
deprecated NumPy 1.19::
-
-       The use of generic types is deprecated. This is because it can be
-       unexpected in a context such as ``arr.astype(dtype=np.floating)``.
-       ``arr.astype(dtype=np.floating)`` which casts an array of ``float32``
-       to an array of ``float64``, even though ``float32`` is a subdtype of
-       ``np.floating``.
-
 The generic hierarchical type objects convert to corresponding
 type objects according to the associations:

@@ -172,6 +164,15 @@ Generic types
    :class:`generic`, :class:`flexible`                   :class:`void`
    ===================================================== ===============

+   .. deprecated:: 1.19
+
+      This conversion of generic scalar types is deprecated.
+      This is because it can be unexpected in a context such as
+      ``arr.astype(dtype=np.floating)``, which casts an array of ``float32``
+      to an array of ``float64``, even though ``float32`` is a subdtype of
+      ``np.floating``.
+
+
 Built-in Python types
    Several python types are equivalent to a corresponding
    array scalar when used to generate a :class:`dtype` object:

From 93867a956e6b9c8db036c1f03eeeced9ea460a1b Mon Sep 17 00:00:00 2001
From: Ben Nathanson
Date: Fri, 18 Sep 2020 18:37:24 -0400
Subject: [PATCH 070/409] DOC: How-to guide for drafting how-tos

Migrated from numpy-tutorials

---
 doc/source/user/how-to-how-to.rst | 115 ++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 doc/source/user/how-to-how-to.rst

diff --git a/doc/source/user/how-to-how-to.rst b/doc/source/user/how-to-how-to.rst
new file mode 100644
index 000000000000..0a037f23728d
--- /dev/null
+++ b/doc/source/user/how-to-how-to.rst
@@ -0,0 +1,115 @@
+:orphan:
+
+.. _how-to-how-to:
+
+##############################################################################
+How to write a NumPy how-to
+##############################################################################
+
+How-tos get straight to the point -- they
+
+  - answer a focused question, or
+  - narrow a broad question into focused questions that the user can
+    choose among.
+
+******************************************************************************
+A stranger has asked for directions...
+******************************************************************************
+
+**"I need to fill my tank."**
+
+******************************************************************************
+Give a brief but explicit answer
+******************************************************************************
+
+  - `"Three miles, take a right at Hayseed Road, it's on your left."`
+
+Add helpful details for newcomers ("Hayseed Road", even though it's the only
+turnoff at three miles). But not irrelevant ones:
+
+  - Don't also give directions from Route 7.
+  - Don't explain why the town has only one gas station.
+
+If there's related background (tutorial, explanation, reference, alternative
+approach), bring it to the user's attention with a link ("Directions from Route 7,"
+"Why so few gas stations?").
+
+
+******************************************************************************
+Delegate
+******************************************************************************
+
+  - `"Three miles, take a right at Hayseed Road, follow the signs."`
+
+If the information is already documented and succinct enough for a how-to,
+just link to it, possibly after an introduction ("Three miles, take a right").
+ +****************************************************************************** +If the question is broad, narrow and redirect it +****************************************************************************** + + **"I want to see the sights"** + +The `See the sights` how-to should link to a set of narrower how-tos: + +- Find historic buildings +- Find scenic lookouts +- Find the town center + +and these might in turn link to still narrower how-tos -- so the town center +page might link to + + - Find the court house + - Find city hall + +By organizing how-tos this way, you not only display the options for people +who need to narrow their question, you also have provided answers for users +who start with narrower questions ("I want to see historic buildings," "Which +way to city hall?"). + +****************************************************************************** +Break it up +****************************************************************************** + +If a how-to has many steps: + + - Consider breaking a step out into an individual how-to and linking to it. + - Include subheadings. They help readers grasp what's coming and return + where they left off. + +****************************************************************************** +Why write how-tos when there's Stack Overflow, Reddit, Gitter...? +****************************************************************************** + + - We have authoritative answers. + - How-tos make the site less forbidding to non-experts. + - How-tos bring people into the site and help them discover other information + that's here . + - Creating how-tos helps us see NumPy usability through new eyes. + +****************************************************************************** +Aren't how-tos and tutorials the same thing? +****************************************************************************** + +People use them interchangeably, but we draw a distinction, following `Daniele +Procida `_. Documentation needs to meet +users where they are. `How-tos` offer get-it-done information; the user wants +steps to copy and doesn't necessarily want to understand NumPy. `Tutorials` +are warm-fuzzy information; the user wants a feel for some aspect of NumPy +(and again, may or may not care about deeper knowledge). + +We distinguish both tutorials and how-tos from `Explanations`, which are +deep dives intended to give understanding rather than immediate assistance, +and `References`, which give complete, autoritative data on some concrete +part of NumPy (like its API) but aren't obligated to paint a broader picture. + +For more on tutorials, see +`the tutorial how-to `. + + +****************************************************************************** +Is this page an example of a how-to? +****************************************************************************** + +Yes -- until the sections with question-mark headings; they explain rather +than giving directions. In a how-to, those would be links. 
\ No newline at end of file From e34a30e9736e8f966c1eac1af299dfd193db6dd3 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Fri, 18 Sep 2020 18:37:24 -0400 Subject: [PATCH 071/409] DOC: How-to guide for drafting how-tos Migrated from numpy-tutorials --- doc/source/user/how-to-how-to.rst | 116 ++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 doc/source/user/how-to-how-to.rst diff --git a/doc/source/user/how-to-how-to.rst b/doc/source/user/how-to-how-to.rst new file mode 100644 index 000000000000..59a51efda0ab --- /dev/null +++ b/doc/source/user/how-to-how-to.rst @@ -0,0 +1,116 @@ +:orphan: + +.. _how-to-how-to: + +############################################################################## +How to write a NumPy how-to +############################################################################## + +How-tos get straight to the point -- they + + - answer a focused question, or + - narrow a broad question into focused questions that the user can + choose among. + +****************************************************************************** +A stranger has asked for directions... +****************************************************************************** + +**"I need to fill my tank."** + +****************************************************************************** +Give a brief but explicit answer +****************************************************************************** + + - `"Three miles, take a right at Hayseed Road, it's on your left."` + +Add helpful details for newcomers ("Hayseed Road", even though it's the only +turnoff at three miles). But not irrelevant ones: + + - Don't also give directions from Route 7. + - Don't explain why the town has only one gas station. + +If there's related background (tutorial, explanation, reference, alternative +approach), bring it to the user's attention with a link ("Directions from Route 7," +"Why so few gas stations?"). + + +****************************************************************************** +Delegate +****************************************************************************** + + - `"Three miles, take a right at Hayseed Road, follow the signs."` + +If the information is already documented and succinct enough for a how-to, +just link to it, possibly after an introduction ("Three miles, take a right"). + +****************************************************************************** +If the question is broad, narrow and redirect it +****************************************************************************** + + **"I want to see the sights"** + +The `See the sights` how-to should link to a set of narrower how-tos: + +- Find historic buildings +- Find scenic lookouts +- Find the town center + +and these might in turn link to still narrower how-tos -- so the town center +page might link to + + - Find the court house + - Find city hall + +By organizing how-tos this way, you not only display the options for people +who need to narrow their question, you also have provided answers for users +who start with narrower questions ("I want to see historic buildings," "Which +way to city hall?"). + +****************************************************************************** +If there are many steps, break them up +****************************************************************************** + +If a how-to has many steps: + + - Consider breaking a step out into an individual how-to and linking to it. + - Include subheadings. 
They help readers grasp what's coming and return + where they left off. + +****************************************************************************** +Why write how-tos when there's Stack Overflow, Reddit, Gitter...? +****************************************************************************** + + - We have authoritative answers. + - How-tos make the site less forbidding to non-experts. + - How-tos bring people into the site and help them discover other information + that's here . + - Creating how-tos helps us see NumPy usability through new eyes. + +****************************************************************************** +Aren't how-tos and tutorials the same thing? +****************************************************************************** + +People use the terms "how-to" and "tutorial" interchangeably, but we draw a +distinction, following `Daniele Procida `_. +Documentation needs to meet users where they are. `How-tos` offer get-it-done +information; the user wants steps to copy and doesn't necessarily want to +understand NumPy. `Tutorials` are warm-fuzzy information; the user wants a +feel for some aspect of NumPy (and again, may or may not care about deeper +knowledge). + +We distinguish both tutorials and how-tos from `Explanations`, which are +deep dives intended to give understanding rather than immediate assistance, +and `References`, which give complete, autoritative data on some concrete +part of NumPy (like its API) but aren't obligated to paint a broader picture. + +For more on tutorials, see +`the tutorial how-to `. + + +****************************************************************************** +Is this page an example of a how-to? +****************************************************************************** + +Yes -- until the sections with question-mark headings; they explain rather +than giving directions. In a how-to, those would be links. \ No newline at end of file From a9b4bff3613a0081703f46523d37f81982170a8c Mon Sep 17 00:00:00 2001 From: Stefan Appelhoff Date: Sat, 19 Sep 2020 11:48:11 +0200 Subject: [PATCH 072/409] DOC: clarify residuals return param Specify that the sum of *squared* residuals are returned. Use @ operator instead of * in code example. --- numpy/linalg/linalg.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py index 92f93d671a12..b6d860dfabad 100644 --- a/numpy/linalg/linalg.py +++ b/numpy/linalg/linalg.py @@ -2206,8 +2206,8 @@ def lstsq(a, b, rcond="warn"): Least-squares solution. If `b` is two-dimensional, the solutions are in the `K` columns of `x`. residuals : {(1,), (K,), (0,)} ndarray - Sums of residuals; squared Euclidean 2-norm for each column in - ``b - a*x``. + Sums of squared residuals: Squared Euclidean 2-norm for each column in + ``b - a @ x``. If the rank of `a` is < N or M <= N, this is an empty array. If `b` is 1-dimensional, this is a (1,) shape array. Otherwise the shape is (K,). 
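The relationship stated in the corrected docstring -- that ``residuals``
holds the squared Euclidean 2-norm of ``b - a @ x`` -- can be verified
numerically. A small sketch with arbitrarily chosen data (an overdetermined
full-rank system, so the residuals array is nonempty)::

    >>> import numpy as np
    >>> a = np.array([[0., 1.], [1., 1.], [2., 1.], [3., 1.]])
    >>> b = np.array([-1., 0.2, 0.9, 2.1])
    >>> x, residuals, rank, sv = np.linalg.lstsq(a, b, rcond=None)
    >>> np.allclose(residuals[0], np.sum((b - a @ x) ** 2))
    True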
@@ -2558,7 +2558,7 @@ def norm(x, ord=None, axis=None, keepdims=False): # special case for speedup s = (x.conj() * x).real return sqrt(add.reduce(s, axis=axis, keepdims=keepdims)) - # None of the str-type keywords for ord ('fro', 'nuc') + # None of the str-type keywords for ord ('fro', 'nuc') # are valid for vectors elif isinstance(ord, str): raise ValueError(f"Invalid norm order '{ord}' for vectors") From fa1fa77264e977ace57afc2520f4d12d39757cf8 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 21 Sep 2020 07:16:13 +0000 Subject: [PATCH 073/409] MAINT: Bump hypothesis from 5.35.1 to 5.35.3 Bumps [hypothesis](https://github.com/HypothesisWorks/hypothesis) from 5.35.1 to 5.35.3. - [Release notes](https://github.com/HypothesisWorks/hypothesis/releases) - [Commits](https://github.com/HypothesisWorks/hypothesis/compare/hypothesis-python-5.35.1...hypothesis-python-5.35.3) Signed-off-by: dependabot-preview[bot] --- test_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_requirements.txt b/test_requirements.txt index ca39de622bc0..036e9861fa4e 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,7 +1,7 @@ cython==0.29.21 wheel setuptools<49.2.0 -hypothesis==5.35.1 +hypothesis==5.35.3 pytest==6.0.2 pytz==2020.1 pytest-cov==2.10.1 From f983885f4cafddfe5b5c1e7e19e8d3a04d735fee Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Sat, 19 Sep 2020 18:37:23 -0600 Subject: [PATCH 074/409] ENH: Add PyLong_AsInt function. This compliments the PyLong_As* functions provided by Python. It is copied from the Python private function _PyLong_AsInt. It allows replacing some incorrect uses of PyInt_Check. --- numpy/core/include/numpy/npy_3kcompat.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/numpy/core/include/numpy/npy_3kcompat.h b/numpy/core/include/numpy/npy_3kcompat.h index 4bc06fc9695e..191cd244f875 100644 --- a/numpy/core/include/numpy/npy_3kcompat.h +++ b/numpy/core/include/numpy/npy_3kcompat.h @@ -28,6 +28,30 @@ extern "C" { * PyInt -> PyLong */ + +/* + * This is a renamed copy of the Python non-limited API function _PyLong_AsInt. It is + * included here because it is missing from the PyPy API. It completes the PyLong_As* + * group of functions and can be useful in replacing PyInt_Check. + */ +static NPY_INLINE int +Npy__PyLong_AsInt(PyObject *obj) +{ + int overflow; + long result = PyLong_AsLongAndOverflow(obj, &overflow); + + /* INT_MAX and INT_MIN are defined in Python.h */ + if (overflow || result > INT_MAX || result < INT_MIN) { + /* XXX: could be cute and give a different + message for overflow == -1 */ + PyErr_SetString(PyExc_OverflowError, + "Python int too large to convert to C int"); + return -1; + } + return (int)result; +} + + #if defined(NPY_PY3K) /* Return True only if the long fits in a C long */ static NPY_INLINE int PyInt_Check(PyObject *op) { @@ -39,6 +63,7 @@ static NPY_INLINE int PyInt_Check(PyObject *op) { return (overflow == 0); } + #define PyInt_FromLong PyLong_FromLong #define PyInt_AsLong PyLong_AsLong #define PyInt_AS_LONG PyLong_AsLong From ec565addec836ea0fe88f8766eec6168db3211f7 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Fri, 11 Sep 2020 10:55:01 -0600 Subject: [PATCH 075/409] MAINT: Cleanup f2py/cfuncs.py - Replace npy_3kcompat macro PyInt_AsLong by appropriate definition - Replace npy_3kcompat macro PyInt_AS_LONG by appropriate definition. - Reformat code for readability. 
The code in extensively reformated in this PR which may hide the most important parts of the diff, but I needed to do it to make the code readable. --- numpy/f2py/cfuncs.py | 250 ++++++++++++++++++++++++++++--------------- 1 file changed, 164 insertions(+), 86 deletions(-) diff --git a/numpy/f2py/cfuncs.py b/numpy/f2py/cfuncs.py index 94867b3093ec..26b43e7e6964 100644 --- a/numpy/f2py/cfuncs.py +++ b/numpy/f2py/cfuncs.py @@ -629,7 +629,9 @@ """ needs['string_from_pyobj'] = ['string', 'STRINGMALLOC', 'STRINGCOPYN'] cfuncs['string_from_pyobj'] = """\ -static int string_from_pyobj(string *str,int *len,const string inistr,PyObject *obj,const char *errmess) { +static int +string_from_pyobj(string *str,int *len,const string inistr,PyObject *obj,const char *errmess) +{ PyArrayObject *arr = NULL; PyObject *tmp = NULL; #ifdef DEBUGCFUNCS @@ -684,127 +686,165 @@ Py_XDECREF(tmp); { PyObject* err = PyErr_Occurred(); - if (err==NULL) err = #modulename#_error; - PyErr_SetString(err,errmess); + if (err == NULL) { + err = #modulename#_error; + } + PyErr_SetString(err, errmess); } return 0; } """ + + needs['char_from_pyobj'] = ['int_from_pyobj'] cfuncs['char_from_pyobj'] = """\ -static int char_from_pyobj(char* v,PyObject *obj,const char *errmess) { - int i=0; - if (int_from_pyobj(&i,obj,errmess)) { +static int +char_from_pyobj(char* v, PyObject *obj, const char *errmess) { + int i = 0; + if (int_from_pyobj(&i, obj, errmess)) { *v = (char)i; return 1; } return 0; } """ + + needs['signed_char_from_pyobj'] = ['int_from_pyobj', 'signed_char'] cfuncs['signed_char_from_pyobj'] = """\ -static int signed_char_from_pyobj(signed_char* v,PyObject *obj,const char *errmess) { - int i=0; - if (int_from_pyobj(&i,obj,errmess)) { +static int +signed_char_from_pyobj(signed_char* v, PyObject *obj, const char *errmess) { + int i = 0; + if (int_from_pyobj(&i, obj, errmess)) { *v = (signed_char)i; return 1; } return 0; } """ + + needs['short_from_pyobj'] = ['int_from_pyobj'] cfuncs['short_from_pyobj'] = """\ -static int short_from_pyobj(short* v,PyObject *obj,const char *errmess) { - int i=0; - if (int_from_pyobj(&i,obj,errmess)) { +static int +short_from_pyobj(short* v, PyObject *obj, const char *errmess) { + int i = 0; + if (int_from_pyobj(&i, obj, errmess)) { *v = (short)i; return 1; } return 0; } """ + + cfuncs['int_from_pyobj'] = """\ -static int int_from_pyobj(int* v,PyObject *obj,const char *errmess) { +static int +int_from_pyobj(int* v, PyObject *obj, const char *errmess) +{ PyObject* tmp = NULL; - if (PyInt_Check(obj)) { - *v = (int)PyInt_AS_LONG(obj); - return 1; + + if (PyLong_Check(obj)) { + *v = Npy__PyLong_AsInt(obj); + return !(*v == -1 && PyErr_Occurred()); } + tmp = PyNumber_Long(obj); if (tmp) { - *v = PyInt_AS_LONG(tmp); + *v = Npy__PyLong_AsInt(tmp); Py_DECREF(tmp); - return 1; + return !(*v == -1 && PyErr_Occurred()); } + if (PyComplex_Check(obj)) tmp = PyObject_GetAttrString(obj,\"real\"); else if (PyBytes_Check(obj) || PyUnicode_Check(obj)) /*pass*/; else if (PySequence_Check(obj)) - tmp = PySequence_GetItem(obj,0); + tmp = PySequence_GetItem(obj, 0); if (tmp) { PyErr_Clear(); - if (int_from_pyobj(v,tmp,errmess)) {Py_DECREF(tmp); return 1;} + if (int_from_pyobj(v, tmp, errmess)) { + Py_DECREF(tmp); + return 1; + } Py_DECREF(tmp); } { PyObject* err = PyErr_Occurred(); - if (err==NULL) err = #modulename#_error; - PyErr_SetString(err,errmess); + if (err == NULL) { + err = #modulename#_error; + } + PyErr_SetString(err, errmess); } return 0; } """ + + cfuncs['long_from_pyobj'] = """\ -static int 
long_from_pyobj(long* v,PyObject *obj,const char *errmess) { +static int +long_from_pyobj(long* v, PyObject *obj, const char *errmess) { PyObject* tmp = NULL; - if (PyInt_Check(obj)) { - *v = PyInt_AS_LONG(obj); - return 1; + + if (PyLong_Check(obj)) { + *v = PyLong_AsLong(obj); + return !(*v == -1 && PyErr_Occurred()); } + tmp = PyNumber_Long(obj); if (tmp) { - *v = PyInt_AS_LONG(tmp); + *v = PyLong_AsLong(tmp); Py_DECREF(tmp); - return 1; + return !(*v == -1 && PyErr_Occurred()); } + if (PyComplex_Check(obj)) tmp = PyObject_GetAttrString(obj,\"real\"); else if (PyBytes_Check(obj) || PyUnicode_Check(obj)) /*pass*/; else if (PySequence_Check(obj)) tmp = PySequence_GetItem(obj,0); + if (tmp) { PyErr_Clear(); - if (long_from_pyobj(v,tmp,errmess)) {Py_DECREF(tmp); return 1;} + if (long_from_pyobj(v, tmp, errmess)) { + Py_DECREF(tmp); + return 1; + } Py_DECREF(tmp); } { PyObject* err = PyErr_Occurred(); - if (err==NULL) err = #modulename#_error; - PyErr_SetString(err,errmess); + if (err == NULL) { + err = #modulename#_error; + } + PyErr_SetString(err, errmess); } return 0; } """ + + needs['long_long_from_pyobj'] = ['long_long'] cfuncs['long_long_from_pyobj'] = """\ -static int long_long_from_pyobj(long_long* v,PyObject *obj,const char *errmess) { +static int +long_long_from_pyobj(long_long* v, PyObject *obj, const char *errmess) +{ PyObject* tmp = NULL; + if (PyLong_Check(obj)) { *v = PyLong_AsLongLong(obj); - return (!PyErr_Occurred()); - } - if (PyInt_Check(obj)) { - *v = (long_long)PyInt_AS_LONG(obj); - return 1; + return !(*v == -1 && PyErr_Occurred()); } + tmp = PyNumber_Long(obj); if (tmp) { *v = PyLong_AsLongLong(tmp); Py_DECREF(tmp); - return (!PyErr_Occurred()); + return !(*v == -1 && PyErr_Occurred()); } + if (PyComplex_Check(obj)) tmp = PyObject_GetAttrString(obj,\"real\"); else if (PyBytes_Check(obj) || PyUnicode_Check(obj)) @@ -813,58 +853,64 @@ tmp = PySequence_GetItem(obj,0); if (tmp) { PyErr_Clear(); - if (long_long_from_pyobj(v,tmp,errmess)) {Py_DECREF(tmp); return 1;} + if (long_long_from_pyobj(v, tmp, errmess)) { + Py_DECREF(tmp); + return 1; + } Py_DECREF(tmp); } { PyObject* err = PyErr_Occurred(); - if (err==NULL) err = #modulename#_error; + if (err == NULL) { + err = #modulename#_error; + } PyErr_SetString(err,errmess); } return 0; } """ + + needs['long_double_from_pyobj'] = ['double_from_pyobj', 'long_double'] cfuncs['long_double_from_pyobj'] = """\ -static int long_double_from_pyobj(long_double* v,PyObject *obj,const char *errmess) { +static int +long_double_from_pyobj(long_double* v, PyObject *obj, const char *errmess) +{ double d=0; if (PyArray_CheckScalar(obj)){ if PyArray_IsScalar(obj, LongDouble) { PyArray_ScalarAsCtype(obj, v); return 1; } - else if (PyArray_Check(obj) && PyArray_TYPE(obj)==NPY_LONGDOUBLE) { + else if (PyArray_Check(obj) && PyArray_TYPE(obj) == NPY_LONGDOUBLE) { (*v) = *((npy_longdouble *)PyArray_DATA(obj)); return 1; } } - if (double_from_pyobj(&d,obj,errmess)) { + if (double_from_pyobj(&d, obj, errmess)) { *v = (long_double)d; return 1; } return 0; } """ + + cfuncs['double_from_pyobj'] = """\ -static int double_from_pyobj(double* v,PyObject *obj,const char *errmess) { +static int +double_from_pyobj(double* v, PyObject *obj, const char *errmess) +{ PyObject* tmp = NULL; if (PyFloat_Check(obj)) { -#ifdef __sgi *v = PyFloat_AsDouble(obj); -#else - *v = PyFloat_AS_DOUBLE(obj); -#endif - return 1; + return !(*v == -1.0 && PyErr_Occurred()); } + tmp = PyNumber_Float(obj); if (tmp) { -#ifdef __sgi *v = PyFloat_AsDouble(tmp); -#else - *v = 
PyFloat_AS_DOUBLE(tmp); -#endif Py_DECREF(tmp); - return 1; + return !(*v == -1.0 && PyErr_Occurred()); } if (PyComplex_Check(obj)) tmp = PyObject_GetAttrString(obj,\"real\"); @@ -885,9 +931,13 @@ return 0; } """ + + needs['float_from_pyobj'] = ['double_from_pyobj'] cfuncs['float_from_pyobj'] = """\ -static int float_from_pyobj(float* v,PyObject *obj,const char *errmess) { +static int +float_from_pyobj(float* v, PyObject *obj, const char *errmess) +{ double d=0.0; if (double_from_pyobj(&d,obj,errmess)) { *v = (float)d; @@ -896,11 +946,15 @@ return 0; } """ + + needs['complex_long_double_from_pyobj'] = ['complex_long_double', 'long_double', 'complex_double_from_pyobj'] cfuncs['complex_long_double_from_pyobj'] = """\ -static int complex_long_double_from_pyobj(complex_long_double* v,PyObject *obj,const char *errmess) { - complex_double cd={0.0,0.0}; +static int +complex_long_double_from_pyobj(complex_long_double* v, PyObject *obj, const char *errmess) +{ + complex_double cd = {0.0,0.0}; if (PyArray_CheckScalar(obj)){ if PyArray_IsScalar(obj, CLongDouble) { PyArray_ScalarAsCtype(obj, v); @@ -920,13 +974,17 @@ return 0; } """ + + needs['complex_double_from_pyobj'] = ['complex_double'] cfuncs['complex_double_from_pyobj'] = """\ -static int complex_double_from_pyobj(complex_double* v,PyObject *obj,const char *errmess) { +static int +complex_double_from_pyobj(complex_double* v, PyObject *obj, const char *errmess) { Py_complex c; if (PyComplex_Check(obj)) { - c=PyComplex_AsCComplex(obj); - (*v).r=c.real, (*v).i=c.imag; + c = PyComplex_AsCComplex(obj); + (*v).r = c.real; + (*v).i = c.imag; return 1; } if (PyArray_IsScalar(obj, ComplexFloating)) { @@ -955,28 +1013,22 @@ else { arr = PyArray_FromScalar(obj, PyArray_DescrFromType(NPY_CDOUBLE)); } - if (arr==NULL) return 0; + if (arr == NULL) { + return 0; + } (*v).r = ((npy_cdouble *)PyArray_DATA(arr))->real; (*v).i = ((npy_cdouble *)PyArray_DATA(arr))->imag; return 1; } /* Python does not provide PyNumber_Complex function :-( */ - (*v).i=0.0; + (*v).i = 0.0; if (PyFloat_Check(obj)) { -#ifdef __sgi (*v).r = PyFloat_AsDouble(obj); -#else - (*v).r = PyFloat_AS_DOUBLE(obj); -#endif - return 1; - } - if (PyInt_Check(obj)) { - (*v).r = (double)PyInt_AS_LONG(obj); - return 1; + return !((*v).r == -1.0 && PyErr_Occurred()); } if (PyLong_Check(obj)) { (*v).r = PyLong_AsDouble(obj); - return (!PyErr_Occurred()); + return !((*v).r == -1.0 && PyErr_Occurred()); } if (PySequence_Check(obj) && !(PyBytes_Check(obj) || PyUnicode_Check(obj))) { PyObject *tmp = PySequence_GetItem(obj,0); @@ -997,10 +1049,14 @@ return 0; } """ + + needs['complex_float_from_pyobj'] = [ 'complex_float', 'complex_double_from_pyobj'] cfuncs['complex_float_from_pyobj'] = """\ -static int complex_float_from_pyobj(complex_float* v,PyObject *obj,const char *errmess) { +static int +complex_float_from_pyobj(complex_float* v,PyObject *obj,const char *errmess) +{ complex_double cd={0.0,0.0}; if (complex_double_from_pyobj(&cd,obj,errmess)) { (*v).r = (float)cd.r; @@ -1010,6 +1066,8 @@ return 0; } """ + + needs['try_pyarr_from_char'] = ['pyobj_from_char1', 'TRYPYARRAYTEMPLATE'] cfuncs[ 'try_pyarr_from_char'] = 'static int try_pyarr_from_char(PyObject* obj,char* v) {\n TRYPYARRAYTEMPLATE(char,\'c\');\n}\n' @@ -1047,14 +1105,18 @@ cfuncs[ 'try_pyarr_from_complex_double'] = 'static int try_pyarr_from_complex_double(PyObject* obj,complex_double* v) {\n TRYCOMPLEXPYARRAYTEMPLATE(double,\'D\');\n}\n' -needs['create_cb_arglist'] = ['CFUNCSMESS', 'PRINTPYOBJERR', 'MINMAX'] +needs['create_cb_arglist'] = 
['CFUNCSMESS', 'PRINTPYOBJERR', 'MINMAX'] # create the list of arguments to be used when calling back to python cfuncs['create_cb_arglist'] = """\ -static int create_cb_arglist(PyObject* fun,PyTupleObject* xa,const int maxnofargs,const int nofoptargs,int *nofargs,PyTupleObject **args,const char *errmess) { +static int +create_cb_arglist(PyObject* fun, PyTupleObject* xa , const int maxnofargs, + const int nofoptargs, int *nofargs, PyTupleObject **args, + const char *errmess) +{ PyObject *tmp = NULL; PyObject *tmp_fun = NULL; - int tot,opt,ext,siz,i,di=0; + Py_ssize_t tot, opt, ext, siz, i, di = 0; CFUNCSMESS(\"create_cb_arglist\\n\"); tot=opt=ext=siz=0; /* Get the total number of arguments */ @@ -1103,10 +1165,15 @@ Py_INCREF(tmp_fun); } } -if (tmp_fun==NULL) { -fprintf(stderr,\"Call-back argument must be function|instance|instance.__call__|f2py-function but got %s.\\n\",(fun==NULL?\"NULL\":Py_TYPE(fun)->tp_name)); -goto capi_fail; -} + + if (tmp_fun == NULL) { + fprintf(stderr, + \"Call-back argument must be function|instance|instance.__call__|f2py-function \" + \"but got %s.\\n\", + ((fun == NULL) ? \"NULL\" : Py_TYPE(fun)->tp_name)); + goto capi_fail; + } + if (PyObject_HasAttrString(tmp_fun,\"__code__\")) { if (PyObject_HasAttrString(tmp = PyObject_GetAttrString(tmp_fun,\"__code__\"),\"co_argcount\")) { PyObject *tmp_argcount = PyObject_GetAttrString(tmp,\"co_argcount\"); @@ -1114,7 +1181,7 @@ if (tmp_argcount == NULL) { goto capi_fail; } - tot = PyInt_AsLong(tmp_argcount) - di; + tot = PyLong_AsSsize_t(tmp_argcount) - di; Py_DECREF(tmp_argcount); } } @@ -1130,13 +1197,23 @@ /* Calculate the size of call-backs argument list */ siz = MIN(maxnofargs+ext,tot); *nofargs = MAX(0,siz-ext); + #ifdef DEBUGCFUNCS - fprintf(stderr,\"debug-capi:create_cb_arglist:maxnofargs(-nofoptargs),tot,opt,ext,siz,nofargs=%d(-%d),%d,%d,%d,%d,%d\\n\",maxnofargs,nofoptargs,tot,opt,ext,siz,*nofargs); + fprintf(stderr, + \"debug-capi:create_cb_arglist:maxnofargs(-nofoptargs),\" + \"tot,opt,ext,siz,nofargs = %d(-%d), %zd, %zd, %zd, %zd, %d\\n\", + maxnofargs, nofoptargs, tot, opt, ext, siz, *nofargs); #endif - if (siz Date: Mon, 21 Sep 2020 16:32:14 -0400 Subject: [PATCH 076/409] DOC: Add how-to index entry for how-to guide PR #17352 --- doc/source/user/how-to-how-to.rst | 2 -- doc/source/user/howtos_index.rst | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/user/how-to-how-to.rst b/doc/source/user/how-to-how-to.rst index 59a51efda0ab..48feae6db92d 100644 --- a/doc/source/user/how-to-how-to.rst +++ b/doc/source/user/how-to-how-to.rst @@ -1,5 +1,3 @@ -:orphan: - .. _how-to-how-to: ############################################################################## diff --git a/doc/source/user/howtos_index.rst b/doc/source/user/howtos_index.rst index c052286b9be8..45e013e6fb6a 100644 --- a/doc/source/user/howtos_index.rst +++ b/doc/source/user/howtos_index.rst @@ -11,4 +11,5 @@ the package, see the :ref:`API reference `. .. 
toctree:: :maxdepth: 1 + how-to-how-to ionumpy From e1c109910e6f78fce17099c5cbdebd08867b1d8b Mon Sep 17 00:00:00 2001 From: Daniel Vanzo Date: Fri, 18 Sep 2020 10:31:23 +0200 Subject: [PATCH 077/409] ENH: Add support for the NVIDIA HPC SDK fortran compiler --- numpy/distutils/fcompiler/__init__.py | 2 +- numpy/distutils/fcompiler/nv.py | 48 +++++++++++++++++++++++++++ numpy/tests/test_public_api.py | 1 + 3 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 numpy/distutils/fcompiler/nv.py diff --git a/numpy/distutils/fcompiler/__init__.py b/numpy/distutils/fcompiler/__init__.py index 1c3069363f84..31c992d8fb1d 100644 --- a/numpy/distutils/fcompiler/__init__.py +++ b/numpy/distutils/fcompiler/__init__.py @@ -745,7 +745,7 @@ def wrap_unlinkable_objects(self, objects, output_dir, extra_dll_dir): ('win32', ('gnu', 'intelv', 'absoft', 'compaqv', 'intelev', 'gnu95', 'g95', 'intelvem', 'intelem', 'flang')), ('cygwin.*', ('gnu', 'intelv', 'absoft', 'compaqv', 'intelev', 'gnu95', 'g95')), - ('linux.*', ('gnu95', 'intel', 'lahey', 'pg', 'absoft', 'nag', 'vast', 'compaq', + ('linux.*', ('gnu95', 'intel', 'lahey', 'pg', 'nv', 'absoft', 'nag', 'vast', 'compaq', 'intele', 'intelem', 'gnu', 'g95', 'pathf95', 'nagfor')), ('darwin.*', ('gnu95', 'nag', 'absoft', 'ibm', 'intel', 'gnu', 'g95', 'pg')), ('sunos.*', ('sun', 'gnu', 'gnu95', 'g95')), diff --git a/numpy/distutils/fcompiler/nv.py b/numpy/distutils/fcompiler/nv.py new file mode 100644 index 000000000000..60d117c80166 --- /dev/null +++ b/numpy/distutils/fcompiler/nv.py @@ -0,0 +1,48 @@ +# https://developer.nvidia.com/hpc-sdk +import sys + +from numpy.distutils.fcompiler import FCompiler + +compilers = ['NVHPCFCompiler'] + +class NVHPCFCompiler(FCompiler): + + compiler_type = 'nv' + description = 'NVIDIA HPC SDK' + version_pattern = r'\s*(nvfortran|(pg(f77|f90|fortran)) \(aka nvfortran\)) (?P[\d.-]+).*' + + executables = { + 'version_cmd': ["", "-V"], + 'compiler_f77': ["nvfortran"], + 'compiler_fix': ["nvfortran", "-Mfixed"], + 'compiler_f90': ["nvfortran"], + 'linker_so': [""], + 'archiver': ["ar", "-cr"], + 'ranlib': ["ranlib"] + } + pic_flags = ['-fpic'] + + module_dir_switch = '-module ' + module_include_switch = '-I' + + def get_flags(self): + opt = ['-Minform=inform', '-Mnosecond_underscore'] + return self.pic_flags + opt + + def get_flags_opt(self): + return ['-fast'] + + def get_flags_debug(self): + return ['-g'] + + def get_flags_linker_so(self): + return ["-shared", '-fpic'] + + def runtime_library_dir_option(self, dir): + return '-R%s' % dir + +if __name__ == '__main__': + from distutils import log + log.set_verbosity(2) + from numpy.distutils import customized_fcompiler + print(customized_fcompiler(compiler='nv').get_version()) diff --git a/numpy/tests/test_public_api.py b/numpy/tests/test_public_api.py index a9d6da01c7db..ae00d1fc9e35 100644 --- a/numpy/tests/test_public_api.py +++ b/numpy/tests/test_public_api.py @@ -251,6 +251,7 @@ def test_NPY_NO_EXPORT(): "distutils.fcompiler.none", "distutils.fcompiler.pathf95", "distutils.fcompiler.pg", + "distutils.fcompiler.nv", "distutils.fcompiler.sun", "distutils.fcompiler.vast", "distutils.from_template", From 0a93d77580656c7451d7f3dc31eaa9c1fcb76881 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Tue, 22 Sep 2020 08:49:44 -0400 Subject: [PATCH 078/409] DOC: Revise #17352 with @mattip suggestions --- doc/source/user/how-to-how-to.rst | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/doc/source/user/how-to-how-to.rst 
index 48feae6db92d..de8afc28ae9e 100644
--- a/doc/source/user/how-to-how-to.rst
+++ b/doc/source/user/how-to-how-to.rst
@@ -14,39 +14,39 @@ How-tos get straight to the point -- they
 A stranger has asked for directions...
 ******************************************************************************
 
-**"I need to fill my tank."**
+**"I need to refuel my car."**
 
 ******************************************************************************
 Give a brief but explicit answer
 ******************************************************************************
 
- - `"Three miles, take a right at Hayseed Road, it's on your left."`
+ - `"Three kilometers/miles, take a right at Hayseed Road, it's on your left."`
 
 Add helpful details for newcomers ("Hayseed Road", even though it's the only
-turnoff at three miles). But not irrelevant ones:
+turnoff at three km/mi). But not irrelevant ones:
 
  - Don't also give directions from Route 7.
- - Don't explain why the town has only one gas station.
+ - Don't explain why the town has only one filling station.
 
 If there's related background (tutorial, explanation, reference, alternative
 approach), bring it to the user's attention with a link ("Directions from Route 7,"
-"Why so few gas stations?").
+"Why so few filling stations?").
 
 ******************************************************************************
 Delegate
 ******************************************************************************
 
- - `"Three miles, take a right at Hayseed Road, follow the signs."`
+ - `"Three km/mi, take a right at Hayseed Road, follow the signs."`
 
 If the information is already documented and succinct enough for a how-to,
-just link to it, possibly after an introduction ("Three miles, take a right").
+just link to it, possibly after an introduction ("Three km/mi, take a right").
 
 ******************************************************************************
 If the question is broad, narrow and redirect it
 ******************************************************************************
 
-   **"I want to see the sights"**
+   **"I want to see the sights."**
 
 The `See the sights` how-to should link to a set of narrower how-tos:
 
@@ -90,7 +90,10 @@ Aren't how-tos and tutorials the same thing?
 ******************************************************************************
 
 People use the terms "how-to" and "tutorial" interchangeably, but we draw a
-distinction, following `Daniele Procida <https://documentation.divio.com>`_.
+distinction, following Daniele Procida's `taxonomy of documentation`_.
+
+  .. _`taxonomy of documentation`: https://documentation.divio.com/
+
 Documentation needs to meet users where they are. `How-tos` offer get-it-done
 information; the user wants steps to copy and doesn't necessarily want to
 understand NumPy. `Tutorials` are warm-fuzzy information; the user wants a
@@ -102,8 +105,9 @@ deep dives intended to give understanding rather than immediate assistance,
 and `References`, which give complete, authoritative data on some concrete part
 of NumPy (like its API) but aren't obligated to paint a broader picture.
 
-For more on tutorials, see
-`the tutorial how-to <https://github.com/numpy/numpy-tutorials/blob/master/tutorial_style.ipynb>`.
+For more on tutorials, see the `tutorial how-to`_.
+
+.. _`tutorial how-to`: https://github.com/numpy/numpy-tutorials/blob/master/tutorial_style.ipynb
 
 ******************************************************************************

From 155248f763db534abde85ce382716cf0ab3347b9 Mon Sep 17 00:00:00 2001
From: Daniel Vanzo
Date: Tue, 22 Sep 2020 15:08:21 +0200
Subject: [PATCH 079/409] ENH: Add comments about Nvidia HPC SDK and PGI

---
 numpy/distutils/fcompiler/nv.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/numpy/distutils/fcompiler/nv.py b/numpy/distutils/fcompiler/nv.py
index 60d117c80166..4cf30f5ef100 100644
--- a/numpy/distutils/fcompiler/nv.py
+++ b/numpy/distutils/fcompiler/nv.py
@@ -5,6 +5,10 @@
 
 compilers = ['NVHPCFCompiler']
 
+"""
+Since August 2020 the NVIDIA HPC SDK includes the compilers formerly known as The Portland Group compilers.
+https://www.pgroup.com/index.htm
+"""
 class NVHPCFCompiler(FCompiler):
 
     compiler_type = 'nv'
     description = 'NVIDIA HPC SDK'

From 7afa1d0a5566e56c7f444ac6729784f9730bb559 Mon Sep 17 00:00:00 2001
From: Gerry Manoim
Date: Fri, 18 Sep 2020 13:27:07 -0400
Subject: [PATCH 080/409] DEV: add pr prefix labeler and prefix mapping

---
 .github/pr-prefix-labeler.yml | 12 ++++++++++++
 .github/workflows/labeler.yml | 13 +++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 .github/pr-prefix-labeler.yml
 create mode 100644 .github/workflows/labeler.yml

diff --git a/.github/pr-prefix-labeler.yml b/.github/pr-prefix-labeler.yml
new file mode 100644
index 000000000000..b50def97e930
--- /dev/null
+++ b/.github/pr-prefix-labeler.yml
@@ -0,0 +1,12 @@
+"API": "30 - API"
+"BENCH": "28 - Benchmark"
+"BUG": "00 - Bug"
+"DEP": "07 - Deprecation"
+"DEV": "16 - Development"
+"DOC": "04 - Documentation"
+"ENH": "01 - Enhancement"
+"MAINT": "03 - Maintenance"
+"REV": "34 - Reversion"
+"TST": "05 - Testing"
+"REL": "14 - Release"
+"WIP": "25 - WIP"
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
new file mode 100644
index 000000000000..99db967b383b
--- /dev/null
+++ b/.github/workflows/labeler.yml
@@ -0,0 +1,13 @@
+name: "Pull Request Labeler"
+on:
+  pull_request_target:
+    types: [opened, synchronize, reopened, edited]
+
+jobs:
+  pr-labeler:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Label the PR
+        uses: gerrymanoim/pr-prefix-labeler@v3
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

From c46d1822b56b2f40c79665fb045f6860d6540038 Mon Sep 17 00:00:00 2001
From: Bas van Beek
Date: Tue, 15 Sep 2020 17:42:50 +0200
Subject: [PATCH 081/409] ENH: Add annotations for non-magic ndarray methods

---
 numpy/__init__.pyi | 466 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 430 insertions(+), 36 deletions(-)

diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
index d4eda6b3150f..eb4b15d61987 100644
--- a/numpy/__init__.pyi
+++ b/numpy/__init__.pyi
@@ -409,6 +409,8 @@ class _ArrayOrScalarCommon(
     ) -> _ArraySelf: ...
     def swapaxes(self: _ArraySelf, axis1: int, axis2: int) -> _ArraySelf: ...
     def tobytes(self, order: _OrderKACF = ...) -> bytes: ...
+    # NOTE: `tostring()` is deprecated and therefore excluded
+    # def tostring(self, order=...): ...
     def tofile(
         self, fid: Union[IO[bytes], str], sep: str = ..., format: str = ...
     ) -> None: ...
@@ -437,42 +439,434 @@
     def __array_struct__(self): ...
     def __array_wrap__(array, context=...): ...
     def __setstate__(self, __state): ...
-    def all(self, axis=..., out=..., keepdims=...): ...
-    def any(self, axis=..., out=..., keepdims=...): ...
-    def argmax(self, axis=..., out=...): ...
-    def argmin(self, axis=..., out=...): ...
- def argpartition(self, kth, axis=..., kind=..., order=...): ... - def argsort(self, axis=..., kind=..., order=...): ... - def choose(self, choices, out=..., mode=...): ... - def clip(self, min=..., max=..., out=..., **kwargs): ... - def compress(self, condition, axis=..., out=...): ... - def conj(self): ... - def conjugate(self): ... - def cumprod(self, axis=..., dtype=..., out=...): ... - def cumsum(self, axis=..., dtype=..., out=...): ... - def diagonal(self, offset=..., axis1=..., axis2=...): ... - def dot(self, b, out=...): ... - def max(self, axis=..., out=..., keepdims=..., initial=..., where=...): ... - def mean(self, axis=..., dtype=..., out=..., keepdims=...): ... - def min(self, axis=..., out=..., keepdims=..., initial=..., where=...): ... - def newbyteorder(self, new_order=...): ... - def nonzero(self): ... - def partition(self, kth, axis=..., kind=..., order=...): ... - def prod(self, axis=..., dtype=..., out=..., keepdims=..., initial=..., where=...): ... - def ptp(self, axis=..., out=..., keepdims=...): ... - def put(self, indices, values, mode=...): ... - def repeat(self, repeats, axis=...): ... - def round(self, decimals=..., out=...): ... - def searchsorted(self, v, side=..., sorter=...): ... - def setfield(self, val, dtype, offset=...): ... - def sort(self, axis=..., kind=..., order=...): ... - def std(self, axis=..., dtype=..., out=..., ddof=..., keepdims=...): ... - def sum(self, axis=..., dtype=..., out=..., keepdims=..., initial=..., where=...): ... - def take(self, indices, axis=..., out=..., mode=...): ... - # NOTE: `tostring()` is deprecated and therefore excluded - # def tostring(self, order=...): ... - def trace(self, offset=..., axis1=..., axis2=..., dtype=..., out=...): ... - def var(self, axis=..., dtype=..., out=..., ddof=..., keepdims=...): ... + # a `bool_` is returned when `keepdims=True` and `self` is a 0d array + @overload + def all( + self, axis: None = ..., out: None = ..., keepdims: Literal[False] = ... + ) -> bool_: ... + @overload + def all( + self, axis: Optional[_ShapeLike] = ..., out: None = ..., keepdims: bool = ... + ) -> Union[bool_, ndarray]: ... + @overload + def all( + self, + axis: Optional[_ShapeLike] = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + ) -> _NdArraySubClass: ... + @overload + def any( + self, axis: None = ..., out: None = ..., keepdims: Literal[False] = ... + ) -> bool_: ... + @overload + def any( + self, axis: Optional[_ShapeLike] = ..., out: None = ..., keepdims: bool = ... + ) -> Union[bool_, ndarray]: ... + @overload + def any( + self, + axis: Optional[_ShapeLike] = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + ) -> _NdArraySubClass: ... + @overload + def argmax(self, axis: None = ..., out: None = ...) -> signedinteger: ... + @overload + def argmax( + self, axis: _ShapeLike = ..., out: None = ... + ) -> Union[signedinteger, ndarray]: ... + @overload + def argmax( + self, axis: Optional[_ShapeLike] = ..., out: _NdArraySubClass = ... + ) -> _NdArraySubClass: ... + @overload + def argmin(self, axis: None = ..., out: None = ...) -> signedinteger: ... + @overload + def argmin( + self, axis: _ShapeLike = ..., out: None = ... + ) -> Union[signedinteger, ndarray]: ... + @overload + def argmin( + self, axis: Optional[_ShapeLike] = ..., out: _NdArraySubClass = ... + ) -> _NdArraySubClass: ... + def argpartition( + self, + kth: _ArrayLikeIntOrBool, + axis: Optional[int] = ..., + kind: _PartitionKind = ..., + order: Union[None, str, Sequence[str]] = ..., + ) -> ndarray: ... 
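# An illustrative aside (editor's sketch, not part of the stub file): the
# pattern above pairs an ``out: None`` overload, which returns a plain
# scalar/ndarray, with an ``out: _NdArraySubClass`` overload that propagates
# the array subclass, so a type checker can follow ``out=`` through the call.
# The runtime types agree with the annotations:

import numpy as np

class SubClass(np.ndarray): ...

A = np.array([[1, 2], [3, 4]])
out = np.empty((), dtype=bool).view(SubClass)

print(type(A.all()))         # numpy.bool_ -- matches the out=None overload
print(type(A.all(out=out)))  # SubClass    -- matches the out= overload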
+ def argsort( + self, + axis: Optional[int] = ..., + kind: Optional[_SortKind] = ..., + order: Union[None, str, Sequence[str]] = ..., + ) -> ndarray: ... + @overload + def choose( + self, choices: ArrayLike, out: None = ..., mode: _Mode = ..., + ) -> ndarray: ... + @overload + def choose( + self, choices: ArrayLike, out: _NdArraySubClass = ..., mode: _Mode = ..., + ) -> _NdArraySubClass: ... + @overload + def clip( + self: _ArraySelf, + min: ArrayLike = ..., + max: Optional[ArrayLike] = ..., + out: None = ..., + **kwargs: Any, + ) -> Union[_ArraySelf, number]: ... + @overload + def clip( + self: _ArraySelf, + min: None = ..., + max: ArrayLike = ..., + out: None = ..., + **kwargs: Any, + ) -> Union[_ArraySelf, number]: ... + @overload + def clip( + self, + min: ArrayLike = ..., + max: Optional[ArrayLike] = ..., + out: _NdArraySubClass = ..., + **kwargs: Any, + ) -> _NdArraySubClass: ... + @overload + def clip( + self, + min: None = ..., + max: ArrayLike = ..., + out: _NdArraySubClass = ..., + **kwargs: Any, + ) -> _NdArraySubClass: ... + @overload + def compress( + self: _ArraySelf, a: ArrayLike, axis: Optional[int] = ..., out: None = ..., + ) -> _ArraySelf: ... + @overload + def compress( + self, a: ArrayLike, axis: Optional[int] = ..., out: _NdArraySubClass = ..., + ) -> _NdArraySubClass: ... + def conj(self: _ArraySelf) -> _ArraySelf: ... + def conjugate(self: _ArraySelf) -> _ArraySelf: ... + @overload + def cumprod( + self, axis: Optional[int] = ..., dtype: DtypeLike = ..., out: None = ..., + ) -> ndarray: ... + @overload + def cumprod( + self, + axis: Optional[int] = ..., + dtype: DtypeLike = ..., + out: _NdArraySubClass = ..., + ) -> _NdArraySubClass: ... + @overload + def cumsum( + self, axis: Optional[int] = ..., dtype: DtypeLike = ..., out: None = ..., + ) -> ndarray: ... + @overload + def cumsum( + self, + axis: Optional[int] = ..., + dtype: DtypeLike = ..., + out: _NdArraySubClass = ..., + ) -> _NdArraySubClass: ... + def diagonal( + self: _ArraySelf, offset: int = ..., axis1: int = ..., axis2: int = ... + ) -> _ArraySelf: ... + @overload + def dot(self, b: ArrayLike, out: None = ...) -> Union[number, ndarray]: ... + @overload + def dot(self, b: ArrayLike, out: _NdArraySubClass = ...) -> _NdArraySubClass: ... + @overload + def max( + self, + axis: None = ..., + out: None = ..., + keepdims: Literal[False] = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., + ) -> number: ... + @overload + def max( + self, + axis: Optional[_ShapeLike] = ..., + out: None = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., + ) -> Union[number, ndarray]: ... + @overload + def max( + self, + axis: Optional[_ShapeLike] = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., + ) -> _NdArraySubClass: ... + @overload + def mean( + self, + axis: None = ..., + dtype: DtypeLike = ..., + out: None = ..., + keepdims: Literal[False] = ..., + ) -> number: ... + @overload + def mean( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: None = ..., + keepdims: bool = ..., + ) -> Union[number, ndarray]: ... + @overload + def mean( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + ) -> _NdArraySubClass: ... + @overload + def min( + self, + axis: None = ..., + out: None = ..., + keepdims: Literal[False] = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., + ) -> number: ... 
+ @overload + def min( + self, + axis: Optional[_ShapeLike] = ..., + out: None = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., + ) -> Union[number, ndarray]: ... + @overload + def min( + self, + axis: Optional[_ShapeLike] = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., + ) -> _NdArraySubClass: ... + def newbyteorder(self: _ArraySelf, __new_order: _ByteOrder = ...) -> _ArraySelf: ... + def nonzero(self) -> Tuple[ndarray, ...]: ... + def partition( + self, + kth: _ArrayLikeIntOrBool, + axis: int = ..., + kind: _PartitionKind = ..., + order: Union[None, str, Sequence[str]] = ..., + ) -> None: ... + @overload + def prod( + self, + axis: None = ..., + dtype: DtypeLike = ..., + out: None = ..., + keepdims: Literal[False] = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., + ) -> number: ... + @overload + def prod( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: None = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., + ) -> Union[number, ndarray]: ... + @overload + def prod( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., + ) -> _NdArraySubClass: ... + @overload + def ptp( + self, axis: None = ..., out: None = ..., keepdims: Literal[False] = ..., + ) -> number: ... + @overload + def ptp( + self, axis: Optional[_ShapeLike] = ..., out: None = ..., keepdims: bool = ..., + ) -> Union[number, ndarray]: ... + @overload + def ptp( + self, + axis: Optional[_ShapeLike] = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + ) -> _NdArraySubClass: ... + def put( + self, ind: _ArrayLikeIntOrBool, v: ArrayLike, mode: _Mode = ... + ) -> None: ... + def repeat( + self, repeats: _ArrayLikeIntOrBool, axis: Optional[int] = ... + ) -> ndarray: ... + @overload + def round(self: _ArraySelf, decimals: int = ..., out: None = ...) -> _ArraySelf: ... + @overload + def round( + self, decimals: int = ..., out: _NdArraySubClass = ... + ) -> _NdArraySubClass: ... + def searchsorted( + self, + v: ArrayLike, + side: _Side = ..., + sorter: Optional[_ArrayLikeIntOrBool] = ..., # 1D int array + ) -> ndarray: ... + def setfield( + self, val: ArithmeticError, dtype: DtypeLike, offset: int = ... + ) -> None: ... + def sort( + self, + axis: Optional[int] = ..., + kind: Optional[_SortKind] = ..., + order: Union[None, str, Sequence[str]] = ..., + ) -> None: ... + @overload + def std( + self, + axis: None = ..., + dtype: DtypeLike = ..., + out: None = ..., + ddof: int = ..., + keepdims: Literal[False] = ..., + ) -> number: ... + @overload + def std( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: None = ..., + ddof: int = ..., + keepdims: bool = ..., + ) -> Union[number, ndarray]: ... + @overload + def std( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: _NdArraySubClass = ..., + ddof: int = ..., + keepdims: bool = ..., + ) -> _NdArraySubClass: ... + @overload + def sum( + self, + axis: None = ..., + dtype: DtypeLike = ..., + out: None = ..., + keepdims: Literal[False] = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., + ) -> number: ... 
+ @overload + def sum( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: None = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., + ) -> Union[number, ndarray]: ... + @overload + def sum( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: _NdArraySubClass = ..., + keepdims: bool = ..., + initial: _NumberLike = ..., + where: _ArrayLikeBool = ..., + ) -> _NdArraySubClass: ... + @overload + def take( + self, + indices: Union[_IntLike, _BoolLike], + axis: Optional[int] = ..., + out: None = ..., + mode: _Mode = ..., + ) -> generic: ... + @overload + def take( + self, + indices: _ArrayLikeIntOrBool, + axis: Optional[int] = ..., + out: None = ..., + mode: _Mode = ..., + ) -> ndarray: ... + @overload + def take( + self, + indices: _ArrayLikeIntOrBool, + axis: Optional[int] = ..., + out: _NdArraySubClass = ..., + mode: _Mode = ..., + ) -> _NdArraySubClass: ... + @overload + def trace( + self, # >= 2D array + offset: int = ..., + axis1: int = ..., + axis2: int = ..., + dtype: DtypeLike = ..., + out: None = ..., + ) -> Union[number, ndarray]: ... + @overload + def trace( + self, # >= 2D array + offset: int = ..., + axis1: int = ..., + axis2: int = ..., + dtype: DtypeLike = ..., + out: _NdArraySubClass = ..., + ) -> _NdArraySubClass: ... + @overload + def var( + self, + axis: None = ..., + dtype: DtypeLike = ..., + out: None = ..., + ddof: int = ..., + keepdims: Literal[False] = ..., + ) -> number: ... + @overload + def var( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: None = ..., + ddof: int = ..., + keepdims: bool = ..., + ) -> Union[number, ndarray]: ... + @overload + def var( + self, + axis: Optional[_ShapeLike] = ..., + dtype: DtypeLike = ..., + out: _NdArraySubClass = ..., + ddof: int = ..., + keepdims: bool = ..., + ) -> _NdArraySubClass: ... _BufferType = Union[ndarray, bytes, bytearray, memoryview] _Casting = Literal["no", "equiv", "safe", "same_kind", "unsafe"] From 40cce4f69f6e4dfbda45e76294238db1f3223a97 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Tue, 15 Sep 2020 19:27:45 +0200 Subject: [PATCH 082/409] MAINT: Moved a number of methods from `_ArrayOrScalarCommon` to `ndarray` A number of methods require a >= 1D array or are completelly absent from `generic`. Move them to `ndarray`. --- numpy/__init__.pyi | 123 +++++++++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 60 deletions(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index eb4b15d61987..480732c15e67 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -490,13 +490,6 @@ class _ArrayOrScalarCommon( def argmin( self, axis: Optional[_ShapeLike] = ..., out: _NdArraySubClass = ... ) -> _NdArraySubClass: ... - def argpartition( - self, - kth: _ArrayLikeIntOrBool, - axis: Optional[int] = ..., - kind: _PartitionKind = ..., - order: Union[None, str, Sequence[str]] = ..., - ) -> ndarray: ... def argsort( self, axis: Optional[int] = ..., @@ -545,8 +538,8 @@ class _ArrayOrScalarCommon( ) -> _NdArraySubClass: ... @overload def compress( - self: _ArraySelf, a: ArrayLike, axis: Optional[int] = ..., out: None = ..., - ) -> _ArraySelf: ... + self, a: ArrayLike, axis: Optional[int] = ..., out: None = ..., + ) -> ndarray: ... @overload def compress( self, a: ArrayLike, axis: Optional[int] = ..., out: _NdArraySubClass = ..., @@ -575,13 +568,6 @@ class _ArrayOrScalarCommon( dtype: DtypeLike = ..., out: _NdArraySubClass = ..., ) -> _NdArraySubClass: ... 
- def diagonal( - self: _ArraySelf, offset: int = ..., axis1: int = ..., axis2: int = ... - ) -> _ArraySelf: ... - @overload - def dot(self, b: ArrayLike, out: None = ...) -> Union[number, ndarray]: ... - @overload - def dot(self, b: ArrayLike, out: _NdArraySubClass = ...) -> _NdArraySubClass: ... @overload def max( self, @@ -661,14 +647,6 @@ class _ArrayOrScalarCommon( where: _ArrayLikeBool = ..., ) -> _NdArraySubClass: ... def newbyteorder(self: _ArraySelf, __new_order: _ByteOrder = ...) -> _ArraySelf: ... - def nonzero(self) -> Tuple[ndarray, ...]: ... - def partition( - self, - kth: _ArrayLikeIntOrBool, - axis: int = ..., - kind: _PartitionKind = ..., - order: Union[None, str, Sequence[str]] = ..., - ) -> None: ... @overload def prod( self, @@ -714,9 +692,6 @@ class _ArrayOrScalarCommon( out: _NdArraySubClass = ..., keepdims: bool = ..., ) -> _NdArraySubClass: ... - def put( - self, ind: _ArrayLikeIntOrBool, v: ArrayLike, mode: _Mode = ... - ) -> None: ... def repeat( self, repeats: _ArrayLikeIntOrBool, axis: Optional[int] = ... ) -> ndarray: ... @@ -726,21 +701,6 @@ class _ArrayOrScalarCommon( def round( self, decimals: int = ..., out: _NdArraySubClass = ... ) -> _NdArraySubClass: ... - def searchsorted( - self, - v: ArrayLike, - side: _Side = ..., - sorter: Optional[_ArrayLikeIntOrBool] = ..., # 1D int array - ) -> ndarray: ... - def setfield( - self, val: ArithmeticError, dtype: DtypeLike, offset: int = ... - ) -> None: ... - def sort( - self, - axis: Optional[int] = ..., - kind: Optional[_SortKind] = ..., - order: Union[None, str, Sequence[str]] = ..., - ) -> None: ... @overload def std( self, @@ -823,24 +783,6 @@ class _ArrayOrScalarCommon( mode: _Mode = ..., ) -> _NdArraySubClass: ... @overload - def trace( - self, # >= 2D array - offset: int = ..., - axis1: int = ..., - axis2: int = ..., - dtype: DtypeLike = ..., - out: None = ..., - ) -> Union[number, ndarray]: ... - @overload - def trace( - self, # >= 2D array - offset: int = ..., - axis1: int = ..., - axis2: int = ..., - dtype: DtypeLike = ..., - out: _NdArraySubClass = ..., - ) -> _NdArraySubClass: ... - @overload def var( self, axis: None = ..., @@ -901,6 +843,67 @@ class ndarray(_ArrayOrScalarCommon, Iterable, Sized, Container): def strides(self) -> _Shape: ... @strides.setter def strides(self, value: _ShapeLike): ... + def argpartition( + self, + kth: _ArrayLikeIntOrBool, + axis: Optional[int] = ..., + kind: _PartitionKind = ..., + order: Union[None, str, Sequence[str]] = ..., + ) -> ndarray: ... + def diagonal( + self: _ArraySelf, offset: int = ..., axis1: int = ..., axis2: int = ... + ) -> _ArraySelf: ... + @overload + def dot(self, b: ArrayLike, out: None = ...) -> Union[number, ndarray]: ... + @overload + def dot(self, b: ArrayLike, out: _NdArraySubClass = ...) -> _NdArraySubClass: ... + # `nonzero()` is deprecated for 0d arrays/generics + def nonzero(self) -> Tuple[ndarray, ...]: ... + def partition( + self, + kth: _ArrayLikeIntOrBool, + axis: int = ..., + kind: _PartitionKind = ..., + order: Union[None, str, Sequence[str]] = ..., + ) -> None: ... + # `put` is technically available to `generic`, + # but is pointless as `generic`s are immutable + def put( + self, ind: _ArrayLikeIntOrBool, v: ArrayLike, mode: _Mode = ... + ) -> None: ... + def searchsorted( + self, # >= 1D array + v: ArrayLike, + side: _Side = ..., + sorter: Optional[_ArrayLikeIntOrBool] = ..., # 1D int array + ) -> ndarray: ... + def setfield( + self, val: ArrayLike, dtype: DtypeLike, offset: int = ... + ) -> None: ... 
+ def sort( + self, + axis: int = ..., + kind: Optional[_SortKind] = ..., + order: Union[None, str, Sequence[str]] = ..., + ) -> None: ... + @overload + def trace( + self, # >= 2D array + offset: int = ..., + axis1: int = ..., + axis2: int = ..., + dtype: DtypeLike = ..., + out: None = ..., + ) -> Union[number, ndarray]: ... + @overload + def trace( + self, # >= 2D array + offset: int = ..., + axis1: int = ..., + axis2: int = ..., + dtype: DtypeLike = ..., + out: _NdArraySubClass = ..., + ) -> _NdArraySubClass: ... # Many of these special methods are irrelevant currently, since protocols # aren't supported yet. That said, I'm adding them for completeness. # https://docs.python.org/3/reference/datamodel.html From 932d4e8852cc2d1fd8efead9ca68de33fbf1ce67 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Thu, 17 Sep 2020 12:37:33 +0200 Subject: [PATCH 083/409] MAINT: Moved a few constants back to `__init__.pyi`; cleaned up their names --- numpy/__init__.pyi | 29 ++++++++++++++++++------ numpy/core/fromnumeric.pyi | 46 +++++++++++++------------------------- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 480732c15e67..fd7731133f3b 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -287,6 +287,21 @@ _OrderKACF = Optional[Literal["K", "A", "C", "F"]] _OrderACF = Optional[Literal["A", "C", "F"]] _OrderCF = Optional[Literal["C", "F"]] +_ModeKind = Literal["raise", "wrap", "clip"] +_PartitionKind = Literal["introselect"] +_SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"] +_SortSide = Literal["left", "right"] + +_ArrayLikeBool = Union[_BoolLike, Sequence[_BoolLike], ndarray] +_ArrayLikeIntOrBool = Union[ + _IntLike, + _BoolLike, + ndarray, + Sequence[_IntLike], + Sequence[_BoolLike], + Sequence[Sequence[Any]], # TODO: wait for support for recursive types +] + _ArraySelf = TypeVar("_ArraySelf", bound=_ArrayOrScalarCommon) class _ArrayOrScalarCommon( @@ -498,11 +513,11 @@ class _ArrayOrScalarCommon( ) -> ndarray: ... @overload def choose( - self, choices: ArrayLike, out: None = ..., mode: _Mode = ..., + self, choices: ArrayLike, out: None = ..., mode: _ModeKind = ..., ) -> ndarray: ... @overload def choose( - self, choices: ArrayLike, out: _NdArraySubClass = ..., mode: _Mode = ..., + self, choices: ArrayLike, out: _NdArraySubClass = ..., mode: _ModeKind = ..., ) -> _NdArraySubClass: ... @overload def clip( @@ -764,7 +779,7 @@ class _ArrayOrScalarCommon( indices: Union[_IntLike, _BoolLike], axis: Optional[int] = ..., out: None = ..., - mode: _Mode = ..., + mode: _ModeKind = ..., ) -> generic: ... @overload def take( @@ -772,7 +787,7 @@ class _ArrayOrScalarCommon( indices: _ArrayLikeIntOrBool, axis: Optional[int] = ..., out: None = ..., - mode: _Mode = ..., + mode: _ModeKind = ..., ) -> ndarray: ... @overload def take( @@ -780,7 +795,7 @@ class _ArrayOrScalarCommon( indices: _ArrayLikeIntOrBool, axis: Optional[int] = ..., out: _NdArraySubClass = ..., - mode: _Mode = ..., + mode: _ModeKind = ..., ) -> _NdArraySubClass: ... @overload def var( @@ -869,12 +884,12 @@ class ndarray(_ArrayOrScalarCommon, Iterable, Sized, Container): # `put` is technically available to `generic`, # but is pointless as `generic`s are immutable def put( - self, ind: _ArrayLikeIntOrBool, v: ArrayLike, mode: _Mode = ... + self, ind: _ArrayLikeIntOrBool, v: ArrayLike, mode: _ModeKind = ... ) -> None: ... 
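# A quick runtime illustration of the comment above (an editor-added sketch,
# not from the patch): ``put`` writes into the array in place, which makes
# sense for a mutable ndarray but would be pointless on an immutable scalar.

import numpy as np

a = np.zeros(4, dtype=np.int64)
a.put([0, 2], [7, 9])  # scatter 7 and 9 into positions 0 and 2, in place
print(a)               # [7 0 9 0]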
def searchsorted( self, # >= 1D array v: ArrayLike, - side: _Side = ..., + side: _SortSide = ..., sorter: Optional[_ArrayLikeIntOrBool] = ..., # 1D int array ) -> ndarray: ... def setfield( diff --git a/numpy/core/fromnumeric.pyi b/numpy/core/fromnumeric.pyi index 7ad772b07e54..6b3d2268f35e 100644 --- a/numpy/core/fromnumeric.pyi +++ b/numpy/core/fromnumeric.pyi @@ -13,6 +13,12 @@ from numpy import ( _IntLike, _BoolLike, _NumberLike, + _ArrayLikeBool, + _ArrayLikeIntOrBool, + _ModeKind, + _PartitionKind, + _SortKind, + _SortSide, ) from numpy.typing import DtypeLike, ArrayLike, _ShapeLike, _Shape @@ -21,11 +27,6 @@ if sys.version_info >= (3, 8): else: from typing_extensions import Literal -_Mode = Literal["raise", "wrap", "clip"] -_PartitionKind = Literal["introselect"] -_SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"] -_Side = Literal["left", "right"] - # Various annotations for scalars # While dt.datetime and dt.timedelta are not technically part of NumPy, @@ -44,21 +45,6 @@ _ScalarGenericDT = TypeVar( _Number = TypeVar("_Number", bound=number) -# An array-like object consisting of integers -_IntOrBool = Union[_IntLike, _BoolLike] -_ArrayLikeIntNested = ArrayLike # TODO: wait for support for recursive types -_ArrayLikeBoolNested = ArrayLike # TODO: wait for support for recursive types - -# Integers and booleans can generally be used interchangeably -_ArrayLikeIntOrBool = Union[ - _IntOrBool, - ndarray, - Sequence[_IntOrBool], - Sequence[_ArrayLikeIntNested], - Sequence[_ArrayLikeBoolNested], -] -_ArrayLikeBool = Union[_BoolLike, Sequence[_BoolLike], ndarray] - # The signature of take() follows a common theme with its overloads: # 1. A generic comes in; the same generic comes out # 2. A scalar comes in; a generic comes out @@ -70,7 +56,7 @@ def take( indices: int, axis: Optional[int] = ..., out: Optional[ndarray] = ..., - mode: _Mode = ..., + mode: _ModeKind = ..., ) -> _ScalarGenericDT: ... @overload def take( @@ -78,7 +64,7 @@ def take( indices: int, axis: Optional[int] = ..., out: Optional[ndarray] = ..., - mode: _Mode = ..., + mode: _ModeKind = ..., ) -> _ScalarNumpy: ... @overload def take( @@ -86,7 +72,7 @@ def take( indices: int, axis: Optional[int] = ..., out: Optional[ndarray] = ..., - mode: _Mode = ..., + mode: _ModeKind = ..., ) -> _ScalarNumpy: ... @overload def take( @@ -94,7 +80,7 @@ def take( indices: _ArrayLikeIntOrBool, axis: Optional[int] = ..., out: Optional[ndarray] = ..., - mode: _Mode = ..., + mode: _ModeKind = ..., ) -> Union[_ScalarNumpy, ndarray]: ... def reshape(a: ArrayLike, newshape: _ShapeLike, order: _OrderACF = ...) -> ndarray: ... @overload @@ -102,24 +88,24 @@ def choose( a: _ScalarIntOrBool, choices: ArrayLike, out: Optional[ndarray] = ..., - mode: _Mode = ..., + mode: _ModeKind = ..., ) -> _ScalarIntOrBool: ... @overload def choose( - a: _IntOrBool, choices: ArrayLike, out: Optional[ndarray] = ..., mode: _Mode = ... + a: Union[_IntLike, _BoolLike], choices: ArrayLike, out: Optional[ndarray] = ..., mode: _ModeKind = ... ) -> Union[integer, bool_]: ... @overload def choose( a: _ArrayLikeIntOrBool, choices: ArrayLike, out: Optional[ndarray] = ..., - mode: _Mode = ..., + mode: _ModeKind = ..., ) -> ndarray: ... def repeat( a: ArrayLike, repeats: _ArrayLikeIntOrBool, axis: Optional[int] = ... ) -> ndarray: ... def put( - a: ndarray, ind: _ArrayLikeIntOrBool, v: ArrayLike, mode: _Mode = ... + a: ndarray, ind: _ArrayLikeIntOrBool, v: ArrayLike, mode: _ModeKind = ... ) -> None: ... 
def swapaxes(a: ArrayLike, axis1: int, axis2: int) -> ndarray: ... def transpose( @@ -184,14 +170,14 @@ def argmin( def searchsorted( a: ArrayLike, v: _Scalar, - side: _Side = ..., + side: _SortSide = ..., sorter: Optional[_ArrayLikeIntOrBool] = ..., # 1D int array ) -> integer: ... @overload def searchsorted( a: ArrayLike, v: ArrayLike, - side: _Side = ..., + side: _SortSide = ..., sorter: Optional[_ArrayLikeIntOrBool] = ..., # 1D int array ) -> ndarray: ... def resize(a: ArrayLike, new_shape: _ShapeLike) -> ndarray: ... From 5c07541e6380f4bd0f5dee73e20deac21d40c529 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Fri, 18 Sep 2020 17:23:15 +0200 Subject: [PATCH 084/409] TST: Added new `ndarray`/`generic` typing tests --- numpy/__init__.pyi | 11 +- numpy/typing/tests/data/fail/ndarray_misc.py | 21 +++ numpy/typing/tests/data/pass/ndarray_misc.py | 158 ++++++++++++++++++ .../typing/tests/data/reveal/ndarray_misc.py | 150 +++++++++++++++++ 4 files changed, 336 insertions(+), 4 deletions(-) create mode 100644 numpy/typing/tests/data/fail/ndarray_misc.py create mode 100644 numpy/typing/tests/data/pass/ndarray_misc.py create mode 100644 numpy/typing/tests/data/reveal/ndarray_misc.py diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index fd7731133f3b..633aa0ca393c 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -521,20 +521,20 @@ class _ArrayOrScalarCommon( ) -> _NdArraySubClass: ... @overload def clip( - self: _ArraySelf, + self, min: ArrayLike = ..., max: Optional[ArrayLike] = ..., out: None = ..., **kwargs: Any, - ) -> Union[_ArraySelf, number]: ... + ) -> Union[number, ndarray]: ... @overload def clip( - self: _ArraySelf, + self, min: None = ..., max: ArrayLike = ..., out: None = ..., **kwargs: Any, - ) -> Union[_ArraySelf, number]: ... + ) -> Union[number, ndarray]: ... @overload def clip( self, @@ -1088,6 +1088,9 @@ class void(flexible): def real(self: _ArraySelf) -> _ArraySelf: ... @property def imag(self: _ArraySelf) -> _ArraySelf: ... + def setfield( + self, val: ArrayLike, dtype: DtypeLike, offset: int = ... + ) -> None: ... class character(flexible): ... # type: ignore diff --git a/numpy/typing/tests/data/fail/ndarray_misc.py b/numpy/typing/tests/data/fail/ndarray_misc.py new file mode 100644 index 000000000000..1e1496bfecca --- /dev/null +++ b/numpy/typing/tests/data/fail/ndarray_misc.py @@ -0,0 +1,21 @@ +""" +Tests for miscellaneous (non-magic) ``np.ndarray``/``np.generic`` methods. + +More extensive tests are performed for the methods' +function-based counterpart in `../from_numeric.py`. + +""" + +import numpy as np + +f8: np.float64 + +f8.argpartition(0) # E: has no attribute +f8.diagonal() # E: has no attribute +f8.dot(1) # E: has no attribute +f8.nonzero() # E: has no attribute +f8.partition(0) # E: has no attribute +f8.put(0, 2) # E: has no attribute +f8.setfield(2, np.float64) # E: has no attribute +f8.sort() # E: has no attribute +f8.trace() # E: has no attribute diff --git a/numpy/typing/tests/data/pass/ndarray_misc.py b/numpy/typing/tests/data/pass/ndarray_misc.py new file mode 100644 index 000000000000..1871aebba402 --- /dev/null +++ b/numpy/typing/tests/data/pass/ndarray_misc.py @@ -0,0 +1,158 @@ +""" +Tests for miscellaneous (non-magic) ``np.ndarray``/``np.generic`` methods. + +More extensive tests are performed for the methods' +function-based counterpart in `../from_numeric.py`. + +""" + +from typing import cast +import numpy as np + +class SubClass(np.ndarray): ... 
+ +i8 = np.int64(1) +A = np.array([[1]], dtype=np.int64) +B0 = np.empty((), dtype=np.int64).view(SubClass) +B1 = np.empty((1,), dtype=np.int64).view(SubClass) +B2 = np.empty((1, 1), dtype=np.int64).view(SubClass) +C = np.array([0, 1, 2], dtype=np.int64) +D = np.empty(3).view(SubClass) + +i8.all() +A.all() +A.all(axis=0) +A.all(keepdims=True) +A.all(out=B0) + +i8.any() +A.any() +A.any(axis=0) +A.any(keepdims=True) +A.any(out=B0) + +i8.argmax() +A.argmax() +A.argmax(axis=0) +A.argmax(out=B0) + +i8.argmin() +A.argmin() +A.argmin(axis=0) +A.argmin(out=B0) + +i8.argsort() +A.argsort() + +i8.choose([()]) +C.choose([[0, 1, 2], [3, 4, 5], [6, 7, 8]]) +C.choose([[0, 1, 2], [3, 4, 5], [6, 7, 8]], out=D) + +i8.clip(1) +A.clip(1) +A.clip(None, 1) +A.clip(1, out=B2) +A.clip(None, 1, out=B2) + +i8.compress([1]) +A.compress([1]) +A.compress([1], out=B1) + +i8.conj() +A.conj() +B0.conj() + +i8.conjugate() +A.conjugate() +B0.conjugate() + +i8.cumprod() +A.cumprod() +A.cumprod(out=B1) + +i8.cumsum() +A.cumsum() +A.cumsum(out=B1) + +i8.max() +A.max() +A.max(axis=0) +A.max(keepdims=True) +A.max(out=B0) + +i8.mean() +A.mean() +A.mean(axis=0) +A.mean(keepdims=True) +A.mean(out=B0) + +i8.min() +A.min() +A.min(axis=0) +A.min(keepdims=True) +A.min(out=B0) + +i8.newbyteorder() +A.newbyteorder() +B0.newbyteorder('|') + +i8.prod() +A.prod() +A.prod(axis=0) +A.prod(keepdims=True) +A.prod(out=B0) + +i8.ptp() +A.ptp() +A.ptp(axis=0) +A.ptp(keepdims=True) +A.astype(int).ptp(out=B0) + +i8.round() +A.round() +A.round(out=B2) + +i8.repeat(1) +A.repeat(1) +B0.repeat(1) + +i8.std() +A.std() +A.std(axis=0) +A.std(keepdims=True) +A.std(out=B0.astype(np.float64)) + +i8.sum() +A.sum() +A.sum(axis=0) +A.sum(keepdims=True) +A.sum(out=B0) + +i8.take(0) +A.take(0) +A.take([0]) +A.take(0, out=B0) +A.take([0], out=B1) + +i8.var() +A.var() +A.var(axis=0) +A.var(keepdims=True) +A.var(out=B0) + +A.argpartition([0]) + +A.diagonal() + +A.dot(1) +A.dot(1, out=B0) + +A.nonzero() + +C.searchsorted(1) + +A.trace() +A.trace(out=B0) + +void = cast(np.void, np.array(1, dtype=[("f", np.float64)]).take(0)) +void.setfield(10, np.float64) diff --git a/numpy/typing/tests/data/reveal/ndarray_misc.py b/numpy/typing/tests/data/reveal/ndarray_misc.py new file mode 100644 index 000000000000..22a4564df1b7 --- /dev/null +++ b/numpy/typing/tests/data/reveal/ndarray_misc.py @@ -0,0 +1,150 @@ +""" +Tests for miscellaneous (non-magic) ``np.ndarray``/``np.generic`` methods. + +More extensive tests are performed for the methods' +function-based counterpart in `../from_numeric.py`. + +""" + +import numpy as np + +class SubClass(np.ndarray): ... 
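# How these pass/reveal test files are consumed (a simplified, hypothetical
# sketch -- numpy's actual runner lives in numpy/typing/tests/test_typing.py):
# a "pass" file must type-check without errors, and each "# E:" comment in a
# "reveal" file is matched against the note mypy prints for reveal_type().

from mypy import api

stdout, _, exit_code = api.run(["numpy/typing/tests/data/pass/ndarray_misc.py"])
assert exit_code == 0, stdout  # no errors allowed in a "pass" file

stdout, _, _ = api.run(["numpy/typing/tests/data/reveal/ndarray_misc.py"])
for line in stdout.splitlines():
    if "Revealed type" in line:  # e.g. note: Revealed type is 'numpy.bool_'
        print(line)              # compared against the "# E:" comment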
+ +f8: np.float64 +A: np.ndarray +B: SubClass + +reveal_type(f8.all()) # E: numpy.bool_ +reveal_type(A.all()) # E: numpy.bool_ +reveal_type(A.all(axis=0)) # E: Union[numpy.bool_, numpy.ndarray] +reveal_type(A.all(keepdims=True)) # E: Union[numpy.bool_, numpy.ndarray] +reveal_type(A.all(out=B)) # E: SubClass + +reveal_type(f8.any()) # E: numpy.bool_ +reveal_type(A.any()) # E: numpy.bool_ +reveal_type(A.any(axis=0)) # E: Union[numpy.bool_, numpy.ndarray] +reveal_type(A.any(keepdims=True)) # E: Union[numpy.bool_, numpy.ndarray] +reveal_type(A.any(out=B)) # E: SubClass + +reveal_type(f8.argmax()) # E: numpy.signedinteger +reveal_type(A.argmax()) # E: numpy.signedinteger +reveal_type(A.argmax(axis=0)) # E: Union[numpy.signedinteger, numpy.ndarray] +reveal_type(A.argmax(out=B)) # E: SubClass + +reveal_type(f8.argmin()) # E: numpy.signedinteger +reveal_type(A.argmin()) # E: numpy.signedinteger +reveal_type(A.argmin(axis=0)) # E: Union[numpy.signedinteger, numpy.ndarray] +reveal_type(A.argmin(out=B)) # E: SubClass + +reveal_type(f8.argsort()) # E: numpy.ndarray +reveal_type(A.argsort()) # E: numpy.ndarray + +reveal_type(f8.astype(np.int64).choose([()])) # E: numpy.ndarray +reveal_type(A.choose([0])) # E: numpy.ndarray +reveal_type(A.choose([0], out=B)) # E: SubClass + +reveal_type(f8.clip(1)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.clip(1)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.clip(None, 1)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.clip(1, out=B)) # E: SubClass +reveal_type(A.clip(None, 1, out=B)) # E: SubClass + +reveal_type(f8.compress([0])) # E: numpy.ndarray +reveal_type(A.compress([0])) # E: numpy.ndarray +reveal_type(A.compress([0], out=B)) # E: SubClass + +reveal_type(f8.conj()) # E: numpy.float64 +reveal_type(A.conj()) # E: numpy.ndarray +reveal_type(B.conj()) # E: SubClass + +reveal_type(f8.conjugate()) # E: numpy.float64 +reveal_type(A.conjugate()) # E: numpy.ndarray +reveal_type(B.conjugate()) # E: SubClass + +reveal_type(f8.cumprod()) # E: numpy.ndarray +reveal_type(A.cumprod()) # E: numpy.ndarray +reveal_type(A.cumprod(out=B)) # E: SubClass + +reveal_type(f8.cumsum()) # E: numpy.ndarray +reveal_type(A.cumsum()) # E: numpy.ndarray +reveal_type(A.cumsum(out=B)) # E: SubClass + +reveal_type(f8.max()) # E: numpy.number +reveal_type(A.max()) # E: numpy.number +reveal_type(A.max(axis=0)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.max(keepdims=True)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.max(out=B)) # E: SubClass + +reveal_type(f8.mean()) # E: numpy.number +reveal_type(A.mean()) # E: numpy.number +reveal_type(A.mean(axis=0)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.mean(keepdims=True)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.mean(out=B)) # E: SubClass + +reveal_type(f8.min()) # E: numpy.number +reveal_type(A.min()) # E: numpy.number +reveal_type(A.min(axis=0)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.min(keepdims=True)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.min(out=B)) # E: SubClass + +reveal_type(f8.newbyteorder()) # E: numpy.float64 +reveal_type(A.newbyteorder()) # E: numpy.ndarray +reveal_type(B.newbyteorder('|')) # E: SubClass + +reveal_type(f8.prod()) # E: numpy.number +reveal_type(A.prod()) # E: numpy.number +reveal_type(A.prod(axis=0)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.prod(keepdims=True)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.prod(out=B)) # E: SubClass + +reveal_type(f8.ptp()) # E: numpy.number 
+reveal_type(A.ptp()) # E: numpy.number +reveal_type(A.ptp(axis=0)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.ptp(keepdims=True)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.ptp(out=B)) # E: SubClass + +reveal_type(f8.round()) # E: numpy.float64 +reveal_type(A.round()) # E: numpy.ndarray +reveal_type(A.round(out=B)) # E: SubClass + +reveal_type(f8.repeat(1)) # E: numpy.ndarray +reveal_type(A.repeat(1)) # E: numpy.ndarray +reveal_type(B.repeat(1)) # E: numpy.ndarray + +reveal_type(f8.std()) # E: numpy.number +reveal_type(A.std()) # E: numpy.number +reveal_type(A.std(axis=0)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.std(keepdims=True)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.std(out=B)) # E: SubClass + +reveal_type(f8.sum()) # E: numpy.number +reveal_type(A.sum()) # E: numpy.number +reveal_type(A.sum(axis=0)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.sum(keepdims=True)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.sum(out=B)) # E: SubClass + +reveal_type(f8.take(0)) # E: numpy.generic +reveal_type(A.take(0)) # E: numpy.generic +reveal_type(A.take([0])) # E: numpy.ndarray +reveal_type(A.take(0, out=B)) # E: SubClass +reveal_type(A.take([0], out=B)) # E: SubClass + +reveal_type(f8.var()) # E: numpy.number +reveal_type(A.var()) # E: numpy.number +reveal_type(A.var(axis=0)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.var(keepdims=True)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.var(out=B)) # E: SubClass + +reveal_type(A.argpartition([0])) # E: numpy.ndarray + +reveal_type(A.diagonal()) # E: numpy.ndarray + +reveal_type(A.dot(1)) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.dot(1, out=B)) # E: SubClass + +reveal_type(A.nonzero()) # E: tuple[numpy.ndarray] + +reveal_type(A.searchsorted([1])) # E: numpy.ndarray + +reveal_type(A.trace()) # E: Union[numpy.number, numpy.ndarray] +reveal_type(A.trace(out=B)) # E: SubClass From fe7c3614a491d087ddc9102bfcff63bed7f8cea1 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Wed, 23 Sep 2020 08:39:45 -0400 Subject: [PATCH 085/409] DOC: History cleanup for #17353 --- doc/source/conf.py | 2 + doc/source/user/how-to-io.rst | 322 +++++++++++++++++++++++++++++++ doc/source/user/howtos_index.rst | 2 +- doc/source/user/ionumpy.rst | 20 -- 4 files changed, 325 insertions(+), 21 deletions(-) create mode 100644 doc/source/user/how-to-io.rst delete mode 100644 doc/source/user/ionumpy.rst diff --git a/doc/source/conf.py b/doc/source/conf.py index fe7ea096740d..66eb948561e6 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -227,6 +227,8 @@ def setup(app): 'matplotlib': ('https://matplotlib.org', None), 'imageio': ('https://imageio.readthedocs.io/en/stable', None), 'skimage': ('https://scikit-image.org/docs/stable', None), + 'pandas': ('https://pandas.pydata.org/pandas-docs/stable', None), + 'scipy-lecture-notes': ('https://scipy-lectures.org', None), } diff --git a/doc/source/user/how-to-io.rst b/doc/source/user/how-to-io.rst new file mode 100644 index 000000000000..49c0583c3b41 --- /dev/null +++ b/doc/source/user/how-to-io.rst @@ -0,0 +1,322 @@ +.. _how-to-io: + +############################################################################## +Reading and writing files +############################################################################## + +This page tackles common applications; for the full collection of I/O +routines, see :ref:`routines.io`. 
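(An illustrative aside before the recipes below; the file name and contents
are made up for this sketch.) For the simplest case -- a text file with no
missing values -- :func:`numpy.loadtxt` is a one-liner::

    >>> # data.csv contains two lines:  1,2,3  and  4,5,6
    >>> np.loadtxt("data.csv", delimiter=",")  # doctest: +SKIP
    array([[1., 2., 3.],
           [4., 5., 6.]])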
+
+
+******************************************************************************
+Reading text and CSV_ files
+******************************************************************************
+
+.. _CSV: https://en.wikipedia.org/wiki/Comma-separated_values
+
+With no missing values
+==============================================================================
+
+Use :func:`numpy.loadtxt`.
+
+With missing values
+==============================================================================
+
+Use :func:`numpy.genfromtxt`.
+
+:func:`numpy.genfromtxt` will either
+
+  - return a :ref:`masked array <maskedarray.generic>`
+    **masking out missing values** (if ``usemask=True``), or
+
+  - **fill in the missing value** with the value specified in
+    ``filling_values`` (default is ``np.nan`` for float, -1 for int).
+
+With non-whitespace delimiters
+------------------------------------------------------------------------------
+::
+
+    $ cat csv.txt  # doctest: +SKIP
+    1, 2, 3
+    4,, 6
+    7, 8, 9
+
+
+Masked-array output
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+::
+
+    >>> np.genfromtxt("csv.txt", delimiter=",", usemask=True)  # doctest: +SKIP
+    masked_array(
+      data=[[1.0, 2.0, 3.0],
+            [4.0, --, 6.0],
+            [7.0, 8.0, 9.0]],
+      mask=[[False, False, False],
+            [False, True, False],
+            [False, False, False]],
+      fill_value=1e+20)
+
+Array output
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+::
+
+    >>> np.genfromtxt("csv.txt", delimiter=",")  # doctest: +SKIP
+    array([[ 1.,  2.,  3.],
+           [ 4., nan,  6.],
+           [ 7.,  8.,  9.]])
+
+Array output, specified fill-in value
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+::
+
+    >>> np.genfromtxt("csv.txt", delimiter=",", dtype=np.int8, filling_values=99)  # doctest: +SKIP
+    array([[ 1,  2,  3],
+           [ 4, 99,  6],
+           [ 7,  8,  9]], dtype=int8)
+
+Whitespace-delimited
+-------------------------------------------------------------------------------
+
+:func:`numpy.genfromtxt` can also parse whitespace-delimited data files
+that have missing values if
+
+* **Each field has a fixed width**: Use the width as the `delimiter` argument.
+  ::
+
+    # File with width=4. The data does not have to be justified (for example, the
+    # 2 in row 1), the last column can be less than width (for example, the 6 in
+    # row 2), and no delimiting character is required (for instance 8888 and 9 in row 3)
+
+    $ cat fixedwidth.txt  # doctest: +SKIP
+    1   2      3
+    44      6
+    7   88889
+
+    # Showing spaces as '^'
+    $ tr ' ' '^' < fixedwidth.txt  # doctest: +SKIP
+    1^^^2^^^^^^3
+    44^^^^^^6
+    7^^^88889
+
+    >>> np.genfromtxt("fixedwidth.txt", delimiter=4)  # doctest: +SKIP
+    array([[1.000e+00, 2.000e+00, 3.000e+00],
+           [4.400e+01,       nan, 6.000e+00],
+           [7.000e+00, 8.888e+03, 9.000e+00]])
+
+* **A special value (e.g. "x") indicates a missing field**: Use it as the
+  `missing_values` argument.
+  ::
+
+    $ cat nan.txt  # doctest: +SKIP
+    1 2 3
+    44 x 6
+    7 8888 9
+
+    >>> np.genfromtxt("nan.txt", missing_values='x')  # doctest: +SKIP
+    array([[1.000e+00, 2.000e+00, 3.000e+00],
+           [4.400e+01,       nan, 6.000e+00],
+           [7.000e+00, 8.888e+03, 9.000e+00]])
+
+* **You want to skip the rows with missing values**: Set
+  `invalid_raise=False`.
+  ::
+
+    $ cat skip.txt  # doctest: +SKIP
+    1 2 3
+    44 6
+    7 888 9
+
+    >>> np.genfromtxt("skip.txt", invalid_raise=False)  # doctest: +SKIP
+    __main__:1: ConversionWarning: Some errors were detected !
+      Line #2 (got 2 columns instead of 3)
+    array([[  1.,   2.,   3.],
+           [  7., 888.,   9.]])
+
+
+* **The delimiter whitespace character is different from the whitespace that
+  indicates missing data**. For instance, if columns are delimited by ``\t``,
+  then missing data will be recognized if it consists of one
+  or more spaces.
+  ::
+
+    $ cat tabs.txt  # doctest: +SKIP
+    1       2       3
+    44              6
+    7       888     9
+
+    # Showing the tabs (^I) and spaces
+    $ cat -T tabs.txt  # doctest: +SKIP
+    1^I2^I3
+    44^I ^I6
+    7^I888^I9
+
+    >>> np.genfromtxt("tabs.txt", delimiter="\t", missing_values=" +")  # doctest: +SKIP
+    array([[  1.,   2.,   3.],
+           [ 44.,  nan,   6.],
+           [  7., 888.,   9.]])
+
+******************************************************************************
+Read a file in .npy or .npz format
+******************************************************************************
+
+Choices:
+
+  - Use :func:`numpy.load`. It can read files generated by any of
+    :func:`numpy.save`, :func:`numpy.savez`, or :func:`numpy.savez_compressed`.
+
+  - Use memory mapping. See `numpy.lib.format.open_memmap`.
+
+******************************************************************************
+Write to a file to be read back by NumPy
+******************************************************************************
+
+Binary
+===============================================================================
+
+Use
+:func:`numpy.save`, or to store multiple arrays :func:`numpy.savez`
+or :func:`numpy.savez_compressed`.
+
+For :ref:`security and portability <how-to-io-pickle-file>`, set
+``allow_pickle=False`` unless the dtype contains Python objects, which
+requires pickling.
+
+Masked arrays :any:`can't currently be saved <MaskedArray.tofile>`,
+nor can other arbitrary array subclasses.
+
+Human-readable
+==============================================================================
+
+:func:`numpy.save` and :func:`numpy.savez` create binary files. To **write a
+human-readable file**, use :func:`numpy.savetxt`. The array can only be 1- or
+2-dimensional, and there's no `savetxtz` for multiple files.
+
+Large arrays
+==============================================================================
+
+See :ref:`how-to-io-large-arrays`.
+
+******************************************************************************
+Read an arbitrarily formatted binary file ("binary blob")
+******************************************************************************
+
+Use a :doc:`structured array <basics.rec>`.
+
+**Example:**
+
+The ``.wav`` file header is a 44-byte block preceding ``data_size`` bytes of the
+actual sound data::
+
+   chunk_id         "RIFF"
+   chunk_size       4-byte unsigned little-endian integer
+   format           "WAVE"
+   fmt_id           "fmt "
+   fmt_size         4-byte unsigned little-endian integer
+   audio_fmt        2-byte unsigned little-endian integer
+   num_channels     2-byte unsigned little-endian integer
+   sample_rate      4-byte unsigned little-endian integer
+   byte_rate        4-byte unsigned little-endian integer
+   block_align      2-byte unsigned little-endian integer
+   bits_per_sample  2-byte unsigned little-endian integer
+   data_id          "data"
+   data_size        4-byte unsigned little-endian integer
+
+The ``.wav`` file header as a NumPy structured dtype::
+
+    wav_header_dtype = np.dtype([
+        ("chunk_id", (bytes, 4)),   # flexible-sized scalar type, item size 4
+        ("chunk_size", "<u4"),      # little-endian unsigned 32-bit integer
+        ("format", "S4"),           # 4-byte string
+        ("fmt_id", "S4"),
+        ("fmt_size", "<u4"),
+        ("audio_fmt", "<u2"),
+        ("num_channels", "<u2"),
+        ("sample_rate", "<u4"),
+        ("byte_rate", "<u4"),
+        ("block_align", "<u2"),
+        ("bits_per_sample", "<u2"),
+        ("data_id", "S4"),
+        ("data_size", "<u4"),
+    ])
+
+To read the header as a structured scalar::
+
+    header = np.fromfile(f, dtype=wav_header_dtype, count=1)[0]
+
+.. _how-to-io-large-arrays:
+
+******************************************************************************
+Write or read large arrays
+******************************************************************************
+
+**Arrays too large to fit in memory** can be treated like ordinary in-memory
+arrays using memory mapping.
+
+Memory mapping lacks features like data chunking and compression; more
+full-featured formats and libraries usable with NumPy include:
+
+* **HDF5**: `h5py <https://www.h5py.org>`_ or `PyTables <https://www.pytables.org>`_.
+* **Zarr**: `here <https://zarr.readthedocs.io>`_.
+* **NetCDF**: :class:`scipy.io.netcdf_file`.
+
+For tradeoffs among memmap, Zarr, and HDF5, see
+`pythonspeed.com <https://pythonspeed.com/articles/mmap-vs-zarr-hdf5/>`_.
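(A minimal sketch of the memory-mapping route above; the file name, dtype,
and shape are illustrative assumptions, not part of the patch.)::

    >>> a = np.arange(12, dtype=np.int16).reshape(3, 4)
    >>> a.tofile("myarray.arr")  # raw bytes, no header  # doctest: +SKIP
    >>> m = np.memmap("myarray.arr", mode="r", dtype=np.int16, shape=(3, 4))  # doctest: +SKIP
    >>> int(m[1, 2])  # reads only the bytes it needs  # doctest: +SKIP
    6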
+ +****************************************************************************** +Write files for reading by other (non-NumPy) tools +****************************************************************************** + +Formats for **exchanging data** with other tools include HDF5, Zarr, and +NetCDF (see :ref:`how-to-io-large-arrays`). + +****************************************************************************** +Write or read a JSON file +****************************************************************************** + +NumPy arrays are **not** directly +`JSON serializable `_. + + +.. _how-to-io-pickle-file: + +****************************************************************************** +Save/restore using a pickle file +****************************************************************************** + +Not recommended, due to lack of security and portability. + + * **security**: not secure against erroneous or maliciously constructed data + * **portability**: may not be loadable on different Python installations + +Use :func:`numpy.save` and :func:`numpy.load`. Set ``allow_pickle=False``, unless +the array dtype includes Python objects, in which case pickling is required. + +****************************************************************************** +Convert from a pandas DataFrame to a NumPy array +****************************************************************************** + +See :meth:`pandas.DataFrame.to_numpy`. + +****************************************************************************** + Save/restore using `~numpy.ndarray.tofile` and `~numpy.fromfile` +****************************************************************************** + +In general, prefer :func:`numpy.save` and :func:`numpy.load`. + +:func:`numpy.ndarray.tofile` and :func:`numpy.fromfile` lose information on +endianness and precision and so are unsuitable for anything but scratch +storage. + diff --git a/doc/source/user/howtos_index.rst b/doc/source/user/howtos_index.rst index 45e013e6fb6a..89a6f54e791c 100644 --- a/doc/source/user/howtos_index.rst +++ b/doc/source/user/howtos_index.rst @@ -12,4 +12,4 @@ the package, see the :ref:`API reference `. :maxdepth: 1 how-to-how-to - ionumpy + how-to-io diff --git a/doc/source/user/ionumpy.rst b/doc/source/user/ionumpy.rst deleted file mode 100644 index a31720322a77..000000000000 --- a/doc/source/user/ionumpy.rst +++ /dev/null @@ -1,20 +0,0 @@ -================================================ -How to read and write data using NumPy -================================================ - -.. currentmodule:: numpy - -.. testsetup:: - - import numpy as np - np.random.seed(1) - -**Objectives** - -- Writing NumPy arrays to files -- Reading NumPy arrays from files -- Dealing with encoding and dtype issues - -**Content** - -To be completed. From 9c799c4fd07c2b3d13c189fda5bee4d6b45e54bb Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Wed, 23 Sep 2020 09:06:09 -0400 Subject: [PATCH 086/409] DOC: Update pickle section in PR #17353 --- doc/source/user/how-to-io.rst | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/doc/source/user/how-to-io.rst b/doc/source/user/how-to-io.rst index 49c0583c3b41..f8b4dee9f21f 100644 --- a/doc/source/user/how-to-io.rst +++ b/doc/source/user/how-to-io.rst @@ -296,13 +296,12 @@ NumPy arrays are **not** directly Save/restore using a pickle file ****************************************************************************** -Not recommended, due to lack of security and portability. 
+Avoid when possible; :doc:`pickles ` are not secure +against erroneous or maliciously constructed data. - * **security**: not secure against erroneous or maliciously constructed data - * **portability**: may not be loadable on different Python installations - -Use :func:`numpy.save` and :func:`numpy.load`. Set ``allow_pickle=False``, unless -the array dtype includes Python objects, in which case pickling is required. +Use :func:`numpy.save` and :func:`numpy.load`. Set ``allow_pickle=False``, +unless the array dtype includes Python objects, in which case pickling is +required. ****************************************************************************** Convert from a pandas DataFrame to a NumPy array From 9c5812b3eaa5a586bde4a19fa4845ee43f83ba8a Mon Sep 17 00:00:00 2001 From: Warren Weckesser Date: Wed, 23 Sep 2020 09:53:40 -0400 Subject: [PATCH 087/409] DOC: distutils: Remove an obsolete paragraph. --- doc/DISTUTILS.rst.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/doc/DISTUTILS.rst.txt b/doc/DISTUTILS.rst.txt index 01527374d182..f1f2704621a1 100644 --- a/doc/DISTUTILS.rst.txt +++ b/doc/DISTUTILS.rst.txt @@ -587,10 +587,6 @@ The header of a typical SciPy ``__init__.py`` is:: test = Tester().test bench = Tester().bench -Note that NumPy submodules still use a file named ``info.py`` in which the -module docstring and ``__all__`` dict are defined. These files will be removed -at some point. - Extra features in NumPy Distutils ''''''''''''''''''''''''''''''''' From 2fe46b4d25788a43499893c968e114bfa9be8a10 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Wed, 23 Sep 2020 11:10:39 -0400 Subject: [PATCH 088/409] DOC: Incorporate blob suggestions in PR #17353 --- doc/source/user/how-to-io.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/user/how-to-io.rst b/doc/source/user/how-to-io.rst index f8b4dee9f21f..64096bfc2a79 100644 --- a/doc/source/user/how-to-io.rst +++ b/doc/source/user/how-to-io.rst @@ -236,13 +236,13 @@ The ``.wav`` file header as a NumPy structured dtype:: ("block_align", " Date: Wed, 23 Sep 2020 18:41:32 +0200 Subject: [PATCH 089/409] TST: Fixed an `int`-related failure on 32-bit systems --- numpy/typing/tests/data/pass/ndarray_misc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/numpy/typing/tests/data/pass/ndarray_misc.py b/numpy/typing/tests/data/pass/ndarray_misc.py index 1871aebba402..787370c0c6f2 100644 --- a/numpy/typing/tests/data/pass/ndarray_misc.py +++ b/numpy/typing/tests/data/pass/ndarray_misc.py @@ -45,8 +45,9 @@ class SubClass(np.ndarray): ... A.argsort() i8.choose([()]) -C.choose([[0, 1, 2], [3, 4, 5], [6, 7, 8]]) -C.choose([[0, 1, 2], [3, 4, 5], [6, 7, 8]], out=D) +_choices = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype=np.int64) +C.choose(_choices) +C.choose(_choices, out=D) i8.clip(1) A.clip(1) From e67bbd2410927743db85c7f11b33db8129f865c0 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Wed, 23 Sep 2020 19:09:28 +0200 Subject: [PATCH 090/409] TST: Attempt #2 at fixing the `int`-related failure on 32-bit systems --- numpy/typing/tests/data/pass/ndarray_misc.py | 62 ++++++++++---------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/numpy/typing/tests/data/pass/ndarray_misc.py b/numpy/typing/tests/data/pass/ndarray_misc.py index 787370c0c6f2..6c6f5d50b986 100644 --- a/numpy/typing/tests/data/pass/ndarray_misc.py +++ b/numpy/typing/tests/data/pass/ndarray_misc.py @@ -11,131 +11,131 @@ class SubClass(np.ndarray): ... 
-i8 = np.int64(1) -A = np.array([[1]], dtype=np.int64) -B0 = np.empty((), dtype=np.int64).view(SubClass) -B1 = np.empty((1,), dtype=np.int64).view(SubClass) -B2 = np.empty((1, 1), dtype=np.int64).view(SubClass) -C = np.array([0, 1, 2], dtype=np.int64) +i4 = np.int32(1) +A = np.array([[1]], dtype=np.int32) +B0 = np.empty((), dtype=np.int32).view(SubClass) +B1 = np.empty((1,), dtype=np.int32).view(SubClass) +B2 = np.empty((1, 1), dtype=np.int32).view(SubClass) +C = np.array([0, 1, 2], dtype=np.int32) D = np.empty(3).view(SubClass) -i8.all() +i4.all() A.all() A.all(axis=0) A.all(keepdims=True) A.all(out=B0) -i8.any() +i4.any() A.any() A.any(axis=0) A.any(keepdims=True) A.any(out=B0) -i8.argmax() +i4.argmax() A.argmax() A.argmax(axis=0) A.argmax(out=B0) -i8.argmin() +i4.argmin() A.argmin() A.argmin(axis=0) A.argmin(out=B0) -i8.argsort() +i4.argsort() A.argsort() -i8.choose([()]) -_choices = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype=np.int64) +i4.choose([()]) +_choices = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype=np.int32) C.choose(_choices) C.choose(_choices, out=D) -i8.clip(1) +i4.clip(1) A.clip(1) A.clip(None, 1) A.clip(1, out=B2) A.clip(None, 1, out=B2) -i8.compress([1]) +i4.compress([1]) A.compress([1]) A.compress([1], out=B1) -i8.conj() +i4.conj() A.conj() B0.conj() -i8.conjugate() +i4.conjugate() A.conjugate() B0.conjugate() -i8.cumprod() +i4.cumprod() A.cumprod() A.cumprod(out=B1) -i8.cumsum() +i4.cumsum() A.cumsum() A.cumsum(out=B1) -i8.max() +i4.max() A.max() A.max(axis=0) A.max(keepdims=True) A.max(out=B0) -i8.mean() +i4.mean() A.mean() A.mean(axis=0) A.mean(keepdims=True) A.mean(out=B0) -i8.min() +i4.min() A.min() A.min(axis=0) A.min(keepdims=True) A.min(out=B0) -i8.newbyteorder() +i4.newbyteorder() A.newbyteorder() B0.newbyteorder('|') -i8.prod() +i4.prod() A.prod() A.prod(axis=0) A.prod(keepdims=True) A.prod(out=B0) -i8.ptp() +i4.ptp() A.ptp() A.ptp(axis=0) A.ptp(keepdims=True) A.astype(int).ptp(out=B0) -i8.round() +i4.round() A.round() A.round(out=B2) -i8.repeat(1) +i4.repeat(1) A.repeat(1) B0.repeat(1) -i8.std() +i4.std() A.std() A.std(axis=0) A.std(keepdims=True) A.std(out=B0.astype(np.float64)) -i8.sum() +i4.sum() A.sum() A.sum(axis=0) A.sum(keepdims=True) A.sum(out=B0) -i8.take(0) +i4.take(0) A.take(0) A.take([0]) A.take(0, out=B0) A.take([0], out=B1) -i8.var() +i4.var() A.var() A.var(axis=0) A.var(keepdims=True) From 45aaf7f2dea425db3012aadd43376bd14a6486db Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 24 Sep 2020 08:51:56 -0500 Subject: [PATCH 091/409] TST: Add test for pickling using ufunc name as qualname --- numpy/core/src/umath/_umath_tests.c.src | 9 +++++++++ numpy/core/tests/test_ufunc.py | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/numpy/core/src/umath/_umath_tests.c.src b/numpy/core/src/umath/_umath_tests.c.src index 660c296d6ddb..750fbeb92a7b 100644 --- a/numpy/core/src/umath/_umath_tests.c.src +++ b/numpy/core/src/umath/_umath_tests.c.src @@ -461,6 +461,15 @@ addUfuncs(PyObject *dictionary) { PyDict_SetItemString(dictionary, "cross1d", f); Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature(NULL, NULL, + NULL, 0, 0, 0, PyUFunc_None, "_pickleable_module_global.ufunc", + "A dotted name for pickle testing, does nothing.", 0, NULL); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "_pickleable_module_global_ufunc", f); + Py_DECREF(f); + return 0; } diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index 9eaa1a977715..0e9760853def 100644 --- 
a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -178,6 +178,10 @@ def __getattr__(self, attr): assert_array_equal(res_num.astype("O"), res_obj) +def _pickleable_module_global(): + pass + + class TestUfunc: def test_pickle(self): for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): @@ -195,6 +199,15 @@ def test_pickle_withstring(self): b"(S'numpy.core.umath'\np1\nS'cos'\np2\ntp3\nRp4\n.") assert_(pickle.loads(astring) is np.cos) + def test_pickle_name_is_qualname(self): + # This tests that a simplification of our ufunc pickle code will + # lead to allowing qualnames as names. Future ufuncs should + # possible add a specific qualname, or a hook into pickling instead + # (dask+numba may benefit). + _pickleable_module_global.ufunc = umt._pickleable_module_global_ufunc + obj = pickle.loads(pickle.dumps(_pickleable_module_global.ufunc)) + assert obj is umt._pickleable_module_global_ufunc + def test_reduceat_shifting_sum(self): L = 6 x = np.arange(L) From bf71a62b52dedd8ceaa47a96c144a3584e1bd95b Mon Sep 17 00:00:00 2001 From: MelissaWM Date: Wed, 23 Sep 2020 23:09:46 -0300 Subject: [PATCH 092/409] BUG: Fixes module data docstrings. Fixes gh-15325 --- numpy/f2py/src/fortranobject.c | 6 ++++-- numpy/f2py/tests/src/module_data/mod.mod | Bin 0 -> 412 bytes .../src/module_data/module_data_docstring.f90 | 12 ++++++++++++ numpy/f2py/tests/test_block_docstring.py | 2 +- numpy/f2py/tests/test_module_doc.py | 16 ++++++++++++++++ 5 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 numpy/f2py/tests/src/module_data/mod.mod create mode 100644 numpy/f2py/tests/src/module_data/module_data_docstring.f90 create mode 100644 numpy/f2py/tests/test_module_doc.py diff --git a/numpy/f2py/src/fortranobject.c b/numpy/f2py/src/fortranobject.c index aa46c57d0b92..e420b827b0fb 100644 --- a/numpy/f2py/src/fortranobject.c +++ b/numpy/f2py/src/fortranobject.c @@ -213,6 +213,7 @@ format_def(char *buf, Py_ssize_t size, FortranDataDef def) return -1; } memcpy(p, notalloc, sizeof(notalloc)); + p += sizeof(notalloc); } return p - buf; @@ -255,7 +256,7 @@ fortran_doc(FortranDataDef def) } else { PyArray_Descr *d = PyArray_DescrFromType(def.type); - n = PyOS_snprintf(p, size, "'%c'-", d->type); + n = PyOS_snprintf(p, size, "%s : '%c'-", def.name, d->type); Py_DECREF(d); if (n < 0 || n >= size) { goto fail; @@ -264,7 +265,7 @@ fortran_doc(FortranDataDef def) size -= n; if (def.data == NULL) { - n = format_def(p, size, def) == -1; + n = format_def(p, size, def);// == -1; if (n < 0) { goto fail; } @@ -288,6 +289,7 @@ fortran_doc(FortranDataDef def) p += n; size -= n; } + } if (size <= 1) { goto fail; diff --git a/numpy/f2py/tests/src/module_data/mod.mod b/numpy/f2py/tests/src/module_data/mod.mod new file mode 100644 index 0000000000000000000000000000000000000000..8670a97e911c48ff2cae2cf83dd14f42f8a7004a GIT binary patch literal 412 zcmV;N0b~9jiwFP!000001GSW0Z-OushVT0;&RyYROTn+~vcl1jVKEi9c}1AuVlpLB zx9sPyl#lp1-Qv81mPp@ZV7twwwq`<~;4U^s!8gj|Hg zD3T3PHo`hCwbY_|)(GHGKn?`EpR)sWaU)yHeSiME3{GQoYlK~p(v`nqTyWzIMi1!MKNavb zzF2!@e0|MasAF;@XrC#v;wM)8!Wh`P+!(Z~bIjCM#Yth%sr+Hk)owB9ndBRfe&AZC G1pojXWXECv literal 0 HcmV?d00001 diff --git a/numpy/f2py/tests/src/module_data/module_data_docstring.f90 b/numpy/f2py/tests/src/module_data/module_data_docstring.f90 new file mode 100644 index 000000000000..4505e0cbc31e --- /dev/null +++ b/numpy/f2py/tests/src/module_data/module_data_docstring.f90 @@ -0,0 +1,12 @@ +module mod + integer :: i + integer :: x(4) + real, dimension(2,3) :: a + real, allocatable, dimension(:,:) :: 
b +contains + subroutine foo + integer :: k + k = 1 + a(1,2) = a(1,2)+3 + end subroutine foo +end module mod diff --git a/numpy/f2py/tests/test_block_docstring.py b/numpy/f2py/tests/test_block_docstring.py index e431f5ba6c97..7d725165b2fb 100644 --- a/numpy/f2py/tests/test_block_docstring.py +++ b/numpy/f2py/tests/test_block_docstring.py @@ -19,5 +19,5 @@ class TestBlockDocString(util.F2PyTest): @pytest.mark.xfail(IS_PYPY, reason="PyPy cannot modify tp_doc after PyType_Ready") def test_block_docstring(self): - expected = "'i'-array(2,3)\n" + expected = "bar : 'i'-array(2,3)\n" assert_equal(self.module.block.__doc__, expected) diff --git a/numpy/f2py/tests/test_module_doc.py b/numpy/f2py/tests/test_module_doc.py new file mode 100644 index 000000000000..f597929ad4dc --- /dev/null +++ b/numpy/f2py/tests/test_module_doc.py @@ -0,0 +1,16 @@ +import os +from . import util + +from numpy.testing import assert_equal + + +def _path(*a): + return os.path.join(*((os.path.dirname(__file__),) + a)) + + +class TestModuleDocString(util.F2PyTest): + sources = [_path('src', 'module_data', 'module_data_docstring.f90')] + + def test_module_docstring(self): + expected = "i : 'i'-scalar\nx : 'i'-array(4)\na : 'f'-array(2,3)\nb : 'f'-array(-1,-1), not allocated\x00\nfoo()\n\nWrapper for ``foo``.\n\n" + assert_equal(self.module.mod.__doc__, expected) From 16e8c26b86f29cb18470fccc57d95c942c30f642 Mon Sep 17 00:00:00 2001 From: MelissaWM Date: Thu, 24 Sep 2020 15:25:15 -0300 Subject: [PATCH 093/409] Fixing tests for pypy and win. --- doc/source/f2py/allocarr_session.dat | 9 ++++++--- doc/source/f2py/common_session.dat | 6 +++--- doc/source/f2py/moddata_session.dat | 14 +++++++++----- numpy/f2py/tests/test_module_doc.py | 10 ++++++++-- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/doc/source/f2py/allocarr_session.dat b/doc/source/f2py/allocarr_session.dat index 754d9cb8b5a2..ba168c22aa12 100644 --- a/doc/source/f2py/allocarr_session.dat +++ b/doc/source/f2py/allocarr_session.dat @@ -1,8 +1,11 @@ >>> import allocarr >>> print(allocarr.mod.__doc__) -b - 'f'-array(-1,-1), not allocated -foo - Function signature: - foo() +b : 'f'-array(-1,-1), not allocated +foo() + +Wrapper for ``foo``. + + >>> allocarr.mod.foo() b is not allocated diff --git a/doc/source/f2py/common_session.dat b/doc/source/f2py/common_session.dat index 0a38bec27b8a..2595bfbd5b20 100644 --- a/doc/source/f2py/common_session.dat +++ b/doc/source/f2py/common_session.dat @@ -1,8 +1,8 @@ >>> import common >>> print(common.data.__doc__) -i - 'i'-scalar -x - 'i'-array(4) -a - 'f'-array(2,3) +i : 'i'-scalar +x : 'i'-array(4) +a : 'f'-array(2,3) >>> common.data.i = 5 >>> common.data.x[1] = 2 diff --git a/doc/source/f2py/moddata_session.dat b/doc/source/f2py/moddata_session.dat index e3c7580416f2..824bd86fc464 100644 --- a/doc/source/f2py/moddata_session.dat +++ b/doc/source/f2py/moddata_session.dat @@ -1,10 +1,14 @@ >>> import moddata >>> print(moddata.mod.__doc__) -i - 'i'-scalar -x - 'i'-array(4) -a - 'f'-array(2,3) -foo - Function signature: - foo() +i : 'i'-scalar +x : 'i'-array(4) +a : 'f'-array(2,3) +b : 'f'-array(-1,-1), not allocated +foo() + +Wrapper for ``foo``. + + >>> moddata.mod.i = 5 >>> moddata.mod.x[:2] = [1,2] diff --git a/numpy/f2py/tests/test_module_doc.py b/numpy/f2py/tests/test_module_doc.py index f597929ad4dc..9360b04a90a4 100644 --- a/numpy/f2py/tests/test_module_doc.py +++ b/numpy/f2py/tests/test_module_doc.py @@ -1,7 +1,9 @@ import os -from . 
import util +import sys +import pytest -from numpy.testing import assert_equal +from . import util +from numpy.testing import assert_equal, IS_PYPY def _path(*a): @@ -11,6 +13,10 @@ def _path(*a): class TestModuleDocString(util.F2PyTest): sources = [_path('src', 'module_data', 'module_data_docstring.f90')] + @pytest.mark.skipif(sys.platform=='win32', + reason='Fails with MinGW64 Gfortran (Issue #9673)') + @pytest.mark.xfail(IS_PYPY, + reason="PyPy cannot modify tp_doc after PyType_Ready") def test_module_docstring(self): expected = "i : 'i'-scalar\nx : 'i'-array(4)\na : 'f'-array(2,3)\nb : 'f'-array(-1,-1), not allocated\x00\nfoo()\n\nWrapper for ``foo``.\n\n" assert_equal(self.module.mod.__doc__, expected) From 2379f4c1a8f93b1f4e3e0cd40b587ca6e95d3f06 Mon Sep 17 00:00:00 2001 From: MelissaWM Date: Thu, 24 Sep 2020 19:43:24 -0300 Subject: [PATCH 094/409] Adding PR review comment. --- numpy/f2py/src/fortranobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/numpy/f2py/src/fortranobject.c b/numpy/f2py/src/fortranobject.c index e420b827b0fb..215473ed8641 100644 --- a/numpy/f2py/src/fortranobject.c +++ b/numpy/f2py/src/fortranobject.c @@ -214,6 +214,7 @@ format_def(char *buf, Py_ssize_t size, FortranDataDef def) } memcpy(p, notalloc, sizeof(notalloc)); p += sizeof(notalloc); + size -= sizeof(notalloc); } return p - buf; From 02e6b062027666b22c68d91e280b28f57ee9c2b5 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Fri, 25 Sep 2020 14:21:31 +0200 Subject: [PATCH 095/409] DOC: random: Fix default_rng docstring (#17375) --- numpy/random/_generator.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx index e40dcefe34b7..bff023d7ffb6 100644 --- a/numpy/random/_generator.pyx +++ b/numpy/random/_generator.pyx @@ -4521,7 +4521,7 @@ def default_rng(seed=None): unpredictable entropy will be pulled from the OS. If an ``int`` or ``array_like[ints]`` is passed, then it will be passed to `SeedSequence` to derive the initial `BitGenerator` state. One may also - pass in a`SeedSequence` instance + pass in a `SeedSequence` instance. Additionally, when passed a `BitGenerator`, it will be wrapped by `Generator`. If passed a `Generator`, it will be returned unaltered. From dbe6d464dbde074d207804e3aed1b3cfaacf0eda Mon Sep 17 00:00:00 2001 From: MelissaWM Date: Sat, 26 Sep 2020 10:01:01 -0300 Subject: [PATCH 096/409] Removing extra comment. 
--- numpy/f2py/src/fortranobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/f2py/src/fortranobject.c b/numpy/f2py/src/fortranobject.c index 215473ed8641..3275f90ad2cb 100644 --- a/numpy/f2py/src/fortranobject.c +++ b/numpy/f2py/src/fortranobject.c @@ -266,7 +266,7 @@ fortran_doc(FortranDataDef def) size -= n; if (def.data == NULL) { - n = format_def(p, size, def);// == -1; + n = format_def(p, size, def); if (n < 0) { goto fail; } From 73e95ddd7879589fbc3ff98d962dd0099fa77c10 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Sat, 26 Sep 2020 09:01:12 -0400 Subject: [PATCH 097/409] DOC: Revise 'contiguous' in PR #16996 --- doc/source/glossary.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index d4db87723d3d..5f8de3988457 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -282,7 +282,7 @@ Glossary An array is contiguous if * it occupies an unbroken block of memory, and * array elements with higher indexes occupy higher addresses (that - is, the :term:`stride` is not negative). + is, no :term:`stride` is negative). copy From 3a94fff71006af722af305f03653876c1a7e32fd Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Sat, 26 Sep 2020 11:02:25 -0400 Subject: [PATCH 098/409] DOC: Bash examples in PR #17353 now Python And other suggestions by @eric-wieser. Also standardizes quotes as '"'. --- doc/source/user/how-to-io.rst | 56 +++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/doc/source/user/how-to-io.rst b/doc/source/user/how-to-io.rst index 64096bfc2a79..d962e1546ad9 100644 --- a/doc/source/user/how-to-io.rst +++ b/doc/source/user/how-to-io.rst @@ -83,17 +83,19 @@ that have missing values if * **Each field has a fixed width**: Use the width as the `delimiter` argument. :: - # File with width=4. The data does not have to be justified (for example, the - # 2 in row 1), the last column can be less than width (for example, the 6 in - # row 2), and no delimiting character is required (for instance 8888 and 9 in row 3) + # File with width=4. The data does not have to be justified (for example, + # the 2 in row 1), the last column can be less than width (for example, the 6 + # in row 2), and no delimiting character is required (for instance 8888 and 9 + # in row 3) - $cat fixedwidth.txt # doctest: +SKIP + >>> f = open("fixedwidth.txt").read() # doctest: +SKIP + >>> print(f) # doctest: +SKIP 1 2 3 44 6 7 88889 - # Showing spaces as '^' - $ tr ' ' '^' < fixedwidth.txt # doctest: +SKIP + # Showing spaces as ^ + >>> print(f.replace(" ","^")) # doctest: +SKIP 1^^^2^^^^^^3 44^^^^^^6 7^^^88889 @@ -107,12 +109,12 @@ that have missing values if `missing_values` argument. :: - $ cat nan.txt # doctest: +SKIP + >>> print(open("nan.txt").read()) # doctest: +SKIP 1 2 3 44 x 6 7 8888 9 - >>> np.genfromtxt("nan.txt", missing_values='x') # doctest: +SKIP + >>> np.genfromtxt("nan.txt", missing_values="x") # doctest: +SKIP array([[1.000e+00, 2.000e+00, 3.000e+00], [4.400e+01, nan, 6.000e+00], [7.000e+00, 8.888e+03, 9.000e+00]]) @@ -121,7 +123,7 @@ that have missing values if `invalid_raise=False`. :: - $ cat skip.txt # doctest: +SKIP + >>> print(open("skip.txt").read()) # doctest: +SKIP 1 2 3 44 6 7 888 9 @@ -139,21 +141,22 @@ that have missing values if or more spaces. 
:: - $ cat tabs.txt # doctest: +SKIP - 1 2 3 - 44 6 - 7 888 9 + >>> f = open("tabs.txt").read() # doctest: +SKIP + >>> print(f) # doctest: +SKIP + 1 2 3 + 44 6 + 7 888 9 - # Showing the tabs (^I) and spaces - $ cat -T tabs.txt # doctest: +SKIP - 1^I2^I3 - 44^I ^I6 - 7^I888^I9 + # Tabs vs. spaces + >>> print(f.replace("\t","^")) # doctest: +SKIP + 1^2^3 + 44^ ^6 + 7^888^9 - >>> np.genfromtxt("tabs.txt", delimiter="\t", missing_values=" +") # doctest: +SKIP - array([[ 1., 2., 3.], - [ 44., nan, 6.], - [ 7., 888., 9.]]) + >>> np.genfromtxt("tabs.txt", delimiter="\t", missing_values=" +") # doctest: +SKIP + array([[ 1., 2., 3.], + [ 44., nan, 6.], + [ 7., 888., 9.]]) ****************************************************************************** Read a file in .npy or .npz format @@ -226,7 +229,7 @@ The ``.wav`` file header as a NumPy structured dtype:: wav_header_dtype = np.dtype([ ("chunk_id", (bytes, 4)), # flexible-sized scalar type, item size 4 ("chunk_size", " Date: Sat, 26 Sep 2020 11:43:26 -0400 Subject: [PATCH 099/409] DOC: Italics sted monospace in PR #16996 --- doc/source/glossary.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst index 5f8de3988457..17071c8f175f 100644 --- a/doc/source/glossary.rst +++ b/doc/source/glossary.rst @@ -241,7 +241,7 @@ Glossary broadcast - ``broadcasting`` is NumPy's ability to process ndarrays of + *broadcasting* is NumPy's ability to process ndarrays of different sizes as if all were the same size. It permits an elegant do-what-I-mean behavior where, for instance, From 5d458ac949833246b6b4b3c3ac3117ec902c7611 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Sat, 26 Sep 2020 11:59:52 -0400 Subject: [PATCH 100/409] DOC: One more bash->python in PR #17353 --- doc/source/user/how-to-io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user/how-to-io.rst b/doc/source/user/how-to-io.rst index d962e1546ad9..1d4ded159422 100644 --- a/doc/source/user/how-to-io.rst +++ b/doc/source/user/how-to-io.rst @@ -36,7 +36,7 @@ With non-whitespace delimiters ------------------------------------------------------------------------------ :: - $ cat csv.txt # doctest: +SKIP + >>> print(open("csv.txt").read()) # doctest: +SKIP 1, 2, 3 4,, 6 7, 8, 9 From d267252186be8cfe1f4c7bc743d4b4523620bb42 Mon Sep 17 00:00:00 2001 From: MelissaWM Date: Sat, 26 Sep 2020 14:12:09 -0300 Subject: [PATCH 101/409] Added textwrap.dedent to test. --- numpy/f2py/tests/test_module_doc.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/numpy/f2py/tests/test_module_doc.py b/numpy/f2py/tests/test_module_doc.py index 9360b04a90a4..4b9555cee1fc 100644 --- a/numpy/f2py/tests/test_module_doc.py +++ b/numpy/f2py/tests/test_module_doc.py @@ -1,6 +1,7 @@ import os import sys import pytest +import textwrap from . 
import util from numpy.testing import assert_equal, IS_PYPY @@ -18,5 +19,12 @@ class TestModuleDocString(util.F2PyTest): @pytest.mark.xfail(IS_PYPY, reason="PyPy cannot modify tp_doc after PyType_Ready") def test_module_docstring(self): - expected = "i : 'i'-scalar\nx : 'i'-array(4)\na : 'f'-array(2,3)\nb : 'f'-array(-1,-1), not allocated\x00\nfoo()\n\nWrapper for ``foo``.\n\n" - assert_equal(self.module.mod.__doc__, expected) + assert_equal(self.module.mod.__doc__, + textwrap.dedent('''\ + i : 'i'-scalar + x : 'i'-array(4) + a : 'f'-array(2,3) + b : 'f'-array(-1,-1), not allocated\x00 + foo()\n + Wrapper for ``foo``.\n\n''') + ) From 4c94a5dbb1edb072af4776e2b193675de33cae06 Mon Sep 17 00:00:00 2001 From: Jack Date: Sun, 27 Sep 2020 00:30:49 -0700 Subject: [PATCH 102/409] Minor grammatical correction in quickstart doc. Single sentence grammatical correction. --- doc/source/user/quickstart.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user/quickstart.rst b/doc/source/user/quickstart.rst index b1af8188675e..8e38234c598f 100644 --- a/doc/source/user/quickstart.rst +++ b/doc/source/user/quickstart.rst @@ -23,7 +23,7 @@ https://scipy.org/install.html for instructions. **Learner profile** This tutorial is intended as a quick overview of -algebra and arrays in NumPy and want to understand how n-dimensional +algebra and arrays in NumPy. It demonstrates how n-dimensional (:math:`n>=2`) arrays are represented and can be manipulated. In particular, if you don't know how to apply common functions to n-dimensional arrays (without using for-loops), or if you want to understand axis and shape properties for From 30a904198cd57251e46e6a60f3a6878cc4d0f531 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Sun, 27 Sep 2020 13:01:42 -0400 Subject: [PATCH 103/409] DOC: NumPy restyling for pydata theme --- doc/source/_static/numpy.css | 40 +++++++++++++++++++++++++++++++ doc/source/_templates/layout.html | 14 ++++------- 2 files changed, 44 insertions(+), 10 deletions(-) create mode 100644 doc/source/_static/numpy.css diff --git a/doc/source/_static/numpy.css b/doc/source/_static/numpy.css new file mode 100644 index 000000000000..22d08cc0dca6 --- /dev/null +++ b/doc/source/_static/numpy.css @@ -0,0 +1,40 @@ +@import url('https://fonts.googleapis.com/css2?family=Lato:ital,wght@0,400;0,700;0,900;1,400;1,700;1,900&family=Open+Sans:ital,wght@0,400;0,600;1,400;1,600&display=swap'); + +.navbar-brand img { + height: 75px; +} +.navbar-brand { + height: 75px; +} + +body { + font-family: 'Open Sans', sans-serif; + color:#4A4A4A; /* numpy.org body color */ +} + +pre, code { + font-size: 100%; + line-height: 155%; +} + +h1 { + font-style: "Lato", sans-serif; + color: #013243; /* warm black */ + font-weight: 700; + letter-spacing: -.04em; + text-align: right; + margin-top: 3rem; + margin-bottom: 4rem; + font-size: 3rem; +} + + +h2 { + color: #4d77cf; /* han blue */ + letter-spacing: -.03em; +} + +h3 { + color: #013243; /* warm black */ + letter-spacing: -.03em; +} diff --git a/doc/source/_templates/layout.html b/doc/source/_templates/layout.html index 0b0ba6271bdf..e2812fdd5ff3 100644 --- a/doc/source/_templates/layout.html +++ b/doc/source/_templates/layout.html @@ -1,16 +1,10 @@ {% extends "!layout.html" %} {%- block extrahead %} - +{{ super() }} + + + - -{{ super() }} {% endblock %} From 66685909f174c38ebfbf7f733f40bcf7d9e86d48 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Sun, 27 Sep 2020 13:01:41 -0500 Subject: [PATCH 104/409] NEP: Edit nep-0042 for more clarity (#17370) * DOC: 
Edit NEP-42 for clarity

* DOC: Further edits of NEP 42

* DOC: Edit NEP-42 for clarity

* DOC: Further edits of NEP 42

* WIP: Work out structure a bit and make intro longer

* DOC: Round 3 of NEP 42 edits

* Start with a new motivation/overview to be forced upon the rest

* restructure along "top-level" categories and rename adjust_descriptors

* Small fixups

* Small clarity/correctness edits

* Use `PyArrayMethod` instead of UFuncImpl

I am starting to feel that calling this "array method" (even if it is in
no way numpy array specific) is probably the best name. It is not ufunc
specific, because ufuncs are dispatching + array method. Calling it
ufuncimpl makes sense, but since the UFunc name, which would also work
perfectly, is already taken, maybe a whole new name is better.

* DOC: Fold in @seberg revisions

* Fix typo

* Make first example/overview inherit dtype instead of metaclass

It could say `metaclass=DTypeMeta`, but it's implicit.

* DOC: Add "checklist" to NEP 42

Also anchor NEP 40 figure for reference in NEP 42.

* Delete duplicated dtype-discovery section

The section must have been duplicated in a merge conflict :/, I scanned
through, and I think the version that was in the right place got most
edits, although not 100% sure.

* DOC: Reorganize scope, standardize heds

* DOC: Retitle, revisions up to Casting

Also anchor for parametric type discussion in NEP 40

* Some smaller changes/additions

* DOC: Reorganize for better flow

Also adds a NEP anchor at top.

* DOC: Glossary, shuffle "Additional information" bullets

* DOC: Small updates for sense and case

* Fixup merge (some changes were not saved when finishing merge)

* Answer bjnath's questions and motivate user impact

It seems most answers bjnath already included himself; this adds a few
tweaks. Added a note that the DType class as outlined is basically what
the user will have to implement, so spell that out both at the top and
in the C-API section to clarify that.

* NEP 41: Add subsection on dtype organization with parameters

This might go a bit far, but I think the image goes very far in giving
people the right idea of how to think of dtypes and DType instances.
The question is if we want to even show the "alternative"...

* Add bjnath as author

* Add new header also to NEP 41

* DOC: Minor: delete horiz rules, tweak 2 headings

* Rephrase NEP 41 bullet points in terms of NEP 42

Also changes those for NEP 43, to represent what is done, rather than
what is solved (and thus fit better to the way it's written for the
NEP 42 part)

* Try to reword paragraphs around setitem/getitem a bit

Some of these were very confusing; hopefully they are a bit better now.

* Make a pass over the array-coercion part

* Make careful pass up until array-coercion

* Pass over the second half of the NEP 42 draft.

* Use "given_descrs" which seems clearer and fix casting

I always thought returning the casting safety is an awesome idea, but
right now I am actually not so sure it helps much with anything. It's
usually simpler, I guess. But if the user requested safe casting and we
have an unsafe cast, for example for structured dtypes, we will end up
doing a lot of work for no reason. If we accept an error return without
an error already set this is solved.

My original reason was that setting a generic error in the resolve
descriptor function would really be pretty annoying, so that returning
the casting safety seemed like a good way to avoid worrying about most
errors there. The "cast-is-view" output is important though.
* Fix copy new intro note to NEP 40 (and fix it in NEP 41) Co-authored-by: Ben Nathanson --- .../_static/nep-0041-type-sketch-no-fonts.svg | 1110 ++++++++++ doc/neps/_static/nep-0041-type-sketch.svg | 523 +++++ doc/neps/nep-0040-legacy-datatype-impl.rst | 30 +- doc/neps/nep-0041-improved-dtype-support.rst | 106 +- doc/neps/nep-0042-new-dtypes.rst | 1928 +++++++++-------- 5 files changed, 2705 insertions(+), 992 deletions(-) create mode 100644 doc/neps/_static/nep-0041-type-sketch-no-fonts.svg create mode 100644 doc/neps/_static/nep-0041-type-sketch.svg diff --git a/doc/neps/_static/nep-0041-type-sketch-no-fonts.svg b/doc/neps/_static/nep-0041-type-sketch-no-fonts.svg new file mode 100644 index 000000000000..3250396c530a --- /dev/null +++ b/doc/neps/_static/nep-0041-type-sketch-no-fonts.svg @@ -0,0 +1,1110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/neps/_static/nep-0041-type-sketch.svg b/doc/neps/_static/nep-0041-type-sketch.svg new file mode 100644 index 000000000000..9e597db9d9b2 --- /dev/null +++ b/doc/neps/_static/nep-0041-type-sketch.svg @@ -0,0 +1,523 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + Value Storage + Parameters andStorage options + Value Space andBehaviour + type + instance + + ABC + instance + + type + + DType + + + base dtype + element + + dtype + + element + + dtype + + Python type + + Python typewith ABC + NEP 41 Proposal + Alternative + + + diff --git a/doc/neps/nep-0040-legacy-datatype-impl.rst b/doc/neps/nep-0040-legacy-datatype-impl.rst index c247e3d62d10..39889109de40 100644 --- a/doc/neps/nep-0040-legacy-datatype-impl.rst +++ b/doc/neps/nep-0040-legacy-datatype-impl.rst @@ -13,15 +13,15 @@ NEP 40 — Legacy Datatype Implementation in NumPy .. note:: - This NEP is part of a series of NEPs encompassing first information - about the previous dtype implementation and issues with it in NEP 40 - (this document). - :ref:`NEP 41 ` then provides an overview and generic design choices - for the refactor. - Further NEPs 42 and 43 go into the technical details of the datatype - and universal function related internal and external API changes. - In some cases it may be necessary to consult the other NEPs for a full - picture of the desired changes and why these changes are necessary. + This NEP is first in a series: + + - NEP 40 (this document) explains the shortcomings of NumPy's dtype implementation. + + - :ref:`NEP 41 ` gives an overview of our proposed replacement. + + - :ref:`NEP 42 ` describes the new design's datatype-related APIs. + + - NEP 43 describes the new design's API for universal functions. @@ -44,6 +44,8 @@ of the current implementation of dtypes as well as a discussion. In many cases subsections will be split roughly to first describe the current implementation and then follow with an "Issues and Discussion" section. +.. 
_parametric-datatype-discussion: + Parametric Datatypes ^^^^^^^^^^^^^^^^^^^^ @@ -253,6 +255,8 @@ types such as ``np.inexact`` (see figure below). In fact, some control flow within NumPy currently uses ``issubclass(a.dtype.type, np.inexact)``. +.. _nep-0040_dtype-hierarchy: + .. figure:: _static/nep-0040_dtype-hierarchy.png **Figure:** Hierarchy of NumPy scalar types reproduced from the reference @@ -335,7 +339,7 @@ Each of these signatures is associated with a single inner-loop function defined in C, which does the actual calculation, and may be called multiple times. The main step in finding the correct inner-loop function is to call a -:c:type:`PyUFunc_TypeResolutionFunc` which retrieves the input dtypes from +:c:type:`PyUFunc_TypeResolutionFunc` which retrieves the input dtypes from the provided input arrays and will determine the full type signature (including output dtype) to be executed. @@ -366,7 +370,7 @@ It is currently only possible for user defined functions to be found/resolved if any of the inputs (or the outputs) has the user datatype, since it uses the `OO->O` signature. For example, given that a ufunc loop to implement ``fraction_divide(int, int) --> Fraction`` has been implemented, +-> Fraction`` has been implemented, the call ``fraction_divide(4, 5)`` (with no specific output dtype) will fail because the loop that includes the user datatype ``Fraction`` (as output) can only be found if any of @@ -572,7 +576,7 @@ Related Work ------------ * Julia types are an interesting blueprint for a type hierarchy, and define - abstract and concrete types [julia-types]_. + abstract and concrete types [julia-types]_. * In Julia promotion can occur based on abstract types. If a promoter is defined, it will cast the inputs and then Julia can then retry to find @@ -607,7 +611,7 @@ the following provides a subset for more recent ones: * https://hackmd.io/ok21UoAQQmOtSVk6keaJhw and https://hackmd.io/s/ryTFaOPHE (2019-04-30) Proposals for subclassing implementation approach. - + * Discussion about the calling convention of ufuncs and need for more powerful UFuncs: https://github.com/numpy/numpy/issues/12518 diff --git a/doc/neps/nep-0041-improved-dtype-support.rst b/doc/neps/nep-0041-improved-dtype-support.rst index 6dc4ea50c6fd..d7a08562d9c4 100644 --- a/doc/neps/nep-0041-improved-dtype-support.rst +++ b/doc/neps/nep-0041-improved-dtype-support.rst @@ -15,15 +15,15 @@ NEP 41 — First step towards a new Datatype System .. note:: - This NEP is part of a series of NEPs encompassing first information - about the previous dtype implementation and issues with it in - :ref:`NEP 40 `. - NEP 41 (this document) then provides an overview and generic design - choices for the refactor. - Further NEPs 42 and 43 go into the technical details of the datatype - and universal function related internal and external API changes. - In some cases it may be necessary to consult the other NEPs for a full - picture of the desired changes and why these changes are necessary. + This NEP is second in a series: + + - :ref:`NEP 40 ` explains the shortcomings of NumPy's dtype implementation. + + - NEP 41 (this document) gives an overview of our proposed replacement. + + - :ref:`NEP 42 ` describes the new design's datatype-related APIs. + + - NEP 43 describes the new design's API for universal functions. Abstract @@ -412,27 +412,28 @@ multiple development stages are required: * Phase II: Incrementally define or rework API - * Create a new and easily extensible API for defining new datatypes - and related functionality. 
(NEP 42) - - * Incrementally define all necessary functionality through the new API (NEP 42): - - * Defining operations such as ``np.common_type``. - * Allowing to define casting between datatypes. - * Add functionality necessary to create a numpy array from Python scalars - (i.e. ``np.array(...)``). - * … - - * Restructure how universal functions work (NEP 43), in order to: - - * make it possible to allow a `~numpy.ufunc` such as ``np.add`` to be - extended by user-defined datatypes such as Units. - - * allow efficient lookup for the correct implementation for user-defined - datatypes. - - * enable reuse of existing code. Units should be able to use the - normal math loops and add additional logic to determine output type. + * Incrementally define all necessary functionality through methods and + properties on the DType (NEP 42): + + * The properties of the class hierarchy and DType class itself, + including methods not covered by the following, most central, points. + * The functionality that will support dtype casting using ``arr.astype()`` + and casting related operations such as ``np.common_type``. + * The implementation of item access and storage, and the way shape and + dtype are determined when creating an array with ``np.array()`` + * Create a public C-API to define new DTypes. + + * Restructure how universal functions work (NEP 43), to allow extending + a `~numpy.ufunc` such as ``np.add`` for user-defined datatypes + such as Units: + + * Refactor how the low-level C functions are organized to make it + extensible and flexible enough for complicated DTypes such as Units. + * Implement registration and efficient lookup for these low-level C + functions as defined by the user. + * Define how promotion will be used to implement behaviour when casting + is required. For example ``np.float64(3) + np.int32(3)`` promotes the + ``int32`` to a ``float64``. * Phase III: Growth of NumPy and Scientific Python Ecosystem capabilities: @@ -583,7 +584,7 @@ special methods move from the dtype instances to methods on the new DType class. This is the typical design pattern used in Python. Organizing these methods and information in a more Pythonic way provides a solid foundation for refining and extending the API in the future. -The current API cannot be extended due to how it is exposed publically. +The current API cannot be extended due to how it is exposed publicly. This means for example that the methods currently stored in ``PyArray_ArrFuncs`` on each datatype (see :ref:`NEP 40 `) will be defined differently in the future and @@ -620,6 +621,49 @@ While DType and Scalar describe the same concept/type (e.g. an `int64`), it seems practical to split out the information and functionality necessary for numpy into the DType class. +The dtype instances provide parameters and storage options +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +From a computer science point of view a type defines the *value space* +(all possible values its instances can take) and their *behaviour*. +As proposed in this NEP, the DType class defines value space and behaviour. +The ``dtype`` instance can be seen as part of the value, so that the typical +Python ``instance`` corresponds to ``dtype + element`` (where *element* is the +data stored in the array). +An alternative view would be to define value space and behaviour on the +``dtype`` instances directly. +These two options are presented in the following figure and compared to +similar Python implementation patterns: + +.. 
image:: _static/nep-0041-type-sketch-no-fonts.svg + +The difference is in how parameters, such as string length or the datetime +units (``ms``, ``ns``, ...), and storage options, such as byte-order, are handled. +When implementing a Python (scalar) ``type`` parameters, for example the datetimes +unit, will be stored in the instance. +This is the design NEP 42 tries to mimic, however, the parameters are now part +of the dtype instance, meaning that part of the data stored in the instance +is shared by all array elements. +As mentioned previously, this means that the Python ``instance`` corresponds +to the ``dtype + element`` stored in a NumPy array. + +An more advanced approach in Python is to use a class factory and an abstract +base class (ABC). +This allows moving the parameter into the dynamically created ``type`` and +behaviour implementation may be specific to those parameters. +An alternative approach might use this model and implemented behaviour +directly on the ``dtype`` instance. + +We believe that the version as proposed here is easier to work with and understand. +Python class factories are not commonly used and NumPy does not use code +specialized for dtype parameters or byte-orders. +Making such specialization easier to implement such specialization does not +seem to be a priority. +One result of this choice is that some DTypes may only have a singleton instance +if they have no parameters or storage variation. +However, all of the NumPy dtypes require dynamically created instances due +to allowing metadata to be attached. + Scalars should not be instances of the datatypes (2) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/neps/nep-0042-new-dtypes.rst b/doc/neps/nep-0042-new-dtypes.rst index b37555892b62..2d1e3a329e6c 100644 --- a/doc/neps/nep-0042-new-dtypes.rst +++ b/doc/neps/nep-0042-new-dtypes.rst @@ -1,9 +1,12 @@ -======================================== -NEP 42 — Implementation of New DataTypes -======================================== +.. _NEP42: -:title: Extensible Datatypes for NumPy +============================================================================== +NEP 42 — User-extensible dtypes +============================================================================== + +:title: User-extensible dtypes :Author: Sebastian Berg +:Author: Ben Nathanson :Author: Marten van Kerkwijk :Status: Draft :Type: Standard @@ -12,666 +15,439 @@ NEP 42 — Implementation of New DataTypes .. note:: - This NEP is part of a series of NEPs encompassing first information - about the previous dtype implementation and issues with it in - :ref:`NEP 40 `. - :ref:`NEP 41 ` then provides an overview and generic design - choices for the refactor. NEPs 42 (this document) - and 43 go into the technical details of the internal and external - API changes related to datatypes and universal functions, respectively. - In some cases it may be necessary to consult the other NEPs for a full - picture of the desired changes and why these changes are necessary. + This NEP is third in a series: + - :ref:`NEP 40 ` explains the shortcomings of NumPy's dtype implementation. -Abstract --------- - -NEP 40 and 41 detailed the need for the creation of a new datatype system within -NumPy to better serve downstream use-cases and improve the maintainability -and the extensibility of NumPy. -A main issue with the current dtype API is that datatypes are written as -a single Python class with special instances for each of the actual datatypes. 
-While this certainly has been a practical approach in implementing numerical -datatypes, it does not allow to naturally split up logic. For example, -functions such as ``can_cast`` have explicit logic for each datatype. -Because of this monolithic code structure user-defined datatypes do not have -the same capabilities as NumPy datatypes have. -The current structure also makes understanding and modifying datatypes harder. -The current datatypes are not well encapsulated, so modifications targeting -a single datatype inevitably touch code involving others. -As detailed in NEP 41, the desired general design is to create classes for -each of the NumPy-provided datatypes, meaning that ``np.dtype("float64")`` -returns an instance of a ``Float64`` class which is a subclass of ``np.dtype``. -``np.dtype[float64]`` will also be used to denote this class. -This will allow moving all logic into special methods on the ``np.dtype`` -subclasses. This ``DType`` class would then serve as the central -extension point for adding new dtypes to NumPy. - -This document proposes the new API for the datatypes itself. -A second proposal NEP 43 details proposed changes to the universal -functions. -Note that only the implementation of both NEPs will provide the desired full -functionality. + - :ref:`NEP 41 ` gives an overview of our proposed replacement. + - NEP 42 (this document) describes the new design's datatype-related APIs. -.. note:: + - NEP 43 describes the new design's API for universal functions. - At this time this NEP is in a preliminary state. Both internal and - external API may be adapted based on user input or implementation needs. - The general design principles and choices, while provisional, should not - be expected to change dramatically. +****************************************************************************** +Abstract +****************************************************************************** -Detailed Description --------------------- +NumPy's dtype architecture is monolithic, built around a single class that +handles each dtype as an instance. There's no principled way to expand it to +new dtypes, and the code is difficult to read and maintain. -NEP 41 layed out the creation of a class hierarchy for datatypes using the -new DType classes to provide all necessary implementations. -This NEP defines the specific choice of API necessary to define new DTypes. -Here, these are suggested as C-API slots; however, conceptually these -translate identically to Python methods. +As NEP 41 explains, we are proposing a new architecture that is modular and +open to user additions. dtypes will derive from a new ``DType`` class serving +as the extension point for new types. ``np.dtype("float64")`` will return an +instance of a ``Float64`` class, a subclass of root class ``np.dtype``. -Additionally, the NEP proposes to implement the notion of *abstract* DTypes. -Further, we detail – in part – how the proposed methods (C-API slots) -enable all necessary use cases. +This NEP is one of two that lay out the design and API of this new +architecture. This NEP addresses dtype implementation; NEP 43 addresses +universal functions. -Each section will begin with a short motivation of the issue or what -problem is addressed. This is followed by a description of the proposed -design choice, and then may list alternatives. +.. note:: + Details of the private and external APIs may change to reflect user + comments and implementation constraints. 
The underlying principles and + choices should not change significantly. -Nomenclature -"""""""""""" -As a brief note on nomenclature, it should be noted that ``dtype`` normally -denotes the dtype *instance*, which is the object attached to a numpy array. -On the other hand the ``DType`` class is the subclass of ``np.dtype``. -On the C-level we currently use the name ``descriptor`` or ``descr`` -interchangeably with *dtype instance*. ``descriptor`` or ``descr`` will be -used in proposed C-API names to differentiate dtype instances from DType -classes more clearly. -Note that the notion of dtype class is currently represented mainly as -the ``dtype.num`` and ``dtype.char``. -Please see the :ref:`dtype hierarchy figure ` for an -illustration of this distinction. +****************************************************************************** +Motivation and scope +****************************************************************************** -There are currently classes in NumPy for numeric types e.g. -``np.float64``; however, -these are not DTypes but the corresponding scalar classes -(see also NEP 40 and 41 for discussion on why these are largely unrelated to -the proposed changes). +Our goal is to allow user code to create fully featured dtypes for a broad +variety of uses, from physical units (such as meters) to domain-specific +representations of geometric objects. NEP 41 describes a number of these new +dtypes and their benefits. +Any design supporting dtypes must consider: -Proposed access to DType class -"""""""""""""""""""""""""""""" +- How shape and dtype are determined when an array is created +- How array elements are stored and accessed +- The rules for casting dtypes to other dtypes -**Motivation:** +In addition: -Currently we often call ``np.dtype`` to create the dtype instance -corresponding to a given scalar type (e.g. ``np.dtype(np.int64)``). -Adding the DType classes may require a way to access the classes conveniently. +- We want dtypes to comprise a class hierarchy open to new types and to + subhierarchies, as motivated in :ref:`NEP 41 `. -**Description:** +And to provide this, -To avoid duplication, but also to expose the classes conveniently to users -we propose the addition of:: +- We need to define a user API. - np.dtype[np.int64] +All these are the subjects of this NEP. -as a class getter. This can work both for user and NumPy DTypes, -although, in many cases a library may choose to provide a more direct -way to access the specific DType class. -This method may initially be limited to concrete DTypes. -The main reason for this choice is to provide a single -clear and future-proof way to find the DType class given the -Python (scalar) class. +- The class hierarchy, its relation to the Python scalar types, and its + important attributes are described in `DType class`_. -This should not be a common operation, so providing this class getter reduces -the pressure of adding the new DType classes into the namespace. +- The functionality that will support dtype casting is described in `Casting`_. -*Note: This is currently a possible extension and not yet decided.* +- The implementation of item access and storage, and the way shape and dtype + are determined when creating an array, are described in `Array coercion`_. +- The functionality for users to define their own DTypes is described in + `Public C-API`_. -Hierarchy of DTypes and Abstract DTypes -""""""""""""""""""""""""""""""""""""""" +The API here and in NEP 43 is entirely on the C side. 
A Python-side version +will be proposed in a future NEP. +A future Python API is expected to be similar, but provide a more convenient +API to reuse the functionality of existing DTypes. +It could also provide shorthands to create structured DTypes similar to python's +`dataclasses `_. -**Motivation:** -The creation of DType classes has already been decided in NEP 41. -Here we discuss the notion of **abstract** DTypes. -There are multiple reasons for this: -1. It allows the definition of a class hierarchy, in principle allowing checks like - ``isinstance(np.dtype("float64"), np.inexact)``. - **This hierarchy may be a prerequisite to implementing dispatching - for universal functions (NEP 43)** -2. Abstract DTypes can enable code such as - ``arr.astype(Complex)`` to express the desire to cast to a - complex data type of unspecified precision. -3. It anticipates the creation of families of DTypes by users. - For example allowing the creation of an abstract ``Unit`` class with a concrete - ``Float64Unit``. In which case ``Unit(np.float64, "m")`` could be - identical to ``Float64Unit("m")``. - -A very concrete example is the current Pandas ``Categorical`` DType, -which may benefit from abstraction to allow the differentiation of -a categorical of integer values and one of general object values. -The reason for this is that we may want to reject -``common_dtype(CategoricalInt64, String)``, but accept -``common_dtype(CategoricalObject, String)`` to be the ``object`` DType. -The current Pandas ``Categorical`` DType combines both and must remain -representable. The solution is thus to make ``Categorical`` abstract with -the two subclasses ``CategoricalInt64`` and ``CategoricalObject`` -distinguishing the two. - - -**Description:** - -The figure below shows the proposed datatype hierarchy. -It should be noted that abstract DTypes are distinct in two ways: - -1. They do not have instances. Instantiating an abstract DType has to return - a concrete subclass or raise an error (default, and possibly enforced - initially). -2. Unlike concrete DTypes, abstract DTypes can be superclasses, they may also - serve like Python's abstract base classes (ABC). - (It may be possible to simply use/inherit from Python ABCs.) - -These two rules are identical to the type choices made for example in the -`Julia language `_. -It allows for the creation of a datatype hierarchy, but avoids issues with -subclassing concrete DTypes directly. -For example, logic such as ``can_cast`` does not cleanly inherit from a -``Int64`` to a ``Datetime64`` even though the ``Datetime64`` could be seen -as an integer with only a unit attached (and thus implemented as a subclass). - -The main consequence for the DType implementer is that concrete DTypes can -never be subclasses of existing concrete DTypes. -End-users would not notice or need to know about this distinction. -However, subclassing may be a possible mechanism to extend the datatypes -in the future to allow specialized implementations for existing dtypes -such as a GPU float64 subclassing a NumPy float64. - -The combination of (initially) rejecting subclassing of concrete DTypes -while allowing it for abstract ones allows the transparent definition of -a class hierarchy, while avoiding potential issues with subclassing and -especially inheritance. - -As a technical implementation detail: the DType class will require C-side -storage of methods and additional information. -This requires the creation of a ``DTypeMeta`` class. 
-Each ``DType`` class is thus an instance of ``DTypeMeta`` with a well-defined -and extensible interface. -The end-user will not need to be aware of this. - -.. _hierarchy_figure: -.. figure:: _static/dtype_hierarchy.svg - :figclass: align-center - - -Methods/Slots defined for each DType -"""""""""""""""""""""""""""""""""""" - -NEP 41 detailed that all logic should be defined through special methods -on the DTypes. -This section will list a specific set of such methods (in the form of -Python methods). -The C-side equivalent slot signature will be summarized below after proposing -the general C-API for defining new Datatypes. -Note that while the slots are defined as special Python methods here, this is -for the readers convenience and *not* meant to imply the identical exposure -as a Python API. -This will need to be proposed in a separate, later, NEP. +****************************************************************************** +Backward compatibility +****************************************************************************** -Some of the methods may be similar or even reuse existing Python slots. -User-defined DType classes are discouraged from defining or using Python's -special slots without consulting the NumPy developers, in order to allow -defining them later. -For example ``dtype1 & dtype2`` could be a shorthand for -``np.common_dtype(dtype1, dtype2)``, and comparisons should be defined mainly -through casting logic. +The disruption is expected to be no greater than that of a typical NumPy +release. +- The main issues are noted in :ref:`NEP 41 ` and will mostly affect + heavy users of the NumPy C-API. -Additional Information -^^^^^^^^^^^^^^^^^^^^^^ +- Eventually we will want to deprecate the API currently used for creating + user-defined dtypes. -In addition to the more detailed methods below, the following general -information is currently provided and will be defined on the class: +- Small, rarely noticed inconsistencies are likely to change. Examples: -* ``cls.parametric`` (see also `NEP 40 `_): + - ``np.array(np.nan, dtype=np.int64)`` behaves differently from + ``np.array([np.nan], dtype=np.int64)`` with the latter raising an error. + This may require identical results (either both error or both succeed). + - ``np.array([array_like])`` sometimes behaves differently from + ``np.array([np.array(array_like)])`` + - array operations may or may not preserve dtype metadata - * Parametric will be a flag in the (private) C-API. However, the - Python API will instead use a ``ParametricDType`` class from - which to inherit. (This is similar to Python's type flags, which include - flags for some basic subclasses such as subclasses of ``float`` or ``tuple``) - * This flag is mainly to simplify DType creation and casting and - allow for performance tweaks. - * DTypes which are not parametric must define a canonical dtype instance - which should be a singleton. - * Parametric dtypes require some additional methods (below). +The new code must pass NumPy's regular test suite, giving some assurance that +the changes are compatible with existing code. -* ``self.canonical`` method (Alternative: new instance attribute) +****************************************************************************** +Usage and impact +****************************************************************************** - * Instead of byteorder, we may want a ``canonical`` flag (reusing the - ISNBO flag – "is native byte order" seems possible here). 
- This flag signals that the data are stored in the default/canonical way. - In practice this is always an NBO check, but generalization should be possible. - A potential use-case is a complex-conjugated instance of Complex which - stores ``real - imag`` instead of ``real + imag`` and is thus not - the canonical storage. +We believe the few structures in this section are sufficient to consolidate +NumPy's present functionality and also to support complex user-defined DTypes. -* ``ensure_canonical(self) -> dtype`` return a new dtype (or ``self``). - The returned dtype must have the ``canonical`` flag set. +The rest of the NEP fills in details and provides support for the claim. -* ``DType.type`` is the associated scalar type. ``dtype.type`` will be a - class attribute and the current ``dtype.type`` field will be considered - deprecated. This may be relaxed if a use-case arises. +Again, though Python is used for illustration, the implementation is a C API only; a +future NEP will tackle the Python API. -Additionally, existing methods (and C-side fields) will be provided. -However, the fields ``kind`` and ``char`` will be set to ``\0`` -(NULL character) on the C-side. -While discouraged, except for NumPy builtin types, ``kind`` both will return -the ``__qualname__`` of the object to ensure uniqueness for all DTypes. -(the replacement for ``kind`` will be to use ``isinstance`` checks). +After implementing this NEP, creating a DType will be possible by implementing +the following outlined DType base class, +that is further described in `DType class`_: -Another example of methods that should be moved to the DType class are the -various sorting functions, which shall be implemented by defining a method: +.. code-block:: python + :dedent: 0 -* ``dtype_get_sort_function(self, sortkind="stable") -> sortfunction`` + class DType(np.dtype): + type : type # Python scalar type + parametric : bool # (may be indicated by superclass) -which must return ``NotImplemented`` if the given ``sortkind`` is not known. -Similarly, any function implemented previously which cannot be removed will -be implemented as a special method. -Since these methods can be deprecated and new (renamed) replacements added, -the API is not defined here and it is acceptable if it changes over time. + @property + def canonical(self) -> bool: + raise NotImplementedError -For some of the current "methods" defined on the dtype, including sorting, -a long term solution may be to instead create generalized ufuncs to provide -the functionality. + def ensure_canonical(self : DType) -> DType: + raise NotImplementedError -**Alternatives:** +For casting, a large part of the functionality is provided by the "methods" stored +in ``_castingimpl`` -Some of these flags could be implemented by inheriting -for example from a ``ParametricDType`` class. However, on the C-side as -an implementation detail it seems simpler to provide a flag. -This does not preclude the possibility of creating a ``ParametricDType`` -to Python to represent the same thing. +.. code-block:: python + :dedent: 0 -**Example:** + @classmethod + def common_dtype(cls : DTypeMeta, other : DTypeMeta) -> DTypeMeta: + raise NotImplementedError -The ``datetime64`` DType is considered parametric, due to its unit, and -unlike a float64 has no canonical representation. The associated ``type`` -is the ``np.datetime64`` scalar. 
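+
+For illustration, the example above could be written in the Python-style
+pseudo-API used throughout this NEP roughly as below (``DateTime64`` and
+its attributes are illustrative only, not a proposed API)::
+
+    class DateTime64(np.dtype):
+        type = np.datetime64  # associated scalar type
+        parametric = True     # parametrized by the unit ("ms", "ns", ...)
+
+        def __init__(self, unit, byteorder="="):
+            self.unit = unit
+            self.byteorder = byteorder
+
+        @property
+        def canonical(self) -> bool:
+            # Each unit has its own canonical, native-byte-order instance;
+            # there is no single canonical datetime64 dtype.
+            return self.byteorder == "="
+
+        def ensure_canonical(self):
+            return self if self.canonical else DateTime64(self.unit)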
+    def common_instance(self : DType, other : DType) -> DType:
+        raise NotImplementedError
+
+    # A mapping of "methods" each detailing how to cast to another DType
+    # (further specified at the end of the section)
+    _castingimpl = {}

-**Issues and Details:**

For array-coercion, also part of casting:

-A potential DType such as ``Categorical`` will not be required to have a clear type
-associated with it. Instead, the ``type`` may be ``object`` and the
-categorical's values are arbitrary objects.
-Unlike with well-defined scalars, this ``type`` cannot
-not be used for the dtype discovery necessary for coercion
-(compare section `DType Discovery during Array Coercion`_).
-
-
-Coercion to and from Python Objects
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. code-block:: python
+    :dedent: 0

-**Motivation:**
+    def __dtype_setitem__(self, item_pointer, value):
+        raise NotImplementedError

-When storing a single value in an array or taking it out of the array,
-it is necessary to coerce (convert) it to and from the low-level
-representation inside the array.
+    def __dtype_getitem__(self, item_pointer, base_obj) -> object:
+        raise NotImplementedError

-**Description:**
+    @classmethod
+    def __discover_descr_from_pyobject__(cls, obj : object) -> DType:
+        raise NotImplementedError

-Coercing to and from Python scalars requires two to three methods:
+    # initially private:
+    @classmethod
+    def _known_scalar_type(cls, obj : object) -> bool:
+        raise NotImplementedError

-1. ``__dtype_setitem__(self, item_pointer, value)``
-2. ``__dtype_getitem__(self, item_pointer, base_obj) -> object``
-   The ``base_obj`` should be ignored normally, it is provided *only* for
-   memory management purposes, pointing to an object owning the data.
-   It exists only to allow support of structured datatypes with subarrays
-   within NumPy, which (currently) return views into the array.
-   The function returns an equivalent Python scalar (i.e. typically a NumPy
-   scalar).
-3. ``__dtype_get_pyitem__(self, item_pointer, base_obj) -> object``
-   (initially hidden for new-style user-defined datatypes, may be exposed
-   on user request). This corresponds to the ``arr.item()`` method which
-   is also used by ``arr.tolist()`` and returns e.g. Python floats instead of
-   NumPy floats.
-(The above is meant for C-API. A Python-side API would have to use byte
-buffers or similar to implement this, which may be useful for prototyping.)

+Another element of the casting implementation is the ``CastingImpl``:

-These largely correspond to the current definitions. When a certain scalar
-has a known (different) dtype, NumPy may in the future use casting instead
-of ``__dtype_setitem__``.
-A user datatype is (initially) expected to implement ``__dtype_setitem__``
-for its own ``DType.type`` and all basic Python scalars it wishes to support
-(e.g. integers, floats, datetime).
-In the future a function "``known_scalartype``" may be added to allow a user
-dtype to signal which Python scalars it can store directly.

+.. 
code-block:: python + :dedent: 0 + casting = Union["safe", "same_kind", "unsafe"] -**Implementation:** + class CastingImpl: + # Object describing and performing the cast + casting : casting -The pseudo-code implementation for setting a single item in an array -from an arbitrary Python object ``value`` is (note that some of the -functions are only defined below):: - - def PyArray_Pack(dtype, item_pointer, value): - DType = type(dtype) - if DType.type is type(value) or DType.known_scalartype(type(value)): - return dtype.__dtype_setitem__(item_pointer, value) - - # The dtype cannot handle the value, so try casting: - arr = np.array(value) - if arr.dtype is object or arr.ndim != 0: - # not a numpy or user scalar; try using the dtype after all: - return dtype.__dtype_setitem__(item_pointer, value) - - arr.astype(dtype) - item_pointer.write(arr[()]) - -where the call to ``np.array()`` represents the dtype discovery and is -not actually performed. + def resolve_descriptors(self, Tuple[DType] : input) -> (casting, Tuple[DType]): + raise NotImplementedError -**Example:** + # initially private: + def _get_loop(...) -> lowlevel_C_loop: + raise NotImplementedError -Current ``datetime64`` returns ``np.datetime64`` scalars and can be assigned -from ``np.datetime64``. -However, the datetime ``__dtype_setitem__`` also allows assignment from -date strings ("2016-05-01") or Python integers. -Additionally the datetime ``__dtype_get_pyitem__`` function actually returns -Python ``datetime.datetime`` object (most of the time). +which describes the casting from one DType to another. +In NEP 43 this ``CastingImpl`` object is used unchanged to support +universal functions. -**Alternatives:** +****************************************************************************** +Definitions +****************************************************************************** +.. glossary:: -This may be seen as simply a cast to and from the ``object`` dtype. -However, it seems slightly more complicated. This is because -in general a Python object could itself be a zero-dimensional array or -scalar with an associated DType. -Thus, representing it as a normal cast would either require that: + dtype + The dtype *instance*; this is the object attached to a numpy array. -* The implementor handles all Python classes, even those for which - ``np.array(scalar).astype(UserDType)`` already works because - ``np.array(scalar)`` returns, say, a datetime64 array. -* The cast is actually added between a typed-object to dtype. And even - in this case a generic fallback (for example ``float64`` can use - ``float(scalar)`` to do the cast) is also necessary. + DType + Any subclass of the base type ``np.dtype``. -It is certainly possible to describe the coercion to and from Python objects -using the general casting machinery. However, it seems special enough to -handle specifically. + coercion + Conversion of Python types to NumPy arrays and values stored in a NumPy + array. + cast + Conversion of an array to a different dtype. -**Further Issues and Discussion:** + promotion + Finding a dtype that can perform an operation on a mix of dtypes without + loss of information. -The setitem function currently duplicates some code, such as coercion -from a string. ``datetime64`` allows assignment from string, but the same -conversion also occurs for casts from the string dtype to ``datetime64``. 
-In the future, we may expose a way to signal whether a conversion is known, -and otherwise a normal cast is made so that the item is effectively set to ``np.array(scalar).astype(requested_dtype)``. + safe cast + A cast is safe if no information is lost when changing type. -There is a general issue about the handling of subclasses. We anticipate to not -automatically detect the dtype for ``np.array(float64_subclass)`` to be -float64. The user can still provide ``dtype=np.float64``. However, the above -"assign by casting" using ``np.array(scalar_subclass).astype(requested_dtype)`` -will fail. +On the C level we use ``descriptor`` or ``descr`` to mean +*dtype instance*. In the proposed C-API, these terms will distinguish +dtype instances from DType classes. .. note:: + Perhaps confusingly, NumPy already has a class hierarchy for numeric types, as + seen :ref:`in the figure ` of NEP 40, and the new + DType hierarchy will resemble it. But the existing hierarchy is for scalar + types, not DTypes, and its existence is largely irrelevant here, as NEP 40 and + 41 explain. - This means that ``np.complex256`` should not use ``__float__`` in its - ``__dtype_setitem__`` method in the future unless it is a known floating - point type. If the scalar is a subclass of a different high precision - floating point type (e.g. ``np.float128``) then this will lose precision. - - -DType Discovery during Array Coercion -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -An important step in the usage of NumPy arrays is the creation of the array -itself from collections of generic Python objects. - -**Motivation:** - -Although the distinction is not clear currently, there are two main needs:: - - np.array([1, 2, 3, 4.]) - -needs to guess the correct dtype based on the Python objects inside. -Such an array may include a mix of datatypes, as long as they can be clearly -promoted. -Currently not clearly distinct (but partially existing for strings) is the -use case of:: - - # np.dtype[np.str_] can also be spelled np.str_ or "S" (which works today) - np.array([object(), None], dtype=np.dtype[np.str_]) - -which forces each object to be interpreted as string. This is anticipated -to be useful for example for categorical datatypes:: - - np.array([1, 2, 1, 1, 2], dtype=Categorical) +.. _DType class: -to allow the discovery the of all unique values. -(For NumPy ``datetime64`` this is also currently used to allow string input.) - -There are three further issues to consider: - -1. It may be desirable that datatypes can be created which are associated - to normal Python scalars (such as ``datetime.datetime``), which do not - have a ``dtype`` attribute already. -2. In general, a datatype could represent a sequence, however, NumPy currently - assumes that sequences are always collections of elements (the sequence cannot be an - element itself). An example for this is would be a ``vector`` DType. -3. An array may itself contain arrays with a specific dtype (even - general Python objects). For example: - ``np.array([np.array(None, dtype=object)], dtype=np.String)`` - poses the issue of how to handle the included array. - -Some of these difficulties arise due to the fact that finding the correct shape -of the output array and finding the correct datatype are closely related. - -**Implementation:** - -There are two distinct cases given above: First, when the user has provided no -dtype information, and second when the user provided a DType class – -a notion that is currently represented e.g. 
by the parametric instance ``"S"`` -representing a string of any length. - -In the first case, it is necessary to establish a mapping from the Python type(s) -of the constituent elements to the DType class. -When the DType class is known, the correct dtype instance still needs to be found. -This shall be implemented by leveraging two pieces of information: - -1. ``DType.type``: The current type attribute to indicate which Python scalar - type is associated with the DType class (this is a *class* attribute that always - exists for any datatype and is not limited to array coercion). -2. The reverse lookup will remain hardcoded for the basic Python types initially. - Otherwise the ``type`` attribute will be used, and at least initially may - enforce deriving the scalar from a NumPy-provided scalar base class. - This method may be expanded later (see alternatives). -3. ``__discover_descr_from_pyobject__(cls, obj) -> dtype``: A classmethod that - returns the correct descriptor given the input object. - *Note that only parametric DTypes have to implement this*, most datatypes - can simply use a default (singleton) dtype instance which is found only - based on the ``type(obj)`` of the Python object. +****************************************************************************** +The DType class +****************************************************************************** -The Python type which is already associated with a DType through the -``DType.type`` attribute maps from the DType to the Python type. -A DType may choose to automatically discover from this Python type. -This will be achieved using a global a mapping (dictionary-like) of:: +This section reviews the structure underlying the proposed DType class, +including the type hierarchy and the use of abstract DTypes. - known_python_types[type] = DType +Class getter +============================================================================== -To anticipate the possibility of creating both a Python type (``pytype``) -and ``DType`` dynamically, and thus the potential desire to delete them again, -this mapping should generally be weak. -This requires that the ``pytype`` holds on to the ``DType`` explicitly. -Thus, in addition to building the global mapping, NumPy will store -the ``DType`` as ``pytype.__associated_array_dtype__`` in the Python type. -This does *not* define the mapping and should *not* be accessed directly. -In particular potential inheritance of the attribute does not mean that -NumPy will use the superclasses ``DType`` automatically. -A new ``DType`` must be created for the subclass. +To create a dtype instance from a scalar type users now call ``np.dtype`` (for +instance, ``np.dtype(np.int64)``). -.. note:: +To get the DType of a scalar type, we propose this getter syntax:: - Python integers do not have a clear/concrete NumPy type associated with - them right now. This is because during array coercion NumPy currently - finds the first type capable of representing their value in the list - of `long`, `unsigned long`, `int64`, `unsigned int64`, and `object` - (on many machines `long` is 64 bit). - - Instead they will need to be implemented using an - ``AbstractPyInt``. This DType class can then provide - ``__discover_descr_from_pyobject__`` and return the actual dtype which - is e.g. ``np.dtype("int64")``. 
- For dispatching/promotion in ufuncs, it will also be necessary - to dynamically create ``AbstractPyInt[value]`` classes (creation can be - cached), so that they can provide the current value based promotion - functionality provided by ``np.result_type(python_integer, array)`` [1]_. - -To allow for a DType to accept specific inputs as known scalars, we will -initially use a ``known_scalar_type`` method. -This allows discovery of a ``vector`` as a scalar (element) instead of -a sequence (for the command ``np.array(vector, dtype=VectorDType)``) -even when ``vector`` is itself a sequence or even an array subclass. -This will *not* be public API initially, but may be made public at a later -time. - -This will work similar to the following pseudo-code:: + np.dtype[np.int64] - def find_dtype(array_like): - common_dtype = None - for element in array_like: - # default to object dtype, if unknown - DType = known_python_types.get(type(element), np.dtype[object]) - dtype = DType.__discover_descr_from_pyobject__(element) +The notation works equally well with built-in and user-defined DTypes +and is inspired by and potentially useful for type hinting. - if common_dtype is None: - common_dtype = dtype - else: - common_dtype = np.promote_types(common_dtype, dtype) +This getter eliminates the need to create an explicit name for every +DType, crowding the ``np`` namespace; the getter itself signifies the type. -In practice, we have to find out whether an element is actually a sequence. -This means that instead of using the ``object`` dtype directly, we have to -check whether or not it is a sequence. +Since getter calls won't be needed often, this is unlikely to be burdensome. +Classes can also offer concise alternatives. -The full algorithm (without user provided dtype) thus looks more like:: +The initial implementation probably will return only concrete (not abstract) +DTypes. - def find_dtype_recursive(array_like, dtype=None): - """ - Recursively find the dtype for a nested sequences (arrays are not - supported here). - """ - DType = known_python_types.get(type(element), None) +*This item is still under review.* - if DType is None and is_array_like(array_like): - # Code for a sequence, an array_like may have a DType we - # can use directly: - for element in array_like: - dtype = find_dtype_recursive(element, dtype=dtype) - return dtype - elif DType is None: - DType = np.dtype[object] +Hierarchy and abstract classes +============================================================================== - # Same as above +We will use abstract classes as building blocks of our extensible DType class +hierarchy. -If the user provides ``DType``, then this DType will be tried first, and the -``dtype`` may need to be cast before the promotion is performed. +1. Abstract classes are inherited cleanly, in principle allowing checks like + ``isinstance(np.dtype("float64"), np.inexact)``. -**Limitations:** - -The above issue 3. is currently (sometimes) supported by NumPy so that -the values of an included array are inspected. -Support in those cases may be kept for compatibility, however, -it will not be exposed to user datatypes. -This means that if e.g. an array with a parametric string dtype is coerced above -(or cast) to an array of a fixed length string dtype (with unknown length), -this will result in an error. -Such a conversion will require passing the correct DType (fixed length of the -string) or providing a utility function to the user. 
- -The use of a global type map means that an error or warning has to be given -if two DTypes wish to map to the same Python type. In most cases user -DTypes should only be implemented for types defined within the same library to -avoid the potential for conflicts. -It will be the DType implementor's responsibility to be careful about this and use -the flag to disable registration when in doubt. - -**Alternatives:** - -The above proposes to add a global mapping, however, initially limiting it -to types deriving from a NumPy subclass (and a fixed set of Python types). -This could be relaxed in the future. -Alternatively, we could rely on the scalar belonging to the user dtype to -implement ``scalar.__associated_array_dtype__`` or similar. - -Initially, the exact implementation shall be *undefined*, if -scalars will have to derive from a NumPy scalar, they will also have -a ``.__associated_array_dtype__`` attribute. -At this time, a future update may to use this instead of a global mapping, -however, it makes NumPy a hard dependency for the scalar class. - -An initial alternative suggestion was to use a two-pass approach instead. -The first pass would only find the correct DType class, and the second pass -would then find correct dtype instance (the second pass is often not necessary). -The advantage of this is that the DType class information is vital for universal -functions to decide which loop to execute. -The first pass would provide the full information necessary for value-based -casting currently implemented for scalars, giving even the possibility of -expanding it to e.g. list inputs ``np.add(np.array([8], dtype="uint8"), [4])`` -giving a ``uint8`` result. -This is mainly related to the question to how the common dtype is found above. -It seems unlikely that this is useful, and similar to a global, could be -added later if deemed necessary. - -**Further Issues and Discussion:** - -While it is possible to create e.g. a DType such as Categorical, array, -or vector which can only be used if `dtype=DType` is provided, if this -is necessary these will not roundtrip correctly when converted back -and forth:: +2. Abstract classes allow a single piece of code to handle a multiplicity of + input types. Code written to accept Complex objects can work with numbers + of any precision; the precision of the results is determined by the + precision of the arguments. + +3. There is room for user-created families of DTypes. We can envision an + abstract ``Unit`` class for physical units, with a concrete subclass like + ``Float64Unit``. Calling ``Unit(np.float64, "m")`` (``m`` for meters) would + be equivalent to ``Float64Unit("m")``. + +4. The implementation of universal functions in NEP 43 may require + a class hierarchy. + +**Example:** A NumPy ``Categorical`` class would be a match for pandas +``Categorical`` objects, which can contain integers or general Python objects. +NumPy needs a DType that it can assign a Categorical to, but it also needs +DTypes like ``CategoricalInt64`` and ``CategoricalObject`` such that +``common_dtype(CategoricalInt64, String)`` raises an error, but +``common_dtype(CategoricalObject, String)`` returns an ``object`` DType. In +our scheme, ``Categorical`` is an abstract type with ``CategoricalInt64`` and +``CategoricalObject`` subclasses. + + +Rules for the class structure, illustrated :ref:`below `: + +1. Abstract DTypes cannot be instantiated. Instantiating an abstract DType + raises an error, or perhaps returns an instance of a concrete subclass. 
+   Raising an error will be the default behavior and may be required initially.
+
+2. While abstract DTypes may be superclasses, they may also act like Python's
+   abstract base classes (ABC) allowing registration instead of subclassing.
+   It may be possible to simply use or inherit from Python ABCs.
+
+3. Concrete DTypes may not be subclassed. In the future this might be relaxed
+   to allow specialized implementations such as a GPU float64 subclassing a
+   NumPy float64.
+
+The
+`Julia language `_
+has a similar prohibition against subclassing concrete types.
+For example, methods such as ``__common_instance__`` or ``__common_dtype__``
+(described later) cannot work for a subclass unless they were designed
+very carefully.
+It helps avoid unintended vulnerabilities to implementation changes that
+result from subclassing types that were not written to be subclassed.
+We believe that the DType API should rather be extended to simplify wrapping
+of existing functionality.
+
+The DType class requires C-side storage of methods and additional information,
+to be implemented by a ``DTypeMeta`` class. Each ``DType`` class is an
+instance of ``DTypeMeta`` with a well-defined and extensible interface;
+end users need not be aware of it.
+
+.. _nep42_hierarchy_figure:
+.. figure:: _static/dtype_hierarchy.svg
+    :figclass: align-center

-    np.array(np.array(1, dtype=Categorical)[()])

-requires to pass the original ``dtype=Categorical`` or returns an array
-with dtype ``object``.
-While a general limitation, the round-tripping shall always be possible if
-``dtype=old_dtype`` is provided.

+Miscellaneous methods and attributes
+==============================================================================
+
+This section collects definitions in the DType class that are not used in
+casting and array coercion, which are described in detail below.
+
+* Existing dtype methods and C-side fields are preserved.
+
+* ``DType.type`` replaces ``dtype.type``. Unless a use case arises,
+  ``dtype.type`` will be deprecated.
+  This indicates a Python scalar type which represents the same values as
+  the DType. This is the same type as used in the proposed `Class getter`_
+  and for `DType discovery during array coercion`_.
+  (This may also be set for abstract DTypes; this is necessary
+  for array coercion.)
+
+* A new ``self.canonical`` property generalizes the notion of byte order to
+  indicate whether data has been stored in a default/canonical way. For
+  existing code, "canonical" will just signify native byte order, but it can
+  take on new meanings in new DTypes -- for instance, to distinguish a
+  complex-conjugated instance of Complex which stores ``real - imag`` instead
+  of ``real + imag`` and is thus not the canonical storage. The ISNBO ("is
+  native byte order") flag might be repurposed as the canonical flag.
+
+* Support is included for parametric DTypes. As explained in
+  :ref:`NEP 40 `, parametric types have a
+  value associated with them. A DType will be deemed parametric if it
+  inherits from ParametricDType.
+
+  Strings are one example of a parametric type -- ``S8`` is different from
+  ``S4`` because ``S4`` cannot store a length 8 string such as ``"length 8"``
+  while ``S8`` can.
+  Similarly, the ``datetime64`` DType is parametric, since its unit must be specified.
+  The associated ``type`` is the ``np.datetime64`` scalar.
+
+* DType methods may resemble or even reuse existing Python slots. 
Thus Python
+  special slots are off-limits for user-defined DTypes (for instance, defining
+  ``Unit("m") > Unit("cm")``), since we may want to develop a meaning for these
+  operators that is common to all DTypes.
+
+* Sorting functions are moved to the DType class. They may be implemented by
+  defining a method ``dtype_get_sort_function(self, sortkind="stable") ->
+  sortfunction`` that must return ``NotImplemented`` if the given ``sortkind``
+  is not known.
+
+* Functions that cannot be removed are implemented as special methods.
+  Many of these were previously defined as part of the :c:type:`PyArray_ArrFuncs`
+  slot of the dtype instance (``PyArray_Descr *``) and include functions
+  such as ``nonzero``, ``fill`` (used for ``np.arange``), and
+  ``fromstr`` (used to parse text files).
+  These old methods will be deprecated and replacements
+  following the new design principles will be added.
+  The API is not defined here. Since these methods can be deprecated and renamed
+  replacements added, it is acceptable if these new methods have to be modified.
+
+* Use of ``kind`` for non-built-in types is discouraged in favor of
+  ``isinstance`` checks. ``kind`` will return the ``__qualname__`` of the
+  object to ensure uniqueness for all DTypes. On the C side, ``kind`` and
+  ``char`` are set to ``\0`` (NULL character).
+  While ``kind`` will be discouraged, the current ``np.issubdtype``
+  may remain the preferred method for this type of check.
+
+* A method ``ensure_canonical(self) -> dtype`` returns a new dtype (or
+  ``self``) with the ``canonical`` flag set.
+
+* Since NumPy's approach is to provide functionality through ufuncs,
+  functions like sorting that will be implemented in DTypes might eventually be
+  reimplemented as generalized ufuncs.
+
+.. _casting:
+
+******************************************************************************
+Casting
+******************************************************************************

-**Example:**
+We review here the operations related to casting arrays:

-The current datetime DType requires a ``__discover_descr_from_pyobject__``
-which returns the correct unit for string inputs. This allows it to support
-the current::
+- Finding the "common dtype," currently exposed by ``np.promote_types`` or
+  ``np.result_type``

-    np.array(["2020-01-02", "2020-01-02 11:24"], dtype="M8")
+- The result of calling ``np.can_cast``

-By inspecting the date strings. Together with the below common dtype
-operation, this allows it to automatically find that the datetime64 unit
-should be "minutes".
+We show how casting arrays with ``arr.astype(new_dtype)`` will be implemented.

+`Common DType` operations
+==============================================================================

-Common DType Operations
-^^^^^^^^^^^^^^^^^^^^^^^
+Common-type operations are vital for array coercion when input types are
+mixed. They determine the output dtype of ``np.concatenate()`` and are useful
+in themselves.

-NumPy currently provides functions like ``np.result_type`` and
-``np.promote_types`` for determining common types.
+NumPy provides ``np.result_type`` and
+``np.promote_types``.
 These differ in that ``np.result_type`` can take arrays and scalars as input
-and implements value based promotion [1]_.
+and implements value-based promotion [1]_.

-To distinguish between the promotion occurring during universal function application,
-we will call it "common type" operation here. 
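+
+For example (an illustration of current behavior, not new semantics; exact
+results follow NumPy's promotion rules at the time of writing)::
+
+    np.promote_types(np.int16, np.float16)  # dtype("float32")
+    # value-based promotion inspects the scalar's value:
+    np.result_type(np.int8, 1024)  # dtype("int16")
+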
+To distinguish between the promotion occurring during universal function +application, we will call it "common type" operation here. **Motivation:** -Common type operations are vital for array coercion when different -input types are mixed. -They also provide the logic currently used to decide the output dtype of -``np.concatenate()`` and on their own are quite useful. Furthermore, common type operations may be used to find the correct dtype to use for functions with different inputs (including universal functions). @@ -684,32 +460,33 @@ This includes an interesting distinction: (Hypothetical example: ``float_arr + string_arr -> string``, but the output string length is not the same as ``np.concatenate(float_arr, string_arr)).dtype``.) -2. Array coercion and concatenation require the common dtype *instance*. -**Implementation:** -The implementation of the common dtype (instance) determination -has some overlap with casting. -Casting from a specific dtype (Float64) to a String needs to find -the correct string length (a step that is mainly necessary for parametric dtypes). +2. Array coercion and concatenation require the common dtype *instance*. + +**Implementation:** The implementation of the common dtype (instance) +determination has some overlap with casting. Casting from a specific dtype +(Float64) to a String needs to find the correct string length (a step that is +mainly necessary for parametric dtypes). We propose the following implementation: -1. ``__common_dtype__(cls, other : DTypeMeta) -> DTypeMeta`` answers what the common - DType class is given two DType class objects. - It may return ``NotImplemented`` to defer to ``other``. - (For abstract DTypes, subclasses get precedence, concrete types are always - leaves, so always get preference or are tried from left to right). +1. ``__common_dtype__(cls, other : DTypeMeta) -> DTypeMeta`` answers what the + common DType class is, given two DType class objects. It may return + ``NotImplemented`` to defer to ``other``. (For abstract DTypes, subclasses + get precedence, concrete types are never superclasses, so always get preference + or are tried from left to right). + 2. ``__common_instance__(self: SelfT, other : SelfT) -> SelfT`` is used when two instances of the same DType are given. - For builtin dtypes (that are not parametric), this - currently always returns ``self`` (but ensures native byte order). + For built-in dtypes (that are not parametric), this + currently always returns ``self`` (but ensures canonical representation). This is to preserve metadata. We can thus provide a default implementation for non-parametric user dtypes. These two cases do *not* cover the case where two different dtype instances -need to be promoted. For example `">float64"` and `"S8"`. -The solution is partially "outsourced" to the casting machinery by -splitting the operation up into three steps: +need to be promoted. For example `">float64"` and `"S8"`. The solution is +partially "outsourced" to the casting machinery by splitting the operation up +into three steps: 1. ``Float64.__common_dtype__(type(>float64), type(S8))`` returns `String` (or defers to ``String.__common_dtype__``). @@ -717,15 +494,13 @@ splitting the operation up into three steps: to `"S32"` (see below for how casting will be defined). 3. ``String.__common_instance__("S8", "S32")`` returns the final `"S32"`. -The main reason for this is to avoid the need to implement -identical functionality multiple times. 
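+
+In today's NumPy the combined effect of these three steps is already
+observable; the concrete string length below follows the current casting
+rules::
+
+    np.promote_types(np.float64, np.dtype("S8"))  # dtype("S32")
+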
-The design (together with casting) naturally separates the concerns of
-different Datatypes.
-In the above example, Float64 does not need to know about the cast.
-While the casting machinery (``CastingImpl[Float64, String]``)
-could include the third step, it is not required to do so and the string
-can always be extended (e.g. with new encodings) without extending the
-``CastingImpl[Float64, String]``.
+The main reason for this is to avoid the need to implement identical
+functionality multiple times. The design (together with casting) naturally
+separates the concerns of different datatypes. In the above example, Float64
+does not need to know about the cast. While the casting machinery
+(``CastingImpl[Float64, String]``) could include the third step, it is not
+required to do so and the string can always be extended (e.g. with new
+encodings) without extending the ``CastingImpl[Float64, String]``.

 This means the implementation will work like this::

@@ -744,7 +519,7 @@ This means the implementation will work like this::
         # Find what dtype1 is cast to when cast to the common DType
         # by using the CastingImpl as described below:
         castingimpl = get_castingimpl(type(dtype1), common)
-        safety, (_, dtype1) = castingimpl.adjust_descriptors((dtype1, None))
+        safety, (_, dtype1) = castingimpl.resolve_descriptors((dtype1, None))
         assert safety == "safe"  # promotion should normally be a safe cast

     if type(dtype2) is not common:
@@ -755,12 +530,10 @@ This means the implementation will work like this::

 Some of these steps may be optimized for non-parametric DTypes.

-**Note:**
-
-A currently implemented fallback for the ``__common_dtype__`` operation
-is to use the "safe" casting logic.
-Since ``int16`` can safely cast to ``int64``, it is clear that
-``np.promote_types(int16, int64)`` should be ``int64``.
+**Note:** A currently implemented fallback for the ``__common_dtype__``
+operation is to use the "safe" casting logic. Since ``int16`` can safely cast
+to ``int64``, it is clear that ``np.promote_types(int16, int64)`` should be
+``int64``.

 However, this cannot define all such operations, and will fail for example
 for::
@@ -771,39 +544,35 @@ in most cases a safe-cast implies that this will be the result of the
 ``__common_dtype__`` method.

 Note that some exceptions may apply. For example casting ``int32`` to
-a (long enough) string is – at least at this time – considered "safe".
+a (long enough) string is, at least at this time, considered "safe".
 However ``np.promote_types(int32, String)`` will *not* be defined.

-**Alternatives:**
-
-The use of casting for common dtype (instance) determination neatly separates
-the concerns and allows for a minimal set of duplicate functionality
-being implemented.
-In cases of mixed DType (classes), it also adds an additional step
-to finding the common dtype.
-The common dtype (of two instances) could thus be implemented explicitly to avoid
-this indirection, potentially only as a fast-path.
-The above suggestion assumes that this is, however, not a speed relevant path,
-since in most cases, e.g. in array coercion, only a single Python type (and thus
-dtype) is involved.
-The proposed design hinges in the implementation of casting to be
-separated into its own ufunc-like object as described below.
+**Alternatives:** The use of casting for common dtype (instance) determination
+neatly separates the concerns and allows for a minimal set of duplicate
+functionality being implemented. 
In cases of mixed DType (classes), it also
+adds an additional step to finding the common dtype. The common dtype (of two
+instances) could thus be implemented explicitly to avoid this indirection,
+potentially only as a fast-path. The above suggestion assumes that this is,
+however, not a speed-relevant path, since in most cases, e.g. in array
+coercion, only a single Python type (and thus dtype) is involved. The proposed
+design hinges on the implementation of casting being separated into its own
+ufunc-like object as described below.

 In principle common DType could be defined only based on "safe casting" rules,
 if we order all DTypes and find the first one both can cast to safely.

 However, the issue with this approach is that a newly added DType can change
 the behaviour of an existing program. For example, a new ``int24`` would be
-the first valid common type for ``int16`` and ``uint16``, demoting the currently
-defined behaviour of ``int32``.
-This API extension could be allowed in the future, while adding it may be
-more involved, the current proposal for defining casts is fully opaque in
-this regard and thus extensible.
-
-**Example:**
-
-``object`` always chooses ``object`` as the common DType. For ``datetime64``
-type promotion is defined with no other datatype, but if someone were to
-implement a new higher precision datetime, then::
+the first valid common type for ``int16`` and ``uint16``, demoting the
+currently defined behavior of ``int32``.
+Both the need for a linear type hierarchy and the potential to change
+existing behavior by adding a new DType are downsides to using a generic
+rule based on "safe casting".
+However, a more generic common DType could be implemented in the future, since
+``__common_dtype__`` can in principle use casting information internally.
+
+**Example:** ``object`` always chooses ``object`` as the common DType. For
+``datetime64`` type promotion is defined with no other datatype, but if
+someone were to implement a new higher precision datetime, then::

     HighPrecisionDatetime.__common_dtype__(np.dtype[np.datetime64])

@@ -811,33 +580,31 @@ would return ``HighPrecisionDatetime``, and the below casting
 may need to decide how to handle the datetime unit.


-Casting
-^^^^^^^
+The cast operation
+==============================================================================

-Maybe the most complex and interesting operation which is provided
-by DTypes is the ability to cast from one dtype to another.
-The casting operation is much like a typical function (universal function) on
-arrays converting one input to a new output.
-There are mainly two distinctions:
+Perhaps the most complex and interesting DType operation is casting. Casting
+is much like a typical universal function on arrays, converting one input to a
+new output. There are two key distinctions:

-1. Casting always requires an explicit output datatype to be given.
-2. The NumPy iterator API requires access to lower-level functions than
-   is currently necessary for universal functions.
+1. Casting always requires an explicit output datatype.
+2. The NumPy iterator API requires access to functions that are lower-level
+   than what universal functions currently need.

-Casting from one dtype to another can be complex, and generally a casting
-function may not implement all details of each input datatype (such as
-non-native byte order or unaligned access). 
-Thus casting naturally is performed in up to three steps:
+Casting can be complex, and may not implement all details of each input
+datatype (such as non-native byte order or unaligned access). Thus casting
+naturally is performed in up to three steps:

-1. The input datatype is normalized and prepared for the actual cast.
+1. The given datatype is normalized and prepared for the actual cast.
 2. The cast is performed.
 3. The cast result, which is in a normalized form, is cast to the requested
    form (non-native byte order).

-although often only step 2. is required.
+Often only step 2 is required.

 Further, NumPy provides different casting kinds or safety specifiers:

+* "equivalent"
 * "safe"
 * "same_kind"
 * "unsafe"
@@ -845,66 +612,63 @@ Further, NumPy provides different casting kinds or safety specifiers:

 and in some cases a cast may even be represented as a simple view.

-**Motivation:**
-
-Similar to the common dtype/DType operation above, we again have two use cases:
+**Motivation:** Similar to the common dtype/DType operation above, we again
+have two use cases:

 1. ``arr.astype(np.String)`` (current spelling ``arr.astype("S")``)
-2. ``arr.astype(np.dtype("S8"))``.
+2. ``arr.astype(np.dtype("S8"))``

-Where the first case is also noted in NEP 40 and 41 as a design goal, since
+where the first case is also noted in NEP 40 and 41 as a design goal, since
 ``np.String`` could also be an abstract DType as mentioned above.

 The implementation of casting should also come with as little duplicate
-implementation as necessary, i.e. to avoid unnecessary methods on the
-DTypes.
+implementation as necessary, i.e. to avoid unnecessary methods on the DTypes.
 Furthermore, it is desirable that casting is implemented similar to
 universal functions.

 Analogous to the above, the following also need to be defined:

 1. ``np.can_cast(dtype, DType, "safe")`` (instance to class)
-2. ``np.can_cast(dtype, other_dtype, "safe")`` (casting an instance to another instance)
+2. ``np.can_cast(dtype, other_dtype, "safe")`` (casting an instance to another
+   instance)

-overloading the meaning of ``dtype`` to mean either class or instance
-(on the Python level).
-The question of ``np.can_cast(DType, OtherDType, "safe")`` is also a
-possibility and may be used internally.
-However, it is initially not necessary to expose to Python.
+overloading the meaning of ``dtype`` to mean either class or instance (on the
+Python level). The question of ``np.can_cast(DType, OtherDType, "safe")`` is
+also a possibility and may be used internally. However, it is initially not
+necessary to expose to Python.

-**Implementation:**
-
-During DType creation, DTypes will have the ability to pass a list of
-``CastingImpl`` objects, which can define casting to and from the DType.
-One of these ``CastingImpl`` objects is special because it should define
-the cast within the same DType (from one instance to another).
-A DType which does not define this, must have only a single implementation
-and not be parametric.
+**Implementation:** During DType creation, DTypes will have the ability to
+pass a list of ``CastingImpl`` objects, which can define casting to and from
+the DType. One of these ``CastingImpl`` objects is special because it should
+define the cast within the same DType (from one instance to another). A DType
+which does not define this must have only a single implementation and not be
+parametric.

 Each ``CastingImpl`` has a specific DType signature:
-``CastingImpl[InputDtype, RequestedDtype]``. 
-And implements the following methods and attributes: +``CastingImpl[InputDtype, RequestedDtype]`` +and implements the following methods and attributes: -* ``adjust_descriptors(self, Tuple[DType] : input) -> casting, Tuple[DType]``. +* ``resolve_descriptors(self, Tuple[DType] : input) -> casting, Tuple[DType]``. Here ``casting`` signals the casting safeness (safe, unsafe, or same-kind) and the output dtype tuple is used for more multi-step casting (see below). * ``get_transferfunction(...) -> function handling cast`` (signature to be decided). This function returns a low-level implementation of a strided casting function ("transfer function"). -* ``cast_kind`` attribute with one of safe, unsafe, or same-kind. Used to +* ``casting`` attribute with one of equivalent, safe, unsafe, or same-kind. Used to quickly decide casting safety when this is relevant. -``adjust_descriptors`` provides information about whether or +``resolve_descriptors`` provides information about whether or not a cast is safe and is of importance mainly for parametric DTypes. ``get_transferfunction`` provides NumPy with a function capable of performing the actual cast. Initially the implementation of ``get_transferfunction`` -will be *private*, and users will only be able to provide contiguous loops +will be *private*, and users will only be able to provide strided loops with the signature. -**Performing the Cast:** +**Performing the cast** .. _cast_figure: + .. figure:: _static/casting_flow.svg :figclass: align-center @@ -916,141 +680,116 @@ an ``int24`` to an ``S8`` string (which can hold all 24bit integers). Due to this limited implementation, the full cast has to do multiple conversions. The full process is: -1. Call ``CastingImpl[Int24, String].adjust_descriptors((int24, "S20"))``. +1. Call ``CastingImpl[Int24, String].resolve_descriptors((int24, "S20"))``. This provides the information that ``CastingImpl[Int24, String]`` only - implements the cast of ``int24`` to ``"S8``. + implements the cast of ``int24`` to ``"S8"``. 2. Since ``"S8"`` does not match ``"S20"``, use ``CastingImpl[String, String].get_transferfunction()`` to find the transfer (casting) function to convert an ``"S8"`` into an ``"S20"`` 3. Fetch the transfer function to convert an ``int24`` to an ``"S8"`` using ``CastingImpl[Int24, String].get_transferfunction()`` 4. Perform the actual cast using the two transfer functions: - ``int24(42) -> S8("42") -> S20("42")``. + ``int24(42) -> S8("42") -> S20("42")``. -Note that in this example the ``adjust_descriptors`` function plays a less +Note that in this example the ``resolve_descriptors`` function plays a less central role. It becomes more important for ``np.can_cast``. -Further, ``adjust_descriptors`` allows the implementation for +Further, ``resolve_descriptors`` allows the implementation for ``np.array(42, dtype=int24).astype(String)`` to call -``CastingImpl[Int24, String].adjust_descriptors((int24, None))``. +``CastingImpl[Int24, String].resolve_descriptors((int24, None))``. In this case the result of ``(int24, "S8")`` defines the correct cast: ``np.array(42, dtype=int24),astype(String) == np.array("42", dtype="S8")``. -**Casting Safety:** +**Casting safety** -To answer the question of casting safety -``np.can_cast(int24, "S20", casting="safe")``, only the ``adjust_descriptors`` -function is required and called is in the same way as in -`the figure describing a cast `_. 
-In this case, the calls to ``adjust_descriptors``, will also provide the
-information that ``int24 -> "S8"`` as well as ``"S8" -> "S20"`` are safe casts,
-and thus also the ``int24 -> "S20"`` is a safe cast.
+To answer the question of casting safety ``np.can_cast(int24, "S20",
+casting="safe")``, only the ``resolve_descriptors`` function is required and
+is called in the same way as in `the figure describing a cast `_.
+In this case, the calls to ``resolve_descriptors`` will also provide the
+information that ``int24 -> "S8"`` as well as ``"S8" -> "S20"`` are safe
+casts, and thus also the ``int24 -> "S20"`` is a safe cast.

-The casting safety can currently be "equivalent" when a cast is both safe
-and can be performed using only a view.
-The information that a cast is a simple "view" will instead be handled by
-an additional flag. Thus the ``casting`` can have the 6 values in total:
-safe, unsafe, same-kind as well as safe+view, unsafe+view, same-kind+view.
-Where the current "equivalent" is the same as safe+view.
+In some cases, no cast is necessary. For example, on most Linux systems
+``np.dtype("long")`` and ``np.dtype("longlong")`` are different dtypes but are
+both 64bit integers.
+In this case, the cast can be performed using ``long_arr.view("longlong")``.
+The information that a cast is a
+"view" will be handled by an additional flag. Thus ``casting``
+can take 8 values in total: equivalent, safe, unsafe, and same-kind, as well
+as equivalent+view, safe+view, unsafe+view, and same-kind+view.
+NumPy currently defines ``dtype1 == dtype2`` to be True only if byte order matches.
+This functionality can be replaced with the combination of "equivalent" casting
+and the "view" flag.

-(For more information on the ``adjust_descriptor`` signature see the
-C-API section below.)
+(For more information on the ``resolve_descriptors`` signature see the C-API
+section below and NEP 43.)

-**Casting between instances of the same DType:**
+**Casting between instances of the same DType**

-In general one of the casting implementations define by the DType implementor
+In general one of the casting implementations defined by the DType implementor
 must be ``CastingImpl[DType, DType]`` (unless there is only a singleton
-instance).
-To keep the casting to as few steps as possible, this implementation must
-be capable any conversions between all instances of this DType.
+instance). To keep the casting to as few steps as possible, this
+implementation must initially be capable of performing any conversion between
+instances of this DType.

-**General Multi-Step Casting**
+**General multistep casting**

 In general we could implement certain casts, such as ``int8`` to ``int24``
-even if the user only provides an ``int16 -> int24`` cast.
+even if the user only provides an ``int16 -> int24`` cast. 
This proposal +currently does not provide this functionality. However, it could be extended +in the future to either find such casts dynamically, or at least allow +``resolve_descriptors`` to return arbitrary ``dtypes``. If ``CastingImpl[Int8, +Int24].resolve_descriptors((int8, int24))`` returns ``(int16, int24)``, the +actual casting process could be extended to include the ``int8 -> int16`` +cast. This adds an additional step to the casting process. + + +**Alternatives:** The choice of using only the DType classes in the first step +of finding the correct ``CastingImpl`` means that the default implementation +of ``__common_dtype__`` has a reasonable definition of "safe casting" between DTypes classes (although e.g. the concatenate operation using it may still fail when attempting to find the actual common instance or cast). -The split into multiple steps may seem to add complexity -rather than reduce it, however, it consolidates that we have the two distinct -signatures of ``np.can_cast(dtype, DTypeClass)`` and ``np.can_cast(dtype, other_dtype)``. +The split into multiple steps may seem to add complexity rather than reduce +it, however, it consolidates that we have the two distinct signatures of +``np.can_cast(dtype, DTypeClass)`` and ``np.can_cast(dtype, other_dtype)``. Further, the above API guarantees the separation of concerns for user DTypes. -The user ``Int24`` dtype does not have to handle all string lengths if it -does not wish to do so. Further, if an encoding was added to the ``String`` -DType, this does not affect the overall cast. -The ``adjust_descriptor`` function can keep returning the default encoding -and the ``CastingImpl[String, String]`` can take care of any necessary encoding -changes. +The user ``Int24`` dtype does not have to handle all string lengths if it does +not wish to do so. Further, if an encoding was added to the ``String`` DType, +this does not affect the overall cast. The ``resolve_descriptors`` function can +keep returning the default encoding and the ``CastingImpl[String, String]`` +can take care of any necessary encoding changes. The main alternative to the proposed design is to move most of the information -which is here pushed into the ``CastingImpl`` directly into methods -on the DTypes. This, however, will not allow the close similarity between casting -and universal functions. On the up side, it reduces the necessary indirection -as noted below. - -An initial proposal defined two methods ``__can_cast_to__(self, other)`` -to dynamically return ``CastingImpl``. -The advantage of this addition is that it removes the requirement to know all -possible casts at DType creation time (of one of the involved DTypes). -Such API could be added at a later time. It should be noted, however, -that it would be mainly useful for inheritance-like logic, which can be -problematic. As an example two different ``Float64WithUnit`` implementations -both could infer that they can unsafely cast between one another when in fact -some combinations should cast safely or preserve the Unit (both of which the -"base" ``Float64`` would discard). -In the proposed implementation this is not possible, since the two implementations -are not aware of each other. - - -**Notes:** - -The proposed ``CastingImpl`` is designed to be compatible with the -``UFuncImpl`` proposed in NEP 43. -While initially it will be a distinct object or C-struct, the aim is that -``CastingImpl`` can be a subclass or extension of ``UFuncImpl``. 
-Once this happens, this may naturally allow the use of a ``CastingImpl`` to -pass around a specialized casting function directly. - -In the future, we may consider adding a way to spell out that specific -casts are known to be *not* possible. - -In the above text ``CastingImpl`` is described as a Python object. In practice, -the current plan is to implement it as a C-side structure stored on the ``from`` -datatype. -A Python side API to get an equivalent ``CastingImpl`` object will be created, -but storing it (similar to the current implementation) on the ``from`` datatype -avoids the creation of cyclic reference counts. - -The way dispatching works for ``CastingImpl`` is planned to be limited initially -and fully opaque. -In the future, it may or may not be moved into a special UFunc, or behave -more like a universal function. - - -**Example:** - -The implementation for casting integers to datetime would currently generally -say that this cast is unsafe (it is always an unsafe cast). -Its ``adjust_descriptors`` functions may look like:: - - def adjust_descriptors(input): - from_dtype, to_dtype = input +which is here pushed into the ``CastingImpl`` directly into methods on the +DTypes. This, however, will not allow the close similarity between casting and +universal functions. On the up side, it reduces the necessary indirection as +noted below. + +An initial proposal defined two methods ``__can_cast_to__(self, other)`` to +dynamically return ``CastingImpl``. The advantage of this addition is that it +removes the requirement to define all possible casts at DType creation time (of +one of the involved DTypes). +Such API could be added at a later time. This is similar to Python which +provides ``__getattr__`` for additional control over attribute lookup. + +**Notes:** The proposed ``CastingImpl`` is designed to be identical to the +``PyArrayMethod`` proposed in NEP 43 as part of restructuring ufuncs to handle +new DTypes. + +The way dispatching works for ``CastingImpl`` is planned to be limited +initially and fully opaque. In the future, it may or may not be moved into a +special UFunc, or behave more like a universal function. + + +**Example:** The implementation for casting integers to datetime would generally +say that this cast is unsafe (because it is always an unsafe cast). +Its ``resolve_descriptors`` function may look like:: + + def resolve_descriptors(self, given_dtypes): + from_dtype, to_dtype = given_dtypes from_dtype = from_dtype.ensure_canonical() # ensure not byte-swapped if to_dtype is None: @@ -1065,26 +804,384 @@ Its ``adjust_descriptors`` functions may look like:: .. note:: - While NumPy currently defines some of these casts, with the possible - exception of the unit-less ``timedelta64`` it may be better to not - define these cast at all. In general we expect that user defined - DTypes will be using other methods such as ``unit.drop_unit(arr)`` - or ``arr * unit.seconds``. + While NumPy currently defines integer to datetime casts, with the possible + exception of the unit-less ``timedelta64`` it may be better to not define + these casts at all. In general we expect that user defined DTypes will be + using custom methods such as ``unit.drop_unit(arr)`` or ``arr * + unit.seconds``. + + +****************************************************************************** +Array coercion +****************************************************************************** + +The following sections discuss the two aspects related to creating an array from +arbitrary python objects. 
This requires a defined protocol to store data +inside the array. Further, it requires the ability to find the correct dtype +when a user does not provide the dtype explicitly. + +Coercion to and from Python objects +============================================================================== + +**Motivation:** When storing a single value in an array or taking it out, it +is necessary to coerce (convert) it to and from the low-level representation +inside the array. + +**Description:** Coercing to and from Python scalars requires two to three +methods: + +1. ``__dtype_setitem__(self, item_pointer, value)`` +2. ``__dtype_getitem__(self, item_pointer, base_obj) -> object``; + ``base_obj`` is for memory management and usually ignored; it points to + an object owning the data. Its only role is to support structured datatypes + with subarrays within NumPy, which currently return views into the array. + The function returns an equivalent Python scalar (i.e. typically a NumPy + scalar). +3. ``__dtype_get_pyitem__(self, item_pointer, base_obj) -> object`` (initially + hidden for new-style user-defined datatypes, may be exposed on user + request). This corresponds to the ``arr.item()`` method also used by + ``arr.tolist()`` and returns Python floats, for example, instead of NumPy + floats. + +(The above is meant for C-API. A Python-side API would have to use byte +buffers or similar to implement this, which may be useful for prototyping.) + +These largely correspond to the current definitions. When a certain scalar +has a known (different) dtype, NumPy may in the future use casting instead of +``__dtype_setitem__``. A user datatype is (initially) expected to implement +``__dtype_setitem__`` for its own ``DType.type`` and all basic Python scalars +it wishes to support (e.g. ``int`` and ``float``). In the future a +function "``known_scalartype``" may be made public to allow a user dtype to signal +which Python scalars it can store directly. + + +**Implementation:** The pseudocode implementation for setting a single item in +an array from an arbitrary Python object ``value`` is (note that some +functions are only defined below):: + + def PyArray_Pack(dtype, item_pointer, value): + DType = type(dtype) + if DType.type is type(value) or DType.known_scalartype(type(value)): + return dtype.__dtype_setitem__(item_pointer, value) + + # The dtype cannot handle the value, so try casting: + arr = np.array(value) + if arr.dtype is object or arr.ndim != 0: + # not a numpy or user scalar; try using the dtype after all: + return dtype.__dtype_setitem__(item_pointer, value) + + arr.astype(dtype) + item_pointer.write(arr[()]) + +where the call to ``np.array()`` represents the dtype discovery and is +not actually performed. + +**Example:** Current ``datetime64`` returns ``np.datetime64`` scalars and can +be assigned from ``np.datetime64``. However, the datetime +``__dtype_setitem__`` also allows assignment from date strings ("2016-05-01") +or Python integers. Additionally the datetime ``__dtype_get_pyitem__`` +function actually returns a Python ``datetime.datetime`` object (most of the +time). + + +**Alternatives:** This functionality could also be implemented as a cast to and +from the ``object`` dtype. +However, coercion is slightly more complex than typical casts. +One reason is that in general a Python object could itself be a +zero-dimensional array or scalar with an associated DType. 
Such an object has a DType, and the correct cast to another DType is already
defined::

    np.array(np.float32(4), dtype=object).astype(np.float64)

is identical to::

    np.array(4, dtype=np.float32).astype(np.float64)

Implementing the first ``object`` to ``np.float64`` cast explicitly would
require the user to duplicate or fall back to existing casting functionality.

It is certainly possible to describe the coercion to and from Python objects
using the general casting machinery, but the ``object`` dtype is special and
important enough to be handled by NumPy using the presented methods.

**Further Issues and Discussion:** The ``__dtype_setitem__`` function
currently duplicates some code, such as coercion from a string.
``datetime64`` allows assignment from string, but the same conversion also
occurs for casting from the string dtype to ``datetime64``. In the future, we
may expose the ``known_scalartype`` function to allow the user to implement
such duplication.
For example, NumPy would normally use
``np.array(np.string_("2019")).astype(datetime64)``, but ``datetime64`` could
choose to use its ``__dtype_setitem__`` instead, e.g. for performance reasons.

There is an issue about how subclasses of scalars should be handled.
We anticipate that NumPy will stop automatically detecting the dtype for
``np.array(float64_subclass)`` as float64.
The user can still provide ``dtype=np.float64``.
However, the above automatic casting using
``np.array(scalar_subclass).astype(requested_dtype)`` will fail.
In many cases, this is not an issue, since the Python ``__float__`` protocol
can be used instead. But in some cases, this will mean that subclasses of
Python scalars will behave differently.

.. note::

    *Example:* ``np.complex256`` should not use ``__float__`` in its
    ``__dtype_setitem__`` method in the future unless it is a known floating
    point type. If the scalar is a subclass of a different high precision
    floating point type (e.g. ``np.float128``) then this currently loses
    precision without notifying the user.
    In that case ``np.array(float128_subclass(3), dtype=np.complex256)``
    may fail unless the ``float128_subclass`` is first converted to the
    ``np.float128`` base class.


DType discovery during array coercion
==============================================================================

An important step in the use of NumPy arrays is creation of the array
from collections of generic Python objects.

**Motivation:** Although the distinction is not clear currently, there are
two main needs::

    np.array([1, 2, 3, 4.])

needs to guess the correct dtype based on the Python objects inside.
Such an array may include a mix of datatypes, as long as they can be
promoted.
A second use case is when users provide the output DType class, but not the
specific DType instance::

    np.array([object(), None], dtype=np.dtype[np.string_])  # (or `dtype="S"`)

In this case the user indicates that ``object()`` and ``None`` should be
interpreted as strings.
The need to consider the user provided DType also arises for a future
``Categorical``::

    np.array([1, 2, 1, 1, 2], dtype=Categorical)

which must interpret the numbers as unique categorical values rather than
integers.

There are three further issues to consider:

1. It may be desirable to create datatypes associated
   with normal Python scalars (such as ``datetime.datetime``) that do not
   have a ``dtype`` attribute already.
2. In general, a datatype could represent a sequence; however, NumPy currently
   assumes that sequences are always collections of elements
   (the sequence cannot be an element itself).
   An example would be a ``vector`` DType.
3. An array may itself contain arrays with a specific dtype (even
   general Python objects). For example:
   ``np.array([np.array(None, dtype=object)], dtype=np.String)``
   poses the issue of how to handle the included array.

Some of these difficulties arise because finding the correct shape
of the output array and finding the correct datatype are closely related.

**Implementation:** There are two distinct cases above:

1. The user has provided no dtype information.
2. The user provided a DType class -- as represented, for example, by ``"S"``
   representing a string of any length.

In the first case, it is necessary to establish a mapping from the Python
type(s) of the constituent elements to the DType class.
Once the DType class is known, the correct dtype instance needs to be found.
In the case of strings, this requires finding the string length.

These two cases shall be implemented by leveraging two pieces of information:

1. ``DType.type``: The current type attribute to indicate which Python scalar
   type is associated with the DType class (this is a *class* attribute that
   always exists for any datatype and is not limited to array coercion).
2. ``__discover_descr_from_pyobject__(cls, obj) -> dtype``: A classmethod that
   returns the correct descriptor given the input object.
   Note that only parametric DTypes have to implement this.
   For non-parametric DTypes using the default instance will always be
   acceptable.

The Python scalar type which is already associated with a DType through the
``DType.type`` attribute maps from the DType to the Python scalar type.
At registration time, a DType may choose to allow automatic discovery of this
Python scalar type.
This requires a lookup in the opposite direction, which will be implemented
using a global mapping (dictionary-like) of::

    known_python_types[type] = DType

Correct garbage collection requires additional care.
If both the Python scalar type (``pytype``) and ``DType`` are created
dynamically, they will potentially be deleted again.
To allow this, it must be possible to make the above mapping weak.
This requires that the ``pytype`` holds a reference to ``DType`` explicitly.
Thus, in addition to building the global mapping, NumPy will store the
``DType`` as ``pytype.__associated_array_dtype__`` in the Python type.
This does *not* define the mapping and should *not* be accessed directly.
In particular, potential inheritance of the attribute does not mean that NumPy
will use the superclass's ``DType`` automatically. A new ``DType`` must be
created for the subclass.

.. note::

    Python integers do not have a clear/concrete NumPy type associated right
    now. This is because during array coercion NumPy currently finds the first
    type capable of representing their value in the list of `long`, `unsigned
    long`, `int64`, `unsigned int64`, and `object` (on many machines `long` is
    64 bit).

    Instead they will need to be implemented using an ``AbstractPyInt``. This
    DType class can then provide ``__discover_descr_from_pyobject__`` and
    return the actual dtype which is e.g. ``np.dtype("int64")``.
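    A minimal sketch of how such a classmethod could behave (the promotion
    rules below are illustrative only, not proposed behaviour)::

        @classmethod
        def __discover_descr_from_pyobject__(cls, obj):
            # Hypothetical rule: prefer int64, fall back to uint64 for
            # large positive values, and to object beyond that.
            if np.iinfo(np.int64).min <= obj <= np.iinfo(np.int64).max:
                return np.dtype(np.int64)
            if 0 <= obj <= np.iinfo(np.uint64).max:
                return np.dtype(np.uint64)
            return np.dtype(object)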
    For dispatching/promotion in ufuncs, it will also be necessary to
    dynamically create ``AbstractPyInt[value]`` classes (creation can be
    cached), so that they can provide the current value-based promotion
    functionality provided by ``np.result_type(python_integer, array)`` [1]_.

To allow a DType to accept inputs as scalars that are not basic Python types
or instances of ``DType.type``, we use the ``known_scalar_type`` method.
This can allow discovery of a ``vector`` as a scalar (element) instead of a
sequence (for the command ``np.array(vector, dtype=VectorDType)``) even when
``vector`` is itself a sequence or even an array subclass. This will *not* be
public API initially, but may be made public at a later time.

**Example:** The current datetime DType requires a
``__discover_descr_from_pyobject__`` which returns the correct unit for string
inputs. This allows it to support::

    np.array(["2020-01-02", "2020-01-02 11:24"], dtype="M8")

by inspecting the date strings. Together with the common dtype
operation, this allows it to automatically find that the datetime64 unit
should be "minutes".


**NumPy Internal Implementation:** The implementation to find the correct
dtype will work similarly to the following pseudocode::

    def find_dtype(array_like):
        common_dtype = None
        for element in array_like:
            # default to object dtype, if unknown
            DType = known_python_types.get(type(element), np.dtype[object])
            dtype = DType.__discover_descr_from_pyobject__(element)

            if common_dtype is None:
                common_dtype = dtype
            else:
                common_dtype = np.promote_types(common_dtype, dtype)
        return common_dtype

In practice, the input to ``np.array()`` is a mix of sequences and array-like
objects, so deciding what is an element requires checking whether it
is a sequence.
The full algorithm (without user-provided dtypes) thus looks more like::

    def find_dtype_recursive(array_like, dtype=None):
        """
        Recursively find the dtype for nested sequences (arrays are not
        supported here).
        """
        DType = known_python_types.get(type(array_like), None)

        if DType is None and is_array_like(array_like):
            # Code for a sequence, an array_like may have a DType we
            # can use directly:
            for element in array_like:
                dtype = find_dtype_recursive(element, dtype=dtype)
            return dtype

        elif DType is None:
            DType = np.dtype[object]

        # dtype discovery and promotion as in `find_dtype` above

If the user provides ``DType``, then this DType will be tried first, and the
``dtype`` may need to be cast before the promotion is performed.

**Limitations:** Motivational point 3. above, a nested array
``np.array([np.array(None, dtype=object)], dtype=np.String)``, is currently
(sometimes) supported by inspecting all elements of the nested array.
User DTypes will implicitly handle these correctly if the nested array
is of ``object`` dtype.
In some other cases NumPy will retain backward compatibility for existing
functionality only.
NumPy uses such functionality to allow code such as::

    >>> np.array([np.array(["2020-05-05"], dtype="S")], dtype=np.datetime64)
    array([['2020-05-05']], dtype='datetime64[D]')

which discovers the datetime unit ``D`` (days).
This possibility will not be accessible to user DTypes without an
intermediate cast to ``object`` or a custom function.

The use of a global type map means that an error or warning has to be given if
two DTypes wish to map to the same Python type.
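A minimal sketch of such a guarded registration step (the helper name and
error handling here are illustrative, not proposed API)::

    def _register_python_type(pytype, DType):
        # Hypothetical helper maintaining the global mapping above.
        existing = known_python_types.get(pytype)
        if existing is not None and existing is not DType:
            raise RuntimeError(
                f"{pytype!r} is already associated with {existing!r}")
        known_python_types[pytype] = DType
        # Keep the DType alive for as long as the scalar type exists:
        pytype.__associated_array_dtype__ = DType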
In most cases user DTypes should only be implemented for types defined within
the same library to avoid the potential for conflicts. It will be the DType
implementor's responsibility to be careful about this and to avoid
registration when in doubt.

**Alternatives:** Instead of a global mapping, we could rely on the scalar
attribute ``scalar.__associated_array_dtype__``.
This only creates a difference in behaviour for subclasses and the exact
implementation can be undefined initially.
Scalars will be expected to derive from a NumPy scalar.
In principle NumPy could, for a time, still choose to rely on the attribute.

An earlier proposal for the ``dtype`` discovery algorithm was to use a
two-pass approach: first finding only the correct ``DType`` class and only
then discovering the parametric ``dtype`` instance.
This was rejected for unnecessary complexity.
The main advantage of this method is that it would have enabled value-based
promotion in universal functions, allowing::

    np.add(np.array([8], dtype="uint8"), [4])

to return a ``uint8`` result (instead of ``int16``), which currently happens
for::

    np.add(np.array([8], dtype="uint8"), 4)

(note the list ``[4]`` instead of the scalar ``4``).
This is not a feature NumPy currently has or desires to support.

-C-Side API
-^^^^^^^^^^

**Further Issues and Discussion:** It is possible to create a DType
such as Categorical, array, or vector which can only be used if ``dtype=DType``
is provided. Such DTypes cannot roundtrip correctly. For example::

    np.array(np.array(1, dtype=Categorical)[()])

will result in an integer array. To get the original ``Categorical`` array,
``dtype=Categorical`` will need to be passed explicitly.
This is a general limitation, but round-tripping is always possible if
``dtype=original_arr.dtype`` is passed.


.. _c-api:

******************************************************************************
Public C-API
******************************************************************************

A Python-side API shall not be defined here; this section describes the
general C-side approach.

DType creation
==============================================================================

To create a new DType the user will need to define all the methods and
attributes as presented above and outlined in the `Usage and impact`_
section.
Some additional methods similar to those currently defined as part of
:c:type:`PyArray_ArrFuncs` will be necessary and part of the slots struct
below.

-As already mentioned in NEP 41, the interface to define new DTypes in C
-is modeled after the limited API in Python: the above-mentioned slots
-and some additional necessary information will thus be passed within a slots
-struct and identified by ``ssize_t`` integers::

As already mentioned in NEP 41, the interface to define this DType class in C
is modeled after the `Python limited API
<https://docs.python.org/3/c-api/stable.html>`_: the above-mentioned slots and
some additional necessary information will thus be passed within a slots
struct and identified by ``ssize_t`` integers::

    static struct PyArrayMethodDef slots[] = {
        {NPY_dt_method, method_implementation},
@@ -1093,10 +1190,8 @@ struct and identified by ``ssize_t`` integers::
    }

     typedef struct{
-      PyTypeObject *typeobj;    /* type of python scalar */
-      int is_parametric;        /* Is the dtype parametric? */
-      int is_abstract;          /* Is the dtype abstract? */
-      int flags                 /* flags (to be discussed) */
+      PyTypeObject *typeobj;    /* type of python scalar or NULL */
+      int flags;                /* flags, including parametric and abstract */
       /* NULL terminated CastingImpl; is copied and references are stolen */
       CastingImpl *castingimpls[];
       PyType_Slot *slots;
@@ -1105,20 +1200,19 @@ struct and identified by ``ssize_t`` integers::

     PyObject* PyArray_InitDTypeMetaFromSpec(PyArrayDTypeMeta_Spec *dtype_spec);

-all of this information will be copied during instantiation.
+All of this information will be copied.

-**TODO:** The DType author should be able to at define new methods for
-their DType, up to defining a full type object and in the future possibly even
-extending the ``PyArrayDTypeMeta_Type`` struct.
-We have to decide on how (and what) to make available to the user initially.
-A proposed initial solution may be to simply allow inheriting from an existing
-class.
-Further this prevents overriding some slots, such as `==` which may not be
-desirable.
+**TODO:** The DType author should be able to define new methods for their
+DType, up to defining a full type object and in the future possibly even
+extending the ``PyArrayDTypeMeta_Type`` struct. We have to decide on how (and
+what) to make available to the user initially. A possible initial solution may
+be to only allow inheriting from an existing class: ``class MyDType(np.dtype,
+MyBaseclass)``. If ``np.dtype`` is first in the method resolution order, this
+also prevents overriding some slots, such as ``==`` which may not be desirable.

-The proposed method slots are (prepended with ``NPY_dt_``), these are
-detailed above and given here for summary:
+The ``slots`` will be identified by names which are prefixed with ``NPY_dt_``
+and are:

@@ -1129,205 +1223,142 @@ detailed above and given here for summary:

 * ``is_canonical(self) -> {0, 1}``
 * ``ensure_canonical(self) -> dtype``
 * ``common_dtype(cls, other) -> DType, NotImplemented, or NULL``
 * ``common_instance(self, other) -> dtype or NULL``

-If not set, most slots are filled with slots which either error or defer automatically.
+Where possible, a default implementation will be provided if the slot is
+omitted or set to ``NULL``.

 Non-parametric dtypes do not have to implement:

 * ``discover_descr_from_pyobject`` (uses ``default_descr`` instead)
 * ``common_instance`` (uses ``default_descr`` instead)
-* ``ensure_canonical`` (uses ``default_descr`` instead)
-
-Which will be correct for most dtypes *which do not store metadata*.
-
-Other slots may be replaced by convenience versions, e.g. sorting methods
-can be defined by providing:
+* ``ensure_canonical`` (uses ``default_descr`` instead).

-* ``compare(self, char *item_ptr1, char *item_ptr2, int *res) -> {-1, 0}``
-  *TODO: We would like an error return, is this reasonable? (similar to old
-  python compare)*
-
-which uses generic sorting functionality. In general, we could add a
-functions such as:
+Sorting is expected to be implemented using:

 * ``get_sort_function(self, NPY_SORTKIND sort_kind) -> {out_sortfunction, NotImplemented, NULL}``.
-  If the sortkind is not understood it may be allowed to return ``NotImplemented``.

-in the future. However, for example sorting is likely better solved by the
-implementation of multiple generalized ufuncs which are called internally.
+Although for convenience, it will be sufficient if the user implements only: + +* ``compare(self, char *item_ptr1, char *item_ptr2, int *res) -> {-1, 0, 1}`` -**Limitations:** -Using the above ``PyArrayDTypeMeta_Spec`` struct, the structure itself can -only be extended clumsily (e.g. by adding a version tag to the ``slots`` -to indicate a new, longer version of the struct). -We could also provide the struct using a function, which however will require -memory management but would allow ABI-compatible extension -(the struct is freed again when the DType is created). +**Limitations:** Using the above ``PyArrayDTypeMeta_Spec`` struct, the +structure itself can only be extended clumsily (e.g. by adding a version tag +to the ``slots`` to indicate a new, longer version of the struct). We could +also provide the struct using a function, which however will require memory +management but would allow ABI-compatible extension (the struct is freed again +when the DType is created). CastingImpl -""""""""""" +============================================================================== The external API for ``CastingImpl`` will be limited initially to defining: -* ``cast_kind`` attribute, which can be one of the supported casting kinds. +* ``casting`` attribute, which can be one of the supported casting kinds. This is the safest cast possible. For example casting between two NumPy strings is of course "safe" in general, but may be "same kind" in a specific instance if the second string is shorter. If neither type is parametric the - ``adjust_descriptors`` must use it. -* ``adjust_descriptors(dtypes_in[2], dtypes_out[2], casting_out) -> int {0, -1}`` - The out dtypes must be set correctly to dtypes which the strided loop - (transfer function) can handle. Initially the result must have be instances - of the same DType class as the ``CastingImpl`` is defined for. - The ``casting_out`` will be set to ``NPY_SAFE_CASTING``, ``NPY_UNSAFE_CASTING``, - or ``NPY_SAME_KIND_CASTING``. With a new, additional, flag ``NPY_CAST_IS_VIEW`` - which can be set to indicate that no cast is necessary, but a simple view - is sufficient to perform the cast. - The cast should return ``-1`` when a custom error message is set and - ``NPY_NO_CASTING`` to indicate that a generic casting error should be - set (this is in most cases preferable). -* ``strided_loop(char **args, npy_intp *dimensions, npy_intp *strides, dtypes[2]) -> int {0, nonzero}`` (must currently succeed) - -This is identical to the proposed API for ufuncs. By default the two dtypes -are passed in as the last argument. On error return (if no error is set) a -generic error will be given. -More optimized loops are in use internally, and will be made available to users -in the future (see notes) -The iterator API does not currently support casting errors: this is -a bug that needs to be fixed. Until it is fixed the loop should always -succeed (return 0). - -Although verbose, the API shall mimic the one for creating a new DType. -The ``PyArrayCastingImpl_Spec`` will include a field for ``dtypes`` and -identical to a ``PyArrayUFuncImpl_Spec``:: + ``resolve_descriptors`` must use it. + +* ``resolve_descriptors(self, given_descrs[2], loop_descrs[2]) -> int {casting, -1}``: + The ``loop_descrs`` must be set correctly to dtypes which the strided loop + (transfer function) can handle. Initially the result must have instances + of the same DType class as the ``CastingImpl`` is defined for. 
The + ``casting`` will be set to ``NPY_EQUIV_CASTING``, ``NPY_SAFE_CASTING``, + ``NPY_UNSAFE_CASTING``, or ``NPY_SAME_KIND_CASTING``. + A new, additional flag, ``NPY_CAST_IS_VIEW``, can be set to indicate that + no cast is necessary and a view is sufficient to perform the cast. + The return value shall be ``-1`` to indicate that the cast is not possible. + If no error is set, a generic error message will be given. If an error is + already set it will be chained and may provide additional information. + Note that ``self`` represents additional call information; details are given + in NEP 43. + +* ``strided_loop(char **args, npy_intp *dimensions, npy_intp *strides, + ...) -> int {0, -1}`` (signature will be fully defined in NEP 43) + +This is identical to the proposed API for ufuncs. The additional ``...`` +part of the signature will include information such as the two ``dtype``\s. +More optimized loops are in use internally, and +will be made available to users in the future (see notes). + +Although verbose, the API shall mimic the one for creating a new DType: + +.. code-block:: C typedef struct{ - int needs_api; /* whether the cast requires the API */ - PyArray_DTypeMeta *in_dtype; /* input DType class */ - PyArray_DTypeMeta *out_dtype; /* output DType class */ + int flags; /* e.g. whether the cast requires the API */ + int nin, nout; /* Number of Input and outputs (always 1) */ + NPY_CASTING casting; /* The default casting level */ + PyArray_DTypeMeta *dtypes; /* input and output DType class */ /* NULL terminated slots defining the methods */ PyType_Slot *slots; - } PyArrayUFuncImpl_Spec; - -The actual creation function ``PyArrayCastingImpl_FromSpec()`` will additionally -require a ``casting`` parameter to define the default (maximum) casting safety. -The internal representation of ufuncs and casting implementations may differ -initially if it makes implementation simpler, but should be kept opaque to -allow future merging. + } PyArrayMethod_Spec; -**TODO:** It may be possible to make this more close to the ufuncs or even -use a single FromSpec. This API shall only be finalized after/when NEP 43 -is finalized. +The focus differs between casting and general ufuncs. For example for casts +``nin == nout == 1`` is always correct, while for ufuncs ``casting`` is +expected to be usually `"safe"`. -**Notes:** - -We may initially allow users to define only a single loop. -However, internally NumPy optimizes far more, and this should be made -public incrementally, by either allowing to provide multiple versions, such -as: +**Notes:** We may initially allow users to define only a single loop. However, +internally NumPy optimizes far more, and this should be made public +incrementally, either by allowing multiple versions, such as: * contiguous inner loop * strided inner loop * scalar inner loop -or more likely through an additional ``get_inner_loop`` function which has -additional information, such as the fixed strides (similar to our internal API). +or more likely through exposure of the ``get_loop`` function which is passed +additional information, such as the fixed strides (similar to our internal +API). -The above example does not yet include the definition of setup/teardown -functionality, which may overlap with ``get_inner_loop``. -Since these are similar to the UFunc machinery, this should be defined in -detail in NEP 43 and then incorporated identically into casting. +The above example does not yet include potential setup and error handling +requirements. 
Since these are similar to the UFunc machinery, this will be +defined in detail in NEP 43 and then incorporated identically into casting. -Also the ``needs_api`` decision may actually be moved into a setup function, -and removed or mainly provided as a convenience flag. +The slots/methods used will be prefixed ``NPY_uf_`` for similarity to the +ufunc machinery. -The slots/methods used will be prefixed ``NPY_uf_`` for similarity to the ufunc -machinery. +**Alternatives:** Aside from name changes, and possible signature tweaks, +there seem to be few alternatives to the above structure. +The proposed API using ``*_FromSpec`` function is a good way to achieve a stable +and extensible API. The slots design is extensible and can be +changed without breaking binary compatibility. +Convenience functions can still be provided to allow creation with less code. -Alternatives -"""""""""""" - -Aside from name changes, and possible signature tweaks, there seem to -be few alternatives to the above structure. -Keeping the creation process close the Python limited API has some advantage. -Convenience functions could still be provided to allow creation with less -code. -The central point in the above design is that the enumerated slots design -is extensible and can be changed without breaking binary compatibility. -A downside is the possible need to pass in e.g. integer flags using a void -pointer inside this structure. - -A downside of this is that compilers cannot warn about function -pointer incompatibilities. There is currently no proposed solution to this. - - -Issues -^^^^^^ - -Any possible design decision will have issues. - -The above split into Python objects has the disadvantage that reference cycles -naturally occur. For example a potential ``CastingImpl`` object needs to -hold on to both ``DTypes``. Further, a scalar type may want to own a -strong reference to the corresponding ``DType`` while the ``DType`` *must* -hold a strong reference to the scalar. -We do not believe that these reference cycles are an issue. The may -require implementation of of cyclic reference counting at some point, but -cyclic reference resolution is very common in Python and dtypes (especially -classes) are only a small number of objects. - -In some cases, the new split will add additional indirections to the code, -since methods on the DType have to be looked up and called. -This should not have serious performance impact and seems necessary to -achieve the desired flexibility. - -From a user-perspective, a more serious downside is that handling certain -functionality in the ``DType`` rather than directly can mean that error -messages need to be raised from places where less context is available. -This may mean that error messages can be less specific. -This will be alleviated by exception chaining. Also decisions such as -returning the casting safety (even when it is impossible to cast) allow -most exceptions to be set at a point where more context is available -and ensures uniform errors messages. +One downside of this approach is that compilers cannot warn about function pointer +incompatibilities. +****************************************************************************** Implementation --------------- +****************************************************************************** -Internally a few implementation details have to be decided. These will be -fully opaque to the user and can be changed at a later time. +Steps for implementation are outlined in :ref:`NEP 41 `. 
This includes +internal restructuring for the new casting and array-coercion. +First, the NumPy will internally be rewritten using the above methods for +casting and array-coercion. -This includes: +After that, the new public API will be added incrementally. +We plan to expose it in a preliminary state initially to allow modification +after some experience can be gained. +In addition to the features presented in detail in this NEP, all functionality +currently implemented on the dtypes will be replaced systematically. -* How ``CastingImpl`` lookup, and thus the decision whether a cast is possible, - is defined. (This is speed relevant, although mainly during a transition - phase where UFuncs where NEP 43 is not yet implemented). - Thus, it is not very relevant to the NEP. It is only necessary to ensure fast - lookup during the transition phase for the current builtin Numerical types. -* How the mapping from a python scalar (e.g. ``3.``) to the DType is - implemented. - -The main steps for implementation are outlined in :ref:`NEP 41 `. -This includes the internal restructure for how casting and array-coercion -works. -After this the new public API will be added incrementally. -This includes replacements for certain slots which are occasionally -directly used on the dtype (e.g. ``dtype->f->setitem``). - - -Discussion ----------- +****************************************************************************** +Alternatives +****************************************************************************** -There is a large space of possible implementations with many discussions -in various places, as well as initial thoughts and design documents. -These are listed in the discussion of NEP 40 and not repeated here for -brevity. +The space of possible implementations is large, so there have been many +discussions, conceptions, and design documents. These are listed in NEP 40. +Since this NEP encompasses multiple individual decisions, alternatives +are discussed in the above individual sections. +****************************************************************************** References ----------- +****************************************************************************** .. [1] NumPy currently inspects the value to allow the operations:: @@ -1335,10 +1366,11 @@ References np.array([1.2], dtype=np.float32) + 1. to return a ``uint8`` or ``float32`` array respectively. This is - further described in the documentation of `numpy.result_type`. + further described in the documentation for :func:`numpy.result_type`. +****************************************************************************** Copyright ---------- +****************************************************************************** This document has been placed in the public domain. From e5a82c95742908899b0b4d7ee54617eb4c6b3972 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 28 Sep 2020 07:16:02 +0000 Subject: [PATCH 105/409] MAINT: Bump hypothesis from 5.35.3 to 5.36.1 Bumps [hypothesis](https://github.com/HypothesisWorks/hypothesis) from 5.35.3 to 5.36.1. 
- [Release notes](https://github.com/HypothesisWorks/hypothesis/releases) - [Commits](https://github.com/HypothesisWorks/hypothesis/compare/hypothesis-python-5.35.3...hypothesis-python-5.36.1) Signed-off-by: dependabot-preview[bot] --- test_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_requirements.txt b/test_requirements.txt index 036e9861fa4e..c86d46eb879a 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,7 +1,7 @@ cython==0.29.21 wheel setuptools<49.2.0 -hypothesis==5.35.3 +hypothesis==5.36.1 pytest==6.0.2 pytz==2020.1 pytest-cov==2.10.1 From 33e1dbee8d9e11a3e96efaae822ff6f3c44e3cef Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Mon, 28 Sep 2020 08:15:49 -0600 Subject: [PATCH 106/409] MAINT: Finish replacing PyInt_Check (#17364) * MAINT: Replace PyInt_Check in multiarray_tests.c.src * MAINT: Replace PyInt_Check in number.c * MAINT: Replace PyInt_Check in interators.c * MAINT: Replace PyInt_Check in refcount.c * MAINT: Replace PyInt_Check in hashdescr.c * MAINT: Replace PyInt_Check in scalarapi.c. * MAINT: Replace PyInt_Check in scalartypes.c.src. --- .../src/multiarray/_multiarray_tests.c.src | 27 ++++++--- numpy/core/src/multiarray/hashdescr.c | 4 +- numpy/core/src/multiarray/iterators.c | 2 +- numpy/core/src/multiarray/number.c | 13 +++- numpy/core/src/multiarray/refcount.c | 24 ++++---- numpy/core/src/multiarray/scalarapi.c | 60 +++++++++++-------- numpy/core/src/multiarray/scalartypes.c.src | 2 +- 7 files changed, 79 insertions(+), 53 deletions(-) diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src index ea04c82bdf2e..0bf6958cde54 100644 --- a/numpy/core/src/multiarray/_multiarray_tests.c.src +++ b/numpy/core/src/multiarray/_multiarray_tests.c.src @@ -176,17 +176,20 @@ test_neighborhood_iterator(PyObject* NPY_UNUSED(self), PyObject* args) /* Compute boundaries for the neighborhood iterator */ for (i = 0; i < 2 * PyArray_NDIM(ax); ++i) { PyObject* bound; + bound = PySequence_GetItem(b, i); if (bound == NULL) { goto clean_itx; } - if (!PyInt_Check(bound)) { + /* PyLong_AsSsize checks for PyLong */ + bounds[i] = PyLong_AsSsize_t(bound); + if (error_converting(bounds[i])) { + PyErr_Clear(); PyErr_SetString(PyExc_ValueError, - "bound not long"); + "bound is invalid"); Py_DECREF(bound); goto clean_itx; } - bounds[i] = PyLong_AsSsize_t(bound); Py_DECREF(bound); } @@ -335,17 +338,20 @@ test_neighborhood_iterator_oob(PyObject* NPY_UNUSED(self), PyObject* args) /* Compute boundaries for the neighborhood iterator */ for (i = 0; i < 2 * PyArray_NDIM(ax); ++i) { PyObject* bound; + bound = PySequence_GetItem(b1, i); if (bound == NULL) { goto clean_itx; } - if (!PyInt_Check(bound)) { + /* PyLong_AsSsize checks for PyLong */ + bounds[i] = PyLong_AsSsize_t(bound); + if (error_converting(bounds[i])) { + PyErr_Clear(); PyErr_SetString(PyExc_ValueError, - "bound not long"); + "bound is invalid"); Py_DECREF(bound); goto clean_itx; } - bounds[i] = PyLong_AsSsize_t(bound); Py_DECREF(bound); } @@ -359,17 +365,20 @@ test_neighborhood_iterator_oob(PyObject* NPY_UNUSED(self), PyObject* args) for (i = 0; i < 2 * PyArray_NDIM(ax); ++i) { PyObject* bound; + bound = PySequence_GetItem(b2, i); if (bound == NULL) { goto clean_itx; } - if (!PyInt_Check(bound)) { + /* PyLong_AsSsize checks for PyLong */ + bounds[i] = PyLong_AsSsize_t(bound); + if (error_converting(bounds[i])) { + PyErr_Clear(); PyErr_SetString(PyExc_ValueError, - "bound not long"); + "bound is invalid"); 
Py_DECREF(bound); goto clean_itx; } - bounds[i] = PyLong_AsSsize_t(bound); Py_DECREF(bound); } diff --git a/numpy/core/src/multiarray/hashdescr.c b/numpy/core/src/multiarray/hashdescr.c index c596a7098a97..e9a99cc8fa8f 100644 --- a/numpy/core/src/multiarray/hashdescr.c +++ b/numpy/core/src/multiarray/hashdescr.c @@ -165,7 +165,7 @@ static int _array_descr_walk_fields(PyObject *names, PyObject* fields, PyObject* } foffset = PyTuple_GET_ITEM(value, 1); - if (!PyInt_Check(foffset)) { + if (!PyLong_Check(foffset)) { PyErr_SetString(PyExc_SystemError, "(Hash) Second item in compound dtype tuple not an int ???"); return -1; @@ -208,7 +208,7 @@ static int _array_descr_walk_subarray(PyArray_ArrayDescr* adescr, PyObject *l) PyList_Append(l, item); } } - else if (PyInt_Check(adescr->shape)) { + else if (PyLong_Check(adescr->shape)) { PyList_Append(l, adescr->shape); } else { diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c index 31795b2d0faf..3ebd4c858974 100644 --- a/numpy/core/src/multiarray/iterators.c +++ b/numpy/core/src/multiarray/iterators.c @@ -597,7 +597,7 @@ iter_subscript(PyArrayIterObject *self, PyObject *ind) } /* Check for Integer or Slice */ - if (PyLong_Check(ind) || PyInt_Check(ind) || PySlice_Check(ind)) { + if (PyLong_Check(ind) || PySlice_Check(ind)) { start = parse_index_entry(ind, &step_size, &n_steps, self->size, 0, 1); if (start == -1) { diff --git a/numpy/core/src/multiarray/number.c b/numpy/core/src/multiarray/number.c index 87c3c9b0a708..a629dfe97fde 100644 --- a/numpy/core/src/multiarray/number.c +++ b/numpy/core/src/multiarray/number.c @@ -397,14 +397,21 @@ is_scalar_with_conversion(PyObject *o2, double* out_exponent) PyObject *temp; const int optimize_fpexps = 1; - if (PyInt_Check(o2)) { - *out_exponent = (double)PyLong_AsLong(o2); + if (PyLong_Check(o2)) { + long tmp = PyLong_AsLong(o2); + if (error_converting(tmp)) { + PyErr_Clear(); + return NPY_NOSCALAR; + } + *out_exponent = (double)tmp; return NPY_INTPOS_SCALAR; } + if (optimize_fpexps && PyFloat_Check(o2)) { *out_exponent = PyFloat_AsDouble(o2); return NPY_FLOAT_SCALAR; } + if (PyArray_Check(o2)) { if ((PyArray_NDIM((PyArrayObject *)o2) == 0) && ((PyArray_ISINTEGER((PyArrayObject *)o2) || @@ -442,7 +449,7 @@ is_scalar_with_conversion(PyObject *o2, double* out_exponent) else if (PyIndex_Check(o2)) { PyObject* value = PyNumber_Index(o2); Py_ssize_t val; - if (value==NULL) { + if (value == NULL) { if (PyErr_Occurred()) { PyErr_Clear(); } diff --git a/numpy/core/src/multiarray/refcount.c b/numpy/core/src/multiarray/refcount.c index 0f84449af012..41dd059b0ac1 100644 --- a/numpy/core/src/multiarray/refcount.c +++ b/numpy/core/src/multiarray/refcount.c @@ -292,20 +292,22 @@ static void _fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype) { if (!PyDataType_FLAGCHK(dtype, NPY_ITEM_REFCOUNT)) { - if ((obj == Py_None) || (PyInt_Check(obj) && PyLong_AsLong(obj)==0)) { + PyObject *arr; + + if ((obj == Py_None) || + (PyLong_Check(obj) && PyLong_AsLong(obj) == 0)) { return; } - else { - PyObject *arr; - Py_INCREF(dtype); - arr = PyArray_NewFromDescr(&PyArray_Type, dtype, - 0, NULL, NULL, NULL, - 0, NULL); - if (arr!=NULL) { - dtype->f->setitem(obj, optr, arr); - } - Py_XDECREF(arr); + /* Clear possible long conversion error */ + PyErr_Clear(); + Py_INCREF(dtype); + arr = PyArray_NewFromDescr(&PyArray_Type, dtype, + 0, NULL, NULL, NULL, + 0, NULL); + if (arr!=NULL) { + dtype->f->setitem(obj, optr, arr); } + Py_XDECREF(arr); } if (dtype->type_num == NPY_OBJECT) { Py_XINCREF(obj); 
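As a quick Python-level sanity check of these integer conversion paths
(illustrative only; this snippet is not part of the patch)::

    import numpy as np

    # Small Python integers take the optimized integer-exponent path.
    assert ((np.arange(3.0) ** 2) == np.array([0.0, 1.0, 4.0])).all()

    # An exponent that overflows a C long must fall back to the general
    # power implementation instead of raising.
    assert (np.array([1.0]) ** (2 ** 70))[0] == 1.0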
diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c index b918786f22b6..f610ad4680e3 100644 --- a/numpy/core/src/multiarray/scalarapi.c +++ b/numpy/core/src/multiarray/scalarapi.c @@ -373,7 +373,8 @@ PyArray_FromScalar(PyObject *scalar, PyArray_Descr *outcode) NPY_NO_EXPORT PyObject * PyArray_ScalarFromObject(PyObject *object) { - PyObject *ret=NULL; + PyObject *ret = NULL; + if (PyArray_IsZeroDim(object)) { return PyArray_ToScalar(PyArray_DATA((PyArrayObject *)object), (PyArrayObject *)object); @@ -390,42 +391,49 @@ PyArray_ScalarFromObject(PyObject *object) PyArrayScalar_RETURN_FALSE; } } - else if (PyInt_Check(object)) { - ret = PyArrayScalar_New(Long); - if (ret == NULL) { - return NULL; + else if (PyLong_Check(object)) { + /* Check if fits in long */ + npy_long val_long = PyLong_AsLong(object); + if (!error_converting(val_long)) { + ret = PyArrayScalar_New(Long); + if (ret != NULL) { + PyArrayScalar_VAL(ret, Long) = val_long; + } + return ret; } - PyArrayScalar_VAL(ret, Long) = PyLong_AsLong(object); + PyErr_Clear(); + + /* Check if fits in long long */ + npy_longlong val_longlong = PyLong_AsLongLong(object); + if (!error_converting(val_longlong)) { + ret = PyArrayScalar_New(LongLong); + if (ret != NULL) { + PyArrayScalar_VAL(ret, LongLong) = val_longlong; + } + return ret; + } + PyErr_Clear(); + + return NULL; } else if (PyFloat_Check(object)) { ret = PyArrayScalar_New(Double); - if (ret == NULL) { - return NULL; + if (ret != NULL) { + PyArrayScalar_VAL(ret, Double) = PyFloat_AS_DOUBLE(object); } - PyArrayScalar_VAL(ret, Double) = PyFloat_AS_DOUBLE(object); + return ret; } else if (PyComplex_Check(object)) { ret = PyArrayScalar_New(CDouble); - if (ret == NULL) { - return NULL; + if (ret != NULL) { + PyArrayScalar_VAL(ret, CDouble).real = PyComplex_RealAsDouble(object); + PyArrayScalar_VAL(ret, CDouble).imag = PyComplex_ImagAsDouble(object); } - PyArrayScalar_VAL(ret, CDouble).real = PyComplex_RealAsDouble(object); - PyArrayScalar_VAL(ret, CDouble).imag = PyComplex_ImagAsDouble(object); + return ret; } - else if (PyLong_Check(object)) { - npy_longlong val; - val = PyLong_AsLongLong(object); - if (error_converting(val)) { - PyErr_Clear(); - return NULL; - } - ret = PyArrayScalar_New(LongLong); - if (ret == NULL) { - return NULL; - } - PyArrayScalar_VAL(ret, LongLong) = val; + else { + return NULL; } - return ret; } /*New reference */ diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index 5a3f4922a66a..65672237dad4 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -2952,7 +2952,7 @@ void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) * For a VOID scalar first see if obj is an integer or long * and create new memory of that size (filled with 0) for the scalar */ - if (PyLong_Check(obj) || PyInt_Check(obj) || + if (PyLong_Check(obj) || PyArray_IsScalar(obj, Integer) || (PyArray_Check(obj) && PyArray_NDIM((PyArrayObject *)obj)==0 && From 3d37ebccf696d8b186e11df99f3940441919f1c2 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Mon, 28 Sep 2020 08:39:02 -0600 Subject: [PATCH 107/409] MAINT: Remove old debug print statement. This looks like an old debug statement that should have been removed long ago. The C stdio.h header is nowhere imported and PyInt_Type no longer exists in Python 3. 
--- numpy/core/src/multiarray/multiarraymodule.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index f1d5ab694a80..ff2b796d2908 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -4409,12 +4409,6 @@ setup_scalartypes(PyObject *NPY_UNUSED(dict)) /* Timedelta is an integer with an associated unit */ SINGLE_INHERIT(Timedelta, SignedInteger); - /* - fprintf(stderr, - "tp_free = %p, PyObject_Del = %p, int_tp_free = %p, base.tp_free = %p\n", - PyIntArrType_Type.tp_free, PyObject_Del, PyInt_Type.tp_free, - PySignedIntegerArrType_Type.tp_free); - */ SINGLE_INHERIT(UByte, UnsignedInteger); SINGLE_INHERIT(UShort, UnsignedInteger); SINGLE_INHERIT(UInt, UnsignedInteger); From 5e2d8f47703c783e168bd845b65a92b9c8040a0f Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Mon, 28 Sep 2020 16:36:24 +0100 Subject: [PATCH 108/409] Apply suggestions from code review Co-authored-by: Sebastian Berg --- numpy/core/src/multiarray/datetime_busdaycal.c | 2 +- numpy/core/src/multiarray/datetime_strings.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/core/src/multiarray/datetime_busdaycal.c b/numpy/core/src/multiarray/datetime_busdaycal.c index cec7135ffea3..d48141d4cb7d 100644 --- a/numpy/core/src/multiarray/datetime_busdaycal.c +++ b/numpy/core/src/multiarray/datetime_busdaycal.c @@ -73,7 +73,7 @@ PyArray_WeekMaskConverter(PyObject *weekmask_in, npy_bool *weekmask) general_weekmask_string: /* a string like "SatSun" or "Mon Tue Wed" */ memset(weekmask, 0, 7); - for (int i = 0; i < len; i += 3) { + for (Py_ssize_t i = 0; i < len; i += 3) { while (isspace(str[i])) ++i; diff --git a/numpy/core/src/multiarray/datetime_strings.c b/numpy/core/src/multiarray/datetime_strings.c index 8665d329579f..360868568478 100644 --- a/numpy/core/src/multiarray/datetime_strings.c +++ b/numpy/core/src/multiarray/datetime_strings.c @@ -1399,7 +1399,7 @@ array_datetime_as_string(PyObject *NPY_UNUSED(self), PyObject *args, strobj = unit_in; } - Py_ssize_t len = 0; + Py_ssize_t len; char const *str = PyUnicode_AsUTF8AndSize(strobj, &len); if (str == NULL) { Py_DECREF(strobj); From 34d7d395d79f880dc9e156ff83af0cf5844867bf Mon Sep 17 00:00:00 2001 From: JMFT Date: Mon, 28 Sep 2020 18:43:07 +0100 Subject: [PATCH 109/409] DOC: Fix docstring for np.matmul (#17383) Update argument names to match ufunc signature in matmul docstring. --- numpy/core/code_generators/ufunc_docstrings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py index 82cd6fb273f2..1f8e4a6fb314 100644 --- a/numpy/core/code_generators/ufunc_docstrings.py +++ b/numpy/core/code_generators/ufunc_docstrings.py @@ -2646,8 +2646,8 @@ def add_newdoc(place, name, doc): Raises ------ ValueError - If the last dimension of `a` is not the same size as - the second-to-last dimension of `b`. + If the last dimension of `x1` is not the same size as + the second-to-last dimension of `x2`. If a scalar value is passed in. From 47c32b959ae71eb2c65c3b9b06796614105f5964 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Mon, 28 Sep 2020 15:55:23 -0400 Subject: [PATCH 110/409] DOC: Replace "About NumPy" with "Document conventions" Eliminates the unhelpful "About NumPy" title. 
All contents of the original appear more prominently elsewhere, except for the information about document conventions, which is now the title and subject. --- doc/source/_templates/indexcontent.html | 2 +- doc/source/about.rst | 62 ------------------------- doc/source/doc_conventions.rst | 23 +++++++++ doc/source/docs/howto_document.rst | 5 ++ doc/source/user/index.rst | 7 ++- 5 files changed, 34 insertions(+), 65 deletions(-) delete mode 100644 doc/source/about.rst create mode 100644 doc/source/doc_conventions.rst diff --git a/doc/source/_templates/indexcontent.html b/doc/source/_templates/indexcontent.html index 5929e755dee6..6633aa9bef90 100644 --- a/doc/source/_templates/indexcontent.html +++ b/doc/source/_templates/indexcontent.html @@ -56,7 +56,7 @@

{{ docstitle|e }}

- + diff --git a/doc/source/about.rst b/doc/source/about.rst deleted file mode 100644 index 3e83833d178d..000000000000 --- a/doc/source/about.rst +++ /dev/null @@ -1,62 +0,0 @@ -About NumPy -=========== - -NumPy is the fundamental package -needed for scientific computing with Python. This package contains: - -- a powerful N-dimensional :ref:`array object ` -- sophisticated :ref:`(broadcasting) functions ` -- basic :ref:`linear algebra functions ` -- basic :ref:`Fourier transforms ` -- sophisticated :ref:`random number capabilities ` -- tools for integrating Fortran code -- tools for integrating C/C++ code - -Besides its obvious scientific uses, *NumPy* can also be used as an -efficient multi-dimensional container of generic data. Arbitrary -data types can be defined. This allows *NumPy* to seamlessly and -speedily integrate with a wide variety of databases. - -NumPy is a successor for two earlier scientific Python libraries: -Numeric and Numarray. - -NumPy community ---------------- - -NumPy is a distributed, volunteer, open-source project. *You* can help -us make it better; if you believe something should be improved either -in functionality or in documentation, don't hesitate to contact us --- or -even better, contact us and participate in fixing the problem. - -Our main means of communication are: - -- `scipy.org website `__ - -- `Mailing lists `__ - -- `NumPy Issues `__ (bug reports go here) - -- `Old NumPy Trac `__ (dead link) - -More information about the development of NumPy can be found at our `Developer Zone `__. - -The project management structure can be found at our :doc:`governance page ` - - -About this documentation -======================== - -Conventions ------------ - -Names of classes, objects, constants, etc. are given in **boldface** font. -Often they are also links to a more detailed documentation of the -referred object. - -This manual contains many examples of use, usually prefixed with the -Python prompt ``>>>`` (which is not a part of the example code). The -examples assume that you have first entered:: - ->>> import numpy as np - -before running the examples. diff --git a/doc/source/doc_conventions.rst b/doc/source/doc_conventions.rst new file mode 100644 index 000000000000..e2bc419d1691 --- /dev/null +++ b/doc/source/doc_conventions.rst @@ -0,0 +1,23 @@ +.. _documentation_conventions: + +############################################################################## +Documentation conventions +############################################################################## + +- Names that look like :func:`numpy.array` are links to detailed + documentation. + +- Examples often include the Python prompt ``>>>``. This is not part of the + code and will cause an error if typed or pasted into the Python + shell. It can be safely typed or pasted into the IPython shell; the ``>>>`` + is ignored. + +- Examples often use ``np`` as an alias for ``numpy``; that is, they assume + you've run:: + + >>> import numpy as np + +- If you're a code contributor writing a docstring, see :ref:`docstring_intro`. + +- If you're a writer contributing ordinary (non-docstring) documentation, see + :ref:`userdoc_guide`. diff --git a/doc/source/docs/howto_document.rst b/doc/source/docs/howto_document.rst index 9f9068ab32a7..5a36fa718db7 100644 --- a/doc/source/docs/howto_document.rst +++ b/doc/source/docs/howto_document.rst @@ -4,10 +4,15 @@ A Guide to NumPy/SciPy Documentation ==================================== +.. 
_userdoc_guide: + User documentation ******************* NumPy text documents should follow the `Google developer documentation style guide `_. + +.. _docstring_intro: + Docstrings ********** diff --git a/doc/source/user/index.rst b/doc/source/user/index.rst index 3a79f0f2e9a5..11a019b489b8 100644 --- a/doc/source/user/index.rst +++ b/doc/source/user/index.rst @@ -26,7 +26,10 @@ classes contained in the package, see the :ref:`reference`. howtos_index -.. These are stuck here to avoid the "WARNING: document isn't included in any +.. Links to these files are placed directly in the top-level html + (doc/source/_templates/indexcontent.html, which appears for the URLs + numpy.org/devdocs and numpy.org/doc/XX) and are not in any toctree, so + we include them here to avoid a "WARNING: document isn't included in any toctree" message .. toctree:: @@ -39,5 +42,5 @@ classes contained in the package, see the :ref:`reference`. ../docs/index ../bugs ../release - ../about + ../doc_conventions ../license From d7b88ad4b695f7994f471d2f0b4e493d072d3cc1 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Mon, 28 Sep 2020 17:14:07 -0400 Subject: [PATCH 111/409] DOC: Update info on doc style rules --- doc/source/docs/howto_document.rst | 32 ++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/doc/source/docs/howto_document.rst b/doc/source/docs/howto_document.rst index 9f9068ab32a7..7cd97b954959 100644 --- a/doc/source/docs/howto_document.rst +++ b/doc/source/docs/howto_document.rst @@ -1,12 +1,36 @@ .. _howto-document: -A Guide to NumPy/SciPy Documentation -==================================== +A Guide to NumPy Documentation +============================== User documentation -******************* -NumPy text documents should follow the `Google developer documentation style guide `_. +****************** +- In general, we follow the + `Google developer documentation style guide `_. + +- NumPy style governs cases where: + + - Google has no guidance, or + - We prefer not to use the Google style + + Our current rules: + + - We pluralize *index* as *indices* rather than + `indexes `_, + following the precedent of :func:`numpy.indices`. + + - For consistency we also pluralize *matrix* as *matrices*. + +- Grammatical issues inadequately addressed by the NumPy or Google rules are + decided by the section on "Grammar and Usage" in the most recent edition of + the `Chicago Manual of Style + `_. + +- We welcome being + `alerted `_ to cases + we should add to the NumPy style rules. + Docstrings ********** From 127b262b0da014e624889532c3d4eb330ed2aa07 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 28 Sep 2020 18:12:14 -0500 Subject: [PATCH 112/409] BUG: Fix default void, datetime, and timedelta in array coercion When converting an empty sequence to an array, datetimes would use an incorrect itemsize (on master only). The legacy behaviour of void is that it uses 8 bytes, which is due to the fact that `[]` uses `float64` by default which has 8 bytes. 
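The Python-visible effect of the default descriptors can be checked directly
(outputs as observed with a current build, shown here only for illustration)::

    >>> import numpy as np
    >>> np.array([], dtype="V").dtype   # void defaults to 8 bytes, like float64
    dtype('V8')
    >>> np.array([], dtype="M8").dtype  # datetime64 keeps its generic unit
    dtype('<M8')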
--- numpy/core/src/multiarray/dtypemeta.c | 31 ++++++++++++++++++++++--- numpy/core/tests/test_array_coercion.py | 16 +++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c index 531f746d89a3..dbe5ba476887 100644 --- a/numpy/core/src/multiarray/dtypemeta.c +++ b/numpy/core/src/multiarray/dtypemeta.c @@ -204,8 +204,32 @@ nonparametric_default_descr(PyArray_DTypeMeta *cls) } +/* Ensure a copy of the singleton (just in case we do adapt it somewhere) */ static PyArray_Descr * -flexible_default_descr(PyArray_DTypeMeta *cls) +datetime_and_timedelta_default_descr(PyArray_DTypeMeta *cls) +{ + return PyArray_DescrNew(cls->singleton); +} + + +static PyArray_Descr * +void_default_descr(PyArray_DTypeMeta *cls) +{ + PyArray_Descr *res = PyArray_DescrNew(cls->singleton); + if (res == NULL) { + return NULL; + } + /* + * The legacy behaviour for `np.array([], dtype="V")` is to use "V8". + * This is because `[]` uses `float64` as dtype, and then that is used + * for the size of the requested void. + */ + res->elsize = 8; + return res; +} + +static PyArray_Descr * +string_and_unicode_default_descr(PyArray_DTypeMeta *cls) { PyArray_Descr *res = PyArray_DescrNewFromType(cls->type_num); if (res == NULL) { @@ -534,7 +558,7 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr) else if (PyTypeNum_ISDATETIME(descr->type_num)) { /* Datetimes are flexible, but were not considered previously */ dtype_class->parametric = NPY_TRUE; - dtype_class->default_descr = flexible_default_descr; + dtype_class->default_descr = datetime_and_timedelta_default_descr; dtype_class->discover_descr_from_pyobject = ( discover_datetime_and_timedelta_from_pyobject); dtype_class->common_dtype = datetime_common_dtype; @@ -545,13 +569,14 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr) } else if (PyTypeNum_ISFLEXIBLE(descr->type_num)) { dtype_class->parametric = NPY_TRUE; - dtype_class->default_descr = flexible_default_descr; if (descr->type_num == NPY_VOID) { + dtype_class->default_descr = void_default_descr; dtype_class->discover_descr_from_pyobject = ( void_discover_descr_from_pyobject); dtype_class->common_instance = void_common_instance; } else { + dtype_class->default_descr = string_and_unicode_default_descr; dtype_class->is_known_scalar_type = string_known_scalar_types; dtype_class->discover_descr_from_pyobject = ( string_discover_descr_from_pyobject); diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py index a6c8cc8b2156..e0480c7bfcf5 100644 --- a/numpy/core/tests/test_array_coercion.py +++ b/numpy/core/tests/test_array_coercion.py @@ -324,6 +324,22 @@ def test_scalar_coercion_same_as_cast_and_assignment(self, cast_to): ass[()] = scalar assert_array_equal(ass, cast) + @pytest.mark.parametrize("dtype_char", np.typecodes["All"]) + def test_default_dtype_instance(self, dtype_char): + if dtype_char in "SU": + dtype = np.dtype(dtype_char + "1") + elif dtype_char == "V": + # Legacy behaviour was to use V8. The reason was float64 being the + # default dtype and that having 8 bytes. 
+ dtype = np.dtype("V8") + else: + dtype = np.dtype(dtype_char) + + discovered_dtype, _ = _discover_array_parameters([], type(dtype)) + + assert discovered_dtype == dtype + assert discovered_dtype.itemsize == dtype.itemsize + class TestTimeScalars: @pytest.mark.parametrize("dtype", [np.int64, np.float32]) From 7a50de698a5cfcbd613fcb8660250fc87c24e4dc Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Mon, 28 Sep 2020 11:25:27 -0600 Subject: [PATCH 113/409] ENH: Add metastr_to_unicode function to datetime.c. This new function is intended to be used instead the existing append_metastr_to_string function. It returns the datetime metastr as a unicode string rather than appending it to an existing unicode string. Uses of append_metastr_to_string are replaced in datetime.c. --- numpy/core/src/multiarray/_datetime.h | 10 ++ numpy/core/src/multiarray/datetime.c | 158 ++++++++++++++++++-------- 2 files changed, 121 insertions(+), 47 deletions(-) diff --git a/numpy/core/src/multiarray/_datetime.h b/numpy/core/src/multiarray/_datetime.h index 4e7ade5edacc..be9bac48c811 100644 --- a/numpy/core/src/multiarray/_datetime.h +++ b/numpy/core/src/multiarray/_datetime.h @@ -199,6 +199,16 @@ NPY_NO_EXPORT int convert_pyobject_to_datetime_metadata(PyObject *obj, PyArray_DatetimeMetaData *out_meta); +/* + * Returns datetime metadata as a new reference a Unicode object. + * Returns NULL on error. + * + * If 'skip_brackets' is true, skips the '[]'. + * + */ +NPY_NO_EXPORT PyObject * +metastr_to_unicode(PyArray_DatetimeMetaData *meta, int skip_brackets); + /* * 'ret' is a PyUString containing the datetime string, and this * function appends the metadata string to it. diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c index f2225809a904..99b6a24e9681 100644 --- a/numpy/core/src/multiarray/datetime.c +++ b/numpy/core/src/multiarray/datetime.c @@ -1434,18 +1434,20 @@ raise_if_datetime64_metadata_cast_error(char *object_type, return 0; } else { - PyObject *errmsg; - errmsg = PyUnicode_FromFormat("Cannot cast %s " - "from metadata ", object_type); - errmsg = append_metastr_to_string(src_meta, 0, errmsg); - PyUString_ConcatAndDel(&errmsg, - PyUnicode_FromString(" to ")); - errmsg = append_metastr_to_string(dst_meta, 0, errmsg); - PyUString_ConcatAndDel(&errmsg, - PyUnicode_FromFormat(" according to the rule %s", - npy_casting_to_string(casting))); - PyErr_SetObject(PyExc_TypeError, errmsg); - Py_DECREF(errmsg); + PyObject *src = metastr_to_unicode(src_meta, 0); + if (src == NULL) { + return -1; + } + PyObject *dst = metastr_to_unicode(dst_meta, 0); + if (dst == NULL) { + Py_DECREF(src); + return -1; + } + PyErr_Format(PyExc_TypeError, + "Cannot cast %s from metadata %S to %S according to the rule %s", + object_type, src, dst, npy_casting_to_string(casting)); + Py_DECREF(src); + Py_DECREF(dst); return -1; } } @@ -1466,18 +1468,20 @@ raise_if_timedelta64_metadata_cast_error(char *object_type, return 0; } else { - PyObject *errmsg; - errmsg = PyUnicode_FromFormat("Cannot cast %s " - "from metadata ", object_type); - errmsg = append_metastr_to_string(src_meta, 0, errmsg); - PyUString_ConcatAndDel(&errmsg, - PyUnicode_FromString(" to ")); - errmsg = append_metastr_to_string(dst_meta, 0, errmsg); - PyUString_ConcatAndDel(&errmsg, - PyUnicode_FromFormat(" according to the rule %s", - npy_casting_to_string(casting))); - PyErr_SetObject(PyExc_TypeError, errmsg); - Py_DECREF(errmsg); + PyObject *src = metastr_to_unicode(src_meta, 0); + if (src == NULL) { + return -1; + } + PyObject *dst = 
+        if (dst == NULL) {
+            Py_DECREF(src);
+            return -1;
+        }
+        PyErr_Format(PyExc_TypeError,
+            "Cannot cast %s from metadata %S to %S according to the rule %s",
+            object_type, src, dst, npy_casting_to_string(casting));
+        Py_DECREF(src);
+        Py_DECREF(dst);
         return -1;
     }
 }
@@ -1600,32 +1604,38 @@ compute_datetime_metadata_greatest_common_divisor(
     return 0;

 incompatible_units: {
-        PyObject *errmsg;
-        errmsg = PyUnicode_FromString("Cannot get "
-                    "a common metadata divisor for "
-                    "NumPy datetime metadata ");
-        errmsg = append_metastr_to_string(meta1, 0, errmsg);
-        PyUString_ConcatAndDel(&errmsg,
-                PyUnicode_FromString(" and "));
-        errmsg = append_metastr_to_string(meta2, 0, errmsg);
-        PyUString_ConcatAndDel(&errmsg,
-                PyUnicode_FromString(" because they have "
-                    "incompatible nonlinear base time units"));
-        PyErr_SetObject(PyExc_TypeError, errmsg);
-        Py_DECREF(errmsg);
+        PyObject *umeta1 = metastr_to_unicode(meta1, 0);
+        if (umeta1 == NULL) {
+            return -1;
+        }
+        PyObject *umeta2 = metastr_to_unicode(meta2, 0);
+        if (umeta2 == NULL) {
+            Py_DECREF(umeta1);
+            return -1;
+        }
+        PyErr_Format(PyExc_TypeError,
+            "Cannot get a common metadata divisor for NumPy datetime "
+            "metadata %S and %S because they have incompatible nonlinear "
+            "base time units.", umeta1, umeta2);
+        Py_DECREF(umeta1);
+        Py_DECREF(umeta2);
         return -1;
     }
 units_overflow: {
-        PyObject *errmsg;
-        errmsg = PyUnicode_FromString("Integer overflow "
-                    "getting a common metadata divisor for "
-                    "NumPy datetime metadata ");
-        errmsg = append_metastr_to_string(meta1, 0, errmsg);
-        PyUString_ConcatAndDel(&errmsg,
-                PyUnicode_FromString(" and "));
-        errmsg = append_metastr_to_string(meta2, 0, errmsg);
-        PyErr_SetObject(PyExc_OverflowError, errmsg);
-        Py_DECREF(errmsg);
+        PyObject *umeta1 = metastr_to_unicode(meta1, 0);
+        if (umeta1 == NULL) {
+            return -1;
+        }
+        PyObject *umeta2 = metastr_to_unicode(meta2, 0);
+        if (umeta2 == NULL) {
+            Py_DECREF(umeta1);
+            return -1;
+        }
+        PyErr_Format(PyExc_OverflowError,
+            "Integer overflow getting a common metadata divisor for "
+            "NumPy datetime metadata %S and %S.", umeta1, umeta2);
+        Py_DECREF(umeta1);
+        Py_DECREF(umeta2);
         return -1;
     }
 }
@@ -1949,6 +1959,60 @@ convert_pyobject_to_datetime_metadata(PyObject *obj,
     }
 }

+/*
+ * Return the datetime metadata as a Unicode object.
+ *
+ * Returns new reference, NULL on error.
+ *
+ * If 'skip_brackets' is true, skips the '[]'.
+ */
+NPY_NO_EXPORT PyObject *
+metastr_to_unicode(PyArray_DatetimeMetaData *meta, int skip_brackets)
+{
+    int num;
+    char const *basestr;
+
+    if (meta->base == NPY_FR_GENERIC) {
+        /* Without brackets, give a string "generic" */
+        if (skip_brackets) {
+            return PyUnicode_FromString("generic");
+        }
+        /* But with brackets, append nothing */
+        else {
+            return PyUnicode_FromString("");
+        }
+    }
+
+    num = meta->num;
+    if (meta->base >= 0 && meta->base < NPY_DATETIME_NUMUNITS) {
+        basestr = _datetime_strings[meta->base];
+    }
+    else {
+        PyErr_SetString(PyExc_RuntimeError,
+                "NumPy datetime metadata is corrupted");
+        return NULL;
+    }
+
+    if (num == 1) {
+        if (skip_brackets) {
+            return PyUnicode_FromFormat("%s", basestr);
+        }
+        else {
+            return PyUnicode_FromFormat("[%s]", basestr);
+        }
+    }
+    else {
+        if (skip_brackets) {
+            return PyUnicode_FromFormat("%d%s", num, basestr);
+        }
+        else {
+            return PyUnicode_FromFormat("[%d%s]", num, basestr);
+        }
+    }
+}
+
+
 /*
  * 'ret' is a PyUString containing the datetime string, and this
  * function appends the metadata string to it.
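As a rough illustration (not part of the patch), the string produced by
metastr_to_unicode can be modeled in Python; `basestr` below stands in for
the `_datetime_strings[meta->base]` lookup and "generic" for NPY_FR_GENERIC:

    def metastr_to_str(basestr, num, skip_brackets=False):
        # Mirrors the C branches above: generic unit, num == 1, num != 1.
        if basestr == "generic":
            return "generic" if skip_brackets else ""
        body = basestr if num == 1 else "%d%s" % (num, basestr)
        return body if skip_brackets else "[%s]" % body

    assert metastr_to_str("us", 1) == "[us]"
    assert metastr_to_str("s", 25, skip_brackets=True) == "25s"

The %S conversions in the new PyErr_Format calls interpolate exactly these
strings into the error messages.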
From e77c8455bbf5590db0cae41707eba1b657d78d17 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Mon, 28 Sep 2020 16:16:51 -0600 Subject: [PATCH 114/409] MAINT: Replace append_metastr_to_string in scalartypes.c.src Use metastr_to_unicode instead. --- numpy/core/src/multiarray/scalartypes.c.src | 54 ++++++++++----------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index 65672237dad4..74ee260afd2e 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -518,21 +518,15 @@ datetimetype_repr(PyObject *self) */ if ((scal->obmeta.num == 1 && scal->obmeta.base != NPY_FR_h) || scal->obmeta.base == NPY_FR_GENERIC) { - ret = PyUnicode_FromString("numpy.datetime64('"); - PyUString_ConcatAndDel(&ret, - PyUnicode_FromString(iso)); - PyUString_ConcatAndDel(&ret, - PyUnicode_FromString("')")); + ret = PyUnicode_FromFormat("numpy.datetime64('%s')", iso); } else { - ret = PyUnicode_FromString("numpy.datetime64('"); - PyUString_ConcatAndDel(&ret, - PyUnicode_FromString(iso)); - PyUString_ConcatAndDel(&ret, - PyUnicode_FromString("','")); - ret = append_metastr_to_string(&scal->obmeta, 1, ret); - PyUString_ConcatAndDel(&ret, - PyUnicode_FromString("')")); + PyObject *meta = metastr_to_unicode(&scal->obmeta, 1); + if (meta == NULL) { + return NULL; + } + ret = PyUnicode_FromFormat("numpy.datetime64('%s','%S')", iso, meta); + Py_DECREF(meta); } return ret; @@ -542,7 +536,7 @@ static PyObject * timedeltatype_repr(PyObject *self) { PyTimedeltaScalarObject *scal; - PyObject *ret; + PyObject *val, *ret; if (!PyArray_IsScalar(self, Timedelta)) { PyErr_SetString(PyExc_RuntimeError, @@ -554,32 +548,34 @@ timedeltatype_repr(PyObject *self) /* The value */ if (scal->obval == NPY_DATETIME_NAT) { - ret = PyUnicode_FromString("numpy.timedelta64('NaT'"); + val = PyUnicode_FromString("'NaT'"); } else { - /* - * Can't use "%lld" if HAVE_LONG_LONG is not defined - */ + /* Can't use "%lld" if HAVE_LONG_LONG is not defined */ #if defined(HAVE_LONG_LONG) - ret = PyUnicode_FromFormat("numpy.timedelta64(%lld", - (long long)scal->obval); + val = PyUnicode_FromFormat("%lld", (long long)scal->obval); #else - ret = PyUnicode_FromFormat("numpy.timedelta64(%ld", - (long)scal->obval); + val = PyUnicode_FromFormat("%ld", (long)scal->obval); #endif } + if (val == NULL) { + return NULL; + } + /* The metadata unit */ if (scal->obmeta.base == NPY_FR_GENERIC) { - PyUString_ConcatAndDel(&ret, - PyUnicode_FromString(")")); + ret = PyUnicode_FromFormat("numpy.timedelta64(%S)", val); } else { - PyUString_ConcatAndDel(&ret, - PyUnicode_FromString(",'")); - ret = append_metastr_to_string(&scal->obmeta, 1, ret); - PyUString_ConcatAndDel(&ret, - PyUnicode_FromString("')")); + PyObject *meta = metastr_to_unicode(&scal->obmeta, 1); + if (meta == NULL) { + Py_DECREF(val); + return NULL; + } + ret = PyUnicode_FromFormat("numpy.timedelta64(%S,'%S')", val, meta); + Py_DECREF(meta); } + Py_DECREF(val); return ret; } From a3fc7708f3808084bf7203f809998fa7448b30ef Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Mon, 28 Sep 2020 19:58:34 -0600 Subject: [PATCH 115/409] MAINT: Replace append_metastr_to_string in descriptor.c. Use metastr_to_unicode instead. 
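For context, the metadata suffix this patch splices onto the protocol typestr
is the bracketed form built by metastr_to_unicode. On a little-endian build,
for example (standard behaviour, shown here only as a reminder):

    import numpy as np

    # the '[ns]' / '[2h]' suffixes are the bracketed metadata strings
    assert np.dtype('datetime64[ns]').str == '<M8[ns]'
    assert np.dtype('timedelta64[2h]').str == '<m8[2h]'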
--- numpy/core/src/multiarray/descriptor.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index 6e378f626b6a..257ededaecac 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -1892,18 +1892,26 @@ arraydescr_protocol_typestr_get(PyArray_Descr *self) else { ret = PyUnicode_FromFormat("%c%c%d", endian, basic_, size); } + if (ret == NULL) { + return NULL; + } + if (PyDataType_ISDATETIME(self)) { PyArray_DatetimeMetaData *meta; - meta = get_datetime_metadata_from_dtype(self); if (meta == NULL) { Py_DECREF(ret); return NULL; } + PyObject *umeta = metastr_to_unicode(meta, 0); + if (umeta == NULL) { + Py_DECREF(ret); + return NULL; + } - ret = append_metastr_to_string(meta, 0, ret); + Py_SETREF(ret, PyUnicode_Concat(ret, umeta)); + Py_DECREF(umeta); } - return ret; } From 97a4bc1401d3cce1211ea986dff447c12c9a62fb Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Tue, 29 Sep 2020 07:27:40 -0600 Subject: [PATCH 116/409] MAINT: Revise comment in numpy.core._dtype.py Replace append_metastr_to_string by metastr_to_unicode. --- numpy/core/_dtype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/core/_dtype.py b/numpy/core/_dtype.py index 50aeeb5bc921..4249071ffe98 100644 --- a/numpy/core/_dtype.py +++ b/numpy/core/_dtype.py @@ -176,7 +176,7 @@ def _byte_order_str(dtype): def _datetime_metadata_str(dtype): - # TODO: this duplicates the C append_metastr_to_string + # TODO: this duplicates the C metastr_to_unicode functionality unit, count = np.datetime_data(dtype) if unit == 'generic': return '' From 320cfb238c4bc160dad9c839eef8325618c89e23 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Tue, 29 Sep 2020 07:36:27 -0600 Subject: [PATCH 117/409] MAINT: Remove append_metastr_to_string function. It has been replaced by metastr_to_unicode. --- numpy/core/src/multiarray/_datetime.h | 12 ----- numpy/core/src/multiarray/datetime.c | 66 +-------------------------- 2 files changed, 1 insertion(+), 77 deletions(-) diff --git a/numpy/core/src/multiarray/_datetime.h b/numpy/core/src/multiarray/_datetime.h index be9bac48c811..421b03f93c5a 100644 --- a/numpy/core/src/multiarray/_datetime.h +++ b/numpy/core/src/multiarray/_datetime.h @@ -209,18 +209,6 @@ convert_pyobject_to_datetime_metadata(PyObject *obj, NPY_NO_EXPORT PyObject * metastr_to_unicode(PyArray_DatetimeMetaData *meta, int skip_brackets); -/* - * 'ret' is a PyUString containing the datetime string, and this - * function appends the metadata string to it. - * - * If 'skip_brackets' is true, skips the '[]'. 
- * - * This function steals the reference 'ret' - */ -NPY_NO_EXPORT PyObject * -append_metastr_to_string(PyArray_DatetimeMetaData *meta, - int skip_brackets, - PyObject *ret); /* * Tests for and converts a Python datetime.datetime or datetime.date diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c index 99b6a24e9681..4afc45fb63e8 100644 --- a/numpy/core/src/multiarray/datetime.c +++ b/numpy/core/src/multiarray/datetime.c @@ -1978,7 +1978,7 @@ metastr_to_unicode(PyArray_DatetimeMetaData *meta, int skip_brackets) if (skip_brackets) { return PyUnicode_FromString("generic"); } - /* But with brackets, append nothing */ + /* But with brackets, return nothing */ else { return PyUnicode_FromString(""); } @@ -2013,70 +2013,6 @@ metastr_to_unicode(PyArray_DatetimeMetaData *meta, int skip_brackets) } -/* - * 'ret' is a PyUString containing the datetime string, and this - * function appends the metadata string to it. - * - * If 'skip_brackets' is true, skips the '[]'. - * - * This function steals the reference 'ret' - */ -NPY_NO_EXPORT PyObject * -append_metastr_to_string(PyArray_DatetimeMetaData *meta, - int skip_brackets, - PyObject *ret) -{ - PyObject *res; - int num; - char const *basestr; - - if (ret == NULL) { - return NULL; - } - - if (meta->base == NPY_FR_GENERIC) { - /* Without brackets, give a string "generic" */ - if (skip_brackets) { - PyUString_ConcatAndDel(&ret, PyUnicode_FromString("generic")); - return ret; - } - /* But with brackets, append nothing */ - else { - return ret; - } - } - - num = meta->num; - if (meta->base >= 0 && meta->base < NPY_DATETIME_NUMUNITS) { - basestr = _datetime_strings[meta->base]; - } - else { - PyErr_SetString(PyExc_RuntimeError, - "NumPy datetime metadata is corrupted"); - return NULL; - } - - if (num == 1) { - if (skip_brackets) { - res = PyUnicode_FromFormat("%s", basestr); - } - else { - res = PyUnicode_FromFormat("[%s]", basestr); - } - } - else { - if (skip_brackets) { - res = PyUnicode_FromFormat("%d%s", num, basestr); - } - else { - res = PyUnicode_FromFormat("[%d%s]", num, basestr); - } - } - - PyUString_ConcatAndDel(&ret, res); - return ret; -} - /* * Adjusts a datetimestruct based on a seconds offset. Assumes * the current values are valid. From 4c74f375936c5271d48ec101676de1b3ab0b743c Mon Sep 17 00:00:00 2001 From: leakec Date: Tue, 29 Sep 2020 12:43:46 -0500 Subject: [PATCH 118/409] BLD: Fixed ARGOUTVIEWM memory deallocation. 
Closes #17398 --- tools/swig/numpy.i | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tools/swig/numpy.i b/tools/swig/numpy.i index 6b69ce96e0ed..a2e7a335fdb0 100644 --- a/tools/swig/numpy.i +++ b/tools/swig/numpy.i @@ -2492,9 +2492,9 @@ if (!array) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); + PyObject* cap = PyCapsule_New((void*)(*$2), SWIGPY_CAPSULE_NAME, free_cap); %#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); + PyObject* cap = PyCObject_FromVoidPtr((void*)(*$2), free); %#endif %#if NPY_API_VERSION < 0x00000007 @@ -2562,9 +2562,9 @@ if (!array) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); + PyObject* cap = PyCapsule_New((void*)(*$3), SWIGPY_CAPSULE_NAME, free_cap); %#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); + PyObject* cap = PyCObject_FromVoidPtr((void*)(*$3), free); %#endif %#if NPY_API_VERSION < 0x00000007 @@ -2632,9 +2632,9 @@ if (!array || !require_fortran(array)) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); + PyObject* cap = PyCapsule_New((void*)(*$3), SWIGPY_CAPSULE_NAME, free_cap); %#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); + PyObject* cap = PyCObject_FromVoidPtr((void*)(*$3), free); %#endif %#if NPY_API_VERSION < 0x00000007 @@ -2706,9 +2706,9 @@ if (!array) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); + PyObject* cap = PyCapsule_New((void*)(*$4), SWIGPY_CAPSULE_NAME, free_cap); %#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); + PyObject* cap = PyCObject_FromVoidPtr((void*)(*$4), free); %#endif %#if NPY_API_VERSION < 0x00000007 @@ -2780,9 +2780,9 @@ if (!array || !require_fortran(array)) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); + PyObject* cap = PyCapsule_New((void*)(*$4), SWIGPY_CAPSULE_NAME, free_cap); %#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); + PyObject* cap = PyCObject_FromVoidPtr((void*)(*$4), free); %#endif %#if NPY_API_VERSION < 0x00000007 @@ -2856,9 +2856,9 @@ if (!array) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); + PyObject* cap = PyCapsule_New((void*)(*$5), SWIGPY_CAPSULE_NAME, free_cap); %#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); + PyObject* cap = PyCObject_FromVoidPtr((void*)(*$5), free); %#endif %#if NPY_API_VERSION < 0x00000007 @@ -2932,9 +2932,9 @@ if (!array || !require_fortran(array)) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); + PyObject* cap = PyCapsule_New((void*)(*$5), SWIGPY_CAPSULE_NAME, free_cap); %#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); + PyObject* cap = PyCObject_FromVoidPtr((void*)(*$5), free); %#endif %#if NPY_API_VERSION < 0x00000007 @@ -3008,9 +3008,9 @@ if (!array) SWIG_fail; %#ifdef SWIGPY_USE_CAPSULE - PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap); + PyObject* cap = PyCapsule_New((void*)(*$5), SWIGPY_CAPSULE_NAME, free_cap); %#else - PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free); + PyObject* cap = PyCObject_FromVoidPtr((void*)(*$5), free); %#endif %#if NPY_API_VERSION < 0x00000007 @@ 
-3084,9 +3084,9 @@ if (!array || !require_fortran(array)) SWIG_fail;

 %#ifdef SWIGPY_USE_CAPSULE
-    PyObject* cap = PyCapsule_New((void*)(*$1), SWIGPY_CAPSULE_NAME, free_cap);
+    PyObject* cap = PyCapsule_New((void*)(*$5), SWIGPY_CAPSULE_NAME, free_cap);
 %#else
-    PyObject* cap = PyCObject_FromVoidPtr((void*)(*$1), free);
+    PyObject* cap = PyCObject_FromVoidPtr((void*)(*$5), free);
 %#endif

 %#if NPY_API_VERSION < 0x00000007

From ab81a55dba9294aac6926d2921b147c791488ae3 Mon Sep 17 00:00:00 2001
From: Ross Barnowski
Date: Tue, 29 Sep 2020 15:08:09 -0700
Subject: [PATCH 119/409] DOC: rm incorrect alias from recarray user article.

---
 doc/source/user/basics.rec.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/user/basics.rec.rst b/doc/source/user/basics.rec.rst
index f579b0d85141..7380ef6359dc 100644
--- a/doc/source/user/basics.rec.rst
+++ b/doc/source/user/basics.rec.rst
@@ -576,7 +576,7 @@ Record Arrays

 As an optional convenience numpy provides an ndarray subclass,
 :class:`numpy.recarray`, and associated helper functions in the
-:mod:`numpy.lib.recfunctions` submodule (aliased as ``numpy.rec``), that allows
+:mod:`numpy.lib.recfunctions` submodule, that allows
 access to fields of structured arrays by attribute instead of only by index.
 Record arrays also use a special datatype, :class:`numpy.record`, that allows
 field access by attribute on the structured scalars obtained from the array.

From e13747d8dc27c359f11485b4d6e764d7f8231562 Mon Sep 17 00:00:00 2001
From: Ross Barnowski
Date: Tue, 29 Sep 2020 20:06:08 -0700
Subject: [PATCH 120/409] DOC: Add arraysetops to autosummary.

* Generate stub for module, fixes 7 broken links
* Modify arraysetops module docstring w/ rst formatting
* Remove listing of functions from module docstring.

---
 doc/source/reference/routines.set.rst |  5 +++++
 numpy/lib/arraysetops.py              | 21 +++++----------------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/doc/source/reference/routines.set.rst b/doc/source/reference/routines.set.rst
index b12d3d5f5e79..149c33a8b610 100644
--- a/doc/source/reference/routines.set.rst
+++ b/doc/source/reference/routines.set.rst
@@ -3,6 +3,11 @@ Set routines

 .. currentmodule:: numpy

+.. autosummary::
+   :toctree: generated/
+
+   lib.arraysetops
+
 Making proper sets
 ------------------
 .. autosummary::

diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 6a2ad004cbf0..9464692e0b2b 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -1,28 +1,17 @@
 """
 Set operations for arrays based on sorting.

-:Contains:
-  unique,
-  isin,
-  ediff1d,
-  intersect1d,
-  setxor1d,
-  in1d,
-  union1d,
-  setdiff1d
-
-:Notes:
+Notes
+-----

 For floating point arrays, inaccurate results may appear due to usual round-off
 and floating point comparison issues.

 Speed could be gained in some operations by an implementation of
-sort(), that can provide directly the permutation vectors, avoiding
-thus calls to argsort().
+`numpy.sort` that can directly provide the permutation vectors, thus
+avoiding calls to `numpy.argsort`.

-To do: Optionally return indices analogously to unique for all functions.
-
-:Author: Robert Cimrman
+Original author: Robert Cimrman

 """
 import functools

From 3599af53d7fbf3976020b8443424ed5ffb955693 Mon Sep 17 00:00:00 2001
From: Ross Barnowski
Date: Tue, 29 Sep 2020 20:34:42 -0700
Subject: [PATCH 121/409] DOC: Incorporate feedback from review.
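The reworded paragraph below separates np.rec (recarray creation helpers)
from numpy.lib.recfunctions (structured-array utilities). A minimal example
of the attribute access being documented (standard usage, for illustration):

    import numpy as np

    # np.rec.array builds a recarray; fields become attributes
    r = np.rec.array([(1, 2.0), (3, 4.0)], dtype=[('x', 'i4'), ('y', 'f8')])
    r.x        # array([1, 3], dtype=int32)
    r[0].y     # 2.0 -- np.record scalars also allow attribute access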
--- doc/source/user/basics.rec.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/source/user/basics.rec.rst b/doc/source/user/basics.rec.rst index 7380ef6359dc..bb4ed89e999a 100644 --- a/doc/source/user/basics.rec.rst +++ b/doc/source/user/basics.rec.rst @@ -575,11 +575,14 @@ Record Arrays ============= As an optional convenience numpy provides an ndarray subclass, -:class:`numpy.recarray`, and associated helper functions in the -:mod:`numpy.lib.recfunctions` submodule, that allows -access to fields of structured arrays by attribute instead of only by index. -Record arrays also use a special datatype, :class:`numpy.record`, that allows +:class:`numpy.recarray` that allows access to fields of structured arrays by +attribute instead of only by index. +Record arrays use a special datatype, :class:`numpy.record`, that allows field access by attribute on the structured scalars obtained from the array. +The :mod:`numpy.rec` module provides functions for creating recarrays from +various objects. +Additional helper functions for creating and manipulating structured arrays +can be found in :mod:`numpy.lib.recfunctions`. The simplest way to create a record array is with ``numpy.rec.array``:: From e13747d8dc27c359f11485b4d6e764d7f8231562 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Tue, 29 Sep 2020 15:31:41 -0600 Subject: [PATCH 122/409] MAINT: Replace PyUString_ConcatAndDel in nditer_constr.c. --- numpy/core/src/multiarray/nditer_constr.c | 234 ++++++++++------------ 1 file changed, 108 insertions(+), 126 deletions(-) diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index 4bc6d2ca1c5c..b379a28ac3ae 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -1750,73 +1750,70 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itf return 1; broadcast_error: { - PyObject *errmsg, *tmp; npy_intp remdims[NPY_MAXDIMS]; - char *tmpstr; if (op_axes == NULL) { - errmsg = PyUnicode_FromString("operands could not be broadcast " - "together with shapes "); - if (errmsg == NULL) { + PyObject *shape1 = PyUnicode_FromString(""); + if (shape1 == NULL) { return 0; } for (iop = 0; iop < nop; ++iop) { if (op[iop] != NULL) { - tmp = convert_shape_to_string(PyArray_NDIM(op[iop]), - PyArray_DIMS(op[iop]), - " "); + int ndims = PyArray_NDIM(op[iop]); + npy_intp *dims = PyArray_DIMS(op[iop]); + PyObject *tmp = convert_shape_to_string(ndims, dims, " "); if (tmp == NULL) { - Py_DECREF(errmsg); + Py_DECREF(shape1); return 0; } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { + Py_SETREF(shape1, PyUnicode_Concat(shape1, tmp)); + Py_DECREF(tmp); + if (shape1 == NULL) { return 0; } } } - if (itershape != NULL) { - tmp = PyUnicode_FromString("and requested shape "); - if (tmp == NULL) { - Py_DECREF(errmsg); - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - - tmp = convert_shape_to_string(ndim, itershape, ""); - if (tmp == NULL) { - Py_DECREF(errmsg); - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { + if (itershape == NULL) { + PyErr_Format(PyExc_ValueError, + "operands could not be broadcast together with " + "shapes %S", shape1); + Py_DECREF(shape1); + return 0; + } + else { + PyObject *shape2 = convert_shape_to_string(ndim, itershape, ""); + if (shape2 == NULL) { + Py_DECREF(shape1); return 0; } - + PyErr_Format(PyExc_ValueError, + "operands could not be broadcast 
together with " + "shapes %S and requested shape %S", shape1, shape2); + Py_DECREF(shape1); + Py_DECREF(shape2); + return 0; } - PyErr_SetObject(PyExc_ValueError, errmsg); - Py_DECREF(errmsg); } else { - errmsg = PyUnicode_FromString("operands could not be broadcast " - "together with remapped shapes " - "[original->remapped]: "); + PyObject *shape1 = PyUnicode_FromString(""); + if (shape1 == NULL) { + return 0; + } for (iop = 0; iop < nop; ++iop) { if (op[iop] != NULL) { int *axes = op_axes[iop]; + int ndims = PyArray_NDIM(op[iop]); + npy_intp *dims = PyArray_DIMS(op[iop]); + char *tmpstr = (axes == NULL) ? " " : "->"; - tmpstr = (axes == NULL) ? " " : "->"; - tmp = convert_shape_to_string(PyArray_NDIM(op[iop]), - PyArray_DIMS(op[iop]), - tmpstr); + PyObject *tmp = convert_shape_to_string(ndims, dims, tmpstr); if (tmp == NULL) { + Py_DECREF(shape1); return 0; } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { + Py_SETREF(shape1, PyUnicode_Concat(shape1, tmp)); + Py_DECREF(tmp); + if (shape1 == NULL) { return 0; } @@ -1831,80 +1828,83 @@ broadcast_error: { remdims[idim] = -1; } } - tmp = convert_shape_to_string(ndim, remdims, " "); + PyObject *tmp = convert_shape_to_string(ndim, remdims, " "); if (tmp == NULL) { + Py_DECREF(shape1); return 0; } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { + Py_SETREF(shape1, PyUnicode_Concat(shape1, tmp)); + Py_DECREF(tmp); + if (shape1 == NULL) { return 0; } } } } - if (itershape != NULL) { - tmp = PyUnicode_FromString("and requested shape "); - if (tmp == NULL) { - Py_DECREF(errmsg); - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - - tmp = convert_shape_to_string(ndim, itershape, ""); - if (tmp == NULL) { - Py_DECREF(errmsg); - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { + if (itershape == NULL) { + PyErr_Format(PyExc_ValueError, + "operands could not be broadcast together with " + "remapped shapes [original->remapped]: %S", shape1); + Py_DECREF(shape1); + return 0; + } + else { + PyObject *shape2 = convert_shape_to_string(ndim, itershape, ""); + if (shape2 == NULL) { + Py_DECREF(shape1); return 0; } - + PyErr_Format(PyExc_ValueError, + "operands could not be broadcast together with " + "remapped shapes [original->remapped]: %S and " + "requested shape %S", shape1, shape2); + Py_DECREF(shape1); + Py_DECREF(shape2); + return 0; } - PyErr_SetObject(PyExc_ValueError, errmsg); - Py_DECREF(errmsg); } - - return 0; } operand_different_than_broadcast: { - npy_intp remdims[NPY_MAXDIMS]; - PyObject *errmsg, *tmp; - - /* Start of error message */ - if (op_flags[iop] & NPY_ITER_READONLY) { - errmsg = PyUnicode_FromString("non-broadcastable operand " - "with shape "); - } - else { - errmsg = PyUnicode_FromString("non-broadcastable output " - "operand with shape "); - } - if (errmsg == NULL) { + /* operand shape */ + int ndims = PyArray_NDIM(op[iop]); + npy_intp *dims = PyArray_DIMS(op[iop]); + PyObject *shape1 = convert_shape_to_string(ndims, dims, ""); + if (shape1 == NULL) { return 0; } - /* Operand shape */ - tmp = convert_shape_to_string(PyArray_NDIM(op[iop]), - PyArray_DIMS(op[iop]), ""); - if (tmp == NULL) { + /* Broadcast shape */ + PyObject *shape2 = convert_shape_to_string(ndim, broadcast_shape, ""); + if (shape2 == NULL) { + Py_DECREF(shape1); return 0; } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { + + if (op_axes == NULL || op_axes[iop] == NULL) { + /* operand shape not remapped */ + + if (op_flags[iop] & 
NPY_ITER_READONLY) { + PyErr_Format(PyExc_ValueError, + "non-broadcastable operand with shape %S doesn't " + "match the broadcast shape %S", shape1, shape2); + } + else { + PyErr_Format(PyExc_ValueError, + "non-broadcastable output operand with shape %S doesn't " + "match the broadcast shape %S", shape1, shape2); + } + Py_DECREF(shape1); + Py_DECREF(shape2); return 0; } - /* Remapped operand shape */ - if (op_axes != NULL && op_axes[iop] != NULL) { - int *axes = op_axes[iop]; + else { + /* operand shape remapped */ + npy_intp remdims[NPY_MAXDIMS]; + int *axes = op_axes[iop]; for (idim = 0; idim < ndim; ++idim) { - npy_intp i = axes[ndim-idim-1]; - + npy_intp i = axes[ndim - idim - 1]; if (i >= 0 && i < PyArray_NDIM(op[iop])) { remdims[idim] = PyArray_DIM(op[iop], i); } @@ -1913,48 +1913,30 @@ operand_different_than_broadcast: { } } - tmp = PyUnicode_FromString(" [remapped to "); - if (tmp == NULL) { - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { + PyObject *shape3 = convert_shape_to_string(ndim, remdims, ""); + if (shape3 == NULL) { + Py_DECREF(shape1); + Py_DECREF(shape2); return 0; } - tmp = convert_shape_to_string(ndim, remdims, "]"); - if (tmp == NULL) { - return 0; + if (op_flags[iop] & NPY_ITER_READONLY) { + PyErr_Format(PyExc_ValueError, + "non-broadcastable operand with shape %S " + "[remapped to %S] doesn't match the broadcast shape %S", + shape1, shape3, shape2); } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; + else { + PyErr_Format(PyExc_ValueError, + "non-broadcastable output operand with shape %S " + "[remapped to %S] doesn't match the broadcast shape %S", + shape1, shape3, shape2); } - } - - tmp = PyUnicode_FromString(" doesn't match the broadcast shape "); - if (tmp == NULL) { - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { + Py_DECREF(shape1); + Py_DECREF(shape2); + Py_DECREF(shape3); return 0; } - - /* Broadcast shape */ - tmp = convert_shape_to_string(ndim, broadcast_shape, ""); - if (tmp == NULL) { - return 0; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { - return 0; - } - - PyErr_SetObject(PyExc_ValueError, errmsg); - Py_DECREF(errmsg); - - return 0; } } From 6d97fdda4875a723366d58db6c5e5febfc67b5da Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Tue, 29 Sep 2020 12:35:46 -0600 Subject: [PATCH 123/409] MAINT: Replace PyUString_ConcatAndDel in mapping.c. 
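The user-visible error is unchanged; only its construction moves to a single
PyErr_Format call. For reference, the message this path produces
(illustrative session):

    import numpy as np

    a = np.zeros((3, 4))
    try:
        a[np.array([0, 1]), np.array([0, 1, 2])]  # (2,) vs (3,) index arrays
    except IndexError as e:
        print(e)
    # shape mismatch: indexing arrays could not be broadcast together
    # with shapes (2,) (3,)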
--- numpy/core/src/multiarray/mapping.c | 72 +++++++++++------------------ 1 file changed, 27 insertions(+), 45 deletions(-) diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 0998a6b495c8..cb5c3823dccf 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -1418,10 +1418,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) return 0; } else if (tup == NULL){ - PyObject *errmsg = PyUnicode_FromString("no field of name "); - PyUString_Concat(&errmsg, ind); - PyErr_SetObject(PyExc_ValueError, errmsg); - Py_DECREF(errmsg); + PyErr_Format(PyExc_ValueError, "no field of name %S", ind); return 0; } if (_unpack_field(tup, &fieldtype, &offset) < 0) { @@ -2345,7 +2342,6 @@ mapiter_fill_info(PyArrayMapIterObject *mit, npy_index_info *indices, int consec_status = -1; int axis, broadcast_axis; npy_intp dimension; - PyObject *errmsg, *tmp; for (i = 0; i < mit->nd_fancy; i++) { mit->dimensions[i] = 1; @@ -2433,35 +2429,38 @@ mapiter_fill_info(PyArrayMapIterObject *mit, npy_index_info *indices, return 0; - broadcast_error: +broadcast_error: ; // Declarations cannot follow labels, add empty statement. /* * Attempt to set a meaningful exception. Could also find out * if a boolean index was converted. */ - errmsg = PyUnicode_FromString("shape mismatch: indexing arrays could not " - "be broadcast together with shapes "); + PyObject *errmsg = PyUnicode_FromString(""); if (errmsg == NULL) { return -1; } - for (i = 0; i < index_num; i++) { if (!(indices[i].type & HAS_FANCY)) { continue; } - tmp = convert_shape_to_string( - PyArray_NDIM((PyArrayObject *)indices[i].object), - PyArray_SHAPE((PyArrayObject *)indices[i].object), - " "); + + int ndim = PyArray_NDIM((PyArrayObject *)indices[i].object); + npy_intp *shape = PyArray_SHAPE((PyArrayObject *)indices[i].object); + PyObject *tmp = convert_shape_to_string(ndim, shape, " "); if (tmp == NULL) { + Py_DECREF(errmsg); return -1; } - PyUString_ConcatAndDel(&errmsg, tmp); + + Py_SETREF(errmsg, PyUnicode_Concat(errmsg, tmp)); + Py_DECREF(tmp); if (errmsg == NULL) { return -1; } } - PyErr_SetObject(PyExc_IndexError, errmsg); + PyErr_Format(PyExc_IndexError, + "shape mismatch: indexing arrays could not " + "be broadcast together with shapes %S", errmsg); Py_DECREF(errmsg); return -1; } @@ -2653,7 +2652,6 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type, npy_uint32 extra_op_flags, PyArrayObject *extra_op, PyArray_Descr *extra_op_dtype) { - PyObject *errmsg, *tmp; /* For shape reporting on error */ PyArrayObject *original_extra_op = extra_op; @@ -3183,45 +3181,29 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type, goto finish; broadcast_error: - errmsg = PyUnicode_FromString("shape mismatch: value array " - "of shape "); - if (errmsg == NULL) { - goto finish; - } - /* Report the shape of the original array if it exists */ if (original_extra_op == NULL) { original_extra_op = extra_op; } - tmp = convert_shape_to_string(PyArray_NDIM(original_extra_op), - PyArray_DIMS(original_extra_op), " "); - if (tmp == NULL) { - goto finish; - } - PyUString_ConcatAndDel(&errmsg, tmp); - if (errmsg == NULL) { + int extra_ndim = PyArray_NDIM(original_extra_op); + npy_intp *extra_dims = PyArray_DIMS(original_extra_op); + PyObject *shape1 = convert_shape_to_string(extra_ndim, extra_dims, " "); + if (shape1 == NULL) { goto finish; } - tmp = PyUnicode_FromString("could not be broadcast to indexing " - "result of shape "); - 
PyUString_ConcatAndDel(&errmsg, tmp);
-    if (errmsg == NULL) {
+    PyObject *shape2 = convert_shape_to_string(mit->nd, mit->dimensions, "");
+    if (shape2 == NULL) {
+        Py_DECREF(shape1);
         goto finish;
     }
-    tmp = convert_shape_to_string(mit->nd, mit->dimensions, "");
-    if (tmp == NULL) {
-        goto finish;
-    }
-    PyUString_ConcatAndDel(&errmsg, tmp);
-    if (errmsg == NULL) {
-        goto finish;
-    }

+    PyErr_Format(PyExc_ValueError,
+        "shape mismatch: value array of shape %S could not be broadcast "
+        "to indexing result of shape %S", shape1, shape2);

-    PyErr_SetObject(PyExc_ValueError, errmsg);
-    Py_DECREF(errmsg);
+    Py_DECREF(shape1);
+    Py_DECREF(shape2);

 finish:
     Py_XDECREF(extra_op);
@@ -3320,7 +3302,7 @@ PyArray_MapIterArrayCopyIfOverlap(PyArrayObject * a, PyObject * index,
     Py_XDECREF(a_copy);
     Py_XDECREF(subspace);
     Py_XDECREF((PyObject *)mit);
-    for (i=0; i < index_num; i++) {
+    for (i = 0; i < index_num; i++) {
         Py_XDECREF(indices[i].object);
     }
     return NULL;

From d120110a0e0dff81a54a982f82574f1e072c345f Mon Sep 17 00:00:00 2001
From: Jun Kudo
Date: Thu, 1 Oct 2020 03:50:03 +0900
Subject: [PATCH 124/409] ENH: Replace the module-level `__getattr__` with
 explicit type annotations #17367

---
 numpy/__init__.pyi | 320 ++++++++++++++++++++++++++++++++++-
 1 file changed, 318 insertions(+), 2 deletions(-)

diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
index d4eda6b3150f..7fdd1a5ece74 100644
--- a/numpy/__init__.pyi
+++ b/numpy/__init__.pyi
@@ -154,8 +154,324 @@ __all__ = [
     "var",
 ]

-# TODO: remove when the full numpy namespace is defined
-def __getattr__(name: str) -> Any: ...
+DataSource: Any
+False_: Any
+MachAr: Any
+ScalarType: Any
+True_: Any
+UFUNC_PYVALS_NAME: Any
+abs: Any
+angle: Any
+append: Any
+apply_along_axis: Any
+apply_over_axes: Any
+arange: Any
+array2string: Any
+array_repr: Any
+array_split: Any
+array_str: Any
+asanyarray: Any
+asarray: Any
+asarray_chkfinite: Any
+ascontiguousarray: Any
+asfarray: Any
+asfortranarray: Any
+asmatrix: Any
+asscalar: Any
+atleast_1d: Any
+atleast_2d: Any
+atleast_3d: Any
+average: Any
+bartlett: Any
+bincount: Any
+bitwise_not: Any
+blackman: Any
+block: Any
+bmat: Any
+bool8: Any
+broadcast: Any
+broadcast_arrays: Any
+broadcast_to: Any
+busday_count: Any
+busday_offset: Any
+busdaycalendar: Any
+byte: Any
+byte_bounds: Any
+bytes0: Any
+c_: Any
+can_cast: Any
+cast: Any
+cdouble: Any
+cfloat: Any
+char: Any
+chararray: Any
+clongdouble: Any
+clongfloat: Any
+column_stack: Any
+common_type: Any
+compare_chararrays: Any
+compat: Any
+complex256: Any
+complex_: Any
+concatenate: Any
+conj: Any
+copy: Any
+copyto: Any
+corrcoef: Any
+cov: Any
+csingle: Any
+ctypeslib: Any
+cumproduct: Any
+datetime_as_string: Any
+datetime_data: Any
+delete: Any
+deprecate: Any
+deprecate_with_doc: Any
+diag: Any
+diag_indices: Any
+diag_indices_from: Any
+diagflat: Any
+diff: Any
+digitize: Any
+disp: Any
+divide: Any
+dot: Any
+double: Any
+dsplit: Any
+dstack: Any
+ediff1d: Any
+einsum: Any
+einsum_path: Any
+emath: Any
+errstate: Any
+expand_dims: Any
+extract: Any
+eye: Any
+fft: Any
+fill_diagonal: Any
+finfo: Any
+fix: Any
+flip: Any
+fliplr: Any
+flipud: Any
+float128: Any
+float_: Any
+format_float_positional: Any
+format_float_scientific: Any
+format_parser: Any
+frombuffer: Any
+fromfile: Any
+fromiter: Any
+frompyfunc: Any
+fromregex: Any
+fromstring: Any
+genfromtxt: Any
+geomspace: Any
+get_include: Any
+get_printoptions: Any
+getbufsize: Any
+geterr: Any
+geterrcall: Any
+geterrobj: Any
+gradient: Any
+half: Any
+hamming: Any
+hanning: Any
+histogram:
Any +histogram2d: Any +histogram_bin_edges: Any +histogramdd: Any +hsplit: Any +hstack: Any +i0: Any +iinfo: Any +imag: Any +in1d: Any +index_exp: Any +info: Any +inner: Any +insert: Any +int0: Any +int_: Any +intc: Any +interp: Any +intersect1d: Any +intp: Any +is_busday: Any +iscomplex: Any +iscomplexobj: Any +isin: Any +isneginf: Any +isposinf: Any +isreal: Any +isrealobj: Any +iterable: Any +ix_: Any +kaiser: Any +kron: Any +lexsort: Any +lib: Any +linalg: Any +linspace: Any +load: Any +loads: Any +loadtxt: Any +logspace: Any +longcomplex: Any +longdouble: Any +longfloat: Any +longlong: Any +lookfor: Any +ma: Any +mafromtxt: Any +mask_indices: Any +mat: Any +math: Any +matrix: Any +matrixlib: Any +max: Any +may_share_memory: Any +median: Any +memmap: Any +meshgrid: Any +mgrid: Any +min: Any +min_scalar_type: Any +mintypecode: Any +mod: Any +msort: Any +nan_to_num: Any +nanargmax: Any +nanargmin: Any +nancumprod: Any +nancumsum: Any +nanmax: Any +nanmean: Any +nanmedian: Any +nanmin: Any +nanpercentile: Any +nanprod: Any +nanquantile: Any +nanstd: Any +nansum: Any +nanvar: Any +nbytes: Any +ndenumerate: Any +ndfromtxt: Any +ndindex: Any +nditer: Any +nested_iters: Any +newaxis: Any +numarray: Any +object0: Any +ogrid: Any +packbits: Any +pad: Any +percentile: Any +piecewise: Any +place: Any +poly: Any +poly1d: Any +polyadd: Any +polyder: Any +polydiv: Any +polyfit: Any +polyint: Any +polymul: Any +polynomial: Any +polysub: Any +polyval: Any +printoptions: Any +product: Any +promote_types: Any +put_along_axis: Any +putmask: Any +quantile: Any +r_: Any +random: Any +ravel_multi_index: Any +real: Any +real_if_close: Any +rec: Any +recarray: Any +recfromcsv: Any +recfromtxt: Any +record: Any +require: Any +result_type: Any +roots: Any +rot90: Any +round: Any +round_: Any +row_stack: Any +s_: Any +save: Any +savetxt: Any +savez: Any +savez_compressed: Any +sctypeDict: Any +sctypeNA: Any +sctypes: Any +select: Any +set_printoptions: Any +set_string_function: Any +setbufsize: Any +setdiff1d: Any +seterr: Any +seterrcall: Any +seterrobj: Any +setxor1d: Any +shares_memory: Any +short: Any +show_config: Any +sinc: Any +single: Any +singlecomplex: Any +sort_complex: Any +source: Any +split: Any +stack: Any +str0: Any +string_: Any +sys: Any +take_along_axis: Any +testing: Any +tile: Any +trapz: Any +tri: Any +tril: Any +tril_indices: Any +tril_indices_from: Any +trim_zeros: Any +triu: Any +triu_indices: Any +triu_indices_from: Any +typeDict: Any +typeNA: Any +typecodes: Any +typename: Any +ubyte: Any +uint: Any +uint0: Any +uintc: Any +uintp: Any +ulonglong: Any +unicode_: Any +union1d: Any +unique: Any +unpackbits: Any +unravel_index: Any +unwrap: Any +ushort: Any +vander: Any +vdot: Any +vectorize: Any +version: Any +void0: Any +vsplit: Any +vstack: Any +where: Any +who: Any _NdArraySubClass = TypeVar("_NdArraySubClass", bound=ndarray) _ByteOrder = Literal["S", "<", ">", "=", "|", "L", "B", "N", "I"] From 8528a10d0e0b436f2238039e9c46012395a2958e Mon Sep 17 00:00:00 2001 From: Jun Kudo Date: Thu, 1 Oct 2020 04:44:22 +0900 Subject: [PATCH 125/409] ENH: remove unnecessary annotations in __init__.pyi. 
---
 numpy/__init__.pyi | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
index 7fdd1a5ece74..6a1c5b72b58e 100644
--- a/numpy/__init__.pyi
+++ b/numpy/__init__.pyi
@@ -160,7 +160,6 @@ MachAr: Any
 ScalarType: Any
 True_: Any
 UFUNC_PYVALS_NAME: Any
-abs: Any
 angle: Any
 append: Any
 apply_along_axis: Any
@@ -410,7 +409,6 @@ savetxt: Any
 savez: Any
 savez_compressed: Any
 sctypeDict: Any
-sctypeNA: Any
 sctypes: Any
 select: Any
 set_printoptions: Any
@@ -447,7 +445,6 @@ triu: Any
 triu_indices: Any
 triu_indices_from: Any
 typeDict: Any
-typeNA: Any
 typecodes: Any
 typename: Any
 ubyte: Any

From 0996962236506342d16730580e126efe2dfbf98d Mon Sep 17 00:00:00 2001
From: Jun Kudo
Date: Thu, 1 Oct 2020 05:02:13 +0900
Subject: [PATCH 126/409] TST: Add test to verify that getting non-existent
 objects indeed results in an error.

---
 numpy/typing/tests/data/fail/modules.py | 315 ++++++++++++++++++++++++
 1 file changed, 315 insertions(+)

diff --git a/numpy/typing/tests/data/fail/modules.py b/numpy/typing/tests/data/fail/modules.py
index e7ffe89207f0..5b3d3ffb3075 100644
--- a/numpy/typing/tests/data/fail/modules.py
+++ b/numpy/typing/tests/data/fail/modules.py
@@ -1,3 +1,318 @@
 import numpy as np

 np.testing.bob # E: Module has no attribute
+np.DataSource # E: Module has no attribute
+np.False_ # E: Module has no attribute
+np.MachAr # E: Module has no attribute
+np.ScalarType # E: Module has no attribute
+np.True_ # E: Module has no attribute
+np.UFUNC_PYVALS_NAME # E: Module has no attribute
+np.angle # E: Module has no attribute
+np.append # E: Module has no attribute
+np.apply_along_axis # E: Module has no attribute
+np.apply_over_axes # E: Module has no attribute
+np.arange # E: Module has no attribute
+np.array2string # E: Module has no attribute
+np.array_repr # E: Module has no attribute
+np.array_split # E: Module has no attribute
+np.array_str # E: Module has no attribute
+np.asanyarray # E: Module has no attribute
+np.asarray # E: Module has no attribute
+np.asarray_chkfinite # E: Module has no attribute
+np.ascontiguousarray # E: Module has no attribute
+np.asfarray # E: Module has no attribute
+np.asfortranarray # E: Module has no attribute
+np.asmatrix # E: Module has no attribute
+np.asscalar # E: Module has no attribute
+np.atleast_1d # E: Module has no attribute
+np.atleast_2d # E: Module has no attribute
+np.atleast_3d # E: Module has no attribute
+np.average # E: Module has no attribute
+np.bartlett # E: Module has no attribute
+np.bincount # E: Module has no attribute
+np.bitwise_not # E: Module has no attribute
+np.blackman # E: Module has no attribute
+np.block # E: Module has no attribute
+np.bmat # E: Module has no attribute
+np.bool8 # E: Module has no attribute
+np.broadcast # E: Module has no attribute
+np.broadcast_arrays # E: Module has no attribute
+np.broadcast_to # E: Module has no attribute
+np.busday_count # E: Module has no attribute
+np.busday_offset # E: Module has no attribute
+np.busdaycalendar # E: Module has no attribute
+np.byte # E: Module has no attribute
+np.byte_bounds # E: Module has no attribute
+np.bytes0 # E: Module has no attribute
+np.c_ # E: Module has no attribute
+np.can_cast # E: Module has no attribute
+np.cast # E: Module has no attribute
+np.cdouble # E: Module has no attribute
+np.cfloat # E: Module has no attribute
+np.char # E: Module has no attribute
+np.chararray # E: Module has no attribute
+np.clongdouble # E: Module has no attribute
+np.clongfloat # E: Module has no attribute
+np.column_stack # E: Module has
no attribute +np.common_type # E: Module has no attribute +np.compare_chararrays # E: Module has no attribute +np.compat # E: Module has no attribute +np.complex256 # E: Module has no attribute +np.complex_ # E: Module has no attribute +np.concatenate # E: Module has no attribute +np.conj # E: Module has no attribute +np.copy # E: Module has no attribute +np.copyto # E: Module has no attribute +np.corrcoef # E: Module has no attribute +np.cov # E: Module has no attribute +np.csingle # E: Module has no attribute +np.ctypeslib # E: Module has no attribute +np.cumproduct # E: Module has no attribute +np.datetime_as_string # E: Module has no attribute +np.datetime_data # E: Module has no attribute +np.delete # E: Module has no attribute +np.deprecate # E: Module has no attribute +np.deprecate_with_doc # E: Module has no attribute +np.diag # E: Module has no attribute +np.diag_indices # E: Module has no attribute +np.diag_indices_from # E: Module has no attribute +np.diagflat # E: Module has no attribute +np.diff # E: Module has no attribute +np.digitize # E: Module has no attribute +np.disp # E: Module has no attribute +np.divide # E: Module has no attribute +np.dot # E: Module has no attribute +np.double # E: Module has no attribute +np.dsplit # E: Module has no attribute +np.dstack # E: Module has no attribute +np.ediff1d # E: Module has no attribute +np.einsum # E: Module has no attribute +np.einsum_path # E: Module has no attribute +np.emath # E: Module has no attribute +np.errstate # E: Module has no attribute +np.expand_dims # E: Module has no attribute +np.extract # E: Module has no attribute +np.eye # E: Module has no attribute +np.fft # E: Module has no attribute +np.fill_diagonal # E: Module has no attribute +np.finfo # E: Module has no attribute +np.fix # E: Module has no attribute +np.flip # E: Module has no attribute +np.fliplr # E: Module has no attribute +np.flipud # E: Module has no attribute +np.float128 # E: Module has no attribute +np.float_ # E: Module has no attribute +np.format_float_positional # E: Module has no attribute +np.format_float_scientific # E: Module has no attribute +np.format_parser # E: Module has no attribute +np.frombuffer # E: Module has no attribute +np.fromfile # E: Module has no attribute +np.fromiter # E: Module has no attribute +np.frompyfunc # E: Module has no attribute +np.fromregex # E: Module has no attribute +np.fromstring # E: Module has no attribute +np.genfromtxt # E: Module has no attribute +np.geomspace # E: Module has no attribute +np.get_include # E: Module has no attribute +np.get_printoptions # E: Module has no attribute +np.getbufsize # E: Module has no attribute +np.geterr # E: Module has no attribute +np.geterrcall # E: Module has no attribute +np.geterrobj # E: Module has no attribute +np.gradient # E: Module has no attribute +np.half # E: Module has no attribute +np.hamming # E: Module has no attribute +np.hanning # E: Module has no attribute +np.histogram # E: Module has no attribute +np.histogram2d # E: Module has no attribute +np.histogram_bin_edges # E: Module has no attribute +np.histogramdd # E: Module has no attribute +np.hsplit # E: Module has no attribute +np.hstack # E: Module has no attribute +np.i0 # E: Module has no attribute +np.iinfo # E: Module has no attribute +np.imag # E: Module has no attribute +np.in1d # E: Module has no attribute +np.index_exp # E: Module has no attribute +np.info # E: Module has no attribute +np.inner # E: Module has no attribute +np.insert # E: Module has no attribute +np.int0 # E: Module 
has no attribute +np.int_ # E: Module has no attribute +np.intc # E: Module has no attribute +np.interp # E: Module has no attribute +np.intersect1d # E: Module has no attribute +np.intp # E: Module has no attribute +np.is_busday # E: Module has no attribute +np.iscomplex # E: Module has no attribute +np.iscomplexobj # E: Module has no attribute +np.isin # E: Module has no attribute +np.isneginf # E: Module has no attribute +np.isposinf # E: Module has no attribute +np.isreal # E: Module has no attribute +np.isrealobj # E: Module has no attribute +np.iterable # E: Module has no attribute +np.ix_ # E: Module has no attribute +np.kaiser # E: Module has no attribute +np.kron # E: Module has no attribute +np.lexsort # E: Module has no attribute +np.lib # E: Module has no attribute +np.linalg # E: Module has no attribute +np.linspace # E: Module has no attribute +np.load # E: Module has no attribute +np.loads # E: Module has no attribute +np.loadtxt # E: Module has no attribute +np.logspace # E: Module has no attribute +np.longcomplex # E: Module has no attribute +np.longdouble # E: Module has no attribute +np.longfloat # E: Module has no attribute +np.longlong # E: Module has no attribute +np.lookfor # E: Module has no attribute +np.ma # E: Module has no attribute +np.mafromtxt # E: Module has no attribute +np.mask_indices # E: Module has no attribute +np.mat # E: Module has no attribute +np.math # E: Module has no attribute +np.matrix # E: Module has no attribute +np.matrixlib # E: Module has no attribute +np.max # E: Module has no attribute +np.may_share_memory # E: Module has no attribute +np.median # E: Module has no attribute +np.memmap # E: Module has no attribute +np.meshgrid # E: Module has no attribute +np.mgrid # E: Module has no attribute +np.min # E: Module has no attribute +np.min_scalar_type # E: Module has no attribute +np.mintypecode # E: Module has no attribute +np.mod # E: Module has no attribute +np.msort # E: Module has no attribute +np.nan_to_num # E: Module has no attribute +np.nanargmax # E: Module has no attribute +np.nanargmin # E: Module has no attribute +np.nancumprod # E: Module has no attribute +np.nancumsum # E: Module has no attribute +np.nanmax # E: Module has no attribute +np.nanmean # E: Module has no attribute +np.nanmedian # E: Module has no attribute +np.nanmin # E: Module has no attribute +np.nanpercentile # E: Module has no attribute +np.nanprod # E: Module has no attribute +np.nanquantile # E: Module has no attribute +np.nanstd # E: Module has no attribute +np.nansum # E: Module has no attribute +np.nanvar # E: Module has no attribute +np.nbytes # E: Module has no attribute +np.ndenumerate # E: Module has no attribute +np.ndfromtxt # E: Module has no attribute +np.ndindex # E: Module has no attribute +np.nditer # E: Module has no attribute +np.nested_iters # E: Module has no attribute +np.newaxis # E: Module has no attribute +np.numarray # E: Module has no attribute +np.object0 # E: Module has no attribute +np.ogrid # E: Module has no attribute +np.packbits # E: Module has no attribute +np.pad # E: Module has no attribute +np.percentile # E: Module has no attribute +np.piecewise # E: Module has no attribute +np.place # E: Module has no attribute +np.poly # E: Module has no attribute +np.poly1d # E: Module has no attribute +np.polyadd # E: Module has no attribute +np.polyder # E: Module has no attribute +np.polydiv # E: Module has no attribute +np.polyfit # E: Module has no attribute +np.polyint # E: Module has no attribute +np.polymul # E: Module has no 
attribute +np.polynomial # E: Module has no attribute +np.polysub # E: Module has no attribute +np.polyval # E: Module has no attribute +np.printoptions # E: Module has no attribute +np.product # E: Module has no attribute +np.promote_types # E: Module has no attribute +np.put_along_axis # E: Module has no attribute +np.putmask # E: Module has no attribute +np.quantile # E: Module has no attribute +np.r_ # E: Module has no attribute +np.random # E: Module has no attribute +np.ravel_multi_index # E: Module has no attribute +np.real # E: Module has no attribute +np.real_if_close # E: Module has no attribute +np.rec # E: Module has no attribute +np.recarray # E: Module has no attribute +np.recfromcsv # E: Module has no attribute +np.recfromtxt # E: Module has no attribute +np.record # E: Module has no attribute +np.require # E: Module has no attribute +np.result_type # E: Module has no attribute +np.roots # E: Module has no attribute +np.rot90 # E: Module has no attribute +np.round # E: Module has no attribute +np.round_ # E: Module has no attribute +np.row_stack # E: Module has no attribute +np.s_ # E: Module has no attribute +np.save # E: Module has no attribute +np.savetxt # E: Module has no attribute +np.savez # E: Module has no attribute +np.savez_compressed # E: Module has no attribute +np.sctypeDict # E: Module has no attribute +np.sctypes # E: Module has no attribute +np.select # E: Module has no attribute +np.set_printoptions # E: Module has no attribute +np.set_string_function # E: Module has no attribute +np.setbufsize # E: Module has no attribute +np.setdiff1d # E: Module has no attribute +np.seterr # E: Module has no attribute +np.seterrcall # E: Module has no attribute +np.seterrobj # E: Module has no attribute +np.setxor1d # E: Module has no attribute +np.shares_memory # E: Module has no attribute +np.short # E: Module has no attribute +np.show_config # E: Module has no attribute +np.sinc # E: Module has no attribute +np.single # E: Module has no attribute +np.singlecomplex # E: Module has no attribute +np.sort_complex # E: Module has no attribute +np.source # E: Module has no attribute +np.split # E: Module has no attribute +np.stack # E: Module has no attribute +np.str0 # E: Module has no attribute +np.string_ # E: Module has no attribute +np.sys # E: Module has no attribute +np.take_along_axis # E: Module has no attribute +np.testing # E: Module has no attribute +np.tile # E: Module has no attribute +np.trapz # E: Module has no attribute +np.tri # E: Module has no attribute +np.tril # E: Module has no attribute +np.tril_indices # E: Module has no attribute +np.tril_indices_from # E: Module has no attribute +np.trim_zeros # E: Module has no attribute +np.triu # E: Module has no attribute +np.triu_indices # E: Module has no attribute +np.triu_indices_from # E: Module has no attribute +np.typeDict # E: Module has no attribute +np.typecodes # E: Module has no attribute +np.typename # E: Module has no attribute +np.ubyte # E: Module has no attribute +np.uint # E: Module has no attribute +np.uint0 # E: Module has no attribute +np.uintc # E: Module has no attribute +np.uintp # E: Module has no attribute +np.ulonglong # E: Module has no attribute +np.unicode_ # E: Module has no attribute +np.union1d # E: Module has no attribute +np.unique # E: Module has no attribute +np.unpackbits # E: Module has no attribute +np.unravel_index # E: Module has no attribute +np.unwrap # E: Module has no attribute +np.ushort # E: Module has no attribute +np.vander # E: Module has no attribute +np.vdot 
# E: Module has no attribute +np.vectorize # E: Module has no attribute +np.version # E: Module has no attribute +np.void0 # E: Module has no attribute +np.vsplit # E: Module has no attribute +np.vstack # E: Module has no attribute +np.where # E: Module has no attribute +np.who # E: Module has no attribute \ No newline at end of file From d120110a0e0dff81a54a982f82574f1e072c345f Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Wed, 30 Sep 2020 21:35:26 +0100 Subject: [PATCH 127/409] DOC: in PR template, set expectations for PR review timeline --- .github/PULL_REQUEST_TEMPLATE.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 528580a8e6df..3634292f8ce7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -9,3 +9,10 @@ http://www.numpy.org/devdocs/dev/development_workflow.html#writing-the-commit-me + + From 618456137a265d0dd84b85dd33a588bc69babc99 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Wed, 30 Sep 2020 15:18:59 -0600 Subject: [PATCH 128/409] MAINT: Cleanup PyUString_ConcatAndDel in scalartypes.c.src. --- numpy/core/src/multiarray/scalartypes.c.src | 38 ++++++++++----------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index 74ee260afd2e..1a50927a8cd4 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -660,14 +660,12 @@ timedeltatype_str(PyObject *self) * Can't use "%lld" if HAVE_LONG_LONG is not defined */ #if defined(HAVE_LONG_LONG) - ret = PyUnicode_FromFormat("%lld ", - (long long)(scal->obval * scal->obmeta.num)); + ret = PyUnicode_FromFormat("%lld %s", + (long long)(scal->obval * scal->obmeta.num), basestr); #else - ret = PyUnicode_FromFormat("%ld ", - (long)(scal->obval * scal->obmeta.num)); + ret = PyUnicode_FromFormat("%ld %s", + (long)(scal->obval * scal->obmeta.num), basestr); #endif - PyUString_ConcatAndDel(&ret, - PyUnicode_FromString(basestr)); } return ret; @@ -886,7 +884,7 @@ static PyObject * static PyObject * c@name@type_@kind@(PyObject *self) { - PyObject *rstr, *istr, *ret; + PyObject *rstr, *istr; npy_c@name@ val = PyArrayScalar_VAL(self, C@Name@); TrimMode trim = TrimMode_DptZeros; @@ -899,16 +897,13 @@ c@name@type_@kind@(PyObject *self) if (istr == NULL) { return NULL; } - - PyUString_ConcatAndDel(&istr, PyUnicode_FromString("j")); - return istr; + PyObject *ret = PyUnicode_FromFormat("%Sj", istr); + Py_DECREF(istr); + return ret; } if (npy_isfinite(val.real)) { rstr = @name@type_@kind@_either(val.real, trim, trim, 0); - if (rstr == NULL) { - return NULL; - } } else if (npy_isnan(val.real)) { rstr = PyUnicode_FromString("nan"); @@ -919,12 +914,12 @@ c@name@type_@kind@(PyObject *self) else { rstr = PyUnicode_FromString("-inf"); } + if (rstr == NULL) { + return NULL; + } if (npy_isfinite(val.imag)) { istr = @name@type_@kind@_either(val.imag, trim, trim, 1); - if (istr == NULL) { - return NULL; - } } else if (npy_isnan(val.imag)) { istr = PyUnicode_FromString("+nan"); @@ -935,11 +930,14 @@ c@name@type_@kind@(PyObject *self) else { istr = PyUnicode_FromString("-inf"); } + if (istr == NULL) { + Py_DECREF(rstr); + return NULL; + } - ret = PyUnicode_FromString("("); - PyUString_ConcatAndDel(&ret, rstr); - PyUString_ConcatAndDel(&ret, istr); - PyUString_ConcatAndDel(&ret, PyUnicode_FromString("j)")); + PyObject *ret = PyUnicode_FromFormat("(%S%Sj)", rstr, istr); + Py_DECREF(rstr); + 
Py_DECREF(istr); return ret; } From fc6b5916dc261cc8deb953a0ca85828ddfff36cc Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Wed, 30 Sep 2020 16:00:11 -0600 Subject: [PATCH 129/409] MAINT: Replace PyUString_ConcatAndDel in shape.c. --- numpy/core/src/multiarray/shape.c | 36 +++++++++++++------------------ 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c index 1a38fe956f4d..9dda899131d9 100644 --- a/numpy/core/src/multiarray/shape.c +++ b/numpy/core/src/multiarray/shape.c @@ -458,14 +458,12 @@ _attempt_nocopy_reshape(PyArrayObject *self, int newnd, const npy_intp *newdims, static void raise_reshape_size_mismatch(PyArray_Dims *newshape, PyArrayObject *arr) { - PyObject *msg = PyUnicode_FromFormat("cannot reshape array of size %zd " - "into shape ", PyArray_SIZE(arr)); PyObject *tmp = convert_shape_to_string(newshape->len, newshape->ptr, ""); - - PyUString_ConcatAndDel(&msg, tmp); - if (msg != NULL) { - PyErr_SetObject(PyExc_ValueError, msg); - Py_DECREF(msg); + if (tmp != NULL) { + PyErr_Format(PyExc_ValueError, + "cannot reshape array of size %zd into shape %S", + PyArray_SIZE(arr), tmp); + Py_DECREF(tmp); } } @@ -984,29 +982,25 @@ NPY_NO_EXPORT PyObject * build_shape_string(npy_intp n, npy_intp const *vals) { npy_intp i; - PyObject *ret, *tmp; /* * Negative dimension indicates "newaxis", which can * be discarded for printing if it's a leading dimension. * Find the first non-"newaxis" dimension. */ - i = 0; - while (i < n && vals[i] < 0) { - ++i; - } + for (i = 0; i < n && vals[i] < 0; ++i); if (i == n) { return PyUnicode_FromFormat("()"); } - else { - ret = PyUnicode_FromFormat("(%" NPY_INTP_FMT, vals[i++]); - if (ret == NULL) { - return NULL; - } - } + PyObject *ret = PyUnicode_FromFormat("%" NPY_INTP_FMT, vals[i++]); + if (ret == NULL) { + return NULL; + } for (; i < n; ++i) { + PyObject *tmp; + if (vals[i] < 0) { tmp = PyUnicode_FromString(",newaxis"); } @@ -1018,14 +1012,14 @@ build_shape_string(npy_intp n, npy_intp const *vals) return NULL; } - PyUString_ConcatAndDel(&ret, tmp); + Py_SETREF(ret, PyUnicode_Concat(ret, tmp)); + Py_DECREF(tmp); if (ret == NULL) { return NULL; } } - tmp = PyUnicode_FromFormat(")"); - PyUString_ConcatAndDel(&ret, tmp); + Py_SETREF(ret, PyUnicode_FromFormat("(%S)", ret)); return ret; } From b8b9b56a8112abd56b33a4dcdbfa9ed651c50e0d Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Wed, 30 Sep 2020 16:56:08 -0600 Subject: [PATCH 130/409] MAINT: Replace PyUString_ConcatAndDel in array_assign.c --- numpy/core/src/common/array_assign.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/numpy/core/src/common/array_assign.c b/numpy/core/src/common/array_assign.c index 67abcae24268..c55f6bdb4624 100644 --- a/numpy/core/src/common/array_assign.c +++ b/numpy/core/src/common/array_assign.c @@ -64,19 +64,22 @@ broadcast_strides(int ndim, npy_intp const *shape, return 0; broadcast_error: { - PyObject *errmsg; - - errmsg = PyUnicode_FromFormat("could not broadcast %s from shape ", - strides_name); - PyUString_ConcatAndDel(&errmsg, - build_shape_string(strides_ndim, strides_shape)); - PyUString_ConcatAndDel(&errmsg, - PyUnicode_FromString(" into shape ")); - PyUString_ConcatAndDel(&errmsg, - build_shape_string(ndim, shape)); - PyErr_SetObject(PyExc_ValueError, errmsg); - Py_DECREF(errmsg); + PyObject *shape1 = convert_shape_to_string(strides_ndim, + strides_shape, ""); + if (shape1 == NULL) { + return -1; + } + PyObject *shape2 = 
convert_shape_to_string(ndim, shape, ""); + if (shape2 == NULL) { + Py_DECREF(shape1); + return -1; + } + PyErr_Format(PyExc_ValueError, + "could not broadcast %s from shape %S into shape %S", + strides_name, shape1, shape2); + Py_DECREF(shape1); + Py_DECREF(shape2); return -1; } } From 278ed3eb540529c5cb5dc30c317c9e4558c05c64 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Wed, 30 Sep 2020 17:36:20 -0600 Subject: [PATCH 131/409] MAINT: Replace PyUString_ConcatAndDel in common.c. --- numpy/core/src/multiarray/common.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index 6af71f351ab0..841ed799db54 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -233,7 +233,6 @@ NPY_NO_EXPORT PyObject * convert_shape_to_string(npy_intp n, npy_intp const *vals, char *ending) { npy_intp i; - PyObject *ret, *tmp; /* * Negative dimension indicates "newaxis", which can @@ -245,14 +244,14 @@ convert_shape_to_string(npy_intp n, npy_intp const *vals, char *ending) if (i == n) { return PyUnicode_FromFormat("()%s", ending); } - else { - ret = PyUnicode_FromFormat("(%" NPY_INTP_FMT, vals[i++]); - if (ret == NULL) { - return NULL; - } - } + PyObject *ret = PyUnicode_FromFormat("%" NPY_INTP_FMT, vals[i++]); + if (ret == NULL) { + return NULL; + } for (; i < n; ++i) { + PyObject *tmp; + if (vals[i] < 0) { tmp = PyUnicode_FromString(",newaxis"); } @@ -264,19 +263,19 @@ convert_shape_to_string(npy_intp n, npy_intp const *vals, char *ending) return NULL; } - PyUString_ConcatAndDel(&ret, tmp); + Py_SETREF(ret, PyUnicode_Concat(ret, tmp)); + Py_DECREF(tmp); if (ret == NULL) { return NULL; } } if (i == 1) { - tmp = PyUnicode_FromFormat(",)%s", ending); + Py_SETREF(ret, PyUnicode_FromFormat("(%S,)%s", ret, ending)); } else { - tmp = PyUnicode_FromFormat(")%s", ending); + Py_SETREF(ret, PyUnicode_FromFormat("(%S)%s", ret, ending)); } - PyUString_ConcatAndDel(&ret, tmp); return ret; } From d79cbd8c1c818d9135e1d3b69526d7f04f259922 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Wed, 30 Sep 2020 18:03:41 -0600 Subject: [PATCH 132/409] MAINT: Replace PyUString_ConcatAndDel in descriptor.c. --- numpy/core/src/multiarray/descriptor.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index 257ededaecac..24a3507bc408 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -2898,14 +2898,13 @@ arraydescr_setstate(PyArray_Descr *self, PyObject *args) } if (PyDataType_ISDATETIME(self) && (metadata != NULL)) { - PyObject *old_metadata, *errmsg; + PyObject *old_metadata; PyArray_DatetimeMetaData temp_dt_data; if ((! PyTuple_Check(metadata)) || (PyTuple_Size(metadata) != 2)) { - errmsg = PyUnicode_FromString("Invalid datetime dtype (metadata, c_metadata): "); - PyUString_ConcatAndDel(&errmsg, PyObject_Repr(metadata)); - PyErr_SetObject(PyExc_ValueError, errmsg); - Py_DECREF(errmsg); + PyErr_Format(PyExc_ValueError, + "Invalid datetime dtype (metadata, c_metadata): %R", + metadata); return NULL; } From 2dd8693721f9781d4837ed14461d122bdd5c7874 Mon Sep 17 00:00:00 2001 From: Jun Kudo Date: Thu, 1 Oct 2020 11:24:21 +0900 Subject: [PATCH 133/409] Revert "TST: Add test to verify that getting a non-existent objects indeed results in an error." This reverts commit 0996962236506342d16730580e126efe2dfbf98d. 
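For context on the file being reverted: the `fail/` typing-test files drive a small harness (see `test_fail` in numpy/typing/tests/test_typing.py, touched by PATCH 135 below) that runs mypy over each file and requires the fragment after every `# E:` comment to appear in the error mypy reports for that line. A minimal sketch of that matching rule, with hypothetical names, assuming the harness behaves as described:

    import re

    def error_matches(source_line: str, mypy_message: str) -> bool:
        # Pull out the expected fragment after "# E:" and require it to be
        # a substring of the error mypy actually reported for that line.
        marker = re.search(r"#\s*E:\s*(.*)", source_line)
        return marker is not None and marker.group(1).strip() in mypy_message

    assert error_matches("np.bob  # E: Module has no attribute",
                         'error: Module has no attribute "bob"')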
--- numpy/typing/tests/data/fail/modules.py | 315 ------------------------ 1 file changed, 315 deletions(-) diff --git a/numpy/typing/tests/data/fail/modules.py b/numpy/typing/tests/data/fail/modules.py index 5b3d3ffb3075..e7ffe89207f0 100644 --- a/numpy/typing/tests/data/fail/modules.py +++ b/numpy/typing/tests/data/fail/modules.py @@ -1,318 +1,3 @@ import numpy as np np.testing.bob # E: Module has no attribute -np.DataSource # E: Module has no attribute -np.False_ # E: Module has no attribute -np.MachAr # E: Module has no attribute -np.ScalarType # E: Module has no attribute -np.True_ # E: Module has no attribute -np.UFUNC_PYVALS_NAME # E: Module has no attribute -np.angle # E: Module has no attribute -np.append # E: Module has no attribute -np.apply_along_axis # E: Module has no attribute -np.apply_over_axes # E: Module has no attribute -np.arange # E: Module has no attribute -np.array2string # E: Module has no attribute -np.array_repr # E: Module has no attribute -np.array_split # E: Module has no attribute -np.array_str # E: Module has no attribute -np.asanyarray # E: Module has no attribute -np.asarray # E: Module has no attribute -np.asarray_chkfinite # E: Module has no attribute -np.ascontiguousarray # E: Module has no attribute -np.asfarray # E: Module has no attribute -np.asfortranarray # E: Module has no attribute -np.asmatrix # E: Module has no attribute -np.asscalar # E: Module has no attribute -np.atleast_1d # E: Module has no attribute -np.atleast_2d # E: Module has no attribute -np.atleast_3d # E: Module has no attribute -np.average # E: Module has no attribute -np.bartlett # E: Module has no attribute -np.bincount # E: Module has no attribute -np.bitwise_not # E: Module has no attribute -np.blackman # E: Module has no attribute -np.block # E: Module has no attribute -np.bmat # E: Module has no attribute -np.bool8 # E: Module has no attribute -np.broadcast # E: Module has no attribute -np.broadcast_arrays # E: Module has no attribute -np.broadcast_to # E: Module has no attribute -np.busday_count # E: Module has no attribute -np.busday_offset # E: Module has no attribute -np.busdaycalendar # E: Module has no attribute -np.byte # E: Module has no attribute -np.byte_bounds # E: Module has no attribute -np.bytes0 # E: Module has no attribute -np.c_ # E: Module has no attribute -np.can_cast # E: Module has no attribute -np.cast # E: Module has no attribute -np.cdouble # E: Module has no attribute -np.cfloat # E: Module has no attribute -np.char # E: Module has no attribute -np.chararray # E: Module has no attribute -np.clongdouble # E: Module has no attribute -np.clongfloat # E: Module has no attribute -np.column_stack # E: Module has no attribute -np.common_type # E: Module has no attribute -np.compare_chararrays # E: Module has no attribute -np.compat # E: Module has no attribute -np.complex256 # E: Module has no attribute -np.complex_ # E: Module has no attribute -np.concatenate # E: Module has no attribute -np.conj # E: Module has no attribute -np.copy # E: Module has no attribute -np.copyto # E: Module has no attribute -np.corrcoef # E: Module has no attribute -np.cov # E: Module has no attribute -np.csingle # E: Module has no attribute -np.ctypeslib # E: Module has no attribute -np.cumproduct # E: Module has no attribute -np.datetime_as_string # E: Module has no attribute -np.datetime_data # E: Module has no attribute -np.delete # E: Module has no attribute -np.deprecate # E: Module has no attribute -np.deprecate_with_doc # E: Module has no attribute -np.diag # E: Module 
has no attribute -np.diag_indices # E: Module has no attribute -np.diag_indices_from # E: Module has no attribute -np.diagflat # E: Module has no attribute -np.diff # E: Module has no attribute -np.digitize # E: Module has no attribute -np.disp # E: Module has no attribute -np.divide # E: Module has no attribute -np.dot # E: Module has no attribute -np.double # E: Module has no attribute -np.dsplit # E: Module has no attribute -np.dstack # E: Module has no attribute -np.ediff1d # E: Module has no attribute -np.einsum # E: Module has no attribute -np.einsum_path # E: Module has no attribute -np.emath # E: Module has no attribute -np.errstate # E: Module has no attribute -np.expand_dims # E: Module has no attribute -np.extract # E: Module has no attribute -np.eye # E: Module has no attribute -np.fft # E: Module has no attribute -np.fill_diagonal # E: Module has no attribute -np.finfo # E: Module has no attribute -np.fix # E: Module has no attribute -np.flip # E: Module has no attribute -np.fliplr # E: Module has no attribute -np.flipud # E: Module has no attribute -np.float128 # E: Module has no attribute -np.float_ # E: Module has no attribute -np.format_float_positional # E: Module has no attribute -np.format_float_scientific # E: Module has no attribute -np.format_parser # E: Module has no attribute -np.frombuffer # E: Module has no attribute -np.fromfile # E: Module has no attribute -np.fromiter # E: Module has no attribute -np.frompyfunc # E: Module has no attribute -np.fromregex # E: Module has no attribute -np.fromstring # E: Module has no attribute -np.genfromtxt # E: Module has no attribute -np.geomspace # E: Module has no attribute -np.get_include # E: Module has no attribute -np.get_printoptions # E: Module has no attribute -np.getbufsize # E: Module has no attribute -np.geterr # E: Module has no attribute -np.geterrcall # E: Module has no attribute -np.geterrobj # E: Module has no attribute -np.gradient # E: Module has no attribute -np.half # E: Module has no attribute -np.hamming # E: Module has no attribute -np.hanning # E: Module has no attribute -np.histogram # E: Module has no attribute -np.histogram2d # E: Module has no attribute -np.histogram_bin_edges # E: Module has no attribute -np.histogramdd # E: Module has no attribute -np.hsplit # E: Module has no attribute -np.hstack # E: Module has no attribute -np.i0 # E: Module has no attribute -np.iinfo # E: Module has no attribute -np.imag # E: Module has no attribute -np.in1d # E: Module has no attribute -np.index_exp # E: Module has no attribute -np.info # E: Module has no attribute -np.inner # E: Module has no attribute -np.insert # E: Module has no attribute -np.int0 # E: Module has no attribute -np.int_ # E: Module has no attribute -np.intc # E: Module has no attribute -np.interp # E: Module has no attribute -np.intersect1d # E: Module has no attribute -np.intp # E: Module has no attribute -np.is_busday # E: Module has no attribute -np.iscomplex # E: Module has no attribute -np.iscomplexobj # E: Module has no attribute -np.isin # E: Module has no attribute -np.isneginf # E: Module has no attribute -np.isposinf # E: Module has no attribute -np.isreal # E: Module has no attribute -np.isrealobj # E: Module has no attribute -np.iterable # E: Module has no attribute -np.ix_ # E: Module has no attribute -np.kaiser # E: Module has no attribute -np.kron # E: Module has no attribute -np.lexsort # E: Module has no attribute -np.lib # E: Module has no attribute -np.linalg # E: Module has no attribute -np.linspace # E: Module has no 
attribute -np.load # E: Module has no attribute -np.loads # E: Module has no attribute -np.loadtxt # E: Module has no attribute -np.logspace # E: Module has no attribute -np.longcomplex # E: Module has no attribute -np.longdouble # E: Module has no attribute -np.longfloat # E: Module has no attribute -np.longlong # E: Module has no attribute -np.lookfor # E: Module has no attribute -np.ma # E: Module has no attribute -np.mafromtxt # E: Module has no attribute -np.mask_indices # E: Module has no attribute -np.mat # E: Module has no attribute -np.math # E: Module has no attribute -np.matrix # E: Module has no attribute -np.matrixlib # E: Module has no attribute -np.max # E: Module has no attribute -np.may_share_memory # E: Module has no attribute -np.median # E: Module has no attribute -np.memmap # E: Module has no attribute -np.meshgrid # E: Module has no attribute -np.mgrid # E: Module has no attribute -np.min # E: Module has no attribute -np.min_scalar_type # E: Module has no attribute -np.mintypecode # E: Module has no attribute -np.mod # E: Module has no attribute -np.msort # E: Module has no attribute -np.nan_to_num # E: Module has no attribute -np.nanargmax # E: Module has no attribute -np.nanargmin # E: Module has no attribute -np.nancumprod # E: Module has no attribute -np.nancumsum # E: Module has no attribute -np.nanmax # E: Module has no attribute -np.nanmean # E: Module has no attribute -np.nanmedian # E: Module has no attribute -np.nanmin # E: Module has no attribute -np.nanpercentile # E: Module has no attribute -np.nanprod # E: Module has no attribute -np.nanquantile # E: Module has no attribute -np.nanstd # E: Module has no attribute -np.nansum # E: Module has no attribute -np.nanvar # E: Module has no attribute -np.nbytes # E: Module has no attribute -np.ndenumerate # E: Module has no attribute -np.ndfromtxt # E: Module has no attribute -np.ndindex # E: Module has no attribute -np.nditer # E: Module has no attribute -np.nested_iters # E: Module has no attribute -np.newaxis # E: Module has no attribute -np.numarray # E: Module has no attribute -np.object0 # E: Module has no attribute -np.ogrid # E: Module has no attribute -np.packbits # E: Module has no attribute -np.pad # E: Module has no attribute -np.percentile # E: Module has no attribute -np.piecewise # E: Module has no attribute -np.place # E: Module has no attribute -np.poly # E: Module has no attribute -np.poly1d # E: Module has no attribute -np.polyadd # E: Module has no attribute -np.polyder # E: Module has no attribute -np.polydiv # E: Module has no attribute -np.polyfit # E: Module has no attribute -np.polyint # E: Module has no attribute -np.polymul # E: Module has no attribute -np.polynomial # E: Module has no attribute -np.polysub # E: Module has no attribute -np.polyval # E: Module has no attribute -np.printoptions # E: Module has no attribute -np.product # E: Module has no attribute -np.promote_types # E: Module has no attribute -np.put_along_axis # E: Module has no attribute -np.putmask # E: Module has no attribute -np.quantile # E: Module has no attribute -np.r_ # E: Module has no attribute -np.random # E: Module has no attribute -np.ravel_multi_index # E: Module has no attribute -np.real # E: Module has no attribute -np.real_if_close # E: Module has no attribute -np.rec # E: Module has no attribute -np.recarray # E: Module has no attribute -np.recfromcsv # E: Module has no attribute -np.recfromtxt # E: Module has no attribute -np.record # E: Module has no attribute -np.require # E: Module has no attribute 
-np.result_type # E: Module has no attribute -np.roots # E: Module has no attribute -np.rot90 # E: Module has no attribute -np.round # E: Module has no attribute -np.round_ # E: Module has no attribute -np.row_stack # E: Module has no attribute -np.s_ # E: Module has no attribute -np.save # E: Module has no attribute -np.savetxt # E: Module has no attribute -np.savez # E: Module has no attribute -np.savez_compressed # E: Module has no attribute -np.sctypeDict # E: Module has no attribute -np.sctypes # E: Module has no attribute -np.select # E: Module has no attribute -np.set_printoptions # E: Module has no attribute -np.set_string_function # E: Module has no attribute -np.setbufsize # E: Module has no attribute -np.setdiff1d # E: Module has no attribute -np.seterr # E: Module has no attribute -np.seterrcall # E: Module has no attribute -np.seterrobj # E: Module has no attribute -np.setxor1d # E: Module has no attribute -np.shares_memory # E: Module has no attribute -np.short # E: Module has no attribute -np.show_config # E: Module has no attribute -np.sinc # E: Module has no attribute -np.single # E: Module has no attribute -np.singlecomplex # E: Module has no attribute -np.sort_complex # E: Module has no attribute -np.source # E: Module has no attribute -np.split # E: Module has no attribute -np.stack # E: Module has no attribute -np.str0 # E: Module has no attribute -np.string_ # E: Module has no attribute -np.sys # E: Module has no attribute -np.take_along_axis # E: Module has no attribute -np.testing # E: Module has no attribute -np.tile # E: Module has no attribute -np.trapz # E: Module has no attribute -np.tri # E: Module has no attribute -np.tril # E: Module has no attribute -np.tril_indices # E: Module has no attribute -np.tril_indices_from # E: Module has no attribute -np.trim_zeros # E: Module has no attribute -np.triu # E: Module has no attribute -np.triu_indices # E: Module has no attribute -np.triu_indices_from # E: Module has no attribute -np.typeDict # E: Module has no attribute -np.typecodes # E: Module has no attribute -np.typename # E: Module has no attribute -np.ubyte # E: Module has no attribute -np.uint # E: Module has no attribute -np.uint0 # E: Module has no attribute -np.uintc # E: Module has no attribute -np.uintp # E: Module has no attribute -np.ulonglong # E: Module has no attribute -np.unicode_ # E: Module has no attribute -np.union1d # E: Module has no attribute -np.unique # E: Module has no attribute -np.unpackbits # E: Module has no attribute -np.unravel_index # E: Module has no attribute -np.unwrap # E: Module has no attribute -np.ushort # E: Module has no attribute -np.vander # E: Module has no attribute -np.vdot # E: Module has no attribute -np.vectorize # E: Module has no attribute -np.version # E: Module has no attribute -np.void0 # E: Module has no attribute -np.vsplit # E: Module has no attribute -np.vstack # E: Module has no attribute -np.where # E: Module has no attribute -np.who # E: Module has no attribute \ No newline at end of file From 72172ce9fd0fc11378cb65d7ed033407ac722ca7 Mon Sep 17 00:00:00 2001 From: Jun Kudo Date: Thu, 1 Oct 2020 14:43:40 +0900 Subject: [PATCH 134/409] TST: add test to verify that getting a non-existent top-level object indeed results in an error --- numpy/typing/tests/data/fail/modules.py | 1 + 1 file changed, 1 insertion(+) diff --git a/numpy/typing/tests/data/fail/modules.py b/numpy/typing/tests/data/fail/modules.py index e7ffe89207f0..be031e6e15e5 --- a/numpy/typing/tests/data/fail/modules.py +++ 
b/numpy/typing/tests/data/fail/modules.py @@ -1,3 +1,4 @@ import numpy as np np.testing.bob # E: Module has no attribute +np.bob # E: Module has no attribute \ No newline at end of file From 5d8d296e9b7694c1254792dac564be14d47a46a9 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Thu, 1 Oct 2020 10:56:53 +0200 Subject: [PATCH 135/409] TST: Mark the typing tests as slow --- numpy/typing/tests/test_typing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/numpy/typing/tests/test_typing.py b/numpy/typing/tests/test_typing.py index beb53ddecefa..cba1dc1be7eb 100644 --- a/numpy/typing/tests/test_typing.py +++ b/numpy/typing/tests/test_typing.py @@ -36,6 +36,7 @@ def get_test_cases(directory): ) +@pytest.mark.slow @pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed") @pytest.mark.parametrize("path", get_test_cases(PASS_DIR)) def test_success(path): @@ -50,6 +51,7 @@ def test_success(path): assert re.match(r"Success: no issues found in \d+ source files?", stdout.strip()) +@pytest.mark.slow @pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed") @pytest.mark.parametrize("path", get_test_cases(FAIL_DIR)) def test_fail(path): @@ -99,6 +101,7 @@ def test_fail(path): pytest.fail(f"Error {repr(errors[lineno])} not found") +@pytest.mark.slow @pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed") @pytest.mark.parametrize("path", get_test_cases(REVEAL_DIR)) def test_reveal(path): @@ -130,6 +133,7 @@ def test_reveal(path): assert marker in error_line +@pytest.mark.slow @pytest.mark.skipif(NO_MYPY, reason="Mypy is not installed") @pytest.mark.parametrize("path", get_test_cases(PASS_DIR)) def test_code_runs(path): From d03a906ed6d220fb9a359c51bf8413dfb3012a49 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Sun, 30 Aug 2020 00:45:55 +0200 Subject: [PATCH 136/409] ENH,WIP: Added type hints to `np.core._asarray` --- numpy/core/_asarray.pyi | 70 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 numpy/core/_asarray.pyi diff --git a/numpy/core/_asarray.pyi b/numpy/core/_asarray.pyi new file mode 100644 index 000000000000..bcfc5dbd13d4 --- /dev/null +++ b/numpy/core/_asarray.pyi @@ -0,0 +1,70 @@ +import sys +from typing import TypeVar, Optional, Union, Iterable, Tuple, overload + +from numpy import ndarray +from numpy.typing import ArrayLike, DtypeLike + +if sys.version_info >= (3, 8): + from typing import Literal +else: + from typing_extensions import Literal + +_ArrayType = TypeVar("_ArrayType", bound=ndarray) + +def asarray( + a: object, + dtype: DtypeLike = ..., + order: Optional[str] = ..., + *, + like: ArrayLike = ... +) -> ndarray: ... +@overload +def asanyarray( + a: _ArrayType, + dtype: None = ..., + order: Optional[str] = ..., + *, + like: ArrayLike = ... +) -> _ArrayType: ... +@overload +def asanyarray( + a: object, + dtype: DtypeLike = ..., + order: Optional[str] = ..., + *, + like: ArrayLike = ... +) -> ndarray: ... +def ascontiguousarray( + a: object, dtype: DtypeLike = ..., *, like: ArrayLike = ... +) -> ndarray: ... +def asfortranarray( + a: object, dtype: DtypeLike = ..., *, like: ArrayLike = ... +) -> ndarray: ... + +_Requirements = Literal["F", "C", "A", "W", "O"] +_E = Literal["E"] + +@overload +def require( + a: object, + dtype: DtypeLike = ..., + requirements: Union[_E, Iterable[Union[_E, _Requirements]]] = ..., + *, + like: ArrayLike = ... +) -> ndarray: ... +@overload +def require( + a: _ArrayType, + dtype: None = ..., + requirements: Union[None, _Requirements, Iterable[_Requirements]] = ..., + *, + like: ArrayLike = ... 
+) -> _ArrayType: ... +@overload +def require( + a: object, + dtype: DtypeLike = ..., + requirements: Union[None, _Requirements, Iterable[_Requirements]] = ..., + *, + like: ArrayLike = ... +) -> ndarray: ... From abe1429ee329571b640bf2b59b9be361e4f84c86 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Sun, 30 Aug 2020 00:49:55 +0200 Subject: [PATCH 137/409] ENH: Use literals where possible --- numpy/core/_asarray.pyi | 8 ++++---- numpy/typing/tests/data/pass/literal.py | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/numpy/core/_asarray.pyi b/numpy/core/_asarray.pyi index bcfc5dbd13d4..57081b76018f 100644 --- a/numpy/core/_asarray.pyi +++ b/numpy/core/_asarray.pyi @@ -1,7 +1,7 @@ import sys from typing import TypeVar, Optional, Union, Iterable, Tuple, overload -from numpy import ndarray +from numpy import ndarray, _OrderKACF from numpy.typing import ArrayLike, DtypeLike if sys.version_info >= (3, 8): @@ -14,7 +14,7 @@ _ArrayType = TypeVar("_ArrayType", bound=ndarray) def asarray( a: object, dtype: DtypeLike = ..., - order: Optional[str] = ..., + order: _OrderKACF = ..., *, like: ArrayLike = ... ) -> ndarray: ... @@ -22,7 +22,7 @@ def asarray( def asanyarray( a: _ArrayType, dtype: None = ..., - order: Optional[str] = ..., + order: _OrderKACF = ..., *, like: ArrayLike = ... ) -> _ArrayType: ... @@ -30,7 +30,7 @@ def asanyarray( def asanyarray( a: object, dtype: DtypeLike = ..., - order: Optional[str] = ..., + order: _OrderKACF = ..., *, like: ArrayLike = ... ) -> ndarray: ... diff --git a/numpy/typing/tests/data/pass/literal.py b/numpy/typing/tests/data/pass/literal.py index 321ce3c2bc0c..8eaeb6afb2ad 100644 --- a/numpy/typing/tests/data/pass/literal.py +++ b/numpy/typing/tests/data/pass/literal.py @@ -31,6 +31,8 @@ (KACF, partial(np.add, 1, 1)), # i.e. np.ufunc.__call__ (ACF, partial(np.reshape, AR, 1)), (KACF, partial(np.ravel, AR)), + (KACF, partial(np.asarray, 1)), + (KACF, partial(np.asanyarray, 1)), ] for order_set, func in order_list: From 37acf5f73c05bfbd115eeefc6201c2212b240933 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Wed, 16 Sep 2020 11:43:38 +0200 Subject: [PATCH 138/409] TST: Added tests for `np.core._asarray` --- .../tests/data/fail/array_constructors.py | 7 ++++ .../tests/data/pass/array_constructors.py | 38 +++++++++++++++++++ .../tests/data/reveal/array_constructors.py | 38 +++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 numpy/typing/tests/data/fail/array_constructors.py create mode 100644 numpy/typing/tests/data/pass/array_constructors.py create mode 100644 numpy/typing/tests/data/reveal/array_constructors.py diff --git a/numpy/typing/tests/data/fail/array_constructors.py b/numpy/typing/tests/data/fail/array_constructors.py new file mode 100644 index 000000000000..04e869863328 --- /dev/null +++ b/numpy/typing/tests/data/fail/array_constructors.py @@ -0,0 +1,7 @@ +import numpy as np + +a: np.ndarray + +np.require(a, requirements=1) # E: No overload variant +np.require(a, requirements="TEST") # E: incompatible type + diff --git a/numpy/typing/tests/data/pass/array_constructors.py b/numpy/typing/tests/data/pass/array_constructors.py new file mode 100644 index 000000000000..d5f7c35ca74a --- /dev/null +++ b/numpy/typing/tests/data/pass/array_constructors.py @@ -0,0 +1,38 @@ +from typing import List +import numpy as np + +class SubClass(np.ndarray): ... 
+ +A = np.array([1]) +B = A.view(SubClass).copy() +C = [1] + +np.asarray(A) +np.asarray(B) +np.asarray(C) + +np.asanyarray(A) +np.asanyarray(B) +np.asanyarray(B, dtype=int) +np.asanyarray(C) + +np.ascontiguousarray(A) +np.ascontiguousarray(B) +np.ascontiguousarray(C) + +np.asfortranarray(A) +np.asfortranarray(B) +np.asfortranarray(C) + +np.require(A) +np.require(B) +np.require(B, dtype=int) +np.require(B, requirements=None) +np.require(B, requirements="E") +np.require(B, requirements=["ENSUREARRAY"]) +np.require(B, requirements={"F", "E"}) +np.require(B, requirements=["C", "OWNDATA"]) +np.require(B, requirements="W") +np.require(B, requirements="A") +np.require(C) + diff --git a/numpy/typing/tests/data/reveal/array_constructors.py b/numpy/typing/tests/data/reveal/array_constructors.py new file mode 100644 index 000000000000..ffbcc869c40a --- /dev/null +++ b/numpy/typing/tests/data/reveal/array_constructors.py @@ -0,0 +1,38 @@ +from typing import List +import numpy as np + +class SubClass(np.ndarray): ... + +A: np.ndarray +B: SubClass +C: List[int] + +reveal_type(np.asarray(A)) # E: ndarray +reveal_type(np.asarray(B)) # E: ndarray +reveal_type(np.asarray(C)) # E: ndarray + +reveal_type(np.asanyarray(A)) # E: ndarray +reveal_type(np.asanyarray(B)) # E: SubClass +reveal_type(np.asanyarray(B, dtype=int)) # E: ndarray +reveal_type(np.asanyarray(C)) # E: ndarray + +reveal_type(np.ascontiguousarray(A)) # E: ndarray +reveal_type(np.ascontiguousarray(B)) # E: ndarray +reveal_type(np.ascontiguousarray(C)) # E: ndarray + +reveal_type(np.asfortranarray(A)) # E: ndarray +reveal_type(np.asfortranarray(B)) # E: ndarray +reveal_type(np.asfortranarray(C)) # E: ndarray + +reveal_type(np.require(A)) # E: ndarray +reveal_type(np.require(B)) # E: SubClass +reveal_type(np.require(B, requirements=None)) # E: SubClass +reveal_type(np.require(B, dtype=int)) # E: ndarray +reveal_type(np.require(B, requirements="E")) # E: ndarray +reveal_type(np.require(B, requirements=["ENSUREARRAY"])) # E: ndarray +reveal_type(np.require(B, requirements={"F", "E"})) # E: ndarray +reveal_type(np.require(B, requirements=["C", "OWNDATA"])) # E: SubClass +reveal_type(np.require(B, requirements="W")) # E: SubClass +reveal_type(np.require(B, requirements="A")) # E: SubClass +reveal_type(np.require(C)) # E: ndarray + From e142934543713507bab9418d53290d8896455b79 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Wed, 16 Sep 2020 11:44:43 +0200 Subject: [PATCH 139/409] TST: Consolidate all array construction tests --- .../tests/data/fail/array_constructors.py | 19 +++++++++++++ numpy/typing/tests/data/fail/linspace.py | 13 --------- numpy/typing/tests/data/fail/simple.py | 12 -------- .../tests/data/pass/array_constructors.py | 28 +++++++++++++++++++ numpy/typing/tests/data/pass/linspace.py | 22 --------------- numpy/typing/tests/data/pass/simple.py | 9 ------ .../tests/data/reveal/array_constructors.py | 4 +++ numpy/typing/tests/data/reveal/linspace.py | 6 ---- 8 files changed, 51 insertions(+), 62 deletions(-) delete mode 100644 numpy/typing/tests/data/fail/linspace.py delete mode 100644 numpy/typing/tests/data/fail/simple.py delete mode 100644 numpy/typing/tests/data/pass/linspace.py delete mode 100644 numpy/typing/tests/data/reveal/linspace.py diff --git a/numpy/typing/tests/data/fail/array_constructors.py b/numpy/typing/tests/data/fail/array_constructors.py index 04e869863328..5218572a61c2 100644 --- a/numpy/typing/tests/data/fail/array_constructors.py +++ b/numpy/typing/tests/data/fail/array_constructors.py @@ -5,3 +5,22 @@ 
np.require(a, requirements=1) # E: No overload variant np.require(a, requirements="TEST") # E: incompatible type +np.zeros("test") # E: incompatible type +np.zeros() # E: Too few arguments + +np.ones("test") # E: incompatible type +np.ones() # E: Too few arguments + +np.array(0, float, True) # E: Too many positional + +np.linspace(None, 'bob') # E: No overload variant +np.linspace(0, 2, num=10.0) # E: No overload variant +np.linspace(0, 2, endpoint='True') # E: No overload variant +np.linspace(0, 2, retstep=b'False') # E: No overload variant +np.linspace(0, 2, dtype=0) # E: No overload variant +np.linspace(0, 2, axis=None) # E: No overload variant + +np.logspace(None, 'bob') # E: Argument 1 +np.logspace(0, 2, base=None) # E: Argument "base" + +np.geomspace(None, 'bob') # E: Argument 1 diff --git a/numpy/typing/tests/data/fail/linspace.py b/numpy/typing/tests/data/fail/linspace.py deleted file mode 100644 index a9769c5d6fb1..000000000000 --- a/numpy/typing/tests/data/fail/linspace.py +++ /dev/null @@ -1,13 +0,0 @@ -import numpy as np - -np.linspace(None, 'bob') # E: No overload variant -np.linspace(0, 2, num=10.0) # E: No overload variant -np.linspace(0, 2, endpoint='True') # E: No overload variant -np.linspace(0, 2, retstep=b'False') # E: No overload variant -np.linspace(0, 2, dtype=0) # E: No overload variant -np.linspace(0, 2, axis=None) # E: No overload variant - -np.logspace(None, 'bob') # E: Argument 1 -np.logspace(0, 2, base=None) # E: Argument "base" - -np.geomspace(None, 'bob') # E: Argument 1 diff --git a/numpy/typing/tests/data/fail/simple.py b/numpy/typing/tests/data/fail/simple.py deleted file mode 100644 index 57c08fb7db7e..000000000000 --- a/numpy/typing/tests/data/fail/simple.py +++ /dev/null @@ -1,12 +0,0 @@ -"""Simple expression that should fail with mypy.""" - -import numpy as np - -# Array creation routines checks -np.zeros("test") # E: incompatible type -np.zeros() # E: Too few arguments - -np.ones("test") # E: incompatible type -np.ones() # E: Too few arguments - -np.array(0, float, True) # E: Too many positional diff --git a/numpy/typing/tests/data/pass/array_constructors.py b/numpy/typing/tests/data/pass/array_constructors.py index d5f7c35ca74a..bf29e52b91b9 100644 --- a/numpy/typing/tests/data/pass/array_constructors.py +++ b/numpy/typing/tests/data/pass/array_constructors.py @@ -1,12 +1,24 @@ from typing import List import numpy as np +class Index: + def __index__(self) -> int: + return 0 + class SubClass(np.ndarray): ... A = np.array([1]) B = A.view(SubClass).copy() C = [1] +np.array(1, dtype=float) +np.array(1, copy=False) +np.array(1, order='F') +np.array(1, order=None) +np.array(1, subok=True) +np.array(1, ndmin=3) +np.array(1, str, copy=True, order='C', subok=False, ndmin=2) + np.asarray(A) np.asarray(B) np.asarray(C) @@ -36,3 +48,19 @@ class SubClass(np.ndarray): ... 
np.require(B, requirements="A") np.require(C) +np.linspace(0, 2) +np.linspace(0.5, [0, 1, 2]) +np.linspace([0, 1, 2], 3) +np.linspace(0j, 2) +np.linspace(0, 2, num=10) +np.linspace(0, 2, endpoint=True) +np.linspace(0, 2, retstep=True) +np.linspace(0j, 2j, retstep=True) +np.linspace(0, 2, dtype=bool) +np.linspace([0, 1], [2, 3], axis=Index()) + +np.logspace(0, 2, base=2) +np.logspace(0, 2, base=2) +np.logspace(0, 2, base=[1j, 2j], num=2) + +np.geomspace(1, 2) diff --git a/numpy/typing/tests/data/pass/linspace.py b/numpy/typing/tests/data/pass/linspace.py deleted file mode 100644 index 8c6d0d56b93b..000000000000 --- a/numpy/typing/tests/data/pass/linspace.py +++ /dev/null @@ -1,22 +0,0 @@ -import numpy as np - -class Index: - def __index__(self) -> int: - return 0 - -np.linspace(0, 2) -np.linspace(0.5, [0, 1, 2]) -np.linspace([0, 1, 2], 3) -np.linspace(0j, 2) -np.linspace(0, 2, num=10) -np.linspace(0, 2, endpoint=True) -np.linspace(0, 2, retstep=True) -np.linspace(0j, 2j, retstep=True) -np.linspace(0, 2, dtype=bool) -np.linspace([0, 1], [2, 3], axis=Index()) - -np.logspace(0, 2, base=2) -np.logspace(0, 2, base=2) -np.logspace(0, 2, base=[1j, 2j], num=2) - -np.geomspace(1, 2) diff --git a/numpy/typing/tests/data/pass/simple.py b/numpy/typing/tests/data/pass/simple.py index 52705055702c..4d397bd29f29 100644 --- a/numpy/typing/tests/data/pass/simple.py +++ b/numpy/typing/tests/data/pass/simple.py @@ -17,15 +17,6 @@ def ndarray_func(x): array == 1 array.dtype == float -# Array creation routines checks -np.array(1, dtype=float) -np.array(1, copy=False) -np.array(1, order='F') -np.array(1, order=None) -np.array(1, subok=True) -np.array(1, ndmin=3) -np.array(1, str, copy=True, order='C', subok=False, ndmin=2) - ndarray_func(np.zeros([1, 2])) ndarray_func(np.ones([1, 2])) ndarray_func(np.empty([1, 2])) diff --git a/numpy/typing/tests/data/reveal/array_constructors.py b/numpy/typing/tests/data/reveal/array_constructors.py index ffbcc869c40a..ba8a8eda1e94 100644 --- a/numpy/typing/tests/data/reveal/array_constructors.py +++ b/numpy/typing/tests/data/reveal/array_constructors.py @@ -36,3 +36,7 @@ class SubClass(np.ndarray): ... 
reveal_type(np.require(B, requirements="A")) # E: SubClass reveal_type(np.require(C)) # E: ndarray +reveal_type(np.linspace(0, 10)) # E: numpy.ndarray +reveal_type(np.linspace(0, 10, retstep=True)) # E: Tuple[numpy.ndarray, numpy.inexact] +reveal_type(np.logspace(0, 10)) # E: numpy.ndarray +reveal_type(np.geomspace(1, 10)) # E: numpy.ndarray diff --git a/numpy/typing/tests/data/reveal/linspace.py b/numpy/typing/tests/data/reveal/linspace.py deleted file mode 100644 index cfbbdf390264..000000000000 --- a/numpy/typing/tests/data/reveal/linspace.py +++ /dev/null @@ -1,6 +0,0 @@ -import numpy as np - -reveal_type(np.linspace(0, 10)) # E: numpy.ndarray -reveal_type(np.linspace(0, 10, retstep=True)) # E: Tuple[numpy.ndarray, numpy.inexact] -reveal_type(np.logspace(0, 10)) # E: numpy.ndarray -reveal_type(np.geomspace(1, 10)) # E: numpy.ndarray From da2a85480b7920b059f1cf0b82c2fe98ea798bee Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Wed, 16 Sep 2020 11:45:10 +0200 Subject: [PATCH 140/409] ENH: Update the `np.__all__` --- numpy/__init__.pyi | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 6a1c5b72b58e..c9f45472d18e 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -107,6 +107,14 @@ from numpy.core.fromnumeric import ( var, ) +from numpy.core._asarray import ( + asarray, + asanyarray, + ascontiguousarray, + asfortranarray, + require, +) + # Add an object to `__all__` if their stubs are defined in an external file; # their stubs will not be recognized otherwise. # NOTE: This is redundant for objects defined within this file. @@ -152,6 +160,11 @@ __all__ = [ "mean", "std", "var", + "asarray", + "asanyarray", + "ascontiguousarray", + "asfortranarray", + "require", ] DataSource: Any From 97c221685fd4a8773bfe24e974a59cca6babf8c5 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Wed, 16 Sep 2020 11:45:36 +0200 Subject: [PATCH 141/409] MAINT: Minor fixes; removed unused imports --- numpy/core/_asarray.pyi | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/numpy/core/_asarray.pyi b/numpy/core/_asarray.pyi index 57081b76018f..5eb5dac36b59 100644 --- a/numpy/core/_asarray.pyi +++ b/numpy/core/_asarray.pyi @@ -1,5 +1,5 @@ import sys -from typing import TypeVar, Optional, Union, Iterable, Tuple, overload +from typing import TypeVar, Union, Iterable, overload from numpy import ndarray, _OrderKACF from numpy.typing import ArrayLike, DtypeLike @@ -41,17 +41,15 @@ def asfortranarray( a: object, dtype: DtypeLike = ..., *, like: ArrayLike = ... ) -> ndarray: ... -_Requirements = Literal["F", "C", "A", "W", "O"] -_E = Literal["E"] +_Requirements = Literal[ + "C", "C_CONTIGUOUS", "CONTIGUOUS", + "F", "F_CONTIGUOUS", "FORTRAN", + "A", "ALIGNED", + "W", "WRITEABLE", + "O", "OWNDATA" +] +_E = Literal["E", "ENSUREARRAY"] -@overload -def require( - a: object, - dtype: DtypeLike = ..., - requirements: Union[_E, Iterable[Union[_E, _Requirements]]] = ..., - *, - like: ArrayLike = ... -) -> ndarray: ... @overload def require( a: _ArrayType, @@ -61,6 +59,14 @@ def require( like: ArrayLike = ... ) -> _ArrayType: ... @overload +def require( + a: object, + dtype: DtypeLike = ..., + requirements: Union[_E, Iterable[Union[_E, _Requirements]]] = ..., + *, + like: ArrayLike = ... +) -> ndarray: ... 
+@overload def require( a: object, dtype: DtypeLike = ..., From 7794d059f3531965b05cc11052376d3930d1c518 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Wed, 16 Sep 2020 12:59:50 +0200 Subject: [PATCH 142/409] ENH: Add an alias for `Union[_Requirements, _E]` Addresses https://github.com/numpy/numpy/pull/17326#discussion_r489322146 --- numpy/core/_asarray.pyi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/numpy/core/_asarray.pyi b/numpy/core/_asarray.pyi index 5eb5dac36b59..e074d69d2c23 100644 --- a/numpy/core/_asarray.pyi +++ b/numpy/core/_asarray.pyi @@ -49,6 +49,7 @@ _Requirements = Literal[ "O", "OWNDATA" ] _E = Literal["E", "ENSUREARRAY"] +_RequirementsWithE = Union[_Requirements, _E] @overload def require( @@ -62,7 +63,7 @@ def require( def require( a: object, dtype: DtypeLike = ..., - requirements: Union[_E, Iterable[Union[_E, _Requirements]]] = ..., + requirements: Union[_E, Iterable[_RequirementsWithE]] = ..., *, like: ArrayLike = ... ) -> ndarray: ... From 2af59625c802ba72f9557a6667112f34cc10fbc3 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Wed, 23 Sep 2020 11:41:55 +0200 Subject: [PATCH 143/409] MAINT: Replaced `__all__` with `import x as x` Objects defined in an external file will be picked up if they are either specified in `__all__` or imported as `import x as x`. The latter notation is more compact and is less prone to merge conflicts. --- numpy/__init__.pyi | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index c9f45472d18e..9baa4f4bd307 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -108,11 +108,11 @@ from numpy.core.fromnumeric import ( ) from numpy.core._asarray import ( - asarray, - asanyarray, - ascontiguousarray, - asfortranarray, - require, + asarray as asarray, + asanyarray as asanyarray, + ascontiguousarray as ascontiguousarray, + asfortranarray as asfortranarray, + require as require, ) # Add an object to `__all__` if their stubs are defined in an external file; @@ -160,11 +160,6 @@ __all__ = [ "mean", "std", "var", - "asarray", - "asanyarray", - "ascontiguousarray", - "asfortranarray", - "require", ] DataSource: Any From be77442c1facae452d3742415161c4f8203f4a86 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Thu, 1 Oct 2020 13:57:35 +0200 Subject: [PATCH 144/409] MAINT: Removed placeholder annotations --- numpy/__init__.pyi | 5 ----- 1 file changed, 5 deletions(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 9baa4f4bd307..30d15ed12338 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -177,12 +177,8 @@ array2string: Any array_repr: Any array_split: Any array_str: Any -asanyarray: Any -asarray: Any asarray_chkfinite: Any -ascontiguousarray: Any asfarray: Any -asfortranarray: Any asmatrix: Any asscalar: Any atleast_1d: Any @@ -404,7 +400,6 @@ recarray: Any recfromcsv: Any recfromtxt: Any record: Any -require: Any result_type: Any roots: Any rot90: Any From 247245ebec0513ba1450900024db2691698f20f4 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Wed, 30 Sep 2020 18:32:03 -0600 Subject: [PATCH 145/409] MAINT: Replace PyUString_ConcatAndDel in ufunc_object.c. 
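The string assembled in `ufunc_get_doc` below is what Python exposes as `ufunc.__doc__`: a generated signature line, a blank line, then the static docstring stored on the ufunc. A quick way to observe that layout from Python (an illustration only, not part of the patch):

    import numpy as np

    # The generated calling convention comes first, separated from the
    # static documentation by a blank line.
    sig, _, body = np.add.__doc__.partition("\n\n")
    print(sig)                   # e.g. "add(x1, x2, /, out=None, ...)"
    print(body.splitlines()[0])  # first line of the static doc text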
--- numpy/core/src/umath/ufunc_object.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index f693eb5c246f..8660ee413ee9 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -5977,6 +5977,7 @@ _typecharfromnum(int num) { return ret; } + static PyObject * ufunc_get_doc(PyUFuncObject *ufunc) { @@ -5997,18 +5998,18 @@ ufunc_get_doc(PyUFuncObject *ufunc) * introspection on name and nin + nout to automate the first part * of it the doc string shouldn't need the calling convention */ - doc = PyObject_CallFunctionObjArgs( - _sig_formatter, (PyObject *)ufunc, NULL); + doc = PyObject_CallFunctionObjArgs(_sig_formatter, + (PyObject *)ufunc, NULL); if (doc == NULL) { return NULL; } if (ufunc->doc != NULL) { - PyUString_ConcatAndDel(&doc, - PyUnicode_FromFormat("\n\n%s", ufunc->doc)); + Py_SETREF(doc, PyUnicode_FromFormat("%S\n\n%s", doc, ufunc->doc)); } return doc; } + static PyObject * ufunc_get_nin(PyUFuncObject *ufunc) { From e6030ff2101e12abd01bd93b11a949821e26c248 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Wed, 30 Sep 2020 17:35:05 -0600 Subject: [PATCH 146/409] MAINT: Remove the build_shape_string function. The build_shape_string function is effectively a duplicate of the convert_shape_to_string function in common.c. All uses have now been replaced, so remove it. --- numpy/core/src/multiarray/shape.c | 45 ------------------------------- numpy/core/src/multiarray/shape.h | 7 ----- 2 files changed, 52 deletions(-) diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c index 9dda899131d9..02c349759528 100644 --- a/numpy/core/src/multiarray/shape.c +++ b/numpy/core/src/multiarray/shape.c @@ -977,51 +977,6 @@ PyArray_Flatten(PyArrayObject *a, NPY_ORDER order) return (PyObject *)ret; } -/* See shape.h for parameters documentation */ -NPY_NO_EXPORT PyObject * -build_shape_string(npy_intp n, npy_intp const *vals) -{ - npy_intp i; - - /* - * Negative dimension indicates "newaxis", which can - * be discarded for printing if it's a leading dimension. - * Find the first non-"newaxis" dimension. - */ - for (i = 0; i < n && vals[i] < 0; ++i); - - if (i == n) { - return PyUnicode_FromFormat("()"); - } - - PyObject *ret = PyUnicode_FromFormat("%" NPY_INTP_FMT, vals[i++]); - if (ret == NULL) { - return NULL; - } - for (; i < n; ++i) { - PyObject *tmp; - - if (vals[i] < 0) { - tmp = PyUnicode_FromString(",newaxis"); - } - else { - tmp = PyUnicode_FromFormat(",%" NPY_INTP_FMT, vals[i]); - } - if (tmp == NULL) { - Py_DECREF(ret); - return NULL; - } - - Py_SETREF(ret, PyUnicode_Concat(ret, tmp)); - Py_DECREF(tmp); - if (ret == NULL) { - return NULL; - } - } - - Py_SETREF(ret, PyUnicode_FromFormat("(%S)", ret)); - return ret; -} /*NUMPY_API * diff --git a/numpy/core/src/multiarray/shape.h b/numpy/core/src/multiarray/shape.h index d252925569ba..875b5430f2e8 100644 --- a/numpy/core/src/multiarray/shape.h +++ b/numpy/core/src/multiarray/shape.h @@ -1,13 +1,6 @@ #ifndef _NPY_ARRAY_SHAPE_H_ #define _NPY_ARRAY_SHAPE_H_ -/* - * Builds a string representation of the shape given in 'vals'. - * A negative value in 'vals' gets interpreted as newaxis. - */ -NPY_NO_EXPORT PyObject * -build_shape_string(npy_intp n, npy_intp const *vals); - /* * Creates a sorted stride perm matching the KEEPORDER behavior * of the NpyIter object. 
Because this operates based on multiple From a2e76ff3dc1e19203b2529d939f489ca04ab98a0 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 30 Sep 2020 13:55:54 -0500 Subject: [PATCH 147/409] API: Special case how numpy scalars are coerced to signed integer This removes one of the larger changes to array-coercion, which meant that NumPy scalars were always coerced like a 0-D array would be (i.e. using normal casting). When the assignment is explicitly an integer, now `scalar.__int__()` will be used instead (as was the case previously). Since previously this was handled differently, a *single* scalar is still converted using casting: np.array(np.float64(np.nan), dtype=np.int64) succeeds, but anything else fails, such as: np.array([np.float64(np.nan)], dtype=np.int64) arr1d_int64[()] = np.float64(np.nan) np.array(np.array(np.nan), dtype=np.int64) This does not affect Python scalars, which always raise, because they are always converted using `scalar.__int__()`. Unsigned integers always supported casting from their signed equivalent, so the difference is much less visible for them and this chooses to always use the casting behaviour. The main reason for this change is to help pandas: https://github.com/pandas-dev/pandas/issues/35481 --- .../upcoming_changes/16200.compatibility.rst | 22 +++++++-- numpy/core/src/multiarray/ctors.c | 28 ++++++++++- numpy/core/src/multiarray/dtypemeta.c | 17 +++++++ numpy/core/tests/test_array_coercion.py | 47 +++++++++++++++++-- 4 files changed, 104 insertions(+), 10 deletions(-) diff --git a/doc/release/upcoming_changes/16200.compatibility.rst b/doc/release/upcoming_changes/16200.compatibility.rst index d0fd51265f6e..2bbdd883ed48 100644 --- a/doc/release/upcoming_changes/16200.compatibility.rst +++ b/doc/release/upcoming_changes/16200.compatibility.rst @@ -8,14 +8,26 @@ error:: np.array([np.float64(np.nan)], dtype=np.int64) -will succeed at this time (this may change) and return an undefined result -(usually the smallest possible integer). This also affects assignments:: +will succeed and return an undefined result (usually the smallest possible +integer). This also affects assignments:: arr[0] = np.float64(np.nan) -Note, this already happened for ``np.array(np.float64(np.nan), dtype=np.int64)`` -and that the behaviour is unchanged for ``np.nan`` itself which is a Python -float. +At this time, NumPy retains the behaviour for:: + + np.array(np.float64(np.nan), dtype=np.int64) + +The above changes do not affect Python scalars:: + + np.array([float("NaN")], dtype=np.int64) + +remains unaffected (``np.nan`` is a Python ``float``, not a NumPy one). +Unlike signed integers, unsigned integers do not retain this special case, +since they always behaved more like casting. +The following code stops raising an error:: + + np.array([np.float64(np.nan)], dtype=np.uint64) + To avoid backward compatibility issues, at this time assignment from ``datetime64`` scalar to strings of too short length remains supported. 
This means that ``np.asarray(np.datetime64("2020-10-10"), dtype="S5")`` diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 956dfd3bbf9e..f543d02d0041 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1460,6 +1460,31 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, ((PyVoidScalarObject *)op)->flags, NULL, op); } + else if (cache == 0 && newtype != NULL && + PyDataType_ISSIGNED(newtype) && PyArray_IsScalar(op, Generic)) { + assert(ndim == 0); + /* + * This is a (possible) inconsistency where: + * + * np.array(np.float64(np.nan), dtype=np.int64) + * + * behaves differently from: + * + * np.array([np.float64(np.nan)], dtype=np.int64) + * arr1d_int64[0] = np.float64(np.nan) + * np.array(np.array(np.nan), dtype=np.int64) + * + * by using typical casting instead of raising an error. + * The error is desirable, but to always error seems like a + * larger change to be considered at some other time and it is + * undesirable that 0-D arrays behave differently from scalars. + * This retains the behaviour, largely due to issues in pandas + * which relied on a try/except (although hopefully that will + * have a better solution at some point): + * https://github.com/pandas-dev/pandas/issues/35481 + */ + return PyArray_FromScalar(op, dtype); + } /* There was no array (or array-like) passed in directly. */ if ((flags & NPY_ARRAY_WRITEBACKIFCOPY) || @@ -1480,7 +1505,8 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, if (cache == NULL) { /* This is a single item. Set it directly. */ assert(ndim == 0); - if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) { + + if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) { Py_DECREF(ret); return NULL; } diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c index dbe5ba476887..109f4a225d64 100644 --- a/numpy/core/src/multiarray/dtypemeta.c +++ b/numpy/core/src/multiarray/dtypemeta.c @@ -304,6 +304,18 @@ python_builtins_are_known_scalar_types( } +static int +signed_integers_is_known_scalar_types( + PyArray_DTypeMeta *cls, PyTypeObject *pytype) +{ + if (python_builtins_are_known_scalar_types(cls, pytype)) { + return 1; + } + /* Convert our scalars (raise on too large unsigned and NaN, etc.) */ + return PyType_IsSubtype(pytype, &PyGenericArrType_Type); +} + + static int datetime_known_scalar_types( PyArray_DTypeMeta *cls, PyTypeObject *pytype) @@ -549,6 +561,11 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr) dtype_class->common_dtype = default_builtin_common_dtype; dtype_class->common_instance = NULL; + if (PyTypeNum_ISSIGNED(dtype_class->type_num)) { + /* Convert our scalars (raise on too large unsigned and NaN, etc.) 
*/ + dtype_class->is_known_scalar_type = signed_integers_is_known_scalar_types; + } + if (PyTypeNum_ISUSERDEF(descr->type_num)) { dtype_class->common_dtype = legacy_userdtype_common_dtype_function; } diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py index e0480c7bfcf5..ce66589cacab 100644 --- a/numpy/core/tests/test_array_coercion.py +++ b/numpy/core/tests/test_array_coercion.py @@ -309,6 +309,13 @@ def test_scalar_coercion_same_as_cast_and_assignment(self, cast_to): # coercion should also raise (error type may change) with pytest.raises(Exception): np.array(scalar, dtype=dtype) + + if (isinstance(scalar, rational) and + np.issubdtype(dtype, np.signedinteger)): + return + + with pytest.raises(Exception): + np.array([scalar], dtype=dtype) # assignment should also raise res = np.zeros((), dtype=dtype) with pytest.raises(Exception): @@ -340,6 +347,30 @@ def test_default_dtype_instance(self, dtype_char): assert discovered_dtype == dtype assert discovered_dtype.itemsize == dtype.itemsize + @pytest.mark.parametrize("dtype", np.typecodes["Integer"]) + def test_scalar_to_int_coerce_does_not_cast(self, dtype): + """ + Signed integers are currently different in that they do not cast other + NumPy scalars, but instead use scalar.__int__(). The hardcoded + exception to this rule is `np.array(scalar, dtype=integer)`. + """ + dtype = np.dtype(dtype) + invalid_int = np.ulonglong(-1) + + float_nan = np.float64(np.nan) + + for scalar in [float_nan, invalid_int]: + # This is a special case using casting logic and thus not failing: + coerced = np.array(scalar, dtype=dtype) + cast = np.array(scalar).astype(dtype) + assert_array_equal(coerced, cast) + + # However these fail: + with pytest.raises((ValueError, OverflowError)): + np.array([scalar], dtype=dtype) + with pytest.raises((ValueError, OverflowError)): + cast[()] = scalar + class TestTimeScalars: @pytest.mark.parametrize("dtype", [np.int64, np.float32]) @pytest.mark.parametrize("scalar", [ param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"), param(np.datetime64(1, "D"), id="datetime64[D]")],) def test_coercion_basic(self, dtype, scalar): + # Note the `[scalar]` is there because np.array(scalar) uses stricter + # `scalar.__int__()` rules for backward compatibility right now. arr = np.array(scalar, dtype=dtype) cast = np.array(scalar).astype(dtype) - ass = np.ones((), dtype=dtype) - ass[()] = scalar # raises, as would np.array([scalar], dtype=dtype) - assert_array_equal(arr, cast) - assert_array_equal(cast, cast) + assert_array_equal(arr, cast) + + ass = np.ones((), dtype=dtype) + if issubclass(dtype, np.integer): + with pytest.raises(TypeError): + # raises, as would np.array([scalar], dtype=dtype); this is a + # conversion from times, but follows the integer behaviour. 
+ ass[()] = scalar + else: + ass[()] = scalar + + assert_array_equal(ass, cast) @pytest.mark.parametrize("dtype", [np.int64, np.float32]) @pytest.mark.parametrize("scalar", From 57adb4bb6a8eb46fed597d4a781e47bcb86ebe11 Mon Sep 17 00:00:00 2001 From: Bas van Beek <43369155+BvB93@users.noreply.github.com> Date: Thu, 1 Oct 2020 18:23:53 +0200 Subject: [PATCH 148/409] DOC: Fix a parameter type in the `putmask` docs (#17412) --- numpy/core/multiarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py index 540d1ea9b5f6..6b335f1a64bb 100644 --- a/numpy/core/multiarray.py +++ b/numpy/core/multiarray.py @@ -1100,7 +1100,7 @@ def putmask(a, mask, values): Parameters ---------- - a : array_like + a : ndarray Target array. mask : array_like Boolean mask array. It has to be the same shape as `a`. From e31ae7ff70c72dec2cf7ba3bd817982ad1b68713 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 1 Oct 2020 15:39:11 -0500 Subject: [PATCH 149/409] DEP: Deprecate coercion to subarray dtypes When coercing to subarray dtypes, e.g. using `np.array(obj, dtype)`, but also `arr.astype(dtype)`, the behaviour was only well defined with tuple inputs, but not with array-like inputs. In particular, `arr.astype(dtype)` had the arguably surprising behaviour of not converting by element, but rather attempting (and often failing) to broadcast `arr` to the result array with added dimensions. This deprecates all of these cases; the main issue would be for users relying on stranger inputs with broadcasted tuples contained in sequences: ``` np.array([((0, 1), (1, 2)), ((2,),)], dtype='(2,2)f4') ``` In most cases, where the tuples have the correct output shape, the new base dtype can be directly used since the discovered shape should match. However, there is no work-around for the above case. Closes gh-17173 --- .../upcoming_changes/17419.deprecation.rst | 24 +++ numpy/core/src/multiarray/ctors.c | 202 ++++++++++++++++++ numpy/core/src/multiarray/methods.c | 14 ++ numpy/core/tests/test_deprecations.py | 41 ++++ 4 files changed, 281 insertions(+) create mode 100644 doc/release/upcoming_changes/17419.deprecation.rst diff --git a/doc/release/upcoming_changes/17419.deprecation.rst b/doc/release/upcoming_changes/17419.deprecation.rst new file mode 100644 index 000000000000..fcab3a8adef9 --- /dev/null +++ b/doc/release/upcoming_changes/17419.deprecation.rst @@ -0,0 +1,24 @@ +Arrays cannot be created using subarray dtypes +---------------------------------------------- +Array creation and casting using ``np.array(obj, dtype)`` +and ``arr.astype(dtype)`` will no longer support ``dtype`` +being a subarray dtype such as ``np.dtype("(2)i,")``. + +For such a ``dtype`` the following behaviour occurs currently:: + + res = np.array(obj, dtype) + + res.dtype is not dtype + res.dtype is dtype.base + res.shape[-dtype.ndim:] == dtype.shape + +The shape of the dtype is appended to the shape of the array. +This leads to inconsistencies when ``obj`` is: + +* a scalar, such as ``np.array(1, dtype="(2)i")`` +* an array, such as ``np.array(np.array([1]), dtype="(2)i")`` + +In most cases the work-around is to pass the output dtype directly +and possibly check ``res.shape[-dtype.ndim:] == dtype.shape``. +If this is insufficient, please open an issue on the NumPy issue +tracker. 
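To make the release note above concrete, a minimal sketch of the deprecated pattern and the suggested replacement (assuming the 1.20 deprecation semantics this patch introduces):

    import warnings
    import numpy as np

    dtype = np.dtype("(2,2)f4")
    obj = [((0, 1), (1, 2)), ((2, 3), (3, 4))]

    with warnings.catch_warnings():
        warnings.simplefilter("error")
        try:
            np.array(obj, dtype=dtype)  # now emits a DeprecationWarning
        except DeprecationWarning:
            pass

    # Preferred: pass the base dtype and verify the absorbed dimensions.
    res = np.array(obj, dtype=dtype.base)
    assert res.shape[-dtype.ndim:] == dtype.shape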
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 956dfd3bbf9e..014de27a5d71 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1367,6 +1367,160 @@ PyArray_GetArrayParamsFromObject(PyObject *NPY_UNUSED(op), } +/* + * This function is a legacy implementation to retain subarray dtype + * behaviour in array coercion. The behaviour here makes sense if tuples + * of matching dimensionality are being coerced. Due to the difficulty + * that the result is ill-defined for lists of array-likes, this is deprecated. + * + * WARNING: Do not use this function, it exists purely to support a deprecated + * code path. + */ +static int +setArrayFromSequence(PyArrayObject *a, PyObject *s, + int dim, PyArrayObject * dst) +{ + Py_ssize_t i, slen; + int res = -1; + + /* first recursion, view equal destination */ + if (dst == NULL) + dst = a; + + /* + * This code is to ensure that the sequence access below will + * return a lower-dimensional sequence. + */ + + /* INCREF on entry DECREF on exit */ + Py_INCREF(s); + + PyObject *seq = NULL; + + if (PyArray_Check(s)) { + if (!(PyArray_CheckExact(s))) { + /* + * make sure a base-class array is used so that the dimensionality + * reduction assumption is correct. + */ + /* This will DECREF(s) if replaced */ + s = PyArray_EnsureArray(s); + if (s == NULL) { + goto fail; + } + } + + /* dst points to correct array subsection */ + if (PyArray_CopyInto(dst, (PyArrayObject *)s) < 0) { + goto fail; + } + + Py_DECREF(s); + return 0; + } + + if (dim > PyArray_NDIM(a)) { + PyErr_Format(PyExc_ValueError, + "setArrayFromSequence: sequence/array dimensions mismatch."); + goto fail; + } + + /* Try __array__ before using s as a sequence */ + PyObject *tmp = _array_from_array_like(s, NULL, 0, NULL); + if (tmp == NULL) { + goto fail; + } + else if (tmp == Py_NotImplemented) { + Py_DECREF(tmp); + } + else { + int r = PyArray_CopyInto(dst, (PyArrayObject *)tmp); + Py_DECREF(tmp); + if (r < 0) { + goto fail; + } + Py_DECREF(s); + return 0; + } + + seq = PySequence_Fast(s, "Could not convert object to sequence"); + if (seq == NULL) { + goto fail; + } + slen = PySequence_Fast_GET_SIZE(seq); + + /* + * Either the dimensions match, or the sequence has length 1 and can + * be broadcast to the destination. 
+ */ + if (slen != PyArray_DIMS(a)[dim] && slen != 1) { + PyErr_Format(PyExc_ValueError, + "cannot copy sequence with size %zd to array axis " + "with dimension %" NPY_INTP_FMT, slen, PyArray_DIMS(a)[dim]); + goto fail; + } + + /* Broadcast the one element from the sequence to all the outputs */ + if (slen == 1) { + PyObject *o = PySequence_Fast_GET_ITEM(seq, 0); + npy_intp alen = PyArray_DIM(a, dim); + + for (i = 0; i < alen; i++) { + if ((PyArray_NDIM(a) - dim) > 1) { + PyArrayObject * tmp = + (PyArrayObject *)array_item_asarray(dst, i); + if (tmp == NULL) { + goto fail; + } + + res = setArrayFromSequence(a, o, dim+1, tmp); + Py_DECREF(tmp); + } + else { + char * b = (PyArray_BYTES(dst) + i * PyArray_STRIDES(dst)[0]); + res = PyArray_SETITEM(dst, b, o); + } + if (res < 0) { + goto fail; + } + } + } + /* Copy element by element */ + else { + for (i = 0; i < slen; i++) { + PyObject * o = PySequence_Fast_GET_ITEM(seq, i); + if ((PyArray_NDIM(a) - dim) > 1) { + PyArrayObject * tmp = + (PyArrayObject *)array_item_asarray(dst, i); + if (tmp == NULL) { + goto fail; + } + + res = setArrayFromSequence(a, o, dim+1, tmp); + Py_DECREF(tmp); + } + else { + char * b = (PyArray_BYTES(dst) + i * PyArray_STRIDES(dst)[0]); + res = PyArray_SETITEM(dst, b, o); + } + if (res < 0) { + goto fail; + } + } + } + + Py_DECREF(seq); + Py_DECREF(s); + return 0; + + fail: + Py_XDECREF(seq); + Py_DECREF(s); + return res; +} + + + /*NUMPY_API * Does not check for NPY_ARRAY_ENSURECOPY and NPY_ARRAY_NOTSWAPPED in flags * Steals a reference to newtype --- which can be NULL @@ -1407,6 +1561,54 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, if (ndim < 0) { return NULL; } + + if (NPY_UNLIKELY(fixed_descriptor != NULL && PyDataType_HASSUBARRAY(dtype))) { + /* + * When a subarray dtype was passed in, its dimensions are absorbed + * into the array dimension (causing a dimension mismatch). + * We can't reasonably handle this because of inconsistencies in + * how it was handled (depending on nested list vs. embed array-likes). + * So we give a deprecation warning and fall back to legacy code. + */ + ret = (PyArrayObject *)PyArray_NewFromDescr( + &PyArray_Type, dtype, ndim, dims, NULL, NULL, + flags&NPY_ARRAY_F_CONTIGUOUS, NULL); + if (ret == NULL) { + npy_free_coercion_cache(cache); + return NULL; + } + assert(PyArray_NDIM(ret) != ndim); + + if (cache == NULL) { + /* This is a single item. Sets only first subarray element. */ + assert(ndim == 0); + if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) { + Py_DECREF(ret); + return NULL; + } + } + else { + npy_free_coercion_cache(cache); + if (setArrayFromSequence(ret, op, 0, NULL) < 0) { + Py_DECREF(ret); + return NULL; + } + } + /* NumPy 1.20, 2020-10-01 */ + if (DEPRECATE( + "using a dtype with a subarray field is deprecated. " + "This can lead to inconsistent behaviour due to the resulting " + "dtype being different from the input dtype. " + "You may try to use `dtype=dtype.base`, which should give the " + "same result for most inputs, but does not guarantee the " + "output dimensions to match the subarray ones. 
" + "(Deprecated NumPy 1.20)")) { + Py_DECREF(ret); + return NULL; + } + return (PyObject *)ret; + } + if (dtype == NULL) { dtype = PyArray_DescrFromType(NPY_DEFAULT_TYPE); } diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index f7cb2185b5b0..e4421b41b7df 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -844,6 +844,20 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds) if (ret == NULL) { return NULL; } + /* NumPy 1.20, 2020-10-01 */ + if ((PyArray_NDIM(self) != PyArray_NDIM(ret)) && DEPRECATE( + "using a dtype with a subarray field is deprecated. " + "This can lead to inconsistent behaviour due to the resulting " + "dtype being different from the input dtype. " + "You may try to use `dtype=dtype.base`, which should give the " + "same result for most inputs, but does not guarantee the " + "output dimensions to match the subarray ones. " + "For `arr.astype()` the old, surprising, behaviour can be " + "retained using `res = np.empty(arr.shape, dtype)` followed" + "by `res[...] = arr`. (Deprecated NumPy 1.20)")) { + Py_DECREF(ret); + return NULL; + } if (PyArray_CopyInto(ret, self) < 0) { Py_DECREF(ret); diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 17391e80cdc4..91fd95d4d8f2 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -81,6 +81,8 @@ def assert_deprecated(self, function, num=1, ignore_others=False, kwargs : dict Keyword arguments for `function` """ + __tracebackhide__ = True # Hide traceback for py.test + # reset the log self.log[:] = [] @@ -728,3 +730,42 @@ def test_not_deprecated(self): np.concatenate(([0.], [1.]), out=np.empty(2, dtype=np.int64), casting="same_kind") + +class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase): + message = "using a dtype with a subarray field is deprecated" + + @pytest.mark.parametrize(["obj", "dtype"], + [([((0, 1), (1, 2)), ((2,),)], '(2,2)f4'), + (["1", "2"], "(2)i,")]) + def test_deprecated_sequence(self, obj, dtype): + dtype = np.dtype(dtype) + self.assert_deprecated(lambda: np.array(obj, dtype=dtype)) + with pytest.warns(DeprecationWarning): + res = np.array(obj, dtype=dtype) + + # Using `arr.astype(subarray_dtype)` is also deprecated, because + # it uses broadcasting instead of casting each element. + self.assert_deprecated(lambda: res.astype(dtype)) + expected = np.empty(len(obj), dtype=dtype) + for i in range(len(expected)): + expected[i] = obj[i] + + assert_array_equal(res, expected) + + def test_deprecated_array(self): + # Arrays are more complex, since they "broadcast" on success: + arr = np.array([1, 2]) + self.assert_deprecated(lambda: np.array(arr, dtype="(2)i,")) + with pytest.warns(DeprecationWarning): + res = np.array(arr, dtype="(2)i,") + + assert_array_equal(res, [[1, 2], [1, 2]]) + + def test_not_deprecated(self): + # These error paths are not deprecated, the tests should be retained + # when the deprecation is finalized. + arr = np.arange(5 * 2).reshape(5, 2) + with pytest.raises(ValueError): + arr.astype("(2,2)f") + with pytest.raises(ValueError): + np.array(arr, dtype="(2,2)f") From 0e4a4d4026e9fba72c5e063c59f243a755d8979a Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 1 Oct 2020 23:30:30 -0500 Subject: [PATCH 150/409] BUG: Fix memory leak in array-coercion error paths Also uses PyMem_*, since those seem the more approriate functions. 
--- numpy/core/src/multiarray/array_coercion.c | 4 ++-- numpy/core/src/multiarray/ctors.c | 3 +++ numpy/core/tests/test_array_coercion.py | 15 +++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c index aae8d5141412..64a06d58b12a 100644 --- a/numpy/core/src/multiarray/array_coercion.c +++ b/numpy/core/src/multiarray/array_coercion.c @@ -538,7 +538,7 @@ npy_new_coercion_cache( cache = _coercion_cache_cache[_coercion_cache_num]; } else { - cache = PyObject_MALLOC(sizeof(coercion_cache_obj)); + cache = PyMem_Malloc(sizeof(coercion_cache_obj)); } if (cache == NULL) { PyErr_NoMemory(); @@ -570,7 +570,7 @@ npy_unlink_coercion_cache(coercion_cache_obj *current) _coercion_cache_num++; } else { - PyObject_FREE(current); + PyMem_Free(current); } return next; } diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 956dfd3bbf9e..55c0a31f0c8a 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -610,6 +610,7 @@ PyArray_AssignFromCache(PyArrayObject *self, coercion_cache_obj *cache) { PyErr_SetString(PyExc_RuntimeError, "Inconsistent object during array creation? " "Content of sequences changed (cache not consumed)."); + npy_free_coercion_cache(cache); return -1; } return 0; @@ -1467,6 +1468,7 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, PyErr_SetString(PyExc_TypeError, "WRITEBACKIFCOPY used for non-array input."); Py_DECREF(dtype); + npy_free_coercion_cache(cache); return NULL; } @@ -1475,6 +1477,7 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, &PyArray_Type, dtype, ndim, dims, NULL, NULL, flags&NPY_ARRAY_F_CONTIGUOUS, NULL); if (ret == NULL) { + npy_free_coercion_cache(cache); return NULL; } if (cache == NULL) { diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py index e0480c7bfcf5..79954b998fb3 100644 --- a/numpy/core/tests/test_array_coercion.py +++ b/numpy/core/tests/test_array_coercion.py @@ -635,3 +635,18 @@ def __array__(self): assert arr[()] is ArrayLike arr = np.array([ArrayLike]) assert arr[0] is ArrayLike + + @pytest.mark.skipif( + np.dtype(np.intp).itemsize < 8, reason="Needs 64bit platform") + def test_too_large_array_error_paths(self): + """Test the error paths, including for memory leaks""" + arr = np.array(0, dtype="uint8") + # Guarantees that a contiguous copy won't work: + arr = np.broadcast_to(arr, 2**62) + + for i in range(5): + # repeat, to ensure caching cannot have an effect: + with pytest.raises(MemoryError): + np.array(arr) + with pytest.raises(MemoryError): + np.array([arr]) From c71edcec18d2fdbe391822e7658db43d783e8db0 Mon Sep 17 00:00:00 2001 From: Jovial Joe Jayarson Date: Fri, 2 Oct 2020 11:02:54 +0530 Subject: [PATCH 151/409] fix: chains nested try-except-raise --- numpy/ma/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/numpy/ma/core.py b/numpy/ma/core.py index b5371f51a6bc..4e320576bdb3 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -443,9 +443,9 @@ def _check_fill_value(fill_value, ndtype): if isinstance(fill_value, (ndarray, np.void)): try: fill_value = np.array(fill_value, copy=False, dtype=ndtype) - except ValueError: + except ValueError as e: err_msg = "Unable to transform %s to dtype %s" - raise ValueError(err_msg % (fill_value, ndtype)) + raise ValueError(err_msg % (fill_value, ndtype)) from e else: fill_value = np.asarray(fill_value, 
dtype=object) fill_value = np.array(_recursive_set_fill_value(fill_value, ndtype), @@ -460,12 +460,12 @@ def _check_fill_value(fill_value, ndtype): # Also in case of converting string arrays. try: fill_value = np.array(fill_value, copy=False, dtype=ndtype) - except (OverflowError, ValueError): + except (OverflowError, ValueError) as e: # Raise TypeError instead of OverflowError or ValueError. # OverflowError is seldom used, and the real problem here is # that the passed fill_value is not compatible with the ndtype. err_msg = "Cannot convert fill_value %s to dtype %s" - raise TypeError(err_msg % (fill_value, ndtype)) + raise TypeError(err_msg % (fill_value, ndtype)) from e return np.array(fill_value) From 01e48bf1222794454a8fad7c44e6d2ac19c7a9da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrique=20Mat=C3=ADas=20S=C3=A1nchez=20=28Quique=29?= Date: Fri, 2 Oct 2020 09:31:55 +0200 Subject: [PATCH 152/409] DOC: Remove bogus reference to _a_ This will make this docstring coherent with the one for ndarray.argmax(). --- numpy/core/_add_newdocs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index 879b3645d74d..b8cf12c60c1c 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -2618,7 +2618,7 @@ """ a.argmin(axis=None, out=None) - Return indices of the minimum values along the given axis of `a`. + Return indices of the minimum values along the given axis. Refer to `numpy.argmin` for detailed documentation. From cd20e7460ec84bb3b1b18b881ce09a851e492a08 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Fri, 2 Oct 2020 11:39:53 +0100 Subject: [PATCH 153/409] DOC: Fix formatting issues in description of .c.src files --- doc/DISTUTILS.rst.txt | 48 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/doc/DISTUTILS.rst.txt b/doc/DISTUTILS.rst.txt index 01527374d182..81ad8496397b 100644 --- a/doc/DISTUTILS.rst.txt +++ b/doc/DISTUTILS.rst.txt @@ -394,37 +394,37 @@ and ``/**end repeat**/`` lines, which may also be nested using consecutively numbered delimiting lines such as ``/**begin repeat1`` and ``/**end repeat1**/``: -1. "/\**begin repeat "on a line by itself marks the beginning of -a segment that should be repeated. +1. ``/**begin repeat ``on a line by itself marks the beginning of + a segment that should be repeated. 2. Named variable expansions are defined using ``#name=item1, item2, item3, -..., itemN#`` and placed on successive lines. These variables are -replaced in each repeat block with corresponding word. All named -variables in the same repeat block must define the same number of -words. + ..., itemN#`` and placed on successive lines. These variables are + replaced in each repeat block with corresponding word. All named + variables in the same repeat block must define the same number of + words. 3. In specifying the repeat rule for a named variable, ``item*N`` is short- -hand for ``item, item, ..., item`` repeated N times. In addition, -parenthesis in combination with \*N can be used for grouping several -items that should be repeated. Thus, #name=(item1, item2)*4# is -equivalent to #name=item1, item2, item1, item2, item1, item2, item1, -item2# + hand for ``item, item, ..., item`` repeated N times. In addition, + parenthesis in combination with ``*N`` can be used for grouping several + items that should be repeated. Thus, ``#name=(item1, item2)*4#`` is + equivalent to ``#name=item1, item2, item1, item2, item1, item2, item1, + item2#``. -4. 
"\*/ "on a line by itself marks the end of the variable expansion -naming. The next line is the first line that will be repeated using -the named rules. +4. ``*/`` on a line by itself marks the end of the variable expansion + naming. The next line is the first line that will be repeated using + the named rules. 5. Inside the block to be repeated, the variables that should be expanded -are specified as ``@name@`` + are specified as ``@name@``. -6. "/\**end repeat**/ "on a line by itself marks the previous line -as the last line of the block to be repeated. +6. ``/**end repeat**/`` on a line by itself marks the previous line + as the last line of the block to be repeated. 7. A loop in the NumPy C source code may have a ``@TYPE@`` variable, targeted -for string substitution, which is preprocessed to a number of otherwise -identical loops with several strings such as INT, LONG, UINT, ULONG. The -``@TYPE@`` style syntax thus reduces code duplication and maintenance burden by -mimicking languages that have generic type support. + for string substitution, which is preprocessed to a number of otherwise + identical loops with several strings such as ``INT``, ``LONG``, ``UINT``, + ``ULONG``. The ``@TYPE@`` style syntax thus reduces code duplication and + maintenance burden by mimicking languages that have generic type support. The above rules may be clearer in the following template source example: @@ -464,13 +464,13 @@ The above rules may be clearer in the following template source example: /**end repeat**/ -The preprocessing of generically typed C source files (whether in NumPy +The preprocessing of generically-typed C source files (whether in NumPy proper or in any third party package using NumPy Distutils) is performed by `conv_template.py`_. -The type specific C files generated (extension: .c) +The type-specific C files generated (extension: ``.c``) by these modules during the build process are ready to be compiled. This form of generic typing is also supported for C header files (preprocessed -to produce .h files). +to produce ``.h`` files). .. _conv_template.py: https://github.com/numpy/numpy/blob/master/numpy/distutils/conv_template.py From d846cdabf5e0f76d53cb941bf075c21ecb98d573 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Fri, 2 Oct 2020 12:24:36 +0100 Subject: [PATCH 154/409] Update doc/DISTUTILS.rst.txt --- doc/DISTUTILS.rst.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/DISTUTILS.rst.txt b/doc/DISTUTILS.rst.txt index 81ad8496397b..dbc6bf8b8ddd 100644 --- a/doc/DISTUTILS.rst.txt +++ b/doc/DISTUTILS.rst.txt @@ -394,7 +394,7 @@ and ``/**end repeat**/`` lines, which may also be nested using consecutively numbered delimiting lines such as ``/**begin repeat1`` and ``/**end repeat1**/``: -1. ``/**begin repeat ``on a line by itself marks the beginning of +1. ``/**begin repeat`` on a line by itself marks the beginning of a segment that should be repeated. 2. 
Named variable expansions are defined using ``#name=item1, item2, item3,

From f691175d8fd836d1f65f95db4774370cc1ec3a88 Mon Sep 17 00:00:00 2001
From: danbeibei <57632244+danbeibei@users.noreply.github.com>
Date: Fri, 2 Oct 2020 15:57:18 +0200
Subject: [PATCH 155/409] Update numpy/distutils/fcompiler/nv.py

Co-authored-by: Eric Wieser
---
 numpy/distutils/fcompiler/nv.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/numpy/distutils/fcompiler/nv.py b/numpy/distutils/fcompiler/nv.py
index 4cf30f5ef100..c5f203ee8ddb 100644
--- a/numpy/distutils/fcompiler/nv.py
+++ b/numpy/distutils/fcompiler/nv.py
@@ -1,15 +1,18 @@
-# https://developer.nvidia.com/hpc-sdk
 import sys

 from numpy.distutils.fcompiler import FCompiler

 compilers = ['NVHPCFCompiler']

-"""
-Since august 2020 the NVIDIA HPC SDK includes the compilers formely known as The Portland Group compilers.
-https://www.pgroup.com/index.htm
-"""
 class NVHPCFCompiler(FCompiler):
+    """ NVIDIA High Performance Computing (HPC) SDK Fortran Compiler
+
+    https://developer.nvidia.com/hpc-sdk
+
+    Since August 2020 the NVIDIA HPC SDK includes the compilers formerly known as The Portland Group compilers,
+    https://www.pgroup.com/index.htm.
+    See also `numpy.distutils.fcompiler.pg`.
+    """
     compiler_type = 'nv'
     description = 'NVIDIA HPC SDK'

From 47a918f644da4ed3947ee2f797c790027041f29b Mon Sep 17 00:00:00 2001
From: Bas van Beek <43369155+BvB93@users.noreply.github.com>
Date: Fri, 2 Oct 2020 23:25:14 +0200
Subject: [PATCH 156/409] ENH: Annotate the arithmetic operations of `ndarray`
 and `generic` (#17273)

* ENH: Added annotations for arithmetic-based magic methods
* TST: Added arithmetic tests
* TST: Moved a number of tests to `arithmetic.py`
* ENH: Ensure that objects annotated as `number` support arithmetic operations
* MAINT: Arithmetic operations on 0d arrays return scalars
* MAINT: Clarify the type of generics returned by `ufunc.__call__`
* TST: Added more arithmetic tests
* MAINT: Use `_CharLike` when both `str` and `bytes` are accepted
* MAINT: Change the `timedelta64` baseclass to `generic`
* MAINT: Add aliases for common scalar unions
* MAINT: Update the definition of `_NumberLike`
* MAINT: Replace `_NumberLike` with `_ComplexLike` in the `complexfloating` annotations
* MAINT: Move the callback protocols to their own module
* MAINT: Make `typing._callback` available at runtime
* DOC: Provide further clarification about callback protocols
* MAINT: Replace `_callback` with `_callable`

  Addresses https://github.com/numpy/numpy/pull/17273#discussion_r485821346
  The use of `__call__`-defining protocols is not limited to callbacks.
  The module name & docstring now reflect this.

* MAINT: Removed `__add__` from `str_` and `bytes_`

  Most `np.bytes_` / `np.str_` methods return their builtin `bytes` / `str`
  counterpart. This includes addition.

* MAINT: Fix the return type of boolean division

  Addresses https://github.com/numpy/numpy/pull/17273#discussion_r486271220
  Dividing a `np.bool_` by an integer (or vice versa) always returns `float64`

* MAINT: Renamed all `_Arithmetic` protocols to `_Op`

  Addresses https://github.com/numpy/numpy/pull/17273#discussion_r486272745

* TST: Add tests for boolean division
* ENH: Make `np.number` generic w.r.t. its precision
* ENH,WIP: Add a mypy plugin for casting `np.number` instances to appropriate subclasses
* Revert "ENH,WIP: Add a mypy plugin for casting `np.number` instances to appropriate subclasses"

  This reverts commit c526fb619d20902bfd77709c8983c7a7d5477c95.
* Revert "ENH: Make `np.number` generic w.r.t. its precision"

  This reverts commit dbf20183cf7ff71e379cd1a165d07e1a1d643135.

* MAINT: Narrow the definition of `_ComplexLike`

  Addresses https://github.com/numpy/numpy/pull/17273#discussion_r490440238

* MAINT: Refined the return type of `uint + int` ops

  `unsignedinteger + signedinteger` generally returns a `signedinteger`
  subclass. The exception to this is `uint64 + signedinteger`, which
  returns `float64`.
  Addresses https://github.com/numpy/numpy/pull/17273#discussion_r490442023

* MAINT: Use `_IntLike` and `_FloatLike` in the definition of `_ComplexLike`

---
 numpy/__init__.pyi                           | 181 ++++++++++---
 numpy/typing/_callable.py                    | 136 ++++++++++
 numpy/typing/tests/data/fail/arithmetic.py   |  19 ++
 numpy/typing/tests/data/fail/scalars.py      |  16 --
 numpy/typing/tests/data/pass/arithmetic.py   | 257 +++++++++++++++++++
 numpy/typing/tests/data/pass/scalars.py      |  13 -
 numpy/typing/tests/data/pass/ufuncs.py       |   5 +-
 numpy/typing/tests/data/reveal/arithmetic.py | 256 ++++++++++++++++++
 numpy/typing/tests/data/reveal/scalars.py    |  17 --
 9 files changed, 811 insertions(+), 89 deletions(-)
 create mode 100644 numpy/typing/_callable.py
 create mode 100644 numpy/typing/tests/data/fail/arithmetic.py
 create mode 100644 numpy/typing/tests/data/pass/arithmetic.py
 create mode 100644 numpy/typing/tests/data/reveal/arithmetic.py

diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
index 30d15ed12338..e712801eba7e 100644
--- a/numpy/__init__.pyi
+++ b/numpy/__init__.pyi
@@ -5,6 +5,18 @@ from abc import abstractmethod

 from numpy.core._internal import _ctypes
 from numpy.typing import ArrayLike, DtypeLike, _Shape, _ShapeLike
+from numpy.typing._callable import (
+    _BoolOp,
+    _BoolSub,
+    _BoolTrueDiv,
+    _TD64Div,
+    _IntTrueDiv,
+    _UnsignedIntOp,
+    _SignedIntOp,
+    _FloatOp,
+    _ComplexOp,
+    _NumberOp,
+)

 from typing import (
     Any,
@@ -646,23 +658,10 @@ class _ArrayOrScalarCommon(
     def __ne__(self, other): ...
     def __gt__(self, other): ...
     def __ge__(self, other): ...
-    def __add__(self, other): ...
-    def __radd__(self, other): ...
-    def __sub__(self, other): ...
-    def __rsub__(self, other): ...
-    def __mul__(self, other): ...
-    def __rmul__(self, other): ...
-    def __truediv__(self, other): ...
-    def __rtruediv__(self, other): ...
-    def __floordiv__(self, other): ...
-    def __rfloordiv__(self, other): ...
     def __mod__(self, other): ...
     def __rmod__(self, other): ...
     def __divmod__(self, other): ...
     def __rdivmod__(self, other): ...
-    # NumPy's __pow__ doesn't handle a third argument
-    def __pow__(self, other): ...
-    def __rpow__(self, other): ...
     def __lshift__(self, other): ...
     def __rlshift__(self, other): ...
     def __rshift__(self, other): ...
@@ -834,14 +833,26 @@ class ndarray(_ArrayOrScalarCommon, Iterable, Sized, Container):
     def __matmul__(self, other): ...
     def __imatmul__(self, other): ...
     def __rmatmul__(self, other): ...
+    def __add__(self, other: ArrayLike) -> Union[ndarray, generic]: ...
+    def __radd__(self, other: ArrayLike) -> Union[ndarray, generic]: ...
+    def __sub__(self, other: ArrayLike) -> Union[ndarray, generic]: ...
+    def __rsub__(self, other: ArrayLike) -> Union[ndarray, generic]: ...
+    def __mul__(self, other: ArrayLike) -> Union[ndarray, generic]: ...
+    def __rmul__(self, other: ArrayLike) -> Union[ndarray, generic]: ...
+    def __floordiv__(self, other: ArrayLike) -> Union[ndarray, generic]: ...
+    def __rfloordiv__(self, other: ArrayLike) -> Union[ndarray, generic]: ...
+    def __pow__(self, other: ArrayLike) -> Union[ndarray, generic]: ...
+ def __rpow__(self, other: ArrayLike) -> Union[ndarray, generic]: ... + def __truediv__(self, other: ArrayLike) -> Union[ndarray, generic]: ... + def __rtruediv__(self, other: ArrayLike) -> Union[ndarray, generic]: ... # `np.generic` does not support inplace operations - def __iadd__(self, other): ... - def __isub__(self, other): ... - def __imul__(self, other): ... - def __itruediv__(self, other): ... - def __ifloordiv__(self, other): ... + def __iadd__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... + def __isub__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... + def __imul__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... + def __itruediv__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... + def __ifloordiv__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... + def __ipow__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... def __imod__(self, other): ... - def __ipow__(self, other): ... def __ilshift__(self, other): ... def __irshift__(self, other): ... def __iand__(self, other): ... @@ -857,6 +868,11 @@ class ndarray(_ArrayOrScalarCommon, Iterable, Sized, Container): # See https://github.com/numpy/numpy-stubs/pull/80 for more details. _CharLike = Union[str, bytes] +_BoolLike = Union[bool, bool_] +_IntLike = Union[int, integer] +_FloatLike = Union[_IntLike, float, floating] +_ComplexLike = Union[_FloatLike, complex, complexfloating] +_NumberLike = Union[int, float, complex, number, bool_] class generic(_ArrayOrScalarCommon): @abstractmethod @@ -869,6 +885,19 @@ class number(generic): # type: ignore def real(self: _ArraySelf) -> _ArraySelf: ... @property def imag(self: _ArraySelf) -> _ArraySelf: ... + # Ensure that objects annotated as `number` support arithmetic operations + __add__: _NumberOp + __radd__: _NumberOp + __sub__: _NumberOp + __rsub__: _NumberOp + __mul__: _NumberOp + __rmul__: _NumberOp + __floordiv__: _NumberOp + __rfloordiv__: _NumberOp + __pow__: _NumberOp + __rpow__: _NumberOp + __truediv__: _NumberOp + __rtruediv__: _NumberOp class bool_(generic): def __init__(self, __value: object = ...) -> None: ... @@ -876,6 +905,18 @@ class bool_(generic): def real(self: _ArraySelf) -> _ArraySelf: ... @property def imag(self: _ArraySelf) -> _ArraySelf: ... + __add__: _BoolOp[bool_] + __radd__: _BoolOp[bool_] + __sub__: _BoolSub + __rsub__: _BoolSub + __mul__: _BoolOp[bool_] + __rmul__: _BoolOp[bool_] + __floordiv__: _BoolOp[int8] + __rfloordiv__: _BoolOp[int8] + __pow__: _BoolOp[int8] + __rpow__: _BoolOp[int8] + __truediv__: _BoolTrueDiv + __rtruediv__: _BoolTrueDiv class object_(generic): def __init__(self, __value: object = ...) -> None: ... @@ -892,10 +933,18 @@ class datetime64(generic): __format: Union[_CharLike, Tuple[_CharLike, _IntLike]] = ..., ) -> None: ... @overload - def __init__(self, __value: int, __format: Union[_CharLike, Tuple[_CharLike, _IntLike]]) -> None: ... - def __add__(self, other: Union[timedelta64, int]) -> datetime64: ... - def __sub__(self, other: Union[timedelta64, datetime64, int]) -> timedelta64: ... - def __rsub__(self, other: Union[datetime64, int]) -> timedelta64: ... + def __init__( + self, + __value: int, + __format: Union[_CharLike, Tuple[_CharLike, _IntLike]] + ) -> None: ... + def __add__(self, other: Union[timedelta64, _IntLike, _BoolLike]) -> datetime64: ... + def __radd__(self, other: Union[timedelta64, _IntLike, _BoolLike]) -> datetime64: ... + @overload + def __sub__(self, other: datetime64) -> timedelta64: ... 
+ @overload + def __sub__(self, other: Union[timedelta64, _IntLike, _BoolLike]) -> datetime64: ... + def __rsub__(self, other: datetime64) -> timedelta64: ... # Support for `__index__` was added in python 3.8 (bpo-20092) if sys.version_info >= (3, 8): @@ -911,8 +960,20 @@ class integer(number): # type: ignore # NOTE: `__index__` is technically defined in the bottom-most # sub-classes (`int64`, `uint32`, etc) def __index__(self) -> int: ... - -class signedinteger(integer): ... # type: ignore + __truediv__: _IntTrueDiv + __rtruediv__: _IntTrueDiv + +class signedinteger(integer): # type: ignore + __add__: _SignedIntOp + __radd__: _SignedIntOp + __sub__: _SignedIntOp + __rsub__: _SignedIntOp + __mul__: _SignedIntOp + __rmul__: _SignedIntOp + __floordiv__: _SignedIntOp + __rfloordiv__: _SignedIntOp + __pow__: _SignedIntOp + __rpow__: _SignedIntOp class int8(signedinteger): def __init__(self, __value: _IntValue = ...) -> None: ... @@ -926,24 +987,36 @@ class int32(signedinteger): class int64(signedinteger): def __init__(self, __value: _IntValue = ...) -> None: ... -class timedelta64(signedinteger): +class timedelta64(generic): def __init__( self, __value: Union[None, int, _CharLike, dt.timedelta, timedelta64] = ..., __format: Union[_CharLike, Tuple[_CharLike, _IntLike]] = ..., ) -> None: ... - @overload - def __add__(self, other: Union[timedelta64, int]) -> timedelta64: ... - @overload - def __add__(self, other: datetime64) -> datetime64: ... - def __sub__(self, other: Union[timedelta64, int]) -> timedelta64: ... - @overload - def __truediv__(self, other: timedelta64) -> float: ... - @overload - def __truediv__(self, other: float) -> timedelta64: ... + def __add__(self, other: Union[timedelta64, _IntLike, _BoolLike]) -> timedelta64: ... + def __radd__(self, other: Union[timedelta64, _IntLike, _BoolLike]) -> timedelta64: ... + def __sub__(self, other: Union[timedelta64, _IntLike, _BoolLike]) -> timedelta64: ... + def __rsub__(self, other: Union[timedelta64, _IntLike, _BoolLike]) -> timedelta64: ... + def __mul__(self, other: Union[_FloatLike, _BoolLike]) -> timedelta64: ... + def __rmul__(self, other: Union[_FloatLike, _BoolLike]) -> timedelta64: ... + __truediv__: _TD64Div[float64] + __floordiv__: _TD64Div[signedinteger] + def __rtruediv__(self, other: timedelta64) -> float64: ... + def __rfloordiv__(self, other: timedelta64) -> signedinteger: ... def __mod__(self, other: timedelta64) -> timedelta64: ... -class unsignedinteger(integer): ... # type: ignore +class unsignedinteger(integer): # type: ignore + # NOTE: `uint64 + signedinteger -> float64` + __add__: _UnsignedIntOp + __radd__: _UnsignedIntOp + __sub__: _UnsignedIntOp + __rsub__: _UnsignedIntOp + __mul__: _UnsignedIntOp + __rmul__: _UnsignedIntOp + __floordiv__: _UnsignedIntOp + __rfloordiv__: _UnsignedIntOp + __pow__: _UnsignedIntOp + __rpow__: _UnsignedIntOp class uint8(unsignedinteger): def __init__(self, __value: _IntValue = ...) -> None: ... @@ -958,7 +1031,20 @@ class uint64(unsignedinteger): def __init__(self, __value: _IntValue = ...) -> None: ... class inexact(number): ... # type: ignore -class floating(inexact): ... 
# type: ignore + +class floating(inexact): # type: ignore + __add__: _FloatOp + __radd__: _FloatOp + __sub__: _FloatOp + __rsub__: _FloatOp + __mul__: _FloatOp + __rmul__: _FloatOp + __truediv__: _FloatOp + __rtruediv__: _FloatOp + __floordiv__: _FloatOp + __rfloordiv__: _FloatOp + __pow__: _FloatOp + __rpow__: _FloatOp _FloatType = TypeVar('_FloatType', bound=floating) @@ -977,6 +1063,18 @@ class complexfloating(inexact, Generic[_FloatType]): # type: ignore @property def imag(self) -> _FloatType: ... # type: ignore[override] def __abs__(self) -> _FloatType: ... # type: ignore[override] + __add__: _ComplexOp + __radd__: _ComplexOp + __sub__: _ComplexOp + __rsub__: _ComplexOp + __mul__: _ComplexOp + __rmul__: _ComplexOp + __truediv__: _ComplexOp + __rtruediv__: _ComplexOp + __floordiv__: _ComplexOp + __rfloordiv__: _ComplexOp + __pow__: _ComplexOp + __rpow__: _ComplexOp class complex64(complexfloating[float32]): def __init__(self, __value: _ComplexValue = ...) -> None: ... @@ -987,7 +1085,7 @@ class complex128(complexfloating[float64], complex): class flexible(generic): ... # type: ignore class void(flexible): - def __init__(self, __value: Union[int, integer, bool_, bytes]): ... + def __init__(self, __value: Union[_IntLike, _BoolLike, bytes]): ... @property def real(self: _ArraySelf) -> _ArraySelf: ... @property @@ -995,6 +1093,9 @@ class void(flexible): class character(flexible): ... # type: ignore +# NOTE: Most `np.bytes_` / `np.str_` methods return their +# builtin `bytes` / `str` counterpart + class bytes_(character, bytes): @overload def __init__(self, __value: object = ...) -> None: ... @@ -1396,7 +1497,3 @@ def sctype2char(sctype: object) -> str: ... def find_common_type( array_types: Sequence[DtypeLike], scalar_types: Sequence[DtypeLike] ) -> dtype: ... - -_NumberLike = Union[int, float, complex, number, bool_] -_IntLike = Union[int, integer] -_BoolLike = Union[bool, bool_] diff --git a/numpy/typing/_callable.py b/numpy/typing/_callable.py new file mode 100644 index 000000000000..5e14b708f1a6 --- /dev/null +++ b/numpy/typing/_callable.py @@ -0,0 +1,136 @@ +""" +A module with various ``typing.Protocol`` subclasses that implement +the ``__call__`` magic method. + +See the `Mypy documentation`_ on protocols for more details. + +.. _`Mypy documentation`: https://mypy.readthedocs.io/en/stable/protocols.html#callback-protocols + +""" + +import sys +from typing import Union, TypeVar, overload, Any + +from numpy import ( + _BoolLike, + _IntLike, + _FloatLike, + _ComplexLike, + _NumberLike, + generic, + bool_, + timedelta64, + number, + integer, + unsignedinteger, + signedinteger, + int32, + int64, + floating, + float32, + float64, + complexfloating, + complex128, +) + +if sys.version_info >= (3, 8): + from typing import Protocol + HAVE_PROTOCOL = True +else: + try: + from typing_extensions import Protocol + except ImportError: + HAVE_PROTOCOL = False + else: + HAVE_PROTOCOL = True + +if HAVE_PROTOCOL: + _NumberType = TypeVar("_NumberType", bound=number) + _NumberType_co = TypeVar("_NumberType_co", covariant=True, bound=number) + _GenericType_co = TypeVar("_GenericType_co", covariant=True, bound=generic) + + class _BoolOp(Protocol[_GenericType_co]): + @overload + def __call__(self, __other: _BoolLike) -> _GenericType_co: ... + @overload # platform dependent + def __call__(self, __other: int) -> Union[int32, int64]: ... + @overload + def __call__(self, __other: float) -> float64: ... + @overload + def __call__(self, __other: complex) -> complex128: ... 
+ @overload + def __call__(self, __other: _NumberType) -> _NumberType: ... + + class _BoolSub(Protocol): + # Note that `__other: bool_` is absent here + @overload # platform dependent + def __call__(self, __other: int) -> Union[int32, int64]: ... + @overload + def __call__(self, __other: float) -> float64: ... + @overload + def __call__(self, __other: complex) -> complex128: ... + @overload + def __call__(self, __other: _NumberType) -> _NumberType: ... + + class _BoolTrueDiv(Protocol): + @overload + def __call__(self, __other: Union[float, _IntLike, _BoolLike]) -> float64: ... + @overload + def __call__(self, __other: complex) -> complex128: ... + @overload + def __call__(self, __other: _NumberType) -> _NumberType: ... + + class _TD64Div(Protocol[_NumberType_co]): + @overload + def __call__(self, __other: timedelta64) -> _NumberType_co: ... + @overload + def __call__(self, __other: _FloatLike) -> timedelta64: ... + + class _IntTrueDiv(Protocol): + @overload + def __call__(self, __other: Union[_IntLike, float]) -> floating: ... + @overload + def __call__(self, __other: complex) -> complexfloating[floating]: ... + + class _UnsignedIntOp(Protocol): + # NOTE: `uint64 + signedinteger -> float64` + @overload + def __call__(self, __other: Union[bool, unsignedinteger]) -> unsignedinteger: ... + @overload + def __call__(self, __other: Union[int, signedinteger]) -> Union[signedinteger, float64]: ... + @overload + def __call__(self, __other: float) -> floating: ... + @overload + def __call__(self, __other: complex) -> complexfloating[floating]: ... + + class _SignedIntOp(Protocol): + @overload + def __call__(self, __other: Union[int, signedinteger]) -> signedinteger: ... + @overload + def __call__(self, __other: float) -> floating: ... + @overload + def __call__(self, __other: complex) -> complexfloating[floating]: ... + + class _FloatOp(Protocol): + @overload + def __call__(self, __other: _FloatLike) -> floating: ... + @overload + def __call__(self, __other: complex) -> complexfloating[floating]: ... + + class _ComplexOp(Protocol): + def __call__(self, __other: _ComplexLike) -> complexfloating[floating]: ... + + class _NumberOp(Protocol): + def __call__(self, __other: _NumberLike) -> number: ... + +else: + _BoolOp = Any + _BoolSub = Any + _BoolTrueDiv = Any + _TD64Div = Any + _IntTrueDiv = Any + _UnsignedIntOp = Any + _SignedIntOp = Any + _FloatOp = Any + _ComplexOp = Any + _NumberOp = Any diff --git a/numpy/typing/tests/data/fail/arithmetic.py b/numpy/typing/tests/data/fail/arithmetic.py new file mode 100644 index 000000000000..169e104f9a22 --- /dev/null +++ b/numpy/typing/tests/data/fail/arithmetic.py @@ -0,0 +1,19 @@ +import numpy as np + +b_ = np.bool_() +dt = np.datetime64(0, "D") +td = np.timedelta64(0, "D") + +b_ - b_ # E: No overload variant + +dt + dt # E: Unsupported operand types +td - dt # E: Unsupported operand types +td % 1 # E: Unsupported operand types +td / dt # E: No overload + +# NOTE: The 1 tests below currently don't work due to the broad +# (i.e. untyped) signature of `.__mod__()`. +# TODO: Revisit this once annotations are added to the +# `_ArrayOrScalarCommon` magic methods. 
+ +# td % dt # E: Unsupported operand types diff --git a/numpy/typing/tests/data/fail/scalars.py b/numpy/typing/tests/data/fail/scalars.py index 47c031163636..13bb45483353 100644 --- a/numpy/typing/tests/data/fail/scalars.py +++ b/numpy/typing/tests/data/fail/scalars.py @@ -28,22 +28,6 @@ np.datetime64(0) # E: non-matching overload -dt_64 = np.datetime64(0, "D") -td_64 = np.timedelta64(1, "h") - -dt_64 + dt_64 # E: Unsupported operand types -td_64 - dt_64 # E: Unsupported operand types -td_64 % 1 # E: Unsupported operand types - -# NOTE: The 2 tests below currently don't work due to the broad -# (i.e. untyped) signature of `generic.__truediv__()` and `.__mod__()`. -# TODO: Revisit this once annotations are added to the -# `_ArrayOrScalarCommon` magic methods. - -# td_64 / dt_64 # E: No overload -# td_64 % dt_64 # E: Unsupported operand types - - class A: def __float__(self): return 1.0 diff --git a/numpy/typing/tests/data/pass/arithmetic.py b/numpy/typing/tests/data/pass/arithmetic.py new file mode 100644 index 000000000000..f26eab879255 --- /dev/null +++ b/numpy/typing/tests/data/pass/arithmetic.py @@ -0,0 +1,257 @@ +import numpy as np + +c16 = np.complex128(1) +f8 = np.float64(1) +i8 = np.int64(1) +u8 = np.uint64(1) + +c8 = np.complex64(1) +f4 = np.float32(1) +i4 = np.int32(1) +u4 = np.uint32(1) + +dt = np.datetime64(1, "D") +td = np.timedelta64(1, "D") + +b_ = np.bool_(1) + +b = bool(1) +c = complex(1) +f = float(1) +i = int(1) + +AR = np.ones(1, dtype=np.float64) +AR.setflags(write=False) + +# Time structures + +dt + td +dt + i +dt + i4 +dt + i8 +dt - dt +dt - i +dt - i4 +dt - i8 + +td + td +td + i +td + i4 +td + i8 +td - td +td - i +td - i4 +td - i8 +td / f +td / f4 +td / f8 +td / td +td // td +td % td + + +# boolean + +b_ / b +b_ / b_ +b_ / i +b_ / i8 +b_ / i4 +b_ / u8 +b_ / u4 +b_ / f +b_ / f8 +b_ / f4 +b_ / c +b_ / c16 +b_ / c8 + +b / b_ +b_ / b_ +i / b_ +i8 / b_ +i4 / b_ +u8 / b_ +u4 / b_ +f / b_ +f8 / b_ +f4 / b_ +c / b_ +c16 / b_ +c8 / b_ + +# Complex + +c16 + c16 +c16 + f8 +c16 + i8 +c16 + c8 +c16 + f4 +c16 + i4 +c16 + b_ +c16 + b +c16 + c +c16 + f +c16 + i +c16 + AR + +c16 + c16 +f8 + c16 +i8 + c16 +c8 + c16 +f4 + c16 +i4 + c16 +b_ + c16 +b + c16 +c + c16 +f + c16 +i + c16 +AR + c16 + +c8 + c16 +c8 + f8 +c8 + i8 +c8 + c8 +c8 + f4 +c8 + i4 +c8 + b_ +c8 + b +c8 + c +c8 + f +c8 + i +c8 + AR + +c16 + c8 +f8 + c8 +i8 + c8 +c8 + c8 +f4 + c8 +i4 + c8 +b_ + c8 +b + c8 +c + c8 +f + c8 +i + c8 +AR + c8 + +# Float + +f8 + f8 +f8 + i8 +f8 + f4 +f8 + i4 +f8 + b_ +f8 + b +f8 + c +f8 + f +f8 + i +f8 + AR + +f8 + f8 +i8 + f8 +f4 + f8 +i4 + f8 +b_ + f8 +b + f8 +c + f8 +f + f8 +i + f8 +AR + f8 + +f4 + f8 +f4 + i8 +f4 + f4 +f4 + i4 +f4 + b_ +f4 + b +f4 + c +f4 + f +f4 + i +f4 + AR + +f8 + f4 +i8 + f4 +f4 + f4 +i4 + f4 +b_ + f4 +b + f4 +c + f4 +f + f4 +i + f4 +AR + f4 + +# Int + +i8 + i8 +i8 + u8 +i8 + i4 +i8 + u4 +i8 + b_ +i8 + b +i8 + c +i8 + f +i8 + i +i8 + AR + +u8 + u8 +u8 + i4 +u8 + u4 +u8 + b_ +u8 + b +u8 + c +u8 + f +u8 + i +u8 + AR + +i8 + i8 +u8 + i8 +i4 + i8 +u4 + i8 +b_ + i8 +b + i8 +c + i8 +f + i8 +i + i8 +AR + i8 + +u8 + u8 +i4 + u8 +u4 + u8 +b_ + u8 +b + u8 +c + u8 +f + u8 +i + u8 +AR + u8 + +i4 + i8 +i4 + i4 +i4 + i +i4 + b_ +i4 + b +i4 + AR + +u4 + i8 +u4 + i4 +u4 + u8 +u4 + u4 +u4 + i +u4 + b_ +u4 + b +u4 + AR + +i8 + i4 +i4 + i4 +i + i4 +b_ + i4 +b + i4 +AR + i4 + +i8 + u4 +i4 + u4 +u8 + u4 +u4 + u4 +b_ + u4 +b + u4 +i + u4 +AR + u4 diff --git a/numpy/typing/tests/data/pass/scalars.py b/numpy/typing/tests/data/pass/scalars.py index c02e1ed36f28..49ddb8ed91f5 100644 --- 
a/numpy/typing/tests/data/pass/scalars.py +++ b/numpy/typing/tests/data/pass/scalars.py @@ -108,19 +108,6 @@ def __float__(self) -> float: np.timedelta64(None) np.timedelta64(None, "D") -dt_64 = np.datetime64(0, "D") -td_64 = np.timedelta64(1, "h") - -dt_64 + td_64 -dt_64 - dt_64 -dt_64 - td_64 - -td_64 + td_64 -td_64 - td_64 -td_64 / 1.0 -td_64 / td_64 -td_64 % td_64 - np.void(1) np.void(np.int64(1)) np.void(True) diff --git a/numpy/typing/tests/data/pass/ufuncs.py b/numpy/typing/tests/data/pass/ufuncs.py index 82172952a61d..ad4d483d4a70 100644 --- a/numpy/typing/tests/data/pass/ufuncs.py +++ b/numpy/typing/tests/data/pass/ufuncs.py @@ -6,7 +6,10 @@ np.matmul(np.ones((2, 2, 2)), np.ones((2, 2, 2)), axes=[(0, 1), (0, 1), (0, 1)]) np.sin(1, signature="D") np.sin(1, extobj=[16, 1, lambda: None]) -np.sin(1) + np.sin(1) +# NOTE: `np.generic` subclasses are not guaranteed to support addition; +# re-enable this we can infer the exact return type of `np.sin(...)`. +# +# np.sin(1) + np.sin(1) np.sin.types[0] np.sin.__name__ diff --git a/numpy/typing/tests/data/reveal/arithmetic.py b/numpy/typing/tests/data/reveal/arithmetic.py new file mode 100644 index 000000000000..b8c457aafb53 --- /dev/null +++ b/numpy/typing/tests/data/reveal/arithmetic.py @@ -0,0 +1,256 @@ +import numpy as np + +c16 = np.complex128() +f8 = np.float64() +i8 = np.int64() +u8 = np.uint64() + +c8 = np.complex64() +f4 = np.float32() +i4 = np.int32() +u4 = np.uint32() + +dt = np.datetime64(0, "D") +td = np.timedelta64(0, "D") + +b_ = np.bool_() + +b = bool() +c = complex() +f = float() +i = int() + +AR = np.array([0], dtype=np.float64) +AR.setflags(write=False) + +# Time structures + +reveal_type(dt + td) # E: numpy.datetime64 +reveal_type(dt + i) # E: numpy.datetime64 +reveal_type(dt + i4) # E: numpy.datetime64 +reveal_type(dt + i8) # E: numpy.datetime64 +reveal_type(dt - dt) # E: numpy.timedelta64 +reveal_type(dt - i) # E: numpy.datetime64 +reveal_type(dt - i4) # E: numpy.datetime64 +reveal_type(dt - i8) # E: numpy.datetime64 + +reveal_type(td + td) # E: numpy.timedelta64 +reveal_type(td + i) # E: numpy.timedelta64 +reveal_type(td + i4) # E: numpy.timedelta64 +reveal_type(td + i8) # E: numpy.timedelta64 +reveal_type(td - td) # E: numpy.timedelta64 +reveal_type(td - i) # E: numpy.timedelta64 +reveal_type(td - i4) # E: numpy.timedelta64 +reveal_type(td - i8) # E: numpy.timedelta64 +reveal_type(td / f) # E: numpy.timedelta64 +reveal_type(td / f4) # E: numpy.timedelta64 +reveal_type(td / f8) # E: numpy.timedelta64 +reveal_type(td / td) # E: float64 +reveal_type(td // td) # E: signedinteger +reveal_type(td % td) # E: numpy.timedelta64 + +# boolean + +reveal_type(b_ / b) # E: float64 +reveal_type(b_ / b_) # E: float64 +reveal_type(b_ / i) # E: float64 +reveal_type(b_ / i8) # E: float64 +reveal_type(b_ / i4) # E: float64 +reveal_type(b_ / u8) # E: float64 +reveal_type(b_ / u4) # E: float64 +reveal_type(b_ / f) # E: float64 +reveal_type(b_ / f8) # E: float64 +reveal_type(b_ / f4) # E: float32 +reveal_type(b_ / c) # E: complex128 +reveal_type(b_ / c16) # E: complex128 +reveal_type(b_ / c8) # E: complex64 + +reveal_type(b / b_) # E: float64 +reveal_type(b_ / b_) # E: float64 +reveal_type(i / b_) # E: float64 +reveal_type(i8 / b_) # E: float64 +reveal_type(i4 / b_) # E: float64 +reveal_type(u8 / b_) # E: float64 +reveal_type(u4 / b_) # E: float64 +reveal_type(f / b_) # E: float64 +reveal_type(f8 / b_) # E: float64 +reveal_type(f4 / b_) # E: float32 +reveal_type(c / b_) # E: complex128 +reveal_type(c16 / b_) # E: complex128 +reveal_type(c8 / 
b_) # E: complex64 + +# Complex + +reveal_type(c16 + c16) # E: complexfloating +reveal_type(c16 + f8) # E: complexfloating +reveal_type(c16 + i8) # E: complexfloating +reveal_type(c16 + c8) # E: complexfloating +reveal_type(c16 + f4) # E: complexfloating +reveal_type(c16 + i4) # E: complexfloating +reveal_type(c16 + b_) # E: complex128 +reveal_type(c16 + b) # E: complexfloating +reveal_type(c16 + c) # E: complexfloating +reveal_type(c16 + f) # E: complexfloating +reveal_type(c16 + i) # E: complexfloating +reveal_type(c16 + AR) # E: Union[numpy.ndarray, numpy.generic] + +reveal_type(c16 + c16) # E: complexfloating +reveal_type(f8 + c16) # E: complexfloating +reveal_type(i8 + c16) # E: complexfloating +reveal_type(c8 + c16) # E: complexfloating +reveal_type(f4 + c16) # E: complexfloating +reveal_type(i4 + c16) # E: complexfloating +reveal_type(b_ + c16) # E: complex128 +reveal_type(b + c16) # E: complexfloating +reveal_type(c + c16) # E: complexfloating +reveal_type(f + c16) # E: complexfloating +reveal_type(i + c16) # E: complexfloating +reveal_type(AR + c16) # E: Union[numpy.ndarray, numpy.generic] + +reveal_type(c8 + c16) # E: complexfloating +reveal_type(c8 + f8) # E: complexfloating +reveal_type(c8 + i8) # E: complexfloating +reveal_type(c8 + c8) # E: complexfloating +reveal_type(c8 + f4) # E: complexfloating +reveal_type(c8 + i4) # E: complexfloating +reveal_type(c8 + b_) # E: complex64 +reveal_type(c8 + b) # E: complexfloating +reveal_type(c8 + c) # E: complexfloating +reveal_type(c8 + f) # E: complexfloating +reveal_type(c8 + i) # E: complexfloating +reveal_type(c8 + AR) # E: Union[numpy.ndarray, numpy.generic] + +reveal_type(c16 + c8) # E: complexfloating +reveal_type(f8 + c8) # E: complexfloating +reveal_type(i8 + c8) # E: complexfloating +reveal_type(c8 + c8) # E: complexfloating +reveal_type(f4 + c8) # E: complexfloating +reveal_type(i4 + c8) # E: complexfloating +reveal_type(b_ + c8) # E: complex64 +reveal_type(b + c8) # E: complexfloating +reveal_type(c + c8) # E: complexfloating +reveal_type(f + c8) # E: complexfloating +reveal_type(i + c8) # E: complexfloating +reveal_type(AR + c8) # E: Union[numpy.ndarray, numpy.generic] + +# Float + +reveal_type(f8 + f8) # E: floating +reveal_type(f8 + i8) # E: floating +reveal_type(f8 + f4) # E: floating +reveal_type(f8 + i4) # E: floating +reveal_type(f8 + b_) # E: float64 +reveal_type(f8 + b) # E: floating +reveal_type(f8 + c) # E: complexfloating +reveal_type(f8 + f) # E: floating +reveal_type(f8 + i) # E: floating +reveal_type(f8 + AR) # E: Union[numpy.ndarray, numpy.generic] + +reveal_type(f8 + f8) # E: floating +reveal_type(i8 + f8) # E: floating +reveal_type(f4 + f8) # E: floating +reveal_type(i4 + f8) # E: floating +reveal_type(b_ + f8) # E: float64 +reveal_type(b + f8) # E: floating +reveal_type(c + f8) # E: complexfloating +reveal_type(f + f8) # E: floating +reveal_type(i + f8) # E: floating +reveal_type(AR + f8) # E: Union[numpy.ndarray, numpy.generic] + +reveal_type(f4 + f8) # E: floating +reveal_type(f4 + i8) # E: floating +reveal_type(f4 + f4) # E: floating +reveal_type(f4 + i4) # E: floating +reveal_type(f4 + b_) # E: float32 +reveal_type(f4 + b) # E: floating +reveal_type(f4 + c) # E: complexfloating +reveal_type(f4 + f) # E: floating +reveal_type(f4 + i) # E: floating +reveal_type(f4 + AR) # E: Union[numpy.ndarray, numpy.generic] + +reveal_type(f8 + f4) # E: floating +reveal_type(i8 + f4) # E: floating +reveal_type(f4 + f4) # E: floating +reveal_type(i4 + f4) # E: floating +reveal_type(b_ + f4) # E: float32 +reveal_type(b 
+ f4) # E: floating +reveal_type(c + f4) # E: complexfloating +reveal_type(f + f4) # E: floating +reveal_type(i + f4) # E: floating +reveal_type(AR + f4) # E: Union[numpy.ndarray, numpy.generic] + +# Int + +reveal_type(i8 + i8) # E: signedinteger +reveal_type(i8 + u8) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(i8 + i4) # E: signedinteger +reveal_type(i8 + u4) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(i8 + b_) # E: int64 +reveal_type(i8 + b) # E: signedinteger +reveal_type(i8 + c) # E: complexfloating +reveal_type(i8 + f) # E: floating +reveal_type(i8 + i) # E: signedinteger +reveal_type(i8 + AR) # E: Union[numpy.ndarray, numpy.generic] + +reveal_type(u8 + u8) # E: unsignedinteger +reveal_type(u8 + i4) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(u8 + u4) # E: unsignedinteger +reveal_type(u8 + b_) # E: uint64 +reveal_type(u8 + b) # E: unsignedinteger +reveal_type(u8 + c) # E: complexfloating +reveal_type(u8 + f) # E: floating +reveal_type(u8 + i) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(u8 + AR) # E: Union[numpy.ndarray, numpy.generic] + +reveal_type(i8 + i8) # E: signedinteger +reveal_type(u8 + i8) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(i4 + i8) # E: signedinteger +reveal_type(u4 + i8) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(b_ + i8) # E: int64 +reveal_type(b + i8) # E: signedinteger +reveal_type(c + i8) # E: complexfloating +reveal_type(f + i8) # E: floating +reveal_type(i + i8) # E: signedinteger +reveal_type(AR + i8) # E: Union[numpy.ndarray, numpy.generic] + +reveal_type(u8 + u8) # E: unsignedinteger +reveal_type(i4 + u8) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(u4 + u8) # E: unsignedinteger +reveal_type(b_ + u8) # E: uint64 +reveal_type(b + u8) # E: unsignedinteger +reveal_type(c + u8) # E: complexfloating +reveal_type(f + u8) # E: floating +reveal_type(i + u8) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(AR + u8) # E: Union[numpy.ndarray, numpy.generic] + +reveal_type(i4 + i8) # E: signedinteger +reveal_type(i4 + i4) # E: signedinteger +reveal_type(i4 + i) # E: signedinteger +reveal_type(i4 + b_) # E: int32 +reveal_type(i4 + b) # E: signedinteger +reveal_type(i4 + AR) # E: Union[numpy.ndarray, numpy.generic] + +reveal_type(u4 + i8) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(u4 + i4) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(u4 + u8) # E: unsignedinteger +reveal_type(u4 + u4) # E: unsignedinteger +reveal_type(u4 + i) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(u4 + b_) # E: uint32 +reveal_type(u4 + b) # E: unsignedinteger +reveal_type(u4 + AR) # E: Union[numpy.ndarray, numpy.generic] + +reveal_type(i8 + i4) # E: signedinteger +reveal_type(i4 + i4) # E: signedinteger +reveal_type(i + i4) # E: signedinteger +reveal_type(b_ + i4) # E: int32 +reveal_type(b + i4) # E: signedinteger +reveal_type(AR + i4) # E: Union[numpy.ndarray, numpy.generic] + +reveal_type(i8 + u4) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(i4 + u4) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(u8 + u4) # E: unsignedinteger +reveal_type(u4 + u4) # E: unsignedinteger +reveal_type(b_ + u4) # E: uint32 +reveal_type(b + u4) # E: unsignedinteger +reveal_type(i + u4) # E: Union[numpy.signedinteger, numpy.float64] +reveal_type(AR + u4) # E: Union[numpy.ndarray, numpy.generic] diff --git a/numpy/typing/tests/data/reveal/scalars.py b/numpy/typing/tests/data/reveal/scalars.py index 882fe96128d6..ec3713b0fc42 
100644
--- a/numpy/typing/tests/data/reveal/scalars.py
+++ b/numpy/typing/tests/data/reveal/scalars.py
@@ -12,22 +12,5 @@
 reveal_type(x.shape)  # E: tuple[builtins.int]
 reveal_type(x.strides)  # E: tuple[builtins.int]

-# Time structures
-dt = np.datetime64(0, "D")
-td = np.timedelta64(0, "D")
-
-reveal_type(dt + td)  # E: numpy.datetime64
-reveal_type(dt + 1)  # E: numpy.datetime64
-reveal_type(dt - dt)  # E: numpy.timedelta64
-reveal_type(dt - 1)  # E: numpy.timedelta64
-
-reveal_type(td + td)  # E: numpy.timedelta64
-reveal_type(td + 1)  # E: numpy.timedelta64
-reveal_type(td - td)  # E: numpy.timedelta64
-reveal_type(td - 1)  # E: numpy.timedelta64
-reveal_type(td / 1.0)  # E: numpy.timedelta64
-reveal_type(td / td)  # E: float
-reveal_type(td % td)  # E: numpy.timedelta64
-
 reveal_type(np.complex64().real)  # E: numpy.float32
 reveal_type(np.complex128().imag)  # E: numpy.float64

From e4312341c1ad9c102383a1d8a86a56490a3019c8 Mon Sep 17 00:00:00 2001
From: Safouane Chergui <31566323+chsafouane@users.noreply.github.com>
Date: Fri, 2 Oct 2020 23:41:16 +0200
Subject: [PATCH 157/409] nep-0029 typo correction: "to provides" -> "to
 provide"

---
 doc/neps/nep-0029-deprecation_policy.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/neps/nep-0029-deprecation_policy.rst b/doc/neps/nep-0029-deprecation_policy.rst
index 4674d24ec663..957674ee6e20 100644
--- a/doc/neps/nep-0029-deprecation_policy.rst
+++ b/doc/neps/nep-0029-deprecation_policy.rst
@@ -77,7 +77,7 @@ release in November 2020 should support Python 3.7 and newer.
 The current Python release cadence is 18 months so a 42 month window
 ensures that there will always be at least two minor versions of Python
 in the window. The window is extended 6 months beyond the anticipated
 two-release
-interval for Python to provides resilience against small fluctuations /
+interval for Python to provide resilience against small fluctuations /
 delays in its release schedule.

 Because Python minor version support is based only on historical

From f632c948ff954892acda4d7849b8aa0e632c0a25 Mon Sep 17 00:00:00 2001
From: Sebastian Berg
Date: Fri, 2 Oct 2020 19:45:27 -0500
Subject: [PATCH 158/409] BUG: Fix memory leaks related to NEP 37 function
 overrides

This adds a few missing DECREFs/XDECREFs; all of these are already
covered by the tests and can be found running a leak-checker.
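A rough illustration of how such missing-DECREF leaks can be spotted from
Python (a sketch only, not how NumPy's test suite detects them; `reproduce`
and `witness` are placeholder names):

```
import sys
import numpy as np

def leaks_references(reproduce, witness, n=1000):
    # If each call forgets a Py_DECREF on `witness`, its reference count
    # grows roughly once per call instead of staying flat.
    before = sys.getrefcount(witness)
    for _ in range(n):
        reproduce()
    return sys.getrefcount(witness) - before >= n

arr = np.arange(3)
print(leaks_references(lambda: np.array(arr, copy=True), arr))  # expect False
```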
--- numpy/core/src/multiarray/arrayfunction_override.c | 4 ++++ numpy/core/src/multiarray/multiarraymodule.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/numpy/core/src/multiarray/arrayfunction_override.c b/numpy/core/src/multiarray/arrayfunction_override.c index 613fe6b3f09f..8e3bde78f714 100644 --- a/numpy/core/src/multiarray/arrayfunction_override.c +++ b/numpy/core/src/multiarray/arrayfunction_override.c @@ -388,15 +388,18 @@ array_implement_c_array_function_creation( PyObject *numpy_module = PyImport_Import(npy_ma_str_numpy); if (numpy_module == NULL) { + Py_DECREF(relevant_args); return NULL; } PyObject *public_api = PyObject_GetAttrString(numpy_module, function_name); Py_DECREF(numpy_module); if (public_api == NULL) { + Py_DECREF(relevant_args); return NULL; } if (!PyCallable_Check(public_api)) { + Py_DECREF(relevant_args); Py_DECREF(public_api); return PyErr_Format(PyExc_RuntimeError, "numpy.%s is not callable.", @@ -406,6 +409,7 @@ array_implement_c_array_function_creation( PyObject* result = array_implement_array_function_internal( public_api, relevant_args, args, kwargs); + Py_DECREF(relevant_args); Py_DECREF(public_api); return result; } diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index ff2b796d2908..1aad70dc65bb 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -2296,6 +2296,7 @@ array_fromiter(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *keywds) array_function_result = array_implement_c_array_function_creation( "fromiter", args, keywds); if (array_function_result != Py_NotImplemented) { + Py_DECREF(descr); return array_function_result; } @@ -2942,6 +2943,7 @@ array_arange(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kws) { array_function_result = array_implement_c_array_function_creation( "arange", args, kws); if (array_function_result != Py_NotImplemented) { + Py_XDECREF(typecode); return array_function_result; } From 24d368a809827cd2cd787a470a65b4a7cd721d10 Mon Sep 17 00:00:00 2001 From: Takanori H Date: Sat, 3 Oct 2020 14:39:31 +0900 Subject: [PATCH 159/409] DOC: Fix the links for ``Ellipsis`` (#17431) --- doc/source/reference/arrays.indexing.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/reference/arrays.indexing.rst b/doc/source/reference/arrays.indexing.rst index 180a79dae18c..b2a9f1d219f3 100644 --- a/doc/source/reference/arrays.indexing.rst +++ b/doc/source/reference/arrays.indexing.rst @@ -34,7 +34,7 @@ Basic Slicing and Indexing Basic slicing extends Python's basic concept of slicing to N dimensions. Basic slicing occurs when *obj* is a :class:`slice` object (constructed by ``start:stop:step`` notation inside of brackets), an -integer, or a tuple of slice objects and integers. :const:`Ellipsis` +integer, or a tuple of slice objects and integers. :py:data:`Ellipsis` and :const:`newaxis` objects can be interspersed with these as well. @@ -43,7 +43,7 @@ well. In order to remain backward compatible with a common usage in Numeric, basic slicing is also initiated if the selection object is any non-ndarray and non-tuple sequence (such as a :class:`list`) containing - :class:`slice` objects, the :const:`Ellipsis` object, or the :const:`newaxis` + :class:`slice` objects, the :py:data:`Ellipsis` object, or the :const:`newaxis` object, but not for integer arrays or other embedded sequences. .. 
index:: @@ -129,7 +129,7 @@ concepts to remember include: [5], [6]]]) -- :const:`Ellipsis` expands to the number of ``:`` objects needed for the +- :py:data:`Ellipsis` expands to the number of ``:`` objects needed for the selection tuple to index all dimensions. In most cases, this means that length of the expanded selection tuple is ``x.ndim``. There may only be a single ellipsis present. @@ -333,7 +333,7 @@ the subspace defined by the basic indexing (excluding integers) and the subspace from the advanced indexing part. Two cases of index combination need to be distinguished: -* The advanced indexes are separated by a slice, :const:`Ellipsis` or :const:`newaxis`. +* The advanced indexes are separated by a slice, :py:data:`Ellipsis` or :const:`newaxis`. For example ``x[arr1, :, arr2]``. * The advanced indexes are all next to each other. For example ``x[..., arr1, arr2, :]`` but *not* ``x[arr1, :, 1]`` From 35c67f6d33aa82d1ee8bd4fbdc5b925ef560de2a Mon Sep 17 00:00:00 2001 From: Alex Rogozhnikov Date: Sat, 3 Oct 2020 02:28:51 -0700 Subject: [PATCH 160/409] DOC: add references to einops and opt_einsum Following discussion on the mailing list, this PR adds mentions of the third-party libraries einops and opt_einsum to the einsum documentation. --- numpy/core/einsumfunc.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/numpy/core/einsumfunc.py b/numpy/core/einsumfunc.py index f65f4015c928..e0942becaaa5 100644 --- a/numpy/core/einsumfunc.py +++ b/numpy/core/einsumfunc.py @@ -1062,6 +1062,17 @@ def einsum(*operands, out=None, optimize=False, **kwargs): -------- einsum_path, dot, inner, outer, tensordot, linalg.multi_dot + einops: + a similar verbose interface is provided by the + `einops `_ package to cover + additional operations: transpose, reshape/flatten, repeat/tile, + squeeze/unsqueeze and reductions. + + opt_einsum: + `opt_einsum `_ + optimizes contraction order for einsum-like expressions + in a backend-agnostic manner. + Notes ----- .. versionadded:: 1.6.0 From 30aca02691baf56dd9b5d81dcefebdb3e575678e Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Sat, 3 Oct 2020 08:16:10 -0600 Subject: [PATCH 161/409] MAINT: Disable 32-bit PyPy CI testing on Windows. The test is consistently failing at the moment; we can enable it again when PyPy gets fixed. --- azure-pipelines.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index da57649b884b..9382ac83c129 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -217,11 +217,11 @@ stages: BITS: 64 NPY_USE_BLAS_ILP64: '1' OPENBLAS_SUFFIX: '64_' - PyPy36-32bit: - PYTHON_VERSION: 'PyPy3.6' - PYTHON_ARCH: 'x32' - TEST_MODE: fast - BITS: 32 + #PyPy36-32bit: + #PYTHON_VERSION: 'PyPy3.6' + #PYTHON_ARCH: 'x32' + #TEST_MODE: fast + #BITS: 32 steps: - template: azure-steps-windows.yml - job: Linux_PyPy3 From 92a3c76976a7a791933160544bb73cd37c28da6a Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Sat, 3 Oct 2020 12:16:08 -0400 Subject: [PATCH 162/409] DOC: Security warning for issues template Tag for "Bug report" now warns not to file security vulnerabilities. Fixes gh-17434.
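(Returning to the ``einsum`` cross-references added in PATCH 160/409 above: a small usage sketch, plain NumPy only; neither einops nor opt_einsum is required to run it.)

    import numpy as np

    a = np.arange(6).reshape(2, 3)
    b = np.arange(12).reshape(3, 4)

    # matrix multiplication written as an explicit index contraction;
    # optimize=True lets NumPy choose a contraction order, which is the
    # concern opt_einsum specializes in for larger expressions
    c = np.einsum('ij,jk->ik', a, b, optimize=True)
    assert np.array_equal(c, a @ b)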
--- .github/ISSUE_TEMPLATE/bug-report.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md index d2df08689da5..78ffc1063eaf 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.md +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -1,6 +1,6 @@ --- -name: "Bug Report" -about: Submit a bug report to help us improve NumPy +name: "Bug report" +about: Report a bug. Not for security vulnerabilities -- see below. --- From d55243ea48b06d82584ec0ead0de5dc72b70be8f Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Sat, 3 Oct 2020 12:26:22 -0400 Subject: [PATCH 163/409] DOC: Fix "Feature request" spelling in issue templates "Request" had an upper-case R, inconsistent with Google style and other template items. --- .github/ISSUE_TEMPLATE/feature-request.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md index 0be94f92893d..00c6f59c5faf 100644 --- a/.github/ISSUE_TEMPLATE/feature-request.md +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -1,5 +1,5 @@ --- -name: "Feature Request" +name: "Feature request" about: Check instructions for submitting your idea on the mailing list first. --- From 3ebcf252efb8189e5d2b114685280fd5fff007ef Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Sat, 3 Oct 2020 15:17:31 -0400 Subject: [PATCH 164/409] DOC: Revise credit line per CC Adds "CC BY 4.0" credit to the Pauli Virtanen example. --- doc/source/user/how-to-io.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/user/how-to-io.rst b/doc/source/user/how-to-io.rst index 1d4ded159422..ca9fc41f07f8 100644 --- a/doc/source/user/how-to-io.rst +++ b/doc/source/user/how-to-io.rst @@ -250,7 +250,8 @@ The ``.wav`` file header as a NumPy structured dtype:: This ``.wav`` example is for illustration; to read a ``.wav`` file in real life, use Python's built-in module :mod:`wave`. -Credit: Pauli Virtanen, :ref:`advanced_numpy`. +(Adapted from Pauli Virtanen, :ref:`advanced_numpy`, licensed +under `CC BY 4.0 `_.) .. _how-to-io-large-arrays: From fe01a9b1cca286f67e01807786b991dd22b1b7f7 Mon Sep 17 00:00:00 2001 From: Abhishek Singh <34937403+devhpes@users.noreply.github.com> Date: Sun, 4 Oct 2020 01:42:11 +0530 Subject: [PATCH 165/409] MAINT: Fix exception chaining in format.py --- numpy/lib/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/lib/format.py b/numpy/lib/format.py index afbd3784a9ad..5d951e262570 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -746,7 +746,7 @@ def read_array(fp, allow_pickle=False, pickle_kwargs=None): # Friendlier error message raise UnicodeError("Unpickling a python object failed: %r\n" "You may need to pass the encoding= option " - "to numpy.load" % (err,)) + "to numpy.load" % (err,)) from err else: if isfileobj(fp): # We can use the fast fromfile() function. From 93b5da05bcaa39d76b07c334690f85d5d54f54df Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Sat, 3 Oct 2020 17:03:18 -0400 Subject: [PATCH 166/409] DOC: Cleaner PR template Makes the instructions easy to see and hard to delete unread. Moves some text to new sections in dev/development_workflow.rst. The dashes above and below the heading, which seem too long in monospace, fit it well in Chromium browsers and reasonably well in Firefox.
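(A note on the ``raise ... from err`` pattern applied in PATCH 165/409 above; the helper below is a hedged, illustrative sketch, not NumPy API.)

    import pickle

    def load_pickled(buf):
        # mirrors the shape of the error handling in numpy.lib.format.read_array
        try:
            return pickle.loads(buf)
        except Exception as err:
            # `from err` records the original exception as __cause__, so the
            # traceback shows one explicit chain instead of the implicit
            # "During handling of the above exception..." message
            raise UnicodeError(
                "Unpickling a python object failed: %r" % (err,)) from err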
--- .github/PULL_REQUEST_TEMPLATE.md | 40 ++++++++++++++----------- doc/source/dev/development_workflow.rst | 35 ++++++++++++++++------ 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3634292f8ce7..075d2519b2ce 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,18 +1,22 @@ - - - - - - - + diff --git a/doc/source/dev/development_workflow.rst b/doc/source/dev/development_workflow.rst index d5a49a9f9512..53f3eef9136b 100644 --- a/doc/source/dev/development_workflow.rst +++ b/doc/source/dev/development_workflow.rst @@ -188,6 +188,16 @@ Standard acronyms to start the commit message with are:: REL: related to releasing numpy +.. _workflow_mailing_list: + +Get the mailing list's opinion +======================================================= + +If you plan a new feature or API change, it's wisest to first email the +NumPy `mailing list `_ +asking for comment. If you haven't heard back in a week, it's +OK to ping the list again. + .. _asking-for-merging: Asking for your changes to be merged with the main repo @@ -197,15 +207,22 @@ When you feel your work is finished, you can create a pull request (PR). Github has a nice help page that outlines the process for `filing pull requests`_. If your changes involve modifications to the API or addition/modification of a -function, you should - -- send an email to the `NumPy mailing list`_ with a link to your PR along with - a description of and a motivation for your changes. This may generate - changes and feedback. It might be prudent to start with this step if your - change may be controversial. -- add a release note to the ``doc/release/upcoming_changes/`` directory, - following the instructions and format in the - ``doc/release/upcoming_changes/README.rst`` file. +function, add a release note to the ``doc/release/upcoming_changes/`` +directory, following the instructions and format in the +``doc/release/upcoming_changes/README.rst`` file. + + +Getting your PR reviewed +======================== + +We review pull requests as soon as we can, typically within a week. If you get +no review comments within two weeks, feel free to ask for feedback by +adding a comment on your PR (this will notify maintainers). + +If your PR is large or complicated, asking for input on the numpy-discussion +mailing list may also be useful. + + .. _rebasing-on-master: From 1edd9693c2879e41d1f787a0731b49e286985c22 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Sat, 3 Oct 2020 17:11:32 -0400 Subject: [PATCH 167/409] DOC: Fixes for PR 17440 Colon missing in heading. Added sphinx anchor for PR-timeline (html anchor was OK). 
--- .github/PULL_REQUEST_TEMPLATE.md | 2 +- doc/source/dev/development_workflow.rst | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 075d2519b2ce..1f8d35aabe49 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -9,7 +9,7 @@ http://www.numpy.org/devdocs/dev/development_workflow.html#writing-the-commit-message -* IF IT'S A NEW FEATURE OR API CHANGE, TEST THE WATERS +* IF IT'S A NEW FEATURE OR API CHANGE, TEST THE WATERS: http://www.numpy.org/devdocs/dev/development_workflow.html#get-the-mailing-list-s-opinion diff --git a/doc/source/dev/development_workflow.rst b/doc/source/dev/development_workflow.rst index 53f3eef9136b..1665cfddb9b7 100644 --- a/doc/source/dev/development_workflow.rst +++ b/doc/source/dev/development_workflow.rst @@ -212,6 +212,8 @@ directory, following the instructions and format in the ``doc/release/upcoming_changes/README.rst`` file. +.. _workflow_PR_timeline: + Getting your PR reviewed ======================== From 49b10b0e32473441d84810fff236dbb0a5d11768 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Sat, 3 Oct 2020 17:16:17 -0400 Subject: [PATCH 168/409] DOC: Fix indents in PR #17440 --- .github/PULL_REQUEST_TEMPLATE.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 1f8d35aabe49..dee33ee5fd13 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -10,13 +10,13 @@ * IF IT'S A NEW FEATURE OR API CHANGE, TEST THE WATERS: - http://www.numpy.org/devdocs/dev/development_workflow.html#get-the-mailing-list-s-opinion + http://www.numpy.org/devdocs/dev/development_workflow.html#get-the-mailing-list-s-opinion * HIT ALL THE GUIDELINES: - https://numpy.org/devdocs/dev/index.html#guidelines + https://numpy.org/devdocs/dev/index.html#guidelines * WHAT TO DO IF WE HAVEN'T GOTTEN BACK TO YOU: - http://www.numpy.org/devdocs/dev/development_workflow.html#getting-your-pr-reviewed + http://www.numpy.org/devdocs/dev/development_workflow.html#getting-your-pr-reviewed --> From f3a6b33144e62ce77aa767b73d930dec420a3511 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 4 Oct 2020 12:09:52 +0300 Subject: [PATCH 169/409] ENH: Warn on unsupported Python 3.9+ --- setup.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1f5212676310..b4c06c402186 100755 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ import sys import subprocess import textwrap -import sysconfig +import warnings if sys.version_info[:2] < (3, 6): @@ -59,6 +59,13 @@ ISRELEASED = False VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO) +if sys.version_info >= (3, 9): + warnings.warn( + f"NumPy {VERSION} does not support Python " + f"{sys.version_info.major}.{sys.version_info.minor}.", + RuntimeWarning, + ) + # Return the git revision as a string def git_version(): From 2ebb45374d845ad6d4843e977c0ca03e53674d25 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 4 Oct 2020 12:41:47 +0300 Subject: [PATCH 170/409] MAINT: Fix Flake8 --- setup.py | 52 +++++++++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/setup.py b/setup.py index b4c06c402186..a4e498af09a3 100755 --- a/setup.py +++ b/setup.py @@ -95,6 +95,7 @@ def _minimal_ext_cmd(cmd): return GIT_REVISION + # BEFORE importing setuptools, remove MANIFEST.
Otherwise it may not be # properly updated when the contents of directories change (true for distutils, # not sure about setuptools). @@ -157,7 +158,7 @@ def write_version_py(filename='numpy/version.py'): a.close() -def configuration(parent_package='',top_path=None): +def configuration(parent_package='', top_path=None): from numpy.distutils.misc_util import Configuration config = Configuration(None, parent_package, top_path) @@ -170,7 +171,7 @@ def configuration(parent_package='',top_path=None): config.add_data_files(('numpy', 'LICENSE.txt')) config.add_data_files(('numpy', 'numpy/*.pxd')) - config.get_version('numpy/version.py') # sets config.version + config.get_version('numpy/version.py') # sets config.version return config @@ -182,13 +183,12 @@ def check_submodules(): if not os.path.exists('.git'): return with open('.gitmodules') as f: - for l in f: - if 'path' in l: - p = l.split('=')[-1].strip() + for line in f: + if 'path' in line: + p = line.split('=')[-1].strip() if not os.path.exists(p): raise ValueError('Submodule {} missing'.format(p)) - proc = subprocess.Popen(['git', 'submodule', 'status'], stdout=subprocess.PIPE) status, _ = proc.communicate() @@ -280,9 +280,9 @@ def generate_cython(): print("Cythonizing sources") for d in ('random',): p = subprocess.call([sys.executable, - os.path.join(cwd, 'tools', 'cythonize.py'), - 'numpy/{0}'.format(d)], - cwd=cwd) + os.path.join(cwd, 'tools', 'cythonize.py'), + 'numpy/{0}'.format(d)], + cwd=cwd) if p != 0: raise RuntimeError("Running cythonize failed!") @@ -353,7 +353,6 @@ def parse_setuppy_commands(): """)) return False - # The following commands aren't supported. They can only be executed when # the user explicitly adds a --force command-line argument. bad_commands = dict( @@ -391,8 +390,8 @@ def parse_setuppy_commands(): ) bad_commands['nosetests'] = bad_commands['test'] for command in ('upload_docs', 'easy_install', 'bdist', 'bdist_dumb', - 'register', 'check', 'install_data', 'install_headers', - 'install_lib', 'install_scripts', ): + 'register', 'check', 'install_data', 'install_headers', + 'install_lib', 'install_scripts', ): bad_commands[command] = "`setup.py %s` is not supported" % command for command in bad_commands.keys(): @@ -412,7 +411,8 @@ def parse_setuppy_commands(): # If we got here, we didn't detect what setup.py command was given import warnings warnings.warn("Unrecognized setuptools command, proceeding with " - "generating Cython sources and expanding templates", stacklevel=2) + "generating Cython sources and expanding templates", + stacklevel=2) return True @@ -447,25 +447,24 @@ def setup_package(): 'f2py%s.%s = numpy.f2py.f2py2e:main' % sys.version_info[:2], ] - cmdclass={"sdist": sdist_checked, - } + cmdclass = {"sdist": sdist_checked, } metadata = dict( - name = 'numpy', - maintainer = "NumPy Developers", - maintainer_email = "numpy-discussion@python.org", - description = DOCLINES[0], - long_description = "\n".join(DOCLINES[2:]), - url = "https://www.numpy.org", - author = "Travis E. Oliphant et al.", - download_url = "https://pypi.python.org/pypi/numpy", + name='numpy', + maintainer="NumPy Developers", + maintainer_email="numpy-discussion@python.org", + description=DOCLINES[0], + long_description="\n".join(DOCLINES[2:]), + url="https://www.numpy.org", + author="Travis E. 
Oliphant et al.", + download_url="https://pypi.python.org/pypi/numpy", project_urls={ "Bug Tracker": "https://github.com/numpy/numpy/issues", "Documentation": get_docs_url(), "Source Code": "https://github.com/numpy/numpy", }, - license = 'BSD', + license='BSD', classifiers=[_f for _f in CLASSIFIERS.split('\n') if _f], - platforms = ["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"], + platforms=["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"], test_suite='pytest', cmdclass=cmdclass, python_requires='>=3.6', @@ -486,8 +485,7 @@ def setup_package(): # patches distutils, even though we don't use it import setuptools # noqa: F401 from numpy.distutils.core import setup - cwd = os.path.abspath(os.path.dirname(__file__)) - if not 'sdist' in sys.argv: + if 'sdist' not in sys.argv: # Generate Cython sources, unless we're generating an sdist generate_cython() From c549dbb47d8b4e2d9ee6572ddaa78e494ec41238 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 4 Oct 2020 14:51:01 +0300 Subject: [PATCH 171/409] Add comment to remind to keep consistent with classifiers Co-authored-by: Eric Wieser --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index a4e498af09a3..e4fe91faec32 100755 --- a/setup.py +++ b/setup.py @@ -59,6 +59,7 @@ ISRELEASED = False VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO) +# keep this consistent with the `Programming Language :: Python :: ...` classifiers above if sys.version_info >= (3, 9): warnings.warn( f"NumPy {VERSION} does not support Python " From a30c410b1743e38ae32501f199a643629708cbd7 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 4 Oct 2020 14:52:21 +0300 Subject: [PATCH 172/409] Clarify warning, it might work before official support Co-authored-by: Eric Wieser --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e4fe91faec32..ffa3107afd00 100755 --- a/setup.py +++ b/setup.py @@ -62,7 +62,7 @@ # keep this consistent with the `Programming Language :: Python :: ...` classifiers above if sys.version_info >= (3, 9): warnings.warn( - f"NumPy {VERSION} does not support Python " + f"NumPy {VERSION} may not yet support Python " f"{sys.version_info.major}.{sys.version_info.minor}.", RuntimeWarning, ) From cac2df03a9f5caac03482b95f4db4c145ac2aa8a Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 4 Oct 2020 14:53:20 +0300 Subject: [PATCH 173/409] Already compatible with 3.10 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ffa3107afd00..09c66a4eef65 100755 --- a/setup.py +++ b/setup.py @@ -60,7 +60,7 @@ VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO) # keep this consistent with the `Programming Language :: Python :: ...` classifiers above -if sys.version_info >= (3, 9): +if sys.version_info >= (3, 10): warnings.warn( f"NumPy {VERSION} may not yet support Python " f"{sys.version_info.major}.{sys.version_info.minor}.", From 7178a1dc24484248a86525f22a8d80bf0d4510f3 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 4 Oct 2020 14:59:45 +0300 Subject: [PATCH 174/409] Update comment Co-authored-by: Eric Wieser --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 09c66a4eef65..fccc2277863d 100755 --- a/setup.py +++ b/setup.py @@ -59,7 +59,7 @@ ISRELEASED = False VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO) -# keep this consistent with the `Programming Language :: Python :: ...` classifiers above +# The first version not in the `Programming Language 
:: Python :: ...` classifiers above if sys.version_info >= (3, 10): warnings.warn( f"NumPy {VERSION} may not yet support Python " From 79a8e16ed2de533dc91e474c912cfaef8e1a1e92 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 4 Oct 2020 14:59:53 +0300 Subject: [PATCH 175/409] Add 3.10 classifier --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index fccc2277863d..e99313502afc 100755 --- a/setup.py +++ b/setup.py @@ -43,6 +43,7 @@ Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 +Programming Language :: Python :: 3.9 Programming Language :: Python :: 3 :: Only Programming Language :: Python :: Implementation :: CPython Topic :: Software Development From e592c27da4e3747184aa29acc799d06bcc77519b Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Sun, 4 Oct 2020 15:10:42 +0200 Subject: [PATCH 176/409] ENH: Add `Typing :: Typed` to the PyPi classifier --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 1f5212676310..cf0858e5f9ab 100755 --- a/setup.py +++ b/setup.py @@ -47,6 +47,7 @@ Programming Language :: Python :: Implementation :: CPython Topic :: Software Development Topic :: Scientific/Engineering +Typing :: Typed Operating System :: Microsoft :: Windows Operating System :: POSIX Operating System :: Unix From d84ff867373b60927d0dc3ff511a22828a9ac8dc Mon Sep 17 00:00:00 2001 From: takanori-pskq Date: Sun, 4 Oct 2020 21:39:30 +0900 Subject: [PATCH 177/409] DOC: Fix the references for macros --- doc/source/reference/arrays.interface.rst | 8 ++++---- doc/source/reference/c-api/array.rst | 8 +++++++- doc/source/reference/c-api/config.rst | 6 ++++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/doc/source/reference/arrays.interface.rst b/doc/source/reference/arrays.interface.rst index 73e4aef0c9f8..6d12cf999790 100644 --- a/doc/source/reference/arrays.interface.rst +++ b/doc/source/reference/arrays.interface.rst @@ -231,10 +231,10 @@ as:: The flags member may consist of 5 bits showing how the data should be interpreted and one bit showing how the Interface should be -interpreted. The data-bits are :const:`CONTIGUOUS` (0x1), -:const:`FORTRAN` (0x2), :const:`ALIGNED` (0x100), :const:`NOTSWAPPED` -(0x200), and :const:`WRITEABLE` (0x400). A final flag -:const:`ARR_HAS_DESCR` (0x800) indicates whether or not this structure +interpreted. The data-bits are :c:macro:`NPY_ARRAY_C_CONTIGUOUS` (0x1), +:c:macro:`NPY_ARRAY_F_CONTIGUOUS` (0x2), :c:macro:`NPY_ARRAY_ALIGNED` (0x100), +:c:macro:`NPY_ARRAY_NOTSWAPPED` (0x200), and :c:macro:`NPY_ARRAY_WRITEABLE` (0x400). A final flag +:c:macro:`NPY_ARR_HAS_DESCR` (0x800) indicates whether or not this structure has the arrdescr field. The field should not be accessed unless this flag is present. diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index cfe4d2d51fa5..9fe45d2def92 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -3259,6 +3259,8 @@ Memory management :c:data:`NPY_USE_PYMEM` is 0, if :c:data:`NPY_USE_PYMEM` is 1, then the Python memory allocator is used. + .. c:macro:: NPY_USE_PYMEM + .. c:function:: int PyArray_ResolveWritebackIfCopy(PyArrayObject* obj) If ``obj.flags`` has :c:data:`NPY_ARRAY_WRITEBACKIFCOPY` or (deprecated) @@ -3289,9 +3291,13 @@ be accomplished using two groups of macros. Typically, if one macro in a group is used in a code block, all of them must be used in the same code block. 
Currently, :c:data:`NPY_ALLOW_THREADS` is defined to the python-defined :c:data:`WITH_THREADS` constant unless the environment -variable :c:data:`NPY_NOSMP` is set in which case +variable ``NPY_NOSMP`` is set in which case :c:data:`NPY_ALLOW_THREADS` is defined to be 0. +.. c:macro:: NPY_ALLOW_THREADS + +.. c:macro:: WITH_THREADS + Group 1 """"""" diff --git a/doc/source/reference/c-api/config.rst b/doc/source/reference/c-api/config.rst index c3e2c98af078..5fdbc0ff1ece 100644 --- a/doc/source/reference/c-api/config.rst +++ b/doc/source/reference/c-api/config.rst @@ -102,6 +102,12 @@ Platform information One of :c:data:`NPY_CPU_BIG`, :c:data:`NPY_CPU_LITTLE`, or :c:data:`NPY_CPU_UNKNOWN_ENDIAN`. + .. c:macro:: NPY_CPU_BIG + + .. c:macro:: NPY_CPU_LITTLE + + .. c:macro:: NPY_CPU_UNKNOWN_ENDIAN + Compiler directives ------------------- From 7fb0b02df1ca5b388f81f88b22f478d65ce1b74e Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Sun, 4 Oct 2020 13:13:05 -0400 Subject: [PATCH 178/409] DOC: Remove CoC pages from Sphinx code_of_conduct.rst and report_handling_manual.rst duplicate pages now on Hugo. --- doc/source/dev/conduct/code_of_conduct.rst | 163 ------------- .../dev/conduct/report_handling_manual.rst | 220 ------------------ doc/source/dev/index.rst | 2 - 3 files changed, 385 deletions(-) delete mode 100644 doc/source/dev/conduct/code_of_conduct.rst delete mode 100644 doc/source/dev/conduct/report_handling_manual.rst diff --git a/doc/source/dev/conduct/code_of_conduct.rst b/doc/source/dev/conduct/code_of_conduct.rst deleted file mode 100644 index f2f0a536dc56..000000000000 --- a/doc/source/dev/conduct/code_of_conduct.rst +++ /dev/null @@ -1,163 +0,0 @@ -NumPy Code of Conduct -===================== - - -Introduction ------------- - -This code of conduct applies to all spaces managed by the NumPy project, -including all public and private mailing lists, issue trackers, wikis, blogs, -Twitter, and any other communication channel used by our community. The NumPy -project does not organise in-person events, however events related to our -community should have a code of conduct similar in spirit to this one. - -This code of conduct should be honored by everyone who participates in -the NumPy community formally or informally, or claims any affiliation with the -project, in any project-related activities and especially when representing the -project, in any role. - -This code is not exhaustive or complete. It serves to distill our common -understanding of a collaborative, shared environment and goals. Please try to -follow this code in spirit as much as in letter, to create a friendly and -productive environment that enriches the surrounding community. - - -Specific Guidelines -------------------- - -We strive to: - -1. Be open. We invite anyone to participate in our community. We prefer to use - public methods of communication for project-related messages, unless - discussing something sensitive. This applies to messages for help or - project-related support, too; not only is a public support request much more - likely to result in an answer to a question, it also ensures that any - inadvertent mistakes in answering are more easily detected and corrected. - -2. Be empathetic, welcoming, friendly, and patient. We work together to resolve - conflict, and assume good intentions. We may all experience some frustration - from time to time, but we do not allow frustration to turn into a personal - attack. A community where people feel uncomfortable or threatened is not a - productive one. - -3. 
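(Stepping back to the platform-information macros documented in PATCH 177/409: the same endianness information is visible from Python, as this hedged sketch shows; the printed values depend on the build.)

    import sys
    import numpy as np

    print(sys.byteorder)      # 'little' on NPY_CPU_LITTLE builds, 'big' on NPY_CPU_BIG
    print(np.little_endian)   # True when the build is little-endian
    # an explicitly big-endian dtype; prints '>i4' on little-endian builds
    print(np.dtype(np.int32).newbyteorder('>'))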
Be collaborative. Our work will be used by other people, and in turn we will - depend on the work of others. When we make something for the benefit of the - project, we are willing to explain to others how it works, so that they can - build on the work to make it even better. Any decision we make will affect - users and colleagues, and we take those consequences seriously when making - decisions. - -4. Be inquisitive. Nobody knows everything! Asking questions early avoids many - problems later, so we encourage questions, although we may direct them to - the appropriate forum. We will try hard to be responsive and helpful. - -5. Be careful in the words that we choose. We are careful and respectful in - our communication and we take responsibility for our own speech. Be kind to - others. Do not insult or put down other participants. We will not accept - harassment or other exclusionary behaviour, such as: - - - Violent threats or language directed against another person. - - Sexist, racist, or otherwise discriminatory jokes and language. - - Posting sexually explicit or violent material. - - Posting (or threatening to post) other people's personally identifying information ("doxing"). - - Sharing private content, such as emails sent privately or non-publicly, - or unlogged forums such as IRC channel history, without the sender's consent. - - Personal insults, especially those using racist or sexist terms. - - Unwelcome sexual attention. - - Excessive profanity. Please avoid swearwords; people differ greatly in their sensitivity to swearing. - - Repeated harassment of others. In general, if someone asks you to stop, then stop. - - Advocating for, or encouraging, any of the above behaviour. - - -Diversity Statement -------------------- - -The NumPy project welcomes and encourages participation by everyone. We are -committed to being a community that everyone enjoys being part of. Although -we may not always be able to accommodate each individual's preferences, we try -our best to treat everyone kindly. - -No matter how you identify yourself or how others perceive you: we welcome you. -Though no list can hope to be comprehensive, we explicitly honour diversity in: -age, culture, ethnicity, genotype, gender identity or expression, language, -national origin, neurotype, phenotype, political beliefs, profession, race, -religion, sexual orientation, socioeconomic status, subculture and technical -ability, to the extent that these do not conflict with this code of conduct. - - -Though we welcome people fluent in all languages, NumPy development is -conducted in English. - -Standards for behaviour in the NumPy community are detailed in the Code of -Conduct above. Participants in our community should uphold these standards -in all their interactions and help others to do so as well (see next section). - - -Reporting Guidelines --------------------- - -We know that it is painfully common for internet communication to start at or -devolve into obvious and flagrant abuse. We also recognize that sometimes -people may have a bad day, or be unaware of some of the guidelines in this Code -of Conduct. Please keep this in mind when deciding on how to respond to a -breach of this Code. - -For clearly intentional breaches, report those to the Code of Conduct committee -(see below). For possibly unintentional breaches, you may reply to the person -and point out this code of conduct (either in public or in private, whatever is -most appropriate). 
If you would prefer not to do that, please feel free to -report to the Code of Conduct Committee directly, or ask the Committee for -advice, in confidence. - -You can report issues to the NumPy Code of Conduct committee, at -numpy-conduct@googlegroups.com. Currently, the committee consists of: - -- Stefan van der Walt -- Melissa Weber Mendonça -- Anirudh Subramanian - -If your report involves any members of the committee, or if they feel they have -a conflict of interest in handling it, then they will recuse themselves from -considering your report. Alternatively, if for any reason you feel -uncomfortable making a report to the committee, then you can also contact: - -- Senior `NumFOCUS staff `__: conduct@numfocus.org - - -Incident reporting resolution & Code of Conduct enforcement ------------------------------------------------------------ - -*This section summarizes the most important points, more details can be found -in* :ref:`CoC_reporting_manual`. - -We will investigate and respond to all complaints. The NumPy Code of Conduct -Committee and the NumPy Steering Committee (if involved) will protect the -identity of the reporter, and treat the content of complaints as confidential -(unless the reporter agrees otherwise). - -In case of severe and obvious breaches, e.g. personal threat or violent, sexist -or racist language, we will immediately disconnect the originator from NumPy -communication channels; please see the manual for details. - -In cases not involving clear severe and obvious breaches of this code of -conduct, the process for acting on any received code of conduct violation -report will be: - -1. acknowledge report is received -2. reasonable discussion/feedback -3. mediation (if feedback didn't help, and only if both reporter and reportee agree to this) -4. enforcement via transparent decision (see :ref:`CoC_resolutions`) by the - Code of Conduct Committee - -The committee will respond to any report as soon as possible, and at most -within 72 hours. - - -Endnotes --------- - -We are thankful to the groups behind the following documents, from which we -drew content and inspiration: - -- `The SciPy Code of Conduct `_ - diff --git a/doc/source/dev/conduct/report_handling_manual.rst b/doc/source/dev/conduct/report_handling_manual.rst deleted file mode 100644 index d39b615bb8c1..000000000000 --- a/doc/source/dev/conduct/report_handling_manual.rst +++ /dev/null @@ -1,220 +0,0 @@ -:orphan: - -.. _CoC_reporting_manual: - -NumPy Code of Conduct - How to follow up on a report ----------------------------------------------------- - -This is the manual followed by NumPy's Code of Conduct Committee. It's used -when we respond to an issue to make sure we're consistent and fair. - -Enforcing the Code of Conduct impacts our community today and for the future. -It's an action that we do not take lightly. When reviewing enforcement -measures, the Code of Conduct Committee will keep the following values and -guidelines in mind: - -* Act in a personal manner rather than impersonal. The Committee can engage - the parties to understand the situation, while respecting the privacy and any - necessary confidentiality of reporters. However, sometimes it is necessary - to communicate with one or more individuals directly: the Committee's goal is - to improve the health of our community rather than only produce a formal - decision. - -* Emphasize empathy for individuals rather than judging behavior, avoiding - binary labels of "good" and "bad/evil". 
Overt, clear-cut aggression and - harassment exists and we will be address that firmly. But many scenarios - that can prove challenging to resolve are those where normal disagreements - devolve into unhelpful or harmful behavior from multiple parties. - Understanding the full context and finding a path that re-engages all is - hard, but ultimately the most productive for our community. - -* We understand that email is a difficult medium and can be isolating. - Receiving criticism over email, without personal contact, can be - particularly painful. This makes it especially important to keep an - atmosphere of open-minded respect of the views of others. It also means - that we must be transparent in our actions, and that we will do everything - in our power to make sure that all our members are treated fairly and with - sympathy. - -* Discrimination can be subtle and it can be unconscious. It can show itself - as unfairness and hostility in otherwise ordinary interactions. We know - that this does occur, and we will take care to look out for it. We would - very much like to hear from you if you feel you have been treated unfairly, - and we will use these procedures to make sure that your complaint is heard - and addressed. - -* Help increase engagement in good discussion practice: try to identify where - discussion may have broken down and provide actionable information, pointers - and resources that can lead to positive change on these points. - -* Be mindful of the needs of new members: provide them with explicit support - and consideration, with the aim of increasing participation from - underrepresented groups in particular. - -* Individuals come from different cultural backgrounds and native languages. - Try to identify any honest misunderstandings caused by a non-native speaker - and help them understand the issue and what they can change to avoid causing - offence. Complex discussion in a foreign language can be very intimidating, - and we want to grow our diversity also across nationalities and cultures. - -*Mediation*: voluntary, informal mediation is a tool at our disposal. In -contexts such as when two or more parties have all escalated to the point of -inappropriate behavior (something sadly common in human conflict), it may be -useful to facilitate a mediation process. This is only an example: the -Committee can consider mediation in any case, mindful that the process is meant -to be strictly voluntary and no party can be pressured to participate. If the -Committee suggests mediation, it should: - -* Find a candidate who can serve as a mediator. -* Obtain the agreement of the reporter(s). The reporter(s) have complete - freedom to decline the mediation idea, or to propose an alternate mediator. -* Obtain the agreement of the reported person(s). -* Settle on the mediator: while parties can propose a different mediator than - the suggested candidate, only if common agreement is reached on all terms can - the process move forward. -* Establish a timeline for mediation to complete, ideally within two weeks. - -The mediator will engage with all the parties and seek a resolution that is -satisfactory to all. Upon completion, the mediator will provide a report -(vetted by all parties to the process) to the Committee, with recommendations -on further steps. The Committee will then evaluate these results (whether -satisfactory resolution was achieved or not) and decide on any additional -action deemed necessary. 
- - -How the committee will respond to reports -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When the committee (or a committee member) receives a report, they will first -determine whether the report is about a clear and severe breach (as defined -below). If so, immediate action needs to be taken in addition to the regular -report handling process. - -Clear and severe breach actions -+++++++++++++++++++++++++++++++ - -We know that it is painfully common for internet communication to start at or -devolve into obvious and flagrant abuse. We will deal quickly with clear and -severe breaches like personal threats, violent, sexist or racist language. - -When a member of the Code of Conduct committee becomes aware of a clear and -severe breach, they will do the following: - -* Immediately disconnect the originator from all NumPy communication channels. -* Reply to the reporter that their report has been received and that the - originator has been disconnected. -* In every case, the moderator should make a reasonable effort to contact the - originator, and tell them specifically how their language or actions - qualify as a "clear and severe breach". The moderator should also say - that, if the originator believes this is unfair or they want to be - reconnected to NumPy, they have the right to ask for a review, as below, by - the Code of Conduct Committee. - The moderator should copy this explanation to the Code of Conduct Committee. -* The Code of Conduct Committee will formally review and sign off on all cases - where this mechanism has been applied to make sure it is not being used to - control ordinary heated disagreement. - -Report handling -+++++++++++++++ - -When a report is sent to the committee they will immediately reply to the -reporter to confirm receipt. This reply must be sent within 72 hours, and the -group should strive to respond much quicker than that. - -If a report doesn't contain enough information, the committee will obtain all -relevant data before acting. The committee is empowered to act on the Steering -Council’s behalf in contacting any individuals involved to get a more complete -account of events. - -The committee will then review the incident and determine, to the best of their -ability: - -* What happened. -* Whether this event constitutes a Code of Conduct violation. -* Who are the responsible party(ies). -* Whether this is an ongoing situation, and there is a threat to anyone's - physical safety. - -This information will be collected in writing, and whenever possible the -group's deliberations will be recorded and retained (i.e. chat transcripts, -email discussions, recorded conference calls, summaries of voice conversations, -etc). - -It is important to retain an archive of all activities of this committee to -ensure consistency in behavior and provide institutional memory for the -project. To assist in this, the default channel of discussion for this -committee will be a private mailing list accessible to current and future -members of the committee as well as members of the Steering Council upon -justified request. If the Committee finds the need to use off-list -communications (e.g. phone calls for early/rapid response), it should in all -cases summarize these back to the list so there's a good record of the process. - -The Code of Conduct Committee should aim to have a resolution agreed upon within -two weeks. 
In the event that a resolution can't be determined in that time, the -committee will respond to the reporter(s) with an update and projected timeline -for resolution. - - -.. _CoC_resolutions: - -Resolutions -~~~~~~~~~~~ - -The committee must agree on a resolution by consensus. If the group cannot reach -consensus and deadlocks for over a week, the group will turn the matter over to -the Steering Council for resolution. - - -Possible responses may include: - -* Taking no further action - - - if we determine no violations have occurred. - - if the matter has been resolved publicly while the committee was considering responses. - -* Coordinating voluntary mediation: if all involved parties agree, the - Committee may facilitate a mediation process as detailed above. -* Remind publicly, and point out that some behavior/actions/language have been - judged inappropriate and why in the current context, or can but hurtful to - some people, requesting the community to self-adjust. -* A private reprimand from the committee to the individual(s) involved. In this - case, the group chair will deliver that reprimand to the individual(s) over - email, cc'ing the group. -* A public reprimand. In this case, the committee chair will deliver that - reprimand in the same venue that the violation occurred, within the limits of - practicality. E.g., the original mailing list for an email violation, but - for a chat room discussion where the person/context may be gone, they can be - reached by other means. The group may choose to publish this message - elsewhere for documentation purposes. -* A request for a public or private apology, assuming the reporter agrees to - this idea: they may at their discretion refuse further contact with the - violator. The chair will deliver this request. The committee may, if it - chooses, attach "strings" to this request: for example, the group may ask a - violator to apologize in order to retain one’s membership on a mailing list. -* A "mutually agreed upon hiatus" where the committee asks the individual to - temporarily refrain from community participation. If the individual chooses - not to take a temporary break voluntarily, the committee may issue a - "mandatory cooling off period". -* A permanent or temporary ban from some or all NumPy spaces (mailing lists, - gitter.im, etc.). The group will maintain records of all such bans so that - they may be reviewed in the future or otherwise maintained. - -Once a resolution is agreed upon, but before it is enacted, the committee will -contact the original reporter and any other affected parties and explain the -proposed resolution. The committee will ask if this resolution is acceptable, -and must note feedback for the record. - -Finally, the committee will make a report to the NumPy Steering Council (as -well as the NumPy core team in the event of an ongoing resolution, such as a -ban). - -The committee will never publicly discuss the issue; all public statements will -be made by the chair of the Code of Conduct Committee or the NumPy Steering -Council. - - -Conflicts of Interest -~~~~~~~~~~~~~~~~~~~~~ - -In the event of any conflict of interest, a committee member must immediately -notify the other members, and recuse themselves if necessary. diff --git a/doc/source/dev/index.rst b/doc/source/dev/index.rst index c4f35b68ff6b..020df0b2bd29 100644 --- a/doc/source/dev/index.rst +++ b/doc/source/dev/index.rst @@ -9,7 +9,6 @@ Contributing to NumPy .. 
toctree:: :hidden: - conduct/code_of_conduct Git Basics development_environment development_workflow @@ -293,7 +292,6 @@ The rest of the story .. toctree:: :maxdepth: 2 - conduct/code_of_conduct Git Basics development_environment development_workflow From 5ee77370b92f24b06a221774c6b1c4292e6b09ef Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 4 Oct 2020 19:15:08 +0100 Subject: [PATCH 179/409] DOC: Improve [source] links for C extension types Previously, these would all link to `numpy/core/__init__.py`. Now the scalar type and `ndarray` link to the files where the `PyTypeObject` is defined. In future, we should do this for all extension types, probably automatically. --- doc/source/conf.py | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index fe7ea096740d..5bb8870b4f8c 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -310,6 +310,17 @@ def setup(app): else: print("NOTE: linkcode extension not found -- no links to source generated") + +def _get_c_source_file(obj): + if issubclass(obj, numpy.generic): + return r"core/src/multiarray/scalartypes.c.src" + elif obj is numpy.ndarray: + return r"core/src/multiarray/arrayobject.c" + else: + # todo: come up with a better way to generate these + return None + + def linkcode_resolve(domain, info): """ Determine the URL corresponding to Python object @@ -340,25 +351,33 @@ def linkcode_resolve(domain, info): else: obj = unwrap(obj) - try: - fn = inspect.getsourcefile(obj) - except Exception: - fn = None - if not fn: - return None + fn = None + lineno = None - try: - source, lineno = inspect.getsourcelines(obj) - except Exception: - lineno = None + # Make a poor effort at linking C extension types + if isinstance(obj, type) and obj.__module__ == 'numpy': + fn = _get_c_source_file(obj) + + if fn is None: + try: + fn = inspect.getsourcefile(obj) + except Exception: + fn = None + if not fn: + return None + + try: + source, lineno = inspect.getsourcelines(obj) + except Exception: + lineno = None + + fn = relpath(fn, start=dirname(numpy.__file__)) if lineno: linespec = "#L%d-L%d" % (lineno, lineno + len(source) - 1) else: linespec = "" - fn = relpath(fn, start=dirname(numpy.__file__)) - if 'dev' in numpy.__version__: return "https://github.com/numpy/numpy/blob/master/numpy/%s%s" % ( fn, linespec) From 2877fb04ecd9816edadac9d42555e06aaa807f2d Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 4 Oct 2020 21:01:54 +0100 Subject: [PATCH 180/409] DOC: Change the value of type.__name__ on the scalar types for sphinx builds By default, the `.__name__` of the numeric `np.generic` subclasses is their bitlength name, such as `np.int64`. This is convenient when working interactively, because it lets users see the size of their array easily; but in docs it is confusing, as the sizes of the integers in the doc build may not match their size on the platform of the user reading them. Without this change, `..autoclass:: numpy.short` would just display "alias of uint16", which is backwards. Rather than changing the names globally, or adding a build flag to change the names, this uses `ctypes` to modify the scalar names at startup. This resembles the approach taken by the `forbiddenfruit` module for patching builtin slots, although that would be overkill here. 
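(A hedged illustration of the behaviour this commit works around; the printed values are typical but platform dependent, which is exactly the problem for rendered documentation.)

    import numpy as np

    # interactively, the sized __name__ is convenient...
    print(np.short.__name__)   # typically 'int16'
    print(np.intc.__name__)    # typically 'int32'
    print(np.int_.__name__)    # 'int64' on most 64-bit Linux builds, 'int32' on Windows
    # ...but baked into the docs it misleads readers on other platforms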
The timing of when we perform this patching is important - we can't do it until after `numpy.core._multiarray_umath` has been loaded, but we need to do it before `numpy.core._add_newdocs` generates the name-based docstrings. Similarly, we can't just disable `numpy.core._add_newdocs` until later, as it populates docstrings in `ndarray` on which `numpy.ma.core` does further processing. To resolve this, we split out the scalar docstrings in `numpy.core._add_newdocs` into a new module `numpy.core._add_newdocs_scalars` that _is_ safe to disable until later. --- doc/source/conf.py | 56 +++++++++ numpy/core/__init__.py | 1 + numpy/core/_add_newdocs.py | 182 --------------------------- numpy/core/_add_newdocs_scalars.py | 195 +++++++++++++++++++++++++++++ 4 files changed, 252 insertions(+), 182 deletions(-) create mode 100644 numpy/core/_add_newdocs_scalars.py diff --git a/doc/source/conf.py b/doc/source/conf.py index 5bb8870b4f8c..98a2565dbbb1 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -6,6 +6,62 @@ # Minimum version, enforced by sphinx needs_sphinx = '2.2.0' + + +# This is a nasty hack to use platform-agnostic names for types in the +# documentation. + +# must be kept alive to hold the patched names +_name_cache = {} + +def replace_scalar_type_names(): + """ Rename numpy types to use the canonical names to make sphinx behave """ + import ctypes + + Py_ssize_t = ctypes.c_int64 if ctypes.sizeof(ctypes.c_void_p) == 8 else ctypes.c_int32 + + class PyObject(ctypes.Structure): + pass + + class PyTypeObject(ctypes.Structure): + pass + + PyObject._fields_ = [ + ('ob_refcnt', Py_ssize_t), + ('ob_type', ctypes.POINTER(PyTypeObject)), + ] + + + PyTypeObject._fields_ = [ + # varhead + ('ob_base', PyObject), + ('ob_size', Py_ssize_t), + # declaration + ('tp_name', ctypes.c_char_p), + ] + + # prevent numpy attaching docstrings to the scalar types + assert 'numpy.core._add_newdocs_scalars' not in sys.modules + sys.modules['numpy.core._add_newdocs_scalars'] = object() + + import numpy + + # change the __name__ of the scalar types + for name in [ + 'byte', 'short', 'intc', 'int_', 'longlong', + 'ubyte', 'ushort', 'uintc', 'uint', 'ulonglong', + 'half', 'single', 'double', 'longdouble', + 'half', 'csingle', 'cdouble', 'clongdouble', + ]: + typ = getattr(numpy, name) + c_typ = PyTypeObject.from_address(id(typ)) + c_typ.tp_name = _name_cache[typ] = b"numpy." + name.encode('utf8') + + # now generate the docstrings as usual + del sys.modules['numpy.core._add_newdocs_scalars'] + import numpy.core._add_newdocs_scalars + +replace_scalar_type_names() + # ----------------------------------------------------------------------------- # General configuration # ----------------------------------------------------------------------------- diff --git a/numpy/core/__init__.py b/numpy/core/__init__.py index a0769cc89112..e8d3a381b602 100644 --- a/numpy/core/__init__.py +++ b/numpy/core/__init__.py @@ -96,6 +96,7 @@ # do this after everything else, to minimize the chance of this misleadingly # appearing in an import-time traceback from . import _add_newdocs +from . import _add_newdocs_scalars # add these for module-freeze analysis (like PyInstaller) from . import _dtype_ctypes from . 
import _internal diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index b8cf12c60c1c..aa858761da78 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -9,8 +9,6 @@ """ -from numpy.core import numerictypes as _numerictypes -from numpy.core import dtype from numpy.core.function_base import add_newdoc from numpy.core.overrides import array_function_like_doc @@ -6283,183 +6281,3 @@ def refer_to_array_attribute(attr, method=True): Abstract base class of all character string scalar types. """) - - -############################################################################## -# -# Documentation for concrete scalar classes -# -############################################################################## - -def numeric_type_aliases(aliases): - def type_aliases_gen(): - for alias, doc in aliases: - try: - alias_type = getattr(_numerictypes, alias) - except AttributeError: - # The set of aliases that actually exist varies between platforms - pass - else: - yield (alias_type, alias, doc) - return list(type_aliases_gen()) - - -possible_aliases = numeric_type_aliases([ - ('int8', '8-bit signed integer (-128 to 127)'), - ('int16', '16-bit signed integer (-32768 to 32767)'), - ('int32', '32-bit signed integer (-2147483648 to 2147483647)'), - ('int64', '64-bit signed integer (-9223372036854775808 to 9223372036854775807)'), - ('intp', 'Signed integer large enough to fit pointer, compatible with C ``intptr_t``'), - ('uint8', '8-bit unsigned integer (0 to 255)'), - ('uint16', '16-bit unsigned integer (0 to 65535)'), - ('uint32', '32-bit unsigned integer (0 to 4294967295)'), - ('uint64', '64-bit unsigned integer (0 to 18446744073709551615)'), - ('uintp', 'Unsigned integer large enough to fit pointer, compatible with C ``uintptr_t``'), - ('float16', '16-bit-precision floating-point number type: sign bit, 5 bits exponent, 10 bits mantissa'), - ('float32', '32-bit-precision floating-point number type: sign bit, 8 bits exponent, 23 bits mantissa'), - ('float64', '64-bit precision floating-point number type: sign bit, 11 bits exponent, 52 bits mantissa'), - ('float96', '96-bit extended-precision floating-point number type'), - ('float128', '128-bit extended-precision floating-point number type'), - ('complex64', 'Complex number type composed of 2 32-bit-precision floating-point numbers'), - ('complex128', 'Complex number type composed of 2 64-bit-precision floating-point numbers'), - ('complex192', 'Complex number type composed of 2 96-bit extended-precision floating-point numbers'), - ('complex256', 'Complex number type composed of 2 128-bit extended-precision floating-point numbers'), - ]) - - -def add_newdoc_for_scalar_type(obj, fixed_aliases, doc): - o = getattr(_numerictypes, obj) - - character_code = dtype(o).char - canonical_name_doc = "" if obj == o.__name__ else "Canonical name: ``np.{}``.\n ".format(obj) - alias_doc = ''.join("Alias: ``np.{}``.\n ".format(alias) for alias in fixed_aliases) - alias_doc += ''.join("Alias *on this platform*: ``np.{}``: {}.\n ".format(alias, doc) - for (alias_type, alias, doc) in possible_aliases if alias_type is o) - - docstring = """ - {doc} - Character code: ``'{character_code}'``. - {canonical_name_doc}{alias_doc} - """.format(doc=doc.strip(), character_code=character_code, - canonical_name_doc=canonical_name_doc, alias_doc=alias_doc) - - add_newdoc('numpy.core.numerictypes', obj, docstring) - - -add_newdoc_for_scalar_type('bool_', ['bool8'], - """ - Boolean type (True or False), stored as a byte. 
- """) - -add_newdoc_for_scalar_type('byte', [], - """ - Signed integer type, compatible with C ``char``. - """) - -add_newdoc_for_scalar_type('short', [], - """ - Signed integer type, compatible with C ``short``. - """) - -add_newdoc_for_scalar_type('intc', [], - """ - Signed integer type, compatible with C ``int``. - """) - -add_newdoc_for_scalar_type('int_', [], - """ - Signed integer type, compatible with Python `int` anc C ``long``. - """) - -add_newdoc_for_scalar_type('longlong', [], - """ - Signed integer type, compatible with C ``long long``. - """) - -add_newdoc_for_scalar_type('ubyte', [], - """ - Unsigned integer type, compatible with C ``unsigned char``. - """) - -add_newdoc_for_scalar_type('ushort', [], - """ - Unsigned integer type, compatible with C ``unsigned short``. - """) - -add_newdoc_for_scalar_type('uintc', [], - """ - Unsigned integer type, compatible with C ``unsigned int``. - """) - -add_newdoc_for_scalar_type('uint', [], - """ - Unsigned integer type, compatible with C ``unsigned long``. - """) - -add_newdoc_for_scalar_type('ulonglong', [], - """ - Signed integer type, compatible with C ``unsigned long long``. - """) - -add_newdoc_for_scalar_type('half', [], - """ - Half-precision floating-point number type. - """) - -add_newdoc_for_scalar_type('single', [], - """ - Single-precision floating-point number type, compatible with C ``float``. - """) - -add_newdoc_for_scalar_type('double', ['float_'], - """ - Double-precision floating-point number type, compatible with Python `float` - and C ``double``. - """) - -add_newdoc_for_scalar_type('longdouble', ['longfloat'], - """ - Extended-precision floating-point number type, compatible with C - ``long double`` but not necessarily with IEEE 754 quadruple-precision. - """) - -add_newdoc_for_scalar_type('csingle', ['singlecomplex'], - """ - Complex number type composed of two single-precision floating-point - numbers. - """) - -add_newdoc_for_scalar_type('cdouble', ['cfloat', 'complex_'], - """ - Complex number type composed of two double-precision floating-point - numbers, compatible with Python `complex`. - """) - -add_newdoc_for_scalar_type('clongdouble', ['clongfloat', 'longcomplex'], - """ - Complex number type composed of two extended-precision floating-point - numbers. - """) - -add_newdoc_for_scalar_type('object_', [], - """ - Any Python object. - """) - -# TODO: work out how to put this on the base class, np.floating -for float_name in ('half', 'single', 'double', 'longdouble'): - add_newdoc('numpy.core.numerictypes', float_name, ('as_integer_ratio', - """ - {ftype}.as_integer_ratio() -> (int, int) - - Return a pair of integers, whose ratio is exactly equal to the original - floating point number, and with a positive denominator. - Raise OverflowError on infinities and a ValueError on NaNs. - - >>> np.{ftype}(10.0).as_integer_ratio() - (10, 1) - >>> np.{ftype}(0.0).as_integer_ratio() - (0, 1) - >>> np.{ftype}(-.25).as_integer_ratio() - (-1, 4) - """.format(ftype=float_name))) diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py new file mode 100644 index 000000000000..c367c18ed094 --- /dev/null +++ b/numpy/core/_add_newdocs_scalars.py @@ -0,0 +1,195 @@ +""" +This file is separate from ``_add_newdocs.py`` so that it can be mocked out by +our sphinx ``conf.py`` during doc builds, where we want to avoid showing +platform-dependent information. 
+""" +from numpy.core import dtype +from numpy.core import numerictypes as _numerictypes +from numpy.core.function_base import add_newdoc + +############################################################################## +# +# Documentation for concrete scalar classes +# +############################################################################## + +def numeric_type_aliases(aliases): + def type_aliases_gen(): + for alias, doc in aliases: + try: + alias_type = getattr(_numerictypes, alias) + except AttributeError: + # The set of aliases that actually exist varies between platforms + pass + else: + yield (alias_type, alias, doc) + return list(type_aliases_gen()) + + +possible_aliases = numeric_type_aliases([ + ('int8', '8-bit signed integer (``-128`` to ``127``)'), + ('int16', '16-bit signed integer (``-32_768`` to ``32_767``)'), + ('int32', '32-bit signed integer (``-2_147_483_648`` to ``2_147_483_647``)'), + ('int64', '64-bit signed integer (``-9_223_372_036_854_775_808`` to ``9_223_372_036_854_775_807``)'), + ('intp', 'Signed integer large enough to fit pointer, compatible with C ``intptr_t``'), + ('uint8', '8-bit unsigned integer (``0`` to ``255``)'), + ('uint16', '16-bit unsigned integer (``0`` to ``65_535``)'), + ('uint32', '32-bit unsigned integer (``0`` to ``4_294_967_295``)'), + ('uint64', '64-bit unsigned integer (``0`` to ``18_446_744_073_709_551_615``)'), + ('uintp', 'Unsigned integer large enough to fit pointer, compatible with C ``uintptr_t``'), + ('float16', '16-bit-precision floating-point number type: sign bit, 5 bits exponent, 10 bits mantissa'), + ('float32', '32-bit-precision floating-point number type: sign bit, 8 bits exponent, 23 bits mantissa'), + ('float64', '64-bit precision floating-point number type: sign bit, 11 bits exponent, 52 bits mantissa'), + ('float96', '96-bit extended-precision floating-point number type'), + ('float128', '128-bit extended-precision floating-point number type'), + ('complex64', 'Complex number type composed of 2 32-bit-precision floating-point numbers'), + ('complex128', 'Complex number type composed of 2 64-bit-precision floating-point numbers'), + ('complex192', 'Complex number type composed of 2 96-bit extended-precision floating-point numbers'), + ('complex256', 'Complex number type composed of 2 128-bit extended-precision floating-point numbers'), + ]) + + +def add_newdoc_for_scalar_type(obj, fixed_aliases, doc): + # note: `:field: value` is rST syntax which renders as field lists. + o = getattr(_numerictypes, obj) + + character_code = dtype(o).char + canonical_name_doc = "" if obj == o.__name__ else ":Canonical name: `numpy.{}`\n ".format(obj) + alias_doc = ''.join(":Alias: `numpy.{}`\n ".format(alias) for alias in fixed_aliases) + alias_doc += ''.join(":Alias on this platform: `numpy.{}`: {}.\n ".format(alias, doc) + for (alias_type, alias, doc) in possible_aliases if alias_type is o) + docstring = """ + {doc} + + :Character code: ``'{character_code}'`` + {canonical_name_doc}{alias_doc} + """.format(doc=doc.strip(), character_code=character_code, + canonical_name_doc=canonical_name_doc, alias_doc=alias_doc) + + add_newdoc('numpy.core.numerictypes', obj, docstring) + + +add_newdoc_for_scalar_type('bool_', ['bool8'], + """ + Boolean type (True or False), stored as a byte. + + .. warning:: + + The :class:`bool_` type is not a subclass of the :class:`int_` type + (the :class:`bool_` is not even a number type). This is different + than Python's default implementation of :class:`bool` as a + sub-class of :class:`int`. 
+ """) + +add_newdoc_for_scalar_type('byte', [], + """ + Signed integer type, compatible with C ``char``. + """) + +add_newdoc_for_scalar_type('short', [], + """ + Signed integer type, compatible with C ``short``. + """) + +add_newdoc_for_scalar_type('intc', [], + """ + Signed integer type, compatible with C ``int``. + """) + +add_newdoc_for_scalar_type('int_', [], + """ + Signed integer type, compatible with Python `int` and C ``long``. + """) + +add_newdoc_for_scalar_type('longlong', [], + """ + Signed integer type, compatible with C ``long long``. + """) + +add_newdoc_for_scalar_type('ubyte', [], + """ + Unsigned integer type, compatible with C ``unsigned char``. + """) + +add_newdoc_for_scalar_type('ushort', [], + """ + Unsigned integer type, compatible with C ``unsigned short``. + """) + +add_newdoc_for_scalar_type('uintc', [], + """ + Unsigned integer type, compatible with C ``unsigned int``. + """) + +add_newdoc_for_scalar_type('uint', [], + """ + Unsigned integer type, compatible with C ``unsigned long``. + """) + +add_newdoc_for_scalar_type('ulonglong', [], + """ + Signed integer type, compatible with C ``unsigned long long``. + """) + +add_newdoc_for_scalar_type('half', [], + """ + Half-precision floating-point number type. + """) + +add_newdoc_for_scalar_type('single', [], + """ + Single-precision floating-point number type, compatible with C ``float``. + """) + +add_newdoc_for_scalar_type('double', ['float_'], + """ + Double-precision floating-point number type, compatible with Python `float` + and C ``double``. + """) + +add_newdoc_for_scalar_type('longdouble', ['longfloat'], + """ + Extended-precision floating-point number type, compatible with C + ``long double`` but not necessarily with IEEE 754 quadruple-precision. + """) + +add_newdoc_for_scalar_type('csingle', ['singlecomplex'], + """ + Complex number type composed of two single-precision floating-point + numbers. + """) + +add_newdoc_for_scalar_type('cdouble', ['cfloat', 'complex_'], + """ + Complex number type composed of two double-precision floating-point + numbers, compatible with Python `complex`. + """) + +add_newdoc_for_scalar_type('clongdouble', ['clongfloat', 'longcomplex'], + """ + Complex number type composed of two extended-precision floating-point + numbers. + """) + +add_newdoc_for_scalar_type('object_', [], + """ + Any Python object. + """) + +# TODO: work out how to put this on the base class, np.floating +for float_name in ('half', 'single', 'double', 'longdouble'): + add_newdoc('numpy.core.numerictypes', float_name, ('as_integer_ratio', + """ + {ftype}.as_integer_ratio() -> (int, int) + + Return a pair of integers, whose ratio is exactly equal to the original + floating point number, and with a positive denominator. + Raise OverflowError on infinities and a ValueError on NaNs. + + >>> np.{ftype}(10.0).as_integer_ratio() + (10, 1) + >>> np.{ftype}(0.0).as_integer_ratio() + (0, 1) + >>> np.{ftype}(-.25).as_integer_ratio() + (-1, 4) + """.format(ftype=float_name))) From 3edc19fabf6bbe6fcc8fb8155b3a9f72382a0372 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 4 Oct 2020 21:03:57 +0100 Subject: [PATCH 181/409] DOC: Overhaul the scalar type documentation to use `..autoclass` This remove the tables. since they only had three columns, and using the character code is advised against anyway. With this change, the individual scalar types as well as their aliases are now valid sphinx python domain targets. 
--- doc/source/reference/arrays.scalars.rst | 319 ++++++++++++++++-------- 1 file changed, 220 insertions(+), 99 deletions(-) diff --git a/doc/source/reference/arrays.scalars.rst b/doc/source/reference/arrays.scalars.rst index 46d2bb8faeef..174dd2b917a0 100644 --- a/doc/source/reference/arrays.scalars.rst +++ b/doc/source/reference/arrays.scalars.rst @@ -41,6 +41,13 @@ of the flexible itemsize array types (:class:`string`, pointer for the platform. All the number types can be obtained using bit-width names as well. + +.. TODO - use something like this instead of the diagram above, as it generates + links to the classes and is a vector graphic. Unfortunately it looks worse + and the html element providing the linked regions is misaligned. + + .. inheritance-diagram:: byte short intc int_ longlong ubyte ushort uintc uint ulonglong half single double longdouble csingle cdouble clongdouble bool_ datetime64 timedelta64 object_ bytes_ str_ void + .. [#] However, array scalars are immutable, so none of the array scalar attributes are settable. @@ -51,14 +58,8 @@ of the flexible itemsize array types (:class:`string`, Built-in scalar types ===================== -The built-in scalar types are shown below. Along with their (mostly) -C-derived names, the integer, float, and complex data-types are also -available using a bit-width convention so that an array of the right -size can always be ensured (e.g. :class:`int8`, :class:`float64`, -:class:`complex128`). Two aliases (:class:`intp` and :class:`uintp`) -pointing to the integer type that is sufficiently large to hold a C pointer -are also provided. The C-like names are associated with character codes, -which are shown in the table. Use of the character codes, however, +The built-in scalar types are shown below. The C-like names are associated with character codes, +which are shown in their descriptions. Use of the character codes, however, is discouraged. Some of the scalar types are essentially equivalent to fundamental @@ -84,13 +85,6 @@ The :class:`bool_` data type is very similar to the Python on the C-level the size of the actual bool data is not the same as a Python Boolean scalar. -.. warning:: - - The :class:`bool_` type is not a subclass of the :class:`int_` type - (the :class:`bool_` is not even a number type). This is different - than Python's default implementation of :class:`bool` as a - sub-class of :class:`int`. - .. warning:: The :class:`int_` type does **not** inherit from the @@ -99,84 +93,113 @@ Python Boolean scalar. .. tip:: The default data type in NumPy is :class:`float_`. -In the tables below, ``platform?`` means that the type may not be -available on all platforms. Compatibility with different C or Python -types is indicated: two types are compatible if their data is of the -same size and interpreted in the same way. 
- -Booleans: - -=================== ============================= =============== -Type Remarks Character code -=================== ============================= =============== -:class:`bool_` compatible: Python bool ``'?'`` -:class:`bool8` 8 bits -=================== ============================= =============== - -Integers: - -=================== ============================= =============== -:class:`byte` compatible: C ``char`` ``'b'`` -:class:`short` compatible: C ``short`` ``'h'`` -:class:`intc` compatible: C ``int`` ``'i'`` -:class:`int_` compatible: C ``long`` ``'l'`` -:class:`longlong` compatible: C ``long long`` ``'q'`` -:class:`intp` large enough to fit a pointer ``'p'`` -:class:`int8` 8 bits -:class:`int16` 16 bits -:class:`int32` 32 bits -:class:`int64` 64 bits -=================== ============================= =============== - -Unsigned integers: - -=================== ================================= =============== -:class:`ubyte` compatible: C ``unsigned char`` ``'B'`` -:class:`ushort` compatible: C ``unsigned short`` ``'H'`` -:class:`uintc` compatible: C ``unsigned int`` ``'I'`` -:class:`uint` compatible: C ``long`` ``'L'`` -:class:`ulonglong` compatible: C ``long long`` ``'Q'`` -:class:`uintp` large enough to fit a pointer ``'P'`` -:class:`uint8` 8 bits -:class:`uint16` 16 bits -:class:`uint32` 32 bits -:class:`uint64` 64 bits -=================== ================================= =============== - -Floating-point numbers: - -=================== ============================= =============== -:class:`half` ``'e'`` -:class:`single` compatible: C float ``'f'`` -:class:`double` compatible: C double -:class:`float_` compatible: Python float ``'d'`` -:class:`longfloat` compatible: C long float ``'g'`` -:class:`float16` 16 bits -:class:`float32` 32 bits -:class:`float64` 64 bits -:class:`float96` 96 bits, platform? -:class:`float128` 128 bits, platform? -=================== ============================= =============== - -Complex floating-point numbers: - -=================== ============================= =============== -:class:`csingle` ``'F'`` -:class:`complex_` compatible: Python complex ``'D'`` -:class:`clongfloat` ``'G'`` -:class:`complex64` two 32-bit floats -:class:`complex128` two 64-bit floats -:class:`complex192` two 96-bit floats, - platform? -:class:`complex256` two 128-bit floats, - platform? -=================== ============================= =============== - -Any Python object: - -=================== ============================= =============== -:class:`object_` any Python object ``'O'`` -=================== ============================= =============== +.. autoclass:: numpy.generic + :exclude-members: + +.. autoclass:: numpy.number + :exclude-members: + +Integer types +~~~~~~~~~~~~~ + +.. autoclass:: numpy.integer + :exclude-members: + +Signed integer types +++++++++++++++++++++ + +.. autoclass:: numpy.signedinteger + :exclude-members: + +.. autoclass:: numpy.byte + :exclude-members: + +.. autoclass:: numpy.short + :exclude-members: + +.. autoclass:: numpy.intc + :exclude-members: + +.. autoclass:: numpy.int_ + :exclude-members: + +.. autoclass:: numpy.longlong + :exclude-members: + +Unsigned integer types +++++++++++++++++++++++ + +.. autoclass:: numpy.unsignedinteger + :exclude-members: + +.. autoclass:: numpy.ubyte + :exclude-members: + +.. autoclass:: numpy.ushort + :exclude-members: + +.. autoclass:: numpy.uintc + :exclude-members: + +.. autoclass:: numpy.uint + :exclude-members: + +.. 
autoclass:: numpy.ulonglong
+ :exclude-members:
+
+Inexact types
+~~~~~~~~~~~~~
+
+.. autoclass:: numpy.inexact
+ :exclude-members:
+
+Floating-point types
+++++++++++++++++++++
+
+.. autoclass:: numpy.floating
+ :exclude-members:
+
+.. autoclass:: numpy.half
+ :exclude-members:
+
+.. autoclass:: numpy.single
+ :exclude-members:
+
+.. autoclass:: numpy.double
+ :exclude-members:
+
+.. autoclass:: numpy.longdouble
+ :exclude-members:
+
+Complex floating-point types
+++++++++++++++++++++++++++++
+
+.. autoclass:: numpy.complexfloating
+ :exclude-members:
+
+.. autoclass:: numpy.csingle
+ :exclude-members:
+
+.. autoclass:: numpy.cdouble
+ :exclude-members:
+
+.. autoclass:: numpy.clongdouble
+ :exclude-members:
+
+Other types
+~~~~~~~~~~~
+
+.. autoclass:: numpy.bool_
+ :exclude-members:
+
+.. autoclass:: numpy.datetime64
+ :exclude-members:
+
+.. autoclass:: numpy.timedelta64
+ :exclude-members:
+
+.. autoclass:: numpy.object_
+ :exclude-members:

 .. note::

@@ -198,11 +221,17 @@ size and the data they describe can be of different length in different
 arrays. (In the character codes ``#`` is an integer denoting how many
 elements the data type consists of.)

-=================== ============================== ========
-:class:`bytes_` compatible: Python bytes ``'S#'``
-:class:`unicode_` compatible: Python unicode/str ``'U#'``
-:class:`void` ``'V#'``
-=================== ============================== ========
+.. autoclass:: numpy.flexible
+ :exclude-members:
+
+.. autoclass:: numpy.bytes_
+ :exclude-members:
+
+.. autoclass:: numpy.str_
+ :exclude-members:
+
+.. autoclass:: numpy.void
+ :exclude-members:

 .. warning::

@@ -217,6 +246,99 @@ elements the data type consists of.)
 convention more consistent with other Python modules such as the
 :mod:`struct` module.

+Sized aliases
+~~~~~~~~~~~~~
+
+Along with their (mostly)
+C-derived names, the integer, float, and complex data-types are also
+available using a bit-width convention so that an array of the right
+size can always be ensured. Two aliases (:class:`numpy.intp` and :class:`numpy.uintp`)
+pointing to the integer type that is sufficiently large to hold a C pointer
+are also provided.
+
+.. note that these are documented with ..attribute because that is what
+ autoclass does for aliases under the hood.
+
+.. autoclass:: numpy.bool8
+
+.. attribute:: numpy.int8
+ numpy.int16
+ numpy.int32
+ numpy.int64
+
+ Aliases for the signed integer types (one of `numpy.byte`, `numpy.short`,
+ `numpy.intc`, `numpy.int_` and `numpy.longlong`) with the specified number
+ of bits.
+
+ Compatible with the C99 ``int8_t``, ``int16_t``, ``int32_t``, and
+ ``int64_t``, respectively.
+
+.. attribute:: numpy.uint8
+ numpy.uint16
+ numpy.uint32
+ numpy.uint64
+
+ Aliases for the unsigned integer types (one of `numpy.ubyte`, `numpy.ushort`,
+ `numpy.uintc`, `numpy.uint` and `numpy.ulonglong`) with the specified number
+ of bits.
+
+ Compatible with the C99 ``uint8_t``, ``uint16_t``, ``uint32_t``, and
+ ``uint64_t``, respectively.
+
+.. attribute:: numpy.intp
+
+ Alias for the signed integer type (one of `numpy.byte`, `numpy.short`,
+ `numpy.intc`, `numpy.int_` and `numpy.longlong`) that is the same size as a
+ pointer.
+
+ Compatible with the C ``intptr_t``.
+
+ :Character code: ``'p'``
+
+.. attribute:: numpy.uintp
+
+ Alias for the unsigned integer type (one of `numpy.ubyte`, `numpy.ushort`,
+ `numpy.uintc`, `numpy.uint` and `numpy.ulonglong`) that is the same size as a
+ pointer.
+
+ Compatible with the C ``uintptr_t``.
+
+ :Character code: ``'P'``
+
+.. 
autoclass:: numpy.float16 + +.. autoclass:: numpy.float32 + +.. autoclass:: numpy.float64 + +.. attribute:: numpy.float96 + numpy.float128 + + Alias for `numpy.longdouble`, named after its size in bits. + The existance of these aliases depends on the platform. + +.. autoclass:: numpy.complex64 + +.. autoclass:: numpy.complex128 + +.. attribute:: numpy.complex192 + numpy.complex256 + + Alias for `numpy.clongdouble`, named after its size in bits. + The existance of these aliases depends on the platform. + +Other aliases +~~~~~~~~~~~~~ + +.. autoclass:: float_ + +.. autoclass:: complex_ + +.. autoclass:: longfloat + +.. autoclass:: clongfloat + +.. autoclass:: longcomplex Attributes ========== @@ -276,7 +398,6 @@ The exceptions to the above rules are given below: .. autosummary:: :toctree: generated/ - generic generic.__array__ generic.__array_wrap__ generic.squeeze From f16f01aa669756400dccf5b12ffc75faf2387638 Mon Sep 17 00:00:00 2001 From: takanori-pskq Date: Mon, 5 Oct 2020 10:10:27 +0900 Subject: [PATCH 182/409] Fix: Add the entry for `NPY_ARR_HAS_DESCR` --- doc/source/reference/arrays.interface.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/reference/arrays.interface.rst b/doc/source/reference/arrays.interface.rst index 6d12cf999790..49772a298772 100644 --- a/doc/source/reference/arrays.interface.rst +++ b/doc/source/reference/arrays.interface.rst @@ -238,6 +238,8 @@ interpreted. The data-bits are :c:macro:`NPY_ARRAY_C_CONTIGUOUS` (0x1), has the arrdescr field. The field should not be accessed unless this flag is present. + .. c:macro:: NPY_ARR_HAS_DESCR + .. admonition:: New since June 16, 2006: In the past most implementations used the ``desc`` member of the ``PyCObject`` From 37ff6fce0bfdb5df87aafcdd5f38e9ebc5ebe0e6 Mon Sep 17 00:00:00 2001 From: takanori-pskq Date: Mon, 5 Oct 2020 12:37:23 +0900 Subject: [PATCH 183/409] DOC: Fix some references for macros --- doc/source/reference/c-api/config.rst | 9 ++++++--- doc/source/reference/c-api/dtype.rst | 12 +++++++++--- doc/source/reference/c-api/iterator.rst | 2 +- doc/source/reference/c-api/types-and-structures.rst | 4 ++-- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/doc/source/reference/c-api/config.rst b/doc/source/reference/c-api/config.rst index 5fdbc0ff1ece..cec5b973a202 100644 --- a/doc/source/reference/c-api/config.rst +++ b/doc/source/reference/c-api/config.rst @@ -52,12 +52,15 @@ information is available to the pre-processor. .. c:macro:: NPY_SIZEOF_LONG_DOUBLE - sizeof(longdouble) (A macro defines **NPY_SIZEOF_LONGDOUBLE** as well.) +.. c:macro:: NPY_SIZEOF_LONGDOUBLE + + sizeof(longdouble) .. c:macro:: NPY_SIZEOF_PY_INTPTR_T - Size of a pointer on this platform (sizeof(void \*)) (A macro defines - NPY_SIZEOF_INTP as well.) +.. c:macro:: NPY_SIZEOF_INTP + + Size of a pointer on this platform (sizeof(void \*)) Platform information diff --git a/doc/source/reference/c-api/dtype.rst b/doc/source/reference/c-api/dtype.rst index a04d852123ed..47b998302e86 100644 --- a/doc/source/reference/c-api/dtype.rst +++ b/doc/source/reference/c-api/dtype.rst @@ -414,6 +414,12 @@ Printf Formatting For help in printing, the following strings are defined as the correct format specifier in printf and related commands. - :c:data:`NPY_LONGLONG_FMT`, :c:data:`NPY_ULONGLONG_FMT`, - :c:data:`NPY_INTP_FMT`, :c:data:`NPY_UINTP_FMT`, - :c:data:`NPY_LONGDOUBLE_FMT` +.. c:macro:: NPY_LONGLONG_FMT + +.. c:macro:: NPY_ULONGLONG_FMT + +.. c:macro:: NPY_INTP_FMT + +.. c:macro:: NPY_UINTP_FMT + +.. 
c:macro:: NPY_LONGDOUBLE_FMT diff --git a/doc/source/reference/c-api/iterator.rst b/doc/source/reference/c-api/iterator.rst index 7eac8c367dab..ae96bb3fb056 100644 --- a/doc/source/reference/c-api/iterator.rst +++ b/doc/source/reference/c-api/iterator.rst @@ -1264,7 +1264,7 @@ functions provide that information. NPY_MAX_INTP is placed in the stride. Once the iterator is prepared for iteration (after a reset if - :c:data:`NPY_DELAY_BUFALLOC` was used), call this to get the strides + :c:data:`NPY_ITER_DELAY_BUFALLOC` was used), call this to get the strides which may be used to select a fast inner loop function. For example, if the stride is 0, that means the inner loop can always load its value into a variable once, then use the variable throughout the loop, diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst index ee57d4680cb1..cc961df3abf4 100644 --- a/doc/source/reference/c-api/types-and-structures.rst +++ b/doc/source/reference/c-api/types-and-structures.rst @@ -315,7 +315,7 @@ PyArrayDescr_Type and PyArray_Descr Bits set for the object data-type: ( :c:data:`NPY_LIST_PICKLE` \| :c:data:`NPY_USE_GETITEM` \| :c:data:`NPY_ITEM_IS_POINTER` \| - :c:data:`NPY_REFCOUNT` \| :c:data:`NPY_NEEDS_INIT` \| + :c:data:`NPY_ITEM_REFCOUNT` \| :c:data:`NPY_NEEDS_INIT` \| :c:data:`NPY_NEEDS_PYAPI`). .. c:function:: PyDataType_FLAGCHK(PyArray_Descr *dtype, int flags) @@ -1395,7 +1395,7 @@ PyArrayInterface as the *descr* key in :obj:`__array_interface__`). This can be ``NULL`` if *typekind* and *itemsize* provide enough information. This field is also ignored unless - :c:data:`ARR_HAS_DESCR` flag is on in *flags*. + :c:data:`NPY_ARR_HAS_DESCR` flag is on in *flags*. Internally used structures From bfd80ef54217c4d701163bf6ab09cc8629a9ca85 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 5 Oct 2020 07:12:44 +0000 Subject: [PATCH 184/409] MAINT: Bump hypothesis from 5.36.1 to 5.37.0 Bumps [hypothesis](https://github.com/HypothesisWorks/hypothesis) from 5.36.1 to 5.37.0. - [Release notes](https://github.com/HypothesisWorks/hypothesis/releases) - [Commits](https://github.com/HypothesisWorks/hypothesis/compare/hypothesis-python-5.36.1...hypothesis-python-5.37.0) Signed-off-by: dependabot-preview[bot] --- test_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_requirements.txt b/test_requirements.txt index c86d46eb879a..067dd07c888a 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,7 +1,7 @@ cython==0.29.21 wheel setuptools<49.2.0 -hypothesis==5.36.1 +hypothesis==5.37.0 pytest==6.0.2 pytz==2020.1 pytest-cov==2.10.1 From 9a9abf7a07271fed54dcba8ebadc3c725a63d916 Mon Sep 17 00:00:00 2001 From: Stuart Archibald Date: Mon, 5 Oct 2020 14:08:04 +0100 Subject: [PATCH 185/409] DOC: update code of conduct URL As title. 
Closes: #17458 --- .github/CODE_OF_CONDUCT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md index 57c98060eb3b..079098fae68a 100644 --- a/.github/CODE_OF_CONDUCT.md +++ b/.github/CODE_OF_CONDUCT.md @@ -1 +1 @@ -NumPy has a Code of Conduct, please see: https://www.numpy.org/devdocs/dev/conduct/code_of_conduct.html +NumPy has a Code of Conduct, please see: https://numpy.org/code-of-conduct From 5dbcbb79478a8eddf05a72f7030bdf29a93ff46c Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Thu, 17 Sep 2020 01:43:49 +0200 Subject: [PATCH 186/409] ENH: Add annotations for `generic` and `ndarray` bitwise operations --- numpy/__init__.pyi | 79 +++++++++++++++++++++++++++++++-------- numpy/typing/_callable.py | 29 +++++++++++++- 2 files changed, 91 insertions(+), 17 deletions(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 9966ef1999a5..1dd5d55528ce 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -7,12 +7,15 @@ from numpy.core._internal import _ctypes from numpy.typing import ArrayLike, DtypeLike, _Shape, _ShapeLike from numpy.typing._callable import ( _BoolOp, + _BoolBitOp, _BoolSub, _BoolTrueDiv, _TD64Div, _IntTrueDiv, _UnsignedIntOp, + _UnsignedIntBitOp, _SignedIntOp, + _SignedIntBitOp, _FloatOp, _ComplexOp, _NumberOp, @@ -677,20 +680,9 @@ class _ArrayOrScalarCommon( def __rmod__(self, other): ... def __divmod__(self, other): ... def __rdivmod__(self, other): ... - def __lshift__(self, other): ... - def __rlshift__(self, other): ... - def __rshift__(self, other): ... - def __rrshift__(self, other): ... - def __and__(self, other): ... - def __rand__(self, other): ... - def __xor__(self, other): ... - def __rxor__(self, other): ... - def __or__(self, other): ... - def __ror__(self, other): ... def __neg__(self: _ArraySelf) -> _ArraySelf: ... def __pos__(self: _ArraySelf) -> _ArraySelf: ... def __abs__(self: _ArraySelf) -> _ArraySelf: ... - def __invert__(self: _ArraySelf) -> _ArraySelf: ... def astype( self: _ArraySelf, dtype: DtypeLike, @@ -1257,6 +1249,17 @@ class ndarray(_ArrayOrScalarCommon, Iterable, Sized, Container): def __rpow__(self, other: ArrayLike) -> Union[ndarray, generic]: ... def __truediv__(self, other: ArrayLike) -> Union[ndarray, generic]: ... def __rtruediv__(self, other: ArrayLike) -> Union[ndarray, generic]: ... + def __invert__(self: _ArraySelf) -> Union[_ArraySelf, integer, bool_]: ... + def __lshift__(self, other: ArrayLike) -> Union[ndarray, integer]: ... + def __rlshift__(self, other: ArrayLike) -> Union[ndarray, integer]: ... + def __rshift__(self, other: ArrayLike) -> Union[ndarray, integer]: ... + def __rrshift__(self, other: ArrayLike) -> Union[ndarray, integer]: ... + def __and__(self, other: ArrayLike) -> Union[ndarray, integer, bool_]: ... + def __rand__(self, other: ArrayLike) -> Union[ndarray, integer, bool_]: ... + def __xor__(self, other: ArrayLike) -> Union[ndarray, integer, bool_]: ... + def __rxor__(self, other: ArrayLike) -> Union[ndarray, integer, bool_]: ... + def __or__(self, other: ArrayLike) -> Union[ndarray, integer, bool_]: ... + def __ror__(self, other: ArrayLike) -> Union[ndarray, integer, bool_]: ... # `np.generic` does not support inplace operations def __iadd__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... def __isub__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... @@ -1265,11 +1268,11 @@ class ndarray(_ArrayOrScalarCommon, Iterable, Sized, Container): def __ifloordiv__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... 
def __ipow__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... def __imod__(self, other): ... - def __ilshift__(self, other): ... - def __irshift__(self, other): ... - def __iand__(self, other): ... - def __ixor__(self, other): ... - def __ior__(self, other): ... + def __ilshift__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... + def __irshift__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... + def __iand__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... + def __ixor__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... + def __ior__(self: _ArraySelf, other: ArrayLike) -> _ArraySelf: ... # NOTE: while `np.generic` is not technically an instance of `ABCMeta`, # the `@abstractmethod` decorator is herein used to (forcefully) deny @@ -1329,6 +1332,17 @@ class bool_(generic): __rpow__: _BoolOp[int8] __truediv__: _BoolTrueDiv __rtruediv__: _BoolTrueDiv + def __invert__(self) -> bool_: ... + __lshift__: _BoolBitOp[int8] + __rlshift__: _BoolBitOp[int8] + __rshift__: _BoolBitOp[int8] + __rrshift__: _BoolBitOp[int8] + __and__: _BoolBitOp[bool_] + __rand__: _BoolBitOp[bool_] + __xor__: _BoolBitOp[bool_] + __rxor__: _BoolBitOp[bool_] + __or__: _BoolBitOp[bool_] + __ror__: _BoolBitOp[bool_] class object_(generic): def __init__(self, __value: object = ...) -> None: ... @@ -1374,6 +1388,18 @@ class integer(number): # type: ignore def __index__(self) -> int: ... __truediv__: _IntTrueDiv __rtruediv__: _IntTrueDiv + def __invert__(self: _IntType) -> _IntType: ... + # Ensure that objects annotated as `integer` support bit-wise operations + def __lshift__(self, other: Union[_IntLike, _BoolLike]) -> integer: ... + def __rlshift__(self, other: Union[_IntLike, _BoolLike]) -> integer: ... + def __rshift__(self, other: Union[_IntLike, _BoolLike]) -> integer: ... + def __rrshift__(self, other: Union[_IntLike, _BoolLike]) -> integer: ... + def __and__(self, other: Union[_IntLike, _BoolLike]) -> integer: ... + def __rand__(self, other: Union[_IntLike, _BoolLike]) -> integer: ... + def __or__(self, other: Union[_IntLike, _BoolLike]) -> integer: ... + def __ror__(self, other: Union[_IntLike, _BoolLike]) -> integer: ... + def __xor__(self, other: Union[_IntLike, _BoolLike]) -> integer: ... + def __rxor__(self, other: Union[_IntLike, _BoolLike]) -> integer: ... class signedinteger(integer): # type: ignore __add__: _SignedIntOp @@ -1386,6 +1412,16 @@ class signedinteger(integer): # type: ignore __rfloordiv__: _SignedIntOp __pow__: _SignedIntOp __rpow__: _SignedIntOp + __lshift__: _SignedIntBitOp + __rlshift__: _SignedIntBitOp + __rshift__: _SignedIntBitOp + __rrshift__: _SignedIntBitOp + __and__: _SignedIntBitOp + __rand__: _SignedIntBitOp + __xor__: _SignedIntBitOp + __rxor__: _SignedIntBitOp + __or__: _SignedIntBitOp + __ror__: _SignedIntBitOp class int8(signedinteger): def __init__(self, __value: _IntValue = ...) -> None: ... @@ -1429,6 +1465,16 @@ class unsignedinteger(integer): # type: ignore __rfloordiv__: _UnsignedIntOp __pow__: _UnsignedIntOp __rpow__: _UnsignedIntOp + __lshift__: _UnsignedIntBitOp + __rlshift__: _UnsignedIntBitOp + __rshift__: _UnsignedIntBitOp + __rrshift__: _UnsignedIntBitOp + __and__: _UnsignedIntBitOp + __rand__: _UnsignedIntBitOp + __xor__: _UnsignedIntBitOp + __rxor__: _UnsignedIntBitOp + __or__: _UnsignedIntBitOp + __ror__: _UnsignedIntBitOp class uint8(unsignedinteger): def __init__(self, __value: _IntValue = ...) -> None: ... 
@@ -1458,6 +1504,7 @@ class floating(inexact): # type: ignore __pow__: _FloatOp __rpow__: _FloatOp +_IntType = TypeVar("_IntType", bound=integer) _FloatType = TypeVar('_FloatType', bound=floating) class float16(floating): diff --git a/numpy/typing/_callable.py b/numpy/typing/_callable.py index 5e14b708f1a6..943441cf494e 100644 --- a/numpy/typing/_callable.py +++ b/numpy/typing/_callable.py @@ -9,7 +9,7 @@ """ import sys -from typing import Union, TypeVar, overload, Any +from typing import Union, TypeVar, overload, Any, NoReturn from numpy import ( _BoolLike, @@ -26,6 +26,7 @@ signedinteger, int32, int64, + uint64, floating, float32, float64, @@ -45,6 +46,7 @@ HAVE_PROTOCOL = True if HAVE_PROTOCOL: + _IntType = TypeVar("_IntType", bound=integer) _NumberType = TypeVar("_NumberType", bound=number) _NumberType_co = TypeVar("_NumberType_co", covariant=True, bound=number) _GenericType_co = TypeVar("_GenericType_co", covariant=True, bound=generic) @@ -61,6 +63,14 @@ def __call__(self, __other: complex) -> complex128: ... @overload def __call__(self, __other: _NumberType) -> _NumberType: ... + class _BoolBitOp(Protocol[_GenericType_co]): + @overload + def __call__(self, __other: _BoolLike) -> _GenericType_co: ... + @overload # platform dependent + def __call__(self, __other: int) -> Union[int32, int64]: ... + @overload + def __call__(self, __other: _IntType) -> _IntType: ... + class _BoolSub(Protocol): # Note that `__other: bool_` is absent here @overload # platform dependent @@ -103,6 +113,17 @@ def __call__(self, __other: float) -> floating: ... @overload def __call__(self, __other: complex) -> complexfloating[floating]: ... + class _UnsignedIntBitOp(Protocol): + # The likes of `uint64 | np.signedinteger` will fail as there + # is no signed integer type large enough to hold a `uint64` + # See https://github.com/numpy/numpy/issues/2524 + @overload + def __call__(self, __other: Union[bool, unsignedinteger]) -> unsignedinteger: ... + @overload + def __call__(self: uint64, __other: Union[int, signedinteger]) -> NoReturn: ... + @overload + def __call__(self, __other: Union[int, signedinteger]) -> signedinteger: ... + class _SignedIntOp(Protocol): @overload def __call__(self, __other: Union[int, signedinteger]) -> signedinteger: ... @@ -111,6 +132,9 @@ def __call__(self, __other: float) -> floating: ... @overload def __call__(self, __other: complex) -> complexfloating[floating]: ... + class _SignedIntBitOp(Protocol): + def __call__(self, __other: Union[int, signedinteger]) -> signedinteger: ... + class _FloatOp(Protocol): @overload def __call__(self, __other: _FloatLike) -> floating: ... @@ -125,12 +149,15 @@ def __call__(self, __other: _NumberLike) -> number: ... 
else: _BoolOp = Any + _BoolBitOp = Any _BoolSub = Any _BoolTrueDiv = Any _TD64Div = Any _IntTrueDiv = Any _UnsignedIntOp = Any + _UnsignedIntBitOp = Any _SignedIntOp = Any + _SignedIntBitOp = Any _FloatOp = Any _ComplexOp = Any _NumberOp = Any From cb92195c59be15415ce795367aeac6619f069257 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Thu, 1 Oct 2020 01:26:11 +0200 Subject: [PATCH 187/409] TST: Added tests for bitwise operations --- numpy/typing/tests/data/fail/bitwise_ops.py | 19 +++ numpy/typing/tests/data/pass/bitwise_ops.py | 131 ++++++++++++++++++ numpy/typing/tests/data/reveal/bitwise_ops.py | 131 ++++++++++++++++++ 3 files changed, 281 insertions(+) create mode 100644 numpy/typing/tests/data/fail/bitwise_ops.py create mode 100644 numpy/typing/tests/data/pass/bitwise_ops.py create mode 100644 numpy/typing/tests/data/reveal/bitwise_ops.py diff --git a/numpy/typing/tests/data/fail/bitwise_ops.py b/numpy/typing/tests/data/fail/bitwise_ops.py new file mode 100644 index 000000000000..9be6869ad101 --- /dev/null +++ b/numpy/typing/tests/data/fail/bitwise_ops.py @@ -0,0 +1,19 @@ +import numpy as np + +i8 = np.int64() +i4 = np.int32() +u8 = np.uint64() +b_ = np.bool_() +i = int() + +f8 = np.float64() + +b_ >> f8 # E: No overload variant +i8 << f8 # E: incompatible type +i | f8 # E: Unsupported operand types +i8 ^ f8 # E: incompatible type +u8 & f8 # E: No overload variant +~f8 # E: Unsupported operand type + +# mypys' error message for `NoReturn` is unfortunately pretty nad +a = u8 | 0 # E: Need type annotation diff --git a/numpy/typing/tests/data/pass/bitwise_ops.py b/numpy/typing/tests/data/pass/bitwise_ops.py new file mode 100644 index 000000000000..67449e2c21d8 --- /dev/null +++ b/numpy/typing/tests/data/pass/bitwise_ops.py @@ -0,0 +1,131 @@ +import numpy as np + +i8 = np.int64(1) +u8 = np.uint64(1) + +i4 = np.int32(1) +u4 = np.uint32(1) + +b_ = np.bool_(1) + +b = bool(1) +i = int(1) + +AR = np.array([0, 1, 2], dtype=np.int32) +AR.setflags(write=False) + + +i8 << i8 +i8 >> i8 +i8 | i8 +i8 ^ i8 +i8 & i8 + +i8 << AR +i8 >> AR +i8 | AR +i8 ^ AR +i8 & AR + +i4 << i4 +i4 >> i4 +i4 | i4 +i4 ^ i4 +i4 & i4 + +i8 << i4 +i8 >> i4 +i8 | i4 +i8 ^ i4 +i8 & i4 + +i8 << i +i8 >> i +i8 | i +i8 ^ i +i8 & i + +i8 << b_ +i8 >> b_ +i8 | b_ +i8 ^ b_ +i8 & b_ + +i8 << b +i8 >> b +i8 | b +i8 ^ b +i8 & b + +u8 << u8 +u8 >> u8 +u8 | u8 +u8 ^ u8 +u8 & u8 + +u8 << AR +u8 >> AR +u8 | AR +u8 ^ AR +u8 & AR + +u4 << u4 +u4 >> u4 +u4 | u4 +u4 ^ u4 +u4 & u4 + +u4 << i4 +u4 >> i4 +u4 | i4 +u4 ^ i4 +u4 & i4 + +u4 << i +u4 >> i +u4 | i +u4 ^ i +u4 & i + +u8 << b_ +u8 >> b_ +u8 | b_ +u8 ^ b_ +u8 & b_ + +u8 << b +u8 >> b +u8 | b +u8 ^ b +u8 & b + +b_ << b_ +b_ >> b_ +b_ | b_ +b_ ^ b_ +b_ & b_ + +b_ << AR +b_ >> AR +b_ | AR +b_ ^ AR +b_ & AR + +b_ << b +b_ >> b +b_ | b +b_ ^ b +b_ & b + +b_ << i +b_ >> i +b_ | i +b_ ^ i +b_ & i + +~i8 +~i4 +~u8 +~u4 +~b_ +~AR diff --git a/numpy/typing/tests/data/reveal/bitwise_ops.py b/numpy/typing/tests/data/reveal/bitwise_ops.py new file mode 100644 index 000000000000..ae899f47b690 --- /dev/null +++ b/numpy/typing/tests/data/reveal/bitwise_ops.py @@ -0,0 +1,131 @@ +import numpy as np + +i8 = np.int64(1) +u8 = np.uint64(1) + +i4 = np.int32(1) +u4 = np.uint32(1) + +b_ = np.bool_(1) + +b = bool(1) +i = int(1) + +AR = np.array([0, 1, 2], dtype=np.int32) +AR.setflags(write=False) + + +reveal_type(i8 << i8) # E: numpy.signedinteger +reveal_type(i8 >> i8) # E: numpy.signedinteger +reveal_type(i8 | i8) # E: numpy.signedinteger +reveal_type(i8 ^ i8) # E: numpy.signedinteger +reveal_type(i8 & i8) # 
E: numpy.signedinteger + +reveal_type(i8 << AR) # E: Union[numpy.ndarray, numpy.integer] +reveal_type(i8 >> AR) # E: Union[numpy.ndarray, numpy.integer] +reveal_type(i8 | AR) # E: Union[numpy.ndarray, numpy.integer, numpy.bool_] +reveal_type(i8 ^ AR) # E: Union[numpy.ndarray, numpy.integer, numpy.bool_] +reveal_type(i8 & AR) # E: Union[numpy.ndarray, numpy.integer, numpy.bool_] + +reveal_type(i4 << i4) # E: numpy.signedinteger +reveal_type(i4 >> i4) # E: numpy.signedinteger +reveal_type(i4 | i4) # E: numpy.signedinteger +reveal_type(i4 ^ i4) # E: numpy.signedinteger +reveal_type(i4 & i4) # E: numpy.signedinteger + +reveal_type(i8 << i4) # E: numpy.signedinteger +reveal_type(i8 >> i4) # E: numpy.signedinteger +reveal_type(i8 | i4) # E: numpy.signedinteger +reveal_type(i8 ^ i4) # E: numpy.signedinteger +reveal_type(i8 & i4) # E: numpy.signedinteger + +reveal_type(i8 << i) # E: numpy.signedinteger +reveal_type(i8 >> i) # E: numpy.signedinteger +reveal_type(i8 | i) # E: numpy.signedinteger +reveal_type(i8 ^ i) # E: numpy.signedinteger +reveal_type(i8 & i) # E: numpy.signedinteger + +reveal_type(i8 << b_) # E: numpy.int64 +reveal_type(i8 >> b_) # E: numpy.int64 +reveal_type(i8 | b_) # E: numpy.int64 +reveal_type(i8 ^ b_) # E: numpy.int64 +reveal_type(i8 & b_) # E: numpy.int64 + +reveal_type(i8 << b) # E: numpy.signedinteger +reveal_type(i8 >> b) # E: numpy.signedinteger +reveal_type(i8 | b) # E: numpy.signedinteger +reveal_type(i8 ^ b) # E: numpy.signedinteger +reveal_type(i8 & b) # E: numpy.signedinteger + +reveal_type(u8 << u8) # E: numpy.unsignedinteger +reveal_type(u8 >> u8) # E: numpy.unsignedinteger +reveal_type(u8 | u8) # E: numpy.unsignedinteger +reveal_type(u8 ^ u8) # E: numpy.unsignedinteger +reveal_type(u8 & u8) # E: numpy.unsignedinteger + +reveal_type(u8 << AR) # E: Union[numpy.ndarray, numpy.integer] +reveal_type(u8 >> AR) # E: Union[numpy.ndarray, numpy.integer] +reveal_type(u8 | AR) # E: Union[numpy.ndarray, numpy.integer, numpy.bool_] +reveal_type(u8 ^ AR) # E: Union[numpy.ndarray, numpy.integer, numpy.bool_] +reveal_type(u8 & AR) # E: Union[numpy.ndarray, numpy.integer, numpy.bool_] + +reveal_type(u4 << u4) # E: numpy.unsignedinteger +reveal_type(u4 >> u4) # E: numpy.unsignedinteger +reveal_type(u4 | u4) # E: numpy.unsignedinteger +reveal_type(u4 ^ u4) # E: numpy.unsignedinteger +reveal_type(u4 & u4) # E: numpy.unsignedinteger + +reveal_type(u4 << i4) # E: numpy.signedinteger +reveal_type(u4 >> i4) # E: numpy.signedinteger +reveal_type(u4 | i4) # E: numpy.signedinteger +reveal_type(u4 ^ i4) # E: numpy.signedinteger +reveal_type(u4 & i4) # E: numpy.signedinteger + +reveal_type(u4 << i) # E: numpy.signedinteger +reveal_type(u4 >> i) # E: numpy.signedinteger +reveal_type(u4 | i) # E: numpy.signedinteger +reveal_type(u4 ^ i) # E: numpy.signedinteger +reveal_type(u4 & i) # E: numpy.signedinteger + +reveal_type(u8 << b_) # E: numpy.uint64 +reveal_type(u8 >> b_) # E: numpy.uint64 +reveal_type(u8 | b_) # E: numpy.uint64 +reveal_type(u8 ^ b_) # E: numpy.uint64 +reveal_type(u8 & b_) # E: numpy.uint64 + +reveal_type(u8 << b) # E: numpy.unsignedinteger +reveal_type(u8 >> b) # E: numpy.unsignedinteger +reveal_type(u8 | b) # E: numpy.unsignedinteger +reveal_type(u8 ^ b) # E: numpy.unsignedinteger +reveal_type(u8 & b) # E: numpy.unsignedinteger + +reveal_type(b_ << b_) # E: numpy.int8 +reveal_type(b_ >> b_) # E: numpy.int8 +reveal_type(b_ | b_) # E: numpy.bool_ +reveal_type(b_ ^ b_) # E: numpy.bool_ +reveal_type(b_ & b_) # E: numpy.bool_ + +reveal_type(b_ << AR) # E: Union[numpy.ndarray, 
numpy.integer] +reveal_type(b_ >> AR) # E: Union[numpy.ndarray, numpy.integer] +reveal_type(b_ | AR) # E: Union[numpy.ndarray, numpy.integer, numpy.bool_] +reveal_type(b_ ^ AR) # E: Union[numpy.ndarray, numpy.integer, numpy.bool_] +reveal_type(b_ & AR) # E: Union[numpy.ndarray, numpy.integer, numpy.bool_] + +reveal_type(b_ << b) # E: numpy.int8 +reveal_type(b_ >> b) # E: numpy.int8 +reveal_type(b_ | b) # E: numpy.bool_ +reveal_type(b_ ^ b) # E: numpy.bool_ +reveal_type(b_ & b) # E: numpy.bool_ + +reveal_type(b_ << i) # E: Union[numpy.int32, numpy.int64] +reveal_type(b_ >> i) # E: Union[numpy.int32, numpy.int64] +reveal_type(b_ | i) # E: Union[numpy.int32, numpy.int64] +reveal_type(b_ ^ i) # E: Union[numpy.int32, numpy.int64] +reveal_type(b_ & i) # E: Union[numpy.int32, numpy.int64] + +reveal_type(~i8) # E: numpy.int64 +reveal_type(~i4) # E: numpy.int32 +reveal_type(~u8) # E: numpy.uint64 +reveal_type(~u4) # E: numpy.uint32 +reveal_type(~b_) # E: numpy.bool_ +reveal_type(~AR) # E: Union[numpy.ndarray*, numpy.integer, numpy.bool_] From 225c3360721ba515de88a1e191ede58e8b95593d Mon Sep 17 00:00:00 2001 From: stuartarchibald Date: Mon, 5 Oct 2020 18:20:21 +0100 Subject: [PATCH 188/409] BUG: Fixes incorrect error message in numpy.ediff1d (#17457) Fixes the error message reported by ediff1d in the case of an invalid to_begin and updates tests to assert validity. Co-authored-by: Ross Barnowski --- numpy/lib/arraysetops.py | 2 +- numpy/lib/tests/test_arraysetops.py | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index 9464692e0b2b..6c6c1ff809ce 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -93,7 +93,7 @@ def ediff1d(ary, to_end=None, to_begin=None): else: to_begin = np.asanyarray(to_begin) if not np.can_cast(to_begin, dtype_req, casting="same_kind"): - raise TypeError("dtype of `to_end` must be compatible " + raise TypeError("dtype of `to_begin` must be compatible " "with input `ary` under the `same_kind` rule.") to_begin = to_begin.ravel() diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index 81ba789e30d6..847e6cb8a3e7 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -125,32 +125,36 @@ def test_ediff1d(self): assert_array_equal([7, 1], ediff1d(two_elem, to_begin=7)) assert_array_equal([5, 6, 1], ediff1d(two_elem, to_begin=[5, 6])) - @pytest.mark.parametrize("ary, prepend, append", [ + @pytest.mark.parametrize("ary, prepend, append, expected", [ # should fail because trying to cast # np.nan standard floating point value # into an integer array: (np.array([1, 2, 3], dtype=np.int64), None, - np.nan), + np.nan, + 'to_end'), # should fail because attempting # to downcast to int type: (np.array([1, 2, 3], dtype=np.int64), np.array([5, 7, 2], dtype=np.float32), - None), + None, + 'to_begin'), # should fail because attempting to cast # two special floating point values - # to integers (on both sides of ary): + # to integers (on both sides of ary), + # `to_begin` is in the error message as the impl checks this first: (np.array([1., 3., 9.], dtype=np.int8), np.nan, - np.nan), + np.nan, + 'to_begin'), ]) - def test_ediff1d_forbidden_type_casts(self, ary, prepend, append): + def test_ediff1d_forbidden_type_casts(self, ary, prepend, append, expected): # verify resolution of gh-11490 # specifically, raise an appropriate # Exception when attempting to append or # prepend with an incompatible type - msg = 'must be 
compatible' + msg = 'dtype of `{}` must be compatible'.format(expected) with assert_raises_regex(TypeError, msg): ediff1d(ary=ary, to_end=append, From 72b9f4995045e73cf4431b08877f8d2dbb5fa0d9 Mon Sep 17 00:00:00 2001 From: Ross Barnowski Date: Mon, 5 Oct 2020 11:13:26 -0700 Subject: [PATCH 189/409] DOC: typo. --- doc/source/reference/arrays.scalars.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/reference/arrays.scalars.rst b/doc/source/reference/arrays.scalars.rst index 174dd2b917a0..3ab4d73013af 100644 --- a/doc/source/reference/arrays.scalars.rst +++ b/doc/source/reference/arrays.scalars.rst @@ -315,7 +315,7 @@ are also provided. numpy.float128 Alias for `numpy.longdouble`, named after its size in bits. - The existance of these aliases depends on the platform. + The existence of these aliases depends on the platform. .. autoclass:: numpy.complex64 From 1e0aa16427795bee76a0138115b93bebb95084a9 Mon Sep 17 00:00:00 2001 From: takanori-pskq Date: Tue, 6 Oct 2020 13:37:19 +0900 Subject: [PATCH 190/409] DOC: Add some entries for C types and macros --- .../reference/c-api/types-and-structures.rst | 39 +++-- doc/source/reference/c-api/ufunc.rst | 148 +++++++++++------- 2 files changed, 113 insertions(+), 74 deletions(-) diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst index cc961df3abf4..45cc6725a27e 100644 --- a/doc/source/reference/c-api/types-and-structures.rst +++ b/doc/source/reference/c-api/types-and-structures.rst @@ -357,27 +357,25 @@ PyArrayDescr_Type and PyArray_Descr useful as the data-type descriptor for a field in another data-type descriptor. The fields member should be ``NULL`` if this is non- ``NULL`` (the fields member of the base descriptor can be - non- ``NULL`` however). The :c:type:`PyArray_ArrayDescr` structure is - defined using + non- ``NULL`` however). - .. code-block:: c - - typedef struct { - PyArray_Descr *base; - PyObject *shape; - } PyArray_ArrayDescr; + .. c:type:: PyArray_ArrayDescr - The elements of this structure are: + .. code-block:: c - .. c:member:: PyArray_Descr *PyArray_ArrayDescr.base + typedef struct { + PyArray_Descr *base; + PyObject *shape; + } PyArray_ArrayDescr; - The data-type-descriptor object of the base-type. + .. c:member:: PyArray_Descr *PyArray_ArrayDescr.base - .. c:member:: PyObject *PyArray_ArrayDescr.shape + The data-type-descriptor object of the base-type. - The shape (always C-style contiguous) of the sub-array as a Python - tuple. + .. c:member:: PyObject *PyArray_ArrayDescr.shape + The shape (always C-style contiguous) of the sub-array as a Python + tuple. .. c:member:: PyObject *PyArray_Descr.fields @@ -961,9 +959,14 @@ PyUFunc_Type and PyUFuncObject For each distinct core dimension, a set of ``UFUNC_CORE_DIM*`` flags - - :c:data:`UFUNC_CORE_DIM_CAN_IGNORE` if the dim name ends in ``?`` - - :c:data:`UFUNC_CORE_DIM_SIZE_INFERRED` if the dim size will be - determined from the operands and not from a :ref:`frozen ` signature + .. c:macro:: UFUNC_CORE_DIM_IGNORE + + if the dim name ends in ``?`` + + .. c:macro:: UFUNC_CORE_DIM_INFERRED + + if the dim size will be determined from the operands + and not from a :ref:`frozen ` signature PyArrayIter_Type and PyArrayIterObject -------------------------------------- @@ -1438,3 +1441,5 @@ for completeness and assistance in understanding the code. ``arrayobject.h`` header. This type is not exposed to Python and could be replaced with a C-structure. 
As a Python type it takes advantage of reference- counted memory management. + + .. c:type:: PyArrayMapIterObject diff --git a/doc/source/reference/c-api/ufunc.rst b/doc/source/reference/c-api/ufunc.rst index 50963c81f626..1b9b6864224d 100644 --- a/doc/source/reference/c-api/ufunc.rst +++ b/doc/source/reference/c-api/ufunc.rst @@ -12,12 +12,39 @@ Constants .. c:var:: UFUNC_ERR_{HANDLER} - ``{HANDLER}`` can be **IGNORE**, **WARN**, **RAISE**, or **CALL** + .. c:macro:: UFUNC_ERR_IGNORE + + .. c:macro:: UFUNC_ERR_WARN + + .. c:macro:: UFUNC_ERR_RAISE + + .. c:macro:: UFUNC_ERR_CALL .. c:var:: UFUNC_{THING}_{ERR} - ``{THING}`` can be **MASK**, **SHIFT**, or **FPE**, and ``{ERR}`` can - be **DIVIDEBYZERO**, **OVERFLOW**, **UNDERFLOW**, and **INVALID**. + .. c:macro:: UFUNC_MASK_DIVIDEBYZERO + + .. c:macro:: UFUNC_MASK_OVERFLOW + + .. c:macro:: UFUNC_MASK_UNDERFLOW + + .. c:macro:: UFUNC_MASK_INVALID + + .. c:macro:: UFUNC_SHIFT_DIVIDEBYZERO + + .. c:macro:: UFUNC_SHIFT_OVERFLOW + + .. c:macro:: UFUNC_SHIFT_UNDERFLOW + + .. c:macro:: UFUNC_SHIFT_INVALID + + .. c:macro:: UFUNC_FPE_DIVIDEBYZERO + + .. c:macro:: UFUNC_FPE_OVERFLOW + + .. c:macro:: UFUNC_FPE_UNDERFLOW + + .. c:macro:: UFUNC_FPE_INVALID .. c:var:: PyUFunc_{VALUE} @@ -50,6 +77,66 @@ Macros was released (because loop->obj was not true). +Types +----- + +.. c:type:: PyUFuncGenericFunction + + pointers to functions that actually implement the underlying + (element-by-element) function :math:`N` times with the following + signature: + + .. c:function:: void loopfunc( + char** args, npy_intp const *dimensions, npy_intp const *steps, void* data) + + *args* + + An array of pointers to the actual data for the input and output + arrays. The input arguments are given first followed by the output + arguments. + + *dimensions* + + A pointer to the size of the dimension over which this function is + looping. + + *steps* + + A pointer to the number of bytes to jump to get to the + next element in this dimension for each of the input and + output arguments. + + *data* + + Arbitrary data (extra arguments, function names, *etc.* ) + that can be stored with the ufunc and will be passed in + when it is called. + + This is an example of a func specialized for addition of doubles + returning doubles. + + .. code-block:: c + + static void + double_add(char **args, + npy_intp const *dimensions, + npy_intp const *steps, + void *extra) + { + npy_intp i; + npy_intp is1 = steps[0], is2 = steps[1]; + npy_intp os = steps[2], n = dimensions[0]; + char *i1 = args[0], *i2 = args[1], *op = args[2]; + for (i = 0; i < n; i++) { + *((double *)op) = *((double *)i1) + + *((double *)i2); + i1 += is1; + i2 += is2; + op += os; + } + } + + Functions --------- @@ -71,60 +158,7 @@ Functions :param func: Must to an array of length *ntypes* containing - :c:type:`PyUFuncGenericFunction` items. These items are pointers to - functions that actually implement the underlying - (element-by-element) function :math:`N` times with the following - signature: - - .. c:function:: void loopfunc( - char** args, npy_intp const *dimensions, npy_intp const *steps, void* data) - - *args* - - An array of pointers to the actual data for the input and output - arrays. The input arguments are given first followed by the output - arguments. - - *dimensions* - - A pointer to the size of the dimension over which this function is - looping. - - *steps* - - A pointer to the number of bytes to jump to get to the - next element in this dimension for each of the input and - output arguments. 
- - *data* - - Arbitrary data (extra arguments, function names, *etc.* ) - that can be stored with the ufunc and will be passed in - when it is called. - - This is an example of a func specialized for addition of doubles - returning doubles. - - .. code-block:: c - - static void - double_add(char **args, - npy_intp const *dimensions, - npy_intp const *steps, - void *extra) - { - npy_intp i; - npy_intp is1 = steps[0], is2 = steps[1]; - npy_intp os = steps[2], n = dimensions[0]; - char *i1 = args[0], *i2 = args[1], *op = args[2]; - for (i = 0; i < n; i++) { - *((double *)op) = *((double *)i1) + - *((double *)i2); - i1 += is1; - i2 += is2; - op += os; - } - } + :c:type:`PyUFuncGenericFunction` items. :param data: Should be ``NULL`` or a pointer to an array of size *ntypes* From 04ad2ce13c4b8a0557c08010d3121cbbaa1cad5a Mon Sep 17 00:00:00 2001 From: takanori-pskq Date: Tue, 6 Oct 2020 16:41:49 +0900 Subject: [PATCH 191/409] Fixup --- doc/source/reference/c-api/types-and-structures.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst index 45cc6725a27e..2f4919f454b8 100644 --- a/doc/source/reference/c-api/types-and-structures.rst +++ b/doc/source/reference/c-api/types-and-structures.rst @@ -959,11 +959,11 @@ PyUFunc_Type and PyUFuncObject For each distinct core dimension, a set of ``UFUNC_CORE_DIM*`` flags - .. c:macro:: UFUNC_CORE_DIM_IGNORE + .. c:macro:: UFUNC_CORE_DIM_CAN_IGNORE if the dim name ends in ``?`` - .. c:macro:: UFUNC_CORE_DIM_INFERRED + .. c:macro:: UFUNC_CORE_DIM_SIZE_INFERRED if the dim size will be determined from the operands and not from a :ref:`frozen ` signature From 3c5de8a2bb6a75faef20f1b15985c03724271be5 Mon Sep 17 00:00:00 2001 From: takanori-pskq Date: Tue, 6 Oct 2020 17:49:08 +0900 Subject: [PATCH 192/409] Fix: Remove the link for `PyArrayMapIterObject` --- doc/source/reference/c-api/types-and-structures.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst index 2f4919f454b8..59361fd62818 100644 --- a/doc/source/reference/c-api/types-and-structures.rst +++ b/doc/source/reference/c-api/types-and-structures.rst @@ -1436,10 +1436,8 @@ for completeness and assistance in understanding the code. Advanced indexing is handled with this Python type. It is simply a loose wrapper around the C-structure containing the variables needed for advanced array indexing. The associated C-structure, - :c:type:`PyArrayMapIterObject`, is useful if you are trying to + ``PyArrayMapIterObject``, is useful if you are trying to understand the advanced-index mapping code. It is defined in the ``arrayobject.h`` header. This type is not exposed to Python and could be replaced with a C-structure. As a Python type it takes advantage of reference- counted memory management. - - .. c:type:: PyArrayMapIterObject From d934c2620439cbcf9b6f19b6037c1c67346dd71f Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Tue, 6 Oct 2020 13:40:39 +0200 Subject: [PATCH 193/409] REV: Remove the `uint64`/`signedinteger` specific overload Unfortunately the likes of `self: uint64` don't work with protocols. Revisit this once we add proper support for numerical precision. 
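
For context, a minimal runtime sketch (illustrative only, not part of this
change) of the promotion behaviour behind gh-2524 that the dropped overload
tried to encode:

    >>> import numpy as np
    >>> # no integer dtype can hold both the uint64 and int64 ranges,
    >>> # so mixing them promotes to float64
    >>> (np.uint64(1) + np.int64(1)).dtype
    dtype('float64')
    >>> # bitwise ufuncs have no float64 loop, so the mixed case fails
    >>> np.uint64(1) | np.int64(1)
    Traceback (most recent call last):
      ...
    TypeError: ufunc 'bitwise_or' not supported for the input types, ...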
--- numpy/typing/_callable.py | 7 ++----- numpy/typing/tests/data/fail/bitwise_ops.py | 5 +++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/numpy/typing/_callable.py b/numpy/typing/_callable.py index 943441cf494e..194522977428 100644 --- a/numpy/typing/_callable.py +++ b/numpy/typing/_callable.py @@ -9,7 +9,7 @@ """ import sys -from typing import Union, TypeVar, overload, Any, NoReturn +from typing import Union, TypeVar, overload, Any from numpy import ( _BoolLike, @@ -26,7 +26,6 @@ signedinteger, int32, int64, - uint64, floating, float32, float64, @@ -114,14 +113,12 @@ def __call__(self, __other: float) -> floating: ... def __call__(self, __other: complex) -> complexfloating[floating]: ... class _UnsignedIntBitOp(Protocol): - # The likes of `uint64 | np.signedinteger` will fail as there + # TODO: The likes of `uint64 | np.signedinteger` will fail as there # is no signed integer type large enough to hold a `uint64` # See https://github.com/numpy/numpy/issues/2524 @overload def __call__(self, __other: Union[bool, unsignedinteger]) -> unsignedinteger: ... @overload - def __call__(self: uint64, __other: Union[int, signedinteger]) -> NoReturn: ... - @overload def __call__(self, __other: Union[int, signedinteger]) -> signedinteger: ... class _SignedIntOp(Protocol): diff --git a/numpy/typing/tests/data/fail/bitwise_ops.py b/numpy/typing/tests/data/fail/bitwise_ops.py index 9be6869ad101..3b5be95c0d96 100644 --- a/numpy/typing/tests/data/fail/bitwise_ops.py +++ b/numpy/typing/tests/data/fail/bitwise_ops.py @@ -15,5 +15,6 @@ u8 & f8 # E: No overload variant ~f8 # E: Unsupported operand type -# mypys' error message for `NoReturn` is unfortunately pretty nad -a = u8 | 0 # E: Need type annotation +# mypys' error message for `NoReturn` is unfortunately pretty bad +# TODO: Reenable this once we add support for numerical precision for `number`s +# a = u8 | 0 # E: Need type annotation From 96cb0963433d539c7a601edf4ab2261cbec0b897 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Tue, 6 Oct 2020 17:27:21 +0100 Subject: [PATCH 194/409] DOC: add some missing scalar aliases --- doc/source/reference/arrays.scalars.rst | 28 ++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/doc/source/reference/arrays.scalars.rst b/doc/source/reference/arrays.scalars.rst index 3ab4d73013af..d857f0855af4 100644 --- a/doc/source/reference/arrays.scalars.rst +++ b/doc/source/reference/arrays.scalars.rst @@ -330,15 +330,33 @@ are also provided. Other aliases ~~~~~~~~~~~~~ -.. autoclass:: float_ +The first two of these are conveniences which resemble the names of the +builtin types, in the same style as `bool_`, `int_`, `str_`, `bytes_`, and +`object_`: -.. autoclass:: complex_ +.. autoclass:: numpy.float_ -.. autoclass:: longfloat +.. autoclass:: numpy.complex_ -.. autoclass:: clongfloat +Some more use alternate naming conventions for extended-precision floats and +complex numbers: -.. autoclass:: longcomplex +.. autoclass:: numpy.longfloat + +.. autoclass:: numpy.singlecomplex + +.. autoclass:: numpy.cfloat + +.. autoclass:: numpy.longcomplex + +.. autoclass:: numpy.clongfloat + +The following aliases originate from Python 2, and it is recommended that they +not be used in new code. + +.. autoclass:: numpy.string_ + +.. 
autoclass:: numpy.unicode_ Attributes ========== From 3879e952dff2ff5a16decc997bf72716b8da5d17 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Tue, 6 Oct 2020 18:17:34 +0100 Subject: [PATCH 195/409] DOC: Fix broken references Unfortunately we can't use `np.` as a module name alias --- doc/source/user/basics.types.rst | 64 ++++++++++++++++---------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/doc/source/user/basics.types.rst b/doc/source/user/basics.types.rst index 3c39b35d0feb..ec2af409ae86 100644 --- a/doc/source/user/basics.types.rst +++ b/doc/source/user/basics.types.rst @@ -19,78 +19,78 @@ The primitive types supported are tied closely to those in C: - C type - Description - * - `np.bool_` + * - `numpy.bool_` - ``bool`` - Boolean (True or False) stored as a byte - * - `np.byte` + * - `numpy.byte` - ``signed char`` - Platform-defined - * - `np.ubyte` + * - `numpy.ubyte` - ``unsigned char`` - Platform-defined - * - `np.short` + * - `numpy.short` - ``short`` - Platform-defined - * - `np.ushort` + * - `numpy.ushort` - ``unsigned short`` - Platform-defined - * - `np.intc` + * - `numpy.intc` - ``int`` - Platform-defined - * - `np.uintc` + * - `numpy.uintc` - ``unsigned int`` - Platform-defined - * - `np.int_` + * - `numpy.int_` - ``long`` - Platform-defined - * - `np.uint` + * - `numpy.uint` - ``unsigned long`` - Platform-defined - * - `np.longlong` + * - `numpy.longlong` - ``long long`` - Platform-defined - * - `np.ulonglong` + * - `numpy.ulonglong` - ``unsigned long long`` - Platform-defined - * - `np.half` / `np.float16` + * - `numpy.half` / `numpy.float16` - - Half precision float: sign bit, 5 bits exponent, 10 bits mantissa - * - `np.single` + * - `numpy.single` - ``float`` - Platform-defined single precision float: typically sign bit, 8 bits exponent, 23 bits mantissa - * - `np.double` + * - `numpy.double` - ``double`` - Platform-defined double precision float: typically sign bit, 11 bits exponent, 52 bits mantissa. - * - `np.longdouble` + * - `numpy.longdouble` - ``long double`` - Platform-defined extended-precision float - * - `np.csingle` + * - `numpy.csingle` - ``float complex`` - Complex number, represented by two single-precision floats (real and imaginary components) - * - `np.cdouble` + * - `numpy.cdouble` - ``double complex`` - Complex number, represented by two double-precision floats (real and imaginary components). - * - `np.clongdouble` + * - `numpy.clongdouble` - ``long double complex`` - Complex number, represented by two extended-precision floats (real and imaginary components). 
@@ -105,59 +105,59 @@ aliases are provided: - C type - Description - * - `np.int8` + * - `numpy.int8` - ``int8_t`` - Byte (-128 to 127) - * - `np.int16` + * - `numpy.int16` - ``int16_t`` - Integer (-32768 to 32767) - * - `np.int32` + * - `numpy.int32` - ``int32_t`` - Integer (-2147483648 to 2147483647) - * - `np.int64` + * - `numpy.int64` - ``int64_t`` - Integer (-9223372036854775808 to 9223372036854775807) - * - `np.uint8` + * - `numpy.uint8` - ``uint8_t`` - Unsigned integer (0 to 255) - * - `np.uint16` + * - `numpy.uint16` - ``uint16_t`` - Unsigned integer (0 to 65535) - * - `np.uint32` + * - `numpy.uint32` - ``uint32_t`` - Unsigned integer (0 to 4294967295) - * - `np.uint64` + * - `numpy.uint64` - ``uint64_t`` - Unsigned integer (0 to 18446744073709551615) - * - `np.intp` + * - `numpy.intp` - ``intptr_t`` - Integer used for indexing, typically the same as ``ssize_t`` - * - `np.uintp` + * - `numpy.uintp` - ``uintptr_t`` - Integer large enough to hold a pointer - * - `np.float32` + * - `numpy.float32` - ``float`` - - * - `np.float64` / `np.float_` + * - `numpy.float64` / `numpy.float_` - ``double`` - Note that this matches the precision of the builtin python `float`. - * - `np.complex64` + * - `numpy.complex64` - ``float complex`` - Complex number, represented by two 32-bit floats (real and imaginary components) - * - `np.complex128` / `np.complex_` + * - `numpy.complex128` / `numpy.complex_` - ``double complex`` - Note that this matches the precision of the builtin python `complex`. From 11f051a37bed48c341849e79cac1863a03626f73 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Tue, 6 Oct 2020 15:56:46 -0400 Subject: [PATCH 196/409] TST: Fix doctest for full_like --- numpy/core/numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index a023bf0da32e..25235f738bd2 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -409,7 +409,7 @@ def full_like(a, fill_value, dtype=None, order='K', subok=True, shape=None): >>> y = np.arange(6, dtype=np.double) >>> np.full_like(y, 0.1) - array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1]) + array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1]) """ res = empty_like(a, dtype=dtype, order=order, subok=subok, shape=shape) From dc3e276f0e777c8bc3a5b5cf76636999dde8f82e Mon Sep 17 00:00:00 2001 From: Ross Barnowski Date: Tue, 6 Oct 2020 14:14:23 -0700 Subject: [PATCH 197/409] DOC: rm numpy. from type alias attr definitions. --- doc/source/reference/arrays.scalars.rst | 28 ++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/source/reference/arrays.scalars.rst b/doc/source/reference/arrays.scalars.rst index d857f0855af4..13d117af2432 100644 --- a/doc/source/reference/arrays.scalars.rst +++ b/doc/source/reference/arrays.scalars.rst @@ -261,10 +261,10 @@ are also provided. .. autoclass:: numpy.bool8 -.. attribute:: numpy.int8 - numpy.int16 - numpy.int32 - numpy.int64 +.. attribute:: int8 + int16 + int32 + int64 Aliases for the signed integer types (one of `numpy.byte`, `numpy.short`, `numpy.intc`, `numpy.int_` and `numpy.longlong`) with the specified number @@ -273,10 +273,10 @@ are also provided. Compatible with the C99 ``int8_t``, ``int16_t``, ``int32_t``, and ``int64_t``, respectively. -.. attribute:: numpy.uint8 - numpy.uint16 - numpy.uint32 - numpy.uint64 +.. 
attribute:: uint8 + uint16 + uint32 + uint64 Alias for the unsigned integer types (one of `numpy.byte`, `numpy.short`, `numpy.intc`, `numpy.int_` and `numpy.longlong`) with the specified number @@ -285,7 +285,7 @@ are also provided. Compatible with the C99 ``uint8_t``, ``uint16_t``, ``uint32_t``, and ``uint64_t``, respectively. -.. attribute:: numpy.intp +.. attribute:: intp Alias for the signed integer type (one of `numpy.byte`, `numpy.short`, `numpy.intc`, `numpy.int_` and `np.longlong`) that is the same size as a @@ -295,7 +295,7 @@ are also provided. :Character code: ``'p'`` -.. attribute:: numpy.uintp +.. attribute:: uintp Alias for the unsigned integer type (one of `numpy.byte`, `numpy.short`, `numpy.intc`, `numpy.int_` and `np.longlong`) that is the same size as a @@ -311,8 +311,8 @@ are also provided. .. autoclass:: numpy.float64 -.. attribute:: numpy.float96 - numpy.float128 +.. attribute:: float96 + float128 Alias for `numpy.longdouble`, named after its size in bits. The existence of these aliases depends on the platform. @@ -321,8 +321,8 @@ are also provided. .. autoclass:: numpy.complex128 -.. attribute:: numpy.complex192 - numpy.complex256 +.. attribute:: complex192 + complex256 Alias for `numpy.clongdouble`, named after its size in bits. The existance of these aliases depends on the platform. From 5014f7af75dee81d4636769de69969e8c97b5873 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Thu, 1 Oct 2020 18:32:39 +0200 Subject: [PATCH 198/409] MAINT: Move the `np.core.numeric` functions to their own stub file --- numpy/__init__.pyi | 146 +++++++++-------------------------------- numpy/core/numeric.pyi | 117 +++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 114 deletions(-) create mode 100644 numpy/core/numeric.pyi diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 9966ef1999a5..789bac896e56 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -54,10 +54,9 @@ else: def __index__(self) -> int: ... # Ensures that the stubs are picked up -from . import ( +from numpy import ( char, compat, - core, ctypeslib, emath, fft, @@ -127,6 +126,37 @@ from numpy.core._asarray import ( require as require, ) +from numpy.core.numeric import( + zeros_like as zeros_like, + ones as ones, + ones_like as ones_like, + empty_like as empty_like, + full as full, + full_like as full_like, + count_nonzero as count_nonzero, + isfortran as isfortran, + argwhere as argwhere, + flatnonzero as flatnonzero, + correlate as correlate, + convolve as convolve, + outer as outer, + tensordot as tensordot, + roll as roll, + rollaxis as rollaxis, + moveaxis as moveaxis, + cross as cross, + indices as indices, + fromfunction as fromfunction, + isscalar as isscalar, + binary_repr as binary_repr, + base_repr as base_repr, + identity as identity, + allclose as allclose, + isclose as isclose, + array_equal as array_equal, + array_equiv as array_equiv, +) + # Add an object to `__all__` if their stubs are defined in an external file; # their stubs will not be recognized otherwise. # NOTE: This is redundant for objects defined within this file. @@ -1552,13 +1582,6 @@ def zeros( *, like: ArrayLike = ..., ) -> ndarray: ... -def ones( - shape: _ShapeLike, - dtype: DtypeLike = ..., - order: _OrderCF = ..., - *, - like: ArrayLike = ..., -) -> ndarray: ... def empty( shape: _ShapeLike, dtype: DtypeLike = ..., @@ -1566,111 +1589,6 @@ def empty( *, like: ArrayLike = ..., ) -> ndarray: ... 
-def zeros_like( - a: ArrayLike, - dtype: DtypeLike = ..., - order: _OrderKACF = ..., - subok: bool = ..., - shape: Optional[Union[int, Sequence[int]]] = ..., -) -> ndarray: ... -def ones_like( - a: ArrayLike, - dtype: DtypeLike = ..., - order: _OrderKACF = ..., - subok: bool = ..., - shape: Optional[_ShapeLike] = ..., -) -> ndarray: ... -def empty_like( - a: ArrayLike, - dtype: DtypeLike = ..., - order: _OrderKACF = ..., - subok: bool = ..., - shape: Optional[_ShapeLike] = ..., -) -> ndarray: ... -def full( - shape: _ShapeLike, - fill_value: Any, - dtype: DtypeLike = ..., - order: _OrderCF = ..., - *, - like: ArrayLike = ..., -) -> ndarray: ... -def full_like( - a: ArrayLike, - fill_value: Any, - dtype: DtypeLike = ..., - order: _OrderKACF = ..., - subok: bool = ..., - shape: Optional[_ShapeLike] = ..., -) -> ndarray: ... -def count_nonzero( - a: ArrayLike, axis: Optional[Union[int, Tuple[int], Tuple[int, int]]] = ... -) -> Union[int, ndarray]: ... -def isfortran(a: ndarray) -> bool: ... -def argwhere(a: ArrayLike) -> ndarray: ... -def flatnonzero(a: ArrayLike) -> ndarray: ... - -_CorrelateMode = Literal["valid", "same", "full"] - -def correlate(a: ArrayLike, v: ArrayLike, mode: _CorrelateMode = ...) -> ndarray: ... -def convolve(a: ArrayLike, v: ArrayLike, mode: _CorrelateMode = ...) -> ndarray: ... -def outer(a: ArrayLike, b: ArrayLike, out: ndarray = ...) -> ndarray: ... -def tensordot( - a: ArrayLike, - b: ArrayLike, - axes: Union[ - int, Tuple[int, int], Tuple[Tuple[int, int], ...], Tuple[List[int, int], ...] - ] = ..., -) -> ndarray: ... -def roll( - a: ArrayLike, - shift: Union[int, Tuple[int, ...]], - axis: Optional[Union[int, Tuple[int, ...]]] = ..., -) -> ndarray: ... -def rollaxis(a: ArrayLike, axis: int, start: int = ...) -> ndarray: ... -def moveaxis( - a: ndarray, - source: Union[int, Sequence[int]], - destination: Union[int, Sequence[int]], -) -> ndarray: ... -def cross( - a: ArrayLike, - b: ArrayLike, - axisa: int = ..., - axisb: int = ..., - axisc: int = ..., - axis: Optional[int] = ..., -) -> ndarray: ... -def indices( - dimensions: Sequence[int], dtype: dtype = ..., sparse: bool = ... -) -> Union[ndarray, Tuple[ndarray, ...]]: ... -def fromfunction( - function: Callable, - shape: Tuple[int, int], - *, - like: ArrayLike = ..., - **kwargs, -) -> Any: ... -def isscalar(element: Any) -> bool: ... -def binary_repr(num: int, width: Optional[int] = ...) -> str: ... -def base_repr(number: int, base: int = ..., padding: int = ...) -> str: ... -def identity(n: int, dtype: DtypeLike = ..., *, like: ArrayLike = ...) -> ndarray: ... -def allclose( - a: ArrayLike, - b: ArrayLike, - rtol: float = ..., - atol: float = ..., - equal_nan: bool = ..., -) -> bool: ... -def isclose( - a: ArrayLike, - b: ArrayLike, - rtol: float = ..., - atol: float = ..., - equal_nan: bool = ..., -) -> Union[bool_, ndarray]: ... -def array_equal(a1: ArrayLike, a2: ArrayLike) -> bool: ... -def array_equiv(a1: ArrayLike, a2: ArrayLike) -> bool: ... # # Constants diff --git a/numpy/core/numeric.pyi b/numpy/core/numeric.pyi new file mode 100644 index 000000000000..19720fbdcc9b --- /dev/null +++ b/numpy/core/numeric.pyi @@ -0,0 +1,117 @@ +from typing import Any, Optional, Union, Sequence, Tuple + +from numpy import ndarray, dtype, bool_, _OrderKACF, _OrderCF +from numpy.typing import ArrayLike, DtypeLike, _ShapeLike + +def zeros_like( + a: ArrayLike, + dtype: DtypeLike = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: Optional[Union[int, Sequence[int]]] = ..., +) -> ndarray: ... 
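+# `ones` below (like `full`, `identity` and `fromfunction`) takes the
+# keyword-only `like` parameter from NEP 35's `like=` array-creation
+# dispatch protocol.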
+def ones( + shape: _ShapeLike, + dtype: DtypeLike = ..., + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> ndarray: ... +def ones_like( + a: ArrayLike, + dtype: DtypeLike = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: Optional[_ShapeLike] = ..., +) -> ndarray: ... +def empty_like( + a: ArrayLike, + dtype: DtypeLike = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: Optional[_ShapeLike] = ..., +) -> ndarray: ... +def full( + shape: _ShapeLike, + fill_value: Any, + dtype: DtypeLike = ..., + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> ndarray: ... +def full_like( + a: ArrayLike, + fill_value: Any, + dtype: DtypeLike = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: Optional[_ShapeLike] = ..., +) -> ndarray: ... +def count_nonzero( + a: ArrayLike, axis: Optional[Union[int, Tuple[int], Tuple[int, int]]] = ... +) -> Union[int, ndarray]: ... +def isfortran(a: ndarray) -> bool: ... +def argwhere(a: ArrayLike) -> ndarray: ... +def flatnonzero(a: ArrayLike) -> ndarray: ... + +_CorrelateMode = Literal["valid", "same", "full"] + +def correlate(a: ArrayLike, v: ArrayLike, mode: _CorrelateMode = ...) -> ndarray: ... +def convolve(a: ArrayLike, v: ArrayLike, mode: _CorrelateMode = ...) -> ndarray: ... +def outer(a: ArrayLike, b: ArrayLike, out: ndarray = ...) -> ndarray: ... +def tensordot( + a: ArrayLike, + b: ArrayLike, + axes: Union[ + int, Tuple[int, int], Tuple[Tuple[int, int], ...], Tuple[List[int, int], ...] + ] = ..., +) -> ndarray: ... +def roll( + a: ArrayLike, + shift: Union[int, Tuple[int, ...]], + axis: Optional[Union[int, Tuple[int, ...]]] = ..., +) -> ndarray: ... +def rollaxis(a: ArrayLike, axis: int, start: int = ...) -> ndarray: ... +def moveaxis( + a: ndarray, + source: Union[int, Sequence[int]], + destination: Union[int, Sequence[int]], +) -> ndarray: ... +def cross( + a: ArrayLike, + b: ArrayLike, + axisa: int = ..., + axisb: int = ..., + axisc: int = ..., + axis: Optional[int] = ..., +) -> ndarray: ... +def indices( + dimensions: Sequence[int], dtype: dtype = ..., sparse: bool = ... +) -> Union[ndarray, Tuple[ndarray, ...]]: ... +def fromfunction( + function: Callable, + shape: Tuple[int, int], + *, + like: ArrayLike = ..., + **kwargs, +) -> Any: ... +def isscalar(element: Any) -> bool: ... +def binary_repr(num: int, width: Optional[int] = ...) -> str: ... +def base_repr(number: int, base: int = ..., padding: int = ...) -> str: ... +def identity(n: int, dtype: DtypeLike = ..., *, like: ArrayLike = ...) -> ndarray: ... +def allclose( + a: ArrayLike, + b: ArrayLike, + rtol: float = ..., + atol: float = ..., + equal_nan: bool = ..., +) -> bool: ... +def isclose( + a: ArrayLike, + b: ArrayLike, + rtol: float = ..., + atol: float = ..., + equal_nan: bool = ..., +) -> Union[bool_, ndarray]: ... +def array_equal(a1: ArrayLike, a2: ArrayLike) -> bool: ... +def array_equiv(a1: ArrayLike, a2: ArrayLike) -> bool: ... From 789b21719fe89a3b38cae5a8800a4c0c3886a0f0 Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Tue, 6 Oct 2020 16:08:06 -0700 Subject: [PATCH 199/409] NEP: update NEP 42 with discussion of type hinting applications (#17447) * NEP: update NEP 42 with discussion of type hinting applications As discussed in https://github.com/numpy/numpy/pull/16759, the new DType classes provide a good path forward for making `ndarray` generic over DType. Update NEP 42 to discuss those applications in more detail. * NEP: discuss typing for use scalar types in NEP 42 Also clean up the language a bit. 
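
A hypothetical sketch of what such annotations could look like; none of
these names exist yet, and `Float64` stands in for the kind of alias the
NEP suggests adding to `numpy.typing`:

    from typing import Generic, TypeVar

    class DType: ...            # stand-in for the proposed DType classes
    class Float64(DType): ...   # what `np.dtype[np.float64]` would name

    _D = TypeVar("_D", bound=DType)

    class ndarray(Generic[_D]): ...   # ndarray made generic over its DType

    def empty_f64() -> ndarray[Float64]:
        return ndarray()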
--- doc/neps/nep-0042-new-dtypes.rst | 51 ++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/doc/neps/nep-0042-new-dtypes.rst b/doc/neps/nep-0042-new-dtypes.rst index 2d1e3a329e6c..99887a45100b 100644 --- a/doc/neps/nep-0042-new-dtypes.rst +++ b/doc/neps/nep-0042-new-dtypes.rst @@ -259,21 +259,48 @@ including the type hierarchy and the use of abstract DTypes. Class getter ============================================================================== -To create a dtype instance from a scalar type users now call ``np.dtype`` (for -instance, ``np.dtype(np.int64)``). - -To get the DType of a scalar type, we propose this getter syntax:: +To create a DType instance from a scalar type users now call +``np.dtype`` (for instance, ``np.dtype(np.int64)``). Sometimes it is +also necessary to access the underlying DType class; this comes up in +particular with type hinting because the "type" of a DType instance is +the DType class. Taking inspiration from type hinting, we propose the +following getter syntax:: np.dtype[np.int64] -The notation works equally well with built-in and user-defined DTypes -and is inspired by and potentially useful for type hinting. +to get the DType class corresponding to a scalar type. The notation +works equally well with built-in and user-defined DTypes. This getter eliminates the need to create an explicit name for every -DType, crowding the ``np`` namespace; the getter itself signifies the type. +DType, crowding the ``np`` namespace; the getter itself signifies the +type. It also opens the possibility of making ``np.ndarray`` generic +over DType class using annotations like:: + + np.ndarray[np.dtype[np.float64]] + +The above is fairly verbose, so it is possible that we will include +aliases like:: + + Float64 = np.dtype[np.float64] + +in ``numpy.typing``, thus keeping annotations concise but still +avoiding crowding the ``np`` namespace as discussed above. For a +user-defined DType:: + + class UserDtype(dtype): ... + +one can do ``np.ndarray[UserDtype]``, keeping annotations concise in +that case without introducing boilerplate in NumPy itself. For a user +user-defined scalar type:: + + class UserScalar(generic): ... + +we would need to add a typing overload to ``dtype``:: + + @overload + __new__(cls, dtype: Type[UserScalar], ...) -> UserDtype -Since getter calls won't be needed often, this is unlikely to be burdensome. -Classes can also offer concise alternatives. +to allow ``np.dtype[UserScalar]``. The initial implementation probably will return only concrete (not abstract) DTypes. @@ -393,7 +420,7 @@ casting and array coercion, which are described in detail below. sortfunction`` that must return ``NotImplemented`` if the given ``sortkind`` is not known. -* Functions that cannot be removed are implemented as special methods. +* Functions that cannot be removed are implemented as special methods. Many of these were previously defined part of the :c:type:`PyArray_ArrFuncs` slot of the dtype instance (``PyArray_Descr *``) and include functions such as ``nonzero``, ``fill`` (used for ``np.arange``), and @@ -408,7 +435,7 @@ casting and array coercion, which are described in detail below. object to ensure uniqueness for all DTypes. On the C side, ``kind`` and ``char`` are set to ``\0`` (NULL character). While ``kind`` will be discouraged, the current ``np.issubdtype`` - may remain the preferred method for this type of check. + may remain the preferred method for this type of check. 
* A method ``ensure_canonical(self) -> dtype`` returns a new dtype (or ``self``) with the ``canonical`` flag set. @@ -1229,7 +1256,7 @@ Non-parametric dtypes do not have to implement: * ``discover_descr_from_pyobject`` (uses ``default_descr`` instead) * ``common_instance`` (uses ``default_descr`` instead) -* ``ensure_canonical`` (uses ``default_descr`` instead). +* ``ensure_canonical`` (uses ``default_descr`` instead). Sorting is expected to be implemented using: From f249b1727c4b2b83c5ab524d44ec918a1882f435 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Wed, 7 Oct 2020 02:01:40 +0200 Subject: [PATCH 200/409] MAINT: Add an empty `__init__.pyi` file to `np.core` Prevents mypy crashes related to import cycles. See https://github.com/numpy/numpy/issues/17316 --- numpy/core/__init__.pyi | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 numpy/core/__init__.pyi diff --git a/numpy/core/__init__.pyi b/numpy/core/__init__.pyi new file mode 100644 index 000000000000..e69de29bb2d1 From 590201aa6ef313f9d34fa4136d6f0b3b3c36e951 Mon Sep 17 00:00:00 2001 From: Sahil Siddiq <39989901+valdaarhun@users.noreply.github.com> Date: Wed, 7 Oct 2020 10:38:29 +0530 Subject: [PATCH 201/409] MAINT: Chain exceptions in "_polybase.py" (#17453) * Removed "from" keyword and changed "raise e" to "raise" --- numpy/polynomial/_polybase.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/numpy/polynomial/_polybase.py b/numpy/polynomial/_polybase.py index f4a67a2227f9..59c380f10e3a 100644 --- a/numpy/polynomial/_polybase.py +++ b/numpy/polynomial/_polybase.py @@ -547,8 +547,8 @@ def __divmod__(self, other): othercoef = self._get_coefficients(other) try: quo, rem = self._div(self.coef, othercoef) - except ZeroDivisionError as e: - raise e + except ZeroDivisionError: + raise except Exception: return NotImplemented quo = self.__class__(quo, self.domain, self.window) @@ -605,8 +605,8 @@ def __rmod__(self, other): def __rdivmod__(self, other): try: quo, rem = self._div(other, self.coef) - except ZeroDivisionError as e: - raise e + except ZeroDivisionError: + raise except Exception: return NotImplemented quo = self.__class__(quo, self.domain, self.window) From 58fe5c6d371dd76e9163dff5467f9f3bc328ce4a Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Tue, 6 Oct 2020 22:26:25 -0700 Subject: [PATCH 202/409] ENH: type np.unicode_ as np.str_ The two are aliases; i.e. >>> np.unicode_ is np.str_ True so make them aliases on the typing level too. --- numpy/__init__.pyi | 3 ++- numpy/typing/tests/data/reveal/scalars.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 9966ef1999a5..1a3116d83dd1 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -468,7 +468,6 @@ uint0: Any uintc: Any uintp: Any ulonglong: Any -unicode_: Any union1d: Any unique: Any unpackbits: Any @@ -1527,6 +1526,8 @@ class str_(character, str): self, __value: bytes, encoding: str = ..., errors: str = ... ) -> None: ... 
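+# At runtime `np.unicode_ is np.str_` is True, so the stub expresses the
+# alias as a plain assignment rather than a separate class.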
+unicode_ = str_ + # TODO(alan): Platform dependent types # longcomplex, longdouble, longfloat # bytes, short, intc, intp, longlong diff --git a/numpy/typing/tests/data/reveal/scalars.py b/numpy/typing/tests/data/reveal/scalars.py index ec3713b0fc42..b0dd762dfc7a 100644 --- a/numpy/typing/tests/data/reveal/scalars.py +++ b/numpy/typing/tests/data/reveal/scalars.py @@ -14,3 +14,5 @@ reveal_type(np.complex64().real) # E: numpy.float32 reveal_type(np.complex128().imag) # E: numpy.float64 + +reveal_type(np.unicode_('foo')) # E: numpy.str_ From 2c0df1e6bf7da7c49f20622c5207e691aa078281 Mon Sep 17 00:00:00 2001 From: takanori-pskq Date: Wed, 7 Oct 2020 13:36:40 +0900 Subject: [PATCH 203/409] DOC: Fix the entries for members of structures --- .../reference/c-api/types-and-structures.rst | 502 +++++++++--------- 1 file changed, 251 insertions(+), 251 deletions(-) diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst index 59361fd62818..8759af6a4638 100644 --- a/doc/source/reference/c-api/types-and-structures.rst +++ b/doc/source/reference/c-api/types-and-structures.rst @@ -93,84 +93,84 @@ PyArray_Type and PyArrayObject PyObject *weakreflist; } PyArrayObject; -.. c:macro:: PyArrayObject.PyObject_HEAD + .. c:macro:: PyObject_HEAD - This is needed by all Python objects. It consists of (at least) - a reference count member ( ``ob_refcnt`` ) and a pointer to the - typeobject ( ``ob_type`` ). (Other elements may also be present - if Python was compiled with special options see - Include/object.h in the Python source tree for more - information). The ob_type member points to a Python type - object. + This is needed by all Python objects. It consists of (at least) + a reference count member ( ``ob_refcnt`` ) and a pointer to the + typeobject ( ``ob_type`` ). (Other elements may also be present + if Python was compiled with special options see + Include/object.h in the Python source tree for more + information). The ob_type member points to a Python type + object. -.. c:member:: char *PyArrayObject.data + .. c:member:: char *data - Accessible via :c:data:`PyArray_DATA`, this data member is a - pointer to the first element of the array. This pointer can - (and normally should) be recast to the data type of the array. + Accessible via :c:data:`PyArray_DATA`, this data member is a + pointer to the first element of the array. This pointer can + (and normally should) be recast to the data type of the array. -.. c:member:: int PyArrayObject.nd + .. c:member:: int nd - An integer providing the number of dimensions for this - array. When nd is 0, the array is sometimes called a rank-0 - array. Such arrays have undefined dimensions and strides and - cannot be accessed. Macro :c:data:`PyArray_NDIM` defined in - ``ndarraytypes.h`` points to this data member. :c:data:`NPY_MAXDIMS` - is the largest number of dimensions for any array. + An integer providing the number of dimensions for this + array. When nd is 0, the array is sometimes called a rank-0 + array. Such arrays have undefined dimensions and strides and + cannot be accessed. Macro :c:data:`PyArray_NDIM` defined in + ``ndarraytypes.h`` points to this data member. :c:data:`NPY_MAXDIMS` + is the largest number of dimensions for any array. -.. c:member:: npy_intp PyArrayObject.dimensions + .. c:member:: npy_intp dimensions - An array of integers providing the shape in each dimension as - long as nd :math:`\geq` 1. 
The integer is always large enough - to hold a pointer on the platform, so the dimension size is - only limited by memory. :c:data:`PyArray_DIMS` is the macro - associated with this data member. + An array of integers providing the shape in each dimension as + long as nd :math:`\geq` 1. The integer is always large enough + to hold a pointer on the platform, so the dimension size is + only limited by memory. :c:data:`PyArray_DIMS` is the macro + associated with this data member. -.. c:member:: npy_intp *PyArrayObject.strides + .. c:member:: npy_intp *strides - An array of integers providing for each dimension the number of - bytes that must be skipped to get to the next element in that - dimension. Associated with macro :c:data:`PyArray_STRIDES`. + An array of integers providing for each dimension the number of + bytes that must be skipped to get to the next element in that + dimension. Associated with macro :c:data:`PyArray_STRIDES`. -.. c:member:: PyObject *PyArrayObject.base + .. c:member:: PyObject *base - Pointed to by :c:data:`PyArray_BASE`, this member is used to hold a - pointer to another Python object that is related to this array. - There are two use cases: + Pointed to by :c:data:`PyArray_BASE`, this member is used to hold a + pointer to another Python object that is related to this array. + There are two use cases: - - If this array does not own its own memory, then base points to the - Python object that owns it (perhaps another array object) - - If this array has the (deprecated) :c:data:`NPY_ARRAY_UPDATEIFCOPY` or - :c:data:`NPY_ARRAY_WRITEBACKIFCOPY` flag set, then this array is a working - copy of a "misbehaved" array. + - If this array does not own its own memory, then base points to the + Python object that owns it (perhaps another array object) + - If this array has the (deprecated) :c:data:`NPY_ARRAY_UPDATEIFCOPY` or + :c:data:`NPY_ARRAY_WRITEBACKIFCOPY` flag set, then this array is a working + copy of a "misbehaved" array. - When ``PyArray_ResolveWritebackIfCopy`` is called, the array pointed to - by base will be updated with the contents of this array. + When ``PyArray_ResolveWritebackIfCopy`` is called, the array pointed to + by base will be updated with the contents of this array. -.. c:member:: PyArray_Descr *PyArrayObject.descr + .. c:member:: PyArray_Descr *descr - A pointer to a data-type descriptor object (see below). The - data-type descriptor object is an instance of a new built-in - type which allows a generic description of memory. There is a - descriptor structure for each data type supported. This - descriptor structure contains useful information about the type - as well as a pointer to a table of function pointers to - implement specific functionality. As the name suggests, it is - associated with the macro :c:data:`PyArray_DESCR`. + A pointer to a data-type descriptor object (see below). The + data-type descriptor object is an instance of a new built-in + type which allows a generic description of memory. There is a + descriptor structure for each data type supported. This + descriptor structure contains useful information about the type + as well as a pointer to a table of function pointers to + implement specific functionality. As the name suggests, it is + associated with the macro :c:data:`PyArray_DESCR`. -.. c:member:: int PyArrayObject.flags + .. c:member:: int flags - Pointed to by the macro :c:data:`PyArray_FLAGS`, this data member represents - the flags indicating how the memory pointed to by data is to be - interpreted. 
Possible flags are :c:data:`NPY_ARRAY_C_CONTIGUOUS`, - :c:data:`NPY_ARRAY_F_CONTIGUOUS`, :c:data:`NPY_ARRAY_OWNDATA`, - :c:data:`NPY_ARRAY_ALIGNED`, :c:data:`NPY_ARRAY_WRITEABLE`, - :c:data:`NPY_ARRAY_WRITEBACKIFCOPY`, and :c:data:`NPY_ARRAY_UPDATEIFCOPY`. + Pointed to by the macro :c:data:`PyArray_FLAGS`, this data member represents + the flags indicating how the memory pointed to by data is to be + interpreted. Possible flags are :c:data:`NPY_ARRAY_C_CONTIGUOUS`, + :c:data:`NPY_ARRAY_F_CONTIGUOUS`, :c:data:`NPY_ARRAY_OWNDATA`, + :c:data:`NPY_ARRAY_ALIGNED`, :c:data:`NPY_ARRAY_WRITEABLE`, + :c:data:`NPY_ARRAY_WRITEBACKIFCOPY`, and :c:data:`NPY_ARRAY_UPDATEIFCOPY`. -.. c:member:: PyObject *PyArrayObject.weakreflist + .. c:member:: PyObject *weakreflist - This member allows array objects to have weak references (using the - weakref module). + This member allows array objects to have weak references (using the + weakref module). PyArrayDescr_Type and PyArray_Descr @@ -226,195 +226,195 @@ PyArrayDescr_Type and PyArray_Descr npy_hash_t hash; } PyArray_Descr; -.. c:member:: PyTypeObject *PyArray_Descr.typeobj + .. c:member:: PyTypeObject *typeobj - Pointer to a typeobject that is the corresponding Python type for - the elements of this array. For the builtin types, this points to - the corresponding array scalar. For user-defined types, this - should point to a user-defined typeobject. This typeobject can - either inherit from array scalars or not. If it does not inherit - from array scalars, then the :c:data:`NPY_USE_GETITEM` and - :c:data:`NPY_USE_SETITEM` flags should be set in the ``flags`` member. + Pointer to a typeobject that is the corresponding Python type for + the elements of this array. For the builtin types, this points to + the corresponding array scalar. For user-defined types, this + should point to a user-defined typeobject. This typeobject can + either inherit from array scalars or not. If it does not inherit + from array scalars, then the :c:data:`NPY_USE_GETITEM` and + :c:data:`NPY_USE_SETITEM` flags should be set in the ``flags`` member. -.. c:member:: char PyArray_Descr.kind + .. c:member:: char kind - A character code indicating the kind of array (using the array - interface typestring notation). A 'b' represents Boolean, a 'i' - represents signed integer, a 'u' represents unsigned integer, 'f' - represents floating point, 'c' represents complex floating point, 'S' - represents 8-bit zero-terminated bytes, 'U' represents 32-bit/character - unicode string, and 'V' represents arbitrary. + A character code indicating the kind of array (using the array + interface typestring notation). A 'b' represents Boolean, a 'i' + represents signed integer, a 'u' represents unsigned integer, 'f' + represents floating point, 'c' represents complex floating point, 'S' + represents 8-bit zero-terminated bytes, 'U' represents 32-bit/character + unicode string, and 'V' represents arbitrary. -.. c:member:: char PyArray_Descr.type + .. c:member:: char type - A traditional character code indicating the data type. + A traditional character code indicating the data type. -.. c:member:: char PyArray_Descr.byteorder + .. c:member:: char byteorder - A character indicating the byte-order: '>' (big-endian), '<' (little- - endian), '=' (native), '\|' (irrelevant, ignore). All builtin data- - types have byteorder '='. + A character indicating the byte-order: '>' (big-endian), '<' (little- + endian), '=' (native), '\|' (irrelevant, ignore). All builtin data- + types have byteorder '='. -.. 
c:member:: char PyArray_Descr.flags + .. c:member:: char flags - A data-type bit-flag that determines if the data-type exhibits object- - array like behavior. Each bit in this member is a flag which are named - as: + A data-type bit-flag that determines if the data-type exhibits object- + array like behavior. Each bit in this member is a flag which are named + as: - .. c:macro:: NPY_ITEM_REFCOUNT + .. c:macro:: NPY_ITEM_REFCOUNT - Indicates that items of this data-type must be reference - counted (using :c:func:`Py_INCREF` and :c:func:`Py_DECREF` ). + Indicates that items of this data-type must be reference + counted (using :c:func:`Py_INCREF` and :c:func:`Py_DECREF` ). - .. c:macro:: NPY_ITEM_HASOBJECT + .. c:macro:: NPY_ITEM_HASOBJECT - Same as :c:data:`NPY_ITEM_REFCOUNT`. + Same as :c:data:`NPY_ITEM_REFCOUNT`. - .. c:macro:: NPY_LIST_PICKLE + .. c:macro:: NPY_LIST_PICKLE - Indicates arrays of this data-type must be converted to a list - before pickling. + Indicates arrays of this data-type must be converted to a list + before pickling. - .. c:macro:: NPY_ITEM_IS_POINTER + .. c:macro:: NPY_ITEM_IS_POINTER - Indicates the item is a pointer to some other data-type + Indicates the item is a pointer to some other data-type - .. c:macro:: NPY_NEEDS_INIT + .. c:macro:: NPY_NEEDS_INIT - Indicates memory for this data-type must be initialized (set - to 0) on creation. + Indicates memory for this data-type must be initialized (set + to 0) on creation. - .. c:macro:: NPY_NEEDS_PYAPI + .. c:macro:: NPY_NEEDS_PYAPI - Indicates this data-type requires the Python C-API during - access (so don't give up the GIL if array access is going to - be needed). + Indicates this data-type requires the Python C-API during + access (so don't give up the GIL if array access is going to + be needed). - .. c:macro:: NPY_USE_GETITEM + .. c:macro:: NPY_USE_GETITEM - On array access use the ``f->getitem`` function pointer - instead of the standard conversion to an array scalar. Must - use if you don't define an array scalar to go along with - the data-type. + On array access use the ``f->getitem`` function pointer + instead of the standard conversion to an array scalar. Must + use if you don't define an array scalar to go along with + the data-type. - .. c:macro:: NPY_USE_SETITEM + .. c:macro:: NPY_USE_SETITEM - When creating a 0-d array from an array scalar use - ``f->setitem`` instead of the standard copy from an array - scalar. Must use if you don't define an array scalar to go - along with the data-type. + When creating a 0-d array from an array scalar use + ``f->setitem`` instead of the standard copy from an array + scalar. Must use if you don't define an array scalar to go + along with the data-type. - .. c:macro:: NPY_FROM_FIELDS + .. c:macro:: NPY_FROM_FIELDS - The bits that are inherited for the parent data-type if these - bits are set in any field of the data-type. Currently ( - :c:data:`NPY_NEEDS_INIT` \| :c:data:`NPY_LIST_PICKLE` \| - :c:data:`NPY_ITEM_REFCOUNT` \| :c:data:`NPY_NEEDS_PYAPI` ). + The bits that are inherited for the parent data-type if these + bits are set in any field of the data-type. Currently ( + :c:data:`NPY_NEEDS_INIT` \| :c:data:`NPY_LIST_PICKLE` \| + :c:data:`NPY_ITEM_REFCOUNT` \| :c:data:`NPY_NEEDS_PYAPI` ). - .. c:macro:: NPY_OBJECT_DTYPE_FLAGS + .. 
c:macro:: NPY_OBJECT_DTYPE_FLAGS - Bits set for the object data-type: ( :c:data:`NPY_LIST_PICKLE` - \| :c:data:`NPY_USE_GETITEM` \| :c:data:`NPY_ITEM_IS_POINTER` \| - :c:data:`NPY_ITEM_REFCOUNT` \| :c:data:`NPY_NEEDS_INIT` \| - :c:data:`NPY_NEEDS_PYAPI`). + Bits set for the object data-type: ( :c:data:`NPY_LIST_PICKLE` + \| :c:data:`NPY_USE_GETITEM` \| :c:data:`NPY_ITEM_IS_POINTER` \| + :c:data:`NPY_ITEM_REFCOUNT` \| :c:data:`NPY_NEEDS_INIT` \| + :c:data:`NPY_NEEDS_PYAPI`). - .. c:function:: PyDataType_FLAGCHK(PyArray_Descr *dtype, int flags) + .. c:function:: PyDataType_FLAGCHK(PyArray_Descr *dtype, int flags) - Return true if all the given flags are set for the data-type - object. + Return true if all the given flags are set for the data-type + object. - .. c:function:: PyDataType_REFCHK(PyArray_Descr *dtype) + .. c:function:: PyDataType_REFCHK(PyArray_Descr *dtype) - Equivalent to :c:func:`PyDataType_FLAGCHK` (*dtype*, - :c:data:`NPY_ITEM_REFCOUNT`). + Equivalent to :c:func:`PyDataType_FLAGCHK` (*dtype*, + :c:data:`NPY_ITEM_REFCOUNT`). -.. c:member:: int PyArray_Descr.type_num + .. c:member:: int type_num - A number that uniquely identifies the data type. For new data-types, - this number is assigned when the data-type is registered. + A number that uniquely identifies the data type. For new data-types, + this number is assigned when the data-type is registered. -.. c:member:: int PyArray_Descr.elsize + .. c:member:: int elsize - For data types that are always the same size (such as long), this - holds the size of the data type. For flexible data types where - different arrays can have a different elementsize, this should be - 0. + For data types that are always the same size (such as long), this + holds the size of the data type. For flexible data types where + different arrays can have a different elementsize, this should be + 0. -.. c:member:: int PyArray_Descr.alignment + .. c:member:: int alignment - A number providing alignment information for this data type. - Specifically, it shows how far from the start of a 2-element - structure (whose first element is a ``char`` ), the compiler - places an item of this type: ``offsetof(struct {char c; type v;}, - v)`` + A number providing alignment information for this data type. + Specifically, it shows how far from the start of a 2-element + structure (whose first element is a ``char`` ), the compiler + places an item of this type: ``offsetof(struct {char c; type v;}, + v)`` -.. c:member:: PyArray_ArrayDescr *PyArray_Descr.subarray + .. c:member:: PyArray_ArrayDescr *subarray - If this is non- ``NULL``, then this data-type descriptor is a - C-style contiguous array of another data-type descriptor. In - other-words, each element that this descriptor describes is - actually an array of some other base descriptor. This is most - useful as the data-type descriptor for a field in another - data-type descriptor. The fields member should be ``NULL`` if this - is non- ``NULL`` (the fields member of the base descriptor can be - non- ``NULL`` however). + If this is non- ``NULL``, then this data-type descriptor is a + C-style contiguous array of another data-type descriptor. In + other-words, each element that this descriptor describes is + actually an array of some other base descriptor. This is most + useful as the data-type descriptor for a field in another + data-type descriptor. The fields member should be ``NULL`` if this + is non- ``NULL`` (the fields member of the base descriptor can be + non- ``NULL`` however). - .. 
c:type:: PyArray_ArrayDescr + .. c:type:: PyArray_ArrayDescr - .. code-block:: c + .. code-block:: c - typedef struct { - PyArray_Descr *base; - PyObject *shape; - } PyArray_ArrayDescr; + typedef struct { + PyArray_Descr *base; + PyObject *shape; + } PyArray_ArrayDescr; - .. c:member:: PyArray_Descr *PyArray_ArrayDescr.base + .. c:member:: PyArray_Descr *base - The data-type-descriptor object of the base-type. + The data-type-descriptor object of the base-type. - .. c:member:: PyObject *PyArray_ArrayDescr.shape + .. c:member:: PyObject *shape - The shape (always C-style contiguous) of the sub-array as a Python - tuple. + The shape (always C-style contiguous) of the sub-array as a Python + tuple. -.. c:member:: PyObject *PyArray_Descr.fields + .. c:member:: PyObject *fields - If this is non-NULL, then this data-type-descriptor has fields - described by a Python dictionary whose keys are names (and also - titles if given) and whose values are tuples that describe the - fields. Recall that a data-type-descriptor always describes a - fixed-length set of bytes. A field is a named sub-region of that - total, fixed-length collection. A field is described by a tuple - composed of another data- type-descriptor and a byte - offset. Optionally, the tuple may contain a title which is - normally a Python string. These tuples are placed in this - dictionary keyed by name (and also title if given). + If this is non-NULL, then this data-type-descriptor has fields + described by a Python dictionary whose keys are names (and also + titles if given) and whose values are tuples that describe the + fields. Recall that a data-type-descriptor always describes a + fixed-length set of bytes. A field is a named sub-region of that + total, fixed-length collection. A field is described by a tuple + composed of another data- type-descriptor and a byte + offset. Optionally, the tuple may contain a title which is + normally a Python string. These tuples are placed in this + dictionary keyed by name (and also title if given). -.. c:member:: PyObject *PyArray_Descr.names + .. c:member:: PyObject *names - An ordered tuple of field names. It is NULL if no field is - defined. + An ordered tuple of field names. It is NULL if no field is + defined. -.. c:member:: PyArray_ArrFuncs *PyArray_Descr.f + .. c:member:: PyArray_ArrFuncs *f - A pointer to a structure containing functions that the type needs - to implement internal features. These functions are not the same - thing as the universal functions (ufuncs) described later. Their - signatures can vary arbitrarily. + A pointer to a structure containing functions that the type needs + to implement internal features. These functions are not the same + thing as the universal functions (ufuncs) described later. Their + signatures can vary arbitrarily. -.. c:member:: PyObject *PyArray_Descr.metadata + .. c:member:: PyObject *metadata - Metadata about this dtype. + Metadata about this dtype. -.. c:member:: NpyAuxData *PyArray_Descr.c_metadata + .. c:member:: NpyAuxData *c_metadata - Metadata specific to the C implementation - of the particular dtype. Added for NumPy 1.7.0. + Metadata specific to the C implementation + of the particular dtype. Added for NumPy 1.7.0. -.. c:member:: Npy_hash_t *PyArray_Descr.hash + .. c:member:: Npy_hash_t *hash - Currently unused. Reserved for future use in caching - hash values. + Currently unused. Reserved for future use in caching + hash values. .. c:type:: PyArray_ArrFuncs @@ -793,31 +793,31 @@ PyUFunc_Type and PyUFuncObject } PyUFuncObject; - .. 
c:macro: PyUFuncObject.PyObject_HEAD + .. c:macro: PyObject_HEAD required for all Python objects. - .. c:member:: int PyUFuncObject.nin + .. c:member:: int nin The number of input arguments. - .. c:member:: int PyUFuncObject.nout + .. c:member:: int nout The number of output arguments. - .. c:member:: int PyUFuncObject.nargs + .. c:member:: int nargs The total number of arguments (*nin* + *nout*). This must be less than :c:data:`NPY_MAXARGS`. - .. c:member:: int PyUFuncObject.identity + .. c:member:: int identity Either :c:data:`PyUFunc_One`, :c:data:`PyUFunc_Zero`, :c:data:`PyUFunc_None` or :c:data:`PyUFunc_AllOnes` to indicate the identity for this operation. It is only used for a reduce-like call on an empty array. - .. c:member:: void PyUFuncObject.functions( \ + .. c:member:: void functions( \ char** args, npy_intp* dims, npy_intp* steps, void* extradata) An array of function pointers --- one for each data type @@ -835,7 +835,7 @@ PyUFunc_Type and PyUFuncObject passed in as *extradata*. The size of this function pointer array is ntypes. - .. c:member:: void **PyUFuncObject.data + .. c:member:: void **data Extra data to be passed to the 1-d vector loops or ``NULL`` if no extra-data is needed. This C-array must be the same size ( @@ -844,22 +844,22 @@ PyUFunc_Type and PyUFuncObject just 1-d vector loops that make use of this extra data to receive a pointer to the actual function to call. - .. c:member:: int PyUFuncObject.ntypes + .. c:member:: int ntypes The number of supported data types for the ufunc. This number specifies how many different 1-d loops (of the builtin data types) are available. - .. c:member:: int PyUFuncObject.reserved1 + .. c:member:: int reserved1 Unused. - .. c:member:: char *PyUFuncObject.name + .. c:member:: char *name A string name for the ufunc. This is used dynamically to build the __doc\__ attribute of ufuncs. - .. c:member:: char *PyUFuncObject.types + .. c:member:: char *types An array of :math:`nargs \times ntypes` 8-bit type_numbers which contains the type signature for the function for each of @@ -869,24 +869,24 @@ PyUFunc_Type and PyUFuncObject vector loop. These type numbers do not have to be the same type and mixed-type ufuncs are supported. - .. c:member:: char *PyUFuncObject.doc + .. c:member:: char *doc Documentation for the ufunc. Should not contain the function signature as this is generated dynamically when __doc\__ is retrieved. - .. c:member:: void *PyUFuncObject.ptr + .. c:member:: void *ptr Any dynamically allocated memory. Currently, this is used for dynamic ufuncs created from a python function to store room for the types, data, and name members. - .. c:member:: PyObject *PyUFuncObject.obj + .. c:member:: PyObject *obj For ufuncs dynamically created from python functions, this member holds a reference to the underlying Python function. - .. c:member:: PyObject *PyUFuncObject.userloops + .. c:member:: PyObject *userloops A dictionary of user-defined 1-d vector loops (stored as CObject ptrs) for user-defined types. A loop may be registered by the @@ -894,68 +894,68 @@ PyUFunc_Type and PyUFuncObject User defined type numbers are always larger than :c:data:`NPY_USERDEF`. - .. c:member:: int PyUFuncObject.core_enabled + .. c:member:: int core_enabled 0 for scalar ufuncs; 1 for generalized ufuncs - .. c:member:: int PyUFuncObject.core_num_dim_ix + .. c:member:: int core_num_dim_ix Number of distinct core dimension names in the signature - .. c:member:: int *PyUFuncObject.core_num_dims + .. 
c:member:: int *core_num_dims Number of core dimensions of each argument - .. c:member:: int *PyUFuncObject.core_dim_ixs + .. c:member:: int *core_dim_ixs Dimension indices in a flattened form; indices of argument ``k`` are stored in ``core_dim_ixs[core_offsets[k] : core_offsets[k] + core_numdims[k]]`` - .. c:member:: int *PyUFuncObject.core_offsets + .. c:member:: int *core_offsets Position of 1st core dimension of each argument in ``core_dim_ixs``, equivalent to cumsum(``core_num_dims``) - .. c:member:: char *PyUFuncObject.core_signature + .. c:member:: char *core_signature Core signature string - .. c:member:: PyUFunc_TypeResolutionFunc *PyUFuncObject.type_resolver + .. c:member:: PyUFunc_TypeResolutionFunc *type_resolver A function which resolves the types and fills an array with the dtypes for the inputs and outputs - .. c:member:: PyUFunc_LegacyInnerLoopSelectionFunc *PyUFuncObject.legacy_inner_loop_selector + .. c:member:: PyUFunc_LegacyInnerLoopSelectionFunc *legacy_inner_loop_selector A function which returns an inner loop. The ``legacy`` in the name arises because for NumPy 1.6 a better variant had been planned. This variant has not yet come about. - .. c:member:: void *PyUFuncObject.reserved2 + .. c:member:: void *reserved2 For a possible future loop selector with a different signature. - .. c:member:: PyUFunc_MaskedInnerLoopSelectionFunc *PyUFuncObject.masked_inner_loop_selector + .. c:member:: PyUFunc_MaskedInnerLoopSelectionFunc *masked_inner_loop_selector Function which returns a masked inner loop for the ufunc - .. c:member:: npy_uint32 PyUFuncObject.op_flags + .. c:member:: npy_uint32 op_flags Override the default operand flags for each ufunc operand. - .. c:member:: npy_uint32 PyUFuncObject.iter_flags + .. c:member:: npy_uint32 iter_flags Override the default nditer flags for the ufunc. Added in API version 0x0000000D - .. c:member:: npy_intp *PyUFuncObject.core_dim_sizes + .. c:member:: npy_intp *core_dim_sizes For each distinct core dimension, the possible :ref:`frozen ` size if :c:data:`UFUNC_CORE_DIM_SIZE_INFERRED` is 0 - .. c:member:: npy_uint32 *PyUFuncObject.core_dim_flags + .. c:member:: npy_uint32 *core_dim_flags For each distinct core dimension, a set of ``UFUNC_CORE_DIM*`` flags @@ -1015,54 +1015,54 @@ PyArrayIter_Type and PyArrayIterObject Bool contiguous; } PyArrayIterObject; - .. c:member:: int PyArrayIterObject.nd_m1 + .. c:member:: int nd_m1 :math:`N-1` where :math:`N` is the number of dimensions in the underlying array. - .. c:member:: npy_intp PyArrayIterObject.index + .. c:member:: npy_intp index The current 1-d index into the array. - .. c:member:: npy_intp PyArrayIterObject.size + .. c:member:: npy_intp size The total size of the underlying array. - .. c:member:: npy_intp *PyArrayIterObject.coordinates + .. c:member:: npy_intp *coordinates An :math:`N` -dimensional index into the array. - .. c:member:: npy_intp *PyArrayIterObject.dims_m1 + .. c:member:: npy_intp *dims_m1 The size of the array minus 1 in each dimension. - .. c:member:: npy_intp *PyArrayIterObject.strides + .. c:member:: npy_intp *strides The strides of the array. How many bytes needed to jump to the next element in each dimension. - .. c:member:: npy_intp *PyArrayIterObject.backstrides + .. c:member:: npy_intp *backstrides How many bytes needed to jump from the end of a dimension back to its beginning. Note that ``backstrides[k] == strides[k] * dims_m1[k]``, but it is stored here as an optimization. - .. c:member:: npy_intp *PyArrayIterObject.factors + .. 
c:member:: npy_intp *factors This array is used in computing an N-d index from a 1-d index. It contains needed products of the dimensions. - .. c:member:: PyArrayObject *PyArrayIterObject.ao + .. c:member:: PyArrayObject *ao A pointer to the underlying ndarray this iterator was created to represent. - .. c:member:: char *PyArrayIterObject.dataptr + .. c:member:: char *dataptr This member points to an element in the ndarray indicated by the index. - .. c:member:: Bool PyArrayIterObject.contiguous + .. c:member:: Bool contiguous This flag is true if the underlying array is :c:data:`NPY_ARRAY_C_CONTIGUOUS`. It is used to simplify @@ -1109,32 +1109,32 @@ PyArrayMultiIter_Type and PyArrayMultiIterObject PyArrayIterObject *iters[NPY_MAXDIMS]; } PyArrayMultiIterObject; - .. c:macro: PyArrayMultiIterObject.PyObject_HEAD + .. c:macro: PyObject_HEAD Needed at the start of every Python object (holds reference count and type identification). - .. c:member:: int PyArrayMultiIterObject.numiter + .. c:member:: int numiter The number of arrays that need to be broadcast to the same shape. - .. c:member:: npy_intp PyArrayMultiIterObject.size + .. c:member:: npy_intp size The total broadcasted size. - .. c:member:: npy_intp PyArrayMultiIterObject.index + .. c:member:: npy_intp index The current (1-d) index into the broadcasted result. - .. c:member:: int PyArrayMultiIterObject.nd + .. c:member:: int nd The number of dimensions in the broadcasted result. - .. c:member:: npy_intp *PyArrayMultiIterObject.dimensions + .. c:member:: npy_intp *dimensions The shape of the broadcasted result (only ``nd`` slots are used). - .. c:member:: PyArrayIterObject **PyArrayMultiIterObject.iters + .. c:member:: PyArrayIterObject **iters An array of iterator objects that holds the iterators for the arrays to be broadcast together. On return, the iterators are @@ -1252,12 +1252,12 @@ PyArray_Dims The members of this structure are - .. c:member:: npy_intp *PyArray_Dims.ptr + .. c:member:: npy_intp *ptr A pointer to a list of (:c:type:`npy_intp`) integers which usually represent array shape or array strides. - .. c:member:: int PyArray_Dims.len + .. c:member:: int len The length of the list of integers. It is assumed safe to access *ptr* [0] to *ptr* [len-1]. @@ -1286,26 +1286,26 @@ PyArray_Chunk The members are - .. c:macro: PyArray_Chunk.PyObject_HEAD + .. c:macro: PyObject_HEAD Necessary for all Python objects. Included here so that the :c:type:`PyArray_Chunk` structure matches that of the buffer object (at least to the len member). - .. c:member:: PyObject *PyArray_Chunk.base + .. c:member:: PyObject *base The Python object this chunk of memory comes from. Needed so that memory can be accounted for properly. - .. c:member:: void *PyArray_Chunk.ptr + .. c:member:: void *ptr A pointer to the start of the single-segment chunk of memory. - .. c:member:: npy_intp PyArray_Chunk.len + .. c:member:: npy_intp len The length of the segment in bytes. - .. c:member:: int PyArray_Chunk.flags + .. c:member:: int flags Any data flags (*e.g.* :c:data:`NPY_ARRAY_WRITEABLE` ) that should be used to interpret the memory. @@ -1345,15 +1345,15 @@ PyArrayInterface PyObject *descr; } PyArrayInterface; - .. c:member:: int PyArrayInterface.two + .. c:member:: int two the integer 2 as a sanity check. - .. c:member:: int PyArrayInterface.nd + .. c:member:: int nd the number of dimensions in the array. - .. c:member:: char PyArrayInterface.typekind + .. 
c:member:: char typekind A character indicating what kind of array is present according to the typestring convention with 't' -> bitfield, 'b' -> Boolean, 'i' -> @@ -1361,11 +1361,11 @@ PyArrayInterface complex floating point, 'O' -> object, 'S' -> (byte-)string, 'U' -> unicode, 'V' -> void. - .. c:member:: int PyArrayInterface.itemsize + .. c:member:: int itemsize The number of bytes each item in the array requires. - .. c:member:: int PyArrayInterface.flags + .. c:member:: int flags Any of the bits :c:data:`NPY_ARRAY_C_CONTIGUOUS` (1), :c:data:`NPY_ARRAY_F_CONTIGUOUS` (2), :c:data:`NPY_ARRAY_ALIGNED` (0x100), @@ -1379,20 +1379,20 @@ PyArrayInterface structure is present (it will be ignored by objects consuming version 2 of the array interface). - .. c:member:: npy_intp *PyArrayInterface.shape + .. c:member:: npy_intp *shape An array containing the size of the array in each dimension. - .. c:member:: npy_intp *PyArrayInterface.strides + .. c:member:: npy_intp *strides An array containing the number of bytes to jump to get to the next element in each dimension. - .. c:member:: void *PyArrayInterface.data + .. c:member:: void *data A pointer *to* the first element of the array. - .. c:member:: PyObject *PyArrayInterface.descr + .. c:member:: PyObject *descr A Python object describing the data-type in more detail (same as the *descr* key in :obj:`__array_interface__`). This can be From a818ad5ffddcbea16dafd01c4cc5d2b82e86215d Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 7 Oct 2020 12:08:11 +0300 Subject: [PATCH 204/409] BLD: circleCI- merge before build, add -n to sphinx --- .circleci/config.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f4536cca5525..d694230a581f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,7 +13,9 @@ jobs: working_directory: ~/repo steps: - - checkout + - checkout: + post: + git pull --ff-only origin "refs/pull/${CI_PULL_REQUEST//*pull\//}/merge" - run: name: create virtual environment, install dependencies @@ -48,11 +50,23 @@ jobs: (cd doc ; git submodule update --init) python tools/refguide_check.py --rst + - run: + name: build devdocs w/ref warnings + command: | + set +e + # allow this to fail for now: issue 13114 + . venv/bin/activate + cd doc + SPHINXOPTS="-q -n" make -e html + # clear the error + echo ok + - run: name: build devdocs command: | . venv/bin/activate cd doc + make clean SPHINXOPTS=-q make -e html - run: From a7a7b6ba1833f7d5a6acd2e3cc63bfd29cc49005 Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Wed, 7 Oct 2020 12:16:00 +0300 Subject: [PATCH 205/409] Update .circleci/config.yml Co-authored-by: Eric Wieser --- .circleci/config.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d694230a581f..bcebb58b872f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -53,13 +53,9 @@ jobs: - run: name: build devdocs w/ref warnings command: | - set +e - # allow this to fail for now: issue 13114 . 
venv/bin/activate cd doc - SPHINXOPTS="-q -n" make -e html - # clear the error - echo ok + SPHINXOPTS="-q -n" make -e html || echo "ignoring errors for now, see gh-13114" - run: name: build devdocs From 2b05b8f5e82c309858ae23aceddfe15c80e43f0a Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 7 Oct 2020 12:55:23 +0300 Subject: [PATCH 206/409] BLD: try merging in a separate step --- .circleci/config.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index bcebb58b872f..7fd46a931185 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -14,8 +14,7 @@ jobs: steps: - checkout: - post: - git pull --ff-only origin "refs/pull/${CI_PULL_REQUEST//*pull\//}/merge" + - run: git pull --ff-only origin "refs/pull/${CI_PULL_REQUEST//*pull\//}/merge" - run: name: create virtual environment, install dependencies @@ -53,9 +52,13 @@ jobs: - run: name: build devdocs w/ref warnings command: | + set +e + # allow this to fail for now: issue 13114 . venv/bin/activate cd doc - SPHINXOPTS="-q -n" make -e html || echo "ignoring errors for now, see gh-13114" + SPHINXOPTS="-q -n" make -e html + # clear the error + echo ok - run: name: build devdocs From 16934bee9bcb15f4a6a9946dc710a8a8fd535286 Mon Sep 17 00:00:00 2001 From: mattip Date: Wed, 7 Oct 2020 13:18:08 +0300 Subject: [PATCH 207/409] BLD: don't use -q in sphinx build --- .circleci/config.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7fd46a931185..d77cde59b85c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -52,13 +52,10 @@ jobs: - run: name: build devdocs w/ref warnings command: | - set +e - # allow this to fail for now: issue 13114 . venv/bin/activate cd doc - SPHINXOPTS="-q -n" make -e html - # clear the error - echo ok + # Don't use -q, show warning summary" + SPHINXOPTS="-n" make -e html || echo "ignoring errors for now, see gh-13114" - run: name: build devdocs From 248eee330407fc4b36375c0fb42098e403cd2d68 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Wed, 7 Oct 2020 12:11:16 +0200 Subject: [PATCH 208/409] MAINT: Removed a number of duplicate placeholders --- numpy/__init__.pyi | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 789bac896e56..76d1b14e3135 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -56,7 +56,6 @@ else: # Ensures that the stubs are picked up from numpy import ( char, - compat, ctypeslib, emath, fft, @@ -248,14 +247,12 @@ can_cast: Any cast: Any cdouble: Any cfloat: Any -char: Any chararray: Any clongdouble: Any clongfloat: Any column_stack: Any common_type: Any compare_chararrays: Any -compat: Any complex256: Any complex_: Any concatenate: Any @@ -265,7 +262,6 @@ copyto: Any corrcoef: Any cov: Any csingle: Any -ctypeslib: Any cumproduct: Any datetime_as_string: Any datetime_data: Any @@ -287,12 +283,10 @@ dstack: Any ediff1d: Any einsum: Any einsum_path: Any -emath: Any errstate: Any expand_dims: Any extract: Any eye: Any -fft: Any fill_diagonal: Any finfo: Any fix: Any @@ -311,7 +305,6 @@ frompyfunc: Any fromregex: Any fromstring: Any genfromtxt: Any -geomspace: Any get_include: Any get_printoptions: Any getbufsize: Any @@ -355,25 +348,18 @@ ix_: Any kaiser: Any kron: Any lexsort: Any -lib: Any -linalg: Any -linspace: Any load: Any loads: Any loadtxt: Any -logspace: Any longcomplex: Any longdouble: Any longfloat: Any longlong: Any lookfor: Any -ma: Any mafromtxt: Any mask_indices: 
Any mat: Any -math: Any matrix: Any -matrixlib: Any max: Any may_share_memory: Any median: Any @@ -423,7 +409,6 @@ polydiv: Any polyfit: Any polyint: Any polymul: Any -polynomial: Any polysub: Any polyval: Any printoptions: Any @@ -433,11 +418,9 @@ put_along_axis: Any putmask: Any quantile: Any r_: Any -random: Any ravel_multi_index: Any real: Any real_if_close: Any -rec: Any recarray: Any recfromcsv: Any recfromtxt: Any @@ -476,9 +459,7 @@ split: Any stack: Any str0: Any string_: Any -sys: Any take_along_axis: Any -testing: Any tile: Any trapz: Any tri: Any @@ -508,7 +489,6 @@ ushort: Any vander: Any vdot: Any vectorize: Any -version: Any void0: Any vsplit: Any vstack: Any From 065b300e1280b139f7463fa33dc4ddc32744ec0c Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Wed, 7 Oct 2020 12:27:17 +0200 Subject: [PATCH 209/409] MAINT: Ignore three builtin modules present in the numpy namespace --- tools/functions_missing_types.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/functions_missing_types.py b/tools/functions_missing_types.py index a32e72dad8b1..9df0c6a51d97 100755 --- a/tools/functions_missing_types.py +++ b/tools/functions_missing_types.py @@ -27,6 +27,9 @@ "division", "print_function", "warnings", + "sys", + "os", + "math", # Accidentally public, deprecated, or shouldn't be used "Tester", "alen", From 30e037cf51ff476ef249926f365212e991886a27 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Wed, 7 Oct 2020 12:52:44 +0200 Subject: [PATCH 210/409] MAINT: Ignore the private `np.compat` module --- tools/functions_missing_types.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/functions_missing_types.py b/tools/functions_missing_types.py index 9df0c6a51d97..c2fe156f0716 100755 --- a/tools/functions_missing_types.py +++ b/tools/functions_missing_types.py @@ -37,6 +37,7 @@ "add_newdoc", "add_newdoc_ufunc", "core", + "compat", "fastCopyAndTranspose", "get_array_wrap", "int_asbuffer", From c5fd38a38b931bd74d217663d35e7b8a507df2bc Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Wed, 7 Oct 2020 12:41:45 +0200 Subject: [PATCH 211/409] TST: Updated the module-related tests --- numpy/typing/tests/data/fail/modules.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/numpy/typing/tests/data/fail/modules.py b/numpy/typing/tests/data/fail/modules.py index be031e6e15e5..5e2d820abc85 100644 --- a/numpy/typing/tests/data/fail/modules.py +++ b/numpy/typing/tests/data/fail/modules.py @@ -1,4 +1,10 @@ import numpy as np np.testing.bob # E: Module has no attribute -np.bob # E: Module has no attribute \ No newline at end of file +np.bob # E: Module has no attribute + +# Stdlib modules in the namespace by accident +np.warnings # E: Module has no attribute +np.sys # E: Module has no attribute +np.os # E: Module has no attribute +np.math # E: Module has no attribute From e48ffc616eb7aa4caeb9df9b3b2318a0c49b0020 Mon Sep 17 00:00:00 2001 From: Ashutosh singh <55102089+Ashutosh619-sudo@users.noreply.github.com> Date: Wed, 7 Oct 2020 17:33:52 +0530 Subject: [PATCH 212/409] MAINT: Chaining exception in numpy\numpy\ma\mrecords.py (#17438) * fix runF2c exception --- numpy/ma/mrecords.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/ma/mrecords.py b/numpy/ma/mrecords.py index cd93a9a14313..c017bee95684 100644 --- a/numpy/ma/mrecords.py +++ b/numpy/ma/mrecords.py @@ -198,8 +198,8 @@ def __getattribute__(self, attr): fielddict = ndarray.__getattribute__(self, 'dtype').fields try: res = fielddict[attr][:2] - except (TypeError, KeyError): - 
raise AttributeError("record array has no attribute %s" % attr) + except (TypeError, KeyError) as e: + raise AttributeError("record array has no attribute %s" % attr) from e # So far, so good _localdict = ndarray.__getattribute__(self, '__dict__') _data = ndarray.view(self, _localdict['_baseclass']) From fd01786ea4c7dde540cede258ad11d08d25bacfc Mon Sep 17 00:00:00 2001 From: Bas van Beek <43369155+BvB93@users.noreply.github.com> Date: Wed, 7 Oct 2020 18:53:33 +0200 Subject: [PATCH 213/409] MAINT: Move aliases for common scalar unions to `numpy.typing` (#17429) * MAINT: Move the `Like` unions to `numpy.typing` --- numpy/__init__.pyi | 20 ++++++++++++-------- numpy/core/fromnumeric.pyi | 13 +++++++++---- numpy/core/function_base.pyi | 4 ++-- numpy/typing/__init__.py | 11 ++++++++++- numpy/typing/_array_like.py | 8 +++++++- numpy/typing/_callable.py | 12 +++++++----- numpy/typing/_scalars.py | 26 ++++++++++++++++++++++++++ 7 files changed, 73 insertions(+), 21 deletions(-) create mode 100644 numpy/typing/_scalars.py diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 789bac896e56..3d40682e7432 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -4,7 +4,18 @@ import datetime as dt from abc import abstractmethod from numpy.core._internal import _ctypes -from numpy.typing import ArrayLike, DtypeLike, _Shape, _ShapeLike +from numpy.typing import ( + ArrayLike, + DtypeLike, + _Shape, + _ShapeLike, + _CharLike, + _BoolLike, + _IntLike, + _FloatLike, + _ComplexLike, + _NumberLike, +) from numpy.typing._callable import ( _BoolOp, _BoolSub, @@ -1309,13 +1320,6 @@ class ndarray(_ArrayOrScalarCommon, Iterable, Sized, Container): # See https://github.com/numpy/numpy-stubs/pull/80 for more details. -_CharLike = Union[str, bytes] -_BoolLike = Union[bool, bool_] -_IntLike = Union[int, integer] -_FloatLike = Union[_IntLike, float, floating] -_ComplexLike = Union[_FloatLike, complex, complexfloating] -_NumberLike = Union[int, float, complex, number, bool_] - class generic(_ArrayOrScalarCommon): @abstractmethod def __init__(self, *args: Any, **kwargs: Any) -> None: ... 
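
[Editor's note: illustration only, not part of the patch. A minimal sketch of
how the relocated scalar unions compose; the alias definitions mirror the
``numpy/typing/_scalars.py`` hunk further below, while ``as_builtin_float`` is
a hypothetical helper added here purely for demonstration.]

    from typing import Union
    import numpy as np

    _IntLike = Union[int, np.integer]
    _FloatLike = Union[_IntLike, float, np.floating]

    def as_builtin_float(x: _FloatLike) -> float:
        # Python ints/floats and NumPy integer/floating scalars all satisfy
        # the union, so a static type checker accepts each call below.
        return float(x)

    as_builtin_float(3)                # int
    as_builtin_float(np.float32(0.5))  # np.floating
    # as_builtin_float("0.5") would be flagged: str is not in _FloatLike
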
diff --git a/numpy/core/fromnumeric.pyi b/numpy/core/fromnumeric.pyi index 6b3d2268f35e..3167d12b98c6 100644 --- a/numpy/core/fromnumeric.pyi +++ b/numpy/core/fromnumeric.pyi @@ -10,9 +10,6 @@ from numpy import ( generic, _OrderKACF, _OrderACF, - _IntLike, - _BoolLike, - _NumberLike, _ArrayLikeBool, _ArrayLikeIntOrBool, _ModeKind, @@ -20,7 +17,15 @@ from numpy import ( _SortKind, _SortSide, ) -from numpy.typing import DtypeLike, ArrayLike, _ShapeLike, _Shape +from numpy.typing import ( + DtypeLike, + ArrayLike, + _ShapeLike, + _Shape, + _IntLike, + _BoolLike, + _NumberLike, +) if sys.version_info >= (3, 8): from typing import Literal diff --git a/numpy/core/function_base.pyi b/numpy/core/function_base.pyi index c6ebbd5f5db4..7057122531fd 100644 --- a/numpy/core/function_base.pyi +++ b/numpy/core/function_base.pyi @@ -1,8 +1,8 @@ import sys from typing import overload, Tuple, Union, Sequence, Any -from numpy import ndarray, inexact, _NumberLike -from numpy.typing import ArrayLike, DtypeLike, _SupportsArray +from numpy import ndarray, inexact +from numpy.typing import ArrayLike, DtypeLike, _SupportsArray, _NumberLike if sys.version_info >= (3, 8): from typing import SupportsIndex, Literal diff --git a/numpy/typing/__init__.py b/numpy/typing/__init__.py index 86fd5e7870e2..987aa39aa1d5 100644 --- a/numpy/typing/__init__.py +++ b/numpy/typing/__init__.py @@ -90,6 +90,16 @@ Please see : https://numpy.org/devdocs/reference/arrays.dtypes.html """ +from ._scalars import ( + _CharLike, + _BoolLike, + _IntLike, + _FloatLike, + _ComplexLike, + _NumberLike, + _ScalarLike, + _VoidLike, +) from ._array_like import _SupportsArray, ArrayLike from ._shape import _Shape, _ShapeLike from ._dtype_like import DtypeLike @@ -97,4 +107,3 @@ from numpy._pytesttester import PytestTester test = PytestTester(__name__) del PytestTester - diff --git a/numpy/typing/_array_like.py b/numpy/typing/_array_like.py index 76c0c839c567..1c00b200f4ed 100644 --- a/numpy/typing/_array_like.py +++ b/numpy/typing/_array_like.py @@ -2,6 +2,7 @@ from typing import Any, overload, Sequence, TYPE_CHECKING, Union from numpy import ndarray +from ._scalars import _ScalarLike from ._dtype_like import DtypeLike if sys.version_info >= (3, 8): @@ -31,4 +32,9 @@ def __array__(self, dtype: DtypeLike = ...) -> ndarray: ... # is resolved. 
See also the mypy issue: # # https://github.com/python/typing/issues/593 -ArrayLike = Union[bool, int, float, complex, _SupportsArray, Sequence] +ArrayLike = Union[ + _ScalarLike, + Sequence[_ScalarLike], + Sequence[Sequence[Any]], # TODO: Wait for support for recursive types + _SupportsArray, +] diff --git a/numpy/typing/_callable.py b/numpy/typing/_callable.py index 5e14b708f1a6..0d876ae8d76a 100644 --- a/numpy/typing/_callable.py +++ b/numpy/typing/_callable.py @@ -12,11 +12,6 @@ from typing import Union, TypeVar, overload, Any from numpy import ( - _BoolLike, - _IntLike, - _FloatLike, - _ComplexLike, - _NumberLike, generic, bool_, timedelta64, @@ -32,6 +27,13 @@ complexfloating, complex128, ) +from ._scalars import ( + _BoolLike, + _IntLike, + _FloatLike, + _ComplexLike, + _NumberLike, +) if sys.version_info >= (3, 8): from typing import Protocol diff --git a/numpy/typing/_scalars.py b/numpy/typing/_scalars.py new file mode 100644 index 000000000000..e4fc28b074ac --- /dev/null +++ b/numpy/typing/_scalars.py @@ -0,0 +1,26 @@ +from typing import Union, Tuple, Any + +import numpy as np + +# NOTE: `_StrLike` and `_BytesLike` are pointless, as `np.str_` and `np.bytes_` +# are already subclasses of their builtin counterpart + +_CharLike = Union[str, bytes] + +_BoolLike = Union[bool, np.bool_] +_IntLike = Union[int, np.integer] +_FloatLike = Union[_IntLike, float, np.floating] +_ComplexLike = Union[_FloatLike, complex, np.complexfloating] +_NumberLike = Union[int, float, complex, np.number, np.bool_] + +_ScalarLike = Union[ + int, + float, + complex, + str, + bytes, + np.generic, +] + +# `_VoidLike` is technically not a scalar, but it's close enough +_VoidLike = Union[Tuple[Any, ...], np.void] From 2a267e6a49ed68da01761c92deb7c90be207660d Mon Sep 17 00:00:00 2001 From: Takanori H Date: Thu, 8 Oct 2020 02:34:34 +0900 Subject: [PATCH 214/409] DOC: Fixing references in ma docstrings (#16300) Fix broken links in see also section of some ma functions. --- doc/source/reference/routines.ma.rst | 2 +- numpy/ma/core.py | 2 +- numpy/ma/extras.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/source/reference/routines.ma.rst b/doc/source/reference/routines.ma.rst index 97859ac67a24..18248fe099e5 100644 --- a/doc/source/reference/routines.ma.rst +++ b/doc/source/reference/routines.ma.rst @@ -272,7 +272,7 @@ Filling a masked array ma.common_fill_value ma.default_fill_value ma.maximum_fill_value - ma.maximum_fill_value + ma.minimum_fill_value ma.set_fill_value ma.MaskedArray.get_fill_value diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 4e320576bdb3..313d9e0b9b16 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -5389,7 +5389,7 @@ def round(self, decimals=0, out=None): See Also -------- - numpy.ndarray.around : corresponding function for ndarrays + numpy.ndarray.round : corresponding function for ndarrays numpy.around : equivalent function """ result = self._data.round(decimals=decimals, out=out).view(type(self)) diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py index 613bcb550dfd..1bf03e966326 100644 --- a/numpy/ma/extras.py +++ b/numpy/ma/extras.py @@ -901,11 +901,11 @@ def compress_rows(a): Suppress whole rows of a 2-D array that contain masked values. This is equivalent to ``np.ma.compress_rowcols(a, 0)``, see - `extras.compress_rowcols` for details. + `compress_rowcols` for details. 
See Also -------- - extras.compress_rowcols + compress_rowcols """ a = asarray(a) @@ -918,11 +918,11 @@ def compress_cols(a): Suppress whole columns of a 2-D array that contain masked values. This is equivalent to ``np.ma.compress_rowcols(a, 1)``, see - `extras.compress_rowcols` for details. + `compress_rowcols` for details. See Also -------- - extras.compress_rowcols + compress_rowcols """ a = asarray(a) From d868325d62b707ccaa3b14a10b6d9897f2956592 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Wed, 7 Oct 2020 18:46:42 -0400 Subject: [PATCH 215/409] DOC: Use consistent lowercase on docs landing page Corrects titles to all use sentence case, per style guide. --- doc/source/_templates/indexcontent.html | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/source/_templates/indexcontent.html b/doc/source/_templates/indexcontent.html index 6633aa9bef90..2dba16ce8a16 100644 --- a/doc/source/_templates/indexcontent.html +++ b/doc/source/_templates/indexcontent.html @@ -12,21 +12,21 @@

          {{ docstitle|e }}

          For users:

[six link-title lines unrecoverable here: the anchor markup was stripped
during extraction, leaving only bare diff markers; per the commit message,
this hunk retitles the landing-page links in sentence case]
@@ -36,11 +36,11 @@
          {{ docstitle|e }}

          For developers/contributors:

[two link-title lines unrecoverable: anchor markup stripped; the links are
retitled in sentence case]
@@ -54,7 +54,7 @@
          {{ docstitle|e }}

[one changed link-title line unrecoverable: anchor markup stripped]
@@ -65,13 +65,13 @@
          Acknowledgements

          Large parts of this manual originate from Travis E. Oliphant's
          book "Guide to NumPy"
-         (which generously entered Public Domain in August 2008). The reference
+         (which generously entered public domain in August 2008). The reference
          documentation for many of the functions are written by numerous
          contributors and developers of NumPy.

          The preferred way to update the documentation is by submitting a pull
-         request on Github (see the Documentation Index).
+         request on GitHub (see the Documentation index).
          Please help us to further improve the NumPy documentation!

{% endblock %} From fd0f3dd2723ed7effde52bf31a673c9128a0a28a Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Sun, 5 Jul 2020 15:32:09 -0700 Subject: [PATCH 216/409] ENH: make dtype generic over scalar type This allows representing dtype subclasses via constructs like `np.dtype[np.float64]`. --- numpy/__init__.pyi | 108 ++++++++++++++++++++++-- numpy/typing/__init__.py | 2 +- numpy/typing/_dtype_like.py | 30 ++++--- numpy/typing/tests/data/fail/dtype.py | 9 +- numpy/typing/tests/data/reveal/dtype.py | 24 ++++++ 5 files changed, 151 insertions(+), 22 deletions(-) create mode 100644 numpy/typing/tests/data/reveal/dtype.py diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 3d40682e7432..139f2a1bc580 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -15,6 +15,8 @@ from numpy.typing import ( _FloatLike, _ComplexLike, _NumberLike, + _SupportsDtype, + _VoidDtypeLike, ) from numpy.typing._callable import ( _BoolOp, @@ -527,16 +529,112 @@ where: Any who: Any _NdArraySubClass = TypeVar("_NdArraySubClass", bound=ndarray) +_DTypeScalar = TypeVar("_DTypeScalar", bound=generic) _ByteOrder = Literal["S", "<", ">", "=", "|", "L", "B", "N", "I"] -class dtype: +class dtype(Generic[_DTypeScalar]): names: Optional[Tuple[str, ...]] - def __init__( - self, - dtype: DtypeLike, + # Overload for subclass of generic + @overload + def __new__( + cls, + dtype: Type[_DTypeScalar], align: bool = ..., copy: bool = ..., - ) -> None: ... + ) -> dtype[_DTypeScalar]: ... + # Overloads for string aliases + @overload + def __new__( + cls, + dtype: Literal["float64", "f8", "f8", "float", "double", "float_", "d"], + align: bool = ..., + copy: bool = ..., + ) -> dtype[float64]: ... + @overload + def __new__( + cls, + dtype: Literal["float32", "f4", "f4", "single"], + align: bool = ..., + copy: bool = ..., + ) -> dtype[float32]: ... + @overload + def __new__( + cls, + dtype: Literal["int64", "i8", "i8"], + align: bool = ..., + copy: bool = ..., + ) -> dtype[int64]: ... + @overload + def __new__( + cls, + dtype: Literal["int32", "i4", "i4"], + align: bool = ..., + copy: bool = ..., + ) -> dtype[int32]: ... + # "int" resolves to int_, which is system dependent, and as of now + # untyped. Long-term we'll do something fancier here. + @overload + def __new__( + cls, + dtype: Literal["int"], + align: bool = ..., + copy: bool = ..., + ) -> dtype: ... + # Overloads for Python types. Order is important here. + @overload + def __new__( + cls, + dtype: Type[bool], + align: bool = ..., + copy: bool = ..., + ) -> dtype[bool_]: ... + # See the notes for "int" + @overload + def __new__( + cls, + dtype: Type[int], + align: bool = ..., + copy: bool = ..., + ) -> dtype[Any]: ... + @overload + def __new__( + cls, + dtype: Type[float], + align: bool = ..., + copy: bool = ..., + ) -> dtype[float64]: ... + # None is a special case + @overload + def __new__( + cls, + dtype: None, + align: bool = ..., + copy: bool = ..., + ) -> dtype[float64]: ... + # dtype of a dtype is the same dtype + @overload + def __new__( + cls, + dtype: dtype[_DTypeScalar], + align: bool = ..., + copy: bool = ..., + ) -> dtype[_DTypeScalar]: ... + # TODO: handle _SupportsDtype better + @overload + def __new__( + cls, + dtype: _SupportsDtype, + align: bool = ..., + copy: bool = ..., + ) -> dtype[Any]: ... + # Catchall overload + @overload + def __new__( + cls, + dtype: _VoidDtypeLike, + align: bool = ..., + copy: bool = ..., + ) -> dtype[void]: ... def __eq__(self, other: DtypeLike) -> bool: ... def __ne__(self, other: DtypeLike) -> bool: ... 
def __gt__(self, other: DtypeLike) -> bool: ... diff --git a/numpy/typing/__init__.py b/numpy/typing/__init__.py index 987aa39aa1d5..dafabd95aefd 100644 --- a/numpy/typing/__init__.py +++ b/numpy/typing/__init__.py @@ -102,7 +102,7 @@ ) from ._array_like import _SupportsArray, ArrayLike from ._shape import _Shape, _ShapeLike -from ._dtype_like import DtypeLike +from ._dtype_like import _SupportsDtype, _VoidDtypeLike, DtypeLike from numpy._pytesttester import PytestTester test = PytestTester(__name__) diff --git a/numpy/typing/_dtype_like.py b/numpy/typing/_dtype_like.py index 7c1946a3e8b5..5bfd8ffdc0d6 100644 --- a/numpy/typing/_dtype_like.py +++ b/numpy/typing/_dtype_like.py @@ -38,18 +38,9 @@ class _SupportsDtype(Protocol): _DtypeDict = Any _SupportsDtype = Any -# Anything that can be coerced into numpy.dtype. -# Reference: https://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html -DtypeLike = Union[ - dtype, - # default data type (float64) - None, - # array-scalar types and generic types - type, # TODO: enumerate these when we add type hints for numpy scalars - # anything with a dtype attribute - _SupportsDtype, - # character codes, type strings or comma-separated fields, e.g., 'float64' - str, + +# Would create a dtype[np.void] +_VoidDtypeLike = Union[ # (flexible_dtype, itemsize) Tuple[_DtypeLikeNested, int], # (fixed_dtype, shape) @@ -67,6 +58,21 @@ class _SupportsDtype(Protocol): Tuple[_DtypeLikeNested, _DtypeLikeNested], ] +# Anything that can be coerced into numpy.dtype. +# Reference: https://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html +DtypeLike = Union[ + dtype, + # default data type (float64) + None, + # array-scalar types and generic types + type, # TODO: enumerate these when we add type hints for numpy scalars + # anything with a dtype attribute + _SupportsDtype, + # character codes, type strings or comma-separated fields, e.g., 'float64' + str, + _VoidDtypeLike, +] + # NOTE: while it is possible to provide the dtype as a dict of # dtype-like objects (e.g. 
`{'field1': ..., 'field2': ..., ...}`), # this syntax is officially discourged and diff --git a/numpy/typing/tests/data/fail/dtype.py b/numpy/typing/tests/data/fail/dtype.py index 3dc027daf243..7d4783d8f651 100644 --- a/numpy/typing/tests/data/fail/dtype.py +++ b/numpy/typing/tests/data/fail/dtype.py @@ -1,15 +1,16 @@ import numpy as np - class Test: not_dtype = float -np.dtype(Test()) # E: Argument 1 to "dtype" has incompatible type +np.dtype(Test()) # E: No overload variant of "dtype" matches -np.dtype( - { # E: Argument 1 to "dtype" has incompatible type +np.dtype( # E: No overload variant of "dtype" matches + { "field1": (float, 1), "field2": (int, 3), } ) + +np.dtype[np.float64](np.int64) # E: Argument 1 to "dtype" has incompatible type diff --git a/numpy/typing/tests/data/reveal/dtype.py b/numpy/typing/tests/data/reveal/dtype.py new file mode 100644 index 000000000000..aca7e8a5e983 --- /dev/null +++ b/numpy/typing/tests/data/reveal/dtype.py @@ -0,0 +1,24 @@ +import numpy as np + +reveal_type(np.dtype(np.float64)) # E: numpy.dtype[numpy.float64*] +reveal_type(np.dtype(np.int64)) # E: numpy.dtype[numpy.int64*] + +# String aliases +reveal_type(np.dtype("float64")) # E: numpy.dtype[numpy.float64] +reveal_type(np.dtype("float32")) # E: numpy.dtype[numpy.float32] +reveal_type(np.dtype("int64")) # E: numpy.dtype[numpy.int64] +reveal_type(np.dtype("int32")) # E: numpy.dtype[numpy.int32] + +# Python types +reveal_type(np.dtype(float)) # E: numpy.dtype[numpy.float64] +reveal_type(np.dtype(int)) # E: numpy.dtype +reveal_type(np.dtype(bool)) # E: numpy.dtype[numpy.bool_] + +# Special case for None +reveal_type(np.dtype(None)) # E: numpy.dtype[numpy.float64] + +# Dtypes of dtypes +reveal_type(np.dtype(np.dtype(np.float64))) # E: numpy.dtype[numpy.float64*] + +# Void +reveal_type(np.dtype(("U", 10))) # E: numpy.dtype[numpy.void] From 02688c220591250082d4ce109eb51421d8412099 Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Tue, 6 Oct 2020 22:20:47 -0700 Subject: [PATCH 217/409] MAINT: add more dtype __new__ overloads for missing scalar types --- numpy/__init__.pyi | 282 ++++++++++++++++++++++-- numpy/typing/tests/data/reveal/dtype.py | 9 + 2 files changed, 270 insertions(+), 21 deletions(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 139f2a1bc580..f4caaab7ccdc 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -542,75 +542,307 @@ class dtype(Generic[_DTypeScalar]): align: bool = ..., copy: bool = ..., ) -> dtype[_DTypeScalar]: ... - # Overloads for string aliases + # Overloads for string aliases, Python types, and some assorted + # other special cases. Order is sometimes important because of the + # subtype relationships + # + # bool < int < float < complex + # + # so we have to make sure the overloads for the narrowest type is + # first. @overload def __new__( cls, - dtype: Literal["float64", "f8", "f8", "float", "double", "float_", "d"], + dtype: Union[ + Type[bool], + Literal[ + "?", + "=?", + "?", + "bool", + "bool_", + ], + ], align: bool = ..., copy: bool = ..., - ) -> dtype[float64]: ... + ) -> dtype[bool_]: ... @overload def __new__( cls, - dtype: Literal["float32", "f4", "f4", "single"], + dtype: Literal[ + "uint8", + "u1", + "=u1", + "u1", + "B", + "=B", + "B", + ], align: bool = ..., copy: bool = ..., - ) -> dtype[float32]: ... + ) -> dtype[uint8]: ... @overload def __new__( cls, - dtype: Literal["int64", "i8", "i8"], + dtype: Literal[ + "uint16", + "u2", + "=u2", + "u2", + "h", + "=h", + "h", + ], align: bool = ..., copy: bool = ..., - ) -> dtype[int64]: ... 
+ ) -> dtype[uint16]: ... @overload def __new__( cls, - dtype: Literal["int32", "i4", "i4"], + dtype: Literal[ + "uint32", + "u4", + "=u4", + "u4", + "I", + "=I", + "I", + ], + align: bool = ..., + copy: bool = ..., + ) -> dtype[uint32]: ... + @overload + def __new__( + cls, + dtype: Literal[ + "uint64", + "u8", + "=u8", + "u8", + "L", + "=L", + "L", + ], + align: bool = ..., + copy: bool = ..., + ) -> dtype[uint64]: ... + @overload + def __new__( + cls, + dtype: Literal[ + "int8", + "i1", + "=i1", + "i1", + "b", + "=b", + "b", + ], + align: bool = ..., + copy: bool = ..., + ) -> dtype[int8]: ... + @overload + def __new__( + cls, + dtype: Literal[ + "int16", + "i2", + "=i2", + "i2", + "h", + "=h", + "h", + ], + align: bool = ..., + copy: bool = ..., + ) -> dtype[int16]: ... + @overload + def __new__( + cls, + dtype: Literal[ + "int32", + "i4", + "=i4", + "i4", + "i", + "=i", + "i", + ], align: bool = ..., copy: bool = ..., ) -> dtype[int32]: ... - # "int" resolves to int_, which is system dependent, and as of now - # untyped. Long-term we'll do something fancier here. @overload def __new__( cls, - dtype: Literal["int"], + dtype: Literal[ + "int64", + "i8", + "=i8", + "i8", + "l", + "=l", + "l", + ], + align: bool = ..., + copy: bool = ..., + ) -> dtype[int64]: ... + # "int"/int resolve to int_, which is system dependent and as of + # now untyped. Long-term we'll do something fancier here. + @overload + def __new__( + cls, + dtype: Union[Type[int], Literal["int"]], align: bool = ..., copy: bool = ..., ) -> dtype: ... - # Overloads for Python types. Order is important here. @overload def __new__( cls, - dtype: Type[bool], + dtype: Literal[ + "float16", + "f4", + "=f4", + "f4", + "e", + "=e", + "e", + "half", + ], align: bool = ..., copy: bool = ..., - ) -> dtype[bool_]: ... - # See the notes for "int" + ) -> dtype[float16]: ... @overload def __new__( cls, - dtype: Type[int], + dtype: Literal[ + "float32", + "f4", + "=f4", + "f4", + "f", + "=f", + "f", + "single", + ], align: bool = ..., copy: bool = ..., - ) -> dtype[Any]: ... + ) -> dtype[float32]: ... @overload def __new__( cls, - dtype: Type[float], + dtype: Union[ + None, + Type[float], + Literal[ + "float64", + "f8", + "=f8", + "f8", + "d", + "d", + "float", + "double", + "float_", + ], + ], align: bool = ..., copy: bool = ..., ) -> dtype[float64]: ... - # None is a special case @overload def __new__( cls, - dtype: None, + dtype: Literal[ + "complex64", + "c8", + "=c8", + "c8", + "F", + "=F", + "F", + ], align: bool = ..., copy: bool = ..., - ) -> dtype[float64]: ... + ) -> dtype[complex128]: ... + @overload + def __new__( + cls, + dtype: Union[ + Type[complex], + Literal[ + "complex128", + "c16", + "=c16", + "c16", + "D", + "=D", + "D", + ], + ], + align: bool = ..., + copy: bool = ..., + ) -> dtype[complex128]: ... + @overload + def __new__( + cls, + dtype: Union[ + Type[bytes], + Literal[ + "S", + "=S", + "S", + "bytes", + "bytes_", + ], + ], + align: bool = ..., + copy: bool = ..., + ) -> dtype[bytes_]: ... + @overload + def __new__( + cls, + dtype: Union[ + Type[str], + Literal[ + "U", + "=U", + # U intentionally not included; they are not + # the same dtype and which one dtype("U") translates + # to is platform-dependent. + "str", + "str_", + ], + ], + align: bool = ..., + copy: bool = ..., + ) -> dtype[str_]: ... # dtype of a dtype is the same dtype @overload def __new__( @@ -627,6 +859,14 @@ class dtype(Generic[_DTypeScalar]): align: bool = ..., copy: bool = ..., ) -> dtype[Any]: ... 
+ # Handle strings that can't be expressed as literals; i.e. s1, s2, ... + @overload + def __new__( + cls, + dtype: str, + align: bool = ..., + copy: bool = ..., + ) -> dtype[Any]: ... # Catchall overload @overload def __new__( diff --git a/numpy/typing/tests/data/reveal/dtype.py b/numpy/typing/tests/data/reveal/dtype.py index aca7e8a5e983..e0802299e973 100644 --- a/numpy/typing/tests/data/reveal/dtype.py +++ b/numpy/typing/tests/data/reveal/dtype.py @@ -8,11 +8,17 @@ reveal_type(np.dtype("float32")) # E: numpy.dtype[numpy.float32] reveal_type(np.dtype("int64")) # E: numpy.dtype[numpy.int64] reveal_type(np.dtype("int32")) # E: numpy.dtype[numpy.int32] +reveal_type(np.dtype("bool")) # E: numpy.dtype[numpy.bool_] +reveal_type(np.dtype("bytes")) # E: numpy.dtype[numpy.bytes_] +reveal_type(np.dtype("str")) # E: numpy.dtype[numpy.str_] # Python types +reveal_type(np.dtype(complex)) # E: numpy.dtype[numpy.complex128] reveal_type(np.dtype(float)) # E: numpy.dtype[numpy.float64] reveal_type(np.dtype(int)) # E: numpy.dtype reveal_type(np.dtype(bool)) # E: numpy.dtype[numpy.bool_] +reveal_type(np.dtype(str)) # E: numpy.dtype[numpy.str_] +reveal_type(np.dtype(bytes)) # E: numpy.dtype[numpy.bytes_] # Special case for None reveal_type(np.dtype(None)) # E: numpy.dtype[numpy.float64] @@ -20,5 +26,8 @@ # Dtypes of dtypes reveal_type(np.dtype(np.dtype(np.float64))) # E: numpy.dtype[numpy.float64*] +# Parameterized dtypes +reveal_type(np.dtype("S8")) # E: numpy.dtype + # Void reveal_type(np.dtype(("U", 10))) # E: numpy.dtype[numpy.void] From 12e3e1a33ebbef9f3b263b258e3eabab6de5f6a5 Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Wed, 7 Oct 2020 21:03:30 -0700 Subject: [PATCH 218/409] MAINT: fix complex64 overload; add str0/bytes to literals --- numpy/__init__.pyi | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index f4caaab7ccdc..c51141d5de85 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -787,7 +787,7 @@ class dtype(Generic[_DTypeScalar]): ], align: bool = ..., copy: bool = ..., - ) -> dtype[complex128]: ... + ) -> dtype[complex64]: ... @overload def __new__( cls, @@ -820,6 +820,7 @@ class dtype(Generic[_DTypeScalar]): ">S", "bytes", "bytes_", + "bytes0", ], ], align: bool = ..., @@ -838,6 +839,7 @@ class dtype(Generic[_DTypeScalar]): # to is platform-dependent. "str", "str_", + "str0", ], ], align: bool = ..., From 980d52eeaf0498f49dfef61f2a6cba6b78100d83 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 7 Oct 2020 23:04:17 -0500 Subject: [PATCH 219/409] BUG: Accept legacy user-dtypes if not isinstance(dtype.type, np.generic) These are now accepted (as a no-op) in `_PyArray_MapPyTypeToDType` retaining previous behaviour (as lookup for a registered user-type was only done if the instance check suceeded. Retain the check for future user DTypes, as it sould be a conscience choice to relax this requirement. --- numpy/core/src/multiarray/array_coercion.c | 36 ++++++++++++++-------- numpy/core/tests/test_dtype.py | 4 --- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c index 64a06d58b12a..4831dfca6552 100644 --- a/numpy/core/src/multiarray/array_coercion.c +++ b/numpy/core/src/multiarray/array_coercion.c @@ -128,7 +128,9 @@ _prime_global_pytype_to_type_dict(void) /** - * Add a new mapping from a python type to the DType class. + * Add a new mapping from a python type to the DType class. 
For a user + * defined legacy dtype, this function does nothing unless the pytype + * subclass from `np.generic`. * * This assumes that the DType class is guaranteed to hold on the * python type (this assumption is guaranteed). @@ -145,21 +147,29 @@ _PyArray_MapPyTypeToDType( { PyObject *Dtype_obj = (PyObject *)DType; - if (userdef) { + if (userdef && !PyObject_IsSubclass( + (PyObject *)pytype, (PyObject *)&PyGenericArrType_Type)) { /* - * It seems we did not strictly enforce this in the legacy dtype - * API, but assume that it is always true. Further, this could be - * relaxed in the future. In particular we should have a new - * superclass of ``np.generic`` in order to note enforce the array - * scalar behaviour. + * We expect that user dtypes (for now) will subclass some numpy + * scalar class to allow automatic discovery. */ - if (!PyObject_IsSubclass((PyObject *)pytype, (PyObject *)&PyGenericArrType_Type)) { - PyErr_Format(PyExc_RuntimeError, - "currently it is only possible to register a DType " - "for scalars deriving from `np.generic`, got '%S'.", - (PyObject *)pytype); - return -1; + if (DType->legacy) { + /* + * For legacy user dtypes, discovery relied on subclassing, but + * arbitrary type objects are supported, so do nothing. + */ + return 0; } + /* + * We currently enforce that user DTypes subclass from `np.generic` + * (this should become a `np.generic` base class and may be lifted + * entirely). + */ + PyErr_Format(PyExc_RuntimeError, + "currently it is only possible to register a DType " + "for scalars deriving from `np.generic`, got '%S'.", + (PyObject *)pytype); + return -1; } /* Create the global dictionary if it does not exist */ diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index 898ceebcd45b..45cc0b8b30cd 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -1355,10 +1355,6 @@ class mytype: # unnecessary restriction, but one that has been around forever: assert np.dtype(mytype) == np.dtype("O") - with pytest.raises(RuntimeError): - # Registering a second time should fail - create_custom_field_dtype(blueprint, mytype, 0) - def test_custom_structured_dtype_errors(self): class mytype: pass From 6681c87e08a4e9bbd4d7a2aa7fb49e1f5852e359 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Wed, 7 Oct 2020 22:43:26 -0500 Subject: [PATCH 220/409] BUG: Fix usertype typenumber during registration This slightly rearranges the code to ensure that the type number set for a user type is -1 (invalid) if it does not succeed. (This should be unnecessary, but might be relevant in the unlikely event that a usertype does not check the error return correctly.) 
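
[Editor's note: illustration only, not part of the patch. A hypothetical
Python sketch of the ordering the message describes: the descriptor is
appended and numbered first, and if the wrapping step then fails, both the
type number (reset to the invalid value -1) and the registration itself are
rolled back, so no half-registered type is left behind.]

    class Descr:
        type_num = -1

    user_descrs = []

    def register_data_type(descr, wrap_legacy_descriptor):
        user_descrs.append(descr)
        descr.type_num = len(user_descrs) - 1  # tentative type number
        if not wrap_legacy_descriptor(descr):
            descr.type_num = -1                # reset to "invalid"
            user_descrs.pop()                  # undo the registration
            return -1                          # signal failure
        return descr.type_num
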
--- numpy/core/src/multiarray/usertypes.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c index f8bb5ece7049..1404c9b68c41 100644 --- a/numpy/core/src/multiarray/usertypes.c +++ b/numpy/core/src/multiarray/usertypes.c @@ -251,12 +251,16 @@ PyArray_RegisterDataType(PyArray_Descr *descr) PyErr_SetString(PyExc_MemoryError, "RegisterDataType"); return -1; } + userdescrs[NPY_NUMUSERTYPES++] = descr; + descr->type_num = typenum; if (dtypemeta_wrap_legacy_descriptor(descr) < 0) { + descr->type_num = -1; + NPY_NUMUSERTYPES--; return -1; } - descr->type_num = typenum; + return typenum; } From 7e952c739ca9ac123f2e8568c5ded7693be058da Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Wed, 7 Oct 2020 21:14:06 -0700 Subject: [PATCH 221/409] MAINT: add str0 as an alias of str_ on the typing level --- numpy/__init__.pyi | 3 +-- numpy/typing/tests/data/reveal/scalars.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 1a3116d83dd1..b8c881fd128a 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -444,7 +444,6 @@ sort_complex: Any source: Any split: Any stack: Any -str0: Any string_: Any sys: Any take_along_axis: Any @@ -1526,7 +1525,7 @@ class str_(character, str): self, __value: bytes, encoding: str = ..., errors: str = ... ) -> None: ... -unicode_ = str_ +unicode_ = str0 = str_ # TODO(alan): Platform dependent types # longcomplex, longdouble, longfloat diff --git a/numpy/typing/tests/data/reveal/scalars.py b/numpy/typing/tests/data/reveal/scalars.py index b0dd762dfc7a..0168ebed8c6a 100644 --- a/numpy/typing/tests/data/reveal/scalars.py +++ b/numpy/typing/tests/data/reveal/scalars.py @@ -16,3 +16,4 @@ reveal_type(np.complex128().imag) # E: numpy.float64 reveal_type(np.unicode_('foo')) # E: numpy.str_ +reveal_type(np.str0('foo')) # E: numpy.str_ From be9b5759ef0893b6e8bf03ca48a5ed83d7b9dc75 Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Wed, 7 Oct 2020 21:17:26 -0700 Subject: [PATCH 222/409] BUG: remove `sys` from the type stubs It is the builtin sys module, which people should not be accessing through the NumPy namespace. --- numpy/__init__.pyi | 1 - 1 file changed, 1 deletion(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 3d40682e7432..26673d95556a 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -487,7 +487,6 @@ split: Any stack: Any str0: Any string_: Any -sys: Any take_along_axis: Any testing: Any tile: Any From 86472322736260e7c2fb2ef72e7d01925adc9de0 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Thu, 8 Oct 2020 00:39:04 -0400 Subject: [PATCH 223/409] DOC: New round of NEP 42 edits (gh-17493) * DOC: Revise NEP 42 "common DType" section * DOC: Eliminate duplicate doc in NEP42 * DOC: Fix missing 'not' in NEP42 edit * DOC: Minor typos in NEP42 edit * DOC: Reword opening sentence of NEP 42 revision Also clarify a wording. * DOC: Correct :class: reference in NEP42 edit * DOC: Fix Windows line-enders on NEP42 edit * DOC: New round of common DType edits of NEP 42 * DOC: Uniquify anchors in NEP 42 * DOC: Typo in NEP42 edit * DOC: Last push for NEP 42 edit * Small fixups and moving example One paragraph on `__common_dtype` in the alternatives was already before very confusing. This makes it hopefully more clear. 
* NEP: Change title to "New and extensible DTypes" Co-authored-by: Sebastian Berg --- doc/neps/nep-0042-new-dtypes.rst | 864 ++++++++++++++++--------------- 1 file changed, 439 insertions(+), 425 deletions(-) diff --git a/doc/neps/nep-0042-new-dtypes.rst b/doc/neps/nep-0042-new-dtypes.rst index 99887a45100b..6f457a9d4984 100644 --- a/doc/neps/nep-0042-new-dtypes.rst +++ b/doc/neps/nep-0042-new-dtypes.rst @@ -1,10 +1,10 @@ .. _NEP42: ============================================================================== -NEP 42 — User-extensible dtypes +NEP 42 — New and extensible DTypes ============================================================================== -:title: User-extensible dtypes +:title: New and extensible DTypes :Author: Sebastian Berg :Author: Ben Nathanson :Author: Marten van Kerkwijk @@ -30,14 +30,15 @@ NEP 42 — User-extensible dtypes Abstract ****************************************************************************** -NumPy's dtype architecture is monolithic, built around a single class that -handles each dtype as an instance. There's no principled way to expand it to -new dtypes, and the code is difficult to read and maintain. +NumPy's dtype architecture is monolithic -- each dtype is an instance of a +single class. There's no principled way to expand it for new dtypes, and the +code is difficult to read and maintain. -As NEP 41 explains, we are proposing a new architecture that is modular and -open to user additions. dtypes will derive from a new ``DType`` class serving -as the extension point for new types. ``np.dtype("float64")`` will return an -instance of a ``Float64`` class, a subclass of root class ``np.dtype``. +As :ref:`NEP 41 ` explains, we are proposing a new architecture that is +modular and open to user additions. dtypes will derive from a new ``DType`` +class serving as the extension point for new types. ``np.dtype("float64")`` +will return an instance of a ``Float64`` class, a subclass of root class +``np.dtype``. This NEP is one of two that lay out the design and API of this new architecture. This NEP addresses dtype implementation; NEP 43 addresses @@ -56,8 +57,8 @@ Motivation and scope Our goal is to allow user code to create fully featured dtypes for a broad variety of uses, from physical units (such as meters) to domain-specific -representations of geometric objects. NEP 41 describes a number of these new -dtypes and their benefits. +representations of geometric objects. :ref:`NEP 41 ` describes a number +of these new dtypes and their benefits. Any design supporting dtypes must consider: @@ -77,21 +78,21 @@ And to provide this, All these are the subjects of this NEP. - The class hierarchy, its relation to the Python scalar types, and its - important attributes are described in `DType class`_. + important attributes are described in `nep42_DType class`_. - The functionality that will support dtype casting is described in `Casting`_. - The implementation of item access and storage, and the way shape and dtype - are determined when creating an array, are described in `Array coercion`_. + are determined when creating an array, are described in :ref:`nep42_array_coercion`. - The functionality for users to define their own DTypes is described in `Public C-API`_. The API here and in NEP 43 is entirely on the C side. A Python-side version -will be proposed in a future NEP. -A future Python API is expected to be similar, but provide a more convenient -API to reuse the functionality of existing DTypes. 
-It could also provide shorthands to create structured DTypes similar to python's +will be proposed in a future NEP. A future Python API is expected to be +similar, but provide a more convenient API to reuse the functionality of +existing DTypes. It could also provide shorthands to create structured DTypes +similar to Python's `dataclasses `_. @@ -117,6 +118,9 @@ release. ``np.array([np.array(array_like)])`` - array operations may or may not preserve dtype metadata +- Documentation that describes the internal structure of dtypes will need + to be updated. + The new code must pass NumPy's regular test suite, giving some assurance that the changes are compatible with existing code. @@ -134,7 +138,7 @@ future NEP will tackle the Python API. After implementing this NEP, creating a DType will be possible by implementing the following outlined DType base class, -that is further described in `DType class`_: +that is further described in `nep42_DType class`_: .. code-block:: python :dedent: 0 @@ -206,9 +210,9 @@ Other elements of the casting implementation is the ``CastingImpl``: def _get_loop(...) -> lowlevel_C_loop: raise NotImplementedError -which describes the casting from one DType to another. -In NEP 43 this ``CastingImpl`` object is used unchanged to support -universal functions. +which describes the casting from one DType to another. In +NEP 43 this ``CastingImpl`` object is used unchanged to +support universal functions. ****************************************************************************** @@ -229,6 +233,12 @@ Definitions cast Conversion of an array to a different dtype. + parametric type + A dtype whose representation can change based on a parameter value, + like a string dtype with a length parameter. All members of the current + ``flexible`` dtype class are parametric. See + :ref:`NEP 40 `. + promotion Finding a dtype that can perform an operation on a mix of dtypes without loss of information. @@ -241,13 +251,13 @@ On the C level we use ``descriptor`` or ``descr`` to mean dtype instances from DType classes. .. note:: - Perhaps confusingly, NumPy already has a class hierarchy for numeric types, as - seen :ref:`in the figure ` of NEP 40, and the new - DType hierarchy will resemble it. But the existing hierarchy is for scalar - types, not DTypes, and its existence is largely irrelevant here, as NEP 40 and - 41 explain. + NumPy has an existing class hierarchy for scalar types, as + seen :ref:`in the figure ` of + :ref:`NEP 40 `, and the new DType hierarchy will resemble it. The + types are used as an attribute of the single dtype class in the current + NumPy; they're not dtype classes. They neither harm nor help this work. -.. _DType class: +.. _nep42_DType class: ****************************************************************************** The DType class @@ -322,7 +332,7 @@ hierarchy. of any precision; the precision of the results is determined by the precision of the arguments. -3. There is room for user-created families of DTypes. We can envision an +3. There's room for user-created families of DTypes. We can envision an abstract ``Unit`` class for physical units, with a concrete subclass like ``Float64Unit``. Calling ``Unit(np.float64, "m")`` (``m`` for meters) would be equivalent to ``Float64Unit("m")``. @@ -381,7 +391,7 @@ Miscellaneous methods and attributes This section collects definitions in the DType class that are not used in casting and array coercion, which are described in detail below. -* Existing dtype methods and C-side fields are preserved. 
+* Existing dtype methods (:class:`numpy.dtype`) and C-side fields are preserved. * ``DType.type`` replaces ``dtype.type``. Unless a use case arises, ``dtype.type`` will be deprecated. @@ -396,19 +406,11 @@ casting and array coercion, which are described in detail below. existing code, "canonical" will just signify native byte order, but it can take on new meanings in new DTypes -- for instance, to distinguish a complex-conjugated instance of Complex which stores ``real - imag`` instead - of ``real + imag`` and is thus not the canonical storage. The ISNBO ("is + of ``real + imag``. The ISNBO ("is native byte order") flag might be repurposed as the canonical flag. -* Support is included for parametric DTypes. As explained in - :ref:`NEP 40 `, parametric types have a - value associated with them. A DType will be deemed parametric if it - inherits from ParametricDType. - - Strings are one example of a parametric type -- ``S8`` is different from - ``S4`` because ``S4`` cannot store a length 8 string such as ``"length 8"`` - while ``S8`` can. - Similarly, the ``datetime64`` DType is parametric, since its unit must be specified. - The associated ``type`` is the ``np.datetime64`` scalar. +* Support is included for parametric DTypes. A DType will be deemed parametric + if it inherits from ParametricDType. * DType methods may resemble or even reuse existing Python slots. Thus Python special slots are off-limits for user-defined DTypes (for instance, defining @@ -444,7 +446,7 @@ casting and array coercion, which are described in detail below. functions like sorting that will be implemented in DTypes might eventually be reimplemented as generalized ufuncs. -.. _casting: +.. _nep_42_casting: ****************************************************************************** Casting @@ -452,82 +454,59 @@ Casting We review here the operations related to casting arrays: -- Finding the "common dtype," currently exposed by ``np.promote_types`` or - ``np.result_type`` +- Finding the "common dtype," returned by :func:`numpy.promote_types` and + :func:`numpy.result_type` -- The result of calling ``np.can_cast`` +- The result of calling :func:`numpy.can_cast` -We show how casting arrays with ``arr.astype(new_dtype)`` will be implemented. +We show how casting arrays with ``astype(new_dtype)`` will be implemented. `Common DType` operations ============================================================================== -Common-type operations are vital for array coercion when input types are -mixed. They determine the output dtype of ``np.concatenate()`` and are useful -in themselves. +When input types are mixed, a first step is to find a DType that can hold +the result without loss of information -- a "common DType." -NumPy provides ``np.result_type`` and -``np.promote_types``. -These differ in that ``np.result_type`` can take arrays and scalars as input -and implements value-based promotion [1]_. +Array coercion and concatenation both return a common dtype instance. Most +universal functions use the common DType for dispatching, though they might +not use it for a result (for instance, the result of a comparison is always +bool). -To distinguish between the promotion occurring during universal function -application, we will call it "common type" operation here. +We propose the following implementation: -**Motivation:** +- For two DType classes:: -Furthermore, common type operations may be used to find the correct dtype -to use for functions with different inputs (including universal functions). 
-This includes an interesting distinction: + __common_dtype__(cls, other : DTypeMeta) -> DTypeMeta -1. Universal functions use the DType classes for dispatching, they thus - require the common DType class (as a first step). - While this can help with finding the correct loop to execute, the loop - may not need the actual common dtype instance. - (Hypothetical example: - ``float_arr + string_arr -> string``, but the output string length is - not the same as ``np.concatenate(float_arr, string_arr)).dtype``.) + Returns a new DType, often one of the inputs, which can represent values + of both input DTypes. This should usually be minimal: + the common DType of ``Int16`` and ``Uint16`` is ``Int32`` and not ``Int64``. + ``__common_dtype__`` may return NotImplemented to defer to other and, + like Python operators, subclasses take precedence (their + ``__common_dtype__`` method is tried first). -2. Array coercion and concatenation require the common dtype *instance*. +- For two instances of the same DType:: -**Implementation:** The implementation of the common dtype (instance) -determination has some overlap with casting. Casting from a specific dtype -(Float64) to a String needs to find the correct string length (a step that is -mainly necessary for parametric dtypes). + __common_instance__(self: SelfT, other : SelfT) -> SelfT -We propose the following implementation: + For nonparametric built-in dtypes, this returns a canonicalized copy of + ``self``, preserving metadata. For nonparametric user types, this provides + a default implementation. -1. ``__common_dtype__(cls, other : DTypeMeta) -> DTypeMeta`` answers what the - common DType class is, given two DType class objects. It may return - ``NotImplemented`` to defer to ``other``. (For abstract DTypes, subclasses - get precedence, concrete types are never superclasses, so always get preference - or are tried from left to right). - -2. ``__common_instance__(self: SelfT, other : SelfT) -> SelfT`` is used when - two instances of the same DType are given. - For built-in dtypes (that are not parametric), this - currently always returns ``self`` (but ensures canonical representation). - This is to preserve metadata. We can thus provide a default implementation - for non-parametric user dtypes. - -These two cases do *not* cover the case where two different dtype instances -need to be promoted. For example `">float64"` and `"S8"`. The solution is -partially "outsourced" to the casting machinery by splitting the operation up -into three steps: - -1. ``Float64.__common_dtype__(type(>float64), type(S8))`` - returns `String` (or defers to ``String.__common_dtype__``). -2. The casting machinery provides the information that `">float64"` casts - to `"S32"` (see below for how casting will be defined). -3. ``String.__common_instance__("S8", "S32")`` returns the final `"S32"`. - -The main reason for this is to avoid the need to implement identical -functionality multiple times. The design (together with casting) naturally -separates the concerns of different Datatypes. In the above example, Float64 -does not need to know about the cast. While the casting machinery -(``CastingImpl[Float64, String]``) could include the third step, it is not -required to do so and the string can always be extended (e.g. with new -encodings) without extending the ``CastingImpl[Float64, String]``. +- For instances of different DTypes, for example ``>float64`` and ``S8``, + the operation is done in three steps: + + 1. 
``Float64.__common_dtype__(type(>float64), type(S8))`` + returns ``String`` (or defers to ``String.__common_dtype__``). + + 2. The casting machinery (explained in detail below) provides the + information that ``">float64"`` casts to ``"S32"`` + + 3. ``String.__common_instance__("S8", "S32")`` returns the final ``"S32"``. + +The benefit of this handoff is to reduce duplicated code and keep concerns +separate. DType implementations don't need to know how to cast, and the +results of casting can be extended to new types, such as a new string encoding. This means the implementation will work like this:: @@ -539,7 +518,7 @@ This means the implementation will work like this:: raise TypeError("no common dtype") return common_dtype - def promote_types(dtype1, dtyp2): + def promote_types(dtype1, dtype2): common = common_dtype(type(dtype1), type(dtype2)) if type(dtype1) is not common: @@ -555,269 +534,248 @@ This means the implementation will work like this:: if dtype1 is not dtype2: return common.__common_instance__(dtype1, dtype2) -Some of these steps may be optimized for non-parametric DTypes. - -**Note:** A currently implemented fallback for the ``__common_dtype__`` -operation is to use the "safe" casting logic. Since ``int16`` can safely cast -to ``int64``, it is clear that ``np.promote_types(int16, int64)`` should be -``int64``. +Some of these steps may be optimized for nonparametric DTypes. -However, this cannot define all such operations, and will fail for example for:: +Since the type returned by ``__common_dtype__`` is not necessarily one of the +two arguments, it's not equivalent to NumPy's "safe" casting. +Safe casting works for ``np.promote_types(int16, int64)``, which returns +``int64``, but fails for:: np.promote_types("int64", "float32") -> np.dtype("float64") -In this design, it is the responsibility of the DType author to ensure that -in most cases a safe-cast implies that this will be the result of the -``__common_dtype__`` method. +It is the responsibility of the DType author to ensure that the inputs +can be safely cast to the ``__common_dtype__``. -Note that some exceptions may apply. For example casting ``int32`` to +Exceptions may apply. For example, casting ``int32`` to a (long enough) string is at least at this time considered "safe". However ``np.promote_types(int32, String)`` will *not* be defined. -**Alternatives:** The use of casting for common dtype (instance) determination -neatly separates the concerns and allows for a minimal set of duplicate -functionality being implemented. In cases of mixed DType (classes), it also -adds an additional step to finding the common dtype. The common dtype (of two -instances) could thus be implemented explicitly to avoid this indirection, -potentially only as a fast-path. The above suggestion assumes that this is, -however, not a speed relevant path, since in most cases, e.g. in array -coercion, only a single Python type (and thus dtype) is involved. The proposed -design hinges in the implementation of casting to be separated into its own -ufunc-like object as described below. - -In principle common DType could be defined only based on "safe casting" rules, -if we order all DTypes and find the first one both can cast to safely. -However, the issue with this approach is that a newly added DType can change -the behaviour of an existing program. For example, a new ``int24`` would be -the first valid common type for ``int16`` and ``uint16``, demoting the -currently defined behavior of ``int32``. 
-Both, the need of a linear type hierarchy and the potential of changing
-existing behaviour by adding a new DType, are a downside to using a generic
-rule based on "safe casting".
-However, a more generic common DType could be implemented in the future, since
-``__common_dtype__`` can in principle use casting information internally.
-**Example:** ``object`` always chooses ``object`` as the common DType. For
+**Example:**
+
+``object`` always chooses ``object`` as the common DType. For
``datetime64`` type promotion is defined with no other datatype, but if
someone were to implement a new higher precision datetime, then::
-    HighPrecisionDatetime.__common_dtype__(np.dtype[np.datetime64])
+   HighPrecisionDatetime.__common_dtype__(np.dtype[np.datetime64])
-would return ``HighPrecisionDatetime``, and the below casting may need to
-decide how to handle the datetime unit.
+would return ``HighPrecisionDatetime``, and the casting implementation,
+as described below, may need to decide how to handle the datetime unit.
+
+
+**Alternatives:**
+
+- We're pushing the decision on common DTypes to the DType classes. Suppose
+  instead we could turn to a universal algorithm based on safe casting,
+  imposing a total order on DTypes and returning the first type that both
+  arguments could cast to safely.
+
+  It would be difficult to devise a reasonable total order, and it would have
+  to accept new entries. Beyond that, the approach is flawed because
+  importing a type can change the behavior of a program. For example, a
+  program requiring the common DType of ``int16`` and ``uint16`` would
+  ordinarily get the built-in type ``int32`` as the first match; if the
+  program adds ``import int24``, the first match becomes ``int24`` and the
+  smaller type might make the program overflow for the first time. [1]_
+
+- A more flexible common DType could be implemented in the future where
+  ``__common_dtype__`` relies on information from the casting logic.
+  Since ``__common_dtype__`` is a method, such a default implementation
+  could be added at a later time.
+
+- The three-step handling of differing dtypes could, of course, be coalesced.
+  It would lose the value of splitting in return for a possibly faster
+  execution. But few cases would benefit. Most cases, such as array coercion,
+  involve a single Python type (and thus dtype).

The cast operation
==============================================================================

-Perhaps the most complex and interesting DType operation is casting. Casting
+Casting is perhaps the most complex and interesting DType operation. It
is much like a typical universal function on arrays, converting one input to a
-new output. There are two key distinctions:
+new output, with two distinctions:

-1. Casting always requires an explicit output datatype.
-2. The NumPy iterator API requires access to functions that are lower-level
-   than what universal functions currently need.
+- Casting always requires an explicit output datatype.
+- The NumPy iterator API requires access to functions that are lower-level
+  than what universal functions currently need.

-Casting can be complex, and may not implement all details of each input
-datatype (such as non-native byte order or unaligned access). Thus casting
-naturally is performed in up to three steps:
+Casting can be complex and may not implement all details of each input
+datatype (such as non-native byte order or unaligned access). So a complex
+type conversion might entail 3 steps:

-1.
The given datatype is normalized and prepared for the actual cast. +1. The input datatype is normalized and prepared for the cast. 2. The cast is performed. -3. The cast result, which is in a normalized form, is cast to the requested +3. The result, which is in a normalized form, is cast to the requested form (non-native byte order). -Often only step 2 is required. - Further, NumPy provides different casting kinds or safety specifiers: -* "equivalent" -* "safe" -* "same_kind" -* "unsafe" +* ``equivalent``, allowing only byte-order changes +* ``safe``, requiring a type large enough to preserve value +* ``same_kind``, requiring a safe cast or one within a kind, like float64 to float32 +* ``unsafe``, allowing any data conversion + +and in some cases a cast may be just a view. + +We need to support the two current signatures of ``arr.astype``: -and in some cases a cast may even be represented as a simple view. +- For DTypes: ``arr.astype(np.String)`` + - current spelling ``arr.astype("S")`` + - ``np.String`` can be an abstract DType -**Motivation:** Similar to the common dtype/DType operation above, we again -have two use cases: +- For dtypes: ``arr.astype(np.dtype("S8"))`` -1. ``arr.astype(np.String)`` (current spelling ``arr.astype("S")``) -2. ``arr.astype(np.dtype("S8"))`` -where the first case is also noted in NEP 40 and 41 as a design goal, since -``np.String`` could also be an abstract DType as mentioned above. +We also have two signatures of ``np.can_cast``: -The implementation of casting should also come with as little duplicate -implementation as necessary, i.e. to avoid unnecessary methods on the DTypes. -Furthermore, it is desirable that casting is implemented similar to universal -functions. +- Instance to class: ``np.can_cast(dtype, DType, "safe")`` +- Instance to instance: ``np.can_cast(dtype, other_dtype, "safe")`` -Analogous to the above, the following also need to be defined: +On the Python level ``dtype`` is overloaded to mean class or instance. -1. ``np.can_cast(dtype, DType, "safe")`` (instance to class) -2. ``np.can_cast(dtype, other_dtype, "safe")`` (casting an instance to another - instance) +A third ``can_cast`` signature, ``np.can_cast(DType, OtherDType, "safe")``,may be used +internally but need not be exposed to Python. -overloading the meaning of ``dtype`` to mean either class or instance (on the -Python level). The question of ``np.can_cast(DType, OtherDType, "safe")`` is -also a possibility and may be used internally. However, it is initially not -necessary to expose to Python. +During DType creation, DTypes will be able to pass a list of ``CastingImpl`` +objects, which can define casting to and from the DType. +One of them should define the cast between instances of that DType. It can be +omitted if the DType has only a single implementation and is nonparametric. -**Implementation:** During DType creation, DTypes will have the ability to -pass a list of ``CastingImpl`` objects, which can define casting to and from -the DType. One of these ``CastingImpl`` objects is special because it should -define the cast within the same DType (from one instance to another). A DType -which does not define this, must have only a single implementation and not be -parametric. 
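+In the pseudocode style used above, the list of ``CastingImpl`` objects for
+the running ``String`` example might look like this (the ``CastingImpl``
+constructor spelling is invented for illustration; the concrete interface is
+the C API described at the end of this NEP)::
+
+    # one CastingImpl per DType pair, supplied when the DType is created
+    string_casts = [
+        CastingImpl(String, String),  # required for the parametric String:
+                                      # casts between instances, "S8" -> "S32"
+        CastingImpl(Int24, String),   # optional casts to and from
+        CastingImpl(String, Int24),   # other DTypes
+    ]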
+Each ``CastingImpl`` has a distinct DType signature: + + ``CastingImpl[InputDtype, RequestedDtype]`` -Each ``CastingImpl`` has a specific DType signature: -``CastingImpl[InputDtype, RequestedDtype]`` and implements the following methods and attributes: -* ``resolve_descriptors(self, Tuple[DType] : input) -> casting, Tuple[DType]``. - Here ``casting`` signals the casting safeness (safe, unsafe, or same-kind) - and the output dtype tuple is used for more multi-step casting (see below). -* ``get_transferfunction(...) -> function handling cast`` (signature to be decided). - This function returns a low-level implementation of a strided casting function - ("transfer function"). -* ``casting`` attribute with one of equivalent, safe, unsafe, or same-kind. Used to - quickly decide casting safety when this is relevant. -``resolve_descriptors`` provides information about whether or -not a cast is safe and is of importance mainly for parametric DTypes. -``get_transferfunction`` provides NumPy with a function capable of performing -the actual cast. Initially the implementation of ``get_transferfunction`` -will be *private*, and users will only be able to provide strided loops -with the signature. +* To report safeness, + + ``resolve_descriptors(self, Tuple[DType] : input) -> casting, Tuple[DType]``. + + The ``casting`` output reports safeness (safe, unsafe, or same-kind), and + the tuple is used for more multistep casting, as in the example below. -**Performing the cast** +* To get a casting function, -.. _cast_figure: + ``get_loop(...) -> function_to_handle_cast (signature to be decided)`` + + returns a low-level implementation of a strided casting function + ("transfer function") capable of performing the + cast. + + Initially the implementation will be *private*, and users will only be + able to provide strided loops with the signature. + +* For performance, a ``casting`` attribute taking a value of ``equivalent``, ``safe``, + ``unsafe``, or ``same-kind``. + + +**Performing a cast** + +.. _nep42_cast_figure: .. figure:: _static/casting_flow.svg :figclass: align-center -`The above figure `_ illustrates the multi-step logic necessary to -cast for example an ``int24`` with a value of ``42`` to a string of length 20 +The above figure illustrates a multistep +cast of an ``int24`` with a value of ``42`` to a string of length 20 (``"S20"``). -In this example, the implementer only provided the functionality of casting -an ``int24`` to an ``S8`` string (which can hold all 24bit integers). -Due to this limited implementation, the full cast has to do multiple -conversions. The full process is: -1. Call ``CastingImpl[Int24, String].resolve_descriptors((int24, "S20"))``. +We've picked an example where the implementer has only provided limited +functionality: a function to cast an ``int24`` to an ``S8`` string (which can +hold all 24-bit integers). This means multiple conversions are needed. + +The full process is: + +1. Call + + ``CastingImpl[Int24, String].resolve_descriptors((int24, "S20"))``. + This provides the information that ``CastingImpl[Int24, String]`` only implements the cast of ``int24`` to ``"S8"``. + 2. Since ``"S8"`` does not match ``"S20"``, use - ``CastingImpl[String, String].get_transferfunction()`` + + ``CastingImpl[String, String].get_loop()`` + to find the transfer (casting) function to convert an ``"S8"`` into an ``"S20"`` + 3. 
Fetch the transfer function to convert an ``int24`` to an ``"S8"`` using - ``CastingImpl[Int24, String].get_transferfunction()`` + + ``CastingImpl[Int24, String].get_loop()`` + 4. Perform the actual cast using the two transfer functions: + ``int24(42) -> S8("42") -> S20("42")``. -Note that in this example the ``resolve_descriptors`` function plays a less -central role. It becomes more important for ``np.can_cast``. + ``resolve_descriptors`` allows the implementation for + + ``np.array(42, dtype=int24).astype(String)`` -Further, ``resolve_descriptors`` allows the implementation for -``np.array(42, dtype=int24).astype(String)`` to call -``CastingImpl[Int24, String].resolve_descriptors((int24, None))``. -In this case the result of ``(int24, "S8")`` defines the correct cast: -``np.array(42, dtype=int24),astype(String) == np.array("42", dtype="S8")``. + to call + + ``CastingImpl[Int24, String].resolve_descriptors((int24, None))``. + + In this case the result of ``(int24, "S8")`` defines the correct cast: + + ``np.array(42, dtype=int24).astype(String) == np.array("42", dtype="S8")``. **Casting safety** -To answer the question of casting safety ``np.can_cast(int24, "S20", -casting="safe")``, only the ``resolve_descriptors`` function is required and -is called in the same way as in `the figure describing a cast `_. +To compute ``np.can_cast(int24, "S20", casting="safe")``, only the +``resolve_descriptors`` function is required and +is called in the same way as in :ref:`the figure describing a cast `. + In this case, the calls to ``resolve_descriptors``, will also provide the information that ``int24 -> "S8"`` as well as ``"S8" -> "S20"`` are safe casts, and thus also the ``int24 -> "S20"`` is a safe cast. In some cases, no cast is necessary. For example, on most Linux systems ``np.dtype("long")`` and ``np.dtype("longlong")`` are different dtypes but are -both 64bit integers. -In this case, the cast can be performed using ``long_arr.view("longlong")``. -The information that a cast is a -"view" will be handled by an additional flag. Thus the ``casting`` -can have the 8 values in total: equivalent, safe, unsafe, same-kind as well as equivalent+view, safe+view, -unsafe+view, and same-kind+view. -NumPy currently defines ``dtype1 == dtype2`` to be True only if byte order matches. -This functionality can be replaced with the combination of "equivalent" casting -and the "view" flag. +both 64-bit integers. In this case, the cast can be performed using +``long_arr.view("longlong")``. The information that a cast is a view will be +handled by an additional flag. Thus the ``casting`` can have the 8 values in +total: the original 4 of ``equivalent``, ``safe``, ``unsafe``, and ``same-kind``, +plus ``equivalent+view``, ``safe+view``, ``unsafe+view``, and +``same-kind+view``. NumPy currently defines ``dtype1 == dtype2`` to be True +only if byte order matches. This functionality can be replaced with the +combination of "equivalent" casting and the "view" flag. -(For more information on the ``resolve_descriptors`` signature see the C-API -section below and NEP 43.) +(For more information on the ``resolve_descriptors`` signature see the +:ref:`nep42_C-API` section below and NEP 43.) **Casting between instances of the same DType** -In general one of the casting implementations defined by the DType implementor -must be ``CastingImpl[DType, DType]`` (unless there is only a singleton -instance). 
To keep the casting to as few steps as possible, this -implementation must initially be capable of any conversions between all instances of this -DType. +To keep down the number of casting +steps, CastingImpl must be capable of any conversion between all instances +of this DType. +In general the DType implementer must include ``CastingImpl[DType, DType]`` +unless there is only a singleton instance. **General multistep casting** -In general we could implement certain casts, such as ``int8`` to ``int24`` -even if the user only provides an ``int16 -> int24`` cast. This proposal -currently does not provide this functionality. However, it could be extended -in the future to either find such casts dynamically, or at least allow -``resolve_descriptors`` to return arbitrary ``dtypes``. If ``CastingImpl[Int8, -Int24].resolve_descriptors((int8, int24))`` returns ``(int16, int24)``, the -actual casting process could be extended to include the ``int8 -> int16`` -cast. This adds an additional step to the casting process. - - -**Alternatives:** The choice of using only the DType classes in the first step -of finding the correct ``CastingImpl`` means that the default implementation -of ``__common_dtype__`` has a reasonable definition of "safe casting" between -DTypes classes (although e.g. the concatenate operation using it may still -fail when attempting to find the actual common instance or cast). - -The split into multiple steps may seem to add complexity rather than reduce -it, however, it consolidates that we have the two distinct signatures of -``np.can_cast(dtype, DTypeClass)`` and ``np.can_cast(dtype, other_dtype)``. -Further, the above API guarantees the separation of concerns for user DTypes. -The user ``Int24`` dtype does not have to handle all string lengths if it does -not wish to do so. Further, if an encoding was added to the ``String`` DType, -this does not affect the overall cast. The ``resolve_descriptors`` function can -keep returning the default encoding and the ``CastingImpl[String, String]`` -can take care of any necessary encoding changes. - -The main alternative to the proposed design is to move most of the information -which is here pushed into the ``CastingImpl`` directly into methods on the -DTypes. This, however, will not allow the close similarity between casting and -universal functions. On the up side, it reduces the necessary indirection as -noted below. - -An initial proposal defined two methods ``__can_cast_to__(self, other)`` to -dynamically return ``CastingImpl``. The advantage of this addition is that it -removes the requirement to define all possible casts at DType creation time (of -one of the involved DTypes). -Such API could be added at a later time. This is similar to Python which -provides ``__getattr__`` for additional control over attribute lookup. - -**Notes:** The proposed ``CastingImpl`` is designed to be identical to the -``PyArrayMethod`` proposed in NEP 43 as part of restructuring ufuncs to handle -new DTypes. +We could implement certain casts, such as ``int8`` to ``int24``, +even if the user provides only an ``int16 -> int24`` cast. This proposal does +not provide that, but future work might find such casts dynamically, or at least +allow ``resolve_descriptors`` to return arbitrary ``dtypes``. + +If ``CastingImpl[Int8, Int24].resolve_descriptors((int8, int24))`` returns +``(int16, int24)``, the actual casting process could be extended to include +the ``int8 -> int16`` cast. This adds a step. 
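+A sketch of how the casting machinery could assemble such a chain (pure
+pseudocode: ``within_dtype_cast`` is an invented helper standing in for the
+``CastingImpl[DType, DType]`` loops, and error handling is omitted)::
+
+    def plan_cast(from_descr, to_descr):
+        impl = CastingImpl[type(from_descr), type(to_descr)]
+        # `casting` reports the combined safety; the dtype tuple tells us
+        # which instances the strided loop can actually handle.
+        casting, (mid_from, mid_to) = impl.resolve_descriptors(
+            (from_descr, to_descr))
+        steps = []
+        if mid_from != from_descr:
+            # e.g. int8 -> int16, handled within the source DType
+            steps.append(within_dtype_cast(from_descr, mid_from))
+        steps.append(impl.get_loop(mid_from, mid_to))
+        if mid_to != to_descr:
+            # e.g. "S8" -> "S20", handled within the destination DType
+            steps.append(within_dtype_cast(mid_to, to_descr))
+        return steps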
-The way dispatching works for ``CastingImpl`` is planned to be limited
-initially and fully opaque. In the future, it may or may not be moved into a
-special UFunc, or behave more like a universal function.
 
+**Example:**
+
-**Example:** The implementation for casting integers to datetime would generally
+The implementation for casting integers to datetime would generally
 say that this cast is unsafe (because it is always an unsafe cast).
 Its ``resolve_descriptors`` function may look like::
 
-    def resolve_descriptors(self, given_dtypes):
+    def resolve_descriptors(self, given_dtypes):
         from_dtype, to_dtype = given_dtypes
-
         from_dtype = from_dtype.ensure_canonical()  # ensure not byte-swapped
         if to_dtype is None:
             raise TypeError("Cannot convert to a NumPy datetime without a unit")
@@ -831,39 +789,87 @@ Its ``resolve_descriptors`` function may look like::
 
 .. note::
 
-    While NumPy currently defines integer to datetime casts, with the possible
+    While NumPy currently defines integer-to-datetime casts, with the possible
     exception of the unit-less ``timedelta64`` it may be better to not define
     these casts at all. In general we expect that user defined DTypes will be
    using custom methods such as ``unit.drop_unit(arr)`` or
    ``arr * unit.seconds``.
 
-******************************************************************************
-Array coercion
-******************************************************************************
+**Alternatives:**
+
+- Our design objectives are:
+
+  - Minimize the number of DType methods and avoid code duplication.
+  - Mirror the implementation of universal functions.
+
+- The decision to use only the DType classes in the first step of finding the
+  correct ``CastingImpl``, in addition to defining ``CastingImpl.casting``,
+  allows retaining the current default implementation of
+  ``__common_dtype__`` for existing user defined dtypes, which could be
+  expanded in the future.
+
+- The split into multiple steps may seem to add complexity rather than reduce
+  it, but it consolidates the signatures of ``np.can_cast(dtype, DTypeClass)``
+  and ``np.can_cast(dtype, other_dtype)``.
+
+  Further, the API guarantees separation of concerns for user DTypes. The user
+  ``Int24`` dtype does not have to handle all string lengths if it does not
+  wish to do so. Further, an encoding added to the ``String`` DType would
+  not affect the overall cast. The ``resolve_descriptors`` function
+  can keep returning the default encoding and the ``CastingImpl[String,
+  String]`` can take care of any necessary encoding changes.
+
+- The main alternative is moving most of the information that is here pushed
+  into the ``CastingImpl`` directly into methods on the DTypes. But this
+  obscures the similarity between casting and universal functions. It does
+  reduce indirection, as noted below.
+
+- An earlier proposal defined two methods ``__can_cast_to__(self, other)`` to
+  dynamically return ``CastingImpl``. This
+  removes the requirement to define all possible casts at DType creation
+  (of one of the involved DTypes).
+
+  Such an API could be added later. It resembles Python's ``__getattr__`` in
+  providing additional control over attribute lookup.
+
+
+**Notes:**
+
+The proposed ``CastingImpl`` is designed to be identical to the
+``PyArrayMethod`` proposed in NEP 43 as part of restructuring ufuncs to handle
+new DTypes.
+
+The way dispatching works for ``CastingImpl`` is planned to be limited
+initially and fully opaque. 
In the future, it may or may not be moved into a +special UFunc, or behave more like a universal function. + + +.. _nep42_array_coercion: -The following sections discuss the two aspects related to creating an array from -arbitrary python objects. This requires a defined protocol to store data -inside the array. Further, it requires the ability to find the correct dtype -when a user does not provide the dtype explicitly. Coercion to and from Python objects ============================================================================== -**Motivation:** When storing a single value in an array or taking it out, it -is necessary to coerce (convert) it to and from the low-level representation +When storing a single value in an array or taking it out, it is necessary to +coerce it -- that is, convert it -- to and from the low-level representation inside the array. -**Description:** Coercing to and from Python scalars requires two to three -methods: +Coercion is slightly more complex than typical casts. One reason is that a +Python object could itself be a 0-dimensional array or scalar with an +associated DType. + +Coercing to and from Python scalars requires two to three +methods that largely correspond to the current definitions: 1. ``__dtype_setitem__(self, item_pointer, value)`` + 2. ``__dtype_getitem__(self, item_pointer, base_obj) -> object``; ``base_obj`` is for memory management and usually ignored; it points to an object owning the data. Its only role is to support structured datatypes with subarrays within NumPy, which currently return views into the array. The function returns an equivalent Python scalar (i.e. typically a NumPy scalar). + 3. ``__dtype_get_pyitem__(self, item_pointer, base_obj) -> object`` (initially hidden for new-style user-defined datatypes, may be exposed on user request). This corresponds to the ``arr.item()`` method also used by @@ -873,18 +879,20 @@ methods: (The above is meant for C-API. A Python-side API would have to use byte buffers or similar to implement this, which may be useful for prototyping.) -These largely correspond to the current definitions. When a certain scalar +When a certain scalar has a known (different) dtype, NumPy may in the future use casting instead of -``__dtype_setitem__``. A user datatype is (initially) expected to implement +``__dtype_setitem__``. + +A user datatype is (initially) expected to implement ``__dtype_setitem__`` for its own ``DType.type`` and all basic Python scalars it wishes to support (e.g. ``int`` and ``float``). In the future a -function "``known_scalartype``" may be made public to allow a user dtype to signal +function ``known_scalar_type`` may be made public to allow a user dtype to signal which Python scalars it can store directly. **Implementation:** The pseudocode implementation for setting a single item in -an array from an arbitrary Python object ``value`` is (note that some -functions are only defined below):: +an array from an arbitrary Python object ``value`` is (some +functions here are defined later):: def PyArray_Pack(dtype, item_pointer, value): DType = type(dtype) @@ -930,28 +938,35 @@ would require the user to take to duplicate or fall back to existing casting functionality. It is certainly possible to describe the coercion to and from Python objects -using the general casting machinery, -but the ``object`` dtype is special and important enough to be handled by NumPy -using the presented methods. 
- -**Further Issues and Discussion:** The ``__dtype_setitem__`` function currently duplicates -some code, such as coercion from a string. ``datetime64`` allows assignment -from string, but the same conversion also occurs for casting from the string -dtype to ``datetime64``. In the future, we may expose the ``known_scalartype`` -function to allow the user to implement such duplication. -For example, NumPy would normally use ``np.array(np.string_("2019")).astype(datetime64)``, -but ``datetime64`` could choose to use its ``__dtype_setitem__`` instead, -e.g. for performance reasons. - -There is an issue about how subclasses of scalars should be handled. -We anticipate to stop automatically detecting the dtype for -``np.array(float64_subclass)`` to be float64. -The user can still provide ``dtype=np.float64``. -However, the above automatic casting using ``np.array(scalar_subclass).astype(requested_dtype)`` -will fail. -In many cases, this is not an issue, since the Python ``__float__`` protocol -can be used instead. But in some cases, this will mean that subclasses of -Python scalars will behave differently. +using the general casting machinery, but the ``object`` dtype is special and +important enough to be handled by NumPy using the presented methods. + +**Further issues and discussion:** + +- The ``__dtype_setitem__`` function duplicates some code, such as coercion + from a string. + + ``datetime64`` allows assignment from string, but the same conversion also + occurs for casting from the string dtype to ``datetime64``. + + We may in the future expose the ``known_scalartype`` function to allow the + user to implement such duplication. + + For example, NumPy would normally use + + ``np.array(np.string_("2019")).astype(datetime64)`` + + but ``datetime64`` could choose to use its ``__dtype_setitem__`` instead + for performance reasons. + +- There is an issue about how subclasses of scalars should be handled. We + anticipate to stop automatically detecting the dtype for + ``np.array(float64_subclass)`` to be float64. The user can still provide + ``dtype=np.float64``. However, the above automatic casting using + ``np.array(scalar_subclass).astype(requested_dtype)`` will fail. In many + cases, this is not an issue, since the Python ``__float__`` protocol can be + used instead. But in some cases, this will mean that subclasses of Python + scalars will behave differently. .. note:: @@ -968,8 +983,8 @@ Python scalars will behave differently. DType discovery during array coercion ============================================================================== -An important step in the use of NumPy arrays is creation of the array -from collections of generic Python objects. +An important step in the use of NumPy arrays is creation of the array from +collections of generic Python objects. **Motivation:** Although the distinction is not clear currently, there are two main needs:: @@ -998,10 +1013,12 @@ There are three further issues to consider: 1. It may be desirable to create datatypes associated with normal Python scalars (such as ``datetime.datetime``) that do not have a ``dtype`` attribute already. + 2. In general, a datatype could represent a sequence, however, NumPy currently assumes that sequences are always collections of elements (the sequence cannot be an element itself). An example would be a ``vector`` DType. + 3. An array may itself contain arrays with a specific dtype (even general Python objects). 
For example: ``np.array([np.array(None, dtype=object)], dtype=np.String)`` @@ -1013,6 +1030,7 @@ of the output array and finding the correct datatype are closely related. **Implementation:** There are two distinct cases above: 1. The user has provided no dtype information. + 2. The user provided a DType class -- as represented, for example, by ``"S"`` representing a string of any length. @@ -1026,10 +1044,11 @@ These two cases shall be implemented by leveraging two pieces of information: 1. ``DType.type``: The current type attribute to indicate which Python scalar type is associated with the DType class (this is a *class* attribute that always exists for any datatype and is not limited to array coercion). + 2. ``__discover_descr_from_pyobject__(cls, obj) -> dtype``: A classmethod that returns the correct descriptor given the input object. Note that only parametric DTypes have to implement this. - For non-parametric DTypes using the default instance will always be acceptable. + For nonparametric DTypes using the default instance will always be acceptable. The Python scalar type which is already associated with a DType through the ``DType.type`` attribute maps from the DType to the Python scalar type. @@ -1066,7 +1085,7 @@ subclass. dispatching/promotion in ufuncs, it will also be necessary to dynamically create ``AbstractPyInt[value]`` classes (creation can be cached), so that they can provide the current value based promotion functionality provided - by ``np.result_type(python_integer, array)`` [1]_. + by ``np.result_type(python_integer, array)`` [2]_ . To allow for a DType to accept inputs as scalars that are not basic Python types or instances of ``DType.type``, we use ``known_scalar_type`` method. @@ -1086,7 +1105,7 @@ operation, this allows it to automatically find that the datetime64 unit should be "minutes". -**NumPy Internal Implementation:** The implementation to find the correct dtype +**NumPy internal implementation:** The implementation to find the correct dtype will work similar to the following pseudocode:: def find_dtype(array_like): @@ -1150,31 +1169,30 @@ should only be implemented for types defined within the same library to avoid the potential for conflicts. It will be the DType implementor's responsibility to be careful about this and use avoid registration when in doubt. -**Alternatives:** Instead of a global mapping, we could rely on the scalar -attribute ``scalar.__associated_array_dtype__``. -This only creates a difference in behaviour for subclasses and the exact -implementation can be undefined initially. -Scalars will be expected to derive from a NumPy scalar. -In principle NumPy could, for a time, still choose to rely on the attribute. - -An earlier proposal for the ``dtype`` discovery algorithm, -was to use a two-pass approach. -First finding only the correct ``DType`` class and only then discovering the parametric -``dtype`` instance. -This was rejected for unnecessary complexity. -The main advantage of this method is that it would have enabled value -based promotion in universal functions, allowing:: +**Alternatives:** + +- Instead of a global mapping, we could rely on the scalar attribute + ``scalar.__associated_array_dtype__``. This only creates a difference in + behavior for subclasses, and the exact implementation can be undefined + initially. Scalars will be expected to derive from a NumPy scalar. In + principle NumPy could, for a time, still choose to rely on the attribute. 
+ +- An earlier proposal for the ``dtype`` discovery algorithm used a two-pass + approach, first finding the correct ``DType`` class and only then + discovering the parametric ``dtype`` instance. It was rejected as + needlessly complex. But it would have enabled value-based promotion + in universal functions, allowing:: np.add(np.array([8], dtype="uint8"), [4]) -to return a ``uint8`` result (instead of ``int16``), which currently happens for:: + to return a ``uint8`` result (instead of ``int16``), which currently happens for:: np.add(np.array([8], dtype="uint8"), 4) -(note the list ``[4]`` instead of scalar ``4``). -This is not a feature NumPy currently has or desires to support. + (note the list ``[4]`` instead of scalar ``4``). + This is not a feature NumPy currently has or desires to support. -**Further Issues and Discussion:** It is possible to create a DType +**Further issues and discussion:** It is possible to create a DType such as Categorical, array, or vector which can only be used if ``dtype=DType`` is provided. Such DTypes cannot roundtrip correctly. For example:: @@ -1186,29 +1204,25 @@ This is a general limitation, but round-tripping is always possible if ``dtype=original_arr.dtype`` is passed. -.. _c-api: +.. _nep42_c-api: ****************************************************************************** Public C-API ****************************************************************************** -A Python side API shall not be defined here. This is a general side approach. - - DType creation ============================================================================== -To create a new DType the user will need to define all the methods and -attributes as presented above and outlined in the `Usage and impact`_ -section. -Some additional methods similar to those currently defined as part of -:c:type:`PyArray_ArrFuncs` will be necessary and part of the slots struct -below. +To create a new DType the user will need to define the methods and attributes +outlined in the `Usage and impact`_ section and detailed throughout this +proposal. + +In addition, some methods similar to those in :c:type:`PyArray_ArrFuncs` will +be needed for the slots struct below. -As already mentioned in NEP 41, the interface to define this DType class in C is -modeled after the `Python limited API `_: -the above-mentioned slots and some additional necessary information will -thus be passed within a slots struct and identified by ``ssize_t`` integers:: +As mentioned in :ref:`NEP 41 `, the interface to define this DType +class in C is modeled after :PEP:`384`: Slots and some additional information +will be passed in a slots struct and identified by ``ssize_t`` integers:: static struct PyArrayMethodDef slots[] = { {NPY_dt_method, method_implementation}, @@ -1227,16 +1241,15 @@ thus be passed within a slots struct and identified by ``ssize_t`` integers:: PyObject* PyArray_InitDTypeMetaFromSpec(PyArrayDTypeMeta_Spec *dtype_spec); -All of this information will be copied. - -**TODO:** The DType author should be able to define new methods for their -DType, up to defining a full type object and in the future possibly even -extending the ``PyArrayDTypeMeta_Type`` struct. We have to decide on how (and -what) to make available to the user initially. A possible initial solution may -be to only allow inheriting from an existing class: ``class MyDType(np.dtype, -MyBaseclass)``. If ``np.dtype`` is first in the method resolution order, this -also prevents overriding some slots, such as ``==`` which may not be desirable. 
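+For orientation, the information such a spec carries could be assembled as in
+this Python-flavored sketch (the ``Unit`` DType, its slot functions, and the
+exact slot names are invented for illustration; only the ``NPY_dt_`` prefix
+is specified below, and the real interface is the C struct above)::
+
+    spec = PyArrayDTypeMeta_Spec(
+        typeobj=UnitScalar,   # the Python scalar type, i.e. DType.type
+        flags=0,              # neither abstract nor parametric
+        casts=[unit_within_unit_impl, unit_to_float64_impl],
+        slots={
+            "NPY_dt_default_descr": unit_default_descr,
+            "NPY_dt_setitem": unit_setitem,
+            "NPY_dt_getitem": unit_getitem,
+        },
+    )
+    UnitDType = PyArray_InitDTypeMetaFromSpec(spec)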
+All of this is passed by copying. +**TODO:** The DType author should be able to define new methods for the +DType, up to defining a full object, and, in the future, possibly even +extending the ``PyArrayDTypeMeta_Type`` struct. We have to decide what to make +available initially. A solution may be to allow inheriting only from an +existing class: ``class MyDType(np.dtype, MyBaseclass)``. If ``np.dtype`` is +first in the method resolution order, this also prevents an undesirable +override of slots like ``==``. The ``slots`` will be identified by names which are prefixed with ``NPY_dt_`` and are: @@ -1251,8 +1264,7 @@ and are: * ``common_instance(self, other) -> dtype or NULL`` Where possible, a default implementation will be provided if the slot is -ommitted or set to ``NULL``. -Non-parametric dtypes do not have to implement: +omitted or set to ``NULL``. Nonparametric dtypes do not have to implement: * ``discover_descr_from_pyobject`` (uses ``default_descr`` instead) * ``common_instance`` (uses ``default_descr`` instead) @@ -1262,17 +1274,16 @@ Sorting is expected to be implemented using: * ``get_sort_function(self, NPY_SORTKIND sort_kind) -> {out_sortfunction, NotImplemented, NULL}``. -Although for convenience, it will be sufficient if the user implements only: +For convenience, it will be sufficient if the user implements only: * ``compare(self, char *item_ptr1, char *item_ptr2, int *res) -> {-1, 0, 1}`` -**Limitations:** Using the above ``PyArrayDTypeMeta_Spec`` struct, the -structure itself can only be extended clumsily (e.g. by adding a version tag -to the ``slots`` to indicate a new, longer version of the struct). We could -also provide the struct using a function, which however will require memory -management but would allow ABI-compatible extension (the struct is freed again -when the DType is created). +**Limitations:** The ``PyArrayDTypeMeta_Spec`` struct is clumsy to extend (for +instance, by adding a version tag to the ``slots`` to indicate a new, longer +version). We could use a function to provide the struct; it would require +memory management but would allow ABI-compatible extension (the struct is +freed again when the DType is created). CastingImpl @@ -1281,24 +1292,24 @@ CastingImpl The external API for ``CastingImpl`` will be limited initially to defining: * ``casting`` attribute, which can be one of the supported casting kinds. - This is the safest cast possible. For example casting between two NumPy + This is the safest cast possible. For example, casting between two NumPy strings is of course "safe" in general, but may be "same kind" in a specific instance if the second string is shorter. If neither type is parametric the ``resolve_descriptors`` must use it. -* ``resolve_descriptors(self, given_descrs[2], loop_descrs[2]) -> int {casting, -1}``: - The ``loop_descrs`` must be set correctly to dtypes which the strided loop +* ``resolve_descriptors(dtypes_in[2], dtypes_out[2], casting_out) -> int {0, + -1}`` The out + dtypes must be set correctly to dtypes which the strided loop (transfer function) can handle. Initially the result must have instances of the same DType class as the ``CastingImpl`` is defined for. The ``casting`` will be set to ``NPY_EQUIV_CASTING``, ``NPY_SAFE_CASTING``, ``NPY_UNSAFE_CASTING``, or ``NPY_SAME_KIND_CASTING``. - A new, additional flag, ``NPY_CAST_IS_VIEW``, can be set to indicate that - no cast is necessary and a view is sufficient to perform the cast. - The return value shall be ``-1`` to indicate that the cast is not possible. 
- If no error is set, a generic error message will be given. If an error is - already set it will be chained and may provide additional information. - Note that ``self`` represents additional call information; details are given - in NEP 43. + A new, additional flag, + ``NPY_CAST_IS_VIEW``, can be set to indicate that no cast is necessary and a + view is sufficient to perform the cast. The cast should return + ``-1`` when a custom error is set and ``NPY_NO_CASTING`` to indicate + that a generic casting error should be set (this is in most cases + preferable). * ``strided_loop(char **args, npy_intp *dimensions, npy_intp *strides, ...) -> int {0, -1}`` (signature will be fully defined in NEP 43) @@ -1308,7 +1319,7 @@ part of the signature will include information such as the two ``dtype``\s. More optimized loops are in use internally, and will be made available to users in the future (see notes). -Although verbose, the API shall mimic the one for creating a new DType: +Although verbose, the API will mimic the one for creating a new DType: .. code-block:: C @@ -1321,56 +1332,53 @@ Although verbose, the API shall mimic the one for creating a new DType: PyType_Slot *slots; } PyArrayMethod_Spec; -The focus differs between casting and general ufuncs. For example for casts +The focus differs between casting and general ufuncs. For example, for casts ``nin == nout == 1`` is always correct, while for ufuncs ``casting`` is expected to be usually `"safe"`. -**Notes:** We may initially allow users to define only a single loop. However, -internally NumPy optimizes far more, and this should be made public -incrementally, either by allowing multiple versions, such as: +**Notes:** We may initially allow users to define only a single loop. +Internally NumPy optimizes far more, and this should be made public +incrementally in one of two ways: -* contiguous inner loop -* strided inner loop -* scalar inner loop +* Allow multiple versions, such as: -or more likely through exposure of the ``get_loop`` function which is passed -additional information, such as the fixed strides (similar to our internal -API). + * contiguous inner loop + * strided inner loop + * scalar inner loop -The above example does not yet include potential setup and error handling -requirements. Since these are similar to the UFunc machinery, this will be -defined in detail in NEP 43 and then incorporated identically into casting. +* Or, more likely, expose the ``get_loop`` function which is passed additional + information, such as the fixed strides (similar to our internal API). -The slots/methods used will be prefixed ``NPY_uf_`` for similarity to the -ufunc machinery. +The example does not yet include setup and error handling. Since these are +similar to the UFunc machinery, they will be defined in NEP 43 and then +incorporated identically into casting. +The slots/methods used will be prefixed with ``NPY_meth_``. -**Alternatives:** Aside from name changes, and possible signature tweaks, -there seem to be few alternatives to the above structure. -The proposed API using ``*_FromSpec`` function is a good way to achieve a stable -and extensible API. The slots design is extensible and can be -changed without breaking binary compatibility. -Convenience functions can still be provided to allow creation with less code. +**Alternatives:** -One downside of this approach is that compilers cannot warn about function pointer -incompatibilities. +- Aside from name changes and signature tweaks, there seem to be few + alternatives to the above structure. 
The proposed API using ``*_FromSpec`` + function is a good way to achieve a stable and extensible API. The slots + design is extensible and can be changed without breaking binary + compatibility. Convenience functions can still be provided to allow creation + with less code. + +- One downside is that compilers cannot warn about function-pointer + incompatibilities. ****************************************************************************** Implementation ****************************************************************************** -Steps for implementation are outlined in :ref:`NEP 41 `. This includes -internal restructuring for the new casting and array-coercion. -First, the NumPy will internally be rewritten using the above methods for -casting and array-coercion. - -After that, the new public API will be added incrementally. -We plan to expose it in a preliminary state initially to allow modification -after some experience can be gained. -In addition to the features presented in detail in this NEP, all functionality -currently implemented on the dtypes will be replaced systematically. +Steps for implementation are outlined in the Implementation section of +:ref:`NEP 41 `. In brief, we first will rewrite the internals of +casting and array coercion. After that, the new public API will be added +incrementally. We plan to expose it in a preliminary state initially to gain +experience. All functionality currently implemented on the dtypes will be +replaced systematically as new features are added. ****************************************************************************** @@ -1378,16 +1386,22 @@ Alternatives ****************************************************************************** The space of possible implementations is large, so there have been many -discussions, conceptions, and design documents. These are listed in NEP 40. -Since this NEP encompasses multiple individual decisions, alternatives -are discussed in the above individual sections. +discussions, conceptions, and design documents. These are listed in +:ref:`NEP 40 `. Alternatives were also been discussed in the +relevant sections above. ****************************************************************************** References ****************************************************************************** -.. [1] NumPy currently inspects the value to allow the operations:: +.. [1] To be clear, the program is broken: It should not have stored a value + in the common DType that was below the lowest int16 or above the highest + uint16. It avoided overflow earlier by an accident of implementation. + Nonetheless, we insist that program behavior not be altered just by + importing a type. + +.. [2] NumPy currently inspects the value to allow the operations:: np.array([1], dtype=np.uint8) + 1 np.array([1.2], dtype=np.float32) + 1. 
From f39453eae98b92efaee39bb855c6bcdf3905e3d6 Mon Sep 17 00:00:00 2001 From: Royston E Tauro <54945757+lucasace@users.noreply.github.com> Date: Thu, 8 Oct 2020 22:13:27 +0530 Subject: [PATCH 224/409] Update __init__.pyi --- numpy/__init__.pyi | 85 +++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 26673d95556a..154fc8722dab 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -36,6 +36,7 @@ from typing import ( Container, Callable, Dict, + Final, Generic, IO, Iterable, @@ -1597,48 +1598,48 @@ def empty( # Constants # -Inf: float -Infinity: float -NAN: float -NINF: float -NZERO: float -NaN: float -PINF: float -PZERO: float -e: float -euler_gamma: float -inf: float -infty: float -nan: float -pi: float - -ALLOW_THREADS: int -BUFSIZE: int -CLIP: int -ERR_CALL: int -ERR_DEFAULT: int -ERR_IGNORE: int -ERR_LOG: int -ERR_PRINT: int -ERR_RAISE: int -ERR_WARN: int -FLOATING_POINT_SUPPORT: int -FPE_DIVIDEBYZERO: int -FPE_INVALID: int -FPE_OVERFLOW: int -FPE_UNDERFLOW: int -MAXDIMS: int -MAY_SHARE_BOUNDS: int -MAY_SHARE_EXACT: int -RAISE: int -SHIFT_DIVIDEBYZERO: int -SHIFT_INVALID: int -SHIFT_OVERFLOW: int -SHIFT_UNDERFLOW: int -UFUNC_BUFSIZE_DEFAULT: int -WRAP: int -little_endian: int -tracemalloc_domain: int +Inf: Final[float] +Infinity: Final[float] +NAN: Final[float] +NINF: Final[float] +NZERO: Final[float] +NaN: Final[float] +PINF: Final[float] +PZERO: Final[float] +e: Final[float] +euler_gamma: FInal[float] +inf: Final[float] +infty: Final[float] +nan: Final[float] +pi: Final[float] + +ALLOW_THREADS: Final[int] +BUFSIZE: Final[int] +CLIP: Final[int] +ERR_CALL: Final[int] +ERR_DEFAULT: Final[int] +ERR_IGNORE: Final[int] +ERR_LOG: Final[int] +ERR_PRINT: Final[int] +ERR_RAISE: Final[int] +ERR_WARN: Final[int] +FLOATING_POINT_SUPPORT: Final[int] +FPE_DIVIDEBYZERO: Final[int] +FPE_INVALID: Final[int] +FPE_OVERFLOW: Final[int] +FPE_UNDERFLOW: Final[int] +MAXDIMS: Final[int] +MAY_SHARE_BOUNDS: Final[int] +MAY_SHARE_EXACT: Final[int] +RAISE: Final[int] +SHIFT_DIVIDEBYZERO: Final[int] +SHIFT_INVALID: Final[int] +SHIFT_OVERFLOW: Final[int] +SHIFT_UNDERFLOW: Final[int] +UFUNC_BUFSIZE_DEFAULT: Final[int] +WRAP: Final[int] +little_endian: Final[int] +tracemalloc_domain: Final[int] class ufunc: @property From ff81db7831432ef94e11ecc0cb014260ff55b3d4 Mon Sep 17 00:00:00 2001 From: Royston E Tauro <54945757+lucasace@users.noreply.github.com> Date: Thu, 8 Oct 2020 22:23:13 +0530 Subject: [PATCH 225/409] ENH: Marked all constants as `Final --- numpy/__init__.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 154fc8722dab..9d5b9d438237 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -1612,7 +1612,7 @@ inf: Final[float] infty: Final[float] nan: Final[float] pi: Final[float] - + ALLOW_THREADS: Final[int] BUFSIZE: Final[int] CLIP: Final[int] From 45e04d904099f824f860c5426039e2f4099a714c Mon Sep 17 00:00:00 2001 From: Royston E Tauro <54945757+lucasace@users.noreply.github.com> Date: Thu, 8 Oct 2020 23:03:54 +0530 Subject: [PATCH 226/409] Minor bug fix --- numpy/__init__.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 9d5b9d438237..0c023caf0bbf 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -1607,7 +1607,7 @@ NaN: Final[float] PINF: Final[float] PZERO: Final[float] e: Final[float] -euler_gamma: FInal[float] +euler_gamma: Final[float] inf: Final[float] 
infty: Final[float] nan: Final[float] From 688202aeb9db604cd3ace797b76291281bc7f97e Mon Sep 17 00:00:00 2001 From: Royston E Tauro <54945757+lucasace@users.noreply.github.com> Date: Thu, 8 Oct 2020 23:38:22 +0530 Subject: [PATCH 227/409] Adjusted typing.Final's python 3.8 compatibility --- numpy/__init__.pyi | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 0c023caf0bbf..1fbdb259b2a1 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -36,7 +36,6 @@ from typing import ( Container, Callable, Dict, - Final, Generic, IO, Iterable, @@ -59,9 +58,9 @@ from typing import ( ) if sys.version_info >= (3, 8): - from typing import Literal, Protocol, SupportsIndex + from typing import Literal, Protocol, SupportsIndex,Final else: - from typing_extensions import Literal, Protocol + from typing_extensions import Literal, Protocol, Final class SupportsIndex(Protocol): def __index__(self) -> int: ... From 055afcfaaf0004c537cb03de2520d38bbbb19e67 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Thu, 8 Oct 2020 15:28:28 -0400 Subject: [PATCH 228/409] DOC: Fewer blank lines in PR template Reduce the need to scroll, per @mattip suggestion in #17440 --- .github/PULL_REQUEST_TEMPLATE.md | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index dee33ee5fd13..508c8c034869 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,22 +1,16 @@ - From 20d5999e2f01c32876368b97797047c3adc536bd Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Thu, 8 Oct 2020 17:01:02 -0400 Subject: [PATCH 229/409] DOC: Rename 'Quickstart tutorial' (#17504) Remove tutorial wording in favor of article, minor wording updates. --- doc/source/user/quickstart.rst | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/doc/source/user/quickstart.rst b/doc/source/user/quickstart.rst index 8e38234c598f..b675204e0ad5 100644 --- a/doc/source/user/quickstart.rst +++ b/doc/source/user/quickstart.rst @@ -1,5 +1,5 @@ =================== -Quickstart tutorial +NumPy quickstart =================== .. currentmodule:: numpy @@ -12,26 +12,24 @@ Quickstart tutorial Prerequisites ============= -Before reading this tutorial you should know a bit of Python. If you -would like to refresh your memory, take a look at the `Python +You'll need to know a bit of Python. For a refresher, see the `Python tutorial `__. -If you wish to work the examples in this tutorial, you must also have -some software installed on your computer. Please see -https://scipy.org/install.html for instructions. +To work the examples, you'll need `matplotlib` installed +in addition to NumPy. **Learner profile** -This tutorial is intended as a quick overview of +This is a quick overview of algebra and arrays in NumPy. It demonstrates how n-dimensional (:math:`n>=2`) arrays are represented and can be manipulated. In particular, if you don't know how to apply common functions to n-dimensional arrays (without using for-loops), or if you want to understand axis and shape properties for -n-dimensional arrays, this tutorial might be of help. +n-dimensional arrays, this article might be of help. 
**Learning Objectives** -After this tutorial, you should be able to: +After reading, you should be able to: - Understand the difference between one-, two- and n-dimensional arrays in NumPy; From 7396e03df3085b96fa4a99de5aaab0499670dd6e Mon Sep 17 00:00:00 2001 From: Takanori H Date: Fri, 9 Oct 2020 07:48:32 +0900 Subject: [PATCH 230/409] DOC: Fix random links in release note (#17483) Co-authored-by: Eric Wieser --- doc/source/release/1.17.0-notes.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/source/release/1.17.0-notes.rst b/doc/source/release/1.17.0-notes.rst index a93eb21863e5..4bdc6105fc1b 100644 --- a/doc/source/release/1.17.0-notes.rst +++ b/doc/source/release/1.17.0-notes.rst @@ -171,15 +171,15 @@ The functions `load`, and ``lib.format.read_array`` take an `CVE-2019-6446 `_. -.. currentmodule:: numpy.random.mtrand +.. currentmodule:: numpy.random Potential changes to the random stream in old random module ----------------------------------------------------------- Due to bugs in the application of ``log`` to random floating point numbers, the stream may change when sampling from `~RandomState.beta`, `~RandomState.binomial`, `~RandomState.laplace`, `~RandomState.logistic`, `~RandomState.logseries` or -`~RandomState.multinomial` if a ``0`` is generated in the underlying `MT19937 -<~numpy.random.mt11937.MT19937>` random stream. There is a ``1`` in +`~RandomState.multinomial` if a ``0`` is generated in the underlying `MT19937` +random stream. There is a ``1`` in :math:`10^{53}` chance of this occurring, so the probability that the stream changes for any given seed is extremely small. If a ``0`` is encountered in the underlying generator, then the incorrect value produced (either `numpy.inf` or @@ -559,4 +559,3 @@ Structured arrays indexed with non-existent fields raise ``KeyError`` not ``Valu ---------------------------------------------------------------------------------------- ``arr['bad_field']`` on a structured type raises ``KeyError``, for consistency with ``dict['bad_field']``. 
- From 1b8637d4af9b27735629d5371b9d92a0fd1c147d Mon Sep 17 00:00:00 2001 From: Sayed Adel Date: Thu, 17 Sep 2020 12:11:07 +0200 Subject: [PATCH 231/409] ENH, SIMD: Add partial/non-contig load and store intrinsics for 32/64-bit This patch improves the implementation of memory load/store for VSX --- numpy/core/src/common/simd/avx2/avx2.h | 2 + numpy/core/src/common/simd/avx2/memory.h | 286 ++++++++++++++ numpy/core/src/common/simd/avx512/avx512.h | 3 + numpy/core/src/common/simd/avx512/memory.h | 238 ++++++++++++ numpy/core/src/common/simd/neon/memory.h | 287 ++++++++++++++ numpy/core/src/common/simd/simd.h | 49 +++ numpy/core/src/common/simd/sse/memory.h | 424 ++++++++++++++++++++ numpy/core/src/common/simd/vsx/memory.h | 432 +++++++++++++++------ 8 files changed, 1603 insertions(+), 118 deletions(-) diff --git a/numpy/core/src/common/simd/avx2/avx2.h b/numpy/core/src/common/simd/avx2/avx2.h index c99d628ee408..0641f2314ec6 100644 --- a/numpy/core/src/common/simd/avx2/avx2.h +++ b/numpy/core/src/common/simd/avx2/avx2.h @@ -5,6 +5,8 @@ #define NPY_SIMD 256 #define NPY_SIMD_WIDTH 32 #define NPY_SIMD_F64 1 +// Enough limit to allow us to use _mm256_i32gather_* +#define NPY_SIMD_MAXLOAD_STRIDE32 (0x7fffffff / 8) typedef __m256i npyv_u8; typedef __m256i npyv_s8; diff --git a/numpy/core/src/common/simd/avx2/memory.h b/numpy/core/src/common/simd/avx2/memory.h index 5ea7414fdf5c..e27bf15fec2e 100644 --- a/numpy/core/src/common/simd/avx2/memory.h +++ b/numpy/core/src/common/simd/avx2/memory.h @@ -2,6 +2,8 @@ #error "Not a standalone header" #endif +#include "misc.h" + #ifndef _NPY_SIMD_AVX2_MEMORY_H #define _NPY_SIMD_AVX2_MEMORY_H @@ -66,5 +68,289 @@ NPYV_IMPL_AVX2_MEM_INT(npy_int64, s64) // store higher part #define npyv_storeh_f32(PTR, VEC) _mm_storeu_ps(PTR, _mm256_extractf128_ps(VEC, 1)) #define npyv_storeh_f64(PTR, VEC) _mm_storeu_pd(PTR, _mm256_extractf128_pd(VEC, 1)) +/*************************** + * Non-contiguous Load + ***************************/ +//// 32 +NPY_FINLINE npyv_u32 npyv_loadn_u32(const npy_uint32 *ptr, npy_intp stride) +{ + assert(llabs(stride) <= NPY_SIMD_MAXLOAD_STRIDE32); + const __m256i steps = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + const __m256i idx = _mm256_mullo_epi32(_mm256_set1_epi32((int)stride), steps); + return _mm256_i32gather_epi32((const int*)ptr, idx, 4); +} +NPY_FINLINE npyv_s32 npyv_loadn_s32(const npy_int32 *ptr, npy_intp stride) +{ return npyv_loadn_u32((const npy_uint32*)ptr, stride); } +NPY_FINLINE npyv_f32 npyv_loadn_f32(const float *ptr, npy_intp stride) +{ return _mm256_castsi256_ps(npyv_loadn_u32((const npy_uint32*)ptr, stride)); } +//// 64 +#if 0 // slower +NPY_FINLINE npyv_u64 npyv_loadn_u64(const npy_uint64 *ptr, npy_intp stride) +{ + const __m256i idx = _mm256_setr_epi64x(0, 1*stride, 2*stride, 3*stride); + return _mm256_i64gather_epi64((const void*)ptr, idx, 8); +} +NPY_FINLINE npyv_s64 npyv_loadn_s64(const npy_int64 *ptr, npy_intp stride) +{ return npyv_loadn_u64((const npy_uint64*)ptr, stride); } +NPY_FINLINE npyv_f64 npyv_loadn_f64(const double *ptr, npy_intp stride) +{ return _mm256_castsi256_pd(npyv_loadn_u64((const npy_uint64*)ptr, stride)); } +#endif +NPY_FINLINE npyv_f64 npyv_loadn_f64(const double *ptr, npy_intp stride) +{ + __m128d a0 = _mm_castsi128_pd(_mm_loadl_epi64((const __m128i*)ptr)); + __m128d a2 = _mm_castsi128_pd(_mm_loadl_epi64((const __m128i*)(ptr + stride*2))); + __m128d a01 = _mm_loadh_pd(a0, ptr + stride); + __m128d a23 = _mm_loadh_pd(a2, ptr + stride*3); + return _mm256_insertf128_pd(_mm256_castpd128_pd256(a01), 
a23, 1); +} +NPY_FINLINE npyv_u64 npyv_loadn_u64(const npy_uint64 *ptr, npy_intp stride) +{ return _mm256_castpd_si256(npyv_loadn_f64((const double*)ptr, stride)); } +NPY_FINLINE npyv_s64 npyv_loadn_s64(const npy_int64 *ptr, npy_intp stride) +{ return _mm256_castpd_si256(npyv_loadn_f64((const double*)ptr, stride)); } +/*************************** + * Non-contiguous Store + ***************************/ +//// 32 +NPY_FINLINE void npyv_storen_s32(npy_int32 *ptr, npy_intp stride, npyv_s32 a) +{ + __m128i a0 = _mm256_castsi256_si128(a); + __m128i a1 = _mm256_extracti128_si256(a, 1); + ptr[stride * 0] = _mm_cvtsi128_si32(a0); + ptr[stride * 1] = _mm_extract_epi32(a0, 1); + ptr[stride * 2] = _mm_extract_epi32(a0, 2); + ptr[stride * 3] = _mm_extract_epi32(a0, 3); + ptr[stride * 4] = _mm_cvtsi128_si32(a1); + ptr[stride * 5] = _mm_extract_epi32(a1, 1); + ptr[stride * 6] = _mm_extract_epi32(a1, 2); + ptr[stride * 7] = _mm_extract_epi32(a1, 3); +} +NPY_FINLINE void npyv_storen_u32(npy_uint32 *ptr, npy_intp stride, npyv_u32 a) +{ npyv_storen_s32((npy_int32*)ptr, stride, a); } +NPY_FINLINE void npyv_storen_f32(float *ptr, npy_intp stride, npyv_f32 a) +{ npyv_storen_s32((npy_int32*)ptr, stride, _mm256_castps_si256(a)); } +//// 64 +NPY_FINLINE void npyv_storen_f64(double *ptr, npy_intp stride, npyv_f64 a) +{ + __m128d a0 = _mm256_castpd256_pd128(a); + __m128d a1 = _mm256_extractf128_pd(a, 1); + _mm_storel_pd(ptr + stride * 0, a0); + _mm_storeh_pd(ptr + stride * 1, a0); + _mm_storel_pd(ptr + stride * 2, a1); + _mm_storeh_pd(ptr + stride * 3, a1); +} +NPY_FINLINE void npyv_storen_u64(npy_uint64 *ptr, npy_intp stride, npyv_u64 a) +{ npyv_storen_f64((double*)ptr, stride, _mm256_castsi256_pd(a)); } +NPY_FINLINE void npyv_storen_s64(npy_int64 *ptr, npy_intp stride, npyv_s64 a) +{ npyv_storen_f64((double*)ptr, stride, _mm256_castsi256_pd(a)); } + +/********************************* + * Partial Load + *********************************/ +//// 32 +NPY_FINLINE npyv_s32 npyv_load_till_s32(const npy_int32 *ptr, npy_uintp nlane, npy_int32 fill) +{ + assert(nlane > 0); + const __m256i vfill = _mm256_set1_epi32(fill); + const __m256i steps = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + __m256i vnlane = _mm256_set1_epi32(nlane > 8 ? 8 : (int)nlane); + __m256i mask = _mm256_cmpgt_epi32(vnlane, steps); + __m256i payload = _mm256_maskload_epi32((const int*)ptr, mask); + return _mm256_blendv_epi8(vfill, payload, mask); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s32 npyv_load_tillz_s32(const npy_int32 *ptr, npy_uintp nlane) +{ + assert(nlane > 0); + const __m256i steps = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + __m256i vnlane = _mm256_set1_epi32(nlane > 8 ? 8 : (int)nlane); + __m256i mask = _mm256_cmpgt_epi32(vnlane, steps); + return _mm256_maskload_epi32((const int*)ptr, mask); +} +//// 64 +NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, npy_int64 fill) +{ + assert(nlane > 0); + const __m256i vfill = _mm256_set1_epi64x(fill); + const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3); + __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 4 : (int)nlane); + __m256i mask = _mm256_cmpgt_epi64(vnlane, steps); + __m256i payload = _mm256_maskload_epi64((const void*)ptr, mask); + return _mm256_blendv_epi8(vfill, payload, mask); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s64 npyv_load_tillz_s64(const npy_int64 *ptr, npy_uintp nlane) +{ + assert(nlane > 0); + const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3); + __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 
4 : (int)nlane); + __m256i mask = _mm256_cmpgt_epi64(vnlane, steps); + return _mm256_maskload_epi64((const void*)ptr, mask); +} +/********************************* + * Non-contiguous partial load + *********************************/ +//// 32 +NPY_FINLINE npyv_s32 +npyv_loadn_till_s32(const npy_int32 *ptr, npy_intp stride, npy_uintp nlane, npy_int32 fill) +{ + assert(nlane > 0); + assert(llabs(stride) <= NPY_SIMD_MAXLOAD_STRIDE32); + const __m256i vfill = _mm256_set1_epi32(fill); + const __m256i steps = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + const __m256i idx = _mm256_mullo_epi32(_mm256_set1_epi32((int)stride), steps); + __m256i vnlane = _mm256_set1_epi32(nlane > 8 ? 8 : (int)nlane); + __m256i mask = _mm256_cmpgt_epi32(vnlane, steps); + return _mm256_mask_i32gather_epi32(vfill, (const int*)ptr, idx, mask, 4); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s32 +npyv_loadn_tillz_s32(const npy_int32 *ptr, npy_intp stride, npy_uintp nlane) +{ return npyv_loadn_till_s32(ptr, stride, nlane, 0); } +//// 64 +NPY_FINLINE npyv_s64 +npyv_loadn_till_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npy_int64 fill) +{ + assert(nlane > 0); + const __m256i vfill = _mm256_set1_epi64x(fill); + const __m256i idx = _mm256_setr_epi64x(0, 1*stride, 2*stride, 3*stride); + const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3); + __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 4 : (int)nlane); + __m256i mask = _mm256_cmpgt_epi64(vnlane, steps); + return _mm256_mask_i64gather_epi64(vfill, (const void*)ptr, idx, mask, 8); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s64 +npyv_loadn_tillz_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane) +{ return npyv_loadn_till_s64(ptr, stride, nlane, 0); } +/********************************* + * Partial store + *********************************/ +//// 32 +NPY_FINLINE void npyv_store_till_s32(npy_int32 *ptr, npy_uintp nlane, npyv_s32 a) +{ + assert(nlane > 0); + const __m256i steps = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + __m256i vnlane = _mm256_set1_epi32(nlane > 8 ? 8 : (int)nlane); + __m256i mask = _mm256_cmpgt_epi32(vnlane, steps); + _mm256_maskstore_epi32((int*)ptr, mask, a); +} +//// 64 +NPY_FINLINE void npyv_store_till_s64(npy_int64 *ptr, npy_uintp nlane, npyv_s64 a) +{ + assert(nlane > 0); + const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3); + __m256i vnlane = _mm256_set1_epi64x(nlane > 8 ? 
8 : (int)nlane); + __m256i mask = _mm256_cmpgt_epi64(vnlane, steps); + _mm256_maskstore_epi64((void*)ptr, mask, a); +} +/********************************* + * Non-contiguous partial store + *********************************/ +//// 32 +NPY_FINLINE void npyv_storen_till_s32(npy_int32 *ptr, npy_intp stride, npy_uintp nlane, npyv_s32 a) +{ + assert(nlane > 0); + __m128i a0 = _mm256_castsi256_si128(a); + __m128i a1 = _mm256_extracti128_si256(a, 1); + switch(nlane) { + default: + ptr[stride*7] = _mm_extract_epi32(a1, 3); + case 7: + ptr[stride*6] = _mm_extract_epi32(a1, 2); + case 6: + ptr[stride*5] = _mm_extract_epi32(a1, 1); + case 5: + ptr[stride*4] = _mm_extract_epi32(a1, 0); + case 4: + ptr[stride*3] = _mm_extract_epi32(a0, 3); + case 3: + ptr[stride*2] = _mm_extract_epi32(a0, 2); + case 2: + ptr[stride*1] = _mm_extract_epi32(a0, 1); + case 1: + ptr[stride*0] = _mm_extract_epi32(a0, 0); + } +} +//// 64 +NPY_FINLINE void npyv_storen_till_s64(npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npyv_s64 a) +{ + assert(nlane > 0); + __m128d a0 = _mm256_castpd256_pd128(_mm256_castsi256_pd(a)); + __m128d a1 = _mm256_extractf128_pd(_mm256_castsi256_pd(a), 1); + double *dptr = (double*)ptr; + switch(nlane) { + default: + _mm_storeh_pd(dptr + stride * 3, a1); + case 3: + _mm_storel_pd(dptr + stride * 2, a1); + case 2: + _mm_storeh_pd(dptr + stride * 1, a0); + case 1: + _mm_storel_pd(dptr + stride * 0, a0); + } +} + +/***************************************************************************** + * Implement partial load/store for u32/f32/u64/f64... via reinterpret cast + *****************************************************************************/ +#define NPYV_IMPL_AVX2_REST_PARTIAL_TYPES(F_SFX, T_SFX) \ + NPY_FINLINE npyv_##F_SFX npyv_load_till_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_uintp nlane, npyv_lanetype_##F_SFX fill) \ + { \ + union { \ + npyv_lanetype_##F_SFX from_##F_SFX; \ + npyv_lanetype_##T_SFX to_##T_SFX; \ + } pun = {.from_##F_SFX = fill}; \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_load_till_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, nlane, pun.to_##T_SFX \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_loadn_till_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane, \ + npyv_lanetype_##F_SFX fill) \ + { \ + union { \ + npyv_lanetype_##F_SFX from_##F_SFX; \ + npyv_lanetype_##T_SFX to_##T_SFX; \ + } pun = {.from_##F_SFX = fill}; \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_loadn_till_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, stride, nlane, pun.to_##T_SFX \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_load_tillz_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_uintp nlane) \ + { \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_load_tillz_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, nlane \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_loadn_tillz_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane) \ + { \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_loadn_tillz_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, stride, nlane \ + )); \ + } \ + NPY_FINLINE void npyv_store_till_##F_SFX \ + (npyv_lanetype_##F_SFX *ptr, npy_uintp nlane, npyv_##F_SFX a) \ + { \ + npyv_store_till_##T_SFX( \ + (npyv_lanetype_##T_SFX *)ptr, nlane, \ + npyv_reinterpret_##T_SFX##_##F_SFX(a) \ + ); \ + } \ + NPY_FINLINE void npyv_storen_till_##F_SFX \ + (npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane, npyv_##F_SFX a) \ + { \ + npyv_storen_till_##T_SFX( \ + (npyv_lanetype_##T_SFX *)ptr, stride, 
nlane, \ + npyv_reinterpret_##T_SFX##_##F_SFX(a) \ + ); \ + } + +NPYV_IMPL_AVX2_REST_PARTIAL_TYPES(u32, s32) +NPYV_IMPL_AVX2_REST_PARTIAL_TYPES(f32, s32) +NPYV_IMPL_AVX2_REST_PARTIAL_TYPES(u64, s64) +NPYV_IMPL_AVX2_REST_PARTIAL_TYPES(f64, s64) #endif // _NPY_SIMD_AVX2_MEMORY_H diff --git a/numpy/core/src/common/simd/avx512/avx512.h b/numpy/core/src/common/simd/avx512/avx512.h index 96fdf72b91f1..b09d772f2fec 100644 --- a/numpy/core/src/common/simd/avx512/avx512.h +++ b/numpy/core/src/common/simd/avx512/avx512.h @@ -4,6 +4,9 @@ #define NPY_SIMD 512 #define NPY_SIMD_WIDTH 64 #define NPY_SIMD_F64 1 +// Enough limit to allow us to use _mm512_i32gather_* and _mm512_i32scatter_* +#define NPY_SIMD_MAXLOAD_STRIDE32 (0x7fffffff / 16) +#define NPY_SIMD_MAXSTORE_STRIDE32 (0x7fffffff / 16) typedef __m512i npyv_u8; typedef __m512i npyv_s8; diff --git a/numpy/core/src/common/simd/avx512/memory.h b/numpy/core/src/common/simd/avx512/memory.h index e212c4555270..bffd6e907246 100644 --- a/numpy/core/src/common/simd/avx512/memory.h +++ b/numpy/core/src/common/simd/avx512/memory.h @@ -90,5 +90,243 @@ NPYV_IMPL_AVX512_MEM_INT(npy_int64, s64) // store higher part #define npyv_storeh_f32(PTR, VEC) _mm256_storeu_ps(PTR, npyv512_higher_ps256(VEC)) #define npyv_storeh_f64(PTR, VEC) _mm256_storeu_pd(PTR, npyv512_higher_pd256(VEC)) +/*************************** + * Non-contiguous Load + ***************************/ +//// 32 +NPY_FINLINE npyv_u32 npyv_loadn_u32(const npy_uint32 *ptr, npy_intp stride) +{ + assert(llabs(stride) <= NPY_SIMD_MAXLOAD_STRIDE32); + const __m512i steps = npyv_set_s32( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + ); + const __m512i idx = _mm512_mullo_epi32(steps, _mm512_set1_epi32((int)stride)); + return _mm512_i32gather_epi32(idx, (const __m512i*)ptr, 4); +} +NPY_FINLINE npyv_s32 npyv_loadn_s32(const npy_int32 *ptr, npy_intp stride) +{ return npyv_loadn_u32((const npy_uint32*)ptr, stride); } +NPY_FINLINE npyv_f32 npyv_loadn_f32(const float *ptr, npy_intp stride) +{ return _mm512_castsi512_ps(npyv_loadn_u32((const npy_uint32*)ptr, stride)); } +//// 64 +NPY_FINLINE npyv_u64 npyv_loadn_u64(const npy_uint64 *ptr, npy_intp stride) +{ + const __m512i idx = _mm512_setr_epi64( + 0*stride, 1*stride, 2*stride, 3*stride, + 4*stride, 5*stride, 6*stride, 7*stride + ); + return _mm512_i64gather_epi64(idx, (const __m512i*)ptr, 8); +} +NPY_FINLINE npyv_s64 npyv_loadn_s64(const npy_int64 *ptr, npy_intp stride) +{ return npyv_loadn_u64((const npy_uint64*)ptr, stride); } +NPY_FINLINE npyv_f64 npyv_loadn_f64(const double *ptr, npy_intp stride) +{ return _mm512_castsi512_pd(npyv_loadn_u64((const npy_uint64*)ptr, stride)); } +/*************************** + * Non-contiguous Store + ***************************/ +//// 32 +NPY_FINLINE void npyv_storen_u32(npy_uint32 *ptr, npy_intp stride, npyv_u32 a) +{ + assert(llabs(stride) <= NPY_SIMD_MAXSTORE_STRIDE32); + const __m512i steps = _mm512_setr_epi32( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + ); + const __m512i idx = _mm512_mullo_epi32(steps, _mm512_set1_epi32((int)stride)); + _mm512_i32scatter_epi32((__m512i*)ptr, idx, a, 4); +} +NPY_FINLINE void npyv_storen_s32(npy_int32 *ptr, npy_intp stride, npyv_s32 a) +{ npyv_storen_u32((npy_uint32*)ptr, stride, a); } +NPY_FINLINE void npyv_storen_f32(float *ptr, npy_intp stride, npyv_f32 a) +{ npyv_storen_u32((npy_uint32*)ptr, stride, _mm512_castps_si512(a)); } +//// 64 +NPY_FINLINE void npyv_storen_u64(npy_uint64 *ptr, npy_intp stride, npyv_u64 a) +{ + const __m512i idx = _mm512_setr_epi64( + 0*stride, 
1*stride, 2*stride, 3*stride, + 4*stride, 5*stride, 6*stride, 7*stride + ); + _mm512_i64scatter_epi64((__m512i*)ptr, idx, a, 8); +} +NPY_FINLINE void npyv_storen_s64(npy_int64 *ptr, npy_intp stride, npyv_s64 a) +{ npyv_storen_u64((npy_uint64*)ptr, stride, a); } +NPY_FINLINE void npyv_storen_f64(double *ptr, npy_intp stride, npyv_f64 a) +{ npyv_storen_u64((npy_uint64*)ptr, stride, _mm512_castpd_si512(a)); } + +/********************************* + * Partial Load + *********************************/ +//// 32 +NPY_FINLINE npyv_s32 npyv_load_till_s32(const npy_int32 *ptr, npy_uintp nlane, npy_int32 fill) +{ + assert(nlane > 0); + const __m512i vfill = _mm512_set1_epi32(fill); + const __mmask16 mask = nlane > 31 ? -1 : (1 << nlane) - 1; + return _mm512_mask_loadu_epi32(vfill, mask, (const __m512i*)ptr); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s32 npyv_load_tillz_s32(const npy_int32 *ptr, npy_uintp nlane) +{ + assert(nlane > 0); + const __mmask16 mask = nlane > 31 ? -1 : (1 << nlane) - 1; + return _mm512_maskz_loadu_epi32(mask, (const __m512i*)ptr); +} +//// 64 +NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, npy_int64 fill) +{ + assert(nlane > 0); + const __m512i vfill = _mm512_set1_epi64(fill); + const __mmask8 mask = nlane > 31 ? -1 : (1 << nlane) - 1; + return _mm512_mask_loadu_epi64(vfill, mask, (const __m512i*)ptr); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s64 npyv_load_tillz_s64(const npy_int64 *ptr, npy_uintp nlane) +{ + assert(nlane > 0); + const __mmask8 mask = nlane > 15 ? -1 : (1 << nlane) - 1; + return _mm512_maskz_loadu_epi64(mask, (const __m512i*)ptr); +} +/********************************* + * Non-contiguous partial load + *********************************/ +//// 32 +NPY_FINLINE npyv_s32 +npyv_loadn_till_s32(const npy_int32 *ptr, npy_intp stride, npy_uintp nlane, npy_int32 fill) +{ + assert(nlane > 0); + assert(llabs(stride) <= NPY_SIMD_MAXLOAD_STRIDE32); + const __m512i steps = npyv_set_s32( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + ); + const __m512i idx = _mm512_mullo_epi32(steps, _mm512_set1_epi32((int)stride)); + const __m512i vfill = _mm512_set1_epi32(fill); + const __mmask16 mask = nlane > 31 ? -1 : (1 << nlane) - 1; + return _mm512_mask_i32gather_epi32(vfill, mask, idx, (const __m512i*)ptr, 4); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s32 +npyv_loadn_tillz_s32(const npy_int32 *ptr, npy_intp stride, npy_uintp nlane) +{ return npyv_loadn_till_s32(ptr, stride, nlane, 0); } +//// 64 +NPY_FINLINE npyv_s64 +npyv_loadn_till_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npy_int64 fill) +{ + assert(nlane > 0); + const __m512i idx = _mm512_setr_epi64( + 0*stride, 1*stride, 2*stride, 3*stride, + 4*stride, 5*stride, 6*stride, 7*stride + ); + const __m512i vfill = _mm512_set1_epi64(fill); + const __mmask8 mask = nlane > 31 ? -1 : (1 << nlane) - 1; + return _mm512_mask_i64gather_epi64(vfill, mask, idx, (const __m512i*)ptr, 8); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s64 +npyv_loadn_tillz_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane) +{ return npyv_loadn_till_s64(ptr, stride, nlane, 0); } +/********************************* + * Partial store + *********************************/ +//// 32 +NPY_FINLINE void npyv_store_till_s32(npy_int32 *ptr, npy_uintp nlane, npyv_s32 a) +{ + assert(nlane > 0); + const __mmask16 mask = nlane > 31 ? 
-1 : (1 << nlane) - 1; + _mm512_mask_storeu_epi32((__m512i*)ptr, mask, a); +} +//// 64 +NPY_FINLINE void npyv_store_till_s64(npy_int64 *ptr, npy_uintp nlane, npyv_s64 a) +{ + assert(nlane > 0); + const __mmask8 mask = nlane > 15 ? -1 : (1 << nlane) - 1; + _mm512_mask_storeu_epi64((__m512i*)ptr, mask, a); +} +/********************************* + * Non-contiguous partial store + *********************************/ +//// 32 +NPY_FINLINE void npyv_storen_till_s32(npy_int32 *ptr, npy_intp stride, npy_uintp nlane, npyv_s32 a) +{ + assert(nlane > 0); + assert(llabs(stride) <= NPY_SIMD_MAXSTORE_STRIDE32); + const __m512i steps = _mm512_setr_epi32( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + ); + const __m512i idx = _mm512_mullo_epi32(steps, _mm512_set1_epi32((int)stride)); + const __mmask16 mask = nlane > 31 ? -1 : (1 << nlane) - 1; + _mm512_mask_i32scatter_epi32((__m512i*)ptr, mask, idx, a, 4); +} +//// 64 +NPY_FINLINE void npyv_storen_till_s64(npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npyv_s64 a) +{ + assert(nlane > 0); + const __m512i idx = _mm512_setr_epi64( + 0*stride, 1*stride, 2*stride, 3*stride, + 4*stride, 5*stride, 6*stride, 7*stride + ); + const __mmask8 mask = nlane > 15 ? -1 : (1 << nlane) - 1; + _mm512_mask_i64scatter_epi64((__m512i*)ptr, mask, idx, a, 8); +} + +/***************************************************************************** + * Implement partial load/store for u32/f32/u64/f64... via reinterpret cast + *****************************************************************************/ +#define NPYV_IMPL_AVX512_REST_PARTIAL_TYPES(F_SFX, T_SFX) \ + NPY_FINLINE npyv_##F_SFX npyv_load_till_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_uintp nlane, npyv_lanetype_##F_SFX fill) \ + { \ + union { \ + npyv_lanetype_##F_SFX from_##F_SFX; \ + npyv_lanetype_##T_SFX to_##T_SFX; \ + } pun = {.from_##F_SFX = fill}; \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_load_till_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, nlane, pun.to_##T_SFX \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_loadn_till_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane, \ + npyv_lanetype_##F_SFX fill) \ + { \ + union { \ + npyv_lanetype_##F_SFX from_##F_SFX; \ + npyv_lanetype_##T_SFX to_##T_SFX; \ + } pun = {.from_##F_SFX = fill}; \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_loadn_till_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, stride, nlane, pun.to_##T_SFX \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_load_tillz_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_uintp nlane) \ + { \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_load_tillz_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, nlane \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_loadn_tillz_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane) \ + { \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_loadn_tillz_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, stride, nlane \ + )); \ + } \ + NPY_FINLINE void npyv_store_till_##F_SFX \ + (npyv_lanetype_##F_SFX *ptr, npy_uintp nlane, npyv_##F_SFX a) \ + { \ + npyv_store_till_##T_SFX( \ + (npyv_lanetype_##T_SFX *)ptr, nlane, \ + npyv_reinterpret_##T_SFX##_##F_SFX(a) \ + ); \ + } \ + NPY_FINLINE void npyv_storen_till_##F_SFX \ + (npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane, npyv_##F_SFX a) \ + { \ + npyv_storen_till_##T_SFX( \ + (npyv_lanetype_##T_SFX *)ptr, stride, nlane, \ + npyv_reinterpret_##T_SFX##_##F_SFX(a) \ + ); \ + } + +NPYV_IMPL_AVX512_REST_PARTIAL_TYPES(u32, s32) 
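+// These NPYV_IMPL_AVX512_REST_PARTIAL_TYPES instantiations (above and below)
+// forward each unsigned/floating-point suffix to the signed implementation of
+// the same lane width: the fill scalar is bit-punned through a union (which
+// avoids strict-aliasing violations) and the result vector is converted back
+// with the matching npyv_reinterpret intrinsic.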
+NPYV_IMPL_AVX512_REST_PARTIAL_TYPES(f32, s32) +NPYV_IMPL_AVX512_REST_PARTIAL_TYPES(u64, s64) +NPYV_IMPL_AVX512_REST_PARTIAL_TYPES(f64, s64) #endif // _NPY_SIMD_AVX512_MEMORY_H diff --git a/numpy/core/src/common/simd/neon/memory.h b/numpy/core/src/common/simd/neon/memory.h index afa703584dac..1e258f1bcbef 100644 --- a/numpy/core/src/common/simd/neon/memory.h +++ b/numpy/core/src/common/simd/neon/memory.h @@ -5,6 +5,8 @@ #ifndef _NPY_SIMD_NEON_MEMORY_H #define _NPY_SIMD_NEON_MEMORY_H +#include "misc.h" + /*************************** * load/store ***************************/ @@ -45,5 +47,290 @@ NPYV_IMPL_NEON_MEM(f32, float) #if NPY_SIMD_F64 NPYV_IMPL_NEON_MEM(f64, double) #endif +/*************************** + * Non-contiguous Load + ***************************/ +NPY_FINLINE npyv_s32 npyv_loadn_s32(const npy_int32 *ptr, npy_intp stride) +{ + switch (stride) { + case 2: + return vld2q_s32((const int32_t*)ptr).val[0]; + case 3: + return vld3q_s32((const int32_t*)ptr).val[0]; + case 4: + return vld4q_s32((const int32_t*)ptr).val[0]; + default:; + int32x2_t ax = vcreate_s32(*ptr); + int32x4_t a = vcombine_s32(ax, ax); + a = vld1q_lane_s32((const int32_t*)ptr + stride, a, 1); + a = vld1q_lane_s32((const int32_t*)ptr + stride*2, a, 2); + a = vld1q_lane_s32((const int32_t*)ptr + stride*3, a, 3); + return a; + } +} + +NPY_FINLINE npyv_u32 npyv_loadn_u32(const npy_uint32 *ptr, npy_intp stride) +{ + return npyv_reinterpret_u32_s32( + npyv_loadn_s32((const npy_int32*)ptr, stride) + ); +} +NPY_FINLINE npyv_f32 npyv_loadn_f32(const float *ptr, npy_intp stride) +{ + return npyv_reinterpret_f32_s32( + npyv_loadn_s32((const npy_int32*)ptr, stride) + ); +} +//// 64 +NPY_FINLINE npyv_s64 npyv_loadn_s64(const npy_int64 *ptr, npy_intp stride) +{ + return vcombine_s64( + vld1_s64((const int64_t*)ptr), vld1_s64((const int64_t*)ptr + stride) + ); +} +NPY_FINLINE npyv_u64 npyv_loadn_u64(const npy_uint64 *ptr, npy_intp stride) +{ + return npyv_reinterpret_u64_s64( + npyv_loadn_s64((const npy_int64*)ptr, stride) + ); +} +#if NPY_SIMD_F64 +NPY_FINLINE npyv_f64 npyv_loadn_f64(const double *ptr, npy_intp stride) +{ + return npyv_reinterpret_f64_s64( + npyv_loadn_s64((const npy_int64*)ptr, stride) + ); +} +#endif +/*************************** + * Non-contiguous Store + ***************************/ +//// 32 +NPY_FINLINE void npyv_storen_s32(npy_int32 *ptr, npy_intp stride, npyv_s32 a) +{ + vst1q_lane_s32((int32_t*)ptr, a, 0); + vst1q_lane_s32((int32_t*)ptr + stride, a, 1); + vst1q_lane_s32((int32_t*)ptr + stride*2, a, 2); + vst1q_lane_s32((int32_t*)ptr + stride*3, a, 3); +} +NPY_FINLINE void npyv_storen_u32(npy_uint32 *ptr, npy_intp stride, npyv_u32 a) +{ npyv_storen_s32((npy_int32*)ptr, stride, npyv_reinterpret_s32_u32(a)); } +NPY_FINLINE void npyv_storen_f32(float *ptr, npy_intp stride, npyv_f32 a) +{ npyv_storen_s32((npy_int32*)ptr, stride, npyv_reinterpret_s32_f32(a)); } +//// 64 +NPY_FINLINE void npyv_storen_s64(npy_int64 *ptr, npy_intp stride, npyv_s64 a) +{ + vst1q_lane_s64((int64_t*)ptr, a, 0); + vst1q_lane_s64((int64_t*)ptr + stride, a, 1); +} +NPY_FINLINE void npyv_storen_u64(npy_uint64 *ptr, npy_intp stride, npyv_u64 a) +{ npyv_storen_s64((npy_int64*)ptr, stride, npyv_reinterpret_s64_u64(a)); } + +#if NPY_SIMD_F64 +NPY_FINLINE void npyv_storen_f64(double *ptr, npy_intp stride, npyv_f64 a) +{ npyv_storen_s64((npy_int64*)ptr, stride, npyv_reinterpret_s64_f64(a)); } +#endif + +/********************************* + * Partial Load + *********************************/ +//// 32 +NPY_FINLINE npyv_s32 
npyv_load_till_s32(const npy_int32 *ptr, npy_uintp nlane, npy_int32 fill) +{ + assert(nlane > 0); + switch(nlane) { + case 1: + return vld1q_lane_s32((const int32_t*)ptr, vdupq_n_s32(fill), 0); + case 2: + return vcombine_s32(vld1_s32((const int32_t*)ptr), vdup_n_s32(fill)); + case 3: + return vcombine_s32( + vld1_s32((const int32_t*)ptr), + vld1_lane_s32((const int32_t*)ptr + 2, vdup_n_s32(fill), 0) + ); + default: + return npyv_load_s32(ptr); + } +} +// fill zero to rest lanes +NPY_FINLINE npyv_s32 npyv_load_tillz_s32(const npy_int32 *ptr, npy_uintp nlane) +{ return npyv_load_till_s32(ptr, nlane, 0); } +//// 64 +NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, npy_int64 fill) +{ + assert(nlane > 0); + if (nlane == 1) { + return vcombine_s64(vld1_s64((const int64_t*)ptr), vdup_n_s64(fill)); + } + return npyv_load_s64(ptr); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s64 npyv_load_tillz_s64(const npy_int64 *ptr, npy_uintp nlane) +{ return npyv_load_till_s64(ptr, nlane, 0); } + +/********************************* + * Non-contiguous partial load + *********************************/ +//// 32 +NPY_FINLINE npyv_s32 +npyv_loadn_till_s32(const npy_int32 *ptr, npy_intp stride, npy_uintp nlane, npy_int32 fill) +{ + assert(nlane > 0); + int32x4_t vfill = vdupq_n_s32(fill); + switch(nlane) { + case 3: + vfill = vld1q_lane_s32((const int32_t*)ptr + stride*2, vfill, 2); + case 2: + vfill = vld1q_lane_s32((const int32_t*)ptr + stride, vfill, 1); + case 1: + vfill = vld1q_lane_s32((const int32_t*)ptr, vfill, 0); + return vfill; + default: + return npyv_loadn_s32(ptr, stride); + } +} +NPY_FINLINE npyv_s32 +npyv_loadn_tillz_s32(const npy_int32 *ptr, npy_intp stride, npy_uintp nlane) +{ return npyv_loadn_till_s32(ptr, stride, nlane, 0); } + +NPY_FINLINE npyv_s64 +npyv_loadn_till_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npy_int64 fill) +{ + assert(nlane > 0); + if (nlane == 1) { + return vcombine_s64(vld1_s64((const int64_t*)ptr), vdup_n_s64(fill)); + } + return npyv_loadn_s64(ptr, stride); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s64 npyv_loadn_tillz_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane) +{ return npyv_loadn_till_s64(ptr, stride, nlane, 0); } + +/********************************* + * Partial store + *********************************/ +//// 32 +NPY_FINLINE void npyv_store_till_s32(npy_int32 *ptr, npy_uintp nlane, npyv_s32 a) +{ + assert(nlane > 0); + switch(nlane) { + case 1: + vst1q_lane_s32((int32_t*)ptr, a, 0); + break; + case 2: + vst1_s32((int32_t*)ptr, vget_low_s32(a)); + break; + case 3: + vst1_s32((int32_t*)ptr, vget_low_s32(a)); + vst1q_lane_s32((int32_t*)ptr + 2, a, 2); + break; + default: + npyv_store_s32(ptr, a); + } +} +//// 64 +NPY_FINLINE void npyv_store_till_s64(npy_int64 *ptr, npy_uintp nlane, npyv_s64 a) +{ + assert(nlane > 0); + if (nlane == 1) { + vst1q_lane_s64((int64_t*)ptr, a, 0); + return; + } + npyv_store_s64(ptr, a); +} +/********************************* + * Non-contiguous partial store + *********************************/ +//// 32 +NPY_FINLINE void npyv_storen_till_s32(npy_int32 *ptr, npy_intp stride, npy_uintp nlane, npyv_s32 a) +{ + assert(nlane > 0); + switch(nlane) { + default: + vst1q_lane_s32((int32_t*)ptr + stride*3, a, 3); + case 3: + vst1q_lane_s32((int32_t*)ptr + stride*2, a, 2); + case 2: + vst1q_lane_s32((int32_t*)ptr + stride, a, 1); + case 1: + vst1q_lane_s32((int32_t*)ptr, a, 0); + break; + } +} +//// 64 +NPY_FINLINE void npyv_storen_till_s64(npy_int64 *ptr, npy_intp stride, 
npy_uintp nlane, npyv_s64 a) +{ + assert(nlane > 0); + if (nlane == 1) { + vst1q_lane_s64((int64_t*)ptr, a, 0); + return; + } + npyv_storen_s64(ptr, stride, a); +} + +/***************************************************************** + * Implement partial load/store for u32/f32/u64/f64... via casting + *****************************************************************/ +#define NPYV_IMPL_NEON_REST_PARTIAL_TYPES(F_SFX, T_SFX) \ + NPY_FINLINE npyv_##F_SFX npyv_load_till_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_uintp nlane, npyv_lanetype_##F_SFX fill) \ + { \ + union { \ + npyv_lanetype_##F_SFX from_##F_SFX; \ + npyv_lanetype_##T_SFX to_##T_SFX; \ + } pun = {.from_##F_SFX = fill}; \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_load_till_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, nlane, pun.to_##T_SFX \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_loadn_till_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane, \ + npyv_lanetype_##F_SFX fill) \ + { \ + union { \ + npyv_lanetype_##F_SFX from_##F_SFX; \ + npyv_lanetype_##T_SFX to_##T_SFX; \ + } pun = {.from_##F_SFX = fill}; \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_loadn_till_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, stride, nlane, pun.to_##T_SFX \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_load_tillz_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_uintp nlane) \ + { \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_load_tillz_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, nlane \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_loadn_tillz_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane) \ + { \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_loadn_tillz_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, stride, nlane \ + )); \ + } \ + NPY_FINLINE void npyv_store_till_##F_SFX \ + (npyv_lanetype_##F_SFX *ptr, npy_uintp nlane, npyv_##F_SFX a) \ + { \ + npyv_store_till_##T_SFX( \ + (npyv_lanetype_##T_SFX *)ptr, nlane, \ + npyv_reinterpret_##T_SFX##_##F_SFX(a) \ + ); \ + } \ + NPY_FINLINE void npyv_storen_till_##F_SFX \ + (npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane, npyv_##F_SFX a) \ + { \ + npyv_storen_till_##T_SFX( \ + (npyv_lanetype_##T_SFX *)ptr, stride, nlane, \ + npyv_reinterpret_##T_SFX##_##F_SFX(a) \ + ); \ + } + +NPYV_IMPL_NEON_REST_PARTIAL_TYPES(u32, s32) +NPYV_IMPL_NEON_REST_PARTIAL_TYPES(f32, s32) +NPYV_IMPL_NEON_REST_PARTIAL_TYPES(u64, s64) +#if NPY_SIMD_F64 +NPYV_IMPL_NEON_REST_PARTIAL_TYPES(f64, s64) +#endif #endif // _NPY_SIMD_NEON_MEMORY_H diff --git a/numpy/core/src/common/simd/simd.h b/numpy/core/src/common/simd/simd.h index 2f39c8427b5d..8804223c9fef 100644 --- a/numpy/core/src/common/simd/simd.h +++ b/numpy/core/src/common/simd/simd.h @@ -49,6 +49,55 @@ typedef double npyv_lanetype_f64; #define NPY_SIMD_WIDTH 0 #define NPY_SIMD_F64 0 #endif +/** + * Some SIMD extensions (currently AVX2 and AVX512F) impose a de facto + * limit on the maximum stride size when dealing with non-contiguous memory access. + * + * Therefore, the following functions must be used to check that a stride is within + * the acceptable limit before using any of the non-contiguous load/store intrinsics. + * + * For instance: + * npy_intp ld_stride = step[0] / sizeof(float); + * npy_intp st_stride = step[1] / sizeof(float); + * + * if (npyv_loadable_stride_f32(ld_stride) && npyv_storable_stride_f32(st_stride)) { + * for (;;) + * npyv_f32 a = npyv_loadn_f32(ld_pointer, ld_stride); + * // ...
+ * npyv_storen_f32(st_pointer, st_stride, a); + * } + * else { + * for (;;) + * // C scalars + * } + */ +#ifndef NPY_SIMD_MAXLOAD_STRIDE32 + #define NPY_SIMD_MAXLOAD_STRIDE32 0 +#endif +#ifndef NPY_SIMD_MAXSTORE_STRIDE32 + #define NPY_SIMD_MAXSTORE_STRIDE32 0 +#endif +#ifndef NPY_SIMD_MAXLOAD_STRIDE64 + #define NPY_SIMD_MAXLOAD_STRIDE64 0 +#endif +#ifndef NPY_SIMD_MAXSTORE_STRIDE64 + #define NPY_SIMD_MAXSTORE_STRIDE64 0 +#endif +#define NPYV_IMPL_MAXSTRIDE(SFX, MAXLOAD, MAXSTORE) \ + NPY_FINLINE int npyv_loadable_stride_##SFX(npy_intp stride) \ + { return MAXLOAD > 0 ? llabs(stride) <= MAXLOAD : 1; } \ + NPY_FINLINE int npyv_storable_stride_##SFX(npy_intp stride) \ + { return MAXSTORE > 0 ? llabs(stride) <= MAXSTORE : 1; } +#if NPY_SIMD + NPYV_IMPL_MAXSTRIDE(u32, NPY_SIMD_MAXLOAD_STRIDE32, NPY_SIMD_MAXSTORE_STRIDE32) + NPYV_IMPL_MAXSTRIDE(s32, NPY_SIMD_MAXLOAD_STRIDE32, NPY_SIMD_MAXSTORE_STRIDE32) + NPYV_IMPL_MAXSTRIDE(f32, NPY_SIMD_MAXLOAD_STRIDE32, NPY_SIMD_MAXSTORE_STRIDE32) + NPYV_IMPL_MAXSTRIDE(u64, NPY_SIMD_MAXLOAD_STRIDE64, NPY_SIMD_MAXSTORE_STRIDE64) + NPYV_IMPL_MAXSTRIDE(s64, NPY_SIMD_MAXLOAD_STRIDE64, NPY_SIMD_MAXSTORE_STRIDE64) +#endif +#if NPY_SIMD_F64 + NPYV_IMPL_MAXSTRIDE(f64, NPY_SIMD_MAXLOAD_STRIDE64, NPY_SIMD_MAXSTORE_STRIDE64) +#endif #ifdef __cplusplus } diff --git a/numpy/core/src/common/simd/sse/memory.h b/numpy/core/src/common/simd/sse/memory.h index 1a555d6f03de..1074c3b02efe 100644 --- a/numpy/core/src/common/simd/sse/memory.h +++ b/numpy/core/src/common/simd/sse/memory.h @@ -5,6 +5,8 @@ #ifndef _NPY_SIMD_SSE_MEMORY_H #define _NPY_SIMD_SSE_MEMORY_H +#include "misc.h" + /*************************** * load/store ***************************/ @@ -70,5 +72,427 @@ NPYV_IMPL_SSE_MEM_INT(npy_int64, s64) // store higher part #define npyv_storeh_f32(PTR, VEC) npyv_storeh_u32((npy_uint32*)(PTR), _mm_castps_si128(VEC)) #define npyv_storeh_f64(PTR, VEC) npyv_storeh_u32((npy_uint32*)(PTR), _mm_castpd_si128(VEC)) +/*************************** + * Non-contiguous Load + ***************************/ +//// 32 +NPY_FINLINE npyv_s32 npyv_loadn_s32(const npy_int32 *ptr, npy_intp stride) +{ + __m128i a = _mm_cvtsi32_si128(*ptr); +#ifdef NPY_HAVE_SSE41 + a = _mm_insert_epi32(a, ptr[stride], 1); + a = _mm_insert_epi32(a, ptr[stride*2], 2); + a = _mm_insert_epi32(a, ptr[stride*3], 3); +#else + __m128i a1 = _mm_cvtsi32_si128(ptr[stride]); + __m128i a2 = _mm_cvtsi32_si128(ptr[stride*2]); + __m128i a3 = _mm_cvtsi32_si128(ptr[stride*3]); + a = _mm_unpacklo_epi32(a, a1); + a = _mm_unpacklo_epi64(a, _mm_unpacklo_epi32(a2, a3)); +#endif + return a; +} +NPY_FINLINE npyv_u32 npyv_loadn_u32(const npy_uint32 *ptr, npy_intp stride) +{ return npyv_loadn_s32((const npy_int32*)ptr, stride); } +NPY_FINLINE npyv_f32 npyv_loadn_f32(const float *ptr, npy_intp stride) +{ return _mm_castsi128_ps(npyv_loadn_s32((const npy_int32*)ptr, stride)); } +//// 64 +NPY_FINLINE npyv_f64 npyv_loadn_f64(const double *ptr, npy_intp stride) +{ return _mm_loadh_pd(npyv_loadl_f64(ptr), ptr + stride); } +NPY_FINLINE npyv_u64 npyv_loadn_u64(const npy_uint64 *ptr, npy_intp stride) +{ return _mm_castpd_si128(npyv_loadn_f64((const double*)ptr, stride)); } +NPY_FINLINE npyv_s64 npyv_loadn_s64(const npy_int64 *ptr, npy_intp stride) +{ return _mm_castpd_si128(npyv_loadn_f64((const double*)ptr, stride)); } +/*************************** + * Non-contiguous Store + ***************************/ +//// 32 +NPY_FINLINE void npyv_storen_s32(npy_int32 *ptr, npy_intp stride, npyv_s32 a) +{ + ptr[stride * 0] = _mm_cvtsi128_si32(a); +#ifdef 
NPY_HAVE_SSE41 + ptr[stride * 1] = _mm_extract_epi32(a, 1); + ptr[stride * 2] = _mm_extract_epi32(a, 2); + ptr[stride * 3] = _mm_extract_epi32(a, 3); +#else + ptr[stride * 1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 0, 1))); + ptr[stride * 2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 0, 2))); + ptr[stride * 3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 0, 3))); +#endif +} +NPY_FINLINE void npyv_storen_u32(npy_uint32 *ptr, npy_intp stride, npyv_u32 a) +{ npyv_storen_s32((npy_int32*)ptr, stride, a); } +NPY_FINLINE void npyv_storen_f32(float *ptr, npy_intp stride, npyv_f32 a) +{ npyv_storen_s32((npy_int32*)ptr, stride, _mm_castps_si128(a)); } +//// 64 +NPY_FINLINE void npyv_storen_f64(double *ptr, npy_intp stride, npyv_f64 a) +{ + _mm_storel_pd(ptr, a); + _mm_storeh_pd(ptr + stride, a); +} +NPY_FINLINE void npyv_storen_u64(npy_uint64 *ptr, npy_intp stride, npyv_u64 a) +{ npyv_storen_f64((double*)ptr, stride, _mm_castsi128_pd(a)); } +NPY_FINLINE void npyv_storen_s64(npy_int64 *ptr, npy_intp stride, npyv_s64 a) +{ npyv_storen_f64((double*)ptr, stride, _mm_castsi128_pd(a)); } + +/********************************* + * Partial Load + *********************************/ +#if defined(__clang__) && __clang_major__ > 7 + /** + * Clang >= 8 performs aggressive optimizations that tend to + * zero the bits of the upper half of vectors even + * when we try to fill them up with certain scalars, + * which may lead to zero-division errors. + */ + #define NPYV__CLANG_ZEROUPPER +#endif +//// 32 +NPY_FINLINE npyv_s32 npyv_load_till_s32(const npy_int32 *ptr, npy_uintp nlane, npy_int32 fill) +{ + assert(nlane > 0); +#ifdef NPYV__CLANG_ZEROUPPER + if (nlane > 3) { + return npyv_load_s32(ptr); + } + npy_int32 NPY_DECL_ALIGNED(16) data[4] = {fill, fill, fill, fill}; + for (npy_uint64 i = 0; i < nlane; ++i) { + data[i] = ptr[i]; + } + return npyv_loada_s32(data); +#else + #ifndef NPY_HAVE_SSE41 + const short *wptr = (const short*)ptr; + #endif + const __m128i vfill = npyv_setall_s32(fill); + __m128i a; + switch(nlane) { + case 2: + return _mm_castpd_si128( + _mm_loadl_pd(_mm_castsi128_pd(vfill), (double*)ptr) + ); + #ifdef NPY_HAVE_SSE41 + case 1: + return _mm_insert_epi32(vfill, ptr[0], 0); + case 3: + a = _mm_loadl_epi64((const __m128i*)ptr); + a = _mm_insert_epi32(a, ptr[2], 2); + a = _mm_insert_epi32(a, fill, 3); + return a; + #else + case 1: + a = _mm_insert_epi16(vfill, wptr[0], 0); + return _mm_insert_epi16(a, wptr[1], 1); + case 3: + a = _mm_loadl_epi64((const __m128i*)ptr); + a = _mm_unpacklo_epi64(a, vfill); + a = _mm_insert_epi16(a, wptr[4], 4); + a = _mm_insert_epi16(a, wptr[5], 5); + return a; + #endif // NPY_HAVE_SSE41 + default: + return npyv_load_s32(ptr); + } +#endif +} +// fill zero to rest lanes +NPY_FINLINE npyv_s32 npyv_load_tillz_s32(const npy_int32 *ptr, npy_uintp nlane) +{ + assert(nlane > 0); + switch(nlane) { + case 1: + return _mm_cvtsi32_si128(*ptr); + case 2: + return _mm_loadl_epi64((const __m128i*)ptr); + case 3:; + npyv_s32 a = _mm_loadl_epi64((const __m128i*)ptr); + #ifdef NPY_HAVE_SSE41 + return _mm_insert_epi32(a, ptr[2], 2); + #else + return _mm_unpacklo_epi64(a, _mm_cvtsi32_si128(ptr[2])); + #endif + default: + return npyv_load_s32(ptr); + } +} +//// 64 +NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, npy_int64 fill) +{ + assert(nlane > 0); +#ifdef NPYV__CLANG_ZEROUPPER + if (nlane <= 2) { + npy_int64 NPY_DECL_ALIGNED(16) data[2] = {fill, fill}; + for (npy_uint64 i = 0; i < nlane; ++i) { + data[i] =
ptr[i]; + } + return npyv_loada_s64(data); + } +#else + if (nlane == 1) { + const __m128i vfill = npyv_setall_s64(fill); + return _mm_castpd_si128( + _mm_loadl_pd(_mm_castsi128_pd(vfill), (double*)ptr) + ); + } +#endif + return npyv_load_s64(ptr); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s64 npyv_load_tillz_s64(const npy_int64 *ptr, npy_uintp nlane) +{ + assert(nlane > 0); + if (nlane == 1) { + return _mm_loadl_epi64((const __m128i*)ptr); + } + return npyv_load_s64(ptr); +} +/********************************* + * Non-contiguous partial load + *********************************/ +//// 32 +NPY_FINLINE npyv_s32 +npyv_loadn_till_s32(const npy_int32 *ptr, npy_intp stride, npy_uintp nlane, npy_int32 fill) +{ + assert(nlane > 0); +#ifdef NPYV__CLANG_ZEROUPPER + if (nlane > 3) { + return npyv_loadn_s32(ptr, stride); + } + npy_int32 NPY_DECL_ALIGNED(16) data[4] = {fill, fill, fill, fill}; + for (npy_uint64 i = 0; i < nlane; ++i) { + data[i] = ptr[stride*i]; + } + return npyv_loada_s32(data); +#else + __m128i vfill = npyv_setall_s32(fill); + #ifndef NPY_HAVE_SSE41 + const short *wptr = (const short*)ptr; + #endif + switch(nlane) { + #ifdef NPY_HAVE_SSE41 + case 3: + vfill = _mm_insert_epi32(vfill, ptr[stride*2], 2); + case 2: + vfill = _mm_insert_epi32(vfill, ptr[stride], 1); + case 1: + vfill = _mm_insert_epi32(vfill, ptr[0], 0); + break; + #else + case 3: + vfill = _mm_unpacklo_epi32(_mm_cvtsi32_si128(ptr[stride*2]), vfill); + case 2: + vfill = _mm_unpacklo_epi64(_mm_unpacklo_epi32( + _mm_cvtsi32_si128(*ptr), _mm_cvtsi32_si128(ptr[stride]) + ), vfill); + break; + case 1: + vfill = _mm_insert_epi16(vfill, wptr[0], 0); + vfill = _mm_insert_epi16(vfill, wptr[1], 1); + break; + #endif // NPY_HAVE_SSE41 + default: + return npyv_loadn_s32(ptr, stride); + } // switch + return vfill; +#endif +} +// fill zero to rest lanes +NPY_FINLINE npyv_s32 +npyv_loadn_tillz_s32(const npy_int32 *ptr, npy_intp stride, npy_uintp nlane) +{ + assert(nlane > 0); + switch(nlane) { + case 1: + return _mm_cvtsi32_si128(ptr[0]); + case 2:; + npyv_s32 a = _mm_cvtsi32_si128(ptr[0]); +#ifdef NPY_HAVE_SSE41 + return _mm_insert_epi32(a, ptr[stride], 1); +#else + return _mm_unpacklo_epi32(a, _mm_cvtsi32_si128(ptr[stride])); +#endif // NPY_HAVE_SSE41 + case 3:; + a = _mm_cvtsi32_si128(ptr[0]); +#ifdef NPY_HAVE_SSE41 + a = _mm_insert_epi32(a, ptr[stride], 1); + a = _mm_insert_epi32(a, ptr[stride*2], 2); + return a; +#else + a = _mm_unpacklo_epi32(a, _mm_cvtsi32_si128(ptr[stride])); + a = _mm_unpacklo_epi64(a, _mm_cvtsi32_si128(ptr[stride*2])); + return a; +#endif // NPY_HAVE_SSE41 + default: + return npyv_loadn_s32(ptr, stride); + } +} +//// 64 +NPY_FINLINE npyv_s64 +npyv_loadn_till_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npy_int64 fill) +{ + assert(nlane > 0); +#ifdef NPYV__CLANG_ZEROUPPER + if (nlane <= 2) { + npy_int64 NPY_DECL_ALIGNED(16) data[2] = {fill, fill}; + for (npy_uint64 i = 0; i < nlane; ++i) { + data[i] = ptr[i*stride]; + } + return npyv_loada_s64(data); + } +#else + if (nlane == 1) { + const __m128i vfill = npyv_setall_s64(fill); + return _mm_castpd_si128( + _mm_loadl_pd(_mm_castsi128_pd(vfill), (double*)ptr) + ); + } +#endif + return npyv_loadn_s64(ptr, stride); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s64 npyv_loadn_tillz_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane) +{ + assert(nlane > 0); + if (nlane == 1) { + return _mm_loadl_epi64((const __m128i*)ptr); + } + return npyv_loadn_s64(ptr, stride); +} +/********************************* + * Partial store + 
*********************************/ +//// 32 +NPY_FINLINE void npyv_store_till_s32(npy_int32 *ptr, npy_uintp nlane, npyv_s32 a) +{ + assert(nlane > 0); + switch(nlane) { + case 1: + *ptr = _mm_cvtsi128_si32(a); + break; + case 2: + _mm_storel_epi64((__m128i *)ptr, a); + break; + case 3: + _mm_storel_epi64((__m128i *)ptr, a); + #ifdef NPY_HAVE_SSE41 + ptr[2] = _mm_extract_epi32(a, 2); + #else + ptr[2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 0, 2))); + #endif + break; + default: + npyv_store_s32(ptr, a); + } +} +//// 64 +NPY_FINLINE void npyv_store_till_s64(npy_int64 *ptr, npy_uintp nlane, npyv_s64 a) +{ + assert(nlane > 0); + if (nlane == 1) { + _mm_storel_epi64((__m128i *)ptr, a); + return; + } + npyv_store_s64(ptr, a); +} +/********************************* + * Non-contiguous partial store + *********************************/ +//// 32 +NPY_FINLINE void npyv_storen_till_s32(npy_int32 *ptr, npy_intp stride, npy_uintp nlane, npyv_s32 a) +{ + assert(nlane > 0); + switch(nlane) { +#ifdef NPY_HAVE_SSE41 + default: + ptr[stride*3] = _mm_extract_epi32(a, 3); + case 3: + ptr[stride*2] = _mm_extract_epi32(a, 2); + case 2: + ptr[stride*1] = _mm_extract_epi32(a, 1); +#else + default: + ptr[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 0, 3))); + case 3: + ptr[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 0, 2))); + case 2: + ptr[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 0, 1))); +#endif + case 1: + ptr[stride*0] = _mm_cvtsi128_si32(a); + break; + } +} +//// 64 +NPY_FINLINE void npyv_storen_till_s64(npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npyv_s64 a) +{ + assert(nlane > 0); + if (nlane == 1) { + _mm_storel_epi64((__m128i *)ptr, a); + return; + } + npyv_storen_s64(ptr, stride, a); +} +/***************************************************************** + * Implement partial load/store for u32/f32/u64/f64... 
via casting + *****************************************************************/ +#define NPYV_IMPL_SSE_REST_PARTIAL_TYPES(F_SFX, T_SFX) \ + NPY_FINLINE npyv_##F_SFX npyv_load_till_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_uintp nlane, npyv_lanetype_##F_SFX fill) \ + { \ + union { \ + npyv_lanetype_##F_SFX from_##F_SFX; \ + npyv_lanetype_##T_SFX to_##T_SFX; \ + } pun = {.from_##F_SFX = fill}; \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_load_till_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, nlane, pun.to_##T_SFX \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_loadn_till_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane, \ + npyv_lanetype_##F_SFX fill) \ + { \ + union { \ + npyv_lanetype_##F_SFX from_##F_SFX; \ + npyv_lanetype_##T_SFX to_##T_SFX; \ + } pun = {.from_##F_SFX = fill}; \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_loadn_till_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, stride, nlane, pun.to_##T_SFX \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_load_tillz_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_uintp nlane) \ + { \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_load_tillz_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, nlane \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_loadn_tillz_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane) \ + { \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_loadn_tillz_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, stride, nlane \ + )); \ + } \ + NPY_FINLINE void npyv_store_till_##F_SFX \ + (npyv_lanetype_##F_SFX *ptr, npy_uintp nlane, npyv_##F_SFX a) \ + { \ + npyv_store_till_##T_SFX( \ + (npyv_lanetype_##T_SFX *)ptr, nlane, \ + npyv_reinterpret_##T_SFX##_##F_SFX(a) \ + ); \ + } \ + NPY_FINLINE void npyv_storen_till_##F_SFX \ + (npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane, npyv_##F_SFX a) \ + { \ + npyv_storen_till_##T_SFX( \ + (npyv_lanetype_##T_SFX *)ptr, stride, nlane, \ + npyv_reinterpret_##T_SFX##_##F_SFX(a) \ + ); \ + } + +NPYV_IMPL_SSE_REST_PARTIAL_TYPES(u32, s32) +NPYV_IMPL_SSE_REST_PARTIAL_TYPES(f32, s32) +NPYV_IMPL_SSE_REST_PARTIAL_TYPES(u64, s64) +NPYV_IMPL_SSE_REST_PARTIAL_TYPES(f64, s64) #endif // _NPY_SIMD_SSE_MEMORY_H diff --git a/numpy/core/src/common/simd/vsx/memory.h b/numpy/core/src/common/simd/vsx/memory.h index e0d908bf906b..08a0a9276cc6 100644 --- a/numpy/core/src/common/simd/vsx/memory.h +++ b/numpy/core/src/common/simd/vsx/memory.h @@ -4,147 +4,343 @@ #ifndef _NPY_SIMD_VSX_MEMORY_H #define _NPY_SIMD_VSX_MEMORY_H + +#include "misc.h" + /**************************** - * load/store + * Private utilities ****************************/ // TODO: test load by cast #define VSX__CAST_lOAD 0 #if VSX__CAST_lOAD - #define npyv__load(PTR, T_VEC) (*((T_VEC*)(PTR))) + #define npyv__load(T_VEC, PTR) (*((T_VEC*)(PTR))) #else /** * CLANG fails to load unaligned addresses via vec_xl, vec_xst * so we failback to vec_vsx_ld, vec_vsx_st */ #if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__)) - #define npyv__load(PTR, T_VEC) vec_vsx_ld(0, PTR) + #define npyv__load(T_VEC, PTR) vec_vsx_ld(0, PTR) #else - #define npyv__load(PTR, T_VEC) vec_xl(0, PTR) + #define npyv__load(T_VEC, PTR) vec_xl(0, PTR) #endif #endif -// unaligned load -#define npyv_load_u8(PTR) npyv__load(PTR, npyv_u8) -#define npyv_load_s8(PTR) npyv__load(PTR, npyv_s8) -#define npyv_load_u16(PTR) npyv__load(PTR, npyv_u16) -#define npyv_load_s16(PTR) npyv__load(PTR, npyv_s16) -#define npyv_load_u32(PTR) npyv__load(PTR, npyv_u32) 
-#define npyv_load_s32(PTR) npyv__load(PTR, npyv_s32) -#define npyv_load_f32(PTR) npyv__load(PTR, npyv_f32) -#define npyv_load_f64(PTR) npyv__load(PTR, npyv_f64) -#if VSX__CAST_lOAD - #define npyv_load_u64(PTR) npyv__load(PTR, npyv_u64) - #define npyv_load_s64(PTR) npyv__load(PTR, npyv_s64) +// unaligned store +#if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__)) + #define npyv__store(PTR, VEC) vec_vsx_st(VEC, 0, PTR) #else - #define npyv_load_u64(PTR) ((npyv_u64)npyv_load_u32((const unsigned int*)PTR)) - #define npyv_load_s64(PTR) ((npyv_s64)npyv_load_s32((const unsigned int*)PTR)) + #define npyv__store(PTR, VEC) vec_xst(VEC, 0, PTR) #endif -// aligned load -#define npyv_loada_u8(PTR) vec_ld(0, PTR) -#define npyv_loada_s8 npyv_loada_u8 -#define npyv_loada_u16 npyv_loada_u8 -#define npyv_loada_s16 npyv_loada_u8 -#define npyv_loada_u32 npyv_loada_u8 -#define npyv_loada_s32 npyv_loada_u8 -#define npyv_loada_u64 npyv_load_u64 -#define npyv_loada_s64 npyv_load_s64 -#define npyv_loada_f32 npyv_loada_u8 -#define npyv_loada_f64 npyv_load_f64 -// stream load -#define npyv_loads_u8 npyv_loada_u8 -#define npyv_loads_s8 npyv_loada_s8 -#define npyv_loads_u16 npyv_loada_u16 -#define npyv_loads_s16 npyv_loada_s16 -#define npyv_loads_u32 npyv_loada_u32 -#define npyv_loads_s32 npyv_loada_s32 -#define npyv_loads_u64 npyv_loada_u64 -#define npyv_loads_s64 npyv_loada_s64 -#define npyv_loads_f32 npyv_loada_f32 -#define npyv_loads_f64 npyv_loada_f64 -// load lower part + // avoid aliasing rules #ifdef __cplusplus template - NPY_FINLINE npy_uint64 *npyv__ptr2u64(T_PTR *ptr) - { return npy_uint64 *ptr64 = (npy_uint64*)ptr; return ptr; } + NPY_FINLINE npy_uint64 *npyv__ptr2u64(const T_PTR *ptr) + { npy_uint64 *ptr64 = (npy_uint64*)ptr; return ptr64; } #else - NPY_FINLINE npy_uint64 *npyv__ptr2u64(void *ptr) - { npy_uint64 *ptr64 = ptr; return ptr64; } + NPY_FINLINE npy_uint64 *npyv__ptr2u64(const void *ptr) + { npy_uint64 *ptr64 = (npy_uint64*)ptr; return ptr64; } #endif // __cplusplus -#if defined(__clang__) && !defined(__IBMC__) - // vec_promote doesn't support doubleword on clang - #define npyv_loadl_u64(PTR) npyv_setall_u64(*npyv__ptr2u64(PTR)) -#else - #define npyv_loadl_u64(PTR) vec_promote(*npyv__ptr2u64(PTR), 0) -#endif -#define npyv_loadl_u8(PTR) ((npyv_u8)npyv_loadl_u64(PTR)) -#define npyv_loadl_s8(PTR) ((npyv_s8)npyv_loadl_u64(PTR)) -#define npyv_loadl_u16(PTR) ((npyv_u16)npyv_loadl_u64(PTR)) -#define npyv_loadl_s16(PTR) ((npyv_s16)npyv_loadl_u64(PTR)) -#define npyv_loadl_u32(PTR) ((npyv_u32)npyv_loadl_u64(PTR)) -#define npyv_loadl_s32(PTR) ((npyv_s32)npyv_loadl_u64(PTR)) -#define npyv_loadl_s64(PTR) ((npyv_s64)npyv_loadl_u64(PTR)) -#define npyv_loadl_f32(PTR) ((npyv_f32)npyv_loadl_u64(PTR)) -#define npyv_loadl_f64(PTR) ((npyv_f64)npyv_loadl_u64(PTR)) -// unaligned store -#if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__)) - #define npyv_store_u8(PTR, VEC) vec_vsx_st(VEC, 0, PTR) -#else - #define npyv_store_u8(PTR, VEC) vec_xst(VEC, 0, PTR) -#endif -#define npyv_store_s8 npyv_store_u8 -#define npyv_store_u16 npyv_store_u8 -#define npyv_store_s16 npyv_store_u8 -#define npyv_store_u32 npyv_store_u8 -#define npyv_store_s32 npyv_store_u8 -#define npyv_store_u64(PTR, VEC) npyv_store_u8((unsigned int*)PTR, (npyv_u32)VEC) -#define npyv_store_s64(PTR, VEC) npyv_store_u8((unsigned int*)PTR, (npyv_u32)VEC) -#define npyv_store_f32 npyv_store_u8 -#define npyv_store_f64 npyv_store_u8 -// aligned store -#define npyv_storea_u8(PTR, VEC) 
vec_st(VEC, 0, PTR) -#define npyv_storea_s8 npyv_storea_u8 -#define npyv_storea_u16 npyv_storea_u8 -#define npyv_storea_s16 npyv_storea_u8 -#define npyv_storea_u32 npyv_storea_u8 -#define npyv_storea_s32 npyv_storea_u8 -#define npyv_storea_u64 npyv_store_u64 -#define npyv_storea_s64 npyv_store_s64 -#define npyv_storea_f32 npyv_storea_u8 -#define npyv_storea_f64 npyv_store_f64 -// stream store -#define npyv_stores_u8 npyv_storea_u8 -#define npyv_stores_s8 npyv_storea_s8 -#define npyv_stores_u16 npyv_storea_u16 -#define npyv_stores_s16 npyv_storea_s16 -#define npyv_stores_u32 npyv_storea_u32 -#define npyv_stores_s32 npyv_storea_s32 -#define npyv_stores_u64 npyv_storea_u64 -#define npyv_stores_s64 npyv_storea_s64 -#define npyv_stores_f32 npyv_storea_f32 -#define npyv_stores_f64 npyv_storea_f64 + +// load lower part +NPY_FINLINE npyv_u64 npyv__loadl(const void *ptr) +{ + #if defined(__clang__) && !defined(__IBMC__) + // vec_promote doesn't support doubleword on clang + return npyv_setall_u64(*npyv__ptr2u64(ptr)); + #else + return vec_promote(*npyv__ptr2u64(ptr), 0); + #endif +} // store lower part -#define npyv_storel_u8(PTR, VEC) \ +#define npyv__storel(PTR, VEC) \ *npyv__ptr2u64(PTR) = vec_extract(((npyv_u64)VEC), 0) -#define npyv_storel_s8 npyv_storel_u8 -#define npyv_storel_u16 npyv_storel_u8 -#define npyv_storel_s16 npyv_storel_u8 -#define npyv_storel_u32 npyv_storel_u8 -#define npyv_storel_s32 npyv_storel_u8 -#define npyv_storel_s64 npyv_storel_u8 -#define npyv_storel_u64 npyv_storel_u8 -#define npyv_storel_f32 npyv_storel_u8 -#define npyv_storel_f64 npyv_storel_u8 -// store higher part -#define npyv_storeh_u8(PTR, VEC) \ + +#define npyv__storeh(PTR, VEC) \ *npyv__ptr2u64(PTR) = vec_extract(((npyv_u64)VEC), 1) -#define npyv_storeh_s8 npyv_storeh_u8 -#define npyv_storeh_u16 npyv_storeh_u8 -#define npyv_storeh_s16 npyv_storeh_u8 -#define npyv_storeh_u32 npyv_storeh_u8 -#define npyv_storeh_s32 npyv_storeh_u8 -#define npyv_storeh_s64 npyv_storeh_u8 -#define npyv_storeh_u64 npyv_storeh_u8 -#define npyv_storeh_f32 npyv_storeh_u8 -#define npyv_storeh_f64 npyv_storeh_u8 + +/**************************** + * load/store + ****************************/ +#define NPYV_IMPL_VSX_MEM(SFX, DW_CAST) \ + NPY_FINLINE npyv_##SFX npyv_load_##SFX(const npyv_lanetype_##SFX *ptr) \ + { return (npyv_##SFX)npyv__load(npyv_##SFX, (const npyv_lanetype_##DW_CAST*)ptr); } \ + NPY_FINLINE npyv_##SFX npyv_loada_##SFX(const npyv_lanetype_##SFX *ptr) \ + { return (npyv_##SFX)vec_ld(0, (const npyv_lanetype_u32*)ptr); } \ + NPY_FINLINE npyv_##SFX npyv_loads_##SFX(const npyv_lanetype_##SFX *ptr) \ + { return npyv_loada_##SFX(ptr); } \ + NPY_FINLINE npyv_##SFX npyv_loadl_##SFX(const npyv_lanetype_##SFX *ptr) \ + { return (npyv_##SFX)npyv__loadl(ptr); } \ + NPY_FINLINE void npyv_store_##SFX(npyv_lanetype_##SFX *ptr, npyv_##SFX vec) \ + { npyv__store((npyv_lanetype_##DW_CAST*)ptr, (npyv_##DW_CAST)vec); } \ + NPY_FINLINE void npyv_storea_##SFX(npyv_lanetype_##SFX *ptr, npyv_##SFX vec) \ + { vec_st((npyv_u32)vec, 0, (npyv_lanetype_u32*)ptr); } \ + NPY_FINLINE void npyv_stores_##SFX(npyv_lanetype_##SFX *ptr, npyv_##SFX vec) \ + { npyv_storea_##SFX(ptr, vec); } \ + NPY_FINLINE void npyv_storel_##SFX(npyv_lanetype_##SFX *ptr, npyv_##SFX vec) \ + { npyv__storel(ptr, vec); } \ + NPY_FINLINE void npyv_storeh_##SFX(npyv_lanetype_##SFX *ptr, npyv_##SFX vec) \ + { npyv__storeh(ptr, vec); } + +NPYV_IMPL_VSX_MEM(u8, u8) +NPYV_IMPL_VSX_MEM(s8, s8) +NPYV_IMPL_VSX_MEM(u16, u16) +NPYV_IMPL_VSX_MEM(s16, s16) +NPYV_IMPL_VSX_MEM(u32, u32) 
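+// The macro's second argument selects the lane type used for the raw
+// load/store, so the 64-bit integer vectors instantiated below are moved
+// through the f64 path and cast back to their integer type.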
+NPYV_IMPL_VSX_MEM(s32, s32) +NPYV_IMPL_VSX_MEM(u64, f64) +NPYV_IMPL_VSX_MEM(s64, f64) +NPYV_IMPL_VSX_MEM(f32, f32) +NPYV_IMPL_VSX_MEM(f64, f64) + +/*************************** + * Non-contiguous Load + ***************************/ +//// 32 +NPY_FINLINE npyv_u32 npyv_loadn_u32(const npy_uint32 *ptr, npy_intp stride) +{ + return npyv_set_u32( + ptr[stride * 0], ptr[stride * 1], + ptr[stride * 2], ptr[stride * 3] + ); +} +NPY_FINLINE npyv_s32 npyv_loadn_s32(const npy_int32 *ptr, npy_intp stride) +{ return (npyv_s32)npyv_loadn_u32((const npy_uint32*)ptr, stride); } +NPY_FINLINE npyv_f32 npyv_loadn_f32(const float *ptr, npy_intp stride) +{ return (npyv_f32)npyv_loadn_u32((const npy_uint32*)ptr, stride); } +//// 64 +NPY_FINLINE npyv_u64 npyv_loadn_u64(const npy_uint64 *ptr, npy_intp stride) +{ return npyv_set_u64(ptr[0], ptr[stride]); } +NPY_FINLINE npyv_s64 npyv_loadn_s64(const npy_int64 *ptr, npy_intp stride) +{ return npyv_set_s64(ptr[0], ptr[stride]); } +NPY_FINLINE npyv_f64 npyv_loadn_f64(const double *ptr, npy_intp stride) +{ return npyv_set_f64(ptr[0], ptr[stride]); } +/*************************** + * Non-contiguous Store + ***************************/ +//// 32 +NPY_FINLINE void npyv_storen_u32(npy_uint32 *ptr, npy_intp stride, npyv_u32 a) +{ + ptr[stride * 0] = vec_extract(a, 0); + ptr[stride * 1] = vec_extract(a, 1); + ptr[stride * 2] = vec_extract(a, 2); + ptr[stride * 3] = vec_extract(a, 3); +} +NPY_FINLINE void npyv_storen_s32(npy_int32 *ptr, npy_intp stride, npyv_s32 a) +{ npyv_storen_u32((npy_uint32*)ptr, stride, (npyv_u32)a); } +NPY_FINLINE void npyv_storen_f32(float *ptr, npy_intp stride, npyv_f32 a) +{ npyv_storen_u32((npy_uint32*)ptr, stride, (npyv_u32)a); } +//// 64 +NPY_FINLINE void npyv_storen_u64(npy_uint64 *ptr, npy_intp stride, npyv_u64 a) +{ + ptr[stride * 0] = vec_extract(a, 0); + ptr[stride * 1] = vec_extract(a, 1); +} +NPY_FINLINE void npyv_storen_s64(npy_int64 *ptr, npy_intp stride, npyv_s64 a) +{ npyv_storen_u64((npy_uint64*)ptr, stride, (npyv_u64)a); } +NPY_FINLINE void npyv_storen_f64(double *ptr, npy_intp stride, npyv_f64 a) +{ npyv_storen_u64((npy_uint64*)ptr, stride, (npyv_u64)a); } + +/********************************* + * Partial Load + *********************************/ +//// 32 +NPY_FINLINE npyv_s32 npyv_load_till_s32(const npy_int32 *ptr, npy_uintp nlane, npy_int32 fill) +{ + assert(nlane > 0); + npyv_s32 vfill = npyv_setall_s32(fill); + switch(nlane) { + case 1: + return vec_insert(ptr[0], vfill, 0); + case 2: + return (npyv_s32)vec_insert( + *npyv__ptr2u64(ptr), (npyv_u64)vfill, 0 + ); + case 3: + vfill = vec_insert(ptr[2], vfill, 2); + return (npyv_s32)vec_insert( + *npyv__ptr2u64(ptr), (npyv_u64)vfill, 0 + ); + default: + return npyv_load_s32(ptr); + } +} +// fill zero to rest lanes +NPY_FINLINE npyv_s32 npyv_load_tillz_s32(const npy_int32 *ptr, npy_uintp nlane) +{ return npyv_load_till_s32(ptr, nlane, 0); } +//// 64 +NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, npy_int64 fill) +{ + assert(nlane > 0); + if (nlane == 1) { + return npyv_set_s64(ptr[0], fill); + } + return npyv_load_s64(ptr); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s64 npyv_load_tillz_s64(const npy_int64 *ptr, npy_uintp nlane) +{ return npyv_load_till_s64(ptr, nlane, 0); } +/********************************* + * Non-contiguous partial load + *********************************/ +//// 32 +NPY_FINLINE npyv_s32 +npyv_loadn_till_s32(const npy_int32 *ptr, npy_intp stride, npy_uintp nlane, npy_int32 fill) +{ + assert(nlane > 0); + npyv_s32 vfill = 
npyv_setall_s32(fill); + switch(nlane) { + case 3: + vfill = vec_insert(ptr[stride*2], vfill, 2); + case 2: + vfill = vec_insert(ptr[stride], vfill, 1); + case 1: + vfill = vec_insert(*ptr, vfill, 0); + break; + default: + return npyv_loadn_s32(ptr, stride); + } // switch + return vfill; +} +// fill zero to rest lanes +NPY_FINLINE npyv_s32 +npyv_loadn_tillz_s32(const npy_int32 *ptr, npy_intp stride, npy_uintp nlane) +{ return npyv_loadn_till_s32(ptr, stride, nlane, 0); } +//// 64 +NPY_FINLINE npyv_s64 +npyv_loadn_till_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npy_int64 fill) +{ + assert(nlane > 0); + if (nlane == 1) { + return npyv_set_s64(*ptr, fill); + } + return npyv_loadn_s64(ptr, stride); +} +// fill zero to rest lanes +NPY_FINLINE npyv_s64 npyv_loadn_tillz_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane) +{ return npyv_loadn_till_s64(ptr, stride, nlane, 0); } +/********************************* + * Partial store + *********************************/ +//// 32 +NPY_FINLINE void npyv_store_till_s32(npy_int32 *ptr, npy_uintp nlane, npyv_s32 a) +{ + assert(nlane > 0); + switch(nlane) { + case 1: + *ptr = vec_extract(a, 0); + break; + case 2: + npyv_storel_s32(ptr, a); + break; + case 3: + npyv_storel_s32(ptr, a); + ptr[2] = vec_extract(a, 2); + break; + default: + npyv_store_s32(ptr, a); + } +} +//// 64 +NPY_FINLINE void npyv_store_till_s64(npy_int64 *ptr, npy_uintp nlane, npyv_s64 a) +{ + assert(nlane > 0); + if (nlane == 1) { + npyv_storel_s64(ptr, a); + return; + } + npyv_store_s64(ptr, a); +} +/********************************* + * Non-contiguous partial store + *********************************/ +//// 32 +NPY_FINLINE void npyv_storen_till_s32(npy_int32 *ptr, npy_intp stride, npy_uintp nlane, npyv_s32 a) +{ + assert(nlane > 0); + switch(nlane) { + default: + ptr[stride*3] = vec_extract(a, 3); + case 3: + ptr[stride*2] = vec_extract(a, 2); + case 2: + ptr[stride*1] = vec_extract(a, 1); + case 1: + ptr[stride*0] = vec_extract(a, 0); + break; + } +} +//// 64 +NPY_FINLINE void npyv_storen_till_s64(npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npyv_s64 a) +{ + assert(nlane > 0); + if (nlane == 1) { + npyv_storel_s64(ptr, a); + return; + } + npyv_storen_s64(ptr, stride, a); +} +/***************************************************************** + * Implement partial load/store for u32/f32/u64/f64... 
via casting + *****************************************************************/ +#define NPYV_IMPL_VSX_REST_PARTIAL_TYPES(F_SFX, T_SFX) \ + NPY_FINLINE npyv_##F_SFX npyv_load_till_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_uintp nlane, npyv_lanetype_##F_SFX fill) \ + { \ + union { \ + npyv_lanetype_##F_SFX from_##F_SFX; \ + npyv_lanetype_##T_SFX to_##T_SFX; \ + } pun = {.from_##F_SFX = fill}; \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_load_till_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, nlane, pun.to_##T_SFX \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_loadn_till_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane, \ + npyv_lanetype_##F_SFX fill) \ + { \ + union { \ + npyv_lanetype_##F_SFX from_##F_SFX; \ + npyv_lanetype_##T_SFX to_##T_SFX; \ + } pun = {.from_##F_SFX = fill}; \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_loadn_till_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, stride, nlane, pun.to_##T_SFX \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_load_tillz_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_uintp nlane) \ + { \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_load_tillz_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, nlane \ + )); \ + } \ + NPY_FINLINE npyv_##F_SFX npyv_loadn_tillz_##F_SFX \ + (const npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane) \ + { \ + return npyv_reinterpret_##F_SFX##_##T_SFX(npyv_loadn_tillz_##T_SFX( \ + (const npyv_lanetype_##T_SFX *)ptr, stride, nlane \ + )); \ + } \ + NPY_FINLINE void npyv_store_till_##F_SFX \ + (npyv_lanetype_##F_SFX *ptr, npy_uintp nlane, npyv_##F_SFX a) \ + { \ + npyv_store_till_##T_SFX( \ + (npyv_lanetype_##T_SFX *)ptr, nlane, \ + npyv_reinterpret_##T_SFX##_##F_SFX(a) \ + ); \ + } \ + NPY_FINLINE void npyv_storen_till_##F_SFX \ + (npyv_lanetype_##F_SFX *ptr, npy_intp stride, npy_uintp nlane, npyv_##F_SFX a) \ + { \ + npyv_storen_till_##T_SFX( \ + (npyv_lanetype_##T_SFX *)ptr, stride, nlane, \ + npyv_reinterpret_##T_SFX##_##F_SFX(a) \ + ); \ + } + +NPYV_IMPL_VSX_REST_PARTIAL_TYPES(u32, s32) +NPYV_IMPL_VSX_REST_PARTIAL_TYPES(f32, s32) +NPYV_IMPL_VSX_REST_PARTIAL_TYPES(u64, s64) +NPYV_IMPL_VSX_REST_PARTIAL_TYPES(f64, s64) #endif // _NPY_SIMD_VSX_MEMORY_H From 2fc25ea5c13667c9f09912815a2bdf9c326cf3c8 Mon Sep 17 00:00:00 2001 From: Royston E Tauro <54945757+lucasace@users.noreply.github.com> Date: Fri, 9 Oct 2020 08:07:57 +0530 Subject: [PATCH 232/409] [CODE] Made code cleaner Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com> --- numpy/__init__.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 1fbdb259b2a1..aacae14309c2 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -58,7 +58,7 @@ from typing import ( ) if sys.version_info >= (3, 8): - from typing import Literal, Protocol, SupportsIndex,Final + from typing import Literal, Protocol, SupportsIndex, Final else: from typing_extensions import Literal, Protocol, Final class SupportsIndex(Protocol): From 3f56ca70e286d8d7eacbe997dbcc5113a1c091b3 Mon Sep 17 00:00:00 2001 From: Royston E Tauro <54945757+lucasace@users.noreply.github.com> Date: Fri, 9 Oct 2020 08:08:39 +0530 Subject: [PATCH 233/409] [CODE]Redundant spaces removed Co-authored-by: Bas van Beek <43369155+BvB93@users.noreply.github.com> --- numpy/__init__.pyi | 1 - 1 file changed, 1 deletion(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index aacae14309c2..0bb44ed20e01 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -1611,7 
+1611,6 @@ inf: Final[float] infty: Final[float] nan: Final[float] pi: Final[float] - ALLOW_THREADS: Final[int] BUFSIZE: Final[int] CLIP: Final[int] From 55ed4981998ce78496e30752192a9732c3570888 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Fri, 9 Oct 2020 10:37:46 -0600 Subject: [PATCH 234/409] BUG: Fix indentation. --- numpy/distutils/fcompiler/nv.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/numpy/distutils/fcompiler/nv.py b/numpy/distutils/fcompiler/nv.py index c5f203ee8ddb..8e9f1683558a 100644 --- a/numpy/distutils/fcompiler/nv.py +++ b/numpy/distutils/fcompiler/nv.py @@ -5,14 +5,14 @@ compilers = ['NVHPCFCompiler'] class NVHPCFCompiler(FCompiler): - """ NVIDIA High Performance Computing (HPC) SDK Fortran Compiler + """ NVIDIA High Performance Computing (HPC) SDK Fortran Compiler - https://developer.nvidia.com/hpc-sdk + https://developer.nvidia.com/hpc-sdk - Since august 2020 the NVIDIA HPC SDK includes the compilers formerly known as The Portland Group compilers, - https://www.pgroup.com/index.htm. - See also `numpy.distutils.fcompiler.pg`. - """ + Since august 2020 the NVIDIA HPC SDK includes the compilers formerly known as The Portland Group compilers, + https://www.pgroup.com/index.htm. + See also `numpy.distutils.fcompiler.pg`. + """ compiler_type = 'nv' description = 'NVIDIA HPC SDK' From 73c555fd5ef964896113dc40b09b21fd381085b2 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Fri, 9 Oct 2020 14:25:44 -0400 Subject: [PATCH 235/409] DOC: Display real license on license page Changes license.html to include LICENSE.txt, the real license file. Was showing an out-of-date copy. --- doc/source/license.rst | 35 +++-------------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/doc/source/license.rst b/doc/source/license.rst index 8f360af8830e..beea023ce05a 100644 --- a/doc/source/license.rst +++ b/doc/source/license.rst @@ -1,35 +1,6 @@ ************* -NumPy License +NumPy license ************* -Copyright (c) 2005, NumPy Developers - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - -* Neither the name of the NumPy Developers nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.. 
include:: ../../LICENSE.txt + :literal: From 4f8c5cae3c98c2a9e9934d30e26f10dfcd1b1ed1 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Fri, 9 Oct 2020 20:07:40 +0100 Subject: [PATCH 236/409] DOC: Add docstrings for some scalar types --- numpy/core/_add_newdocs_scalars.py | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py index c367c18ed094..113f84eb4ffa 100644 --- a/numpy/core/_add_newdocs_scalars.py +++ b/numpy/core/_add_newdocs_scalars.py @@ -176,6 +176,46 @@ def add_newdoc_for_scalar_type(obj, fixed_aliases, doc): Any Python object. """) +add_newdoc_for_scalar_type('str_', ['str0', 'unicode_'], + r""" + A unicode string. + + Unlike the builtin `str`, this supports the buffer protocol, exposing its + contents as UCS4: + + >>> bytes(memoryview(np.str_("abcd"))) + b'a\x00\x00\x00b\x00\x00\x00c\x00\x00\x00d\x00\x00\x00' + + When used in arrays, this type cannot contain trailing nulls. + """) + +add_newdoc_for_scalar_type('bytes_', ['bytes0'], + r""" + A byte string. + + When used in arrays, this type cannot contain trailing nulls. + """) + +add_newdoc_for_scalar_type('void', ['void0'], + """ + Either an opaque sequence of bytes, or a structure. + """) + +add_newdoc_for_scalar_type('datetime64', [], + """ + A datetime stored as a 64-bit integer, counting from ``1970-01-01T00:00:00``. + + >>> np.datetime64(10, 'Y') + numpy.datetime64('1980') + >>> np.datetime64(10, 'D') + numpy.datetime64('1970-01-11') + """) + +add_newdoc_for_scalar_type('timedelta64', [], + """ + A timedelta stored as a 64-bit integer. + """) + # TODO: work out how to put this on the base class, np.floating for float_name in ('half', 'single', 'double', 'longdouble'): add_newdoc('numpy.core.numerictypes', float_name, ('as_integer_ratio', From 84a4fcb5af5f9c056e82893223944820af35df18 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Fri, 9 Oct 2020 15:27:34 -0400 Subject: [PATCH 237/409] DOC: Fix empty 'C style guide' page (#17503) Add external link to NEP 45 in the toctrees of dev/index Co-authored-by: Ross Barnowski --- doc/source/dev/index.rst | 4 ++-- doc/source/dev/style_guide.rst | 8 -------- 2 files changed, 2 insertions(+), 10 deletions(-) delete mode 100644 doc/source/dev/style_guide.rst diff --git a/doc/source/dev/index.rst b/doc/source/dev/index.rst index 020df0b2bd29..d2c162c9a4ec 100644 --- a/doc/source/dev/index.rst +++ b/doc/source/dev/index.rst @@ -13,7 +13,7 @@ Contributing to NumPy development_environment development_workflow ../benchmarking - style_guide + NumPy C style guide releasing governance/index howto-docs @@ -296,7 +296,7 @@ The rest of the story development_environment development_workflow ../benchmarking - style_guide + NumPy C style guide releasing governance/index howto-docs diff --git a/doc/source/dev/style_guide.rst b/doc/source/dev/style_guide.rst deleted file mode 100644 index bede3424d0af..000000000000 --- a/doc/source/dev/style_guide.rst +++ /dev/null @@ -1,8 +0,0 @@ -.. _style_guide: - -=================== -NumPy C Style Guide -=================== - -.. include:: ../../C_STYLE_GUIDE.rst.txt - :start-line: 4 From 156cd054e007b05d4ac4829e10a369d19dd2b0b1 Mon Sep 17 00:00:00 2001 From: Lisa Schwetlick Date: Fri, 9 Oct 2020 21:35:47 +0200 Subject: [PATCH 238/409] ENH: add dtype option to cov and corrcoef (#17456) Adds a keyword-only dtype parameter to cov and corrcoef to allow the user to specify the dtype of the output.
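As a quick illustration of the new keyword (this sketch is not part of the patch; the sample array is made up, and NumPy >= 1.20, where the feature lands per the release note below, is assumed):

    import numpy as np

    x = np.array([[0.0, 2.0], [1.0, 1.0], [2.0, 0.0]]).T  # two variables, three observations

    # Default behaviour is unchanged: the result has at least float64 precision.
    assert np.cov(x).dtype == np.float64

    # The new keyword-only argument selects the output precision explicitly.
    c = np.cov(x, dtype=np.float32)
    r = np.corrcoef(x, dtype=np.float32)
    assert c.dtype == np.float32 and r.dtype == np.float32
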
Co-authored-by: Eric Wieser Co-authored-by: Ross Barnowski --- .../upcoming_changes/17456.new_feature.rst | 5 +++ numpy/lib/function_base.py | 33 ++++++++++++++----- numpy/lib/tests/test_function_base.py | 12 +++++++ 3 files changed, 41 insertions(+), 9 deletions(-) create mode 100644 doc/release/upcoming_changes/17456.new_feature.rst diff --git a/doc/release/upcoming_changes/17456.new_feature.rst b/doc/release/upcoming_changes/17456.new_feature.rst new file mode 100644 index 000000000000..7ab014e77df9 --- /dev/null +++ b/doc/release/upcoming_changes/17456.new_feature.rst @@ -0,0 +1,5 @@ +``dtype`` option for `cov` and `corrcoef` +---------------------------------------------------- +The ``dtype`` option is now available for `numpy.cov` and `numpy.corrcoef`. +It specifies which data-type the returned result should have. +By default the functions still return a `numpy.float64` result. diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index c7ddbdb8de67..984f3086e5b7 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -2268,13 +2268,13 @@ def _vectorize_call_with_signature(self, func, args): def _cov_dispatcher(m, y=None, rowvar=None, bias=None, ddof=None, - fweights=None, aweights=None): + fweights=None, aweights=None, *, dtype=None): return (m, y, fweights, aweights) @array_function_dispatch(_cov_dispatcher) def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, - aweights=None): + aweights=None, *, dtype=None): """ Estimate a covariance matrix, given data and weights. @@ -2325,6 +2325,11 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, weights can be used to assign probabilities to observation vectors. .. versionadded:: 1.10 + dtype : data-type, optional + Data-type of the result. By default, the return data-type will have + at least `numpy.float64` precision. + + .. versionadded:: 1.20 Returns ------- @@ -2400,13 +2405,16 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, if m.ndim > 2: raise ValueError("m has more than 2 dimensions") - if y is None: - dtype = np.result_type(m, np.float64) - else: + if y is not None: y = np.asarray(y) if y.ndim > 2: raise ValueError("y has more than 2 dimensions") - dtype = np.result_type(m, y, np.float64) + + if dtype is None: + if y is None: + dtype = np.result_type(m, np.float64) + else: + dtype = np.result_type(m, y, np.float64) X = array(m, ndmin=2, dtype=dtype) if not rowvar and X.shape[0] != 1: @@ -2486,12 +2494,14 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, return c.squeeze() -def _corrcoef_dispatcher(x, y=None, rowvar=None, bias=None, ddof=None): +def _corrcoef_dispatcher(x, y=None, rowvar=None, bias=None, ddof=None, *, + dtype=None): return (x, y) @array_function_dispatch(_corrcoef_dispatcher) -def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, ddof=np._NoValue): +def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, ddof=np._NoValue, *, + dtype=None): """ Return Pearson product-moment correlation coefficients. @@ -2525,6 +2535,11 @@ def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, ddof=np._NoValue): Has no effect, do not use. .. deprecated:: 1.10.0 + dtype : data-type, optional + Data-type of the result. By default, the return data-type will have + at least `numpy.float64` precision. + + .. 
versionadded:: 1.20 Returns ------- @@ -2616,7 +2631,7 @@ def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, ddof=np._NoValue): # 2015-03-15, 1.10 warnings.warn('bias and ddof have no effect and are deprecated', DeprecationWarning, stacklevel=3) - c = cov(x, y, rowvar) + c = cov(x, y, rowvar, dtype=dtype) try: d = diag(c) except ValueError: diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 7bddb941c5c8..4c7c0480c106 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -2023,6 +2023,12 @@ def test_extreme(self): assert_array_almost_equal(c, np.array([[1., -1.], [-1., 1.]])) assert_(np.all(np.abs(c) <= 1.0)) + @pytest.mark.parametrize("test_type", [np.half, np.single, np.double, np.longdouble]) + def test_corrcoef_dtype(self, test_type): + cast_A = self.A.astype(test_type) + res = corrcoef(cast_A, dtype=test_type) + assert test_type == res.dtype + class TestCov: x1 = np.array([[0, 2], [1, 1], [2, 0]]).T @@ -2123,6 +2129,12 @@ def test_unit_fweights_and_aweights(self): aweights=self.unit_weights), self.res1) + @pytest.mark.parametrize("test_type", [np.half, np.single, np.double, np.longdouble]) + def test_cov_dtype(self, test_type): + cast_x1 = self.x1.astype(test_type) + res = cov(cast_x1, dtype=test_type) + assert test_type == res.dtype + class Test_I0: From c6dc9d66ebc7dca16eb83686e74673668b276440 Mon Sep 17 00:00:00 2001 From: Ben Nathanson Date: Fri, 9 Oct 2020 15:40:49 -0400 Subject: [PATCH 239/409] DOC: Update top links in landing page Same intent as PR #17365 -- point "Installation" links to Hugo, start page with "What is NumPy", let Hugo page link to "Troubleshooting ImportError". --- doc/source/_templates/indexcontent.html | 5 +++-- doc/source/user/index.rst | 11 +++++------ doc/source/user/install.rst | 17 +++++++---------- doc/source/user/troubleshooting-importerror.rst | 4 +++- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/doc/source/_templates/indexcontent.html b/doc/source/_templates/indexcontent.html index 2dba16ce8a16..6dd6bf9b0851 100644 --- a/doc/source/_templates/indexcontent.html +++ b/doc/source/_templates/indexcontent.html @@ -12,8 +12,9 @@

 {{ docstitle|e }}
 For users:
[The rest of this hunk, the "For users:" link list in doc/source/_templates/indexcontent.html, is not recoverable: its HTML markup was stripped during extraction, leaving only the two heading fragments above and bare diff markers.]
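Stepping back to the VSX partial-memory intrinsics added earlier in this series, the lane semantics of npyv_loadn_till_s32 / npyv_storen_till_s32 and their _tillz variants can be modelled in a few lines of Python. This is only a sketch of the behaviour: the helper names, the NLANES constant, and the NumPy framing are inventions of this note, while the real intrinsics are C functions operating on 128-bit vector registers.

    import numpy as np

    NLANES = 4  # a 128-bit VSX register holds four 32-bit lanes

    def load_till(buf, nlane, fill, stride=1):
        # Read up to NLANES elements from `buf` at `stride`, padding the
        # remaining lanes with `fill` (mirrors npyv_loadn_till_s32).
        assert nlane > 0
        n = min(nlane, NLANES)
        lanes = np.full(NLANES, fill, dtype=np.int32)
        lanes[:n] = buf[:n * stride:stride]
        return lanes

    def store_till(buf, nlane, vec, stride=1):
        # Write back only the first `nlane` lanes (mirrors npyv_storen_till_s32).
        assert nlane > 0
        n = min(nlane, NLANES)
        buf[:n * stride:stride] = vec[:n]

    data = np.arange(10, dtype=np.int32)
    v = load_till(data, nlane=3, fill=-1, stride=2)   # -> [0, 2, 4, -1]

    out = np.zeros(8, dtype=np.int32)
    store_till(out, nlane=3, vec=v, stride=2)         # touches out[0], out[2], out[4] only

    # The _tillz variants in the patch are just the fill == 0 case:
    assert (load_till(data, 2, 0) == np.array([0, 1, 0, 0], dtype=np.int32)).all()

    # The union type-pun in NPYV_IMPL_VSX_REST_PARTIAL_TYPES maps a float fill
    # value onto the integer lane type bit-for-bit; the NumPy analogue:
    f_fill = np.float32(1.5)
    assert f_fill.view(np.int32).view(np.float32) == f_fill

The design point behind the macro is that VSX only needs one real implementation per lane width (s32/s64); the unsigned and float flavours reuse it by reinterpreting bits, which costs nothing at the register level.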