From db962688b5f3cfb6b1e7e83038510ab150d24b52 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sun, 12 Jan 2020 17:43:28 +0200 Subject: [PATCH 1/3] STY: concat strings that should not be separated --- pandas/_libs/algos.pyx | 9 ++-- pandas/_libs/groupby.pyx | 3 +- pandas/_libs/hashing.pyx | 11 +++-- pandas/_libs/indexing.pyx | 5 +- pandas/_libs/sparse.pyx | 6 +-- pandas/_libs/testing.pyx | 6 +-- pandas/_libs/tslibs/timestamps.pyx | 74 ++++++++++++++++++---------- pandas/_libs/window/aggregations.pyx | 6 +-- 8 files changed, 69 insertions(+), 51 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 7a2fc9dc7845a..dd1f38ce3a842 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -914,8 +914,7 @@ def rank_1d(rank_t[:] in_arr, ties_method='average', ranks[argsorted[j]] = i + 1 elif tiebreak == TIEBREAK_FIRST: if rank_t is object: - raise ValueError('first not supported for ' - 'non-numeric data') + raise ValueError('first not supported for non-numeric data') else: for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = j + 1 @@ -971,8 +970,7 @@ def rank_1d(rank_t[:] in_arr, ties_method='average', ranks[argsorted[j]] = i + 1 elif tiebreak == TIEBREAK_FIRST: if rank_t is object: - raise ValueError('first not supported for ' - 'non-numeric data') + raise ValueError('first not supported for non-numeric data') else: for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = j + 1 @@ -1137,8 +1135,7 @@ def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average', ranks[i, argsorted[i, z]] = j + 1 elif tiebreak == TIEBREAK_FIRST: if rank_t is object: - raise ValueError('first not supported ' - 'for non-numeric data') + raise ValueError('first not supported for non-numeric data') else: for z in range(j - dups + 1, j + 1): ranks[i, argsorted[i, z]] = z + 1 diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index abb8a6d388d26..93ea94f7b18fc 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ 
-686,8 +686,7 @@ def _group_ohlc(floating[:, :] out, raise ValueError('Output array must have 4 columns') if K > 1: - raise NotImplementedError("Argument 'values' must have only " - "one dimension") + raise NotImplementedError("Argument 'values' must have only one dimension") out[:] = np.nan with nogil: diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 5298d8c5ed34e..878da670b2f68 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -51,8 +51,9 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'): k = key.encode(encoding) kb = k if len(k) != 16: - raise ValueError("key should be a 16-byte string encoded, " - f"got {k} (len {len(k)})") + raise ValueError( + f"key should be a 16-byte string encoded, got {k} (len {len(k)})" + ) n = len(arr) @@ -77,8 +78,10 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'): hash(val) data = str(val).encode(encoding) else: - raise TypeError(f"{val} of type {type(val)} is not a valid type " - "for hashing, must be string or null") + raise TypeError( + f"{val} of type {type(val)} is not a valid type for hashing, " + "must be string or null" + ) l = len(data) lens[i] = l diff --git a/pandas/_libs/indexing.pyx b/pandas/_libs/indexing.pyx index 01f4fb060d982..cdccdb504571c 100644 --- a/pandas/_libs/indexing.pyx +++ b/pandas/_libs/indexing.pyx @@ -18,6 +18,7 @@ cdef class _NDFrameIndexerBase: if ndim is None: ndim = self._ndim = self.obj.ndim if ndim > 2: - raise ValueError("NDFrameIndexer does not support " - "NDFrame objects with ndim > 2") + raise ValueError( + "NDFrameIndexer does not support NDFrame objects with ndim > 2" + ) return ndim diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index ee83901040b36..3a6dd506b2428 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -72,9 +72,9 @@ cdef class IntIndex(SparseIndex): """ if self.npoints > self.length: - msg = (f"Too many indices. 
Expected " - f"{self.length} but found {self.npoints}") - raise ValueError(msg) + raise ValueError( + f"Too many indices. Expected {self.length} but found {self.npoints}" + ) # Indices are vacuously ordered and non-negative # if the sequence of indices is empty. diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 5a30b71a6fea1..0e57b563d4d25 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -127,9 +127,9 @@ cpdef assert_almost_equal(a, b, # classes can't be the same, to raise error assert_class_equal(a, b, obj=obj) - assert has_length(a) and has_length(b), ("Can't compare objects without " - "length, one or both is invalid: " - f"({a}, {b})") + assert has_length(a) and has_length(b), ( + f"Can't compare objects without length, one or both is invalid: ({a}, {b})" + ) if a_is_ndarray and b_is_ndarray: na, nb = a.size, b.size diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index abe7f9e5b4105..83f01c1cfc3d8 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -161,8 +161,7 @@ def round_nsint64(values, mode, freq): # if/elif above should catch all rounding modes defined in enum 'RoundTo': # if flow of control arrives here, it is a bug - raise ValueError("round_nsint64 called with an unrecognized " - "rounding mode") + raise ValueError("round_nsint64 called with an unrecognized rounding mode") # ---------------------------------------------------------------------- @@ -324,8 +323,10 @@ class Timestamp(_Timestamp): Function is not implemented. Use pd.to_datetime(). """ - raise NotImplementedError("Timestamp.strptime() is not implemented." - "Use to_datetime() to parse date strings.") + raise NotImplementedError( + "Timestamp.strptime() is not implemented." + "Use to_datetime() to parse date strings." 
+ ) @classmethod def combine(cls, date, time): @@ -381,8 +382,9 @@ class Timestamp(_Timestamp): if tzinfo is not None: if not PyTZInfo_Check(tzinfo): # tzinfo must be a datetime.tzinfo object, GH#17690 - raise TypeError(f'tzinfo must be a datetime.tzinfo object, ' - f'not {type(tzinfo)}') + raise TypeError( + f"tzinfo must be a datetime.tzinfo object, not {type(tzinfo)}" + ) elif tz is not None: raise ValueError('Can provide at most one of tz, tzinfo') @@ -393,8 +395,10 @@ class Timestamp(_Timestamp): # User passed a date string to parse. # Check that the user didn't also pass a date attribute kwarg. if any(arg is not None for arg in _date_attributes): - raise ValueError('Cannot pass a date attribute keyword ' - 'argument when passing a date string') + raise ValueError( + "Cannot pass a date attribute keyword " + "argument when passing a date string" + ) elif ts_input is _no_input: # User passed keyword arguments. @@ -578,8 +582,10 @@ timedelta}, default 'raise' @tz.setter def tz(self, value): # GH 3746: Prevent localizing or converting the index by setting tz - raise AttributeError("Cannot directly set timezone. Use tz_localize() " - "or tz_convert() as appropriate") + raise AttributeError( + "Cannot directly set timezone. 
" + "Use tz_localize() or tz_convert() as appropriate" + ) def __setstate__(self, state): self.value = state[0] @@ -598,9 +604,10 @@ timedelta}, default 'raise' if self.tz is not None: # GH#21333 - warnings.warn("Converting to Period representation will " - "drop timezone information.", - UserWarning) + warnings.warn( + "Converting to Period representation will drop timezone information.", + UserWarning, + ) if freq is None: freq = self.freq @@ -810,13 +817,13 @@ default 'raise' if ambiguous == 'infer': raise ValueError('Cannot infer offset with only one time.') - nonexistent_options = ('raise', 'NaT', 'shift_forward', - 'shift_backward') + nonexistent_options = ('raise', 'NaT', 'shift_forward', 'shift_backward') if nonexistent not in nonexistent_options and not isinstance( nonexistent, timedelta): - raise ValueError("The nonexistent argument must be one of 'raise', " - "'NaT', 'shift_forward', 'shift_backward' or " - "a timedelta object") + raise ValueError( + "The nonexistent argument must be one of 'raise', " + "'NaT', 'shift_forward', 'shift_backward' or a timedelta object" + ) if self.tzinfo is None: # tz naive, localize @@ -833,8 +840,9 @@ default 'raise' value = tz_convert_single(self.value, UTC, self.tz) return Timestamp(value, tz=tz, freq=self.freq) else: - raise TypeError('Cannot localize tz-aware Timestamp, use ' - 'tz_convert for conversions') + raise TypeError( + "Cannot localize tz-aware Timestamp, use tz_convert for conversions" + ) def tz_convert(self, tz): """ @@ -857,17 +865,28 @@ default 'raise' """ if self.tzinfo is None: # tz naive, use tz_localize - raise TypeError('Cannot convert tz-naive Timestamp, use ' - 'tz_localize to localize') + raise TypeError( + "Cannot convert tz-naive Timestamp, use tz_localize to localize" + ) else: # Same UTC timestamp, different time zone return Timestamp(self.value, tz=tz, freq=self.freq) astimezone = tz_convert - def replace(self, year=None, month=None, day=None, - hour=None, minute=None, second=None, 
microsecond=None, - nanosecond=None, tzinfo=object, fold=0): + def replace( + self, + year=None, + month=None, + day=None, + hour=None, + minute=None, + second=None, + microsecond=None, + nanosecond=None, + tzinfo=object, + fold=0, + ): """ implements datetime.replace, handles nanoseconds. @@ -910,8 +929,9 @@ default 'raise' def validate(k, v): """ validate integers """ if not is_integer_object(v): - raise ValueError(f"value must be an integer, received " - f"{type(v)} for {k}") + raise ValueError( + f"value must be an integer, received {type(v)} for {k}" + ) return v if year is not None: diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 0348843abc129..b906d46c1d849 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1491,8 +1491,7 @@ cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values, tot_wgt = np.zeros(in_n, dtype=np.float64) if minp > win_n: - raise ValueError(f"min_periods (minp) must be <= " - f"window (win)") + raise ValueError(f"min_periods {minp} must be <= window {win_n}") elif minp > in_n: minp = in_n + 1 elif minp < 0: @@ -1871,8 +1870,7 @@ def ewmcov(float64_t[:] input_x, float64_t[:] input_y, bint is_observation if len(input_y) != N: - raise ValueError(f"arrays are of different lengths " - f"({N} and {len(input_y)})") + raise ValueError(f"arrays are of different lengths ({N} and {len(input_y)})") output = np.empty(N, dtype=float) if N == 0: From d1f542cdca5a8362c8537ecb31d0dd8e3406d89b Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sun, 12 Jan 2020 21:23:24 +0200 Subject: [PATCH 2/3] Update pandas/_libs/tslibs/timestamps.pyx Co-Authored-By: Simon Hawkins --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 83f01c1cfc3d8..36566b55e74ad 100644 --- 
a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -324,7 +324,7 @@ class Timestamp(_Timestamp): Function is not implemented. Use pd.to_datetime(). """ raise NotImplementedError( - "Timestamp.strptime() is not implemented." + "Timestamp.strptime() is not implemented. " "Use to_datetime() to parse date strings." ) From 9f45b89307a669689122140164115922c2214e8b Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Mon, 13 Jan 2020 16:13:09 +0200 Subject: [PATCH 3/3] Reverted error message --- pandas/_libs/window/aggregations.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index b906d46c1d849..fe74d701ef00f 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1491,7 +1491,8 @@ cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values, tot_wgt = np.zeros(in_n, dtype=np.float64) if minp > win_n: - raise ValueError(f"min_periods {minp} must be <= window {win_n}") + raise ValueError(f"min_periods (minp) must be <= " + f"window (win)") elif minp > in_n: minp = in_n + 1 elif minp < 0: