From db962688b5f3cfb6b1e7e83038510ab150d24b52 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Sun, 12 Jan 2020 17:43:28 +0200
Subject: [PATCH 1/3] STY: concat strings that should not be separated

---
 pandas/_libs/algos.pyx               |  9 ++--
 pandas/_libs/groupby.pyx             |  3 +-
 pandas/_libs/hashing.pyx             | 11 +++--
 pandas/_libs/indexing.pyx            |  5 +-
 pandas/_libs/sparse.pyx              |  6 +--
 pandas/_libs/testing.pyx             |  6 +--
 pandas/_libs/tslibs/timestamps.pyx   | 74 ++++++++++++++++++----------
 pandas/_libs/window/aggregations.pyx |  6 +--
 8 files changed, 69 insertions(+), 51 deletions(-)
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 7a2fc9dc7845a..dd1f38ce3a842 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -914,8 +914,7 @@ def rank_1d(rank_t[:] in_arr, ties_method='average',
                         ranks[argsorted[j]] = i + 1
                 elif tiebreak == TIEBREAK_FIRST:
                     if rank_t is object:
-                        raise ValueError('first not supported for '
-                                         'non-numeric data')
+                        raise ValueError('first not supported for non-numeric data')
                     else:
                         for j in range(i - dups + 1, i + 1):
                             ranks[argsorted[j]] = j + 1
@@ -971,8 +970,7 @@ def rank_1d(rank_t[:] in_arr, ties_method='average',
                             ranks[argsorted[j]] = i + 1
                     elif tiebreak == TIEBREAK_FIRST:
                         if rank_t is object:
-                            raise ValueError('first not supported for '
-                                             'non-numeric data')
+                            raise ValueError('first not supported for non-numeric data')
                         else:
                             for j in range(i - dups + 1, i + 1):
                                 ranks[argsorted[j]] = j + 1
@@ -1137,8 +1135,7 @@ def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average',
                         ranks[i, argsorted[i, z]] = j + 1
                 elif tiebreak == TIEBREAK_FIRST:
                     if rank_t is object:
-                        raise ValueError('first not supported '
-                                         'for non-numeric data')
+                        raise ValueError('first not supported for non-numeric data')
                     else:
                         for z in range(j - dups + 1, j + 1):
                             ranks[i, argsorted[i, z]] = z + 1
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index abb8a6d388d26..93ea94f7b18fc 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -686,8 +686,7 @@ def _group_ohlc(floating[:, :] out,
         raise ValueError('Output array must have 4 columns')
 
     if K > 1:
-        raise NotImplementedError("Argument 'values' must have only "
-                                  "one dimension")
+        raise NotImplementedError("Argument 'values' must have only one dimension")
     out[:] = np.nan
 
     with nogil:
diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx
index 5298d8c5ed34e..878da670b2f68 100644
--- a/pandas/_libs/hashing.pyx
+++ b/pandas/_libs/hashing.pyx
@@ -51,8 +51,9 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
     k = <bytes>key.encode(encoding)
     kb = <uint8_t *>k
     if len(k) != 16:
-        raise ValueError("key should be a 16-byte string encoded, "
-                         f"got {k} (len {len(k)})")
+        raise ValueError(
+            f"key should be a 16-byte string encoded, got {k} (len {len(k)})"
+        )
 
     n = len(arr)
 
@@ -77,8 +78,10 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
             hash(val)
             data = <bytes>str(val).encode(encoding)
         else:
-            raise TypeError(f"{val} of type {type(val)} is not a valid type "
-                            "for hashing, must be string or null")
+            raise TypeError(
+                f"{val} of type {type(val)} is not a valid type for hashing, "
+                "must be string or null"
+            )
 
         l = len(data)
         lens[i] = l
diff --git a/pandas/_libs/indexing.pyx b/pandas/_libs/indexing.pyx
index 01f4fb060d982..cdccdb504571c 100644
--- a/pandas/_libs/indexing.pyx
+++ b/pandas/_libs/indexing.pyx
@@ -18,6 +18,7 @@ cdef class _NDFrameIndexerBase:
         if ndim is None:
             ndim = self._ndim = self.obj.ndim
             if ndim > 2:
-                raise ValueError("NDFrameIndexer does not support "
-                                 "NDFrame objects with ndim > 2")
+                raise ValueError(
+                    "NDFrameIndexer does not support NDFrame objects with ndim > 2"
+                )
         return ndim
diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx
index ee83901040b36..3a6dd506b2428 100644
--- a/pandas/_libs/sparse.pyx
+++ b/pandas/_libs/sparse.pyx
@@ -72,9 +72,9 @@ cdef class IntIndex(SparseIndex):
         """
 
         if self.npoints > self.length:
-            msg = (f"Too many indices. Expected "
-                   f"{self.length} but found {self.npoints}")
-            raise ValueError(msg)
+            raise ValueError(
+                f"Too many indices. Expected {self.length} but found {self.npoints}"
+            )
 
         # Indices are vacuously ordered and non-negative
         # if the sequence of indices is empty.
diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx
index 5a30b71a6fea1..0e57b563d4d25 100644
--- a/pandas/_libs/testing.pyx
+++ b/pandas/_libs/testing.pyx
@@ -127,9 +127,9 @@ cpdef assert_almost_equal(a, b,
             # classes can't be the same, to raise error
             assert_class_equal(a, b, obj=obj)
 
-        assert has_length(a) and has_length(b), ("Can't compare objects without "
-                                                 "length, one or both is invalid: "
-                                                 f"({a}, {b})")
+        assert has_length(a) and has_length(b), (
+            f"Can't compare objects without length, one or both is invalid: ({a}, {b})"
+        )
 
         if a_is_ndarray and b_is_ndarray:
             na, nb = a.size, b.size
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
index abe7f9e5b4105..83f01c1cfc3d8 100644
--- a/pandas/_libs/tslibs/timestamps.pyx
+++ b/pandas/_libs/tslibs/timestamps.pyx
@@ -161,8 +161,7 @@ def round_nsint64(values, mode, freq):
 
     # if/elif above should catch all rounding modes defined in enum 'RoundTo':
     # if flow of control arrives here, it is a bug
-    raise ValueError("round_nsint64 called with an unrecognized "
-                     "rounding mode")
+    raise ValueError("round_nsint64 called with an unrecognized rounding mode")
 
 
 # ----------------------------------------------------------------------
@@ -324,8 +323,10 @@ class Timestamp(_Timestamp):
 
         Function is not implemented. Use pd.to_datetime().
         """
-        raise NotImplementedError("Timestamp.strptime() is not implemented."
-                                  "Use to_datetime() to parse date strings.")
+        raise NotImplementedError(
+            "Timestamp.strptime() is not implemented."
+            "Use to_datetime() to parse date strings."
+        )
 
     @classmethod
     def combine(cls, date, time):
@@ -381,8 +382,9 @@ class Timestamp(_Timestamp):
         if tzinfo is not None:
             if not PyTZInfo_Check(tzinfo):
                 # tzinfo must be a datetime.tzinfo object, GH#17690
-                raise TypeError(f'tzinfo must be a datetime.tzinfo object, '
-                                f'not {type(tzinfo)}')
+                raise TypeError(
+                    f"tzinfo must be a datetime.tzinfo object, not {type(tzinfo)}"
+                )
             elif tz is not None:
                 raise ValueError('Can provide at most one of tz, tzinfo')
 
@@ -393,8 +395,10 @@ class Timestamp(_Timestamp):
             # User passed a date string to parse.
             # Check that the user didn't also pass a date attribute kwarg.
             if any(arg is not None for arg in _date_attributes):
-                raise ValueError('Cannot pass a date attribute keyword '
-                                 'argument when passing a date string')
+                raise ValueError(
+                    "Cannot pass a date attribute keyword "
+                    "argument when passing a date string"
+                )
 
         elif ts_input is _no_input:
             # User passed keyword arguments.
@@ -578,8 +582,10 @@ timedelta}, default 'raise'
     @tz.setter
     def tz(self, value):
         # GH 3746: Prevent localizing or converting the index by setting tz
-        raise AttributeError("Cannot directly set timezone. Use tz_localize() "
-                             "or tz_convert() as appropriate")
+        raise AttributeError(
+            "Cannot directly set timezone. "
+            "Use tz_localize() or tz_convert() as appropriate"
+        )
 
     def __setstate__(self, state):
         self.value = state[0]
@@ -598,9 +604,10 @@ timedelta}, default 'raise'
 
         if self.tz is not None:
             # GH#21333
-            warnings.warn("Converting to Period representation will "
-                          "drop timezone information.",
-                          UserWarning)
+            warnings.warn(
+                "Converting to Period representation will drop timezone information.",
+                UserWarning,
+            )
 
         if freq is None:
             freq = self.freq
@@ -810,13 +817,13 @@ default 'raise'
         if ambiguous == 'infer':
             raise ValueError('Cannot infer offset with only one time.')
 
-        nonexistent_options = ('raise', 'NaT', 'shift_forward',
-                               'shift_backward')
+        nonexistent_options = ('raise', 'NaT', 'shift_forward', 'shift_backward')
         if nonexistent not in nonexistent_options and not isinstance(
             nonexistent, timedelta):
-            raise ValueError("The nonexistent argument must be one of 'raise', "
-                             "'NaT', 'shift_forward', 'shift_backward' or "
-                             "a timedelta object")
+            raise ValueError(
+                "The nonexistent argument must be one of 'raise', "
+                "'NaT', 'shift_forward', 'shift_backward' or a timedelta object"
+            )
 
         if self.tzinfo is None:
             # tz naive, localize
@@ -833,8 +840,9 @@ default 'raise'
                 value = tz_convert_single(self.value, UTC, self.tz)
                 return Timestamp(value, tz=tz, freq=self.freq)
             else:
-                raise TypeError('Cannot localize tz-aware Timestamp, use '
-                                'tz_convert for conversions')
+                raise TypeError(
+                    "Cannot localize tz-aware Timestamp, use tz_convert for conversions"
+                )
 
     def tz_convert(self, tz):
         """
@@ -857,17 +865,28 @@ default 'raise'
         """
         if self.tzinfo is None:
             # tz naive, use tz_localize
-            raise TypeError('Cannot convert tz-naive Timestamp, use '
-                            'tz_localize to localize')
+            raise TypeError(
+                "Cannot convert tz-naive Timestamp, use tz_localize to localize"
+            )
         else:
             # Same UTC timestamp, different time zone
             return Timestamp(self.value, tz=tz, freq=self.freq)
 
     astimezone = tz_convert
 
-    def replace(self, year=None, month=None, day=None,
-                hour=None, minute=None, second=None, microsecond=None,
-                nanosecond=None, tzinfo=object, fold=0):
+    def replace(
+        self,
+        year=None,
+        month=None,
+        day=None,
+        hour=None,
+        minute=None,
+        second=None,
+        microsecond=None,
+        nanosecond=None,
+        tzinfo=object,
+        fold=0,
+    ):
         """
         implements datetime.replace, handles nanoseconds.
 
@@ -910,8 +929,9 @@ default 'raise'
         def validate(k, v):
             """ validate integers """
             if not is_integer_object(v):
-                raise ValueError(f"value must be an integer, received "
-                                 f"{type(v)} for {k}")
+                raise ValueError(
+                    f"value must be an integer, received {type(v)} for {k}"
+                )
             return v
 
         if year is not None:
diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
index 0348843abc129..b906d46c1d849 100644
--- a/pandas/_libs/window/aggregations.pyx
+++ b/pandas/_libs/window/aggregations.pyx
@@ -1491,8 +1491,7 @@ cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values,
         tot_wgt = np.zeros(in_n, dtype=np.float64)
 
     if minp > win_n:
-        raise ValueError(f"min_periods (minp) must be <= "
-                         f"window (win)")
+        raise ValueError(f"min_periods {minp} must be <= window {win_n}")
     elif minp > in_n:
         minp = in_n + 1
     elif minp < 0:
@@ -1871,8 +1870,7 @@ def ewmcov(float64_t[:] input_x, float64_t[:] input_y,
         bint is_observation
 
     if <Py_ssize_t>len(input_y) != N:
-        raise ValueError(f"arrays are of different lengths "
-                         f"({N} and {len(input_y)})")
+        raise ValueError(f"arrays are of different lengths ({N} and {len(input_y)})")
 
     output = np.empty(N, dtype=float)
     if N == 0:

From d1f542cdca5a8362c8537ecb31d0dd8e3406d89b Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com>
Date: Sun, 12 Jan 2020 21:23:24 +0200
Subject: [PATCH 2/3] Update pandas/_libs/tslibs/timestamps.pyx

Co-Authored-By: Simon Hawkins <simonjayhawkins@gmail.com>
---
 pandas/_libs/tslibs/timestamps.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
index 83f01c1cfc3d8..36566b55e74ad 100644
--- a/pandas/_libs/tslibs/timestamps.pyx
+++ b/pandas/_libs/tslibs/timestamps.pyx
@@ -324,7 +324,7 @@ class Timestamp(_Timestamp):
         Function is not implemented. Use pd.to_datetime().
         """
         raise NotImplementedError(
-            "Timestamp.strptime() is not implemented."
+            "Timestamp.strptime() is not implemented. "
             "Use to_datetime() to parse date strings."
         )
 

From 9f45b89307a669689122140164115922c2214e8b Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Mon, 13 Jan 2020 16:13:09 +0200
Subject: [PATCH 3/3] Reverted error message

---
 pandas/_libs/window/aggregations.pyx | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
index b906d46c1d849..fe74d701ef00f 100644
--- a/pandas/_libs/window/aggregations.pyx
+++ b/pandas/_libs/window/aggregations.pyx
@@ -1491,7 +1491,8 @@ cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values,
         tot_wgt = np.zeros(in_n, dtype=np.float64)
 
     if minp > win_n:
-        raise ValueError(f"min_periods {minp} must be <= window {win_n}")
+        raise ValueError(f"min_periods (minp) must be <= "
+                         f"window (win)")
     elif minp > in_n:
         minp = in_n + 1
     elif minp < 0: