diff --git a/ci/setup_env.sh b/ci/setup_env.sh index 382491a947488..1be35e933eb0d 100755 --- a/ci/setup_env.sh +++ b/ci/setup_env.sh @@ -118,8 +118,8 @@ conda list pandas echo "[Build extensions]" python setup.py build_ext -q -i -# XXX: Some of our environments end up with old verisons of pip (10.x) -# Adding a new enough verison of pip to the requirements explodes the +# XXX: Some of our environments end up with old versions of pip (10.x) +# Adding a new enough version of pip to the requirements explodes the # solve time. Just using pip to update itself. # - py35_macos # - py35_compat diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 503f9b6bfb1f0..1c92da1ce61e3 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -797,7 +797,7 @@ The columns were lexicographically sorted previously, The column order now matches the insertion-order of the keys in the ``dict``, considering all the records from top to bottom. As a consequence, the column -order of the resulting DataFrame has changed compared to previous pandas verisons. +order of the resulting DataFrame has changed compared to previous pandas versions. .. ipython:: python @@ -886,7 +886,7 @@ Other API changes - Using an unsupported version of Beautiful Soup 4 will now raise an ``ImportError`` instead of a ``ValueError`` (:issue:`27063`) - :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` will now raise a ``ValueError`` when saving timezone aware data. (:issue:`27008`, :issue:`7056`) - :meth:`ExtensionArray.argsort` places NA values at the end of the sorted array. (:issue:`21801`) -- :meth:`DataFrame.to_hdf` and :meth:`Series.to_hdf` will now raise a ``NotImplementedError`` when saving a :class:`MultiIndex` with extention data types for a ``fixed`` format. 
(:issue:`7775`) +- :meth:`DataFrame.to_hdf` and :meth:`Series.to_hdf` will now raise a ``NotImplementedError`` when saving a :class:`MultiIndex` with extension data types for a ``fixed`` format. (:issue:`7775`) - Passing duplicate ``names`` in :meth:`read_csv` will now raise a ``ValueError`` (:issue:`17346`) .. _whatsnew_0250.deprecations: @@ -1106,7 +1106,7 @@ Indexing - Improved exception message when calling :meth:`DataFrame.iloc` with a list of non-numeric objects (:issue:`25753`). - Improved exception message when calling ``.iloc`` or ``.loc`` with a boolean indexer with different length (:issue:`26658`). -- Bug in ``KeyError`` exception message when indexing a :class:`MultiIndex` with a non-existant key not displaying the original key (:issue:`27250`). +- Bug in ``KeyError`` exception message when indexing a :class:`MultiIndex` with a non-existent key not displaying the original key (:issue:`27250`). - Bug in ``.iloc`` and ``.loc`` with a boolean indexer not raising an ``IndexError`` when too few items are passed (:issue:`26658`). - Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` where ``KeyError`` was not raised for a ``MultiIndex`` when the key was less than or equal to the number of levels in the :class:`MultiIndex` (:issue:`14885`). - Bug in which :meth:`DataFrame.append` produced an erroneous warning indicating that a ``KeyError`` will be thrown in the future when the data to be appended contains new columns (:issue:`22252`). diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index ca70c8af45f2f..44c6944b6f2b5 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -92,7 +92,7 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst): At the beginning function tries to parse date in MM/DD/YYYY format, but if month > 12 - in DD/MM/YYYY (`dayfirst == False`). 
With `dayfirst == True` function makes an attempt to parse date in - DD/MM/YYYY, if an attemp is wrong - in DD/MM/YYYY + DD/MM/YYYY, if an attempt is wrong - in MM/DD/YYYY Note ---- @@ -732,7 +732,7 @@ class _timelex: stream = self.stream.replace('\x00', '') # TODO: Change \s --> \s+ (this doesn't match existing behavior) - # TODO: change the punctuation block to punc+ (doesnt match existing) + # TODO: change the punctuation block to punc+ (does not match existing) # TODO: can we merge the two digit patterns? tokens = re.findall('\s|' '(?>> df = pd.DataFrame({"A": [1, 2], "B": [3.0, 4.5]}) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index afa4f1a5a8c76..d7cc82ccf10f2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4424,7 +4424,7 @@ def asof_locs(self, where, mask): every entry in the `where` argument. As in the `asof` function, if the label (a particular entry in - `where`) is not in the index, the latest index label upto the + `where`) is not in the index, the latest index label up to the passed label is chosen and its index returned. If all of the labels in the index are later than a label in `where`, diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 29e297cb28a3b..ce6491b892fad 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -747,7 +747,7 @@ def _maybe_convert_i8(self, key): Returns ------- key: scalar or list-like - The original key if no conversion occured, int if converted scalar, + The original key if no conversion occurred, int if converted scalar, Int64Index if converted list-like. """ original = key diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py index 4fe69f64bd0ae..3bb7bb022dd3a 100644 --- a/pandas/core/ops/missing.py +++ b/pandas/core/ops/missing.py @@ -1,7 +1,7 @@ """ Missing data handling for arithmetic operations. 
-In particular, pandas conventions regarding divison by zero differ +In particular, pandas conventions regarding division by zero differ from numpy in the following ways: 1) np.array([-1, 0, 1], dtype=dtype1) // np.array([0, 0, 0], dtype=dtype2) gives [nan, nan, nan] for most dtype combinations, and [0, 0, 0] for diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 24a255c78f3c0..47ffeda4083a1 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -192,7 +192,7 @@ def json_normalize( 1 {'height': 130, 'weight': 60} NaN Mose Reg 2 {'height': 130, 'weight': 60} 2.0 Faye Raker - Normalizes nested data upto level 1. + Normalizes nested data up to level 1. >>> data = [{'id': 1, ... 'name': "Cole Volk", diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 4b9a52a1fb8f3..77c1c62a1fd1f 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -144,7 +144,7 @@ def read_pickle(path, compression="infer"): path = _stringify_path(path) f, fh = _get_handle(path, "rb", compression=compression, is_text=False) - # 1) try standard libary Pickle + # 1) try standard library Pickle # 2) try pickle_compat (older pandas version) to handle subclass changes # 3) try pickle_compat with latin1 encoding diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 0b674b556b2ee..e2f7a9d75f24e 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -2730,7 +2730,7 @@ def generate_table(self): Modifies the DataFrame in-place. The DataFrame returned encodes the (v,o) values as uint64s. 
The - encoding depends on teh dta version, and can be expressed as + encoding depends on the dta version, and can be expressed as enc = v + o * 2 ** (o_size * 8) diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py index 3e110a4b040da..26ab4ff0ded85 100644 --- a/pandas/tests/frame/test_timezones.py +++ b/pandas/tests/frame/test_timezones.py @@ -37,7 +37,7 @@ def test_frame_values_with_tz(self): expected = np.concatenate([expected, expected], axis=1) tm.assert_numpy_array_equal(result, expected) - # three columns, heterogenous + # three columns, heterogeneous est = "US/Eastern" df = df.assign(C=df.A.dt.tz_convert(est)) diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 35a5cccc0ec45..2668197535fcc 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -74,7 +74,7 @@ def test_copy_method(deep): @pytest.mark.parametrize( "kwarg, value", [ - ("names", ["thrid", "fourth"]), + ("names", ["third", "fourth"]), ("levels", [["foo2", "bar2"], ["fizz2", "buzz2"]]), ("codes", [[1, 0, 0, 0], [1, 1, 0, 0]]), ], diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 454e2afb8abe0..05f67de7bef09 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -54,7 +54,7 @@ def filepath_or_buffer_id(request): @pytest.fixture def filepath_or_buffer(filepath_or_buffer_id, tmp_path): """ - A fixture yeilding a string representing a filepath, a path-like object + A fixture yielding a string representing a filepath, a path-like object and a StringIO buffer. Also checks that buffer is not closed. 
""" if filepath_or_buffer_id == "buffer": diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 3ceddfc3c1db4..939ee5fb192ff 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -546,7 +546,7 @@ def test_donot_drop_nonevalues(self): def test_nonetype_top_level_bottom_level(self): # GH21158: If inner level json has a key with a null value - # make sure it doesnt do a new_d.pop twice and except + # make sure it does not do a new_d.pop twice and except data = { "id": None, "location": { @@ -578,7 +578,7 @@ def test_nonetype_top_level_bottom_level(self): def test_nonetype_multiple_levels(self): # GH21158: If inner level json has a key with a null value - # make sure it doesnt do a new_d.pop twice and except + # make sure it does not do a new_d.pop twice and except data = { "id": None, "location": { diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index ddf2c6e65b474..c150ee875db0a 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -669,7 +669,7 @@ def setup_method(self, method): self.offset2 = BDay(2) def test_different_normalize_equals(self): - # GH#21404 changed __eq__ to return False when `normalize` doesnt match + # GH#21404 changed __eq__ to return False when `normalize` does not match offset = self._offset() offset2 = self._offset(normalize=True) assert offset != offset2 @@ -911,7 +911,7 @@ def test_constructor_errors(self, start, end, match): BusinessHour(start=start, end=end) def test_different_normalize_equals(self): - # GH#21404 changed __eq__ to return False when `normalize` doesnt match + # GH#21404 changed __eq__ to return False when `normalize` does not match offset = self._offset() offset2 = self._offset(normalize=True) assert offset != offset2 @@ -2277,7 +2277,7 @@ def test_constructor_errors(self): CustomBusinessHour(start="14:00:05") def 
test_different_normalize_equals(self): - # GH#21404 changed __eq__ to return False when `normalize` doesnt match + # GH#21404 changed __eq__ to return False when `normalize` does not match offset = self._offset() offset2 = self._offset(normalize=True) assert offset != offset2 @@ -2555,7 +2555,7 @@ def setup_method(self, method): self.offset2 = CDay(2) def test_different_normalize_equals(self): - # GH#21404 changed __eq__ to return False when `normalize` doesnt match + # GH#21404 changed __eq__ to return False when `normalize` does not match offset = self._offset() offset2 = self._offset(normalize=True) assert offset != offset2 @@ -2826,7 +2826,7 @@ class TestCustomBusinessMonthEnd(CustomBusinessMonthBase, Base): _offset = CBMonthEnd def test_different_normalize_equals(self): - # GH#21404 changed __eq__ to return False when `normalize` doesnt match + # GH#21404 changed __eq__ to return False when `normalize` does not match offset = self._offset() offset2 = self._offset(normalize=True) assert offset != offset2 @@ -2975,7 +2975,7 @@ class TestCustomBusinessMonthBegin(CustomBusinessMonthBase, Base): _offset = CBMonthBegin def test_different_normalize_equals(self): - # GH#21404 changed __eq__ to return False when `normalize` doesnt match + # GH#21404 changed __eq__ to return False when `normalize` does not match offset = self._offset() offset2 = self._offset(normalize=True) assert offset != offset2