diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index f7e1e395a76bc..5133bbd285b50 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -11,7 +11,7 @@ class Methods: ["int", "float"], ["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"], ) - param_names = ["contructor", "window", "dtype", "method"] + param_names = ["constructor", "window", "dtype", "method"] def setup(self, constructor, window, dtype, method): N = 10 ** 5 @@ -72,7 +72,7 @@ class ExpandingMethods: ["int", "float"], ["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"], ) - param_names = ["contructor", "window", "dtype", "method"] + param_names = ["constructor", "window", "dtype", "method"] def setup(self, constructor, dtype, method): N = 10 ** 5 @@ -86,7 +86,7 @@ def time_expanding(self, constructor, dtype, method): class EWMMethods: params = (["DataFrame", "Series"], [10, 1000], ["int", "float"], ["mean", "std"]) - param_names = ["contructor", "window", "dtype", "method"] + param_names = ["constructor", "window", "dtype", "method"] def setup(self, constructor, window, dtype, method): N = 10 ** 5 @@ -104,7 +104,7 @@ class VariableWindowMethods(Methods): ["int", "float"], ["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"], ) - param_names = ["contructor", "window", "dtype", "method"] + param_names = ["constructor", "window", "dtype", "method"] def setup(self, constructor, window, dtype, method): N = 10 ** 5 diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 437cc9b161e8a..880fdc46f43f5 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -24,7 +24,7 @@ jobs: ENV_FILE: ci/deps/azure-36-locale_slow.yaml CONDA_PY: "36" PATTERN: "slow" - # pandas does not use the language (zh_CN), but should support diferent encodings (utf8) + # pandas does not use the language (zh_CN), but should support different encodings (utf8) # we should test with encodings different than utf8, 
but doesn't seem like Ubuntu supports any LANG: "zh_CN.utf8" LC_ALL: "zh_CN.utf8" diff --git a/doc/source/getting_started/intro_tutorials/02_read_write.rst b/doc/source/getting_started/intro_tutorials/02_read_write.rst index 797bdbcf25d17..1b3bcb799d5ce 100644 --- a/doc/source/getting_started/intro_tutorials/02_read_write.rst +++ b/doc/source/getting_started/intro_tutorials/02_read_write.rst @@ -225,7 +225,7 @@ The method :meth:`~DataFrame.info` provides technical information about a
To user guide -For a complete overview of the input and output possibilites from and to pandas, see the user guide section about :ref:`reader and writer functions `. +For a complete overview of the input and output possibilities from and to pandas, see the user guide section about :ref:`reader and writer functions `. .. raw:: html diff --git a/doc/source/getting_started/intro_tutorials/03_subset_data.rst b/doc/source/getting_started/intro_tutorials/03_subset_data.rst index f328d7b05b5b6..4167166a3f34a 100644 --- a/doc/source/getting_started/intro_tutorials/03_subset_data.rst +++ b/doc/source/getting_started/intro_tutorials/03_subset_data.rst @@ -101,7 +101,7 @@ And have a look at the ``shape`` of the output: titanic["Age"].shape -:attr:`DataFrame.shape` is an attribute (remember :ref:`tutorial on reading and writing <10min_tut_02_read_write>`, do not use parantheses for attributes) of a +:attr:`DataFrame.shape` is an attribute (remember :ref:`tutorial on reading and writing <10min_tut_02_read_write>`, do not use parentheses for attributes) of a pandas ``Series`` and ``DataFrame`` containing the number of rows and columns: *(nrows, ncolumns)*. A pandas Series is 1-dimensional and only the number of rows is returned. diff --git a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst index f317e7a1f91b4..b6b3c97f2405b 100644 --- a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst +++ b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst @@ -165,7 +165,7 @@ index. For example: .. note:: The existence of multiple row/column indices at the same time has not been mentioned within these tutorials. *Hierarchical indexing* - or *MultiIndex* is an advanced and powerfull pandas feature to analyze + or *MultiIndex* is an advanced and powerful pandas feature to analyze higher dimensional data. Multi-indexing is out of scope for this pandas introduction. 
For the diff --git a/doc/source/getting_started/intro_tutorials/10_text_data.rst b/doc/source/getting_started/intro_tutorials/10_text_data.rst index 3ff64875d807b..936d00f68e3f0 100644 --- a/doc/source/getting_started/intro_tutorials/10_text_data.rst +++ b/doc/source/getting_started/intro_tutorials/10_text_data.rst @@ -188,7 +188,7 @@ Which passenger of the titanic has the longest name? titanic["Name"].str.len() -To get the longest name we first have to get the lenghts of each of the +To get the longest name we first have to get the lengths of each of the names in the ``Name`` column. By using pandas string methods, the :meth:`Series.str.len` function is applied to each of the names individually (element-wise). diff --git a/doc/source/user_guide/dsintro.rst b/doc/source/user_guide/dsintro.rst index d7f7690f8c3d0..075787d3b9d5b 100644 --- a/doc/source/user_guide/dsintro.rst +++ b/doc/source/user_guide/dsintro.rst @@ -406,7 +406,7 @@ From a list of dataclasses Data Classes as introduced in `PEP557 `__, can be passed into the DataFrame constructor. -Passing a list of dataclasses is equivilent to passing a list of dictionaries. +Passing a list of dataclasses is equivalent to passing a list of dictionaries. Please be aware, that that all values in the list should be dataclasses, mixing types in the list would result in a TypeError. diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 2bd3ff626f2e1..fb815b3a975d1 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -374,7 +374,7 @@ For getting values with a boolean array: df1.loc['a'] > 0 df1.loc[:, df1.loc['a'] > 0] -NA values in a boolean array propogate as ``False``: +NA values in a boolean array propagate as ``False``: .. 
versionchanged:: 1.0.2 diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index c34247a49335d..f3aff0654530e 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5005,7 +5005,7 @@ Possible values are: This usually provides better performance for analytic databases like *Presto* and *Redshift*, but has worse performance for traditional SQL backend if the table contains many columns. - For more information check the SQLAlchemy `documention + For more information check the SQLAlchemy `documentation `__. - callable with signature ``(pd_table, conn, keys, data_iter)``: This can be used to implement a more performant insertion method based on diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index 61fa24bb77cfc..cddc3cb2600fd 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -259,7 +259,7 @@ Inspecting the ``ddf`` object, we see a few things * There are familiar methods like ``.groupby``, ``.sum``, etc. * There are new attributes like ``.npartitions`` and ``.divisions`` -The partitions and divisions are how Dask parallizes computation. A **Dask** +The partitions and divisions are how Dask parallelizes computation. A **Dask** DataFrame is made up of many **Pandas** DataFrames. A single method call on a Dask DataFrame ends up making many pandas method calls, and Dask knows how to coordinate everything to get the result. 
diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index 1f2f8818c8458..fd8dda4fe365e 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -620,8 +620,8 @@ "aligns = ['left','zero','mid']\n", "for align in aligns:\n", " row = \"{}\".format(align)\n", - " for serie in [test1,test2,test3]:\n", - " s = serie.copy()\n", + " for series in [test1,test2,test3]:\n", + " s = series.copy()\n", " s.name=''\n", " row += \"{}\".format(s.to_frame().style.bar(align=align, \n", " color=['#d65f5f', '#5fba7d'], \n", diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 85de0150a5a28..c756bc87e9b89 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -397,7 +397,7 @@ Other enhancements - :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`). The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`). - :meth:`DataFrame.to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` types for supported databases. For databases that don't support timezones, datetime data will be stored as timezone unaware local timestamps. See the :ref:`io.sql_datetime_data` for implications (:issue:`9086`). -- :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) +- :func:`to_timedelta` now supports iso-formatted timedelta strings (:issue:`21877`) - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` objects in the constructor (:issue:`2193`) - :class:`DatetimeIndex` has gained the :attr:`DatetimeIndex.timetz` attribute. This returns the local time with timezone information. 
(:issue:`21358`) - :meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, and :meth:`~Timestamp.floor` for :class:`DatetimeIndex` and :class:`Timestamp` diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index ef3bb8161d13f..c42aab6de4cc3 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -16,7 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :class:`DataFrame` setting values with a slice (e.g. ``df[-4:] = 1``) indexing by label instead of position (:issue:`31469`) -- Fixed regression when indexing a ``Series`` or ``DataFrame`` indexed by ``DatetimeIndex`` with a slice containg a :class:`datetime.date` (:issue:`31501`) +- Fixed regression when indexing a ``Series`` or ``DataFrame`` indexed by ``DatetimeIndex`` with a slice containing a :class:`datetime.date` (:issue:`31501`) - Fixed regression in ``DataFrame.__setitem__`` raising an ``AttributeError`` with a :class:`MultiIndex` and a non-monotonic indexer (:issue:`31449`) - Fixed regression in :class:`Series` multiplication when multiplying a numeric :class:`Series` with >10000 elements with a timedelta-like scalar (:issue:`31457`) - Fixed regression in ``.groupby().agg()`` raising an ``AssertionError`` for some reductions like ``min`` on object-dtype columns (:issue:`31522`) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 720ce7af47a18..c74ffac27a805 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -335,7 +335,7 @@ MultiIndex I/O ^^^ -- Bug in :meth:`read_json` where integer overflow was occuring when json contains big number strings. (:issue:`30320`) +- Bug in :meth:`read_json` where integer overflow was occurring when json contains big number strings. (:issue:`30320`) - `read_csv` will now raise a ``ValueError`` when the arguments `header` and `prefix` both are not `None`. 
(:issue:`27394`) - Bug in :meth:`DataFrame.to_json` was raising ``NotFoundError`` when ``path_or_buf`` was an S3 URI (:issue:`28375`) - Bug in :meth:`DataFrame.to_parquet` overwriting pyarrow's default for diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 4c2b6b8c5a8aa..3ce3bc519b311 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -674,7 +674,7 @@ cdef class StringHashTable(HashTable): val = values[i] if isinstance(val, str): - # GH#31499 if we have a np.str_ get_c_string wont recognize + # GH#31499 if we have a np.str_ get_c_string won't recognize # it as a str, even though isinstance does. v = get_c_string(val) else: @@ -709,7 +709,7 @@ cdef class StringHashTable(HashTable): val = values[i] if isinstance(val, str): - # GH#31499 if we have a np.str_ get_c_string wont recognize + # GH#31499 if we have a np.str_ get_c_string won't recognize # it as a str, even though isinstance does. v = get_c_string(val) else: diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index dc2e8c097bc14..6aa9a8b2dedfd 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2059,7 +2059,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, If an array-like object contains only timedelta values or NaT is encountered, whether to convert and return an array of m8[ns] dtype. convert_to_nullable_integer : bool, default False - If an array-like object contains only interger values (and NaN) is + If an array-like object contains only integer values (and NaN) is encountered, whether to convert and return an IntegerArray. 
Returns diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index da59c635b5a18..0849ba0f29624 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -520,7 +520,7 @@ class _BaseOffset: state = self.__dict__.copy() # we don't want to actually pickle the calendar object - # as its a np.busyday; we recreate on deserilization + # as its a np.busyday; we recreate on deserialization if 'calendar' in state: del state['calendar'] try: diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index ebdf7a1e29216..6dfc7ef3c8970 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -349,7 +349,7 @@ cpdef bint _does_string_look_like_datetime(str py_string): elif py_string in _not_datelike_strings: return False else: - # xstrtod with such paramaters copies behavior of python `float` + # xstrtod with such parameters copies behavior of python `float` # cast; for example, " 35.e-1 " is valid string for this cast so, # for correctly xstrtod call necessary to pass these params: # b'.' - a dot is used as separator, b'e' - an exponential form of diff --git a/pandas/_testing.py b/pandas/_testing.py index f96e3872eb8bd..52c711468b49a 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -2206,7 +2206,7 @@ def network( Notes ----- - * ``raise_on_error`` supercedes ``check_before_test`` + * ``raise_on_error`` supersedes ``check_before_test`` Returns ------- diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index b2bff0b0142e2..855ccca4aa574 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -202,7 +202,7 @@ def _check_compatible_with( ---------- other setitem : bool, default False - For __setitem__ we may have stricter compatiblity resrictions than + For __setitem__ we may have stricter compatibility restrictions than for comparisons. 
Raises @@ -1167,7 +1167,7 @@ def _add_timedelta_arraylike(self, other): ------- Same type as self """ - # overriden by PeriodArray + # overridden by PeriodArray if len(self) != len(other): raise ValueError("cannot add indices of unequal length") diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index fcccd8cc14d6b..f82790ac4c3d9 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -281,7 +281,7 @@ def value_counts(self, dropna=False): return value_counts(self._ndarray, dropna=dropna).astype("Int64") - # Overrride parent because we have different return types. + # Override parent because we have different return types. @classmethod def _create_arithmetic_method(cls, op): # Note: this handles both arithmetic and comparison methods. diff --git a/pandas/core/base.py b/pandas/core/base.py index e1c6bef66239d..3cf30b3f0abb1 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -531,7 +531,7 @@ def _aggregate_multiple_funcs(self, arg, _axis): # raised directly in _aggregate_named pass elif "no results" in str(err): - # raised direcly in _aggregate_multiple_funcs + # raised directly in _aggregate_multiple_funcs pass else: raise diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py index 418fc7d38d08f..c7c7103654a65 100644 --- a/pandas/core/computation/parsing.py +++ b/pandas/core/computation/parsing.py @@ -116,7 +116,7 @@ def clean_column_name(name: str) -> str: If this name was used in the query string (this makes the query call impossible) an error will be raised by :func:`tokenize_backtick_quoted_string` instead, - which is not catched and propogates to the user level. + which is not caught and propagates to the user level. 
""" try: tokenized = tokenize_string(f"`{name}`") diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d1ba85c50d91d..6c36c7e71759c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1065,7 +1065,7 @@ def dot(self, other): ------- Series or DataFrame If other is a Series, return the matrix product between self and - other as a Serie. If other is a DataFrame or a numpy.array, return + other as a Series. If other is a DataFrame or a numpy.array, return the matrix product of self and other in a DataFrame of a np.array. See Also diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e19021762792f..7abc2a77267de 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8051,7 +8051,7 @@ def first(self: FrameOrSeries, offset) -> FrameOrSeries: 2018-04-09 1 2018-04-11 2 - Notice the data for 3 first calender days were returned, not the first + Notice the data for 3 first calendar days were returned, not the first 3 days observed in the dataset, and therefore data for 2018-04-13 was not returned. """ @@ -8113,7 +8113,7 @@ def last(self: FrameOrSeries, offset) -> FrameOrSeries: 2018-04-13 3 2018-04-15 4 - Notice the data for 3 last calender days were returned, not the last + Notice the data for 3 last calendar days were returned, not the last 3 observed days in the dataset, and therefore data for 2018-04-11 was not returned. """ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4501dd1ddd887..83064fe22eaff 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5145,7 +5145,7 @@ def insert(self, loc: int, item): ------- new_index : Index """ - # Note: this method is overriden by all ExtensionIndex subclasses, + # Note: this method is overridden by all ExtensionIndex subclasses, # so self is never backed by an EA. 
arr = np.asarray(self) item = self._coerce_scalar_to_index(item)._values diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3d31e7f8054ec..f4942b72a6ad4 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -426,7 +426,7 @@ def memory_usage(self, deep: bool = False) -> int: return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep) # IntervalTree doesn't have a is_monotonic_decreasing, so have to override - # the Index implemenation + # the Index implementation @cache_readonly def is_monotonic_decreasing(self) -> bool: """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 303fc62d6ad35..4abb56970413b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3292,7 +3292,7 @@ def intersection(self, other, sort=False): lvals = self._values rvals = other._values - uniq_tuples = None # flag whether _inner_indexer was succesful + uniq_tuples = None # flag whether _inner_indexer was successful if self.is_monotonic and other.is_monotonic: try: uniq_tuples = self._inner_indexer(lvals, rvals)[0] diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index a5e70bd279d21..87f937f9e7087 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1313,7 +1313,7 @@ def get_corr_func(method): return method else: raise ValueError( - f"Unkown method '{method}', expected one of 'kendall', 'spearman'" + f"Unknown method '{method}', expected one of 'kendall', 'spearman'" ) def _pearson(a, b): @@ -1509,7 +1509,7 @@ def na_accum_func(values: ArrayLike, accum_func, skipna: bool) -> ArrayLike: Parameters ---------- values : np.ndarray or ExtensionArray - accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minumum.accumulate} + accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate} skipna : bool Returns diff --git a/pandas/core/series.py b/pandas/core/series.py index aaaeadc0cf618..9c1f4134746a8 100644 --- 
a/pandas/core/series.py +++ b/pandas/core/series.py @@ -910,7 +910,7 @@ def __getitem__(self, key): def _get_with(self, key): # other: fancy integer or otherwise if isinstance(key, slice): - # _convert_slice_indexer to determing if this slice is positional + # _convert_slice_indexer to determine if this slice is positional # or label based, and if the latter, convert to positional slobj = self.index._convert_slice_indexer(key, kind="getitem") return self._slice(slobj) @@ -3958,7 +3958,7 @@ def rename( Parameters ---------- axis : {0 or "index"} - Unused. Accepted for compatability with DataFrame method only. + Unused. Accepted for compatibility with DataFrame method only. index : scalar, hashable sequence, dict-like or function, optional Functions or dict-like are transformations to apply to the index. diff --git a/pandas/core/strings.py b/pandas/core/strings.py index fbc87b1fdac04..7f26c7a26d4d8 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2498,7 +2498,7 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"): Limit number of splits in output. ``None``, 0 and -1 will be interpreted as return all splits. expand : bool, default False - Expand the splitted strings into separate columns. + Expand the split strings into separate columns. * If ``True``, return DataFrame/MultiIndex expanding dimensionality. * If ``False``, return Series/Index, containing lists of strings. diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 3784989de10ab..aeab51149ec4e 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -196,7 +196,7 @@ def _dir_additions(self): def _get_win_type(self, kwargs: Dict): """ - Exists for compatibility, overriden by subclass Window. + Exists for compatibility, overridden by subclass Window. 
Parameters ---------- diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 585e1af3dbc01..1be0f977f9b20 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -101,7 +101,7 @@ def write_th( self, s: Any, header: bool = False, indent: int = 0, tags: Optional[str] = None ) -> None: """ - Method for writting a formatted cell. + Method for writing a formatted cell. If col_space is set on the formatter then that is used for the value of min-width. diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 544d45999c14b..0659dfb138b9a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4682,7 +4682,7 @@ def _convert_index(name: str, index: Index, encoding: str, errors: str) -> Index raise TypeError("MultiIndex not supported here!") inferred_type = lib.infer_dtype(index, skipna=False) - # we wont get inferred_type of "datetime64" or "timedelta64" as these + # we won't get inferred_type of "datetime64" or "timedelta64" as these # would go through the DatetimeIndex/TimedeltaIndex paths above values = np.asarray(index) diff --git a/pandas/tests/arithmetic/test_interval.py b/pandas/tests/arithmetic/test_interval.py index 3f85ac8c190db..d7c312b2fda1b 100644 --- a/pandas/tests/arithmetic/test_interval.py +++ b/pandas/tests/arithmetic/test_interval.py @@ -100,7 +100,7 @@ def interval_constructor(self, request): def elementwise_comparison(self, op, array, other): """ - Helper that performs elementwise comparisions between `array` and `other` + Helper that performs elementwise comparisons between `array` and `other` """ other = other if is_list_like(other) else [other] * len(array) return np.array([op(x, y) for x, y in zip(array, other)]) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 3d9469c252914..1cbf64a1529c2 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -146,7 +146,7 @@ def 
test_periodindex(self): tm.assert_numpy_array_equal(cat3._codes, exp_arr) tm.assert_index_equal(cat3.categories, exp_idx) - def test_categories_assigments(self): + def test_categories_assignments(self): s = Categorical(["a", "b", "c", "a"]) exp = np.array([1, 2, 3, 1], dtype=np.int64) s.categories = [1, 2, 3] @@ -154,7 +154,7 @@ def test_categories_assigments(self): tm.assert_index_equal(s.categories, Index([1, 2, 3])) @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) - def test_categories_assigments_wrong_length_raises(self, new_categories): + def test_categories_assignments_wrong_length_raises(self, new_categories): cat = Categorical(["a", "b", "c", "a"]) msg = ( "new categories need to have the same number of items " diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 4dab86166e13c..cb3a70e934dcb 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1118,7 +1118,7 @@ def test_nbytes_block(self): arr = SparseArray([1, 2, 0, 0, 0], kind="block") result = arr.nbytes # (2 * 8) + 4 + 4 - # sp_values, blocs, blenghts + # sp_values, blocs, blengths assert result == 24 def test_asarray_datetime64(self): diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 9de2ec9799353..4a9fa61bc4233 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -1073,14 +1073,14 @@ def test_escapechar(all_parsers): data = '''SEARCH_TERM,ACTUAL_URL "bra tv bord","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord" "tv p\xc3\xa5 hjul","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord" -"SLAGBORD, \\"Bergslagen\\", IKEA:s 1700-tals 
serie","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"''' # noqa +"SLAGBORD, \\"Bergslagen\\", IKEA:s 1700-tals series","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"''' # noqa parser = all_parsers result = parser.read_csv( StringIO(data), escapechar="\\", quotechar='"', encoding="utf-8" ) - assert result["SEARCH_TERM"][2] == 'SLAGBORD, "Bergslagen", IKEA:s 1700-tals serie' + assert result["SEARCH_TERM"][2] == 'SLAGBORD, "Bergslagen", IKEA:s 1700-tals series' tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"])) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 0c79ef4378b66..84bc29ebc65e0 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -137,7 +137,7 @@ def test_iterator(self): (pd.read_pickle, "os", FileNotFoundError, "pickle"), ], ) - def test_read_non_existant(self, reader, module, error_class, fn_ext): + def test_read_non_existent(self, reader, module, error_class, fn_ext): pytest.importorskip(module) path = os.path.join(HERE, "data", "does_not_exist." 
+ fn_ext) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 46ac430a13394..08392d48151a2 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1115,7 +1115,7 @@ def create_data(constructor): tm.assert_series_equal(result_datetime, expected) tm.assert_series_equal(result_Timestamp, expected) - def test_contructor_dict_tuple_indexer(self): + def test_constructor_dict_tuple_indexer(self): # GH 12948 data = {(1, 1, None): -1.0} result = Series(data) diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index 1566d8f36373b..3a1996b2938a5 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -232,7 +232,7 @@ def test_from_list_dtype(self): assert result._data.blocks[0].is_extension is False -def test_hasnans_unchached_for_series(): +def test_hasnans_uncached_for_series(): # GH#19700 idx = pd.Index([0, 1]) assert idx.hasnans is False diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 536f15ea75d69..c7fc37a278e83 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -171,7 +171,7 @@ def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) @pytest.mark.parametrize("shuffle", SHUFFLE) @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning") -def test_multiple_ouput_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ufunc): +def test_multiple_output_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ufunc): # Test that # the same conditions from binary_ufunc_scalar apply to # ufuncs with multiple outputs. 
@@ -204,7 +204,7 @@ def test_multiple_ouput_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) -def test_multiple_ouput_ufunc(sparse, arrays_for_binary_ufunc): +def test_multiple_output_ufunc(sparse, arrays_for_binary_ufunc): # Test that the same conditions from unary input apply to multi-output # ufuncs array, _ = arrays_for_binary_ufunc diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 852e1ce489893..cac6a59527a6e 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -600,7 +600,7 @@ def test_nancorr_spearman(self): def test_invalid_method(self): targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1] targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1] - msg = "Unkown method 'foo', expected one of 'kendall', 'spearman'" + msg = "Unknown method 'foo', expected one of 'kendall', 'spearman'" with pytest.raises(ValueError, match=msg): self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="foo") diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 2477ff29fbfd5..695a3f74c9452 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -147,13 +147,11 @@ def to_offset(freq) -> Optional[DateOffset]: delta = None stride_sign = None try: - splitted = re.split(libfreqs.opattern, freq) - if splitted[-1] != "" and not splitted[-1].isspace(): + split = re.split(libfreqs.opattern, freq) + if split[-1] != "" and not split[-1].isspace(): # the last element must be blank raise ValueError("last element must be blank") - for sep, stride, name in zip( - splitted[0::4], splitted[1::4], splitted[2::4] - ): + for sep, stride, name in zip(split[0::4], split[1::4], split[2::4]): if sep != "" and not sep.isspace(): raise ValueError("separator must be spaces") prefix = libfreqs._lite_rule_alias.get(name) or name diff --git a/scripts/validate_string_concatenation.py 
b/scripts/validate_string_concatenation.py index fbf3bb5cfccf2..c5f257c641b25 100755 --- a/scripts/validate_string_concatenation.py +++ b/scripts/validate_string_concatenation.py @@ -4,7 +4,7 @@ Check where there is a string that needs to be concatenated. -This is necessary after black formating, +This is necessary after black formatting, where for example black transforms this: >>> foo = ( diff --git a/web/pandas/config.yml b/web/pandas/config.yml index a52c580f23530..d943ad3833b52 100644 --- a/web/pandas/config.yml +++ b/web/pandas/config.yml @@ -127,7 +127,7 @@ sponsors: url: https://chanzuckerberg.com/ logo: /static/img/partners/czi.svg kind: regular - description: "pandas is funded by the Essential Open Source Software for Science program of the Chan Zuckerberg Initiative. The funding is used for general maintainance, improve extension types, and a efficient string type." + description: "pandas is funded by the Essential Open Source Software for Science program of the Chan Zuckerberg Initiative. The funding is used for general maintenance, improving extension types, and an efficient string type." inkind: # not included in active so they don't appear in the home page - name: "OVH" url: https://us.ovhcloud.com/