CLN: Fix common spelling mistakes #32895

Merged 2 commits on Mar 21, 2020
8 changes: 4 additions & 4 deletions asv_bench/benchmarks/rolling.py
@@ -11,7 +11,7 @@ class Methods:
["int", "float"],
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"],
)
- param_names = ["contructor", "window", "dtype", "method"]
+ param_names = ["constructor", "window", "dtype", "method"]

def setup(self, constructor, window, dtype, method):
N = 10 ** 5
@@ -72,7 +72,7 @@ class ExpandingMethods:
["int", "float"],
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"],
)
- param_names = ["contructor", "window", "dtype", "method"]
+ param_names = ["constructor", "window", "dtype", "method"]

def setup(self, constructor, dtype, method):
N = 10 ** 5
@@ -86,7 +86,7 @@ def time_expanding(self, constructor, dtype, method):
class EWMMethods:

params = (["DataFrame", "Series"], [10, 1000], ["int", "float"], ["mean", "std"])
- param_names = ["contructor", "window", "dtype", "method"]
+ param_names = ["constructor", "window", "dtype", "method"]

def setup(self, constructor, window, dtype, method):
N = 10 ** 5
@@ -104,7 +104,7 @@ class VariableWindowMethods(Methods):
["int", "float"],
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"],
)
- param_names = ["contructor", "window", "dtype", "method"]
+ param_names = ["constructor", "window", "dtype", "method"]

def setup(self, constructor, window, dtype, method):
N = 10 ** 5
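The benchmark classes above parametrize rolling, expanding, and EWM aggregations by constructor, window, dtype, and method name. A minimal sketch of the kind of operation these benchmarks time (illustrative values, assuming pandas and NumPy are installed):

```python
import numpy as np
import pandas as pd

# What the Methods benchmark measures: a rolling aggregation over a
# fixed-size window on a Series (here window=10, method="mean").
s = pd.Series(np.arange(100, dtype="int64"))
result = s.rolling(10).mean()

# The first window - 1 entries have no full window and are NaN;
# entry 9 is the mean of values 0..9.
print(result.iloc[9])  # 4.5
```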
2 changes: 1 addition & 1 deletion ci/azure/posix.yml
@@ -24,7 +24,7 @@ jobs:
ENV_FILE: ci/deps/azure-36-locale_slow.yaml
CONDA_PY: "36"
PATTERN: "slow"
- # pandas does not use the language (zh_CN), but should support diferent encodings (utf8)
+ # pandas does not use the language (zh_CN), but should support different encodings (utf8)
# we should test with encodings different than utf8, but doesn't seem like Ubuntu supports any
LANG: "zh_CN.utf8"
LC_ALL: "zh_CN.utf8"
@@ -225,7 +225,7 @@ The method :meth:`~DataFrame.info` provides technical information about a
<div class="d-flex flex-row bg-light gs-torefguide">
<span class="badge badge-info">To user guide</span>

- For a complete overview of the input and output possibilites from and to pandas, see the user guide section about :ref:`reader and writer functions <io>`.
+ For a complete overview of the input and output possibilities from and to pandas, see the user guide section about :ref:`reader and writer functions <io>`.

.. raw:: html

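The reader/writer pairing this doc hunk points to can be sketched with an in-memory round trip (a minimal example, not taken from the PR itself):

```python
import io

import pandas as pd

# Write with a writer (to_csv) and read back with the matching
# reader (read_csv), using an in-memory buffer as the "file".
df = pd.DataFrame({"x": [1, 2], "y": [3, 4]})
buf = io.StringIO()
df.to_csv(buf, index=False)
buf.seek(0)
roundtrip = pd.read_csv(buf)
```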
@@ -101,7 +101,7 @@ And have a look at the ``shape`` of the output:

titanic["Age"].shape

- :attr:`DataFrame.shape` is an attribute (remember :ref:`tutorial on reading and writing <10min_tut_02_read_write>`, do not use parantheses for attributes) of a
+ :attr:`DataFrame.shape` is an attribute (remember :ref:`tutorial on reading and writing <10min_tut_02_read_write>`, do not use parentheses for attributes) of a
pandas ``Series`` and ``DataFrame`` containing the number of rows and
columns: *(nrows, ncolumns)*. A pandas Series is 1-dimensional and only
the number of rows is returned.
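The attribute-versus-method distinction described in this hunk can be seen directly (illustrative data standing in for the Titanic set):

```python
import pandas as pd

# shape is an attribute: no parentheses, unlike a method call.
df = pd.DataFrame({"Age": [22, 38, 26], "Fare": [7.25, 71.28, 7.92]})

print(df.shape)         # (nrows, ncolumns) for a DataFrame
print(df["Age"].shape)  # a Series is 1-dimensional: (nrows,)
```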
@@ -165,7 +165,7 @@ index. For example:
.. note::
The existence of multiple row/column indices at the same time
has not been mentioned within these tutorials. *Hierarchical indexing*
- or *MultiIndex* is an advanced and powerfull pandas feature to analyze
+ or *MultiIndex* is an advanced and powerful pandas feature to analyze
higher dimensional data.

Multi-indexing is out of scope for this pandas introduction. For the
@@ -188,7 +188,7 @@ Which passenger of the titanic has the longest name?

titanic["Name"].str.len()

- To get the longest name we first have to get the lenghts of each of the
+ To get the longest name we first have to get the lengths of each of the
names in the ``Name`` column. By using pandas string methods, the
:meth:`Series.str.len` function is applied to each of the names individually
(element-wise).
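The element-wise ``str.len`` pattern this hunk documents, sketched on stand-in names (the real tutorial uses the Titanic dataset):

```python
import pandas as pd

names = pd.Series(
    ["Braund, Mr. Owen Harris",
     "Cumings, Mrs. John Bradley",
     "Allen, Mr. William Henry"],
    name="Name",
)

# Series.str.len is applied to each name individually (element-wise);
# idxmax then gives the label of the longest one.
lengths = names.str.len()
longest = names.loc[lengths.idxmax()]
```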
2 changes: 1 addition & 1 deletion doc/source/user_guide/dsintro.rst
@@ -406,7 +406,7 @@ From a list of dataclasses

Data Classes as introduced in `PEP557 <https://www.python.org/dev/peps/pep-0557>`__,
can be passed into the DataFrame constructor.
- Passing a list of dataclasses is equivilent to passing a list of dictionaries.
+ Passing a list of dataclasses is equivalent to passing a list of dictionaries.

Please be aware, that that all values in the list should be dataclasses, mixing
types in the list would result in a TypeError.
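A short sketch of the dataclass behaviour this hunk documents (the ``Point`` class is a hypothetical example, not from the PR):

```python
from dataclasses import dataclass

import pandas as pd

@dataclass
class Point:
    x: int
    y: int

# A list of dataclasses is treated like a list of dictionaries:
# fields become columns. Mixing non-dataclass values into the list
# would raise a TypeError, as the surrounding text notes.
df = pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])
```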
2 changes: 1 addition & 1 deletion doc/source/user_guide/indexing.rst
@@ -374,7 +374,7 @@ For getting values with a boolean array:
df1.loc['a'] > 0
df1.loc[:, df1.loc['a'] > 0]

- NA values in a boolean array propogate as ``False``:
+ NA values in a boolean array propagate as ``False``:

.. versionchanged:: 1.0.2

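The NA-propagation rule in this hunk can be sketched with a nullable-boolean mask (an illustrative frame, not the ``df1`` from the docs; behaviour per the ``versionchanged:: 1.0.2`` note):

```python
import pandas as pd

df = pd.DataFrame({"A": [1, -2], "B": [-1, 4]}, index=["a", "b"])

# A nullable-boolean mask containing pd.NA: the NA entry is treated
# as False during selection, so only column "A" survives.
mask = pd.array([True, pd.NA], dtype="boolean")
selected = df.loc[:, mask]
```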
2 changes: 1 addition & 1 deletion doc/source/user_guide/io.rst
@@ -5005,7 +5005,7 @@ Possible values are:
This usually provides better performance for analytic databases
like *Presto* and *Redshift*, but has worse performance for
traditional SQL backend if the table contains many columns.
- For more information check the SQLAlchemy `documention
+ For more information check the SQLAlchemy `documentation
<https://docs.sqlalchemy.org/en/latest/core/dml.html#sqlalchemy.sql.expression.Insert.values.params.*args>`__.
- callable with signature ``(pd_table, conn, keys, data_iter)``:
This can be used to implement a more performant insertion method based on
2 changes: 1 addition & 1 deletion doc/source/user_guide/scale.rst
@@ -259,7 +259,7 @@ Inspecting the ``ddf`` object, we see a few things
* There are familiar methods like ``.groupby``, ``.sum``, etc.
* There are new attributes like ``.npartitions`` and ``.divisions``

- The partitions and divisions are how Dask parallizes computation. A **Dask**
+ The partitions and divisions are how Dask parallelizes computation. A **Dask**
DataFrame is made up of many **Pandas** DataFrames. A single method call on a
Dask DataFrame ends up making many pandas method calls, and Dask knows how to
coordinate everything to get the result.
4 changes: 2 additions & 2 deletions doc/source/user_guide/style.ipynb
@@ -620,8 +620,8 @@
"aligns = ['left','zero','mid']\n",
"for align in aligns:\n",
" row = \"<tr><th>{}</th>\".format(align)\n",
- " for serie in [test1,test2,test3]:\n",
- " s = serie.copy()\n",
+ " for series in [test1,test2,test3]:\n",
+ " s = series.copy()\n",
" s.name=''\n",
" row += \"<td>{}</td>\".format(s.to_frame().style.bar(align=align, \n",
" color=['#d65f5f', '#5fba7d'], \n",
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.0.rst
@@ -397,7 +397,7 @@ Other enhancements
- :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`).
The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`).
- :meth:`DataFrame.to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` types for supported databases. For databases that don't support timezones, datetime data will be stored as timezone unaware local timestamps. See the :ref:`io.sql_datetime_data` for implications (:issue:`9086`).
- - :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`)
+ - :func:`to_timedelta` now supports iso-formatted timedelta strings (:issue:`21877`)
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` objects in the constructor (:issue:`2193`)
- :class:`DatetimeIndex` has gained the :attr:`DatetimeIndex.timetz` attribute. This returns the local time with timezone information. (:issue:`21358`)
- :meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, and :meth:`~Timestamp.floor` for :class:`DatetimeIndex` and :class:`Timestamp`
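The ISO-formatted timedelta support named in the first changed entry above (GH 21877) can be sketched as:

```python
import pandas as pd

# ISO 8601 duration strings are accepted by to_timedelta.
td = pd.to_timedelta("P1DT12H")
print(td)  # 1 days 12:00:00
```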
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.1.rst
@@ -16,7 +16,7 @@ Fixed regressions
~~~~~~~~~~~~~~~~~

- Fixed regression in :class:`DataFrame` setting values with a slice (e.g. ``df[-4:] = 1``) indexing by label instead of position (:issue:`31469`)
- - Fixed regression when indexing a ``Series`` or ``DataFrame`` indexed by ``DatetimeIndex`` with a slice containg a :class:`datetime.date` (:issue:`31501`)
+ - Fixed regression when indexing a ``Series`` or ``DataFrame`` indexed by ``DatetimeIndex`` with a slice containing a :class:`datetime.date` (:issue:`31501`)
- Fixed regression in ``DataFrame.__setitem__`` raising an ``AttributeError`` with a :class:`MultiIndex` and a non-monotonic indexer (:issue:`31449`)
- Fixed regression in :class:`Series` multiplication when multiplying a numeric :class:`Series` with >10000 elements with a timedelta-like scalar (:issue:`31457`)
- Fixed regression in ``.groupby().agg()`` raising an ``AssertionError`` for some reductions like ``min`` on object-dtype columns (:issue:`31522`)
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
@@ -335,7 +335,7 @@ MultiIndex

I/O
^^^
- - Bug in :meth:`read_json` where integer overflow was occuring when json contains big number strings. (:issue:`30320`)
+ - Bug in :meth:`read_json` where integer overflow was occurring when json contains big number strings. (:issue:`30320`)
- `read_csv` will now raise a ``ValueError`` when the arguments `header` and `prefix` both are not `None`. (:issue:`27394`)
- Bug in :meth:`DataFrame.to_json` was raising ``NotFoundError`` when ``path_or_buf`` was an S3 URI (:issue:`28375`)
- Bug in :meth:`DataFrame.to_parquet` overwriting pyarrow's default for
4 changes: 2 additions & 2 deletions pandas/_libs/hashtable_class_helper.pxi.in
@@ -674,7 +674,7 @@ cdef class StringHashTable(HashTable):
val = values[i]

if isinstance(val, str):
- # GH#31499 if we have a np.str_ get_c_string wont recognize
+ # GH#31499 if we have a np.str_ get_c_string won't recognize
# it as a str, even though isinstance does.
v = get_c_string(<str>val)
else:
@@ -709,7 +709,7 @@ cdef class StringHashTable(HashTable):
val = values[i]

if isinstance(val, str):
- # GH#31499 if we have a np.str_ get_c_string wont recognize
+ # GH#31499 if we have a np.str_ get_c_string won't recognize
# it as a str, even though isinstance does.
v = get_c_string(<str>val)
else:
2 changes: 1 addition & 1 deletion pandas/_libs/lib.pyx
@@ -2059,7 +2059,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
If an array-like object contains only timedelta values or NaT is
encountered, whether to convert and return an array of m8[ns] dtype.
convert_to_nullable_integer : bool, default False
- If an array-like object contains only interger values (and NaN) is
+ If an array-like object contains only integer values (and NaN) is
encountered, whether to convert and return an IntegerArray.

Returns
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/offsets.pyx
@@ -520,7 +520,7 @@ class _BaseOffset:
state = self.__dict__.copy()

# we don't want to actually pickle the calendar object
- # as its a np.busyday; we recreate on deserilization
+ # as its a np.busyday; we recreate on deserialization
if 'calendar' in state:
del state['calendar']
try:
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/parsing.pyx
@@ -349,7 +349,7 @@ cpdef bint _does_string_look_like_datetime(str py_string):
elif py_string in _not_datelike_strings:
return False
else:
- # xstrtod with such paramaters copies behavior of python `float`
+ # xstrtod with such parameters copies behavior of python `float`
# cast; for example, " 35.e-1 " is valid string for this cast so,
# for correctly xstrtod call necessary to pass these params:
# b'.' - a dot is used as separator, b'e' - an exponential form of
2 changes: 1 addition & 1 deletion pandas/_testing.py
@@ -2206,7 +2206,7 @@ def network(

Notes
-----
- * ``raise_on_error`` supercedes ``check_before_test``
+ * ``raise_on_error`` supersedes ``check_before_test``

Returns
-------
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimelike.py
@@ -202,7 +202,7 @@ def _check_compatible_with(
----------
other
setitem : bool, default False
- For __setitem__ we may have stricter compatiblity resrictions than
+ For __setitem__ we may have stricter compatibility resrictions than
for comparisons.

Raises
@@ -1167,7 +1167,7 @@ def _add_timedelta_arraylike(self, other):
-------
Same type as self
"""
- # overriden by PeriodArray
+ # overridden by PeriodArray

if len(self) != len(other):
raise ValueError("cannot add indices of unequal length")
2 changes: 1 addition & 1 deletion pandas/core/arrays/string_.py
@@ -281,7 +281,7 @@ def value_counts(self, dropna=False):

return value_counts(self._ndarray, dropna=dropna).astype("Int64")

- # Overrride parent because we have different return types.
+ # Override parent because we have different return types.
@classmethod
def _create_arithmetic_method(cls, op):
# Note: this handles both arithmetic and comparison methods.
2 changes: 1 addition & 1 deletion pandas/core/base.py
@@ -531,7 +531,7 @@ def _aggregate_multiple_funcs(self, arg, _axis):
# raised directly in _aggregate_named
pass
elif "no results" in str(err):
- # raised direcly in _aggregate_multiple_funcs
+ # raised directly in _aggregate_multiple_funcs
pass
else:
raise
2 changes: 1 addition & 1 deletion pandas/core/computation/parsing.py
@@ -116,7 +116,7 @@ def clean_column_name(name: str) -> str:

If this name was used in the query string (this makes the query call impossible)
an error will be raised by :func:`tokenize_backtick_quoted_string` instead,
- which is not catched and propogates to the user level.
+ which is not caught and propagates to the user level.
"""
try:
tokenized = tokenize_string(f"`{name}`")
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -1065,7 +1065,7 @@ def dot(self, other):
-------
Series or DataFrame
If other is a Series, return the matrix product between self and
- other as a Serie. If other is a DataFrame or a numpy.array, return
+ other as a Series. If other is a DataFrame or a numpy.array, return
the matrix product of self and other in a DataFrame of a np.array.

See Also
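The return-type behaviour this docstring fix describes, in brief (a minimal sketch):

```python
import pandas as pd

df = pd.DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
s = pd.Series([1, 1], index=["a", "b"])

# DataFrame.dot with a Series returns a Series (the matrix product);
# with another DataFrame or ndarray it would return a DataFrame.
out = df.dot(s)
```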
4 changes: 2 additions & 2 deletions pandas/core/generic.py
@@ -8051,7 +8051,7 @@ def first(self: FrameOrSeries, offset) -> FrameOrSeries:
2018-04-09 1
2018-04-11 2

- Notice the data for 3 first calender days were returned, not the first
+ Notice the data for 3 first calendar days were returned, not the first
3 days observed in the dataset, and therefore data for 2018-04-13 was
not returned.
"""
@@ -8113,7 +8113,7 @@ def last(self: FrameOrSeries, offset) -> FrameOrSeries:
2018-04-13 3
2018-04-15 4

- Notice the data for 3 last calender days were returned, not the last
+ Notice the data for 3 last calendar days were returned, not the last
3 observed days in the dataset, and therefore data for 2018-04-11 was
not returned.
"""
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
@@ -5145,7 +5145,7 @@ def insert(self, loc: int, item):
-------
new_index : Index
"""
- # Note: this method is overriden by all ExtensionIndex subclasses,
+ # Note: this method is overridden by all ExtensionIndex subclasses,
# so self is never backed by an EA.
arr = np.asarray(self)
item = self._coerce_scalar_to_index(item)._values
2 changes: 1 addition & 1 deletion pandas/core/indexes/interval.py
@@ -426,7 +426,7 @@ def memory_usage(self, deep: bool = False) -> int:
return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep)

# IntervalTree doesn't have a is_monotonic_decreasing, so have to override
- # the Index implemenation
+ # the Index implementation
@cache_readonly
def is_monotonic_decreasing(self) -> bool:
"""
2 changes: 1 addition & 1 deletion pandas/core/indexes/multi.py
@@ -3292,7 +3292,7 @@ def intersection(self, other, sort=False):
lvals = self._values
rvals = other._values

- uniq_tuples = None # flag whether _inner_indexer was succesful
+ uniq_tuples = None # flag whether _inner_indexer was successful
if self.is_monotonic and other.is_monotonic:
try:
uniq_tuples = self._inner_indexer(lvals, rvals)[0]
4 changes: 2 additions & 2 deletions pandas/core/nanops.py
@@ -1313,7 +1313,7 @@ def get_corr_func(method):
return method
else:
raise ValueError(
- f"Unkown method '{method}', expected one of 'kendall', 'spearman'"
+ f"Unknown method '{method}', expected one of 'kendall', 'spearman'"
)

def _pearson(a, b):
@@ -1509,7 +1509,7 @@ def na_accum_func(values: ArrayLike, accum_func, skipna: bool) -> ArrayLike:
Parameters
----------
values : np.ndarray or ExtensionArray
- accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minumum.accumulate}
+ accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
skipna : bool

Returns
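The corrected ``np.minimum.accumulate`` refers to NumPy's running-minimum ufunc method; for reference, the accumulate family named in this docstring behaves like:

```python
import numpy as np

# Running aggregations along an array, as passed to na_accum_func.
values = np.array([3.0, 1.0, 2.0])
running_min = np.minimum.accumulate(values)  # array([3., 1., 1.])
running_max = np.maximum.accumulate(values)  # array([3., 3., 3.])
```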
4 changes: 2 additions & 2 deletions pandas/core/series.py
@@ -910,7 +910,7 @@ def __getitem__(self, key):
def _get_with(self, key):
# other: fancy integer or otherwise
if isinstance(key, slice):
- # _convert_slice_indexer to determing if this slice is positional
+ # _convert_slice_indexer to determin if this slice is positional
# or label based, and if the latter, convert to positional
slobj = self.index._convert_slice_indexer(key, kind="getitem")
return self._slice(slobj)
@@ -3958,7 +3958,7 @@ def rename(
Parameters
----------
axis : {0 or "index"}
- Unused. Accepted for compatability with DataFrame method only.
+ Unused. Accepted for compatibility with DataFrame method only.
index : scalar, hashable sequence, dict-like or function, optional
Functions or dict-like are transformations to apply to
the index.
2 changes: 1 addition & 1 deletion pandas/core/strings.py
@@ -2498,7 +2498,7 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
Limit number of splits in output.
``None``, 0 and -1 will be interpreted as return all splits.
expand : bool, default False
- Expand the splitted strings into separate columns.
+ Expand the split strings into separate columns.

* If ``True``, return DataFrame/MultiIndex expanding dimensionality.
* If ``False``, return Series/Index, containing lists of strings.
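The ``expand`` behaviour documented in this hunk, sketched:

```python
import pandas as pd

s = pd.Series(["a_b_c", "c_d_e"])

# expand=False (the default): a Series containing lists of strings.
lists = s.str.split("_")

# expand=True: the split strings become separate DataFrame columns.
frame = s.str.split("_", expand=True)
```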
2 changes: 1 addition & 1 deletion pandas/core/window/rolling.py
@@ -196,7 +196,7 @@ def _dir_additions(self):

def _get_win_type(self, kwargs: Dict):
"""
- Exists for compatibility, overriden by subclass Window.
+ Exists for compatibility, overridden by subclass Window.

Parameters
----------
2 changes: 1 addition & 1 deletion pandas/io/formats/html.py
@@ -101,7 +101,7 @@ def write_th(
self, s: Any, header: bool = False, indent: int = 0, tags: Optional[str] = None
) -> None:
"""
- Method for writting a formatted <th> cell.
+ Method for writing a formatted <th> cell.

If col_space is set on the formatter then that is used for
the value of min-width.
2 changes: 1 addition & 1 deletion pandas/io/pytables.py
@@ -4682,7 +4682,7 @@ def _convert_index(name: str, index: Index, encoding: str, errors: str) -> Index
raise TypeError("MultiIndex not supported here!")

inferred_type = lib.infer_dtype(index, skipna=False)
- # we wont get inferred_type of "datetime64" or "timedelta64" as these
+ # we won't get inferred_type of "datetime64" or "timedelta64" as these
# would go through the DatetimeIndex/TimedeltaIndex paths above

values = np.asarray(index)
2 changes: 1 addition & 1 deletion pandas/tests/arithmetic/test_interval.py
@@ -100,7 +100,7 @@ def interval_constructor(self, request):

def elementwise_comparison(self, op, array, other):
"""
- Helper that performs elementwise comparisions between `array` and `other`
+ Helper that performs elementwise comparisons between `array` and `other`
"""
other = other if is_list_like(other) else [other] * len(array)
return np.array([op(x, y) for x, y in zip(array, other)])