MarcoGorelli
diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml
Lines changed: 3 additions & 1 deletion
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 18 additions & 13 deletions
diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
Lines changed: 3 additions & 2 deletions
diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
Lines changed: 32 additions & 0 deletions
diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb
Lines changed: 2 additions & 1 deletion
diff --git a/doc/source/whatsnew/v1.5.3.rst b/doc/source/whatsnew/v1.5.3.rst
Lines changed: 1 addition & 1 deletion
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
Lines changed: 10 additions & 2 deletions
diff --git a/environment.yml b/environment.yml
Lines changed: 2 additions & 2 deletions
diff --git a/pandas/_libs/hashtable.pyi b/pandas/_libs/hashtable.pyi
Lines changed: 3 additions & 0 deletions
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
Lines changed: 6 additions & 15 deletions
@@ -36,6 +36,8 @@ jobs:
 
     - name: Run pre-commit
       uses: pre-commit/[email protected]
+      with:
+        extra_args: --verbose --all-files
 
   docstring_typing_pylint:
     name: Docstring validation, typing, and pylint
@@ -93,7 +95,7 @@ jobs:
     - name: Typing + pylint
       uses: pre-commit/[email protected]
       with:
-        extra_args: --hook-stage manual --all-files
+        extra_args: --verbose --hook-stage manual --all-files
       if: ${{ steps.build.outcome == 'success' && always() }}
 
     - name: Run docstring validation script tests
 
@@ -17,22 +17,18 @@ repos:
         entry: python scripts/run_vulture.py
         pass_filenames: true
         require_serial: false
--   repo: https://github.com/python/black
-    rev: 22.10.0
-    hooks:
-    -   id: black
 -   repo: https://github.com/codespell-project/codespell
     rev: v2.2.2
     hooks:
     -   id: codespell
         types_or: [python, rst, markdown]
         additional_dependencies: [tomli]
 -   repo: https://github.com/MarcoGorelli/cython-lint
-    rev: v0.2.1
+    rev: v0.9.1
     hooks:
     -   id: cython-lint
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.3.0
+    rev: v4.4.0
     hooks:
     -   id: debug-statements
     -   id: end-of-file-fixer
@@ -51,22 +47,22 @@ repos:
         exclude: ^pandas/_libs/src/(klib|headers)/
         args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
 -   repo: https://github.com/PyCQA/flake8
-    rev: 5.0.4
+    rev: 6.0.0
     hooks:
     -   id: flake8
         # Need to patch os.remove rule in pandas-dev-flaker
         exclude: ^ci/fix_wheels.py
         additional_dependencies: &flake8_dependencies
-        - flake8==5.0.4
+        - flake8==6.0.0
         - flake8-bugbear==22.7.1
         - pandas-dev-flaker==0.5.0
 -   repo: https://github.com/pycqa/pylint
-    rev: v2.15.5
+    rev: v2.15.6
     hooks:
     -   id: pylint
         stages: [manual]
 -   repo: https://github.com/pycqa/pylint
-    rev: v2.15.5
+    rev: v2.15.6
     hooks:
     -   id: pylint
         alias: redefined-outer-name
@@ -79,8 +75,6 @@ repos:
             |^pandas/util/_test_decorators\.py  # keep excluded
             |^pandas/_version\.py  # keep excluded
             |^pandas/conftest\.py  # keep excluded
-            |^pandas/core/tools/datetimes\.py
-            |^pandas/io/formats/format\.py
             |^pandas/core/generic\.py
         args: [--disable=all, --enable=redefined-outer-name]
         stages: [manual]
@@ -89,7 +83,7 @@ repos:
     hooks:
     -   id: isort
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v3.2.0
+    rev: v3.2.2
     hooks:
     -   id: pyupgrade
         args: [--py38-plus]
@@ -114,6 +108,16 @@ repos:
         additional_dependencies: *flake8_dependencies
 -   repo: local
     hooks:
+    # NOTE: we make `black` a local hook because if it's installed from
+    # PyPI (rather than from source) then it'll run twice as fast thanks to mypyc
+    -   id: black
+        name: black
+        description: "Black: The uncompromising Python code formatter"
+        entry: black
+        language: python
+        require_serial: true
+        types_or: [python, pyi]
+        additional_dependencies: [black==22.10.0]
     -   id: pyright
         # note: assumes python env is setup and activated
         name: pyright
@@ -270,6 +274,7 @@ repos:
         entry: python scripts/validate_min_versions_in_sync.py
         language: python
         files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$
+        additional_dependencies: [tomli]
     -   id: validate-errors-locations
         name: Validate errors locations
         description: Validate errors are in appropriate locations.
 
@@ -57,7 +57,7 @@
         "odfpy": [],
         "jinja2": [],
     },
-    "conda_channels": ["defaults", "conda-forge"],
+    "conda_channels": ["conda-forge"],
     // Combinations of libraries/python versions can be excluded/included
     // from the set to test. Each entry is a dictionary containing additional
     // key-value pairs to include/exclude.
@@ -125,6 +125,7 @@
     "regression_thresholds": {
     },
     "build_command":
-    ["python setup.py build -j4",
+    ["python -m pip install versioneer[toml]",
+     "python setup.py build -j4",
      "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"],
 }
@@ -273,6 +273,38 @@ def time_merge_dataframes_cross(self, sort):
         merge(self.left.loc[:2000], self.right.loc[:2000], how="cross", sort=sort)
 
 
+class MergeEA:
+
+    params = [
+        "Int64",
+        "Int32",
+        "Int16",
+        "UInt64",
+        "UInt32",
+        "UInt16",
+        "Float64",
+        "Float32",
+    ]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        N = 10_000
+        indices = np.arange(1, N)
+        key = np.tile(indices[:8000], 10)
+        self.left = DataFrame(
+            {"key": Series(key, dtype=dtype), "value": np.random.randn(80000)}
+        )
+        self.right = DataFrame(
+            {
+                "key": Series(indices[2000:], dtype=dtype),
+                "value2": np.random.randn(7999),
+            }
+        )
+
+    def time_merge(self, dtype):
+        merge(self.left, self.right)
+
+
 class I8Merge:
 
     params = ["inner", "outer", "left", "right"]
 
@@ -1594,8 +1594,9 @@
     "\n",
     "\n",
     "- Only CSS2 named colors and hex colors of the form `#rgb` or `#rrggbb` are currently supported.\n",
-    "- The following pseudo CSS properties are also available to set excel specific style properties:\n",
+    "- The following pseudo CSS properties are also available to set Excel-specific style properties:\n",
     "    - `number-format`\n",
+    "    - `border-style` (for Excel-specific styles: \"hair\", \"mediumDashDot\", \"dashDotDot\", \"mediumDashDotDot\", \"dashDot\", \"slantDashDot\", or \"mediumDashed\")\n",
     "\n",
     "Table level styles, and data cell CSS-classes are not included in the export to Excel: individual cells must have their properties mapped by the `Styler.apply` and/or `Styler.applymap` methods."
    ]
 
@@ -24,7 +24,7 @@ Fixed regressions
 
 Bug fixes
 ~~~~~~~~~
--
+- Bug in :meth:`.Styler.to_excel` leading to error when unrecognized ``border-style`` (e.g. ``"hair"``) provided to Excel writers (:issue:`48649`)
 -
 
 .. ---------------------------------------------------------------------------
 
@@ -57,6 +57,7 @@ Other enhancements
 - :func:`assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`)
 - Added new argument ``use_nullable_dtypes`` to :func:`read_csv` and :func:`read_excel` to enable automatic conversion to nullable dtypes (:issue:`36712`)
 - Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`)
+- Added support for extension array dtypes in :func:`merge` (:issue:`44240`)
 - Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`)
 - :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`)
 - Fix ``test`` optional_extra by adding missing test package ``pytest-asyncio`` (:issue:`48361`)
@@ -572,7 +573,7 @@ Removal of prior version deprecations/changes
 - Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)
 - Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`)
 - Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`)
-- Changed default of ``numeric_only`` to ``False`` in :meth:`.DataFrameGroupBy.sum` and :meth:`.DataFrameGroupBy.mean` (:issue:`46072`)
+- Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`)
 - Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`)
 -
 
@@ -613,6 +614,7 @@ Performance improvements
 - Performance improvement in :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default). Now the index will be a :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49745`)
 - Performance improvement in :func:`merge` when not merging on the index - the new index will now be :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49478`)
 - Performance improvement in :meth:`DataFrame.to_dict` and :meth:`Series.to_dict` when using any non-object dtypes (:issue:`46470`)
+- Performance improvement in :func:`read_html` when there are multiple tables (:issue:`49929`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_200.bug_fixes:
@@ -624,6 +626,8 @@ Categorical
 ^^^^^^^^^^^
 - Bug in :meth:`Categorical.set_categories` losing dtype information (:issue:`48812`)
 - Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would reorder categories when used as a grouper (:issue:`48749`)
+- Bug in :class:`Categorical` constructor when constructing from a :class:`Categorical` object and ``dtype="category"`` losing ordered-ness (:issue:`49309`)
+-
 
 Datetimelike
 ^^^^^^^^^^^^
@@ -653,7 +657,7 @@ Numeric
 ^^^^^^^
 - Bug in :meth:`DataFrame.add` cannot apply ufunc when inputs contain mixed DataFrame type and Series type (:issue:`39853`)
 - Bug in DataFrame reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` would not be coerced to float (:issue:`49551`)
--
+- Bug in :meth:`DataFrame.sem` and :meth:`Series.sem` where an erroneous ``TypeError`` would always be raised when using data backed by an :class:`ArrowDtype` (:issue:`49759`)
 
 Conversion
 ^^^^^^^^^^
@@ -706,6 +710,7 @@ MultiIndex
 - Bug in :meth:`MultiIndex.union` not sorting when sort=None and index contains missing values (:issue:`49010`)
 - Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`)
 - Bug in :meth:`MultiIndex.symmetric_difference` losing extension array (:issue:`48607`)
+- Bug in :meth:`MultiIndex.join` losing dtypes when :class:`MultiIndex` has duplicates (:issue:`49830`)
 - Bug in :meth:`MultiIndex.putmask` losing extension array (:issue:`49830`)
 - Bug in :meth:`MultiIndex.value_counts` returning a :class:`Series` indexed by flat index of tuples instead of a :class:`MultiIndex` (:issue:`49558`)
 -
@@ -753,6 +758,8 @@ Reshaping
 - Bug in :meth:`DataFrame.pivot_table` raising ``ValueError`` with parameter ``margins=True`` when result is an empty :class:`DataFrame` (:issue:`49240`)
 - Clarified error message in :func:`merge` when passing invalid ``validate`` option (:issue:`49417`)
 - Bug in :meth:`DataFrame.explode` raising ``ValueError`` on multiple columns with ``NaN`` values or empty lists (:issue:`46084`)
+- Bug in :meth:`DataFrame.transpose` with ``IntervalDtype`` column with ``timedelta64[ns]`` endpoints (:issue:`44917`)
+-
 
 Sparse
 ^^^^^^
@@ -762,6 +769,7 @@ Sparse
 ExtensionArray
 ^^^^^^^^^^^^^^
 - Bug in :meth:`Series.mean` overflowing unnecessarily with nullable integers (:issue:`48378`)
+- Bug in :meth:`Series.tolist` for nullable dtypes returning NumPy scalars instead of Python scalars (:issue:`49890`)
 - Bug when concatenating an empty DataFrame with an ExtensionDtype to another DataFrame with the same ExtensionDtype, the resulting dtype turned into object (:issue:`48510`)
 -
 
 
@@ -85,9 +85,9 @@ dependencies:
   - cxx-compiler
 
   # code checks
-  - black=22.3.0
+  - black=22.10.0
   - cpplint
-  - flake8=5.0.4
+  - flake8=6.0.0
   - flake8-bugbear=22.7.1 # used by flake8, find likely bugs
   - isort>=5.2.1  # check that imports are in the right order
   - mypy=0.990
 
@@ -1,4 +1,5 @@
 from typing import (
+    Any,
     Hashable,
     Literal,
 )
@@ -13,6 +14,7 @@ def unique_label_indices(
 
 class Factorizer:
     count: int
+    uniques: Any
     def __init__(self, size_hint: int) -> None: ...
     def get_count(self) -> int: ...
     def factorize(
@@ -21,6 +23,7 @@ class Factorizer:
         sort: bool = ...,
         na_sentinel=...,
         na_value=...,
+        mask=...,
     ) -> npt.NDArray[np.intp]: ...
 
 class ObjectFactorizer(Factorizer):
 
@@ -1,14 +1,11 @@
 # Copyright (c) 2012, Lambda Foundry, Inc.
 # See LICENSE for the license
-from base64 import decode
 from collections import defaultdict
 from csv import (
     QUOTE_MINIMAL,
     QUOTE_NONE,
     QUOTE_NONNUMERIC,
 )
-from errno import ENOENT
-import inspect
 import sys
 import time
 import warnings
@@ -24,10 +21,7 @@ from pandas.core.arrays import (
 )
 
 cimport cython
-from cpython.bytes cimport (
-    PyBytes_AsString,
-    PyBytes_FromString,
-)
+from cpython.bytes cimport PyBytes_AsString
 from cpython.exc cimport (
     PyErr_Fetch,
     PyErr_Occurred,
@@ -631,7 +625,7 @@ cdef class TextReader:
         cdef:
             Py_ssize_t i, start, field_count, passed_count, unnamed_count, level
             char *word
-            str name, old_name
+            str name
             uint64_t hr, data_line = 0
             list header = []
             set unnamed_cols = set()
@@ -939,7 +933,7 @@ cdef class TextReader:
             object name, na_flist, col_dtype = None
             bint na_filter = 0
             int64_t num_cols
-            dict result
+            dict results
             bint use_nullable_dtypes
 
         start = self.parser_start
@@ -1461,7 +1455,7 @@ cdef _string_box_utf8(parser_t *parser, int64_t col,
                       bint na_filter, kh_str_starts_t *na_hashset,
                       const char *encoding_errors):
     cdef:
-        int error, na_count = 0
+        int na_count = 0
         Py_ssize_t i, lines
         coliter_t it
         const char *word = NULL
@@ -1517,16 +1511,14 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
     "Convert column data into codes, categories"
     cdef:
         int na_count = 0
-        Py_ssize_t i, size, lines
+        Py_ssize_t i, lines
         coliter_t it
         const char *word = NULL
 
         int64_t NA = -1
         int64_t[::1] codes
         int64_t current_category = 0
 
-        char *errors = "strict"
-
         int ret = 0
         kh_str_t *table
         khiter_t k
@@ -1972,7 +1964,6 @@ cdef kh_str_starts_t* kset_from_list(list values) except NULL:
 cdef kh_float64_t* kset_float64_from_list(values) except NULL:
     # caller takes responsibility for freeing the hash table
     cdef:
-        khiter_t k
         kh_float64_t *table
         int ret = 0
         float64_t val
@@ -1983,7 +1974,7 @@ cdef kh_float64_t* kset_float64_from_list(values) except NULL:
     for value in values:
         val = float(value)
 
-        k = kh_put_float64(table, val, &ret)
+        kh_put_float64(table, val, &ret)
 
     if table.n_buckets <= 128:
         # See reasoning in kset_from_list
Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@ Fixed regressions`
`24`	`24`
`25`	`25`	`Bug fixes`
`26`	`26`	`~~~~~~~~~`
`27`		`--`
	`27`	+- Bug in :meth:`.Styler.to_excel` leading to error when unrecognized ``border-style`` (e.g. ``"hair"``) provided to Excel writers (:issue:`48649`)
`28`	`28`	`-`
`29`	`29`
`30`	`30`	`.. ---------------------------------------------------------------------------`