Skip to content

Commit ab661cf

Browse files
author
MarcoGorelli
committed
Merge remote-tracking branch 'upstream/main' into pandas-devgh-49298
2 parents 06f1a53 + 13f758c commit ab661cf

File tree

76 files changed

+1060
-1079
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+1060
-1079
lines changed

.github/workflows/code-checks.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ jobs:
3636

3737
- name: Run pre-commit
3838
uses: pre-commit/action@v2.0.3
39+
with:
40+
extra_args: --verbose --all-files
3941

4042
docstring_typing_pylint:
4143
name: Docstring validation, typing, and pylint
@@ -93,7 +95,7 @@ jobs:
9395
- name: Typing + pylint
9496
uses: pre-commit/action@v2.0.3
9597
with:
96-
extra_args: --hook-stage manual --all-files
98+
extra_args: --verbose --hook-stage manual --all-files
9799
if: ${{ steps.build.outcome == 'success' && always() }}
98100

99101
- name: Run docstring validation script tests

.pre-commit-config.yaml

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,18 @@ repos:
1717
entry: python scripts/run_vulture.py
1818
pass_filenames: true
1919
require_serial: false
20-
- repo: https://github.com/python/black
21-
rev: 22.10.0
22-
hooks:
23-
- id: black
2420
- repo: https://github.com/codespell-project/codespell
2521
rev: v2.2.2
2622
hooks:
2723
- id: codespell
2824
types_or: [python, rst, markdown]
2925
additional_dependencies: [tomli]
3026
- repo: https://github.com/MarcoGorelli/cython-lint
31-
rev: v0.2.1
27+
rev: v0.9.1
3228
hooks:
3329
- id: cython-lint
3430
- repo: https://github.com/pre-commit/pre-commit-hooks
35-
rev: v4.3.0
31+
rev: v4.4.0
3632
hooks:
3733
- id: debug-statements
3834
- id: end-of-file-fixer
@@ -51,22 +47,22 @@ repos:
5147
exclude: ^pandas/_libs/src/(klib|headers)/
5248
args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
5349
- repo: https://github.com/PyCQA/flake8
54-
rev: 5.0.4
50+
rev: 6.0.0
5551
hooks:
5652
- id: flake8
5753
# Need to patch os.remove rule in pandas-dev-flaker
5854
exclude: ^ci/fix_wheels.py
5955
additional_dependencies: &flake8_dependencies
60-
- flake8==5.0.4
56+
- flake8==6.0.0
6157
- flake8-bugbear==22.7.1
6258
- pandas-dev-flaker==0.5.0
6359
- repo: https://github.com/pycqa/pylint
64-
rev: v2.15.5
60+
rev: v2.15.6
6561
hooks:
6662
- id: pylint
6763
stages: [manual]
6864
- repo: https://github.com/pycqa/pylint
69-
rev: v2.15.5
65+
rev: v2.15.6
7066
hooks:
7167
- id: pylint
7268
alias: redefined-outer-name
@@ -79,8 +75,6 @@ repos:
7975
|^pandas/util/_test_decorators\.py # keep excluded
8076
|^pandas/_version\.py # keep excluded
8177
|^pandas/conftest\.py # keep excluded
82-
|^pandas/core/tools/datetimes\.py
83-
|^pandas/io/formats/format\.py
8478
|^pandas/core/generic\.py
8579
args: [--disable=all, --enable=redefined-outer-name]
8680
stages: [manual]
@@ -89,7 +83,7 @@ repos:
8983
hooks:
9084
- id: isort
9185
- repo: https://github.com/asottile/pyupgrade
92-
rev: v3.2.0
86+
rev: v3.2.2
9387
hooks:
9488
- id: pyupgrade
9589
args: [--py38-plus]
@@ -114,6 +108,16 @@ repos:
114108
additional_dependencies: *flake8_dependencies
115109
- repo: local
116110
hooks:
111+
# NOTE: we make `black` a local hook because if it's installed from
112+
# PyPI (rather than from source) then it'll run twice as fast thanks to mypyc
113+
- id: black
114+
name: black
115+
description: "Black: The uncompromising Python code formatter"
116+
entry: black
117+
language: python
118+
require_serial: true
119+
types_or: [python, pyi]
120+
additional_dependencies: [black==22.10.0]
117121
- id: pyright
118122
# note: assumes python env is setup and activated
119123
name: pyright
@@ -270,6 +274,7 @@ repos:
270274
entry: python scripts/validate_min_versions_in_sync.py
271275
language: python
272276
files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$
277+
additional_dependencies: [tomli]
273278
- id: validate-errors-locations
274279
name: Validate errors locations
275280
description: Validate errors are in appropriate locations.

asv_bench/asv.conf.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
"odfpy": [],
5858
"jinja2": [],
5959
},
60-
"conda_channels": ["defaults", "conda-forge"],
60+
"conda_channels": ["conda-forge"],
6161
// Combinations of libraries/python versions can be excluded/included
6262
// from the set to test. Each entry is a dictionary containing additional
6363
// key-value pairs to include/exclude.
@@ -125,6 +125,7 @@
125125
"regression_thresholds": {
126126
},
127127
"build_command":
128-
["python setup.py build -j4",
128+
["python -m pip install versioneer[toml]",
129+
"python setup.py build -j4",
129130
"PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"],
130131
}

asv_bench/benchmarks/join_merge.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,38 @@ def time_merge_dataframes_cross(self, sort):
273273
merge(self.left.loc[:2000], self.right.loc[:2000], how="cross", sort=sort)
274274

275275

276+
class MergeEA:
277+
278+
params = [
279+
"Int64",
280+
"Int32",
281+
"Int16",
282+
"UInt64",
283+
"UInt32",
284+
"UInt16",
285+
"Float64",
286+
"Float32",
287+
]
288+
param_names = ["dtype"]
289+
290+
def setup(self, dtype):
291+
N = 10_000
292+
indices = np.arange(1, N)
293+
key = np.tile(indices[:8000], 10)
294+
self.left = DataFrame(
295+
{"key": Series(key, dtype=dtype), "value": np.random.randn(80000)}
296+
)
297+
self.right = DataFrame(
298+
{
299+
"key": Series(indices[2000:], dtype=dtype),
300+
"value2": np.random.randn(7999),
301+
}
302+
)
303+
304+
def time_merge(self, dtype):
305+
merge(self.left, self.right)
306+
307+
276308
class I8Merge:
277309

278310
params = ["inner", "outer", "left", "right"]

doc/source/user_guide/style.ipynb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1594,8 +1594,9 @@
15941594
"\n",
15951595
"\n",
15961596
"- Only CSS2 named colors and hex colors of the form `#rgb` or `#rrggbb` are currently supported.\n",
1597-
"- The following pseudo CSS properties are also available to set excel specific style properties:\n",
1597+
"- The following pseudo CSS properties are also available to set Excel specific style properties:\n",
15981598
" - `number-format`\n",
1599+
" - `border-style` (for Excel-specific styles: \"hair\", \"mediumDashDot\", \"dashDotDot\", \"mediumDashDotDot\", \"dashDot\", \"slantDashDot\", or \"mediumDashed\")\n",
15991600
"\n",
16001601
"Table level styles, and data cell CSS-classes are not included in the export to Excel: individual cells must have their properties mapped by the `Styler.apply` and/or `Styler.applymap` methods."
16011602
]

doc/source/whatsnew/v1.5.3.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Fixed regressions
2424

2525
Bug fixes
2626
~~~~~~~~~
27-
-
27+
- Bug in :meth:`.Styler.to_excel` leading to error when unrecognized ``border-style`` (e.g. ``"hair"``) provided to Excel writers (:issue:`48649`)
2828
-
2929

3030
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v2.0.0.rst

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ Other enhancements
5757
- :func:`assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`)
5858
- Added new argument ``use_nullable_dtypes`` to :func:`read_csv` and :func:`read_excel` to enable automatic conversion to nullable dtypes (:issue:`36712`)
5959
- Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`)
60+
- Added support for extension array dtypes in :func:`merge` (:issue:`44240`)
6061
- Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`)
6162
- :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`)
6263
- Fix ``test`` optional_extra by adding missing test package ``pytest-asyncio`` (:issue:`48361`)
@@ -572,7 +573,7 @@ Removal of prior version deprecations/changes
572573
- Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)
573574
- Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`)
574575
- Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`)
575-
- Changed default of ``numeric_only`` to ``False`` in :meth:`.DataFrameGroupBy.sum` and :meth:`.DataFrameGroupBy.mean` (:issue:`46072`)
576+
- Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`)
576577
- Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`)
577578
-
578579

@@ -613,6 +614,7 @@ Performance improvements
613614
- Performance improvement in :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default). Now the index will be a :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49745`)
614615
- Performance improvement in :func:`merge` when not merging on the index - the new index will now be :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49478`)
615616
- Performance improvement in :meth:`DataFrame.to_dict` and :meth:`Series.to_dict` when using any non-object dtypes (:issue:`46470`)
617+
- Performance improvement in :func:`read_html` when there are multiple tables (:issue:`49929`)
616618

617619
.. ---------------------------------------------------------------------------
618620
.. _whatsnew_200.bug_fixes:
@@ -624,6 +626,8 @@ Categorical
624626
^^^^^^^^^^^
625627
- Bug in :meth:`Categorical.set_categories` losing dtype information (:issue:`48812`)
626628
- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would reorder categories when used as a grouper (:issue:`48749`)
629+
- Bug in :class:`Categorical` constructor when constructing from a :class:`Categorical` object and ``dtype="category"`` losing ordered-ness (:issue:`49309`)
630+
-
627631

628632
Datetimelike
629633
^^^^^^^^^^^^
@@ -653,7 +657,7 @@ Numeric
653657
^^^^^^^
654658
- Bug in :meth:`DataFrame.add` cannot apply ufunc when inputs contain mixed DataFrame type and Series type (:issue:`39853`)
655659
- Bug in DataFrame reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` would not be coerced to float (:issue:`49551`)
656-
-
660+
- Bug in :meth:`DataFrame.sem` and :meth:`Series.sem` where an erroneous ``TypeError`` would always raise when using data backed by an :class:`ArrowDtype` (:issue:`49759`)
657661

658662
Conversion
659663
^^^^^^^^^^
@@ -706,6 +710,7 @@ MultiIndex
706710
- Bug in :meth:`MultiIndex.union` not sorting when sort=None and index contains missing values (:issue:`49010`)
707711
- Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`)
708712
- Bug in :meth:`MultiIndex.symmetric_difference` losing extension array (:issue:`48607`)
713+
- Bug in :meth:`MultiIndex.join` losing dtypes when :class:`MultiIndex` has duplicates (:issue:`49830`)
709714
- Bug in :meth:`MultiIndex.putmask` losing extension array (:issue:`49830`)
710715
- Bug in :meth:`MultiIndex.value_counts` returning a :class:`Series` indexed by flat index of tuples instead of a :class:`MultiIndex` (:issue:`49558`)
711716
-
@@ -753,6 +758,8 @@ Reshaping
753758
- Bug in :meth:`DataFrame.pivot_table` raising ``ValueError`` with parameter ``margins=True`` when result is an empty :class:`DataFrame` (:issue:`49240`)
754759
- Clarified error message in :func:`merge` when passing invalid ``validate`` option (:issue:`49417`)
755760
- Bug in :meth:`DataFrame.explode` raising ``ValueError`` on multiple columns with ``NaN`` values or empty lists (:issue:`46084`)
761+
- Bug in :meth:`DataFrame.transpose` with ``IntervalDtype`` column with ``timedelta64[ns]`` endpoints (:issue:`44917`)
762+
-
756763

757764
Sparse
758765
^^^^^^
@@ -762,6 +769,7 @@ Sparse
762769
ExtensionArray
763770
^^^^^^^^^^^^^^
764771
- Bug in :meth:`Series.mean` overflowing unnecessarily with nullable integers (:issue:`48378`)
772+
- Bug in :meth:`Series.tolist` for nullable dtypes returning numpy scalars instead of python scalars (:issue:`49890`)
765773
- Bug when concatenating an empty DataFrame with an ExtensionDtype to another DataFrame with the same ExtensionDtype, the resulting dtype turned into object (:issue:`48510`)
766774
-
767775

environment.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,9 @@ dependencies:
8585
- cxx-compiler
8686

8787
# code checks
88-
- black=22.3.0
88+
- black=22.10.0
8989
- cpplint
90-
- flake8=5.0.4
90+
- flake8=6.0.0
9191
- flake8-bugbear=22.7.1 # used by flake8, find likely bugs
9292
- isort>=5.2.1 # check that imports are in the right order
9393
- mypy=0.990

pandas/_libs/hashtable.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from typing import (
2+
Any,
23
Hashable,
34
Literal,
45
)
@@ -13,6 +14,7 @@ def unique_label_indices(
1314

1415
class Factorizer:
1516
count: int
17+
uniques: Any
1618
def __init__(self, size_hint: int) -> None: ...
1719
def get_count(self) -> int: ...
1820
def factorize(
@@ -21,6 +23,7 @@ class Factorizer:
2123
sort: bool = ...,
2224
na_sentinel=...,
2325
na_value=...,
26+
mask=...,
2427
) -> npt.NDArray[np.intp]: ...
2528

2629
class ObjectFactorizer(Factorizer):

pandas/_libs/parsers.pyx

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
11
# Copyright (c) 2012, Lambda Foundry, Inc.
22
# See LICENSE for the license
3-
from base64 import decode
43
from collections import defaultdict
54
from csv import (
65
QUOTE_MINIMAL,
76
QUOTE_NONE,
87
QUOTE_NONNUMERIC,
98
)
10-
from errno import ENOENT
11-
import inspect
129
import sys
1310
import time
1411
import warnings
@@ -24,10 +21,7 @@ from pandas.core.arrays import (
2421
)
2522

2623
cimport cython
27-
from cpython.bytes cimport (
28-
PyBytes_AsString,
29-
PyBytes_FromString,
30-
)
24+
from cpython.bytes cimport PyBytes_AsString
3125
from cpython.exc cimport (
3226
PyErr_Fetch,
3327
PyErr_Occurred,
@@ -631,7 +625,7 @@ cdef class TextReader:
631625
cdef:
632626
Py_ssize_t i, start, field_count, passed_count, unnamed_count, level
633627
char *word
634-
str name, old_name
628+
str name
635629
uint64_t hr, data_line = 0
636630
list header = []
637631
set unnamed_cols = set()
@@ -939,7 +933,7 @@ cdef class TextReader:
939933
object name, na_flist, col_dtype = None
940934
bint na_filter = 0
941935
int64_t num_cols
942-
dict result
936+
dict results
943937
bint use_nullable_dtypes
944938

945939
start = self.parser_start
@@ -1461,7 +1455,7 @@ cdef _string_box_utf8(parser_t *parser, int64_t col,
14611455
bint na_filter, kh_str_starts_t *na_hashset,
14621456
const char *encoding_errors):
14631457
cdef:
1464-
int error, na_count = 0
1458+
int na_count = 0
14651459
Py_ssize_t i, lines
14661460
coliter_t it
14671461
const char *word = NULL
@@ -1517,16 +1511,14 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
15171511
"Convert column data into codes, categories"
15181512
cdef:
15191513
int na_count = 0
1520-
Py_ssize_t i, size, lines
1514+
Py_ssize_t i, lines
15211515
coliter_t it
15221516
const char *word = NULL
15231517

15241518
int64_t NA = -1
15251519
int64_t[::1] codes
15261520
int64_t current_category = 0
15271521

1528-
char *errors = "strict"
1529-
15301522
int ret = 0
15311523
kh_str_t *table
15321524
khiter_t k
@@ -1972,7 +1964,6 @@ cdef kh_str_starts_t* kset_from_list(list values) except NULL:
19721964
cdef kh_float64_t* kset_float64_from_list(values) except NULL:
19731965
# caller takes responsibility for freeing the hash table
19741966
cdef:
1975-
khiter_t k
19761967
kh_float64_t *table
19771968
int ret = 0
19781969
float64_t val
@@ -1983,7 +1974,7 @@ cdef kh_float64_t* kset_float64_from_list(values) except NULL:
19831974
for value in values:
19841975
val = float(value)
19851976

1986-
k = kh_put_float64(table, val, &ret)
1977+
kh_put_float64(table, val, &ret)
19871978

19881979
if table.n_buckets <= 128:
19891980
# See reasoning in kset_from_list

0 commit comments

Comments
 (0)