Skip to content

Commit 0439322

Browse files
Merge branch 'main' into raise-on-parse-int-overflow
2 parents a545602 + dec9be2 commit 0439322

File tree

490 files changed

+5511
-11972
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

490 files changed

+5511
-11972
lines changed

.github/workflows/scorecards.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
persist-credentials: false
3030

3131
- name: "Run analysis"
32-
uses: ossf/scorecard-action@v2.0.3
32+
uses: ossf/scorecard-action@v2.0.6
3333
with:
3434
results_file: results.sarif
3535
results_format: sarif

.github/workflows/wheels.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252
- [windows-2019, win_amd64]
5353
- [windows-2019, win32]
5454
# TODO: support PyPy?
55-
python: [["cp38", "3.8"], ["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11-dev"]]# "pp38", "pp39"]
55+
python: [["cp38", "3.8"], ["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]# "pp38", "pp39"]
5656
env:
5757
IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
5858
IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
@@ -73,7 +73,7 @@ jobs:
7373
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
7474

7575
# Used to test the built wheels
76-
- uses: actions/setup-python@v3
76+
- uses: actions/setup-python@v4
7777
with:
7878
python-version: ${{ matrix.python[1] }}
7979

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ repos:
102102
types: [python]
103103
stages: [manual]
104104
additional_dependencies: &pyright_dependencies
105-
105+
106106
- id: pyright_reportGeneralTypeIssues
107107
# note: assumes python env is setup and activated
108108
name: pyright reportGeneralTypeIssues

asv_bench/asv.conf.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
"openpyxl": [],
5555
"xlsxwriter": [],
5656
"xlrd": [],
57-
"xlwt": [],
5857
"odfpy": [],
5958
"jinja2": [],
6059
},

asv_bench/benchmarks/array.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,24 @@ def time_from_integer_array(self):
4444
pd.array(self.values_integer, dtype="Int64")
4545

4646

47+
class StringArray:
48+
def setup(self):
49+
N = 100_000
50+
values = tm.rands_array(3, N)
51+
self.values_obj = np.array(values, dtype="object")
52+
self.values_str = np.array(values, dtype="U")
53+
self.values_list = values.tolist()
54+
55+
def time_from_np_object_array(self):
56+
pd.array(self.values_obj, dtype="string")
57+
58+
def time_from_np_str_array(self):
59+
pd.array(self.values_str, dtype="string")
60+
61+
def time_from_list(self):
62+
pd.array(self.values_list, dtype="string")
63+
64+
4765
class ArrowStringArray:
4866

4967
params = [False, True]

asv_bench/benchmarks/groupby.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
Timestamp,
1515
date_range,
1616
period_range,
17+
to_timedelta,
1718
)
1819

1920
from .pandas_vb_common import tm
@@ -35,7 +36,6 @@
3536
"pct_change",
3637
"min",
3738
"var",
38-
"mad",
3939
"describe",
4040
"std",
4141
"quantile",
@@ -52,7 +52,6 @@
5252
"cummax",
5353
"pct_change",
5454
"var",
55-
"mad",
5655
"describe",
5756
"std",
5857
},
@@ -311,7 +310,7 @@ def time_different_python_functions_multicol(self, df):
311310
df.groupby(["key1", "key2"]).agg([sum, min, max])
312311

313312
def time_different_python_functions_singlecol(self, df):
314-
df.groupby("key1").agg([sum, min, max])
313+
df.groupby("key1")[["value1", "value2", "value3"]].agg([sum, min, max])
315314

316315

317316
class GroupStrings:
@@ -437,7 +436,6 @@ class GroupByMethods:
437436
"first",
438437
"head",
439438
"last",
440-
"mad",
441439
"max",
442440
"min",
443441
"median",
@@ -483,7 +481,7 @@ def setup(self, dtype, method, application, ncols):
483481

484482
if method == "describe":
485483
ngroups = 20
486-
elif method in ["mad", "skew"]:
484+
elif method == "skew":
487485
ngroups = 100
488486
else:
489487
ngroups = 1000
@@ -685,7 +683,7 @@ class String:
685683
def setup(self, dtype, method):
686684
cols = list("abcdefghjkl")
687685
self.df = DataFrame(
688-
np.random.randint(0, 100, size=(1_000_000, len(cols))),
686+
np.random.randint(0, 100, size=(10_000, len(cols))),
689687
columns=cols,
690688
dtype=dtype,
691689
)
@@ -990,4 +988,31 @@ def time_sample_weights(self):
990988
self.df.groupby(self.groups).sample(n=1, weights=self.weights)
991989

992990

991+
class Resample:
992+
# GH 28635
993+
def setup(self):
994+
num_timedeltas = 20_000
995+
num_groups = 3
996+
997+
index = MultiIndex.from_product(
998+
[
999+
np.arange(num_groups),
1000+
to_timedelta(np.arange(num_timedeltas), unit="s"),
1001+
],
1002+
names=["groups", "timedeltas"],
1003+
)
1004+
data = np.random.randint(0, 1000, size=(len(index)))
1005+
1006+
self.df = DataFrame(data, index=index).reset_index("timedeltas")
1007+
self.df_multiindex = DataFrame(data, index=index)
1008+
1009+
def time_resample(self):
1010+
self.df.groupby(level="groups").resample("10s", on="timedeltas").mean()
1011+
1012+
def time_resample_multiindex(self):
1013+
self.df_multiindex.groupby(level="groups").resample(
1014+
"10s", level="timedeltas"
1015+
).mean()
1016+
1017+
9931018
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/io/excel.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def _generate_dataframe():
3333

3434
class WriteExcel:
3535

36-
params = ["openpyxl", "xlsxwriter", "xlwt"]
36+
params = ["openpyxl", "xlsxwriter"]
3737
param_names = ["engine"]
3838

3939
def setup(self, engine):
@@ -68,10 +68,9 @@ def time_write_excel_style(self, engine):
6868

6969
class ReadExcel:
7070

71-
params = ["xlrd", "openpyxl", "odf"]
71+
params = ["openpyxl", "odf"]
7272
param_names = ["engine"]
7373
fname_excel = "spreadsheet.xlsx"
74-
fname_excel_xls = "spreadsheet.xls"
7574
fname_odf = "spreadsheet.ods"
7675

7776
def _create_odf(self):
@@ -92,13 +91,10 @@ def setup_cache(self):
9291
self.df = _generate_dataframe()
9392

9493
self.df.to_excel(self.fname_excel, sheet_name="Sheet1")
95-
self.df.to_excel(self.fname_excel_xls, sheet_name="Sheet1")
9694
self._create_odf()
9795

9896
def time_read_excel(self, engine):
99-
if engine == "xlrd":
100-
fname = self.fname_excel_xls
101-
elif engine == "odf":
97+
if engine == "odf":
10298
fname = self.fname_odf
10399
else:
104100
fname = self.fname_excel
@@ -107,9 +103,7 @@ def time_read_excel(self, engine):
107103

108104
class ReadExcelNRows(ReadExcel):
109105
def time_read_excel(self, engine):
110-
if engine == "xlrd":
111-
fname = self.fname_excel_xls
112-
elif engine == "odf":
106+
if engine == "odf":
113107
fname = self.fname_odf
114108
else:
115109
fname = self.fname_excel

asv_bench/benchmarks/io/sql.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def setup(self, connection):
3838
},
3939
index=tm.makeStringIndex(N),
4040
)
41-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
41+
self.df.iloc[1000:3000, 1] = np.nan
4242
self.df["date"] = self.df["datetime"].dt.date
4343
self.df["time"] = self.df["datetime"].dt.time
4444
self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -88,7 +88,7 @@ def setup(self, connection, dtype):
8888
},
8989
index=tm.makeStringIndex(N),
9090
)
91-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
91+
self.df.iloc[1000:3000, 1] = np.nan
9292
self.df["date"] = self.df["datetime"].dt.date
9393
self.df["time"] = self.df["datetime"].dt.time
9494
self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -117,7 +117,7 @@ def setup(self):
117117
},
118118
index=tm.makeStringIndex(N),
119119
)
120-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
120+
self.df.iloc[1000:3000, 1] = np.nan
121121
self.df["date"] = self.df["datetime"].dt.date
122122
self.df["time"] = self.df["datetime"].dt.time
123123
self.df["datetime_string"] = self.df["datetime"].astype(str)
@@ -164,7 +164,7 @@ def setup(self, dtype):
164164
},
165165
index=tm.makeStringIndex(N),
166166
)
167-
self.df.loc[1000:3000, "float_with_nan"] = np.nan
167+
self.df.iloc[1000:3000, 1] = np.nan
168168
self.df["date"] = self.df["datetime"].dt.date
169169
self.df["time"] = self.df["datetime"].dt.time
170170
self.df["datetime_string"] = self.df["datetime"].astype(str)

asv_bench/benchmarks/io/stata.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ def setup(self, convert_dates):
3838
)
3939
self.df["float32_"] = np.array(np.random.randn(N), dtype=np.float32)
4040
self.convert_dates = {"index": convert_dates}
41-
self.df.to_stata(self.fname, self.convert_dates)
41+
self.df.to_stata(self.fname, convert_dates=self.convert_dates)
4242

4343
def time_read_stata(self, convert_dates):
4444
read_stata(self.fname)
4545

4646
def time_write_stata(self, convert_dates):
47-
self.df.to_stata(self.fname, self.convert_dates)
47+
self.df.to_stata(self.fname, convert_dates=self.convert_dates)
4848

4949

5050
class StataMissing(Stata):
@@ -54,7 +54,7 @@ def setup(self, convert_dates):
5454
missing_data = np.random.randn(self.N)
5555
missing_data[missing_data < 0] = np.nan
5656
self.df[f"missing_{i}"] = missing_data
57-
self.df.to_stata(self.fname, self.convert_dates)
57+
self.df.to_stata(self.fname, convert_dates=self.convert_dates)
5858

5959

6060
from ..pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/io/style.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,11 @@ def _style_format(self):
8383
def _style_apply_format_hide(self):
8484
self.st = self.df.style.applymap(lambda v: "color: red;")
8585
self.st.format("{:.3f}")
86-
self.st.hide_index(self.st.index[1:])
87-
self.st.hide_columns(self.st.columns[1:])
86+
self.st.hide(self.st.index[1:], axis=0)
87+
self.st.hide(self.st.columns[1:], axis=1)
8888

8989
def _style_tooltips(self):
9090
ttips = DataFrame("abc", index=self.df.index[::2], columns=self.df.columns[::2])
9191
self.st = self.df.style.set_tooltips(ttips)
92-
self.st.hide_index(self.st.index[12:])
93-
self.st.hide_columns(self.st.columns[12:])
92+
self.st.hide(self.st.index[12:], axis=0)
93+
self.st.hide(self.st.columns[12:], axis=1)

0 commit comments

Comments
 (0)