@@ -354,20 +354,15 @@ def check_accumulate(self, s, op_name, skipna):
354
354
self .assert_series_equal (result , expected , check_dtype = False )
355
355
356
356
@pytest .mark .parametrize ("skipna" , [True , False ])
357
- def test_accumulate_series_raises (
358
- self , data , all_numeric_accumulations , skipna , request
359
- ):
357
+ def test_accumulate_series_raises (self , data , all_numeric_accumulations , skipna ):
360
358
pa_type = data .dtype .pyarrow_dtype
361
359
if (
362
360
(pa .types .is_integer (pa_type ) or pa .types .is_floating (pa_type ))
363
361
and all_numeric_accumulations == "cumsum"
364
362
and not pa_version_under9p0
365
363
):
366
- request .node .add_marker (
367
- pytest .mark .xfail (
368
- reason = f"{ all_numeric_accumulations } implemented for { pa_type } "
369
- )
370
- )
364
+ pytest .skip ("These work, are tested by test_accumulate_series." )
365
+
371
366
op_name = all_numeric_accumulations
372
367
ser = pd .Series (data )
373
368
@@ -377,21 +372,43 @@ def test_accumulate_series_raises(
377
372
@pytest .mark .parametrize ("skipna" , [True , False ])
378
373
def test_accumulate_series (self , data , all_numeric_accumulations , skipna , request ):
379
374
pa_type = data .dtype .pyarrow_dtype
375
+ op_name = all_numeric_accumulations
376
+ ser = pd .Series (data )
377
+
378
+ do_skip = False
379
+ if pa .types .is_string (pa_type ) or pa .types .is_binary (pa_type ):
380
+ if op_name in ["cumsum" , "cumprod" ]:
381
+ do_skip = True
382
+ elif pa .types .is_temporal (pa_type ) and not pa .types .is_duration (pa_type ):
383
+ if op_name in ["cumsum" , "cumprod" ]:
384
+ do_skip = True
385
+ elif pa .types .is_duration (pa_type ):
386
+ if op_name == "cumprod" :
387
+ do_skip = True
388
+
389
+ if do_skip :
390
+ pytest .skip (
391
+ "These should *not* work, we test in test_accumulate_series_raises "
392
+ "that these correctly raise."
393
+ )
394
+
380
395
if all_numeric_accumulations != "cumsum" or pa_version_under9p0 :
381
396
request .node .add_marker (
382
397
pytest .mark .xfail (
383
398
reason = f"{ all_numeric_accumulations } not implemented" ,
384
399
raises = NotImplementedError ,
385
400
)
386
401
)
387
- elif not (pa .types .is_integer (pa_type ) or pa .types .is_floating (pa_type )):
402
+ elif all_numeric_accumulations == "cumsum" and (
403
+ pa .types .is_duration (pa_type ) or pa .types .is_boolean (pa_type )
404
+ ):
388
405
request .node .add_marker (
389
406
pytest .mark .xfail (
390
- reason = f"{ all_numeric_accumulations } not implemented for { pa_type } "
407
+ reason = f"{ all_numeric_accumulations } not implemented for { pa_type } " ,
408
+ raises = NotImplementedError ,
391
409
)
392
410
)
393
- op_name = all_numeric_accumulations
394
- ser = pd .Series (data )
411
+
395
412
self .check_accumulate (ser , op_name , skipna )
396
413
397
414
@@ -418,6 +435,47 @@ def check_reduce(self, ser, op_name, skipna):
418
435
@pytest .mark .parametrize ("skipna" , [True , False ])
419
436
def test_reduce_series (self , data , all_numeric_reductions , skipna , request ):
420
437
pa_dtype = data .dtype .pyarrow_dtype
438
+ opname = all_numeric_reductions
439
+
440
+ ser = pd .Series (data )
441
+
442
+ should_work = True
443
+ if pa .types .is_temporal (pa_dtype ) and opname in [
444
+ "sum" ,
445
+ "var" ,
446
+ "skew" ,
447
+ "kurt" ,
448
+ "prod" ,
449
+ ]:
450
+ if pa .types .is_duration (pa_dtype ) and opname in ["sum" ]:
451
+ # summing timedeltas is one case that *is* well-defined
452
+ pass
453
+ else :
454
+ should_work = False
455
+ elif (
456
+ pa .types .is_string (pa_dtype ) or pa .types .is_binary (pa_dtype )
457
+ ) and opname in [
458
+ "sum" ,
459
+ "mean" ,
460
+ "median" ,
461
+ "prod" ,
462
+ "std" ,
463
+ "sem" ,
464
+ "var" ,
465
+ "skew" ,
466
+ "kurt" ,
467
+ ]:
468
+ should_work = False
469
+
470
+ if not should_work :
471
+ # matching the non-pyarrow versions, these operations *should* not
472
+ # work for these dtypes
473
+ msg = f"does not support reduction '{ opname } '"
474
+ with pytest .raises (TypeError , match = msg ):
475
+ getattr (ser , opname )(skipna = skipna )
476
+
477
+ return
478
+
421
479
xfail_mark = pytest .mark .xfail (
422
480
raises = TypeError ,
423
481
reason = (
@@ -449,24 +507,16 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
449
507
),
450
508
)
451
509
)
452
- elif (
453
- not (
454
- pa .types .is_integer (pa_dtype )
455
- or pa .types .is_floating (pa_dtype )
456
- or pa .types .is_boolean (pa_dtype )
457
- )
458
- and not (
459
- all_numeric_reductions in {"min" , "max" }
460
- and (
461
- (
462
- pa .types .is_temporal (pa_dtype )
463
- and not pa .types .is_duration (pa_dtype )
464
- )
465
- or pa .types .is_string (pa_dtype )
466
- or pa .types .is_binary (pa_dtype )
467
- )
468
- )
469
- and not all_numeric_reductions == "count"
510
+
511
+ elif all_numeric_reductions in [
512
+ "mean" ,
513
+ "median" ,
514
+ "std" ,
515
+ "sem" ,
516
+ ] and pa .types .is_temporal (pa_dtype ):
517
+ request .node .add_marker (xfail_mark )
518
+ elif all_numeric_reductions in ["sum" , "min" , "max" ] and pa .types .is_duration (
519
+ pa_dtype
470
520
):
471
521
request .node .add_marker (xfail_mark )
472
522
elif pa .types .is_boolean (pa_dtype ) and all_numeric_reductions in {
0 commit comments