@@ -458,7 +458,7 @@ def test_to_csv_tabs(
458
458
[True , False ],
459
459
)
460
460
@pytest .mark .skipif (pandas_gbq is None , reason = "required by pd.read_gbq" )
461
- def test_to_gbq_index (scalars_dfs , dataset_id , index ):
461
+ def test_to_gbq_w_index (scalars_dfs , dataset_id , index ):
462
462
"""Test the `to_gbq` API with the `index` parameter."""
463
463
scalars_df , scalars_pandas_df = scalars_dfs
464
464
destination_table = f"{ dataset_id } .test_index_df_to_gbq_{ index } "
@@ -485,48 +485,67 @@ def test_to_gbq_index(scalars_dfs, dataset_id, index):
485
485
pd .testing .assert_frame_equal (df_out , expected , check_index_type = False )
486
486
487
487
488
- @pytest .mark .parametrize (
489
- ("if_exists" , "expected_index" ),
490
- [
491
- pytest .param ("replace" , 1 ),
492
- pytest .param ("append" , 2 ),
493
- pytest .param (
494
- "fail" ,
495
- 0 ,
496
- marks = pytest .mark .xfail (
497
- raises = google .api_core .exceptions .Conflict ,
498
- ),
499
- ),
500
- pytest .param (
501
- "unknown" ,
502
- 0 ,
503
- marks = pytest .mark .xfail (
504
- raises = ValueError ,
505
- ),
506
- ),
507
- ],
508
- )
509
- @pytest .mark .skipif (pandas_gbq is None , reason = "required by pd.read_gbq" )
510
- def test_to_gbq_if_exists (
511
- scalars_df_default_index ,
512
- scalars_pandas_df_default_index ,
513
- dataset_id ,
514
- if_exists ,
515
- expected_index ,
516
- ):
517
- """Test the `to_gbq` API with the `if_exists` parameter."""
518
- destination_table = f"{ dataset_id } .test_to_gbq_if_exists_{ if_exists } "
488
+ def test_to_gbq_if_exists_is_fail (scalars_dfs , dataset_id ):
489
+ scalars_df , scalars_pandas_df = scalars_dfs
490
+ destination_table = f"{ dataset_id } .test_to_gbq_if_exists_is_fails"
491
+ scalars_df .to_gbq (destination_table )
519
492
520
- scalars_df_default_index .to_gbq (destination_table )
521
- scalars_df_default_index .to_gbq (destination_table , if_exists = if_exists )
493
+ gcs_df = pd .read_gbq (destination_table , index_col = "rowindex" )
494
+ assert len (gcs_df ) == len (scalars_pandas_df )
495
+ pd .testing .assert_index_equal (gcs_df .columns , scalars_pandas_df .columns )
522
496
523
- gcs_df = pd .read_gbq (destination_table )
524
- assert len (gcs_df .index ) == expected_index * len (
525
- scalars_pandas_df_default_index .index
526
- )
527
- pd .testing .assert_index_equal (
528
- gcs_df .columns , scalars_pandas_df_default_index .columns
529
- )
497
+ # Test that the default value of `if_exists` is "fail"
498
+ with pytest .raises (ValueError , match = "Table already exists" ):
499
+ scalars_df .to_gbq (destination_table )
500
+
501
+ with pytest .raises (ValueError , match = "Table already exists" ):
502
+ scalars_df .to_gbq (destination_table , if_exists = "fail" )
503
+
504
+
505
+ def test_to_gbq_if_exists_is_replace (scalars_dfs , dataset_id ):
506
+ scalars_df , scalars_pandas_df = scalars_dfs
507
+ destination_table = f"{ dataset_id } .test_to_gbq_if_exists_is_replace"
508
+ scalars_df .to_gbq (destination_table )
509
+
510
+ gcs_df = pd .read_gbq (destination_table , index_col = "rowindex" )
511
+ assert len (gcs_df ) == len (scalars_pandas_df )
512
+ pd .testing .assert_index_equal (gcs_df .columns , scalars_pandas_df .columns )
513
+
514
+ # When replacing a table with same schema
515
+ scalars_df .to_gbq (destination_table , if_exists = "replace" )
516
+ gcs_df = pd .read_gbq (destination_table , index_col = "rowindex" )
517
+ assert len (gcs_df ) == len (scalars_pandas_df )
518
+ pd .testing .assert_index_equal (gcs_df .columns , scalars_pandas_df .columns )
519
+
520
+ # When replacing a table with different schema
521
+ partitial_scalars_df = scalars_df .drop (columns = ["string_col" ])
522
+ partitial_scalars_df .to_gbq (destination_table , if_exists = "replace" )
523
+ gcs_df = pd .read_gbq (destination_table , index_col = "rowindex" )
524
+ assert len (gcs_df ) == len (partitial_scalars_df )
525
+ pd .testing .assert_index_equal (gcs_df .columns , partitial_scalars_df .columns )
526
+
527
+
528
+ def test_to_gbq_if_exists_is_append (scalars_dfs , dataset_id ):
529
+ scalars_df , scalars_pandas_df = scalars_dfs
530
+ destination_table = f"{ dataset_id } .test_to_gbq_if_exists_is_append"
531
+ scalars_df .to_gbq (destination_table )
532
+
533
+ gcs_df = pd .read_gbq (destination_table , index_col = "rowindex" )
534
+ assert len (gcs_df ) == len (scalars_pandas_df )
535
+ pd .testing .assert_index_equal (gcs_df .columns , scalars_pandas_df .columns )
536
+
537
+ # When appending to a table with same schema
538
+ scalars_df .to_gbq (destination_table , if_exists = "append" )
539
+ gcs_df = pd .read_gbq (destination_table , index_col = "rowindex" )
540
+ assert len (gcs_df ) == 2 * len (scalars_pandas_df )
541
+ pd .testing .assert_index_equal (gcs_df .columns , scalars_pandas_df .columns )
542
+
543
+ # When appending to a table with different schema
544
+ partitial_scalars_df = scalars_df .drop (columns = ["string_col" ])
545
+ partitial_scalars_df .to_gbq (destination_table , if_exists = "append" )
546
+ gcs_df = pd .read_gbq (destination_table , index_col = "rowindex" )
547
+ assert len (gcs_df ) == 3 * len (partitial_scalars_df )
548
+ pd .testing .assert_index_equal (gcs_df .columns , scalars_df .columns )
530
549
531
550
532
551
def test_to_gbq_w_duplicate_column_names (
@@ -773,6 +792,27 @@ def test_to_gbq_w_clustering_no_destination(
773
792
assert table .expires is not None
774
793
775
794
795
+ def test_to_gbq_w_clustering_existing_table (
796
+ scalars_df_default_index ,
797
+ dataset_id ,
798
+ bigquery_client ,
799
+ ):
800
+ destination_table = f"{ dataset_id } .test_to_gbq_w_clustering_existing_table"
801
+ scalars_df_default_index .to_gbq (destination_table )
802
+
803
+ table = bigquery_client .get_table (destination_table )
804
+ assert table .clustering_fields is None
805
+ assert table .expires is None
806
+
807
+ with pytest .raises (ValueError , match = "Table clustering fields cannot be changed" ):
808
+ clustering_columns = ["int64_col" ]
809
+ scalars_df_default_index .to_gbq (
810
+ destination_table ,
811
+ if_exists = "replace" ,
812
+ clustering_columns = clustering_columns ,
813
+ )
814
+
815
+
776
816
def test_to_gbq_w_invalid_destination_table (scalars_df_index ):
777
817
with pytest .raises (ValueError ):
778
818
scalars_df_index .to_gbq ("table_id" )
0 commit comments