9
9
import pandas .testing
10
10
import pytest
11
11
12
+ try :
13
+ import db_dtypes
14
+ except ImportError :
15
+ db_dtypes = None
16
+
12
17
13
18
pytest .importorskip ("google.cloud.bigquery" , minversion = "1.24.0" )
14
19
15
20
21
@pytest.fixture(params=["default", "load_parquet", "load_csv"])
def api_method(request):
    """Parametrized fixture yielding each ``api_method`` value in turn.

    Any test that requests this fixture is run once per entry in ``params``;
    the tests in this module forward the value to the function under test as
    its ``api_method`` keyword argument.
    """
    selected_method = request.param
    return selected_method
24
+
25
+
16
26
@pytest .fixture
17
27
def method_under_test (credentials , project_id ):
18
28
import pandas_gbq
@@ -23,7 +33,7 @@ def method_under_test(credentials, project_id):
23
33
24
34
25
35
@pytest .mark .parametrize (
26
- ["input_series" ],
36
+ ["input_series" , "skip_csv" ],
27
37
[
28
38
# Ensure that 64-bit floating point numbers are unchanged.
29
39
# See: https://github.com/pydata/pandas-gbq/issues/326
@@ -41,17 +51,13 @@ def method_under_test(credentials, project_id):
41
51
],
42
52
name = "test_col" ,
43
53
),
54
+ False ,
44
55
),
45
56
(
46
57
pandas .Series (
47
58
[
48
59
"abc" ,
49
60
"defg" ,
50
- # Ensure that empty strings are written as empty string,
51
- # not NULL. See:
52
- # https://github.com/googleapis/python-bigquery-pandas/issues/366
53
- "" ,
54
- None ,
55
61
# Ensure that unicode characters are encoded. See:
56
62
# https://github.com/googleapis/python-bigquery-pandas/issues/106
57
63
"信用卡" ,
@@ -60,23 +66,105 @@ def method_under_test(credentials, project_id):
60
66
],
61
67
name = "test_col" ,
62
68
),
69
+ False ,
70
+ ),
71
+ (
72
+ pandas .Series (
73
+ [
74
+ "abc" ,
75
+ "defg" ,
76
+ # Ensure that empty strings are written as empty string,
77
+ # not NULL. See:
78
+ # https://github.com/googleapis/python-bigquery-pandas/issues/366
79
+ "" ,
80
+ None ,
81
+ ],
82
+ name = "empty_strings" ,
83
+ ),
84
+ True ,
63
85
),
64
86
],
65
87
)
66
88
def test_series_round_trip(
    method_under_test,
    random_dataset_id,
    bigquery_client,
    input_series,
    api_method,
    skip_csv,
):
    """Upload a series and check that the same values are read back.

    The series is written twice into a two-column DataFrame, uploaded with
    ``method_under_test`` using the requested ``api_method``, and the
    ``test_col`` column fetched through ``bigquery_client.list_rows`` is
    compared against the (sorted) input.

    Cases flagged with ``skip_csv`` are skipped for the ``load_csv`` method.
    """
    if api_method == "load_csv" and skip_csv:
        pytest.skip("Loading with CSV not supported.")

    # The table id must not contain whitespace: "<dataset>.round_trip_<n>".
    table_id = f"{random_dataset_id}.round_trip_{random.randrange(1_000_000)}"

    # Sort both sides the same way so the comparison is order-independent.
    input_series = input_series.sort_values().reset_index(drop=True)
    df = pandas.DataFrame(
        # Some errors only occur in multi-column dataframes. See:
        # https://github.com/googleapis/python-bigquery-pandas/issues/366
        {"test_col": input_series, "test_col2": input_series}
    )
    method_under_test(df, table_id, api_method=api_method)

    round_trip = bigquery_client.list_rows(table_id).to_dataframe()
    round_trip_series = round_trip["test_col"].sort_values().reset_index(drop=True)

    # check_names=False: the input series may carry a name other than the
    # "test_col" column name it is uploaded under (e.g. "empty_strings").
    pandas.testing.assert_series_equal(
        round_trip_series,
        input_series,
        check_exact=True,
        check_names=False,
    )
112
+
113
+
114
# Parameter sets for test_dataframe_round_trip_with_table_schema. Each entry
# is an (input_df, table_schema, skip_csv) tuple.
DATAFRAME_ROUND_TRIPS = [
    # Ensure that a DATE column can be written with datetime64[ns] dtype
    # data. See:
    # https://github.com/googleapis/python-bigquery-pandas/issues/362
    (
        pandas.DataFrame(
            {
                "date_col": pandas.Series(
                    ["2021-04-17", "1999-12-31", "2038-01-19"],
                    dtype="datetime64[ns]",
                ),
            }
        ),
        [{"name": "date_col", "type": "DATE"}],
        True,  # skip_csv
    ),
]

# Only add the "dbdate" case when the optional db_dtypes import succeeded.
if db_dtypes is not None:
    _dbdate_frame = pandas.DataFrame(
        {
            "date_col": pandas.Series(
                ["2021-04-17", "1999-12-31", "2038-01-19"],
                dtype="dbdate",
            ),
        }
    )
    _dbdate_schema = [{"name": "date_col", "type": "DATE"}]
    DATAFRAME_ROUND_TRIPS.append((_dbdate_frame, _dbdate_schema, False))
144
+
145
+
146
@pytest.mark.parametrize(
    ["input_df", "table_schema", "skip_csv"], DATAFRAME_ROUND_TRIPS
)
def test_dataframe_round_trip_with_table_schema(
    method_under_test,
    random_dataset_id,
    bigquery_client,
    input_df,
    table_schema,
    api_method,
    skip_csv,
):
    """Upload a DataFrame with an explicit schema and read it back unchanged.

    A ``row_num`` column (taken from the frame's index) is added before the
    upload so that the rows read back through ``bigquery_client.list_rows``
    can be put into the same order as the uploaded rows before comparing.

    Cases flagged with ``skip_csv`` are skipped for the ``load_csv`` method.
    """
    if api_method == "load_csv" and skip_csv:
        pytest.skip("Loading with CSV not supported.")

    # The table id must not contain whitespace:
    # "<dataset>.round_trip_w_schema_<n>".
    table_id = f"{random_dataset_id}.round_trip_w_schema_{random.randrange(1_000_000)}"

    # Work on a copy: the parametrized frame is a module-level object that is
    # reused for every api_method value, so it must not be modified in place.
    expected = input_df.copy()
    expected["row_num"] = expected.index
    expected = expected.sort_values("row_num").reset_index(drop=True)

    method_under_test(
        expected, table_id, table_schema=table_schema, api_method=api_method
    )
    round_trip = bigquery_client.list_rows(table_id).to_dataframe(
        dtypes=dict(zip(expected.columns, expected.dtypes))
    )

    # Row order coming back from list_rows is not assumed to match the upload
    # order. Sort on row_num and drop the old index labels so that only the
    # data, not the fetch order, is compared.
    round_trip = round_trip.sort_values("row_num").reset_index(drop=True)
    pandas.testing.assert_frame_equal(expected, round_trip)
0 commit comments