|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | + |
1 | 3 | import pytest
|
2 | 4 |
|
3 | 5 | import re
|
|
7 | 9 | import os
|
8 | 10 | from random import randint
|
9 | 11 | import logging
|
| 12 | +import sys |
10 | 13 |
|
11 | 14 | import numpy as np
|
12 | 15 |
|
@@ -1154,6 +1157,61 @@ def test_google_upload_errors_should_raise_exception(self):
|
1154 | 1157 | gbq.to_gbq(bad_df, self.destination_table + test_id,
|
1155 | 1158 | _get_project_id(), private_key=_get_private_key_path())
|
1156 | 1159 |
|
def test_upload_chinese_unicode_data(self):
    """Upload a frame containing a Chinese-text column and read it back.

    The row count of the round-tripped table is asserted on every
    interpreter. The value-by-value comparison of the ``s`` column is
    skipped on Python 2.
    """
    test_id = "2"
    test_size = 6
    df = DataFrame(np.random.randn(test_size, 4), index=range(test_size),
                   columns=list('ABCD'))
    df['s'] = u'信用卡'

    table_name = self.destination_table + test_id
    gbq.to_gbq(df, table_name, _get_project_id(), chunksize=10000)

    result_df = gbq.read_gbq("SELECT * FROM {0}".format(table_name),
                             project_id=_get_project_id())

    assert len(result_df) == test_size

    # pytest offers conditional skipping as the ``pytest.mark.skipif``
    # decorator; there is no callable ``pytest.skipif``.  Skipping from
    # inside a test body has to be done imperatively with ``pytest.skip``.
    if sys.version_info.major < 3:
        pytest.skip('Unicode comparison in Py2 not working')

    # Row order coming back from the query is not relied upon, so both
    # sides are sorted before comparing.
    result = result_df['s'].sort_values()
    expected = df['s'].sort_values()

    tm.assert_numpy_array_equal(expected.values, result.values)
| 1184 | + |
def test_upload_other_unicode_data(self):
    """Upload a frame whose string column holds non-ASCII text and read it back.

    The row count of the round-tripped table is asserted on every
    interpreter. The value-by-value comparison of the ``s`` column is
    skipped on Python 2.
    """
    test_id = "3"
    test_size = 3
    df = DataFrame({
        's': ['Skywalker™', 'lego', 'hülle'],
        'i': [200, 300, 400],
        'd': ['2017-12-13 17:40:39'] * test_size,
    })

    table_name = self.destination_table + test_id
    gbq.to_gbq(df, table_name, _get_project_id(), chunksize=10000)

    result_df = gbq.read_gbq("SELECT * FROM {0}".format(table_name),
                             project_id=_get_project_id())

    assert len(result_df) == test_size

    # pytest offers conditional skipping as the ``pytest.mark.skipif``
    # decorator; there is no callable ``pytest.skipif``.  Skipping from
    # inside a test body has to be done imperatively with ``pytest.skip``.
    if sys.version_info.major < 3:
        pytest.skip('Unicode comparison in Py2 not working')

    # Row order coming back from the query is not relied upon, so both
    # sides are sorted before comparing.
    result = result_df['s'].sort_values()
    expected = df['s'].sort_values()

    tm.assert_numpy_array_equal(expected.values, result.values)
| 1214 | + |
1157 | 1215 | def test_generate_schema(self):
|
1158 | 1216 | df = tm.makeMixedDataFrame()
|
1159 | 1217 | schema = gbq._generate_bq_schema(df)
|
@@ -1467,6 +1525,59 @@ def test_upload_data(self):
|
1467 | 1525 |
|
1468 | 1526 | assert result['num_rows'][0] == test_size
|
1469 | 1527 |
|
def test_upload_chinese_unicode_data(self):
    """Upload a frame containing a Chinese-text column and read it back.

    The row count of the round-tripped table is asserted on every
    interpreter. The value-by-value comparison of the ``s`` column is
    skipped on Python 2.
    """
    test_id = "2"
    test_size = 6
    df = DataFrame(np.random.randn(test_size, 4), index=range(test_size),
                   columns=list('ABCD'))
    df['s'] = u'信用卡'

    table_name = self.destination_table + test_id
    gbq.to_gbq(df, table_name, _get_project_id(), chunksize=10000)

    result_df = gbq.read_gbq("SELECT * FROM {0}".format(table_name),
                             project_id=_get_project_id())

    assert len(result_df) == test_size

    # The skip message is passed positionally: the ``msg`` keyword of
    # ``pytest.skip`` was deprecated in pytest 7 and removed in pytest 8,
    # whereas the positional form is accepted by old and new releases.
    if sys.version_info.major < 3:
        pytest.skip('Unicode comparison in Py2 not working')

    # Row order coming back from the query is not relied upon, so both
    # sides are sorted before comparing.
    result = result_df['s'].sort_values()
    expected = df['s'].sort_values()

    tm.assert_numpy_array_equal(expected.values, result.values)
| 1551 | + |
def test_upload_other_unicode_data(self):
    """Upload a frame whose string column holds non-ASCII text and read it back.

    The row count of the round-tripped table is asserted on every
    interpreter. The value-by-value comparison of the ``s`` column is
    skipped on Python 2.
    """
    test_id = "3"
    test_size = 3
    df = DataFrame({
        's': ['Skywalker™', 'lego', 'hülle'],
        'i': [200, 300, 400],
        'd': ['2017-12-13 17:40:39'] * test_size,
    })

    table_name = self.destination_table + test_id
    gbq.to_gbq(df, table_name, _get_project_id(), chunksize=10000)

    result_df = gbq.read_gbq("SELECT * FROM {0}".format(table_name),
                             project_id=_get_project_id())

    assert len(result_df) == test_size

    # The skip message is passed positionally: the ``msg`` keyword of
    # ``pytest.skip`` was deprecated in pytest 7 and removed in pytest 8,
    # whereas the positional form is accepted by old and new releases.
    if sys.version_info.major < 3:
        pytest.skip('Unicode comparison in Py2 not working')

    # Row order coming back from the query is not relied upon, so both
    # sides are sorted before comparing.
    result = result_df['s'].sort_values()
    expected = df['s'].sort_values()

    tm.assert_numpy_array_equal(expected.values, result.values)
| 1580 | + |
1470 | 1581 |
|
1471 | 1582 | class TestToGBQIntegrationWithServiceAccountKeyContents(object):
|
1472 | 1583 | # Changes to BigQuery table schema may take up to 2 minutes as of May 2015
|
|
0 commit comments