Skip to content

Commit e22f363

Browse files
committed
feat(contrib): init tool upload-kktix-ticket-csv-to-bigquery.py
1 parent 7e00e24 commit e22f363

File tree

3 files changed

+369
-50
lines changed

3 files changed

+369
-50
lines changed
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
import re
4+
5+
import pandas as pd
6+
from google.cloud import bigquery
7+
8+
9+
def upload_dataframe_to_bigquery(df, project_id, dataset_name, table_name):
    """Load a DataFrame into a BigQuery table and print how many rows landed.

    Args:
        df: DataFrame handed to the BigQuery load job.
        project_id: Project used both for the client and the dataset reference.
        dataset_name: Dataset ID of the destination table.
        table_name: Table ID of the destination table.
    """
    bq_client = bigquery.Client(project=project_id)

    # Fully-qualified destination: project -> dataset -> table
    destination = bigquery.table.TableReference(
        bigquery.dataset.DatasetReference(project_id, dataset_name),
        table_name,
    )

    load_job = bq_client.load_table_from_dataframe(df, destination)

    # Wait for the load job to finish before reading its output_rows
    load_job.result()

    summary = "Loaded {} rows into {}:{}.".format(
        load_job.output_rows, dataset_name, table_name
    )
    print(summary)
23+
24+
25+
def sanitize_column_name(column_name):
    """Return *column_name* with every character outside a-z / A-Z removed.

    Digits, whitespace, punctuation, underscores and non-ASCII characters are
    all dropped, so the result may be an empty string.
    """
    return re.sub("[^a-zA-Z]", "", column_name)
28+
29+
30+
def sanitize_column_names(df):
    """Return a copy of *df* whose column labels have been sanitized.

    Each existing column label is mapped to the value produced by
    ``sanitize_column_name`` and the mapping is applied with ``df.rename``.
    """
    renames = {label: sanitize_column_name(label) for label in df.columns}
    return df.rename(columns=renames)
37+
38+
39+
def main():
    """
    Commandline entrypoint

    Parse the command-line arguments, read the ticket CSV into a DataFrame,
    sanitize its column names and upload the result to BigQuery.
    """
    parser = argparse.ArgumentParser(
        description="Sanitize ticket CSV and upload to BigQuery"
    )

    parser.add_argument(
        "csv_file", type=str, help="Ticket CSV file",
    )

    # The destination table reference is built from all three of these
    # values, so the upload cannot succeed without them. Marking them as
    # required lets argparse report a missing one up front instead of
    # passing None down into the BigQuery reference objects.
    parser.add_argument(
        "-p", "--project-id", required=True, help="BigQuery project ID"
    )

    parser.add_argument(
        "-d",
        "--dataset-name",
        required=True,
        help="BigQuery dataset name to create or append",
    )

    parser.add_argument(
        "-t",
        "--table-name",
        required=True,
        help="BigQuery table name to create or append",
    )

    args = parser.parse_args()

    # load the csv into bigquery
    # read_csv is given the path and manages the file itself, so there is no
    # need to open a separate (unused) file handle around it.
    df = pd.read_csv(args.csv_file)
    sanitized_df = sanitize_column_names(df)
    upload_dataframe_to_bigquery(
        sanitized_df, args.project_id, args.dataset_name, args.table_name
    )
70+
71+
72+
# Run the command-line entrypoint only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)