Skip to content

Commit ccd7a7d

Browse files
authored
Merge pull request #40 from sunsingerus/master
table builder
2 parents 591870a + 65621e8 commit ccd7a7d

File tree

4 files changed

+128
-33
lines changed

4 files changed

+128
-33
lines changed

main.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,17 @@ def __init__(self):
3333
# mp.set_start_method('forkserver')
3434

3535
def run(self):
36-
pumper = Pumper(
37-
reader=self.config.reader(),
38-
writer=self.config.writer(),
39-
)
40-
pumper.run()
36+
if self.config.is_table_templates():
37+
templates = self.config.table_builder().templates()
38+
for db in templates:
39+
for table in templates[db]:
40+
print(db, ':', table, ':', templates[db][table])
41+
else:
42+
pumper = Pumper(
43+
reader=self.config.reader(),
44+
writer=self.config.writer(),
45+
)
46+
pumper.run()
4147

4248
def start(self):
4349
if self.config.is_daemon():

src/cliopts.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,11 @@ def config():
146146
action='store_true',
147147
help='Keep CSV pool files. Useful for debugging'
148148
)
149+
argparser.add_argument(
150+
'--table-templates',
151+
action='store_true',
152+
help='Prepare table templates.'
153+
)
149154

150155
argparser.add_argument(
151156
'--src-server-id',
@@ -262,12 +267,14 @@ def config():
262267

263268
# build options
264269
return Config ({
270+
265271
'app-config': {
266272
'config-file': args.config_file,
267273
'log-file': args.log_file,
268274
'log-level': CLIOpts.log_level_from_string(args.log_level),
269275
'dry': args.dry,
270276
'daemon': args.daemon,
277+
'table-templates': args.table_templates,
271278
'pid_file': args.pid_file,
272279
'mempool': args.mempool or args.csvpool, # csvpool assumes mempool to be enabled
273280
'mempool-max-events-num': args.mempool_max_events_num,
@@ -284,6 +291,15 @@ def config():
284291
},
285292
},
286293

294+
'tablebuilder-config': {
295+
'host': args.src_host,
296+
'port': args.src_port,
297+
'user': args.src_user,
298+
'password': args.src_password,
299+
'dbs': [x for x in args.src_only_schemas.split(',') if x] if args.src_only_schemas else None,
300+
'tables': [x for x in args.src_only_tables.split(',') if x] if args.src_only_tables else None,
301+
},
302+
287303
'reader-config': {
288304
'mysql': {
289305
'connection_settings': {

src/config.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from .converter.csvwriteconverter import CSVWriteConverter
1515
from .converter.chwriteconverter import CHWriteConverter
16-
16+
from .tablebuilder import TableBuilder
1717

1818
class Config(object):
1919

@@ -46,6 +46,12 @@ def is_daemon(self):
4646
def is_pool(self):
4747
return self.config['app-config']['mempool']
4848

49+
def is_table_templates(self):
50+
return self.config['app-config']['table-templates']
51+
52+
def table_builder(self):
53+
return TableBuilder(**self.config['tablebuilder-config'])
54+
4955
def reader(self):
5056
if self.config['reader-config']['file']['csv_file_path']:
5157
return CSVReader(**self.config['reader-config']['file'])

src/tablebuilder.py

Lines changed: 94 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,42 +4,95 @@
44
import logging
55
import MySQLdb
66

7+
78
class TableBuilder(object):
89

910
connection = None
1011
cursor = None
1112

12-
def template(self, host, user, password=None, db=None, tables=None):
13+
host = None
14+
port = None
15+
user = None
16+
password = None
17+
dbs = None
18+
tables = None
19+
20+
def __init__(self, host, port, user, password=None, dbs=None, tables=None):
21+
self.host = host
22+
self.port = port
23+
self.user = user
24+
self.password = password
25+
self.dbs = dbs
26+
self.tables = tables
27+
28+
def templates(self):
29+
"""
30+
Create templates for specified MySQL tables. In case no tables specified all tables from specified db are templated
31+
32+
:param host: string MySQL host
33+
:param user: string MySQL user
34+
:param password: string MySQL password
35+
:param dbs: list of string MySQL datatabse/ May be omitted, in this case tables has to contain full table names, Ex.: db.table1
36+
:param tables: list of string list of table names. May be short (in case db specified) or full (in the form db.table, in case no db specified)
37+
:return: dict of CREATE TABLE () templates
38+
"""
39+
res = {}
40+
41+
db = None
42+
43+
try:
44+
db = self.dbs[0]
45+
except:
46+
pass
47+
1348
# sanity check
14-
if db is None and tables is None:
15-
return None
49+
if db is None and self.tables is None:
50+
return res
1651

1752
# MySQL connections
1853
self.connection = MySQLdb.connect(
19-
host=host,
20-
user=user,
21-
passwd=password,
54+
host=self.host,
55+
user=self.user,
56+
passwd=self.password,
2257
db=db,
2358
)
2459
self.cursor = self.connection.cursor()
2560

2661
# in case to tables specified - list all tables of the DB specified
27-
if db is not None and tables is None:
62+
if db is not None and self.tables is None:
2863
self.cursor.execute("USE " + db)
29-
tables = []
64+
self.tables = []
3065
self.cursor.execute("SHOW TABLES") # execute 'SHOW TABLES' (but data is not returned)
3166
for (table_name,) in self.cursor:
32-
tables.append(table_name)
33-
34-
# tables can be something like 'db1, db2, db3'
35-
# make [db1, db2, db3]
36-
if isinstance(tables, str):
37-
tables = [table.strip() for table in tables.split(',')]
38-
39-
for table in tables:
40-
print(self.table(table, db))
41-
42-
def table(self, table_name, db=None):
67+
self.tables.append(table_name)
68+
69+
# create dict of table templates
70+
for table in self.tables:
71+
if not db in res:
72+
res[db] = {}
73+
res[db][table] = self.create_table_template(table, db)
74+
75+
# {
76+
# 'db': {
77+
# 'table1': 'CREATE TABLE(...)...',
78+
# 'table2': 'CREATE TABLE(...)...',
79+
# }
80+
# }
81+
return res
82+
83+
def create_table_template(self, table_name, db=None):
84+
"""
85+
Produce template for CH's
86+
CREATE TABLE(
87+
...
88+
columns specification
89+
...
90+
) ENGINE = MergeTree(_SPECIFY_DateField_HERE, (SPECIFY_INDEX_FIELD1, SPECIFY_INDEX_FIELD2, ...etc...), 8192)
91+
for specified MySQL's table
92+
:param table_name: string - name of the table in MySQL which will be used as a base for CH's CREATE TABLE template
93+
:param db: string - name of the DB in MySQL
94+
:return: string - almost-ready-to-use CREATE TABLE statement
95+
"""
4396

4497
# `db`.`table` or just `table`
4598
name = '`{0}`.`{1}`'.format(db, table_name) if db else '`{0}`'.format(table_name)
@@ -52,10 +105,10 @@ def table(self, table_name, db=None):
52105
for (_field, _type, _null, _key, _default, _extra,) in self.cursor:
53106
# Field | Type | Null | Key | Default | Extra
54107

55-
# build ready-to-sql column specification
108+
# build ready-to-sql column specification Ex.:
56109
# `integer_1` Nullable(Int32)
57110
# `u_integer_1` Nullable(UInt32)
58-
ch_columns.append('`{0}` {1}'.format(_field, self.map(mysql_type=_type, null=_null,)))
111+
ch_columns.append('`{0}` {1}'.format(_field, self.map_type(mysql_type=_type, nullable=_null, )))
59112

60113
sql = """
61114
CREATE TABLE {0} (
@@ -67,7 +120,17 @@ def table(self, table_name, db=None):
67120
)
68121
return sql
69122

70-
def map(self, mysql_type, null=False):
123+
def map_type(self, mysql_type, nullable=False):
124+
"""
125+
Map MySQL type (as a string from DESC table statement) to CH type (as string)
126+
:param mysql_type: string MySQL type (from DESC statement). Ex.: 'INT(10) UNSIGNED', 'BOOLEAN'
127+
:param nullable: bool|string True|'yes' is this field nullable
128+
:return: string CH's type specification directly usable in CREATE TABLE statement. Ex.:
129+
Nullable(Int32)
130+
Nullable(UInt32)
131+
"""
132+
133+
# deal with UPPER CASE strings for simplicity
71134
mysql_type = mysql_type.upper()
72135

73136
# Numeric Types
@@ -146,22 +209,26 @@ def map(self, mysql_type, null=False):
146209
ch_type = 'UNKNOWN'
147210

148211
# Deal with NULLs
149-
if isinstance(null, bool):
150-
if null:
212+
if isinstance(nullable, bool):
213+
# for bool - simple statement
214+
if nullable:
151215
ch_type = 'Nullable(' + ch_type + ')'
152-
elif isinstance(null, str):
153-
if null.upper() == "YES":
216+
elif isinstance(nullable, str):
217+
# also accept case-insencitive string 'yes'
218+
if nullable.upper() == "YES":
154219
ch_type = 'Nullable(' + ch_type + ')'
155220

156221
return ch_type
157222

158223
if __name__ == '__main__':
159224
tb = TableBuilder()
160-
tb.template(
225+
templates = tb.templates(
161226
host='127.0.0.1',
162227
user='reader',
163228
password='qwerty',
164229
db='db',
165230
# tables='datatypes, enum_datatypes, json_datatypes',
166231
tables=['datatypes', 'enum_datatypes', 'json_datatypes'],
167232
)
233+
for table in templates:
234+
print(table, '=', templates[table])

0 commit comments

Comments
 (0)