From 674403b2c49fd4e99997609944f49e3c2b261dd4 Mon Sep 17 00:00:00 2001 From: sunsingerus Date: Mon, 27 Nov 2017 15:14:04 +0300 Subject: [PATCH 1/2] table builder clarification --- src/tablebuilder.py | 69 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/src/tablebuilder.py b/src/tablebuilder.py index c01a384..49c8ca3 100644 --- a/src/tablebuilder.py +++ b/src/tablebuilder.py @@ -4,15 +4,28 @@ import logging import MySQLdb + class TableBuilder(object): connection = None cursor = None - def template(self, host, user, password=None, db=None, tables=None): + def templates(self, host, user, password=None, db=None, tables=None): + """ + Create templates for specified MySQL tables. In case no tables specified all tables from specified db are templated + + :param host: string MySQL host + :param user: string MySQL user + :param password: string MySQL password + :param db: string MySQL datatabse/ May be omitted, in this case tables has to contain full table names, Ex.: db.table1 + :param tables: string|list either comma-separated string or list of table names. May be short (in case db specified) or full (in the form db.table, in case no db specified) + :return: dict of CREATE TABLE () templates + """ + res = {} + # sanity check if db is None and tables is None: - return None + return res # MySQL connections self.connection = MySQLdb.connect( @@ -36,10 +49,26 @@ def template(self, host, user, password=None, db=None, tables=None): if isinstance(tables, str): tables = [table.strip() for table in tables.split(',')] + # create dict of table templates for table in tables: - print(self.table(table, db)) - - def table(self, table_name, db=None): + res[table] = self.create_table_template(table, db) + + # {'table1': 'CREATE TABLE(...)...', 'table2': 'CREATE TABLE(...)...'} + return res + + def create_table_template(self, table_name, db=None): + """ + Produce template for CH's + CREATE TABLE( + ... 
+ columns specification + ... + ) ENGINE = MergeTree(_SPECIFY_DateField_HERE, (SPECIFY_INDEX_FIELD1, SPECIFY_INDEX_FIELD2, ...etc...), 8192) + for specified MySQL's table + :param table_name: string - name of the table in MySQL which will be used as a base for CH's CREATE TABLE template + :param db: string - name of the DB in MySQL + :return: string - almost-ready-to-use CREATE TABLE statement + """ # `db`.`table` or just `table` name = '`{0}`.`{1}`'.format(db, table_name) if db else '`{0}`'.format(table_name) @@ -52,10 +81,10 @@ def table(self, table_name, db=None): for (_field, _type, _null, _key, _default, _extra,) in self.cursor: # Field | Type | Null | Key | Default | Extra - # build ready-to-sql column specification + # build ready-to-sql column specification Ex.: # `integer_1` Nullable(Int32) # `u_integer_1` Nullable(UInt32) - ch_columns.append('`{0}` {1}'.format(_field, self.map(mysql_type=_type, null=_null,))) + ch_columns.append('`{0}` {1}'.format(_field, self.map_type(mysql_type=_type, nullable=_null, ))) sql = """ CREATE TABLE {0} ( @@ -67,7 +96,17 @@ def table(self, table_name, db=None): ) return sql - def map(self, mysql_type, null=False): + def map_type(self, mysql_type, nullable=False): + """ + Map MySQL type (as a string from DESC table statement) to CH type (as string) + :param mysql_type: string MySQL type (from DESC statement). Ex.: 'INT(10) UNSIGNED', 'BOOLEAN' + :param nullable: bool|string True|'yes' is this field nullable + :return: string CH's type specification directly usable in CREATE TABLE statement. 
Ex.: + Nullable(Int32) + Nullable(UInt32) + """ + + # deal with UPPER CASE strings for simplicity mysql_type = mysql_type.upper() # Numeric Types @@ -146,18 +185,20 @@ def map(self, mysql_type, null=False): ch_type = 'UNKNOWN' # Deal with NULLs - if isinstance(null, bool): - if null: + if isinstance(nullable, bool): + # for bool - simple statement + if nullable: ch_type = 'Nullable(' + ch_type + ')' - elif isinstance(null, str): - if null.upper() == "YES": + elif isinstance(nullable, str): + # also accept case-insensitive string 'yes' + if nullable.upper() == "YES": ch_type = 'Nullable(' + ch_type + ')' return ch_type if __name__ == '__main__': tb = TableBuilder() - tb.template( + templates = tb.templates( host='127.0.0.1', user='reader', password='qwerty', @@ -165,3 +206,5 @@ def map(self, mysql_type, null=False): # tables='datatypes, enum_datatypes, json_datatypes', tables=['datatypes', 'enum_datatypes', 'json_datatypes'], ) + for table in templates: + print(table, '=', templates[table]) From 65621e80e303c16d74d298278c9ac50b9dd519b8 Mon Sep 17 00:00:00 2001 From: sunsingerus Date: Mon, 27 Nov 2017 16:21:41 +0300 Subject: [PATCH 2/2] table builder CLI options --- main.py | 16 ++++++++---- src/cliopts.py | 16 ++++++++++++ src/config.py | 8 +++++- src/tablebuilder.py | 62 +++++++++++++++++++++++++++++++-------------- 4 files changed, 77 insertions(+), 25 deletions(-) diff --git a/main.py b/main.py index b920247..c9717d7 100755 --- a/main.py +++ b/main.py @@ -33,11 +33,17 @@ def __init__(self): # mp.set_start_method('forkserver') def run(self): - pumper = Pumper( - reader=self.config.reader(), - writer=self.config.writer(), - ) - pumper.run() + if self.config.is_table_templates(): + templates = self.config.table_builder().templates() + for db in templates: + for table in templates[db]: + print(db, ':', table, ':', templates[db][table]) + else: + pumper = Pumper( + reader=self.config.reader(), + writer=self.config.writer(), + ) + pumper.run() def start(self): if 
self.config.is_daemon(): diff --git a/src/cliopts.py b/src/cliopts.py index 7237146..0d33299 100644 --- a/src/cliopts.py +++ b/src/cliopts.py @@ -146,6 +146,11 @@ def config(): action='store_true', help='Keep CSV pool files. Useful for debugging' ) + argparser.add_argument( + '--table-templates', + action='store_true', + help='Prepare table templates.' + ) argparser.add_argument( '--src-server-id', @@ -262,12 +267,14 @@ def config(): # build options return Config ({ + 'app-config': { 'config-file': args.config_file, 'log-file': args.log_file, 'log-level': CLIOpts.log_level_from_string(args.log_level), 'dry': args.dry, 'daemon': args.daemon, + 'table-templates': args.table_templates, 'pid_file': args.pid_file, 'mempool': args.mempool or args.csvpool, # csvpool assumes mempool to be enabled 'mempool-max-events-num': args.mempool_max_events_num, @@ -284,6 +291,15 @@ def config(): }, }, + 'tablebuilder-config': { + 'host': args.src_host, + 'port': args.src_port, + 'user': args.src_user, + 'password': args.src_password, + 'dbs': [x for x in args.src_only_schemas.split(',') if x] if args.src_only_schemas else None, + 'tables': [x for x in args.src_only_tables.split(',') if x] if args.src_only_tables else None, + }, + 'reader-config': { 'mysql': { 'connection_settings': { diff --git a/src/config.py b/src/config.py index 85bc2cb..ab2f599 100644 --- a/src/config.py +++ b/src/config.py @@ -13,7 +13,7 @@ from .converter.csvwriteconverter import CSVWriteConverter from .converter.chwriteconverter import CHWriteConverter - +from .tablebuilder import TableBuilder class Config(object): @@ -46,6 +46,12 @@ def is_daemon(self): def is_pool(self): return self.config['app-config']['mempool'] + def is_table_templates(self): + return self.config['app-config']['table-templates'] + + def table_builder(self): + return TableBuilder(**self.config['tablebuilder-config']) + def reader(self): if self.config['reader-config']['file']['csv_file_path']: return 
CSVReader(**self.config['reader-config']['file']) diff --git a/src/tablebuilder.py b/src/tablebuilder.py index 49c8ca3..7a7937a 100644 --- a/src/tablebuilder.py +++ b/src/tablebuilder.py @@ -10,50 +10,74 @@ class TableBuilder(object): connection = None cursor = None - def templates(self, host, user, password=None, db=None, tables=None): + host = None + port = None + user = None + password = None + dbs = None + tables = None + + def __init__(self, host, port, user, password=None, dbs=None, tables=None): + self.host = host + self.port = port + self.user = user + self.password = password + self.dbs = dbs + self.tables = tables + + def templates(self): """ Create templates for specified MySQL tables. In case no tables specified all tables from specified db are templated :param host: string MySQL host :param user: string MySQL user :param password: string MySQL password - :param db: string MySQL datatabse/ May be omitted, in this case tables has to contain full table names, Ex.: db.table1 - :param tables: string|list either comma-separated string or list of table names. May be short (in case db specified) or full (in the form db.table, in case no db specified) + :param dbs: list of string MySQL database names. May be omitted, in this case tables has to contain full table names, Ex.: db.table1 + :param tables: list of string list of table names. 
May be short (in case db specified) or full (in the form db.table, in case no db specified) :return: dict of CREATE TABLE () templates """ res = {} + db = None + + try: + db = self.dbs[0] + except: + pass + # sanity check - if db is None and tables is None: + if db is None and self.tables is None: return res # MySQL connections self.connection = MySQLdb.connect( - host=host, - user=user, - passwd=password, + host=self.host, + user=self.user, + passwd=self.password, db=db, ) self.cursor = self.connection.cursor() # in case to tables specified - list all tables of the DB specified - if db is not None and tables is None: + if db is not None and self.tables is None: self.cursor.execute("USE " + db) - tables = [] + self.tables = [] self.cursor.execute("SHOW TABLES") # execute 'SHOW TABLES' (but data is not returned) for (table_name,) in self.cursor: - tables.append(table_name) - - # tables can be something like 'db1, db2, db3' - # make [db1, db2, db3] - if isinstance(tables, str): - tables = [table.strip() for table in tables.split(',')] + self.tables.append(table_name) # create dict of table templates - for table in tables: - res[table] = self.create_table_template(table, db) - - # {'table1': 'CREATE TABLE(...)...', 'table2': 'CREATE TABLE(...)...'} + for table in self.tables: + if not db in res: + res[db] = {} + res[db][table] = self.create_table_template(table, db) + + # { + # 'db': { + # 'table1': 'CREATE TABLE(...)...', + # 'table2': 'CREATE TABLE(...)...', + # } + # } return res def create_table_template(self, table_name, db=None):