From 626b6fe84bef7de9b299b61727d6a3c4a44ae9c7 Mon Sep 17 00:00:00 2001
From: sunsingerus
Date: Tue, 21 Nov 2017 13:59:40 +0300
Subject: [PATCH 1/6] docs

---
 README.md | 105 ++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 78 insertions(+), 27 deletions(-)

diff --git a/README.md b/README.md
index 8b57571..f05303b 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,11 @@

* [Introduction](#introduction)
* [Requirements](#requirements)
+ * [Operation](#operation)
+ * [Requirements and Limitations](#requirements-and-limitations)
+ * [General Schema](#general-schema)
+ * [Example](#example)
+ * [Performance](#performance)
* [Testing](#testing)
* [MySQL Data Types](#mysql-data-types)
* [ClickHouse Data Types](#clickhouse-data-types)

@@ -18,23 +23,25 @@

# Introduction

-Utility to read mysql data
+Utility to import data into ClickHouse from MySQL (mainly) and/or CSV files

# Requirements

-This package is used for interacting with MySQL:
+Data reader requires **Python 3.x** with additional modules installed.
+
+The `mysql-replication` package is used for communication with MySQL:
[https://github.com/noplay/python-mysql-replication](https://github.com/noplay/python-mysql-replication)
```bash
pip install mysql-replication
```

-This package is used for interacting with ClickHouse:
+The `clickhouse-driver` package is used for communication with ClickHouse:
[https://github.com/mymarilyn/clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
```bash
pip install clickhouse-driver
```

-You need (at least one of) the `SUPER`, `REPLICATION CLIENT` privilege(s) for this operation
+At least one of the following MySQL privileges is also required for this operation: `SUPER`, `REPLICATION CLIENT`

```sql
CREATE USER 'reader'@'localhost' IDENTIFIED BY 'qwerty';
GRANT REPLICATION CLIENT, REPLICATION SLAVE, SUPER ON *.* TO 'reader'@'*';
FLUSH PRIVILEGES;
```

-MySQL config options required:
+The following MySQL config options are also required:
```ini
[mysqld]
server-id        = 1
log_bin          = /var/lib/mysql/bin.log
binlog-do-db     = airline
max_binlog_size  = 100M
binlog-format    = row # Very important if you want to receive write, update and delete row events
```

# Operation

## Requirements and Limitations

Data reader understands INSERT SQL statements only. In practice this means that:
 * You need to create the required table in ClickHouse before starting the data read procedure. More on how to create the target ClickHouse table: [MySQL -> ClickHouse Data Types Mapping](#mysql---clickhouse-data-types-mapping)
 * UPDATE statements are not handled - meaning UPDATEs within MySQL will not be relayed into ClickHouse
 * DELETE statements are not handled - meaning DELETEs within MySQL will not be relayed into ClickHouse
 * DDL statements are not handled. For example, a source table structure change can lead to insertion errors

## General schema

 * Step 1. Data Reader reads data from the source event-by-event (for MySQL binlog) or line-by-line (file).
 * Step 2. **OPTIONAL** Caching in memory pool. Since ClickHouse prefers to get data in bundles (row-by-row insertion is extremely slow), we need to introduce some caching. The cache is flushed when any of the following is reached (a minimal sketch of this logic follows the schema):
   * number of rows in cache
   * number of events in cache
   * time elapsed
   * data source depleted
 * Step 3. **OPTIONAL** Writing CSV file. Sometimes it is useful to have the data also represented as a file
 * Step 4. Writing data into ClickHouse. Depending on the configuration of the previous steps, data is written into ClickHouse in either of the following ways:
   * directly, event-by-event or line-by-line
   * from the memory cache, as a bulk insert operation
   * from a CSV file, via `clickhouse-client`
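The flush decision from Step 2 can be pictured with a minimal sketch. This is illustrative only, not the tool's actual classes; the threshold names merely mirror the `--mempool-*` options shown in the example below, and the row threshold is an assumption:

```python
import time

class MemPoolSketch:
    """Illustrative memory pool: flush when any threshold is reached."""

    def __init__(self, max_events=1000, max_rows=100000, max_flush_interval=60):
        self.max_events = max_events                  # cf. --mempool-max-events-num
        self.max_rows = max_rows                      # assumed row threshold
        self.max_flush_interval = max_flush_interval  # cf. --mempool-max-flush-interval
        self.events, self.rows = 0, 0
        self.last_flush = time.time()

    def need_flush(self, source_depleted=False):
        # any single condition is enough to trigger a flush
        return (
            self.events >= self.max_events
            or self.rows >= self.max_rows
            or time.time() - self.last_flush >= self.max_flush_interval
            or source_depleted
        )
```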
## Example

Let's walk through a test example of the tool's launch command-line options:

```bash
$PYTHON main.py ${*:1} \
    --src-resume \
    --src-wait \
    --nice-pause=1 \
    --log-level=info \
    --log-file=ontime.log \
    --src-host=127.0.0.1 \
    --src-user=root \
    --dst-host=127.0.0.1 \
    --csvpool \
    --csvpool-file-path-prefix=qwe_ \
    --mempool-max-flush-interval=60 \
    --mempool-max-events-num=1000
```
Options description
 * `--src-resume` - resume data loading from the previous point. On the very first start, resume from the end of the log
 * `--src-wait` - wait for new data to arrive
 * `--nice-pause=1` - sleep for 1 second when no data is available
 * `--log-level=info` - log verbosity
 * `--log-file=ontime.log` - log file name
 * `--src-host=127.0.0.1` - MySQL source host
 * `--src-user=root` - MySQL source user (remember the PRIVILEGES required for this user)
 * `--dst-host=127.0.0.1` - ClickHouse host
 * `--csvpool` - keep a pool of CSV files (implies `--mempool` as well)
 * `--csvpool-file-path-prefix=qwe_` - write the pool's CSV files with the `qwe_` prefix into the current working directory
 * `--mempool-max-flush-interval=60` - flush the mempool at least every 60 seconds
 * `--mempool-max-events-num=1000` - flush the mempool after at most 1000 events (events, not rows)
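With `--csvpool`, flushed pool files reach ClickHouse through `clickhouse-client` (Step 4 above). As a rough illustration of that bulk-load step - the pool file name here is hypothetical, and the tool normally does this for you:

```bash
# load one pooled CSV file into ClickHouse as a single bulk insert (illustrative)
clickhouse-client --host=127.0.0.1 \
    --query="INSERT INTO airline.ontime FORMAT CSV" < qwe_pool_000001.csv
```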
+ +```bash +[user@localhost ~]$ ls -l pypy3.5-5.9-beta-linux_x86_64-portable +total 32 +drwxr-xr-x 2 user user 140 Oct 24 01:14 bin +drwxr-xr-x 5 user user 4096 Oct 3 11:57 include +drwxr-xr-x 4 user user 4096 Oct 3 11:57 lib +drwxr-xr-x 13 user user 4096 Oct 3 11:56 lib_pypy +drwxr-xr-x 3 user user 15 Oct 3 11:56 lib-python +-rw-r--r-- 1 user user 11742 Oct 3 11:56 LICENSE +-rw-r--r-- 1 user user 1296 Oct 3 11:56 README.rst +drwxr-xr-x 14 user user 4096 Oct 24 01:16 site-packages +drwxr-xr-x 2 user user 195 Oct 3 11:57 virtualenv_support +``` + +Install `pip` +```bash +pypy3.5-5.9-beta-linux_x86_64-portable/bin/pypy -m ensurepip +``` +Install required modules +```bash +pypy3.5-5.9-beta-linux_x86_64-portable/bin/pip3 install mysql-replication +pypy3.5-5.9-beta-linux_x86_64-portable/bin/pip3 install clickhouse-driver +``` + +Now you can run data reader via `pypy` +```bash +/home/user/pypy3.5-5.9-beta-linux_x86_64-portable/bin/pypy main.py +``` # Testing From b80abc880bd2cec3b739dd0323f6e980df2612f1 Mon Sep 17 00:00:00 2001 From: sunsingerus Date: Tue, 21 Nov 2017 14:02:32 +0300 Subject: [PATCH 3/6] docs: URL fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1cccb66..ad429d1 100644 --- a/README.md +++ b/README.md @@ -130,7 +130,7 @@ Options description # Performance `pypy` significantly improves performance. You should try it. -For example you can start with [Portable PyPy distribution for Linux](#https://github.com/squeaky-pl/portable-pypy#portable-pypy-distribution-for-linux) +For example you can start with [Portable PyPy distribution for Linux](https://github.com/squeaky-pl/portable-pypy#portable-pypy-distribution-for-linux) Unpack it into your place of choice. ```bash From 0037d45aae0de7e4877d3e65cd32dcb86c879ac6 Mon Sep 17 00:00:00 2001 From: sunsingerus Date: Tue, 21 Nov 2017 14:32:01 +0300 Subject: [PATCH 4/6] airline.ontime test scripts --- ...me.sh => run_airline_ontime_data_reader.sh | 4 +++ run_airline_ontime_import.sh | 29 +++++++++++++++++++ 2 files changed, 33 insertions(+) rename run_ontime.sh => run_airline_ontime_data_reader.sh (81%) create mode 100755 run_airline_ontime_import.sh diff --git a/run_ontime.sh b/run_airline_ontime_data_reader.sh similarity index 81% rename from run_ontime.sh rename to run_airline_ontime_data_reader.sh index 837ed97..4157d0b 100755 --- a/run_ontime.sh +++ b/run_airline_ontime_data_reader.sh @@ -1,7 +1,11 @@ #!/bin/bash +# read airline.ontime test dataset from MySQL and write it to CH +# ugly stub to suppress unsufficient sockets sudo bash -c "echo 1 > /proc/sys/net/ipv4/tcp_tw_reuse" +# run data reader with specified Python version + PYTHON=python3.6 PYTHON=/home/user/pypy3.5-5.9-beta-linux_x86_64-portable/bin/pypy diff --git a/run_airline_ontime_import.sh b/run_airline_ontime_import.sh new file mode 100755 index 0000000..b73fb41 --- /dev/null +++ b/run_airline_ontime_import.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# import airline.ontime test dataset into MySQL + +# looking for csv files in this dir +FILES_TO_IMPORT_DIR="/mnt/nas/work/ontime" + +# limit import to this number of files +FILES_TO_IMPORT_NUM=3 + +i=1 +for file in $(ls "$FILES_TO_IMPORT_DIR"/*.csv|sort|head -n $FILES_TO_IMPORT_NUM); do + echo "$i. Prepare. Make link to $file" + rm -f ontime + ln -s $file ontime + + echo "$i. Import. $file" + time mysqlimport \ + --ignore-lines=1 \ + --fields-terminated-by=, \ + --fields-enclosed-by=\" \ + --local \ + -u root \ + airline ontime + + echo "$i. Cleanup. 
$file" + rm -f ontime + + i=$((i+1)) +done From 2268b95ff6a1a570b372893ae8899849cba5c511 Mon Sep 17 00:00:00 2001 From: sunsingerus Date: Tue, 21 Nov 2017 15:20:41 +0300 Subject: [PATCH 5/6] airline.ontime dataset --- README.md | 100 +++++++++++++++------------- run_airline_ontime_data_download.sh | 46 +++++++++++++ 2 files changed, 100 insertions(+), 46 deletions(-) create mode 100755 run_airline_ontime_data_download.sh diff --git a/README.md b/README.md index ad429d1..3c6316d 100644 --- a/README.md +++ b/README.md @@ -8,16 +8,18 @@ * [Requirements](#requirements) * [Operation](#operation) * [Requirements and Limitations](#requirements-and-limitations) - * [General Schema](#general-schema) + * [Operation General Schema](#operation-general-schema) * [Example](#example) * [Performance](#performance) * [Testing](#testing) - * [MySQL Data Types](#mysql-data-types) - * [ClickHouse Data Types](#clickhouse-data-types) - * [MySQL -> ClickHouse Data Types Mapping](#mysql---clickhouse-data-types-mapping) - * [MySQL Test Tables](#mysql-test-tables) - * [ClickHouse Test Tables](#clickhouse-test-tables) - * [Airline ontime Test Case](#airline-ontime-test-case) + * [Testing General Schema](#testing-general-schema) + * [MySQL Data Types](#mysql-data-types) + * [ClickHouse Data Types](#clickhouse-data-types) + * [MySQL -> ClickHouse Data Types Mapping](#mysql---clickhouse-data-types-mapping) + * [MySQL Test Tables](#mysql-test-tables) + * [ClickHouse Test Tables](#clickhouse-test-tables) + * [Test Cases](#test-cases) + * [airline.ontime Test Case](#airline-ontime-test-case) --- @@ -79,7 +81,7 @@ Data reader understands INSERT SQL statements only. In practice this means that: * DELETE statements are not handled - meaning DELETEs within MySQL would not be relayed into ClickHouse * DDL statements are not handled. For example, source table structure change can lead to insertion errors -## General schema +## Operation General Schema * Step 1. Data Reader reads data from the source event-by-event (for MySQL binlog) or line-by-line (file). * Step 2. **OPTIONAL** Caching in memory pool. Since ClickHouse prefers to get data in bundles (row-by-row insertion is extremely slow), we need to introduce some caching. @@ -164,9 +166,11 @@ Now you can run data reader via `pypy` # Testing -## MySQL Data Types +## Testing General Schema -### Numeric Types +### MySQL Data Types + +#### Numeric Types * `BIT` the number of bits per value, from 1 to 64 * `TINYINT` -128 to 127. The unsigned range is 0 to 255 @@ -182,7 +186,7 @@ Now you can run data reader via `pypy` * `DOUBLE`, `REAL` Permissible values are -1.7976931348623157E+308 to -2.2250738585072014E-308, 0, and 2.2250738585072014E-308 to 1.7976931348623157E+308 -### Date and Time Types +#### Date and Time Types * `DATE` The supported range is '1000-01-01' to '9999-12-31' * `DATETIME` The supported range is '1000-01-01 00:00:00.000000' to '9999-12-31 23:59:59.999999' @@ -190,7 +194,7 @@ Now you can run data reader via `pypy` * `TIME` The range is '-838:59:59.000000' to '838:59:59.000000' * `YEAR` Values display as 1901 to 2155, and 0000 -### String Types +#### String Types * `CHAR` The range of M is 0 to 255. If M is omitted, the length is 1. 
-## MySQL Test Tables
+### MySQL Test Tables

We have to split the test table into several tables because of this error produced by MySQL:
```text
@@ -594,7 +598,7 @@ INSERT INTO long_varbinary_datatypes SET
 ;
 ```

-## ClickHouse Test Tables
+### ClickHouse Test Tables

 ```sql
 CREATE TABLE datatypes(
@@ -705,9 +709,26 @@ CREATE TABLE long_varbinary_datatypes(
 ;
 ```

-## Airline ontime Test Case
+## Test Cases

### airline.ontime Test Case
Main Steps
 * Download the airline.ontime dataset
 * Create the airline.ontime MySQL table
 * Create the airline.ontime ClickHouse table
 * Start the data reader (migrate data MySQL -> ClickHouse)
 * Start the data importer (import data into MySQL)
 * Check how the data is loaded into ClickHouse

#### airline.ontime Data Set in CSV files
Run the [download script](run_airline_ontime_data_download.sh)
```bash
./run_airline_ontime_data_download.sh
```
Downloading can take some time.
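It is worth checking what actually arrived before moving on. Two quick checks, assuming the default `csv` directory used by the download script:

```bash
# how many CSV files were extracted, and how much disk they occupy
ls csv/*.csv | wc -l
du -sh csv/
```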
$file" - rm -f ontime - i=$((i+1)) -done +./run_airline_ontime_data_reader.sh ``` + +#### airline.ontime Data Importer +Run [data importer script](run_airline_ontime_import.sh) + +```bash +./run_airline_ontime_import.sh +``` diff --git a/run_airline_ontime_data_download.sh b/run_airline_ontime_data_download.sh new file mode 100755 index 0000000..85939c7 --- /dev/null +++ b/run_airline_ontime_data_download.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# download airline.ontime test dataset + +echo "Check required commands availability" +if command -v wget && command -v unzip && command -v clickhouse-client && command -v wc && command -v awk; then + echo "Looks like all required commands are available" +else + echo "Please ensure availability of: wget && unzip && clickhouse-client && wc && awk" + exit 1 +fi + +echo "Download dataset" + +ZIP_FILES_DIR="zip" +echo "Create dir $ZIP_FILES_DIR for downloading zip files" +mkdir "$ZIP_FILES_DIR" + +if [ ! -d "$ZIP_FILES_DIR" ]; then + "Can' use dir: $ZIP_FILES_DIR - not available" + exit 1 +fi + +echo "Download files into $ZIP_FILES_DIR" +for year in `seq 1987 2017`; do + for month in `seq 1 12`; do + FILE_NAME="On_Time_On_Time_Performance_${year}_${month}.zip" + wget -O "$ZIP_FILES_DIR/$FILE_NAME" "http://transtats.bts.gov/PREZIP/$FILE_NAME" + done +done + +echo "Unzip dataset" + +ZIP_FILES_DIR="zip" +CSV_FILES_DIR="csv" + +mkdir "$CSV_FILES_DIR" + +if [ ! -d "$CSV_FILES_DIR" ]; then + "Can' use dir: $CSV_FILES_DIR - not available" + exit 1 +fi + +for ZIP_FILENAME in `ls "$ZIP_FILES_DIR"/*.zip`; do + echo "Unzipping $ZIP_FILENAME to $CSV_FILES_DIR/" + unzip -o "$ZIP_FILENAME" -d "$CSV_FILES_DIR/" +done From d4ad93fe243284a242021722d29b3bfcb776025f Mon Sep 17 00:00:00 2001 From: sunsingerus Date: Tue, 21 Nov 2017 15:32:48 +0300 Subject: [PATCH 6/6] airline.ontime testcase --- README.md | 34 ++++++++++++++++++++++++++++- run_airline_ontime_data_download.sh | 7 +++--- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 3c6316d..2078fea 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ * [MySQL Test Tables](#mysql-test-tables) * [ClickHouse Test Tables](#clickhouse-test-tables) * [Test Cases](#test-cases) - * [airline.ontime Test Case](#airline-ontime-test-case) + * [airline.ontime Test Case](#airlineontime-test-case) --- @@ -722,6 +722,13 @@ Main Steps #### airline.ontime Data Set in CSV files Run [download script](run_airline_ontime_data_download.sh) +You may want to adjust dirs where to keep `ZIP` and `CSV` file +In `run_airline_ontime_data_download.sh` edit these lines: +```bash +ZIP_FILES_DIR="zip" +CSV_FILES_DIR="csv" +``` + ```bash ./run_airline_ontime_data_download.sh ``` @@ -964,12 +971,37 @@ CREATE TABLE IF NOT EXISTS `airline`.`ontime` ( #### airline.ontime Data Reader Run [datareader script](run_airline_ontime_data_reader.sh) +You may want to adjust `PYTHON` path and source and target hosts and usernames +```bash +PYTHON=python3.6 +PYTHON=/home/user/pypy3.5-5.9-beta-linux_x86_64-portable/bin/pypy +``` +```bash +... + --src-host=127.0.0.1 \ + --src-user=root \ + --dst-host=127.0.0.1 \ +... 
+``` ```bash ./run_airline_ontime_data_reader.sh ``` #### airline.ontime Data Importer Run [data importer script](run_airline_ontime_import.sh) +You may want to adjust `CSV` files location, number of imported files and MySQL user/password used for import +```bash +# looking for csv files in this dir +FILES_TO_IMPORT_DIR="/mnt/nas/work/ontime" + +# limit import to this number of files +FILES_TO_IMPORT_NUM=3 +``` +```bash +... + -u root \ +... +``` ```bash ./run_airline_ontime_import.sh diff --git a/run_airline_ontime_data_download.sh b/run_airline_ontime_data_download.sh index 85939c7..f1bdcce 100755 --- a/run_airline_ontime_data_download.sh +++ b/run_airline_ontime_data_download.sh @@ -1,6 +1,9 @@ #!/bin/bash # download airline.ontime test dataset +ZIP_FILES_DIR="zip" +CSV_FILES_DIR="csv" + echo "Check required commands availability" if command -v wget && command -v unzip && command -v clickhouse-client && command -v wc && command -v awk; then echo "Looks like all required commands are available" @@ -11,7 +14,6 @@ fi echo "Download dataset" -ZIP_FILES_DIR="zip" echo "Create dir $ZIP_FILES_DIR for downloading zip files" mkdir "$ZIP_FILES_DIR" @@ -30,9 +32,6 @@ done echo "Unzip dataset" -ZIP_FILES_DIR="zip" -CSV_FILES_DIR="csv" - mkdir "$CSV_FILES_DIR" if [ ! -d "$CSV_FILES_DIR" ]; then