Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion CHANGELOG.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,37 @@
**v1.6.0**
### IMPORTANT:
In this version there are some output changes & fixes that can break your code.
1. Now all arguments inside brackets are parsed as separate strings in a list.
For example:
`file_format = (TYPE=JSON NULL_IF=('field'))` was previously parsed as 'NULL_IF': "('field')",
now it is parsed as 'NULL_IF': ["'field'"].

2. Added separate tokens for EQ `=` and IN (previously they were also parsed as IDs) - this is internal info for contributors.

3. Some CHECK statements in columns are now parsed correctly, and IN statements are now parsed as normal lists.
So this statement include_exclude_ind CHAR(1) NOT NULL CONSTRAINT chk_metalistcombo_logicalopr
CHECK (include_exclude_ind IN ('I', 'E')),


will produce this output:

{'check': {'constraint_name': 'chk_metalistcombo_logicalopr',
'statement': {'in_statement': {'in': ["'I'", "'E'"],
'name': 'include_exclude_ind'}}},


### Fixes
1. The DEFAULT word no longer appears under the 'default' key (previously it did in some cases).

### New Features
1. Added Athena output mode and initial support - https://github.com/datacontract/datacontract-cli/issues/332


**v1.5.4**
### Improvements
#### Snowflake :
1. In Snowflake add `pattern` token for external table statement, and improve location rendering
2.


**v1.5.3**
### Fixes
Expand Down
31 changes: 30 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -489,11 +489,40 @@ for help with debugging & testing support for BigQuery dialect DDLs:
* https://github.com/kalyan939

## Changelog
**v1.6.0**
### IMPORTANT:
In this version there are some output changes & fixes that can break your code.
1. Now all arguments inside brackets are parsed as separate strings in a list.
For example:
`file_format = (TYPE=JSON NULL_IF=('field'))` was previously parsed as 'NULL_IF': "('field')",
now it is parsed as 'NULL_IF': ["'field'"].

2. Added separate tokens for EQ `=` and IN (previously they were also parsed as IDs) - this is internal info for contributors.

3. Some CHECK statements in columns are now parsed correctly, and IN statements are now parsed as normal lists.
So this statement include_exclude_ind CHAR(1) NOT NULL CONSTRAINT chk_metalistcombo_logicalopr
CHECK (include_exclude_ind IN ('I', 'E')),


will produce this output:

{'check': {'constraint_name': 'chk_metalistcombo_logicalopr',
'statement': {'in_statement': {'in': ["'I'", "'E'"],
'name': 'include_exclude_ind'}}},


### Fixes
1. The DEFAULT word no longer appears under the 'default' key (previously it did in some cases).

### New Features
1. Added Athena output mode and initial support - https://github.com/datacontract/datacontract-cli/issues/332


**v1.5.4**
### Improvements
#### Snowflake :
1. In Snowflake add `pattern` token for external table statement, and improve location rendering
2.


**v1.5.3**
### Fixes
Expand Down
41 changes: 40 additions & 1 deletion docs/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,46 @@ for help with debugging & testing support for BigQuery dialect DDLs:
Changelog
---------

**v1.6.0**

IMPORTANT:
^^^^^^^^^^

In this version there are some output changes & fixes that can break your code.


#.
Now all arguments inside brackets are parsed as separate strings in a list.
For example:
``file_format = (TYPE=JSON NULL_IF=('field'))`` was previously parsed as 'NULL_IF': "('field')",
now it is parsed as 'NULL_IF': ["'field'"].

#.
Added separate tokens for EQ ``=`` and IN (previously they were also parsed as IDs) - this is internal info for contributors.

#.
Some CHECK statements in columns are now parsed correctly, and IN statements are now parsed as normal lists.
So this statement include_exclude_ind CHAR(1) NOT NULL CONSTRAINT chk_metalistcombo_logicalopr
CHECK (include_exclude_ind IN ('I', 'E')),

will produce this output:

{'check': {'constraint_name': 'chk_metalistcombo_logicalopr',
'statement': {'in_statement': {'in': ["'I'", "'E'"],
'name': 'include_exclude_ind'}}},

Fixes
^^^^^


#. The DEFAULT word no longer appears under the 'default' key (previously it did in some cases).

New Features
^^^^^^^^^^^^


#. Added Athena output mode and initial support - https://github.com/datacontract/datacontract-cli/issues/332

**v1.5.4**

Improvements
Expand All @@ -565,7 +605,6 @@ Snowflake :


#. In Snowflake add ``pattern`` token for external table statement, and improve location rendering
2.

**v1.5.3**

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "simple-ddl-parser"
version = "1.5.4"
version = "1.6.0"
description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
authors = ["Iuliia Volkova <[email protected]>"]
license = "MIT"
Expand Down
19 changes: 16 additions & 3 deletions simple_ddl_parser/ddl_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
HQL,
MSSQL,
PSQL,
Athena,
BaseSQL,
BigQuery,
IBMDb2,
Expand Down Expand Up @@ -37,6 +38,7 @@ class Dialects(
BigQuery,
IBMDb2,
PSQL,
Athena,
):
pass

Expand Down Expand Up @@ -115,8 +117,13 @@ def set_lexer_tags(self, t: LexToken) -> None:
elif t.type == "CHECK":
self.lexer.check = True

def t_EQ(self, t: LexToken) -> LexToken:
    r"(=)+"
    # The raw string above is this function's docstring and must stay exactly
    # as written: its content is the pattern `(=)+`, i.e. a run of one or
    # more `=` characters.
    # NOTE(review): presumably the lexer generator reads this docstring as the
    # token's regex — confirm against the lexer setup, which is not visible
    # in this hunk.

    # Tag the matched token with the dedicated EQ type.
    t.type = "EQ"
    # Pass the token through set_last_token (defined elsewhere on this class)
    # and return whatever it returns.
    return self.set_last_token(t)

def t_DOT(self, t: LexToken) -> LexToken:
r"\."
r"(\.)+"
t.type = "DOT"
return self.set_last_token(t)

Expand Down Expand Up @@ -154,14 +161,18 @@ def is_creation_name(self, t: LexToken) -> bool:
"TYPE",
"DOMAIN",
"TABLESPACE",
"INDEX",
"CONSTRAINT",
"EXISTS",
]
return (
t.value not in skip_id_tokens
and t.value.upper() not in ["IF"]
and self.lexer.last_token in exceptional_keys
and (
self.lexer.last_token in exceptional_keys
or (
self.lexer.last_token == "INDEX" and self.lexer.is_table is not True
)
)
and not self.exceptional_cases(t.value.upper())
)

Expand All @@ -188,6 +199,8 @@ def t_AUTOINCREMENT(self, t: LexToken):

def t_ID(self, t: LexToken):
r"([0-9]+[.][0-9]*([e][+-]?[0-9]+)?|[0-9]\.[0-9])\w|([a-zA-Z_,0-9:><\/\\\=\-\+\~\%$@#\|&?;*\()!{}\[\]\`\[\]]+)"
if len(t.value) > 1 and t.value.endswith(","):
t.value = t.value[:-1]
t.type = tok.symbol_tokens.get(t.value, "ID")

if t.type == "LP":
Expand Down
2 changes: 2 additions & 0 deletions simple_ddl_parser/dialects/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from simple_ddl_parser.dialects.athena import Athena
from simple_ddl_parser.dialects.bigquery import BigQuery
from simple_ddl_parser.dialects.hql import HQL
from simple_ddl_parser.dialects.ibm import IBMDb2
Expand All @@ -22,4 +23,5 @@
"IBMDb2",
"BaseSQL",
"PSQL",
"Athena",
]
11 changes: 11 additions & 0 deletions simple_ddl_parser/dialects/athena.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from typing import List


class Athena:
    """Grammar rules specific to the Athena dialect."""

    def p_escaped_by(self, p: List) -> None:
        """expr : expr ESCAPED BY STRING_BASE"""
        # The docstring above is the grammar production itself, so it is kept
        # verbatim; explanatory notes live in comments instead.

        # The result of the rule is the expression dict built so far.
        p[0] = p[1]

        # Take the last symbol from a list copy of the production (the
        # STRING_BASE value) rather than indexing `p` with a negative index.
        escape_char = list(p)[-1]

        # A literal that contains a doubled backslash is stored as a single
        # backslash character; any other literal is stored unchanged.
        if "\\\\" in escape_char:
            escape_char = "\\"

        p[0]["escaped_by"] = escape_char
5 changes: 3 additions & 2 deletions simple_ddl_parser/dialects/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ def p_multiple_options(self, p):
p[0] = p[1]

def p_options(self, p):
"""options : OPTIONS LP id_equals RP"""
"""options : OPTIONS LP multi_id_equals RP"""
p_list = list(p)
if not isinstance(p[1], dict):
p[0] = {"options": p[3]}
options = [{key: value} for key, value in p[3].items()]
p[0] = {"options": options}
else:
p[0] = p[1]
if len(p) == 4:
Expand Down
22 changes: 16 additions & 6 deletions simple_ddl_parser/dialects/hql.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,22 @@

class HQL:
def p_expression_location(self, p: List) -> None:
"""expr : expr LOCATION STRING
"""expr : expr LOCATION EQ STRING
| expr LOCATION EQ DQ_STRING
| expr LOCATION EQ multi_id_or_string
| expr LOCATION DQ_STRING
| expr LOCATION STRING
| expr LOCATION multi_id_or_string
| expr LOCATION EQ ID EQ ID EQ ID
"""
# last expr for sample like location=@ADL_Azure_Storage_Account_Container_Name/year=2023/month=08/
p[0] = p[1]
p_list = list(p)
p[0]["location"] = p_list[-1]
if len(p_list) == 9:
location = "".join(p_list[4:])
else:
location = p_list[-1]
p[0]["location"] = location

def p_expression_clustered(self, p: List) -> None:
"""expr : expr ID ON LP pid RP
Expand Down Expand Up @@ -73,10 +82,10 @@ def p_multi_assignments(self, p: List) -> None:
p[0].update(p_list[-1])

def p_assignment(self, p: List) -> None:
"""assignment : id id id
| STRING id STRING
| id id STRING
| STRING id id
"""assignment : id EQ id
| STRING EQ STRING
| id EQ STRING
| STRING EQ id
| STRING id"""
p_list = remove_par(list(p))
if "state" in self.lexer.__dict__:
Expand Down Expand Up @@ -142,6 +151,7 @@ def p_expression_partitioned_by_hql(self, p: List) -> None:
"""expr : expr PARTITIONED BY pid_with_type
| expr PARTITIONED BY LP pid RP
| expr PARTITIONED BY LP multiple_funct RP
| expr PARTITIONED BY funct
"""
p[0] = p[1]
p_list = remove_par(list(p))
Expand Down
5 changes: 2 additions & 3 deletions simple_ddl_parser/dialects/ibm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@

class IBMDb2:
def p_expr_index_in(self, p: List) -> None:
"""expr : expr INDEX id id"""
"""expr : expr INDEX IN id"""
p_list = list(p)
if p_list[-2].upper() == "IN":
p[1].update({"index_in": p_list[-1]})
p[1].update({"index_in": p_list[-1]})
p[0] = p[1]
6 changes: 3 additions & 3 deletions simple_ddl_parser/dialects/mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ def p_with(self, p: List) -> None:
p[0]["with"]["properties"] = p_list[-1]["properties"]

def p_equals(self, p: List) -> None:
"""equals : id id id
| id id ON
| id id id DOT id
"""equals : id EQ id
| id EQ ON
| id EQ dot_id
"""
p_list = list(p)
if "." in p_list:
Expand Down
8 changes: 7 additions & 1 deletion simple_ddl_parser/dialects/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,17 @@

class MySQL:
def p_engine(self, p: List) -> None:
"""expr : expr ENGINE id id"""
"""expr : expr ENGINE EQ id"""
p_list = list(p)
p[0] = p[1]
p[0]["engine"] = p_list[-1]

def p_db_properties(self, p: List) -> None:
    """expr : expr id EQ id_or_string"""
    # The docstring above is the grammar production and is kept verbatim.

    # Read the right-hand side of the `id EQ value` pair from a list copy of
    # the production, before touching p[0].
    property_value = list(p)[-1]

    # The rule's result is the expression dict accumulated so far ...
    p[0] = p[1]
    # ... extended with the property name (second symbol) mapped to its value.
    p[0][p[2]] = property_value

def p_on_update(self, p: List) -> None:
"""on_update : ON UPDATE id
| ON UPDATE STRING
Expand Down
1 change: 1 addition & 0 deletions simple_ddl_parser/dialects/redshift.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def p_encode(self, p: List) -> None:
def p_expression_diststyle(self, p: List) -> None:
"""expr : expr id id
| expr id KEY
| expr IN id
"""
p_list = list(p)
if p_list[-2] == "IN":
Expand Down
Loading