xnuinside
diff --git a/‎CHANGELOG.txt‎
Lines changed: 29 additions & 0 deletions b/‎CHANGELOG.txt‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 39 additions & 1 deletion b/‎README.md‎
Lines changed: 39 additions & 1 deletion
diff --git a/‎docs/README.rst‎
Lines changed: 58 additions & 1 deletion b/‎docs/README.rst‎
Lines changed: 58 additions & 1 deletion
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎simple_ddl_parser/ddl_parser.py‎
Lines changed: 10 additions & 4 deletions b/‎simple_ddl_parser/ddl_parser.py‎
Lines changed: 10 additions & 4 deletions
diff --git a/‎simple_ddl_parser/dialects/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎simple_ddl_parser/dialects/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎simple_ddl_parser/dialects/athena.py‎
Lines changed: 11 additions & 0 deletions b/‎simple_ddl_parser/dialects/athena.py‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎simple_ddl_parser/dialects/bigquery.py‎
Lines changed: 3 additions & 2 deletions b/‎simple_ddl_parser/dialects/bigquery.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎simple_ddl_parser/dialects/hql.py‎
Lines changed: 14 additions & 5 deletions b/‎simple_ddl_parser/dialects/hql.py‎
Lines changed: 14 additions & 5 deletions
diff --git a/‎simple_ddl_parser/dialects/ibm.py‎
Lines changed: 2 additions & 3 deletions b/‎simple_ddl_parser/dialects/ibm.py‎
Lines changed: 2 additions & 3 deletions
@@ -1,3 +1,32 @@
+**v1.6.0**
+### IMPORTANT:
+In this version there are some output changes & fixes that can break your code.
+1. Now all arguments inside brackets are parsed as separate strings in a list.
+For example:
+ `file_format = (TYPE=JSON NULL_IF=('field'))` was previously parsed as 'NULL_IF': "('field')",
+ now it will be: 'NULL_IF': ["'field'"],
+
+2. Added separate tokens for EQ `=` and IN (previously they were also parsed as IDs) - internal information for contributors.
+
+3. Some CHECK statements in columns are now parsed correctly, and IN statements are parsed as normal lists.
+So this statement: include_exclude_ind CHAR(1) NOT NULL CONSTRAINT chk_metalistcombo_logicalopr
+  CHECK (include_exclude_ind IN ('I', 'E')),
+
+
+will produce this output:
+
+{'check': {'constraint_name': 'chk_metalistcombo_logicalopr',
+                         'statement': {'in_statement': {'in': ["'I'", "'E'"],
+                                                        'name': 'include_exclude_ind'}}},
+
+
+### Fixes
+1. The word DEFAULT no longer appears in the value of the 'default' key (it did before in some cases)
+
+### New Features
+1. Added Athena output mode and initial support - https://github.com/datacontract/datacontract-cli/issues/332
+
+
 **v1.5.4**
 ### Improvements
 #### Snowflake :
 
@@ -489,17 +489,55 @@ for help with debugging & testing support for BigQuery dialect DDLs:
 * https://github.com/kalyan939
 
 ## Changelog
+**v1.6.0**
+### IMPORTANT:
+In this version there are some output changes & fixes that can break your code.
+1. Now all arguments inside brackets are parsed as separate strings in a list.
+For example:
+ `file_format = (TYPE=JSON NULL_IF=('field'))` was previously parsed as 'NULL_IF': "('field')",
+ now it will be: 'NULL_IF': ["'field'"],
+
+2. Added separate tokens for EQ `=` and IN (previously they were also parsed as IDs) - internal information for contributors.
+
+3. Some CHECK statements in columns are now parsed correctly, and IN statements are parsed as normal lists.
+So this statement: include_exclude_ind CHAR(1) NOT NULL CONSTRAINT chk_metalistcombo_logicalopr
+  CHECK (include_exclude_ind IN ('I', 'E')),
+
+
+will produce this output:
+
+{'check': {'constraint_name': 'chk_metalistcombo_logicalopr',
+                         'statement': {'in_statement': {'in': ["'I'", "'E'"],
+                                                        'name': 'include_exclude_ind'}}},
+
+
+### Fixes
+1. The word DEFAULT no longer appears in the value of the 'default' key (it did before in some cases)
+
+### New Features
+1. Added Athena output mode and initial support - https://github.com/datacontract/datacontract-cli/issues/332
+
+
+**v1.5.4**
+### Improvements
+#### Snowflake :
+1. In Snowflake add `pattern` token for external table statement, and improve location rendering
+
+
 **v1.5.3**
 ### Fixes
 
-1. In Snowflake Fix unexpected behaviour when file_format name given - https://github.com/xnuinside/simple-ddl-parser/issues/273
+1. In Snowflake, fixed an unexpected error when the STRIP_OUTER_ARRAY property is present in a file_format statement - https://github.com/xnuinside/simple-ddl-parser/issues/276
 2.
 
 **v1.5.2**
 ### Improvements
 #### MySQL
 1. Added support for COLLATE - https://github.com/xnuinside/simple-ddl-parser/pull/266/files
 
+### Fixes
+
+1. In Snowflake Fix unexpected behaviour when file_format name given - https://github.com/xnuinside/simple-ddl-parser/issues/273
 
 **v1.5.1**
 ### Improvements
 
@@ -555,13 +555,64 @@ for help with debugging & testing support for BigQuery dialect DDLs:
 Changelog
 ---------
 
+**v1.6.0**
+
+IMPORTANT:
+^^^^^^^^^^
+
+In this version there are some output changes & fixes that can break your code.
+
+
+#. 
+   Now all arguments inside brackets are parsed as separate strings in a list.
+   For example:
+   ``file_format = (TYPE=JSON NULL_IF=('field'))`` was previously parsed as 'NULL_IF': "('field')",
+   now it will be: 'NULL_IF': ["'field'"],
+
+#. 
+   Added separate tokens for EQ ``=`` and IN (previously they were also parsed as IDs) - internal information for contributors.
+
+#. 
+   Some CHECK statements in columns are now parsed correctly, and IN statements are parsed as normal lists.
+   So this statement: include_exclude_ind CHAR(1) NOT NULL CONSTRAINT chk_metalistcombo_logicalopr
+   CHECK (include_exclude_ind IN ('I', 'E')),
+
+will produce this output:
+
+{'check': {'constraint_name': 'chk_metalistcombo_logicalopr',
+                         'statement': {'in_statement': {'in': ["'I'", "'E'"],
+                                                        'name': 'include_exclude_ind'}}},
+
+Fixes
+^^^^^
+
+
+#. The word DEFAULT no longer appears in the value of the 'default' key (it did before in some cases)
+
+New Features
+^^^^^^^^^^^^
+
+
+#. Added Athena output mode and initial support - https://github.com/datacontract/datacontract-cli/issues/332
+
+**v1.5.4**
+
+Improvements
+^^^^^^^^^^^^
+
+Snowflake :
+~~~~~~~~~~~
+
+
+#. In Snowflake add ``pattern`` token for external table statement, and improve location rendering
+
 **v1.5.3**
 
 Fixes
 ^^^^^
 
 
-#. In Snowflake Fix unexpected behaviour when file_format name given - https://github.com/xnuinside/simple-ddl-parser/issues/273
+#. In Snowflake, fixed an unexpected error when the STRIP_OUTER_ARRAY property is present in a file_format statement - https://github.com/xnuinside/simple-ddl-parser/issues/276
    2.
 
 **v1.5.2**
@@ -575,6 +626,12 @@ MySQL
 
 #. Added support for COLLATE - https://github.com/xnuinside/simple-ddl-parser/pull/266/files
 
+Fixes
+^^^^^
+
+
+#. In Snowflake Fix unexpected behaviour when file_format name given - https://github.com/xnuinside/simple-ddl-parser/issues/273
+
 **v1.5.1**
 
 Improvements
 
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "simple-ddl-parser"
-version = "1.5.3"
+version = "1.6.0"
 description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
 authors = ["Iuliia Volkova <[email protected]>"]
 license = "MIT"
 
@@ -7,6 +7,7 @@
     HQL,
     MSSQL,
     PSQL,
+    Athena,
     BaseSQL,
     BigQuery,
     IBMDb2,
@@ -37,6 +38,7 @@ class Dialects(
     BigQuery,
     IBMDb2,
     PSQL,
+    Athena,
 ):
     pass
 
@@ -159,14 +161,18 @@ def is_creation_name(self, t: LexToken) -> bool:
             "TYPE",
             "DOMAIN",
             "TABLESPACE",
-            "INDEX",
             "CONSTRAINT",
             "EXISTS",
         ]
         return (
             t.value not in skip_id_tokens
             and t.value.upper() not in ["IF"]
-            and self.lexer.last_token in exceptional_keys
+            and (
+                self.lexer.last_token in exceptional_keys
+                or (
+                    self.lexer.last_token == "INDEX" and self.lexer.is_table is not True
+                )
+            )
             and not self.exceptional_cases(t.value.upper())
         )
 
@@ -193,13 +199,14 @@ def t_AUTOINCREMENT(self, t: LexToken):
 
     def t_ID(self, t: LexToken):
         r"([0-9]+[.][0-9]*([e][+-]?[0-9]+)?|[0-9]\.[0-9])\w|([a-zA-Z_,0-9:><\/\\\=\-\+\~\%$@#\|&?;*\()!{}\[\]\`\[\]]+)"
+        if len(t.value) > 1 and t.value.endswith(","):
+            t.value = t.value[:-1]
         t.type = tok.symbol_tokens.get(t.value, "ID")
 
         if t.type == "LP":
             self.lexer.lp_open += 1
             self.lexer.columns_def = True
             self.lexer.last_token = "LP"
-            print(t.type, t.value)
             return t
         elif self.is_token_column_name(t) or self.lexer.last_token == "DOT":
             t.type = "ID"
@@ -249,7 +256,6 @@ def set_lexx_tags(self, t: LexToken):
 
     def set_last_token(self, t: LexToken):
         self.lexer.last_token = t.type
-        print(t.value, t.type)
         return t
 
     def p_id(self, p):
 
@@ -1,3 +1,4 @@
+from simple_ddl_parser.dialects.athena import Athena
 from simple_ddl_parser.dialects.bigquery import BigQuery
 from simple_ddl_parser.dialects.hql import HQL
 from simple_ddl_parser.dialects.ibm import IBMDb2
@@ -22,4 +23,5 @@
     "IBMDb2",
     "BaseSQL",
     "PSQL",
+    "Athena",
 ]
@@ -0,0 +1,11 @@
+from typing import List
+
+
+class Athena:
+    def p_escaped_by(self, p: List) -> None:
+        """expr : expr ESCAPED BY STRING_BASE"""
+        p[0] = p[1]
+        p_list = list(p)
+        if "\\\\" in p_list[-1]:
+            p_list[-1] = "\\"
+        p[0]["escaped_by"] = p_list[-1]
@@ -15,10 +15,11 @@ def p_multiple_options(self, p):
             p[0] = p[1]
 
     def p_options(self, p):
-        """options : OPTIONS LP id_equals RP"""
+        """options : OPTIONS LP multi_id_equals RP"""
         p_list = list(p)
         if not isinstance(p[1], dict):
-            p[0] = {"options": p[3]}
+            options = [{key: value} for key, value in p[3].items()]
+            p[0] = {"options": options}
         else:
             p[0] = p[1]
             if len(p) == 4:
 
@@ -8,10 +8,19 @@ def p_expression_location(self, p: List) -> None:
         """expr : expr LOCATION EQ STRING
         | expr LOCATION EQ DQ_STRING
         | expr LOCATION EQ multi_id_or_string
+        | expr LOCATION DQ_STRING
+        | expr LOCATION STRING
+        | expr LOCATION multi_id_or_string
+        | expr LOCATION EQ ID EQ ID EQ ID
         """
+        # the last alternative (EQ ID EQ ID EQ ID) is for input like location=@ADL_Azure_Storage_Account_Container_Name/year=2023/month=08/
         p[0] = p[1]
         p_list = list(p)
-        p[0]["location"] = p_list[-1]
+        if len(p_list) == 9:
+            location = "".join(p_list[4:])
+        else:
+            location = p_list[-1]
+        p[0]["location"] = location
 
     def p_expression_clustered(self, p: List) -> None:
         """expr : expr ID ON LP pid RP
@@ -73,10 +82,10 @@ def p_multi_assignments(self, p: List) -> None:
         p[0].update(p_list[-1])
 
     def p_assignment(self, p: List) -> None:
-        """assignment : id id id
-        |  STRING id STRING
-        |  id id STRING
-        |  STRING id id
+        """assignment : id EQ id
+        |  STRING EQ STRING
+        |  id EQ STRING
+        |  STRING EQ id
         |  STRING id"""
         p_list = remove_par(list(p))
         if "state" in self.lexer.__dict__:
 
@@ -3,8 +3,7 @@
 
 class IBMDb2:
     def p_expr_index_in(self, p: List) -> None:
-        """expr : expr INDEX id id"""
+        """expr : expr INDEX IN id"""
         p_list = list(p)
-        if p_list[-2].upper() == "IN":
-            p[1].update({"index_in": p_list[-1]})
+        p[1].update({"index_in": p_list[-1]})
         p[0] = p[1]