Reformat sqlparse in black style (double quotes, exploded imports) and package it with find_packages.

Review amendments folded into this patch:
  * setup.py: version is the string "0.1"; the two trove classifiers that were
    silently concatenated by a missing comma are separate entries again.
  * mongo_builder.py: XOR is emitted as (a OR b) AND NOT (a AND b); the previous
    form, (a OR b) AND (NOT a AND NOT b), can never match. The IndexError guard
    covers only the WHERE lookup, so errors raised inside the visitor are no
    longer turned into an empty filter.
  * grammar.py: a parenthesised sub-select opens with L_PAREN; the trailing
    comments in whereCond sit on the alternatives they describe.

Line breaks, indentation and blank context lines were rebuilt from a copy whose
whitespace had been collapsed; every hunk matches the counts in its header.
The post-image blob ids on the "index" lines predate the amendments.

diff --git a/setup.py b/setup.py
index 3a8b84f..ba8280a 100755
--- a/setup.py
+++ b/setup.py
@@ -1,32 +1,21 @@
 #!/usr/bin/env python
-from setuptools import setup
-
-import sqlparse
+from setuptools import setup, find_packages
 
 setup(
-    name='sqlparse',
-    version=sqlparse.__version__,
-
-    author='Chris Lyon',
-    author_email='flushot@gmail.com',
-
-    description='SQL parser and query builder',
-    long_description=open('README.md').read(),
-
-    url='https://github.com/Flushot/sqlparse',
-
-    license='Apache License 2.0',
+    name="sqlparse",
+    version="0.1",
+    author="Chris Lyon",
+    author_email="flushot@gmail.com",
+    description="SQL parser and query builder",
+    long_description=open("README.md").read(),
+    url="https://github.com/Flushot/sqlparse",
+    packages=find_packages(include=["sqlparse", "sqlparse.*"]),
+    license="Apache License 2.0",
     classifiers=[
-        'Intended Audience :: Developers'
-        'Development Status :: 2 - Pre-Alpha',
-        'License :: OSI Approved :: Apache Software License',
-    ],
-
-    install_requires=[
-        'pyparsing',
-        'sqlalchemy',
-        'pymongo'
+        "Intended Audience :: Developers",
+        "Development Status :: 2 - Pre-Alpha",
+        "License :: OSI Approved :: Apache Software License",
     ],
-
-    test_suite='sqlparse'
+    install_requires=["pyparsing", "sqlalchemy", "pymongo"],
+    test_suite="sqlparse",
 )
diff --git a/sqlparse/builders/mongo_builder.py b/sqlparse/builders/mongo_builder.py
index 0b3c842..36da615 100644
--- a/sqlparse/builders/mongo_builder.py
+++ b/sqlparse/builders/mongo_builder.py
@@ -13,22 +13,22 @@ class MongoQueryVisitor(IdentifierAndValueVisitor):
     # Map of SQL operators to MongoDB equivalents
     # TODO: Create node classes for these operators, rather than relying on operator.name
     OPERATORS = {
-        'not': '$nor',
-        '!': '$nor',
-        '!=': '$ne',
-        '<>': '$ne',
-        '<': '$lt',
-        '<=': '$lte',
-        '>': '$gt',
-        '>=': '$gte',
-        'and': '$and',
-        '&&': '$and',
-        'or': '$or',
-        '||': '$or',
-        'in': '$in',
-        'mod': '$mod',
-        '%': '$mod',
-        'like': '$regex',
+        "not": "$nor",
+        "!": "$nor",
+        "!=": "$ne",
+        "<>": "$ne",
+        "<": "$lt",
+        "<=": "$lte",
+        ">": "$gt",
+        ">=": "$gte",
+        "and": "$and",
+        "&&": "$and",
+        "or": "$or",
+        "||": "$or",
+        "in": "$in",
+        "mod": "$mod",
+        "%": "$mod",
+        "like": "$regex",
         # Mongo doesn't support: + - * / ** << >>
     }
 
@@ -39,39 +39,28 @@ def visit_UnaryOperator(self, node):
         rhs_node = self.visit(node.rhs)
 
         if not isinstance(rhs_node, nodes.ListValue):
-            rhs_node = [ rhs_node ]
+            rhs_node = [rhs_node]
 
-        return { op_name: rhs_node }
+        return {op_name: rhs_node}
 
     def visit_BinaryOperator(self, node):
         lhs_node = self.visit(node.lhs)
         rhs_node = self.visit(node.rhs)
 
-        if node.name == '=':
+        if node.name == "=":
             # Mongo treats equality struct different from other binary operators
             if isinstance(lhs_node, str):
                 return {lhs_node: rhs_node}
             else:
-                raise ValueError('lhs is an expression: %s' % lhs_node)
+                raise ValueError("lhs is an expression: %s" % lhs_node)
 
-        elif node.name in ('xor', '^'):
+        elif node.name in ("xor", "^"):
             # Mongo lacks an XOR operator
-            return {
-                '$and': [
-                    {'$or': [lhs_node, rhs_node]},
-                    {'$and': [
-                        {'$nor': [lhs_node]},
-                        {'$nor': [rhs_node]}
-                    ]}
-                ]}
-
-        elif node.name == 'between':
+            return {"$and": [{"$or": [lhs_node, rhs_node]}, {"$nor": [{"$and": [lhs_node, rhs_node]}]}]}
+
+        elif node.name == "between":
             # Mongo lacks a BETWEEN operator
-            return {
-                '$and': [
-                    {lhs_node: {'$gte': rhs_node.begin}},
-                    {lhs_node: {'$lte': rhs_node.end}}
-                ]}
+            return {"$and": [{lhs_node: {"$gte": rhs_node.begin}}, {lhs_node: {"$lte": rhs_node.end}}]}
 
         # Standard binary operator
         else:
@@ -80,19 +69,20 @@ def visit_BinaryOperator(self, node):
                 raise ValueError('Mongo visitor does not implement "%s" binary operator' % node.name)
 
             # AND and OR have list operands
-            if op_name in ('$and', '$or'):
+            if op_name in ("$and", "$or"):
                 return {op_name: [lhs_node, rhs_node]}
             # Everything else contains a { prop: expr } operand
             elif isinstance(lhs_node, str):
                 return {lhs_node: {op_name: rhs_node}}
             else:
-                raise ValueError('lhs is an expression: %s' % lhs_node)
+                raise ValueError("lhs is an expression: %s" % lhs_node)
 
 
 class MongoQueryBuilder(QueryBuilder):
     """
     Builds a MongoDB query from a SQL query
     """
+
     def parse_and_build(self, query_string):
         parse_tree = sqlparse.parse_string(query_string)
         filter_options = {}
@@ -105,7 +95,7 @@ def parse_and_build(self, query_string):
         filter_fields = self._get_fields_option(parse_tree)
         self.fields = list(filter_fields.keys())
         if filter_fields:
-            filter_options['fields'] = filter_fields
+            filter_options["fields"] = filter_fields
 
         return self._get_filter_criteria(parse_tree), filter_options
 
@@ -113,7 +103,11 @@ def _get_filter_criteria(self, parse_tree):
         """
         Filter criteria specified in WHERE
         """
-        filter_criteria = MongoQueryVisitor().visit(parse_tree.where[0])
+        try:
+            where_node = parse_tree.where[0]
+        except IndexError:
+            return {}
+        filter_criteria = MongoQueryVisitor().visit(where_node)
         # print('WHERE: {}', json.dumps(filter_criteria, indent=4))
         return filter_criteria
 
@@ -123,17 +117,17 @@ def _get_collection_name(self, parse_tree):
         """
         collections = [str(table.name) for table in parse_tree.tables.values]
         if len(collections) == 0:
-            raise ValueError('Collection name required in FROM clause')
+            raise ValueError("Collection name required in FROM clause")
 
         collection = collections[0]
         # print('FROM: {}', collection)
 
         # TODO: parse this as an Identifier instead of a str
         if not isinstance(collection, str):
-            raise ValueError('collection name must be a string')
+            raise ValueError("collection name must be a string")
 
         if len(collections) > 1:
-            raise ValueError('Mongo query requires single collection in FROM clause')
+            raise ValueError("Mongo query requires single collection in FROM clause")
 
         return collection
 
@@ -144,11 +138,11 @@ def _get_fields_option(self, parse_tree):
         fields = IdentifierAndValueVisitor().visit(parse_tree.columns)
         # print('SELECT: {}', fields)
         if not isinstance(fields, list):
-            raise ValueError('SELECT must be a list')
+            raise ValueError("SELECT must be a list")
 
         filter_fields = {}
         for field in fields:
-            if field == '*':
+            if field == "*":
                 return {}
 
             filter_fields[field.name] = 1
diff --git a/sqlparse/grammar.py b/sqlparse/grammar.py
index bcdbcf0..3721ffe 100644
--- a/sqlparse/grammar.py
+++ b/sqlparse/grammar.py
@@ -15,14 +15,32 @@
 # limitations under the License.
 #
 import pyparsing
-from pyparsing import \
-    Forward, Group, Combine, Suppress, StringEnd, \
-    Optional, ZeroOrMore, OneOrMore, oneOf, \
-    operatorPrecedence, opAssoc, \
-    Word, Literal, CaselessLiteral, Regex, \
-    alphas, nums, alphanums, quotedString, \
-    restOfLine, quotedString, delimitedList, \
-    ParseResults, ParseException
+from pyparsing import (
+    Forward,
+    Group,
+    Combine,
+    Suppress,
+    StringEnd,
+    Optional,
+    ZeroOrMore,
+    OneOrMore,
+    oneOf,
+    infixNotation,
+    opAssoc,
+    Word,
+    Literal,
+    CaselessLiteral,
+    Regex,
+    alphas,
+    nums,
+    alphanums,
+    quotedString,
+    restOfLine,
+    quotedString,
+    delimitedList,
+    ParseResults,
+    ParseException,
+)
 
 from .nodes import *
 
@@ -31,31 +49,31 @@
 ################################
 
 # Keywords
-WHERE = CaselessLiteral('where')
-FROM = CaselessLiteral('from')
+WHERE = CaselessLiteral("where")
+FROM = CaselessLiteral("from")
 
-SELECT = CaselessLiteral('select')
-SELECT_DISTINCT = CaselessLiteral('distinct')
-SELECT_ALL = CaselessLiteral('all')
-AS = CaselessLiteral('as')
+SELECT = CaselessLiteral("select")
+SELECT_DISTINCT = CaselessLiteral("distinct")
+SELECT_ALL = CaselessLiteral("all")
+AS = CaselessLiteral("as")
 
-WITH = CaselessLiteral('with')
-RECURSIVE = CaselessLiteral('recursive')
+WITH = CaselessLiteral("with")
+RECURSIVE = CaselessLiteral("recursive")
 
-PIVOT = CaselessLiteral('pivot')
+PIVOT = CaselessLiteral("pivot")
 # UNPIVOT = CaselessLiteral('unpivot')
-PIVOT_IN = CaselessLiteral('in')
-PIVOT_FOR = CaselessLiteral('for')
+PIVOT_IN = CaselessLiteral("in")
+PIVOT_FOR = CaselessLiteral("for")
 
-ORDER_BY = CaselessLiteral('order by')
-ORDER_ASC = CaselessLiteral('asc')
-ORDER_DESC = CaselessLiteral('desc')
+ORDER_BY = CaselessLiteral("order by")
+ORDER_ASC = CaselessLiteral("asc")
+ORDER_DESC = CaselessLiteral("desc")
 
 # Special values
-VAL_NULL = CaselessLiteral('null')
-VAL_TRUE = CaselessLiteral('true')
-VAL_FALSE = CaselessLiteral('false')
-VAL_UNKNOWN = CaselessLiteral('unknown')
+VAL_NULL = CaselessLiteral("null")
+VAL_TRUE = CaselessLiteral("true")
+VAL_FALSE = CaselessLiteral("false")
+VAL_UNKNOWN = CaselessLiteral("unknown")
 
 # Joins
 # JOIN = CaselessLiteral('join')
@@ -71,18 +89,18 @@
 # ORDER_BY = CaselessLiteral('order by')
 
 # Operators (name is operators.FUNC_NAME)
-OP_EQUAL = Literal('=')
-OP_VAL_NULLSAFE_EQUAL = Literal('<=>')
-OP_NOTEQUAL = (Literal('!=') | Literal('<>'))
-OP_GT = Literal('>').setName('gt')
-OP_LT = Literal('<').setName('lt')
-OP_GTE = Literal('>=').setName('ge')
-OP_LTE = Literal('<=').setName('le')
-OP_IN = CaselessLiteral('in')  # sqlalchemy property: lhs.in_(rhs)
-OP_LIKE = CaselessLiteral('like')  # sqlalchemy property: lhs.like(rhs), lhs.ilike(rhs)
-OP_IS = CaselessLiteral('is')  # sqlalchemy or_(lhs == rhs, lhs == None)
-OP_BETWEEN = CaselessLiteral('between')  # sqlalchemy: between
-OP_BETWEEN_AND = Suppress(CaselessLiteral('and'))
+OP_EQUAL = Literal("=")
+OP_VAL_NULLSAFE_EQUAL = Literal("<=>")
+OP_NOTEQUAL = Literal("!=") | Literal("<>")
+OP_GT = Literal(">").setName("gt")
+OP_LT = Literal("<").setName("lt")
+OP_GTE = Literal(">=").setName("ge")
+OP_LTE = Literal("<=").setName("le")
+OP_IN = CaselessLiteral("in")  # sqlalchemy property: lhs.in_(rhs)
+OP_LIKE = CaselessLiteral("like")  # sqlalchemy property: lhs.like(rhs), lhs.ilike(rhs)
+OP_IS = CaselessLiteral("is")  # sqlalchemy or_(lhs == rhs, lhs == None)
+OP_BETWEEN = CaselessLiteral("between")  # sqlalchemy: between
+OP_BETWEEN_AND = Suppress(CaselessLiteral("and"))
 
 # Math
 # OP_ADD = Literal('+')
@@ -102,27 +120,27 @@
 # BITOP_XOR = Literal('^')
 
 # Conjugates
-LOGOP_AND = (CaselessLiteral('and') | CaselessLiteral('&&'))
-LOGOP_OR = (CaselessLiteral('or') | CaselessLiteral('||'))
-LOGOP_NOT = (CaselessLiteral('not') | CaselessLiteral('!'))
-LOGOP_XOR = CaselessLiteral('xor')
+LOGOP_AND = CaselessLiteral("and") | CaselessLiteral("&&")
+LOGOP_OR = CaselessLiteral("or") | CaselessLiteral("||")
+LOGOP_NOT = CaselessLiteral("not") | CaselessLiteral("!")
+LOGOP_XOR = CaselessLiteral("xor")
 
 # SELECT Statement Operators
-SELECTOP_EXCEPT = CaselessLiteral('except')
-SELECTOP_INTERSECT = CaselessLiteral('intersect')
-SELECTOP_UNION = CaselessLiteral('union')
-SELECTOP_UNION_ALL = CaselessLiteral('all')
+SELECTOP_EXCEPT = CaselessLiteral("except")
+SELECTOP_INTERSECT = CaselessLiteral("intersect")
+SELECTOP_UNION = CaselessLiteral("union")
+SELECTOP_UNION_ALL = CaselessLiteral("all")
 
 # Grouping
-L_PAREN = Suppress('(')
-R_PAREN = Suppress(')')
+L_PAREN = Suppress("(")
+R_PAREN = Suppress(")")
 
 # Math
-E = CaselessLiteral('e')
-STAR = Literal('*')
-DOT = Literal('.')
-PLUS = Literal('+')
-MINUS = Literal('-')
+E = CaselessLiteral("e")
+STAR = Literal("*")
+DOT = Literal(".")
+PLUS = Literal("+")
+MINUS = Literal("-")
 
 ################################
 # Non-Terminals
@@ -134,29 +152,23 @@
 selectStmt = Forward()
 
 # SELECT
-identifier = Word(alphas, alphanums + '_$')('identifier')  # a, A1, a_1$
+identifier = Word(alphas, alphanums + "_$")("identifier")  # a, A1, a_1$
 # alias = (Optional(AS) + identifier).setName('alias')
 
 # Projection
-columnName = delimitedList(identifier, DOT, combine=True).setParseAction(Identifier)('column')  # TODO: x AS y, x y, x `y`, x 'y', `x`, 'x'
+columnName = delimitedList(identifier, DOT, combine=True).setParseAction(Identifier)(
+    "column"
+)  # TODO: x AS y, x y, x `y`, x 'y', `x`, 'x'
 columnNameList = Group(delimitedList(STAR | columnName)).setParseAction(ListValue)
-tableName = delimitedList(identifier, DOT, combine=True).setParseAction(Identifier)('table')
+tableName = delimitedList(identifier, DOT, combine=True).setParseAction(Identifier)("table")
 tableNameList = Group(delimitedList(tableName)).setParseAction(ListValue)
 whereExpr = Forward()
 
 # WHERE
 # TODO: indirect comparisons (e.g. "table1.field1.xyz = 3" becomes "table1.any(field1.xyz == 3)")
 # TODO: math expression grammar (for both lval and rval)
-equalityOp = (
-    OP_VAL_NULLSAFE_EQUAL ^
-    OP_EQUAL ^
-    OP_NOTEQUAL ^
-    OP_LT ^
-    OP_GT ^
-    OP_GTE ^
-    OP_LTE
-)
-likeOp = (Optional(LOGOP_NOT) + OP_LIKE)
+equalityOp = OP_VAL_NULLSAFE_EQUAL ^ OP_EQUAL ^ OP_NOTEQUAL ^ OP_LT ^ OP_GT ^ OP_GTE ^ OP_LTE
+likeOp = Optional(LOGOP_NOT) + OP_LIKE
 betweenOp = Optional(LOGOP_NOT) + OP_BETWEEN  # [ NOT ] BETWEEN
 
 
@@ -164,44 +176,40 @@
 
 realNumber = (
     Combine(
-        Optional(sign) + (
+        Optional(sign)
+        + (
             # decimal present
-            ((Word(nums) + DOT + Optional(Word(nums)) | (DOT + Word(nums))) +
-             Optional(E + Optional(sign) + Word(nums))) |
+            ((Word(nums) + DOT + Optional(Word(nums)) | (DOT + Word(nums))) + Optional(E + Optional(sign) + Word(nums)))
+            |
             # negative exp
             (Word(nums) + Optional(E + Optional(MINUS) + Word(nums)))
         )
     ).setParseAction(RealValue)
-).setName('real')  # .1, 1.2, 1.2e3, -1.2e+3, 1.2e-3
+).setName(
+    "real"
+)  # .1, 1.2, 1.2e3, -1.2e+3, 1.2e-3
 
 intNumber = (
     Combine(
-        Optional(sign) +
-        Word(nums)
+        Optional(sign)
+        + Word(nums)
         # Optional(E + Optional(PLUS) + Word(nums)) # python int() doesn't grok this
     ).setParseAction(IntegerValue)
-).setName('int')  # -1 0 1 23
+).setName(
+    "int"
+)  # -1 0 1 23
 
 number = intNumber ^ realNumber
 
-atom = (
-    number |
-    stringValue('string')  # normalize quotes
-)
+atom = number | stringValue("string")  # normalize quotes
 
-groupSubSelectStmt = Group(R_PAREN + selectStmt + R_PAREN)  # todo: subselect must have a LIMIT in this context
+groupSubSelectStmt = Group(L_PAREN + selectStmt + R_PAREN)  # todo: subselect must have a LIMIT in this context
 
-columnRval = (
-    atom('value') |
-    columnName('column') |
-    groupSubSelectStmt('query')
-)
+columnRval = atom("value") | columnName("column") | groupSubSelectStmt("query")
 
-likePattern = (
-    stringValue('value')
-)
+likePattern = stringValue("value")
 
-inOperand = Suppress(L_PAREN) + Group(delimitedList(columnRval))('value').setParseAction(ListValue) + Suppress(R_PAREN)
+inOperand = Suppress(L_PAREN) + Group(delimitedList(columnRval))("value").setParseAction(ListValue) + Suppress(R_PAREN)
 
 # TODO: Functions: sum, avg, count, max, min, ifnull/isnull, if
 # current_date, current_time, current_timestamp, current_user
@@ -209,60 +217,58 @@
 # cast, convert
 whereCond = Forward()
 whereCond << (
-    Group(LOGOP_NOT + whereCond)('op').setParseAction(UnaryOperator) |
-    Group(columnName('column') + equalityOp('op') + columnRval).setParseAction(BinaryOperator) |  # x = y, x != y, etc.
-    Group(columnName('column') + likeOp('op') + likePattern).setParseAction(BinaryOperator) |
-    Group(columnName('column') + betweenOp('op') + Group(columnRval + OP_BETWEEN_AND + columnRval)('range').setParseAction(RangeValue)).setParseAction(BinaryOperator) |  # x between y and z, x not between y and z
-    Group(columnName('column') + Group(
-        OP_IS +
-        Optional(LOGOP_NOT))('op') +
-        (VAL_NULL | VAL_TRUE | VAL_FALSE | VAL_UNKNOWN)('value')
-    ) |  # x is null, x is not null
-    Group(columnName('column') + OP_IN('op') + inOperand).setParseAction(BinaryOperator) |
+    Group(LOGOP_NOT + whereCond)("op").setParseAction(UnaryOperator)
+    | Group(columnName("column") + equalityOp("op") + columnRval).setParseAction(BinaryOperator)  # x = y, x != y, etc.
+    | Group(columnName("column") + likeOp("op") + likePattern).setParseAction(BinaryOperator)
+    | Group(  # x between y and z, x not between y and z
+        columnName("column")
+        + betweenOp("op")
+        + Group(columnRval + OP_BETWEEN_AND + columnRval)("range").setParseAction(RangeValue)
+    ).setParseAction(BinaryOperator)
+    | Group(  # x is null, x is not null
+        columnName("column")
+        + Group(OP_IS + Optional(LOGOP_NOT))("op")
+        + (VAL_NULL | VAL_TRUE | VAL_FALSE | VAL_UNKNOWN)("value")
+    )
+    | Group(columnName("column") + OP_IN("op") + inOperand).setParseAction(BinaryOperator)
+    |
     # Group( columnName('column') + Combine( LOGOP_NOT + OP_IN )('op') + inOperand ) |
-    (L_PAREN + whereExpr('expr') + R_PAREN)
+    (L_PAREN + whereExpr("expr") + R_PAREN)
 )
 
-# logOp = operatorPrecedence(
+# logOp = infixNotation(
 #     whereExpr('expr'), [
 #         (LOGOP_NOT, 1, opAssoc.RIGHT, UnaryOperator),
 #         (LOGOP_AND, 2, opAssoc.RIGHT, BinaryOperator),
 #         (LOGOP_OR, 2, opAssoc.RIGHT, BinaryOperator)
 #     ])
 
 whereExpr << (
-    whereCond ^
+    whereCond
+    ^
     # Group(LOGOP_NOT('op') + whereCond )('expr').setParseAction(UnaryOperator) ^
     Group(
-        whereCond +
-        OneOrMore(
-            LOGOP_AND('op') + whereExpr('expr') |
-            LOGOP_XOR('op') + whereExpr('expr') |
-            LOGOP_OR('op') + whereExpr('expr')
+        whereCond
+        + OneOrMore(
+            LOGOP_AND("op") + whereExpr("expr")
+            | LOGOP_XOR("op") + whereExpr("expr")
+            | LOGOP_OR("op") + whereExpr("expr")
         )
     ).setParseAction(BinaryOperator)
 )
 
-columnProjection = (
-    Optional(SELECT_DISTINCT | SELECT_ALL).setResultsName('options') +
-    columnNameList('columns')
-)
+columnProjection = Optional(SELECT_DISTINCT | SELECT_ALL).setResultsName("options") + columnNameList("columns")
 
-fromClause = Suppress(FROM) + tableNameList('tables')
+fromClause = Suppress(FROM) + tableNameList("tables")
 
 # TODO: ( LEFT | RIGHT ) ( INNER | OUTER ) JOIN
 
 # TODO: PIVOT, UNPIVOT
 pivotClause = Optional(
-    Group(
-        PIVOT + L_PAREN + Group(columnNameList) +
-        PIVOT_FOR + columnName +
-        PIVOT_IN + Group(columnNameList) +
-        R_PAREN
-    )
-)('pivot')
+    Group(PIVOT + L_PAREN + Group(columnNameList) + PIVOT_FOR + columnName + PIVOT_IN + Group(columnNameList) + R_PAREN)
+)("pivot")
 
-whereClause = Optional(Suppress(WHERE) + whereExpr)('where')
+whereClause = Optional(Suppress(WHERE) + whereExpr)("where")
 
 # TODO: GROUP BY
 # TODO: HAVING
@@ -270,14 +276,15 @@
 
 # ORDER BY x, y ASC, d DESC, ...
 orderDirection = ORDER_ASC | ORDER_DESC
-orderByColumnList = Group(delimitedList(columnName('column') + Optional(orderDirection)('direction')))
-orderByClause = Optional(Suppress(ORDER_BY) + orderByColumnList('order'))  # todo: asc, desc
+orderByColumnList = Group(delimitedList(columnName("column") + Optional(orderDirection)("direction")))
+orderByClause = Optional(Suppress(ORDER_BY) + orderByColumnList("order"))  # todo: asc, desc
 
 selectStmt << (
-    Suppress(SELECT) +
-    columnProjection +
-    Optional(
-        fromClause +
+    Suppress(SELECT)
+    + columnProjection
+    + Optional(
+        fromClause
+        +
         # pivotClause +
         whereClause
     )
@@ -294,6 +301,6 @@
 sqlQuery = selectStmts + StringEnd()
 
 # Ignore comments
-commentStart = Suppress(oneOf('-- #'))
+commentStart = Suppress(oneOf("-- #"))
 comment = commentStart + restOfLine
 sqlQuery.ignore(comment)
diff --git a/sqlparse/nodes.py b/sqlparse/nodes.py
index 34bbda9..f1bec2b 100644
--- a/sqlparse/nodes.py
+++ b/sqlparse/nodes.py
@@ -73,7 +73,7 @@ class Identifier(ASTNode):
     """
 
     def __init__(self, tokens):
-        self.name = tokens[0][0]
+        self.name = tokens[0]
 
 
 class ModelIdentifier(Identifier):