From 579b5a24a726a0739284714ca35ffff4b6441537 Mon Sep 17 00:00:00 2001 From: Jiang Chen Date: Sat, 5 Dec 2015 12:38:32 -0500 Subject: [PATCH 01/17] Add equivalentConditions() --- .../catalyst/plans/logical/LogicalPlan.scala | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 8f8747e10593..f481c3627a6a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreeNode} -abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { +abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper with Logging { private var _analyzed: Boolean = false @@ -127,11 +127,30 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { cleanLeft.children.size == cleanRight.children.size && { logDebug( s"[${cleanRight.cleanArgs.mkString(", ")}] == [${cleanLeft.cleanArgs.mkString(", ")}]") - cleanRight.cleanArgs == cleanLeft.cleanArgs + (cleanRight.cleanArgs == cleanLeft.cleanArgs) || ((cleanLeft, cleanRight) match { + case (l: Filter, r: Filter) => + (l.cleanArgs.collectFirst({ case e: Expression => e }), + r.cleanArgs.collectFirst({ case e: Expression => e })) match { + case (Some(cond1: Expression), Some(cond2: Expression)) => equivalentConditions(cond1, cond2) + case _ => false + } + //equivalentConditions(l.cleanArgs[0], r.cleanArgs[0]) + // case (l: Project, r: Project) => equivalentProjectLists(l.projectList, r.projectList) + case _ => false + }) } && (cleanLeft.children, cleanRight.children).zipped.forall(_ sameResult _) } + def equivalentConditions(left: Expression, right: Expression): Boolean = { + logDebug(s"equivalentConditions: [${left.toString}] with [${right.toString}]") + val leftPredicates = Set(splitConjunctivePredicates(left)) + val rightPredicates = Set(splitConjunctivePredicates(right)) + // TODO: support OR + logDebug(s"equivalentConditions Result: [${leftPredicates == rightPredicates}]") + leftPredicates == rightPredicates + } + /** Args that have cleaned such that differences in expression id should not affect equality */ protected lazy val cleanArgs: Seq[Any] = { val input = children.flatMap(_.output) From 0a096983f86945415ef6fb91af0be400d4a05aaf Mon Sep 17 00:00:00 2001 From: windscope Date: Sat, 5 Dec 2015 13:04:57 -0500 Subject: [PATCH 02/17] set comparison for projection --- .../spark/sql/catalyst/plans/logical/LogicalPlan.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index f481c3627a6a..73f7a7cbfd7b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -134,14 +134,16 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper w case (Some(cond1: Expression), Some(cond2: Expression)) => equivalentConditions(cond1, cond2) case _ => false } - //equivalentConditions(l.cleanArgs[0], r.cleanArgs[0]) - // case (l: Project, r: Project) => equivalentProjectLists(l.projectList, r.projectList) + case (l: Project, r: Project) => Set(l.cleanArgs) == Set(r.cleanArgs) case _ => false }) } && (cleanLeft.children, cleanRight.children).zipped.forall(_ sameResult _) } + /** + * Returns true if two conditions are equivalent. Equality check is tolerant of ordering different. + */ def equivalentConditions(left: Expression, right: Expression): Boolean = { logDebug(s"equivalentConditions: [${left.toString}] with [${right.toString}]") val leftPredicates = Set(splitConjunctivePredicates(left)) From 85f1ebca7ec568c84a2f08a131b3512c5a204526 Mon Sep 17 00:00:00 2001 From: windscope Date: Sat, 5 Dec 2015 14:48:02 -0500 Subject: [PATCH 03/17] Fix set conversion bug --- .../spark/sql/catalyst/plans/logical/LogicalPlan.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 73f7a7cbfd7b..2ca6530d13ab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -134,7 +134,7 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper w case (Some(cond1: Expression), Some(cond2: Expression)) => equivalentConditions(cond1, cond2) case _ => false } - case (l: Project, r: Project) => Set(l.cleanArgs) == Set(r.cleanArgs) + case (l: Project, r: Project) => l.cleanArgs.toSet == r.cleanArgs.toSet case _ => false }) } && @@ -146,8 +146,8 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper w */ def equivalentConditions(left: Expression, right: Expression): Boolean = { logDebug(s"equivalentConditions: [${left.toString}] with [${right.toString}]") - val leftPredicates = Set(splitConjunctivePredicates(left)) - val rightPredicates = Set(splitConjunctivePredicates(right)) + val leftPredicates = splitConjunctivePredicates(left).toSet + val rightPredicates = splitConjunctivePredicates(right).toSet // TODO: support OR logDebug(s"equivalentConditions Result: [${leftPredicates == rightPredicates}]") leftPredicates == rightPredicates From 1a2b534e01d0010d64fbe3b2382bd59eb0a28a4b Mon Sep 17 00:00:00 2001 From: windscope Date: Sat, 5 Dec 2015 15:13:42 -0500 Subject: [PATCH 04/17] Remove set comparison of projection --- .../apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 2ca6530d13ab..4500c51ad76a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -134,7 +134,6 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper w case (Some(cond1: Expression), Some(cond2: Expression)) => equivalentConditions(cond1, cond2) case _ => false } - case (l: Project, r: Project) => l.cleanArgs.toSet == r.cleanArgs.toSet case _ => false }) } && From 5fcb85ca97e8f48aff7848e51e5dfb187597dbee Mon Sep 17 00:00:00 2001 From: windscope Date: Sat, 5 Dec 2015 16:02:32 -0500 Subject: [PATCH 05/17] Add test case for filter condition order --- .../org/apache/spark/sql/catalyst/plans/SameResultSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala index 62d5f6ac7488..a83eab60c727 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala @@ -57,6 +57,7 @@ class SameResultSuite extends SparkFunSuite { test("filters") { assertSameResult(testRelation.where('a === 'b), testRelation2.where('a === 'b)) + assertSameResult(testRelation.where('a === 'b && 'a === 'c), testRelation2.where('a === 'c && 'a === 'b )) } test("sorts") { From 8f93c6aa7a628b71e789385664352878d3e2fd3d Mon Sep 17 00:00:00 2001 From: windscope Date: Sat, 5 Dec 2015 16:32:53 -0500 Subject: [PATCH 06/17] Fix style error --- .../spark/sql/catalyst/plans/logical/LogicalPlan.scala | 6 ++++-- .../apache/spark/sql/catalyst/plans/SameResultSuite.scala | 7 ++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 4500c51ad76a..8b606464c8f8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -131,7 +131,8 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper w case (l: Filter, r: Filter) => (l.cleanArgs.collectFirst({ case e: Expression => e }), r.cleanArgs.collectFirst({ case e: Expression => e })) match { - case (Some(cond1: Expression), Some(cond2: Expression)) => equivalentConditions(cond1, cond2) + case (Some(cond1: Expression), Some(cond2: Expression)) + => equivalentConditions(cond1, cond2) case _ => false } case _ => false @@ -141,7 +142,8 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper w } /** - * Returns true if two conditions are equivalent. Equality check is tolerant of ordering different. + * Returns true if two conditions are equivalent. Equality check is tolerant of ordering + * different. */ def equivalentConditions(left: Expression, right: Expression): Boolean = { logDebug(s"equivalentConditions: [${left.toString}] with [${right.toString}]") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala index a83eab60c727..def1164cc81b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala @@ -28,8 +28,8 @@ import org.apache.spark.sql.catalyst.util._ * Tests for the sameResult function of [[LogicalPlan]]. */ class SameResultSuite extends SparkFunSuite { - val testRelation = LocalRelation('a.int, 'b.int, 'c.int) - val testRelation2 = LocalRelation('a.int, 'b.int, 'c.int) + val testRelation = LocalRelation('a.int, 'b.int, 'c.int, 'd.int) + val testRelation2 = LocalRelation('a.int, 'b.int, 'c.int, 'd.int) def assertSameResult(a: LogicalPlan, b: LogicalPlan, result: Boolean = true): Unit = { val aAnalyzed = a.analyze @@ -57,7 +57,8 @@ class SameResultSuite extends SparkFunSuite { test("filters") { assertSameResult(testRelation.where('a === 'b), testRelation2.where('a === 'b)) - assertSameResult(testRelation.where('a === 'b && 'a === 'c), testRelation2.where('a === 'c && 'a === 'b )) + assertSameResult(testRelation.where('a === 'b && 'c === 'd), + testRelation2.where('c === 'd && 'a === 'b )) } test("sorts") { From 6eb6fddf82220c3181cd7151b573fe135d2e9c0a Mon Sep 17 00:00:00 2001 From: windscope Date: Sat, 5 Dec 2015 19:58:53 -0500 Subject: [PATCH 07/17] add testcase for SameResultSuite --- .../apache/spark/sql/catalyst/plans/SameResultSuite.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala index def1164cc81b..80b0d1b6cb48 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala @@ -58,7 +58,13 @@ class SameResultSuite extends SparkFunSuite { test("filters") { assertSameResult(testRelation.where('a === 'b), testRelation2.where('a === 'b)) assertSameResult(testRelation.where('a === 'b && 'c === 'd), - testRelation2.where('c === 'd && 'a === 'b )) + testRelation2.where('c === 'd && 'a === 'b ) + ) + + assertSameResult(testRelation.where('a === 'b && 'c === 'd), + testRelation2.where('a === 'c && 'b === 'd), + result = false + ) } test("sorts") { From 02fc878081da8ccd313fd60ed0ff81f9735794c0 Mon Sep 17 00:00:00 2001 From: windscope Date: Sat, 5 Dec 2015 20:44:00 -0500 Subject: [PATCH 08/17] add testcase for OR split filter condition --- .../apache/spark/sql/catalyst/plans/SameResultSuite.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala index 80b0d1b6cb48..96412647e4df 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala @@ -60,11 +60,18 @@ class SameResultSuite extends SparkFunSuite { assertSameResult(testRelation.where('a === 'b && 'c === 'd), testRelation2.where('c === 'd && 'a === 'b ) ) + assertSameResult(testRelation.where('a === 'b || 'c === 'd), + testRelation2.where('c === 'd || 'a === 'b ) + ) assertSameResult(testRelation.where('a === 'b && 'c === 'd), testRelation2.where('a === 'c && 'b === 'd), result = false ) + assertSameResult(testRelation.where('a === 'b || 'c === 'd), + testRelation2.where('a === 'c || 'b === 'd), + result = false + ) } test("sorts") { From bcb6df01a1706d92d728c4dce02a600be88f3fd9 Mon Sep 17 00:00:00 2001 From: Jiang Chen Date: Sat, 5 Dec 2015 21:04:50 -0500 Subject: [PATCH 09/17] Supported expressions with disjunctive predicates; refactor cleanArgs so that we can reuse cleanExpression(). --- .../catalyst/plans/logical/LogicalPlan.scala | 55 ++++++++++--------- .../spark/sql/execution/CacheManager.scala | 1 + 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 8b606464c8f8..5939bd776157 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -129,53 +129,56 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper w s"[${cleanRight.cleanArgs.mkString(", ")}] == [${cleanLeft.cleanArgs.mkString(", ")}]") (cleanRight.cleanArgs == cleanLeft.cleanArgs) || ((cleanLeft, cleanRight) match { case (l: Filter, r: Filter) => - (l.cleanArgs.collectFirst({ case e: Expression => e }), - r.cleanArgs.collectFirst({ case e: Expression => e })) match { - case (Some(cond1: Expression), Some(cond2: Expression)) - => equivalentConditions(cond1, cond2) - case _ => false - } + equivalentConditions(cleanExpression(l.condition, l.children.flatMap(_.output)), + cleanExpression(r.condition, r.children.flatMap(_.output))) case _ => false }) } && (cleanLeft.children, cleanRight.children).zipped.forall(_ sameResult _) } - /** - * Returns true if two conditions are equivalent. Equality check is tolerant of ordering - * different. - */ + /** + * Returns true if two conditions are equivalent. Equality check is tolerant of ordering + * different. + */ def equivalentConditions(left: Expression, right: Expression): Boolean = { logDebug(s"equivalentConditions: [${left.toString}] with [${right.toString}]") - val leftPredicates = splitConjunctivePredicates(left).toSet - val rightPredicates = splitConjunctivePredicates(right).toSet - // TODO: support OR - logDebug(s"equivalentConditions Result: [${leftPredicates == rightPredicates}]") - leftPredicates == rightPredicates + + val leftAndPredicates = splitConjunctivePredicates(left).toSet + val rightAndPredicates = splitConjunctivePredicates(right).toSet + val leftOrPredicates = splitDisjunctivePredicates(left).toSet + val rightOrPredicates = splitDisjunctivePredicates(right).toSet + logDebug(s"equivalentConditions Result: [${leftAndPredicates == rightAndPredicates}]") + // We split the two conditions into conjunctive predicates and disjunctive predicates + // If either of them match, we consider them equivalent conditions + (leftAndPredicates == rightAndPredicates) || (leftOrPredicates == rightOrPredicates) } + /** Clean an expression so that differences in expression id should not affect equality */ + def cleanExpression(e: Expression, input: Seq[Attribute]) = e match { + case a: Alias => + // As the root of the expression, Alias will always take an arbitrary exprId, we need + // to erase that for equality testing. + val cleanedExprId = Alias(a.child, a.name)(ExprId(-1), a.qualifiers) + BindReferences.bindReference(cleanedExprId, input, allowFailures = true) + case other => BindReferences.bindReference(other, input, allowFailures = true) + } + + /** Args that have cleaned such that differences in expression id should not affect equality */ protected lazy val cleanArgs: Seq[Any] = { val input = children.flatMap(_.output) - def cleanExpression(e: Expression) = e match { - case a: Alias => - // As the root of the expression, Alias will always take an arbitrary exprId, we need - // to erase that for equality testing. - val cleanedExprId = Alias(a.child, a.name)(ExprId(-1), a.qualifiers) - BindReferences.bindReference(cleanedExprId, input, allowFailures = true) - case other => BindReferences.bindReference(other, input, allowFailures = true) - } productIterator.map { // Children are checked using sameResult above. case tn: TreeNode[_] if containsChild(tn) => null - case e: Expression => cleanExpression(e) + case e: Expression => cleanExpression(e, input) case s: Option[_] => s.map { - case e: Expression => cleanExpression(e) + case e: Expression => cleanExpression(e, input) case other => other } case s: Seq[_] => s.map { - case e: Expression => cleanExpression(e) + case e: Expression => cleanExpression(e, input) case other => other } case other => other diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 50f6562815c2..f76321d2a79c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -124,6 +124,7 @@ private[sql] class CacheManager extends Logging { found } + /** Optionally returns cached data for the given [[Queryable]] */ private[sql] def lookupCachedData(query: Queryable): Option[CachedData] = readLock { lookupCachedData(query.queryExecution.analyzed) From 13ce03f2f5132eb8264e2f8d785410a8a94efec0 Mon Sep 17 00:00:00 2001 From: Jiang Chen Date: Sat, 5 Dec 2015 21:50:08 -0500 Subject: [PATCH 10/17] Removed dead code --- .../main/scala/org/apache/spark/sql/execution/CacheManager.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index f76321d2a79c..50f6562815c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -124,7 +124,6 @@ private[sql] class CacheManager extends Logging { found } - /** Optionally returns cached data for the given [[Queryable]] */ private[sql] def lookupCachedData(query: Queryable): Option[CachedData] = readLock { lookupCachedData(query.queryExecution.analyzed) From 9043afd3c8cb87484b751062a18655ec9d465b67 Mon Sep 17 00:00:00 2001 From: Jiang Chen Date: Mon, 7 Dec 2015 11:33:38 -0500 Subject: [PATCH 11/17] Refactor change. Move equivalentConditions to PredicateHelper --- .../sql/catalyst/expressions/predicates.scala | 14 ++++++++++++++ .../catalyst/plans/logical/LogicalPlan.scala | 19 +------------------ 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 304b438c84ba..0f5f35aebed2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -65,6 +65,20 @@ trait PredicateHelper { } } + /** + * Returns true if two expressions are equivalent predicates. Equality check is tolerant of + * ordering different. + */ + def equivalentPredicates(left: Expression, right: Expression): Boolean = { + val leftAndPredicates = splitConjunctivePredicates(left).toSet + val rightAndPredicates = splitConjunctivePredicates(right).toSet + val leftOrPredicates = splitDisjunctivePredicates(left).toSet + val rightOrPredicates = splitDisjunctivePredicates(right).toSet + // We split the two conditions into conjunctive predicates and disjunctive predicates + // If either of them match, we consider them equivalent conditions + (leftAndPredicates == rightAndPredicates) || (leftOrPredicates == rightOrPredicates) + } + // Substitute any known alias from a map. protected def replaceAlias( condition: Expression, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 5939bd776157..0e6b1917af33 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -129,7 +129,7 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper w s"[${cleanRight.cleanArgs.mkString(", ")}] == [${cleanLeft.cleanArgs.mkString(", ")}]") (cleanRight.cleanArgs == cleanLeft.cleanArgs) || ((cleanLeft, cleanRight) match { case (l: Filter, r: Filter) => - equivalentConditions(cleanExpression(l.condition, l.children.flatMap(_.output)), + equivalentPredicates(cleanExpression(l.condition, l.children.flatMap(_.output)), cleanExpression(r.condition, r.children.flatMap(_.output))) case _ => false }) @@ -137,23 +137,6 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper w (cleanLeft.children, cleanRight.children).zipped.forall(_ sameResult _) } - /** - * Returns true if two conditions are equivalent. Equality check is tolerant of ordering - * different. - */ - def equivalentConditions(left: Expression, right: Expression): Boolean = { - logDebug(s"equivalentConditions: [${left.toString}] with [${right.toString}]") - - val leftAndPredicates = splitConjunctivePredicates(left).toSet - val rightAndPredicates = splitConjunctivePredicates(right).toSet - val leftOrPredicates = splitDisjunctivePredicates(left).toSet - val rightOrPredicates = splitDisjunctivePredicates(right).toSet - logDebug(s"equivalentConditions Result: [${leftAndPredicates == rightAndPredicates}]") - // We split the two conditions into conjunctive predicates and disjunctive predicates - // If either of them match, we consider them equivalent conditions - (leftAndPredicates == rightAndPredicates) || (leftOrPredicates == rightOrPredicates) - } - /** Clean an expression so that differences in expression id should not affect equality */ def cleanExpression(e: Expression, input: Seq[Attribute]) = e match { case a: Alias => From ba29f0b96539e9a99979bfafc3937be04dd9a47b Mon Sep 17 00:00:00 2001 From: windscope Date: Tue, 8 Dec 2015 11:27:36 -0500 Subject: [PATCH 12/17] Fix style issue --- .../apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 0e6b1917af33..9aac023e4cd9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -138,7 +138,7 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper w } /** Clean an expression so that differences in expression id should not affect equality */ - def cleanExpression(e: Expression, input: Seq[Attribute]) = e match { + def cleanExpression(e: Expression, input: Seq[Attribute]): Expression = e match { case a: Alias => // As the root of the expression, Alias will always take an arbitrary exprId, we need // to erase that for equality testing. From b8ba919baebfd9ad6baa27fae57f9b45e3ed08a9 Mon Sep 17 00:00:00 2001 From: Jiang Chen Date: Wed, 9 Dec 2015 02:13:08 -0500 Subject: [PATCH 13/17] Integrate the functionality of equivalentPredicate with semanticEquals --- .../sql/catalyst/expressions/Expression.scala | 15 +++++++++++++-- .../sql/catalyst/expressions/predicates.scala | 14 -------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 614f0c075fd2..7f179054682e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -48,7 +48,7 @@ import org.apache.spark.sql.types._ * the same output data type. * */ -abstract class Expression extends TreeNode[Expression] { +abstract class Expression extends TreeNode[Expression] with PredicateHelper{ /** * Returns true when an expression is a candidate for static evaluation before the query is @@ -156,7 +156,18 @@ abstract class Expression extends TreeNode[Expression] { if (!deterministic || !other.deterministic) return false val elements1 = this.productIterator.toSeq val elements2 = other.asInstanceOf[Product].productIterator.toSeq - checkSemantic(elements1, elements2) + + + this.getClass() == other.getClass() && ((this, other) match { + // tolerant of ordering different + case (left: And, right: And) => + checkSemantic(splitConjunctivePredicates(left).toSet.toSeq, + splitConjunctivePredicates(right).toSet.toSeq) + case (left: Or, right: Or) => + checkSemantic(splitDisjunctivePredicates(left).toSet.toSeq, + splitDisjunctivePredicates(right).toSet.toSeq) + case _ => checkSemantic(elements1, elements2) + }) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 0f5f35aebed2..304b438c84ba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -65,20 +65,6 @@ trait PredicateHelper { } } - /** - * Returns true if two expressions are equivalent predicates. Equality check is tolerant of - * ordering different. - */ - def equivalentPredicates(left: Expression, right: Expression): Boolean = { - val leftAndPredicates = splitConjunctivePredicates(left).toSet - val rightAndPredicates = splitConjunctivePredicates(right).toSet - val leftOrPredicates = splitDisjunctivePredicates(left).toSet - val rightOrPredicates = splitDisjunctivePredicates(right).toSet - // We split the two conditions into conjunctive predicates and disjunctive predicates - // If either of them match, we consider them equivalent conditions - (leftAndPredicates == rightAndPredicates) || (leftOrPredicates == rightOrPredicates) - } - // Substitute any known alias from a map. protected def replaceAlias( condition: Expression, From a0e8c4a842f3c5377291ee5ad07c0ece2f00f22d Mon Sep 17 00:00:00 2001 From: Jiang Chen Date: Wed, 9 Dec 2015 02:29:15 -0500 Subject: [PATCH 14/17] Refactor sameResult to utilize semanticEquals --- .../spark/sql/catalyst/expressions/Expression.scala | 4 ++-- .../spark/sql/catalyst/plans/logical/LogicalPlan.scala | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 7f179054682e..52162b4cdc6e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -158,7 +158,7 @@ abstract class Expression extends TreeNode[Expression] with PredicateHelper{ val elements2 = other.asInstanceOf[Product].productIterator.toSeq - this.getClass() == other.getClass() && ((this, other) match { + (this, other) match { // tolerant of ordering different case (left: And, right: And) => checkSemantic(splitConjunctivePredicates(left).toSet.toSeq, @@ -167,7 +167,7 @@ abstract class Expression extends TreeNode[Expression] with PredicateHelper{ checkSemantic(splitDisjunctivePredicates(left).toSet.toSeq, splitDisjunctivePredicates(right).toSet.toSeq) case _ => checkSemantic(elements1, elements2) - }) + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 9aac023e4cd9..431c18716bf4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -127,12 +127,10 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper w cleanLeft.children.size == cleanRight.children.size && { logDebug( s"[${cleanRight.cleanArgs.mkString(", ")}] == [${cleanLeft.cleanArgs.mkString(", ")}]") - (cleanRight.cleanArgs == cleanLeft.cleanArgs) || ((cleanLeft, cleanRight) match { - case (l: Filter, r: Filter) => - equivalentPredicates(cleanExpression(l.condition, l.children.flatMap(_.output)), - cleanExpression(r.condition, r.children.flatMap(_.output))) - case _ => false - }) + cleanLeft.cleanArgs.zip(cleanRight.cleanArgs).forall { + case (e1: Expression, e2: Expression) => e1 semanticEquals e2 + case (a1, a2) => a1 == a2 + } } && (cleanLeft.children, cleanRight.children).zipped.forall(_ sameResult _) } From 4af36229b859bf47776490c0a77d3fbb4d747b50 Mon Sep 17 00:00:00 2001 From: Jiang Chen Date: Wed, 9 Dec 2015 11:27:36 -0500 Subject: [PATCH 15/17] Rewrite semanticEquals in And and Or to ignore their ordering. --- .../sql/catalyst/expressions/Expression.scala | 30 +++++++------------ .../sql/catalyst/expressions/predicates.scala | 20 +++++++++++++ .../catalyst/plans/logical/LogicalPlan.scala | 9 +++--- 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index c5d4e38580a4..54312a8332a1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -139,35 +139,25 @@ abstract class Expression extends TreeNode[Expression] with PredicateHelper{ */ def childrenResolved: Boolean = children.forall(_.resolved) + def checkSemantic(elements1: Seq[Any], elements2: Seq[Any]): Boolean = { + elements1.length == elements2.length && elements1.zip(elements2).forall { + case (e1: Expression, e2: Expression) => e1 semanticEquals e2 + case (Some(e1: Expression), Some(e2: Expression)) => e1 semanticEquals e2 + case (t1: Traversable[_], t2: Traversable[_]) => checkSemantic(t1.toSeq, t2.toSeq) + case (i1, i2) => i1 == i2 + } + } + /** * Returns true when two expressions will always compute the same result, even if they differ * cosmetically (i.e. capitalization of names in attributes may be different). */ def semanticEquals(other: Expression): Boolean = this.getClass == other.getClass && { - def checkSemantic(elements1: Seq[Any], elements2: Seq[Any]): Boolean = { - elements1.length == elements2.length && elements1.zip(elements2).forall { - case (e1: Expression, e2: Expression) => e1 semanticEquals e2 - case (Some(e1: Expression), Some(e2: Expression)) => e1 semanticEquals e2 - case (t1: Traversable[_], t2: Traversable[_]) => checkSemantic(t1.toSeq, t2.toSeq) - case (i1, i2) => i1 == i2 - } - } // Non-deterministic expressions cannot be semantic equal if (!deterministic || !other.deterministic) return false val elements1 = this.productIterator.toSeq val elements2 = other.asInstanceOf[Product].productIterator.toSeq - - - (this, other) match { - // tolerant of ordering different - case (left: And, right: And) => - checkSemantic(splitConjunctivePredicates(left).toSet.toSeq, - splitConjunctivePredicates(right).toSet.toSeq) - case (left: Or, right: Or) => - checkSemantic(splitDisjunctivePredicates(left).toSet.toSeq, - splitDisjunctivePredicates(right).toSet.toSeq) - case _ => checkSemantic(elements1, elements2) - } + checkSemantic(elements1, elements2) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 304b438c84ba..cba59bc13bd3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -252,6 +252,16 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with } } + override def semanticEquals(other: Expression): Boolean = this.getClass == other.getClass && { + // Non-deterministic expressions cannot be semantic equal + if (!deterministic || !other.deterministic) return false + + // we know both expressions are And, so we can tolerate ordering different + val elements1 = splitConjunctivePredicates(this).toSet.toSeq + val elements2 = splitConjunctivePredicates(other).toSet.toSeq + checkSemantic(elements1, elements2) + } + override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { val eval1 = left.gen(ctx) val eval2 = right.gen(ctx) @@ -301,6 +311,16 @@ case class Or(left: Expression, right: Expression) extends BinaryOperator with P } } + override def semanticEquals(other: Expression): Boolean = this.getClass == other.getClass && { + // Non-deterministic expressions cannot be semantic equal + if (!deterministic || !other.deterministic) return false + + // we know both expressions are Or, so we can tolerate ordering different + val elements1 = splitDisjunctivePredicates(this).toSet.toSeq + val elements2 = splitDisjunctivePredicates(other).toSet.toSeq + checkSemantic(elements1, elements2) + } + override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { val eval1 = left.gen(ctx) val eval2 = right.gen(ctx) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 431c18716bf4..396081772a5e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -128,11 +128,10 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper w logDebug( s"[${cleanRight.cleanArgs.mkString(", ")}] == [${cleanLeft.cleanArgs.mkString(", ")}]") cleanLeft.cleanArgs.zip(cleanRight.cleanArgs).forall { - case (e1: Expression, e2: Expression) => e1 semanticEquals e2 - case (a1, a2) => a1 == a2 - } - } && - (cleanLeft.children, cleanRight.children).zipped.forall(_ sameResult _) + case (e1: Expression, e2: Expression) => e1 semanticEquals e2 + case (a1, a2) => a1 == a2 + } + } && (cleanLeft.children, cleanRight.children).zipped.forall(_ sameResult _) } /** Clean an expression so that differences in expression id should not affect equality */ From 99626a4cb73becf21b51fc64c3921a768f7d0f63 Mon Sep 17 00:00:00 2001 From: Jiang Chen Date: Wed, 9 Dec 2015 14:44:23 -0500 Subject: [PATCH 16/17] Rewrite the code block that compares the equivalency of Seq[Expression] in semanticEquals. --- .../sql/catalyst/expressions/Expression.scala | 14 ++++++++++- .../sql/catalyst/expressions/predicates.scala | 24 ++++++++++++------- .../catalyst/plans/logical/LogicalPlan.scala | 2 +- .../sql/catalyst/plans/SameResultSuite.scala | 3 +++ 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 54312a8332a1..eb0c01e20f81 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -48,7 +48,7 @@ import org.apache.spark.sql.types._ * the same output data type. * */ -abstract class Expression extends TreeNode[Expression] with PredicateHelper{ +abstract class Expression extends TreeNode[Expression]{ /** * Returns true when an expression is a candidate for static evaluation before the query is @@ -160,6 +160,18 @@ abstract class Expression extends TreeNode[Expression] with PredicateHelper{ checkSemantic(elements1, elements2) } + /** + * Returns a sequence of expressions by removing from q the first expression that is semantically + * equivalent to e. + */ + def removeFirstSemanticEquivalent(seq: Seq[Expression], e: Expression): Seq[Expression] = { + seq match { + case Seq() => Seq() + case x +: rest if x semanticEquals e => rest + case x +: rest => x +: removeFirstSemanticEquivalent(rest, e) + } + } + /** * Returns the hash for this expression. Expressions that compute the same result, even if * they differ cosmetically should return the same hash. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index cba59bc13bd3..732cd4a2ec96 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -228,7 +228,8 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with } } -case class And(left: Expression, right: Expression) extends BinaryOperator with Predicate { +case class And(left: Expression, right: Expression) extends BinaryOperator + with Predicate with PredicateHelper{ override def inputType: AbstractDataType = BooleanType @@ -256,10 +257,12 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with // Non-deterministic expressions cannot be semantic equal if (!deterministic || !other.deterministic) return false - // we know both expressions are And, so we can tolerate ordering different - val elements1 = splitConjunctivePredicates(this).toSet.toSeq - val elements2 = splitConjunctivePredicates(other).toSet.toSeq - checkSemantic(elements1, elements2) + // We already know both expressions are And, so we can tolerate ordering different + // Recursively call semanticEquals on subexpressions to check the equivalency of two seqs. + var elements1 = splitConjunctivePredicates(this) + val elements2 = splitConjunctivePredicates(other) + for (e <- elements2) elements1 = removeFirstSemanticEquivalent(elements1, e) + elements1.isEmpty } override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { @@ -287,7 +290,8 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with } -case class Or(left: Expression, right: Expression) extends BinaryOperator with Predicate { +case class Or(left: Expression, right: Expression) extends BinaryOperator + with Predicate with PredicateHelper { override def inputType: AbstractDataType = BooleanType @@ -316,9 +320,11 @@ case class Or(left: Expression, right: Expression) extends BinaryOperator with P if (!deterministic || !other.deterministic) return false // we know both expressions are Or, so we can tolerate ordering different - val elements1 = splitDisjunctivePredicates(this).toSet.toSeq - val elements2 = splitDisjunctivePredicates(other).toSet.toSeq - checkSemantic(elements1, elements2) + // Recursively call semanticEquals on subexpressions to check the equivalency of two seqs. + var elements1 = splitDisjunctivePredicates(this) + val elements2 = splitDisjunctivePredicates(other) + for (e <- elements2) elements1 = removeFirstSemanticEquivalent(elements1, e) + elements1.isEmpty } override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 396081772a5e..f63a9fb9ed80 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreeNode} -abstract class LogicalPlan extends QueryPlan[LogicalPlan] with PredicateHelper with Logging { +abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { private var _analyzed: Boolean = false diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala index 96412647e4df..f9966fb0dfef 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/SameResultSuite.scala @@ -63,6 +63,9 @@ class SameResultSuite extends SparkFunSuite { assertSameResult(testRelation.where('a === 'b || 'c === 'd), testRelation2.where('c === 'd || 'a === 'b ) ) + assertSameResult(testRelation.where(('a === 'b || 'c === 'd) && ('e === 'f || 'g === 'h)), + testRelation2.where(('g === 'h || 'e === 'f) && ('c === 'd || 'a === 'b )) + ) assertSameResult(testRelation.where('a === 'b && 'c === 'd), testRelation2.where('a === 'c && 'b === 'd), From 2efca2f4e1599af0b0930be210b300b187ea4845 Mon Sep 17 00:00:00 2001 From: Jiang Chen Date: Wed, 9 Dec 2015 17:37:43 -0500 Subject: [PATCH 17/17] Fix a bug in semanticEqual. Add some comment. --- .../sql/catalyst/expressions/Expression.scala | 2 +- .../sql/catalyst/expressions/predicates.scala | 29 +++++++++++++++---- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index eb0c01e20f81..b1ba501e5d6e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -162,7 +162,7 @@ abstract class Expression extends TreeNode[Expression]{ /** * Returns a sequence of expressions by removing from q the first expression that is semantically - * equivalent to e. + * equivalent to e. If such an expression was not found, return seq. */ def removeFirstSemanticEquivalent(seq: Seq[Expression], e: Expression): Seq[Expression] = { seq match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 732cd4a2ec96..85d08d68f7dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -261,8 +261,17 @@ case class And(left: Expression, right: Expression) extends BinaryOperator // Recursively call semanticEquals on subexpressions to check the equivalency of two seqs. var elements1 = splitConjunctivePredicates(this) val elements2 = splitConjunctivePredicates(other) - for (e <- elements2) elements1 = removeFirstSemanticEquivalent(elements1, e) - elements1.isEmpty + // We can recursively call semanticEquals to check the equivalency for subexpressions, but + // there is no simple solution to compare the equivalency of sequence of expressions. + // Expression class doesn't have order, so we couldn't sort them. We can neither use + // set comparison as Set doesn't support custom compare function, which is semanticEquals. + // To check the equivalency of elements1 and elements2, we first compare their size. Then + // for each element in elements2, we remove its first semantically equivalent expression from + // elements1. If they are semantically equivalent, elements1 should be empty at the end. + elements1.size == elements2.size && { + for (e <- elements2) elements1 = removeFirstSemanticEquivalent(elements1, e) + elements1.isEmpty + } } override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { @@ -319,12 +328,20 @@ case class Or(left: Expression, right: Expression) extends BinaryOperator // Non-deterministic expressions cannot be semantic equal if (!deterministic || !other.deterministic) return false - // we know both expressions are Or, so we can tolerate ordering different - // Recursively call semanticEquals on subexpressions to check the equivalency of two seqs. + // We know both expressions are Or, so we can tolerate ordering different var elements1 = splitDisjunctivePredicates(this) val elements2 = splitDisjunctivePredicates(other) - for (e <- elements2) elements1 = removeFirstSemanticEquivalent(elements1, e) - elements1.isEmpty + // We can recursively call semanticEquals to check the equivalency for subexpressions, but + // there is no simple solution to compare the equivalency of sequence of expressions. + // Expression class doesn't have order, so we couldn't sort them. We can neither use + // set comparison as Set doesn't support custom compare function, which is semanticEquals. + // To check the equivalency of elements1 and elements2, we first compare their size. Then + // for each element in elements2, we remove its first semantically equivalent expression from + // elements1. If they are semantically equivalent, elements1 should be empty at the end. + elements1.size == elements2.size && { + for (e <- elements2) elements1 = removeFirstSemanticEquivalent(elements1, e) + elements1.isEmpty + } } override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {