From eeeed44efae2a42f7aa5bf7d3c3ded16a91133ac Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 7 Oct 2020 05:21:53 +0000 Subject: [PATCH 1/9] finishing the in expression. adding more tests and null support. need confirmation on null behavior and also I wonder why integer field is sufficient for string --- src/enclave/Enclave/ExpressionEvaluation.h | 36 +++++++++++++++++++ src/flatbuffers/Expr.fbs | 6 ++++ .../edu/berkeley/cs/rise/opaque/Utils.scala | 8 +++++ .../cs/rise/opaque/OpaqueOperatorTests.scala | 26 ++++++++++++++ 4 files changed, 76 insertions(+) diff --git a/src/enclave/Enclave/ExpressionEvaluation.h b/src/enclave/Enclave/ExpressionEvaluation.h index 7aa805b5d7..56da2e28f2 100644 --- a/src/enclave/Enclave/ExpressionEvaluation.h +++ b/src/enclave/Enclave/ExpressionEvaluation.h @@ -702,6 +702,42 @@ class FlatbuffersExpressionEvaluator { } } + case tuix::ExprUnion_In: + { + auto c = static_cast(expr->expr()); + size_t num_children = c->children()->size(); + bool result = false; + + if (num_children < 2){ + throw std::runtime_error(std::string("In can't operate with fewer than 2 args, currently we have") + + std::to_string(num_children)); + } + + auto left_offset = eval_helper(row, (*c->children())[0]); + const tuix::Field *left = flatbuffers::GetTemporaryPointer(builder, left_offset); + for (size_t i=1; ichildren())[i]); + const tuix::Field *item = flatbuffers::GetTemporaryPointer(builder, right_offset); + if (item->value_type() != left->value_type()){ + throw std::runtime_error( + std::string("In can't operate on ") + + std::string(tuix::EnumNameFieldUnion(left->value_type())) + + std::string(" and ") + + std::string(tuix::EnumNameFieldUnion(item->value_type()))); + } + //TODO why integer field passing the string test + if (static_cast(item->value())->value() == static_cast(left->value())->value()){ + result = true; + } + } + return tuix::CreateField( + builder, + tuix::FieldUnion_BooleanField, + tuix::CreateBooleanField(builder, result).Union(), + left->is_null()); + + } + // Conditional expressions case tuix::ExprUnion_If: { diff --git a/src/flatbuffers/Expr.fbs b/src/flatbuffers/Expr.fbs index 28be6c867a..81cd2a2d40 100644 --- a/src/flatbuffers/Expr.fbs +++ b/src/flatbuffers/Expr.fbs @@ -12,6 +12,7 @@ union ExprUnion { GreaterThanOrEqual, EqualTo, Contains, + In, Col, Literal, And, @@ -118,6 +119,11 @@ table Contains { right:Expr; } +// Array expressions +table In{ + children:[Expr]; +} + table Substring { str:Expr; pos:Expr; diff --git a/src/main/scala/edu/berkeley/cs/rise/opaque/Utils.scala b/src/main/scala/edu/berkeley/cs/rise/opaque/Utils.scala index 9ab50842eb..236e832e86 100644 --- a/src/main/scala/edu/berkeley/cs/rise/opaque/Utils.scala +++ b/src/main/scala/edu/berkeley/cs/rise/opaque/Utils.scala @@ -44,6 +44,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.expressions.Cast import org.apache.spark.sql.catalyst.expressions.Contains +import org.apache.spark.sql.catalyst.expressions.In import org.apache.spark.sql.catalyst.expressions.Descending import org.apache.spark.sql.catalyst.expressions.Divide import org.apache.spark.sql.catalyst.expressions.EqualTo @@ -965,6 +966,13 @@ object Utils extends Logging { tuix.Contains.createContains( builder, leftOffset, rightOffset)) + case (In(left, right), childrenOffsets) => + tuix.Expr.createExpr( + builder, + tuix.ExprUnion.In, + tuix.In.createIn( + builder, tuix.In.createChildrenVector(builder, childrenOffsets.toArray))) + case (Year(child), Seq(childOffset)) => tuix.Expr.createExpr( builder, diff --git a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala index e5ddeaa12a..37b01f2ada 100644 --- a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala +++ b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala @@ -389,6 +389,32 @@ trait OpaqueOperatorTests extends FunSuite with BeforeAndAfterAll { self => df.filter($"word".contains(lit("1"))).collect } + testAgainstSpark("isin") { securityLevel => + //normal test 1 + val ids = Seq((1, 2, 2), (2, 3, 1)) + val df = makeDF(ids, securityLevel, "x", "y", "id") + val c = $"id" isin ($"x", $"y") + df.filter(c).collect + + //normal test 2 + val ids2 = Seq((1, 1, 1), (2, 2, 2), (3,3,3), (4,4,4)) + val df2 = makeDF(ids2, securityLevel, "x", "y", "id") + val c2 = $"id" isin (1 ,2, 4, 5, 6) + df2.filter(c2).collect + + //string test + val ids3 = Seq(("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), ("b", "b", "b"), ("c","c","c"), ("d","d","d")) + val df3 = makeDF(ids3, securityLevel, "x", "y", "id") + val c3 = $"id" isin ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" ,"b", "c", "d", "e") + df3.filter(c3).collect + + //null test + val ids4 = Seq((1, 1, 1), (2, 2, 2), (3,3,null.asInstanceOf[Int]), (4,4,4)) + val df4 = makeDF(ids4, securityLevel, "x", "y", "id") + val c4 = $"id" isin (null.asInstanceOf[Int]) + df4.filter(c4).collect + } + testAgainstSpark("year") { securityLevel => val data = Seq(Tuple2(1, new java.sql.Date(new java.util.Date().getTime()))) val df = makeDF(data, securityLevel, "id", "date") From a21bfb85f2bd7c60cdeafdba2cb0d0aa55c1ed6e Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 16 Oct 2020 05:51:33 +0000 Subject: [PATCH 2/9] adding additional test --- .../berkeley/cs/rise/opaque/OpaqueOperatorTests.scala | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala index 37b01f2ada..16b271ab76 100644 --- a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala +++ b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala @@ -389,25 +389,28 @@ trait OpaqueOperatorTests extends FunSuite with BeforeAndAfterAll { self => df.filter($"word".contains(lit("1"))).collect } - testAgainstSpark("isin") { securityLevel => + testAgainstSpark("isin1") { securityLevel => //normal test 1 val ids = Seq((1, 2, 2), (2, 3, 1)) val df = makeDF(ids, securityLevel, "x", "y", "id") val c = $"id" isin ($"x", $"y") df.filter(c).collect - + } + testAgainstSpark("isin2") { securityLevel => //normal test 2 val ids2 = Seq((1, 1, 1), (2, 2, 2), (3,3,3), (4,4,4)) val df2 = makeDF(ids2, securityLevel, "x", "y", "id") val c2 = $"id" isin (1 ,2, 4, 5, 6) df2.filter(c2).collect - + } + testAgainstSpark("isin3") { securityLevel => //string test val ids3 = Seq(("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), ("b", "b", "b"), ("c","c","c"), ("d","d","d")) val df3 = makeDF(ids3, securityLevel, "x", "y", "id") val c3 = $"id" isin ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" ,"b", "c", "d", "e") df3.filter(c3).collect - + } + testAgainstSpark("isin4") { securityLevel => //null test val ids4 = Seq((1, 1, 1), (2, 2, 2), (3,3,null.asInstanceOf[Int]), (4,4,4)) val df4 = makeDF(ids4, securityLevel, "x", "y", "id") From d4931f0f80c9230cef8c4a711d7e84df31451f7e Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 16 Oct 2020 05:56:29 +0000 Subject: [PATCH 3/9] adding additional test --- .../cs/rise/opaque/OpaqueOperatorTests.scala | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala index 16b271ab76..d5f130c2c1 100644 --- a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala +++ b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala @@ -394,28 +394,36 @@ trait OpaqueOperatorTests extends FunSuite with BeforeAndAfterAll { self => val ids = Seq((1, 2, 2), (2, 3, 1)) val df = makeDF(ids, securityLevel, "x", "y", "id") val c = $"id" isin ($"x", $"y") - df.filter(c).collect + val result = df.filter(c) + //result.explain(true) + result.collect } testAgainstSpark("isin2") { securityLevel => //normal test 2 val ids2 = Seq((1, 1, 1), (2, 2, 2), (3,3,3), (4,4,4)) val df2 = makeDF(ids2, securityLevel, "x", "y", "id") val c2 = $"id" isin (1 ,2, 4, 5, 6) - df2.filter(c2).collect + val result = df2.filter(c2) + //result.explain(true) + result.collect } testAgainstSpark("isin3") { securityLevel => //string test val ids3 = Seq(("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), ("b", "b", "b"), ("c","c","c"), ("d","d","d")) val df3 = makeDF(ids3, securityLevel, "x", "y", "id") val c3 = $"id" isin ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" ,"b", "c", "d", "e") - df3.filter(c3).collect + val result = df3.filter(c3) + //result.explain(true) + result.collect } testAgainstSpark("isin4") { securityLevel => //null test val ids4 = Seq((1, 1, 1), (2, 2, 2), (3,3,null.asInstanceOf[Int]), (4,4,4)) val df4 = makeDF(ids4, securityLevel, "x", "y", "id") val c4 = $"id" isin (null.asInstanceOf[Int]) - df4.filter(c4).collect + val result = df4.filter(c4) + //result.explain(true) + result.collect } testAgainstSpark("year") { securityLevel => From 3853f33dbf92ee375d9a95a150948fcef23fdfe3 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 16 Oct 2020 07:18:03 +0000 Subject: [PATCH 4/9] saving concat implementation and it's passing basic functionality tests --- src/enclave/Enclave/ExpressionEvaluation.h | 50 +++++++++++++++++++ src/flatbuffers/Expr.fbs | 5 ++ .../edu/berkeley/cs/rise/opaque/Utils.scala | 8 +++ .../cs/rise/opaque/OpaqueOperatorTests.scala | 6 +++ 4 files changed, 69 insertions(+) diff --git a/src/enclave/Enclave/ExpressionEvaluation.h b/src/enclave/Enclave/ExpressionEvaluation.h index 56da2e28f2..b3d7d058e3 100644 --- a/src/enclave/Enclave/ExpressionEvaluation.h +++ b/src/enclave/Enclave/ExpressionEvaluation.h @@ -738,6 +738,56 @@ class FlatbuffersExpressionEvaluator { } + case tuix::ExprUnion_Concat: + { + //implementing this like string concat + auto c = static_cast(expr->expr()); + size_t num_children = c->children()->size(); + //TODO add type checking + size_t total = 0; + + std::vector result; + + for (size_t i =0; i< num_children; i++){ + auto offset = eval_helper(row, (*c->children())[i]); + const tuix::Field *str = flatbuffers::GetTemporaryPointer(builder, offset); + if (str->value_type() != tuix::FieldUnion_StringField) { + throw std::runtime_error( + std::string("tuix::Concat requires String, not ") + + std::string(tuix::EnumNameFieldUnion(str->value_type()))); + } + auto str_field = static_cast(str->value()); + int32_t start = 0; + int32_t end = str_field ->length(); + total += end; + + std::vector stringtoadd( + flatbuffers::VectorIterator(str_field->value()->Data(), + static_cast(start)), + flatbuffers::VectorIterator(str_field->value()->Data(), + static_cast(end))); + result.insert(result.end(), stringtoadd.begin(), stringtoadd.end()); + } + //TODO add null check + + return tuix::CreateField( + builder, + tuix::FieldUnion_StringField, + tuix::CreateStringFieldDirect( + builder, &result, static_cast(total)).Union(), + false); + /* + auto array_field = static_cast(value->value()); + std::string str = to_string(array_field); + std::vector str_vec(str.begin(), str.end()); + return tuix::CreateField( + builder, + tuix::FieldUnion_StringField, + tuix::CreateStringFieldDirect(builder, &str_vec, str_vec.size()).Union(), + result_is_null); + */ + } + // Conditional expressions case tuix::ExprUnion_If: { diff --git a/src/flatbuffers/Expr.fbs b/src/flatbuffers/Expr.fbs index 81cd2a2d40..fe9a1349b7 100644 --- a/src/flatbuffers/Expr.fbs +++ b/src/flatbuffers/Expr.fbs @@ -13,6 +13,7 @@ union ExprUnion { EqualTo, Contains, In, + Concat, Col, Literal, And, @@ -124,6 +125,10 @@ table In{ children:[Expr]; } +table Concat{ + children:[Expr]; +} + table Substring { str:Expr; pos:Expr; diff --git a/src/main/scala/edu/berkeley/cs/rise/opaque/Utils.scala b/src/main/scala/edu/berkeley/cs/rise/opaque/Utils.scala index 236e832e86..d8994d37d2 100644 --- a/src/main/scala/edu/berkeley/cs/rise/opaque/Utils.scala +++ b/src/main/scala/edu/berkeley/cs/rise/opaque/Utils.scala @@ -45,6 +45,7 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.expressions.Cast import org.apache.spark.sql.catalyst.expressions.Contains import org.apache.spark.sql.catalyst.expressions.In +import org.apache.spark.sql.catalyst.expressions.Concat import org.apache.spark.sql.catalyst.expressions.Descending import org.apache.spark.sql.catalyst.expressions.Divide import org.apache.spark.sql.catalyst.expressions.EqualTo @@ -973,6 +974,13 @@ object Utils extends Logging { tuix.In.createIn( builder, tuix.In.createChildrenVector(builder, childrenOffsets.toArray))) + case (Concat(child), childrenOffsets) => + tuix.Expr.createExpr( + builder, + tuix.ExprUnion.Concat, + tuix.Concat.createConcat( + builder, tuix.Concat.createChildrenVector(builder, childrenOffsets.toArray))) + case (Year(child), Seq(childOffset)) => tuix.Expr.createExpr( builder, diff --git a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala index d5f130c2c1..443459775a 100644 --- a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala +++ b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala @@ -389,6 +389,12 @@ trait OpaqueOperatorTests extends FunSuite with BeforeAndAfterAll { self => df.filter($"word".contains(lit("1"))).collect } + testAgainstSpark("concat") { securityLevel => + val data = for (i <- 0 until 256) yield ("%03d".format(i) * 3, i.toString) + val df = makeDF(data, securityLevel, "str", "x") + df.select(concat(col("str"),lit(","),col("x"))).collect + } + testAgainstSpark("isin1") { securityLevel => //normal test 1 val ids = Seq((1, 2, 2), (2, 3, 1)) From 889b8c3bb32b5e738c16330a0dae3317b6cc27a1 Mon Sep 17 00:00:00 2001 From: Chenyu Shi Date: Thu, 29 Oct 2020 07:42:51 +0000 Subject: [PATCH 5/9] adding type aware comparison and better error message for IN operator --- src/enclave/Enclave/ExpressionEvaluation.h | 31 +++++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/enclave/Enclave/ExpressionEvaluation.h b/src/enclave/Enclave/ExpressionEvaluation.h index b3d7d058e3..82276afb8f 100644 --- a/src/enclave/Enclave/ExpressionEvaluation.h +++ b/src/enclave/Enclave/ExpressionEvaluation.h @@ -707,14 +707,17 @@ class FlatbuffersExpressionEvaluator { auto c = static_cast(expr->expr()); size_t num_children = c->children()->size(); bool result = false; - if (num_children < 2){ - throw std::runtime_error(std::string("In can't operate with fewer than 2 args, currently we have") - + std::to_string(num_children)); + throw std::runtime_error(std::string("In can't operate with an empty list, currently we have ") + + std::to_string(num_children - 1) + + std::string("items in the list")); } auto left_offset = eval_helper(row, (*c->children())[0]); const tuix::Field *left = flatbuffers::GetTemporaryPointer(builder, left_offset); + + bool result_is_null = left->is_null(); + for (size_t i=1; ichildren())[i]); const tuix::Field *item = flatbuffers::GetTemporaryPointer(builder, right_offset); @@ -723,18 +726,32 @@ class FlatbuffersExpressionEvaluator { std::string("In can't operate on ") + std::string(tuix::EnumNameFieldUnion(left->value_type())) + std::string(" and ") - + std::string(tuix::EnumNameFieldUnion(item->value_type()))); + + std::string(tuix::EnumNameFieldUnion(item->value_type())) + + ". Please double check the type of each input"); } - //TODO why integer field passing the string test - if (static_cast(item->value())->value() == static_cast(left->value())->value()){ + result_is_null = result_is_null || item ->is_null(); + + // adding dynamic casting + bool temporary_result = + static_cast( + flatbuffers::GetTemporaryPointer( + builder, + eval_binary_comparison( + builder, + flatbuffers::GetTemporaryPointer(builder, left_offset), + flatbuffers::GetTemporaryPointer(builder, right_offset))) + ->value())->value(); + + if (temporary_result){ result = true; } } + return tuix::CreateField( builder, tuix::FieldUnion_BooleanField, tuix::CreateBooleanField(builder, result).Union(), - left->is_null()); + result_is_null); } From b1bad6d6d20928c06c60747ed2a901a0c44187b3 Mon Sep 17 00:00:00 2001 From: Chenyu Shi Date: Thu, 29 Oct 2020 08:12:24 +0000 Subject: [PATCH 6/9] adding null checking for the concat operator and adding one additional test --- src/enclave/Enclave/ExpressionEvaluation.h | 45 ++++++++----------- .../cs/rise/opaque/OpaqueOperatorTests.scala | 12 ++++- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/src/enclave/Enclave/ExpressionEvaluation.h b/src/enclave/Enclave/ExpressionEvaluation.h index 82276afb8f..5d32f91010 100644 --- a/src/enclave/Enclave/ExpressionEvaluation.h +++ b/src/enclave/Enclave/ExpressionEvaluation.h @@ -757,10 +757,10 @@ class FlatbuffersExpressionEvaluator { case tuix::ExprUnion_Concat: { - //implementing this like string concat + //implementing this like string concat since each argument in already serialized auto c = static_cast(expr->expr()); size_t num_children = c->children()->size(); - //TODO add type checking + size_t total = 0; std::vector result; @@ -770,39 +770,32 @@ class FlatbuffersExpressionEvaluator { const tuix::Field *str = flatbuffers::GetTemporaryPointer(builder, offset); if (str->value_type() != tuix::FieldUnion_StringField) { throw std::runtime_error( - std::string("tuix::Concat requires String, not ") - + std::string(tuix::EnumNameFieldUnion(str->value_type()))); + std::string("tuix::Concat requires string, not ") + + std::string(tuix::EnumNameFieldUnion(str->value_type())) + + std::string(". You do not need to provide the data as string but the data should be serialized into string before sent to concat")); + } + if (!str->is_null()){ + // skipping over the null input + auto str_field = static_cast(str->value()); + int32_t start = 0; + int32_t end = str_field ->length(); + total += end; + std::vector stringtoadd( + flatbuffers::VectorIterator(str_field->value()->Data(), + static_cast(start)), + flatbuffers::VectorIterator(str_field->value()->Data(), + static_cast(end))); + result.insert(result.end(), stringtoadd.begin(), stringtoadd.end()); } - auto str_field = static_cast(str->value()); - int32_t start = 0; - int32_t end = str_field ->length(); - total += end; - std::vector stringtoadd( - flatbuffers::VectorIterator(str_field->value()->Data(), - static_cast(start)), - flatbuffers::VectorIterator(str_field->value()->Data(), - static_cast(end))); - result.insert(result.end(), stringtoadd.begin(), stringtoadd.end()); } - //TODO add null check return tuix::CreateField( builder, tuix::FieldUnion_StringField, tuix::CreateStringFieldDirect( builder, &result, static_cast(total)).Union(), - false); - /* - auto array_field = static_cast(value->value()); - std::string str = to_string(array_field); - std::vector str_vec(str.begin(), str.end()); - return tuix::CreateField( - builder, - tuix::FieldUnion_StringField, - tuix::CreateStringFieldDirect(builder, &str_vec, str_vec.size()).Union(), - result_is_null); - */ + total==0); } // Conditional expressions diff --git a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala index 443459775a..9de6afd223 100644 --- a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala +++ b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala @@ -389,12 +389,22 @@ trait OpaqueOperatorTests extends FunSuite with BeforeAndAfterAll { self => df.filter($"word".contains(lit("1"))).collect } - testAgainstSpark("concat") { securityLevel => + testAgainstSpark("concatwithstring") { securityLevel => val data = for (i <- 0 until 256) yield ("%03d".format(i) * 3, i.toString) val df = makeDF(data, securityLevel, "str", "x") df.select(concat(col("str"),lit(","),col("x"))).collect } + testAgainstSpark("concatwithotherdatatype") { securityLevel => + // float causes a formating issue where opaque outputs 1.000000 and spark produces 1.0 so the following line is commented out + // val data = for (i <- 0 until 3) yield ("%03d".format(i) * 3, i, 1.0f) + // you can't serialize date so that's not supported as well + // opaque doesn't support byte + val data = for (i <- 0 until 3) yield ("%03d".format(i) * 3, i) + val df = makeDF(data, securityLevel, "str", "int") + df.select(concat(col("str"),lit(","),col("int"))).collect + } + testAgainstSpark("isin1") { securityLevel => //normal test 1 val ids = Seq((1, 2, 2), (2, 3, 1)) From f400a64122708f548e8605ec087d8741836c76e5 Mon Sep 17 00:00:00 2001 From: Chenyu Shi Date: Fri, 13 Nov 2020 19:08:58 +0000 Subject: [PATCH 7/9] cleaning up IN&Concat PR --- .../cs/rise/opaque/OpaqueOperatorTests.scala | 22 ++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala index 9de6afd223..70c72bc7eb 100644 --- a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala +++ b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala @@ -389,56 +389,48 @@ trait OpaqueOperatorTests extends FunSuite with BeforeAndAfterAll { self => df.filter($"word".contains(lit("1"))).collect } - testAgainstSpark("concatwithstring") { securityLevel => + testAgainstSpark("concat with string") { securityLevel => val data = for (i <- 0 until 256) yield ("%03d".format(i) * 3, i.toString) val df = makeDF(data, securityLevel, "str", "x") df.select(concat(col("str"),lit(","),col("x"))).collect } - testAgainstSpark("concatwithotherdatatype") { securityLevel => + testAgainstSpark("concat with other datatype") { securityLevel => // float causes a formating issue where opaque outputs 1.000000 and spark produces 1.0 so the following line is commented out // val data = for (i <- 0 until 3) yield ("%03d".format(i) * 3, i, 1.0f) // you can't serialize date so that's not supported as well // opaque doesn't support byte - val data = for (i <- 0 until 3) yield ("%03d".format(i) * 3, i) - val df = makeDF(data, securityLevel, "str", "int") - df.select(concat(col("str"),lit(","),col("int"))).collect + val data = for (i <- 0 until 3) yield ("%03d".format(i) * 3, i, null.asInstanceOf[Int]) + val df = makeDF(data, securityLevel, "str", "int","null") + df.select(concat(col("str"),lit(","),col("int"),col("null"))).collect } testAgainstSpark("isin1") { securityLevel => - //normal test 1 val ids = Seq((1, 2, 2), (2, 3, 1)) val df = makeDF(ids, securityLevel, "x", "y", "id") val c = $"id" isin ($"x", $"y") val result = df.filter(c) - //result.explain(true) result.collect } testAgainstSpark("isin2") { securityLevel => - //normal test 2 val ids2 = Seq((1, 1, 1), (2, 2, 2), (3,3,3), (4,4,4)) val df2 = makeDF(ids2, securityLevel, "x", "y", "id") val c2 = $"id" isin (1 ,2, 4, 5, 6) val result = df2.filter(c2) - //result.explain(true) result.collect } - testAgainstSpark("isin3") { securityLevel => - //string test + testAgainstSpark("isin with string") { securityLevel => val ids3 = Seq(("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), ("b", "b", "b"), ("c","c","c"), ("d","d","d")) val df3 = makeDF(ids3, securityLevel, "x", "y", "id") val c3 = $"id" isin ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" ,"b", "c", "d", "e") val result = df3.filter(c3) - //result.explain(true) result.collect } - testAgainstSpark("isin4") { securityLevel => - //null test + testAgainstSpark("isin with null") { securityLevel => val ids4 = Seq((1, 1, 1), (2, 2, 2), (3,3,null.asInstanceOf[Int]), (4,4,4)) val df4 = makeDF(ids4, securityLevel, "x", "y", "id") val c4 = $"id" isin (null.asInstanceOf[Int]) val result = df4.filter(c4) - //result.explain(true) result.collect } From 1d49af9e6e6009d7d294160e36029b672201b0bb Mon Sep 17 00:00:00 2001 From: Chenyu Shi Date: Wed, 18 Nov 2020 06:06:37 +0000 Subject: [PATCH 8/9] removing in and keeping only changes for concat pr --- src/enclave/Enclave/ExpressionEvaluation.h | 53 ------------------- src/flatbuffers/Expr.fbs | 1 - .../edu/berkeley/cs/rise/opaque/Utils.scala | 7 --- .../cs/rise/opaque/OpaqueOperatorTests.scala | 29 ---------- 4 files changed, 90 deletions(-) diff --git a/src/enclave/Enclave/ExpressionEvaluation.h b/src/enclave/Enclave/ExpressionEvaluation.h index 5d32f91010..816275be94 100644 --- a/src/enclave/Enclave/ExpressionEvaluation.h +++ b/src/enclave/Enclave/ExpressionEvaluation.h @@ -702,59 +702,6 @@ class FlatbuffersExpressionEvaluator { } } - case tuix::ExprUnion_In: - { - auto c = static_cast(expr->expr()); - size_t num_children = c->children()->size(); - bool result = false; - if (num_children < 2){ - throw std::runtime_error(std::string("In can't operate with an empty list, currently we have ") - + std::to_string(num_children - 1) - + std::string("items in the list")); - } - - auto left_offset = eval_helper(row, (*c->children())[0]); - const tuix::Field *left = flatbuffers::GetTemporaryPointer(builder, left_offset); - - bool result_is_null = left->is_null(); - - for (size_t i=1; ichildren())[i]); - const tuix::Field *item = flatbuffers::GetTemporaryPointer(builder, right_offset); - if (item->value_type() != left->value_type()){ - throw std::runtime_error( - std::string("In can't operate on ") - + std::string(tuix::EnumNameFieldUnion(left->value_type())) - + std::string(" and ") - + std::string(tuix::EnumNameFieldUnion(item->value_type())) - + ". Please double check the type of each input"); - } - result_is_null = result_is_null || item ->is_null(); - - // adding dynamic casting - bool temporary_result = - static_cast( - flatbuffers::GetTemporaryPointer( - builder, - eval_binary_comparison( - builder, - flatbuffers::GetTemporaryPointer(builder, left_offset), - flatbuffers::GetTemporaryPointer(builder, right_offset))) - ->value())->value(); - - if (temporary_result){ - result = true; - } - } - - return tuix::CreateField( - builder, - tuix::FieldUnion_BooleanField, - tuix::CreateBooleanField(builder, result).Union(), - result_is_null); - - } - case tuix::ExprUnion_Concat: { //implementing this like string concat since each argument in already serialized diff --git a/src/flatbuffers/Expr.fbs b/src/flatbuffers/Expr.fbs index fe9a1349b7..d7fec73276 100644 --- a/src/flatbuffers/Expr.fbs +++ b/src/flatbuffers/Expr.fbs @@ -12,7 +12,6 @@ union ExprUnion { GreaterThanOrEqual, EqualTo, Contains, - In, Concat, Col, Literal, diff --git a/src/main/scala/edu/berkeley/cs/rise/opaque/Utils.scala b/src/main/scala/edu/berkeley/cs/rise/opaque/Utils.scala index d8994d37d2..7337251237 100644 --- a/src/main/scala/edu/berkeley/cs/rise/opaque/Utils.scala +++ b/src/main/scala/edu/berkeley/cs/rise/opaque/Utils.scala @@ -967,13 +967,6 @@ object Utils extends Logging { tuix.Contains.createContains( builder, leftOffset, rightOffset)) - case (In(left, right), childrenOffsets) => - tuix.Expr.createExpr( - builder, - tuix.ExprUnion.In, - tuix.In.createIn( - builder, tuix.In.createChildrenVector(builder, childrenOffsets.toArray))) - case (Concat(child), childrenOffsets) => tuix.Expr.createExpr( builder, diff --git a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala index 70c72bc7eb..f22f52c82c 100644 --- a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala +++ b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala @@ -405,35 +405,6 @@ trait OpaqueOperatorTests extends FunSuite with BeforeAndAfterAll { self => df.select(concat(col("str"),lit(","),col("int"),col("null"))).collect } - testAgainstSpark("isin1") { securityLevel => - val ids = Seq((1, 2, 2), (2, 3, 1)) - val df = makeDF(ids, securityLevel, "x", "y", "id") - val c = $"id" isin ($"x", $"y") - val result = df.filter(c) - result.collect - } - testAgainstSpark("isin2") { securityLevel => - val ids2 = Seq((1, 1, 1), (2, 2, 2), (3,3,3), (4,4,4)) - val df2 = makeDF(ids2, securityLevel, "x", "y", "id") - val c2 = $"id" isin (1 ,2, 4, 5, 6) - val result = df2.filter(c2) - result.collect - } - testAgainstSpark("isin with string") { securityLevel => - val ids3 = Seq(("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), ("b", "b", "b"), ("c","c","c"), ("d","d","d")) - val df3 = makeDF(ids3, securityLevel, "x", "y", "id") - val c3 = $"id" isin ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" ,"b", "c", "d", "e") - val result = df3.filter(c3) - result.collect - } - testAgainstSpark("isin with null") { securityLevel => - val ids4 = Seq((1, 1, 1), (2, 2, 2), (3,3,null.asInstanceOf[Int]), (4,4,4)) - val df4 = makeDF(ids4, securityLevel, "x", "y", "id") - val c4 = $"id" isin (null.asInstanceOf[Int]) - val result = df4.filter(c4) - result.collect - } - testAgainstSpark("year") { securityLevel => val data = Seq(Tuple2(1, new java.sql.Date(new java.util.Date().getTime()))) val df = makeDF(data, securityLevel, "id", "date") From 88d10e9875c62701450bb1fdee7a50d8f0f052a2 Mon Sep 17 00:00:00 2001 From: Chenyu Shi Date: Fri, 18 Dec 2020 05:20:46 +0000 Subject: [PATCH 9/9] adding empty string test and removing in PR --- src/enclave/Enclave/ExpressionEvaluation.h | 10 +++++----- src/flatbuffers/Expr.fbs | 5 ----- .../berkeley/cs/rise/opaque/OpaqueOperatorTests.scala | 6 +++--- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/enclave/Enclave/ExpressionEvaluation.h b/src/enclave/Enclave/ExpressionEvaluation.h index 816275be94..fa1a9da567 100644 --- a/src/enclave/Enclave/ExpressionEvaluation.h +++ b/src/enclave/Enclave/ExpressionEvaluation.h @@ -717,21 +717,21 @@ class FlatbuffersExpressionEvaluator { const tuix::Field *str = flatbuffers::GetTemporaryPointer(builder, offset); if (str->value_type() != tuix::FieldUnion_StringField) { throw std::runtime_error( - std::string("tuix::Concat requires string, not ") + std::string("tuix::Concat requires serializable data types, not ") + std::string(tuix::EnumNameFieldUnion(str->value_type())) + std::string(". You do not need to provide the data as string but the data should be serialized into string before sent to concat")); } if (!str->is_null()){ // skipping over the null input auto str_field = static_cast(str->value()); - int32_t start = 0; - int32_t end = str_field ->length(); + uint32_t start = 0; + uint32_t end = str_field ->length(); total += end; std::vector stringtoadd( flatbuffers::VectorIterator(str_field->value()->Data(), - static_cast(start)), + start), flatbuffers::VectorIterator(str_field->value()->Data(), - static_cast(end))); + end)); result.insert(result.end(), stringtoadd.begin(), stringtoadd.end()); } diff --git a/src/flatbuffers/Expr.fbs b/src/flatbuffers/Expr.fbs index d7fec73276..3c98718cc7 100644 --- a/src/flatbuffers/Expr.fbs +++ b/src/flatbuffers/Expr.fbs @@ -119,11 +119,6 @@ table Contains { right:Expr; } -// Array expressions -table In{ - children:[Expr]; -} - table Concat{ children:[Expr]; } diff --git a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala index f22f52c82c..a4b2113cb6 100644 --- a/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala +++ b/src/test/scala/edu/berkeley/cs/rise/opaque/OpaqueOperatorTests.scala @@ -400,9 +400,9 @@ trait OpaqueOperatorTests extends FunSuite with BeforeAndAfterAll { self => // val data = for (i <- 0 until 3) yield ("%03d".format(i) * 3, i, 1.0f) // you can't serialize date so that's not supported as well // opaque doesn't support byte - val data = for (i <- 0 until 3) yield ("%03d".format(i) * 3, i, null.asInstanceOf[Int]) - val df = makeDF(data, securityLevel, "str", "int","null") - df.select(concat(col("str"),lit(","),col("int"),col("null"))).collect + val data = for (i <- 0 until 3) yield ("%03d".format(i) * 3, i, null.asInstanceOf[Int],"") + val df = makeDF(data, securityLevel, "str", "int","null","emptystring") + df.select(concat(col("str"),lit(","),col("int"),col("null"),col("emptystring"))).collect } testAgainstSpark("year") { securityLevel =>