@@ -23,22 +23,22 @@ import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
2323
2424class UpdateFieldsPerformanceSuite extends QueryTest with SharedSparkSession {
2525
26- private def colName (d : Int , colNum : Int ): String = s " nested ${d}Col $colNum"
26+ private def nestedColName (d : Int , colNum : Int ): String = s " nested ${d}Col $colNum"
2727
2828 private def nestedStructType (
2929 depths : Seq [Int ], colNums : Seq [Int ], nullable : Boolean ): StructType = {
3030 if (depths.length == 1 ) {
3131 StructType (colNums.map { colNum =>
32- StructField (colName (depths.head, colNum), IntegerType , nullable = false )
32+ StructField (nestedColName (depths.head, colNum), IntegerType , nullable = false )
3333 })
3434 } else {
3535 val depth = depths.head
3636 val fields = colNums.foldLeft(Seq .empty[StructField ]) {
3737 case (structFields, colNum) if colNum == 0 =>
38- val nested = nestedStructType(depths.tail, colNums, nullable = nullable )
39- structFields :+ StructField (colName (depth, colNum), nested, nullable = nullable)
38+ val nested = nestedStructType(depths.tail, colNums, nullable)
39+ structFields :+ StructField (nestedColName (depth, colNum), nested, nullable)
4040 case (structFields, colNum) =>
41- structFields :+ StructField (colName (depth, colNum), IntegerType , nullable = false )
41+ structFields :+ StructField (nestedColName (depth, colNum), IntegerType , nullable = false )
4242 }
4343 StructType (fields)
4444 }
@@ -60,10 +60,10 @@ class UpdateFieldsPerformanceSuite extends QueryTest with SharedSparkSession {
6060 * Utility function for generating a DataFrame with nested columns.
6161 *
6262 * @param depth: The depth to which to create nested columns.
63- * @param numColsAtEachDepth: The number of columns to create at each depth. The columns names
64- * are in the format of nested${depth}Col${ index}. The value of each
65- * column will be its index at that depth, or if the index of the column
66- * is 0, then the value could also be a struct .
63+ * @param numColsAtEachDepth: The number of columns to create at each depth. The value of each
64+ * column will be the same as its index (IntegerType) at that depth
65+ * unless the index = 0, in which case it may be a StructType which
66+ * represents the next depth .
6767 * @param nullable: This value is used to set the nullability of StructType columns.
6868 */
6969 private def nestedDf (
@@ -78,7 +78,7 @@ class UpdateFieldsPerformanceSuite extends QueryTest with SharedSparkSession {
7878
7979 spark.createDataFrame(
8080 sparkContext.parallelize(Row (nestedColumn) :: Nil ),
81- StructType (Seq (StructField (colName (0 , 0 ), nestedColumnDataType, nullable = nullable))))
81+ StructType (Seq (StructField (nestedColName (0 , 0 ), nestedColumnDataType, nullable))))
8282 }
8383
8484 test(" nestedDf should generate nested DataFrames" ) {
@@ -139,21 +139,21 @@ class UpdateFieldsPerformanceSuite extends QueryTest with SharedSparkSession {
139139
140140 // drop columns at the current depth
141141 val dropped = if (colNumsToDrop.nonEmpty) {
142- column.dropFields(colNumsToDrop.map(num => colName (depth, num)): _* )
142+ column.dropFields(colNumsToDrop.map(num => nestedColName (depth, num)): _* )
143143 } else column
144144
145145 // add columns at the current depth
146146 val added = colNumsToAdd.foldLeft(dropped) {
147- (res, num) => res.withField(colName (depth, num), lit(num))
147+ (res, num) => res.withField(nestedColName (depth, num), lit(num))
148148 }
149149
150150 if (depths.length == 1 ) {
151151 added
152152 } else {
153153 // add/drop columns at the next depth
154- val nestedColumn = col((0 to depth).map(d => s " ` ${colName (d, 0 )}` " ).mkString(" ." ))
154+ val nestedColumn = col((0 to depth).map(d => s " ` ${nestedColName (d, 0 )}` " ).mkString(" ." ))
155155 added.withField(
156- colName (depth, 0 ),
156+ nestedColName (depth, 0 ),
157157 addDropNestedColumns(nestedColumn, depths.tail, colNumsToAdd, colNumsToDrop))
158158 }
159159 }
@@ -165,16 +165,16 @@ class UpdateFieldsPerformanceSuite extends QueryTest with SharedSparkSession {
165165 val maxDepth = 20
166166
167167 // dataframe with nested*Col0 to nested*Col4 at each of 20 depths
168- val inputDf = nestedDf(maxDepth, 5 , nullable = nullable )
168+ val inputDf = nestedDf(maxDepth, 5 , nullable)
169169
170170 // add nested*Col5 through nested*Col9 at each depth
171171 val resultDf = inputDf.select(addDropNestedColumns(
172- column = col(colName (0 , 0 )),
172+ column = col(nestedColName (0 , 0 )),
173173 depths = 1 to maxDepth,
174174 colNumsToAdd = 5 to 9 ).as(" nested0Col0" ))
175175
176176 // dataframe with nested*Col0 to nested*Col9 at each of 20 depths
177- val expectedDf = nestedDf(maxDepth, 10 , nullable = nullable )
177+ val expectedDf = nestedDf(maxDepth, 10 , nullable)
178178 checkAnswer(resultDf, expectedDf.collect(), expectedDf.schema)
179179 }
180180
@@ -183,16 +183,16 @@ class UpdateFieldsPerformanceSuite extends QueryTest with SharedSparkSession {
183183 val maxDepth = 20
184184
185185 // dataframe with nested*Col0 to nested*Col9 at each of 20 depths
186- val inputDf = nestedDf(maxDepth, 10 , nullable = nullable )
186+ val inputDf = nestedDf(maxDepth, 10 , nullable)
187187
188188 // drop nested*Col5 to nested*Col9 at each of 20 depths
189189 val resultDf = inputDf.select(addDropNestedColumns(
190- column = col(colName (0 , 0 )),
190+ column = col(nestedColName (0 , 0 )),
191191 depths = 1 to maxDepth,
192192 colNumsToDrop = 5 to 9 ).as(" nested0Col0" ))
193193
194194 // dataframe with nested*Col0 to nested*Col4 at each of 20 depths
195- val expectedDf = nestedDf(maxDepth, 5 , nullable = nullable )
195+ val expectedDf = nestedDf(maxDepth, 5 , nullable)
196196 checkAnswer(resultDf, expectedDf.collect(), expectedDf.schema)
197197 }
198198
@@ -201,12 +201,12 @@ class UpdateFieldsPerformanceSuite extends QueryTest with SharedSparkSession {
201201 val maxDepth = 20
202202
203203 // dataframe with nested*Col0 to nested*Col9 at each of 20 depths
204- val inputDf = nestedDf(maxDepth, 10 , nullable = nullable )
204+ val inputDf = nestedDf(maxDepth, 10 , nullable)
205205
206206 // add nested*Col10 through nested*Col14 at each depth
207207 // drop nested*Col5 through nested*Col9 at each depth
208208 val resultDf = inputDf.select(addDropNestedColumns(
209- column = col(colName (0 , 0 )),
209+ column = col(nestedColName (0 , 0 )),
210210 depths = 1 to maxDepth,
211211 colNumsToAdd = 10 to 14 ,
212212 colNumsToDrop = 5 to 9 ).as(" nested0Col0" ))
@@ -217,11 +217,11 @@ class UpdateFieldsPerformanceSuite extends QueryTest with SharedSparkSession {
217217 val depths = 1 to maxDepth
218218 val numCols = (0 to 4 ) ++ (10 to 14 )
219219 val nestedColumn = nestedRow(depths, numCols)
220- val nestedColumnDataType = nestedStructType(depths, numCols, nullable = nullable )
220+ val nestedColumnDataType = nestedStructType(depths, numCols, nullable)
221221
222222 spark.createDataFrame(
223223 sparkContext.parallelize(Row (nestedColumn) :: Nil ),
224- StructType (Seq (StructField (colName (0 , 0 ), nestedColumnDataType, nullable = nullable))))
224+ StructType (Seq (StructField (nestedColName (0 , 0 ), nestedColumnDataType, nullable))))
225225 }
226226 checkAnswer(resultDf, expectedDf.collect(), expectedDf.schema)
227227 }
0 commit comments