Commit e4ad2ec

Merge branch 'master' of https://github.com/apache/spark into sparkr-cran-changes

2 parents: 3299242 + 91575ca

265 files changed (+4085, -2509 lines)


R/pkg/NAMESPACE

Lines changed: 2 additions & 4 deletions
@@ -341,9 +341,8 @@ export("partitionBy",
        "rowsBetween",
        "rangeBetween")
 
-export("window.partitionBy",
-       "window.orderBy")
-
+export("windowPartitionBy",
+       "windowOrderBy")
 
 S3method(print, jobj)
 S3method(print, structField)
@@ -353,4 +352,3 @@ S3method(structField, character)
 S3method(structField, jobj)
 S3method(structType, jobj)
 S3method(structType, structField)
-# window.orderBy window.partitionBy

R/pkg/R/SQLContext.R

Lines changed: 3 additions & 1 deletion
@@ -48,7 +48,9 @@ getInternalType <- function(x) {
 #' @return whatever the target returns
 #' @noRd
 dispatchFunc <- function(newFuncSig, x, ...) {
-  funcName <- as.character(sys.call(sys.parent())[[1]])
+  # When called with SparkR::createDataFrame, sys.call()[[1]] returns c(::, SparkR, createDataFrame)
+  callsite <- as.character(sys.call(sys.parent())[[1]])
+  funcName <- callsite[[length(callsite)]]
   f <- get(paste0(funcName, ".default"))
   # Strip sqlContext from list of parameters and then pass the rest along.
   contextNames <- c("org.apache.spark.sql.SQLContext",
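Why the two-line fix works: when a SparkR function is invoked as SparkR::createDataFrame(...), the function part of the captured call is itself a call to `::`, and as.character() flattens it to a three-element vector. A minimal standalone sketch (plain R, no Spark required; lastName is an illustrative helper, not part of the patch):

# Sketch of the dispatch fix: take the last element of the flattened
# function part of a call, which handles both bare and namespaced calls.
lastName <- function(call) {
  callsite <- as.character(call[[1]])
  callsite[[length(callsite)]]
}

lastName(quote(createDataFrame(df)))          # "createDataFrame"
lastName(quote(SparkR::createDataFrame(df)))  # also "createDataFrame":
# the function part here is `::`(SparkR, createDataFrame), which
# as.character() turns into c("::", "SparkR", "createDataFrame").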

R/pkg/R/WindowSpec.R

Lines changed: 2 additions & 2 deletions
@@ -22,10 +22,10 @@ NULL
 
 #' S4 class that represents a WindowSpec
 #'
-#' WindowSpec can be created by using window.partitionBy() or window.orderBy()
+#' WindowSpec can be created by using windowPartitionBy() or windowOrderBy()
 #'
 #' @rdname WindowSpec
-#' @seealso \link{window.partitionBy}, \link{window.orderBy}
+#' @seealso \link{windowPartitionBy}, \link{windowOrderBy}
 #'
 #' @param sws A Java object reference to the backing Scala WindowSpec
 #' @export

R/pkg/R/generics.R

Lines changed: 4 additions & 6 deletions
@@ -779,13 +779,13 @@ setGeneric("rowsBetween", function(x, start, end) { standardGeneric("rowsBetween
 #' @export
 setGeneric("rangeBetween", function(x, start, end) { standardGeneric("rangeBetween") })
 
-#' @rdname window.partitionBy
+#' @rdname windowPartitionBy
 #' @export
-setGeneric("window.partitionBy", function(x, ...) { standardGeneric("window.partitionBy") })
+setGeneric("windowPartitionBy", function(col, ...) { standardGeneric("windowPartitionBy") })
 
-#' @rdname window.orderBy
+#' @rdname windowOrderBy
 #' @export
-setGeneric("window.orderBy", function(x, ...) { standardGeneric("window.orderBy") })
+setGeneric("windowOrderBy", function(col, ...) { standardGeneric("windowOrderBy") })
 
 ###################### Expression Function Methods ##########################
 
@@ -1255,7 +1255,6 @@ setGeneric("spark.glm", function(data, formula, ...) { standardGeneric("spark.gl
 #' @export
 setGeneric("glm")
 
-#' predict
 #' @rdname predict
 #' @export
 setGeneric("predict", function(object, ...) { standardGeneric("predict") })
@@ -1280,7 +1279,6 @@ setGeneric("spark.naiveBayes", function(data, formula, ...) { standardGeneric("s
 #' @export
 setGeneric("spark.survreg", function(data, formula, ...) { standardGeneric("spark.survreg") })
 
-#' write.ml
 #' @rdname write.ml
 #' @export
 setGeneric("write.ml", function(object, path, ...) { standardGeneric("write.ml") })

R/pkg/R/mllib.R

Lines changed: 35 additions & 9 deletions
@@ -53,10 +53,34 @@ setClass("AFTSurvivalRegressionModel", representation(jobj = "jobj"))
 #' @note KMeansModel since 2.0.0
 setClass("KMeansModel", representation(jobj = "jobj"))
 
+#' Saves the MLlib model to the input path
+#'
+#' Saves the MLlib model to the input path. For more information, see the specific
+#' MLlib model below.
+#' @rdname write.ml
+#' @name write.ml
+#' @export
+#' @seealso \link{spark.glm}, \link{glm}
+#' @seealso \link{spark.kmeans}, \link{spark.naiveBayes}, \link{spark.survreg}
+#' @seealso \link{read.ml}
+NULL
+
+#' Makes predictions from a MLlib model
+#'
+#' Makes predictions from a MLlib model. For more information, see the specific
+#' MLlib model below.
+#' @rdname predict
+#' @name predict
+#' @export
+#' @seealso \link{spark.glm}, \link{glm}
+#' @seealso \link{spark.kmeans}, \link{spark.naiveBayes}, \link{spark.survreg}
+NULL
+
 #' Generalized Linear Models
 #'
-#' Fits generalized linear model against a Spark DataFrame. Users can print, make predictions on the
-#' produced model and save the model to the input path.
+#' Fits generalized linear model against a Spark DataFrame.
+#' Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make
+#' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.
 #'
 #' @param data SparkDataFrame for training.
 #' @param formula A symbolic description of the model to be fitted. Currently only a few formula
@@ -146,7 +170,7 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDat
           })
 
 # Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary().
-#'
+
 #' @param object A fitted generalized linear model
 #' @return \code{summary} returns a summary object of the fitted model, a list of components
 #'         including at least the coefficients, null/residual deviance, null/residual degrees
@@ -186,7 +210,7 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
           })
 
 # Prints the summary of GeneralizedLinearRegressionModel
-#'
+
 #' @rdname spark.glm
 #' @param x Summary object of fitted generalized linear model returned by \code{summary} function
 #' @export
@@ -271,7 +295,8 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
 #' K-Means Clustering Model
 #'
 #' Fits a k-means clustering model against a Spark DataFrame, similarly to R's kmeans().
-#' Users can print, make predictions on the produced model and save the model to the input path.
+#' Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make
+#' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.
 #'
 #' @param data SparkDataFrame for training
 #' @param formula A symbolic description of the model to be fitted. Currently only a few formula
@@ -345,7 +370,7 @@ setMethod("fitted", signature(object = "KMeansModel"),
           })
 
 # Get the summary of a k-means model
-#'
+
 #' @param object A fitted k-means model
 #' @return \code{summary} returns the model's coefficients, size and cluster
 #' @rdname spark.kmeans
@@ -372,7 +397,7 @@ setMethod("summary", signature(object = "KMeansModel"),
           })
 
 # Predicted values based on a k-means model
-#'
+
 #' @return \code{predict} returns the predicted values based on a k-means model
 #' @rdname spark.kmeans
 #' @export
@@ -466,7 +491,7 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c
           })
 
 # Saves the generalized linear model to the input path.
-#'
+
 #' @param path The directory where the model is saved
 #' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
 #'                  which means throw exception if the output path exists.
@@ -484,7 +509,7 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat
           })
 
 # Save fitted MLlib model to the input path
-#'
+
 #' @param path The directory where the model is saved
 #' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
 #'                  which means throw exception if the output path exists.
@@ -508,6 +533,7 @@ setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
 #' @rdname read.ml
 #' @name read.ml
 #' @export
+#' @seealso \link{write.ml}
 #' @examples
 #' \dontrun{
 #' path <- "path/to/model"
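The two new shared @rdname pages (write.ml, predict) document one surface across all MLlib model types. A minimal round-trip sketch of that surface, assuming a running SparkR session (the model itself is throwaway):

library(SparkR)
sparkR.session()

# Fit, predict, save, reload - the workflow the shared docs describe.
training <- suppressWarnings(createDataFrame(iris))  # '.' in names becomes '_'
model <- spark.glm(training, Sepal_Length ~ Sepal_Width, family = "gaussian")

head(predict(model, training))       # predict() from the shared @rdname page

modelPath <- tempfile(pattern = "spark-glm")
write.ml(model, modelPath)           # save the fitted model to a directory
model2 <- read.ml(modelPath)         # load it back
summary(model2)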

R/pkg/R/window.R

Lines changed: 47 additions & 37 deletions
@@ -17,42 +17,47 @@
 
 # window.R - Utility functions for defining window in DataFrames
 
-#' window.partitionBy
+#' windowPartitionBy
 #'
 #' Creates a WindowSpec with the partitioning defined.
 #'
-#' @rdname window.partitionBy
-#' @name window.partitionBy
-#' @aliases window.partitionBy,character-method
+#' @param col A column name or Column by which rows are partitioned to
+#'            windows.
+#' @param ... Optional column names or Columns in addition to col, by
+#'            which rows are partitioned to windows.
+#'
+#' @rdname windowPartitionBy
+#' @name windowPartitionBy
+#' @aliases windowPartitionBy,character-method
 #' @export
 #' @examples
 #' \dontrun{
-#'   ws <- window.partitionBy("key1", "key2")
+#'   ws <- windowPartitionBy("key1", "key2")
 #'   df1 <- select(df, over(lead("value", 1), ws))
 #'
-#'   ws <- window.partitionBy(df$key1, df$key2)
+#'   ws <- windowPartitionBy(df$key1, df$key2)
 #'   df1 <- select(df, over(lead("value", 1), ws))
 #' }
-#' @note window.partitionBy(character) since 2.0.0
-setMethod("window.partitionBy",
-          signature(x = "character"),
-          function(x, ...) {
+#' @note windowPartitionBy(character) since 2.0.0
+setMethod("windowPartitionBy",
+          signature(col = "character"),
+          function(col, ...) {
             windowSpec(
               callJStatic("org.apache.spark.sql.expressions.Window",
                           "partitionBy",
                           x,
                           list(...)))
           })
 
-#' @rdname window.partitionBy
-#' @name window.partitionBy
-#' @aliases window.partitionBy,Column-method
+#' @rdname windowPartitionBy
+#' @name windowPartitionBy
+#' @aliases windowPartitionBy,Column-method
 #' @export
-#' @note window.partitionBy(Column) since 2.0.0
-setMethod("window.partitionBy",
-          signature(x = "Column"),
-          function(x, ...) {
-            jcols <- lapply(list(x, ...), function(c) {
+#' @note windowPartitionBy(Column) since 2.0.0
+setMethod("windowPartitionBy",
+          signature(col = "Column"),
+          function(col, ...) {
+            jcols <- lapply(list(col, ...), function(c) {
               c@jc
             })
             windowSpec(
@@ -61,42 +66,47 @@ setMethod("window.partitionBy",
                jcols))
           })
 
-#' window.orderBy
+#' windowOrderBy
 #'
 #' Creates a WindowSpec with the ordering defined.
 #'
-#' @rdname window.orderBy
-#' @name window.orderBy
-#' @aliases window.orderBy,character-method
+#' @param col A column name or Column by which rows are ordered within
+#'            windows.
+#' @param ... Optional column names or Columns in addition to col, by
+#'            which rows are ordered within windows.
+#'
+#' @rdname windowOrderBy
+#' @name windowOrderBy
+#' @aliases windowOrderBy,character-method
 #' @export
 #' @examples
 #' \dontrun{
-#'   ws <- window.orderBy("key1", "key2")
+#'   ws <- windowOrderBy("key1", "key2")
 #'   df1 <- select(df, over(lead("value", 1), ws))
 #'
-#'   ws <- window.orderBy(df$key1, df$key2)
+#'   ws <- windowOrderBy(df$key1, df$key2)
 #'   df1 <- select(df, over(lead("value", 1), ws))
 #' }
-#' @note window.orderBy(character) since 2.0.0
-setMethod("window.orderBy",
-          signature(x = "character"),
-          function(x, ...) {
+#' @note windowOrderBy(character) since 2.0.0
setMethod("windowOrderBy",
+          signature(col = "character"),
+          function(col, ...) {
             windowSpec(
               callJStatic("org.apache.spark.sql.expressions.Window",
                           "orderBy",
-                          x,
+                          col,
                           list(...)))
           })
 
-#' @rdname window.orderBy
-#' @name window.orderBy
-#' @aliases window.orderBy,Column-method
+#' @rdname windowOrderBy
+#' @name windowOrderBy
+#' @aliases windowOrderBy,Column-method
 #' @export
-#' @note window.orderBy(Column) since 2.0.0
-setMethod("window.orderBy",
-          signature(x = "Column"),
-          function(x, ...) {
-            jcols <- lapply(list(x, ...), function(c) {
+#' @note windowOrderBy(Column) since 2.0.0
+setMethod("windowOrderBy",
+          signature(col = "Column"),
+          function(col, ...) {
+            jcols <- lapply(list(col, ...), function(c) {
               c@jc
             })
             windowSpec(
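End to end, the renamed constructors combine with over() as in the roxygen examples above. A small runnable sketch, assuming an active SparkR session (the Column variants are used here):

library(SparkR)
sparkR.session()

df <- createDataFrame(data.frame(key = c(1L, 1L, 2L, 2L),
                                 value = c("a", "b", "c", "d")))

# Partition by key, order within each partition by value,
# then look one row ahead inside the window.
ws <- orderBy(windowPartitionBy(df$key), df$value)
head(select(df, over(lead(df$value, 1), ws)))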

R/pkg/inst/tests/testthat/jarTest.R

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
 #
 library(SparkR)
 
-sparkSession <- sparkR.session()
+sparkR.session()
 
 helloTest <- SparkR:::callJStatic("sparkR.test.hello",
                                   "helloWorld",

R/pkg/inst/tests/testthat/packageInAJarTest.R

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@
 library(SparkR)
 library(sparkPackageTest)
 
-sparkSession <- sparkR.session()
+sparkR.session()
 
 run1 <- myfunc(5L)
 
R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 7 additions & 6 deletions
@@ -237,7 +237,7 @@ test_that("read csv as DataFrame", {
                     "Empty,Dummy,Placeholder")
   writeLines(mockLinesCsv, csvPath)
 
-  df2 <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.string = "Empty")
+  df2 <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.strings = "Empty")
   expect_equal(count(df2), 4)
   withoutna2 <- na.omit(df2, how = "any", cols = "year")
   expect_equal(count(withoutna2), 3)
@@ -2376,25 +2376,25 @@ test_that("gapply() and gapplyCollect() on a DataFrame", {
 test_that("Window functions on a DataFrame", {
   df <- createDataFrame(list(list(1L, "1"), list(2L, "2"), list(1L, "1"), list(2L, "2")),
                         schema = c("key", "value"))
-  ws <- orderBy(window.partitionBy("key"), "value")
+  ws <- orderBy(windowPartitionBy("key"), "value")
   result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
   names(result) <- c("key", "value")
   expected <- data.frame(key = c(1L, NA, 2L, NA),
                          value = c("1", NA, "2", NA),
                          stringsAsFactors = FALSE)
   expect_equal(result, expected)
 
-  ws <- orderBy(window.partitionBy(df$key), df$value)
+  ws <- orderBy(windowPartitionBy(df$key), df$value)
   result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
   names(result) <- c("key", "value")
   expect_equal(result, expected)
 
-  ws <- partitionBy(window.orderBy("value"), "key")
+  ws <- partitionBy(windowOrderBy("value"), "key")
   result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
   names(result) <- c("key", "value")
   expect_equal(result, expected)
 
-  ws <- partitionBy(window.orderBy(df$value), df$key)
+  ws <- partitionBy(windowOrderBy(df$value), df$key)
   result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
   names(result) <- c("key", "value")
   expect_equal(result, expected)
@@ -2405,7 +2405,8 @@ test_that("createDataFrame sqlContext parameter backward compatibility", {
   a <- 1:3
   b <- c("a", "b", "c")
   ldf <- data.frame(a, b)
-  df <- suppressWarnings(createDataFrame(sqlContext, ldf))
+  # Call function with namespace :: operator - SPARK-16538
+  df <- suppressWarnings(SparkR::createDataFrame(sqlContext, ldf))
   expect_equal(columns(df), c("a", "b"))
   expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
   expect_equal(count(df), 3)
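The na.string to na.strings change in the first hunk fixes the option name: read.df spells it na.strings (plural, mirroring R's read.csv), so the singular form was not a recognized argument. A self-contained sketch of the corrected call, assuming a running SparkR session:

library(SparkR)
sparkR.session()

csvPath <- tempfile(fileext = ".csv")
writeLines(c("year,make,model",
             "2012,Tesla,S",
             "Empty,Dummy,Placeholder"), csvPath)

# na.strings tells the csv source which token to read back as NA.
df <- read.df(csvPath, "csv", header = "true", inferSchema = "true",
              na.strings = "Empty")
collect(df)   # the "Empty" year comes back as NA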

common/network-shuffle/pom.xml

Lines changed: 5 additions & 0 deletions
@@ -54,6 +54,11 @@
       <artifactId>jackson-databind</artifactId>
     </dependency>
 
+    <dependency>
+      <groupId>io.dropwizard.metrics</groupId>
+      <artifactId>metrics-core</artifactId>
+    </dependency>
+
     <dependency>
       <groupId>com.fasterxml.jackson.core</groupId>
       <artifactId>jackson-annotations</artifactId>
