@@ -986,10 +986,10 @@ setMethod("unique",
 #' @param x A SparkDataFrame
 #' @param withReplacement Sampling with replacement or not
 #' @param fraction The (rough) sample target fraction
-#' @param seed Randomness seed value
+#' @param seed Randomness seed value. Default is a random seed.
 #'
 #' @family SparkDataFrame functions
-#' @aliases sample,SparkDataFrame,logical,numeric-method
+#' @aliases sample,SparkDataFrame-method
 #' @rdname sample
 #' @name sample
 #' @export
@@ -998,33 +998,47 @@ setMethod("unique",
 #' sparkR.session()
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
+#' collect(sample(df, fraction = 0.5))
 #' collect(sample(df, FALSE, 0.5))
-#' collect(sample(df, TRUE, 0.5))
+#' collect(sample(df, TRUE, 0.5, seed = 3))
 #'}
 #' @note sample since 1.4.0
 setMethod("sample",
-          signature(x = "SparkDataFrame", withReplacement = "logical",
-                    fraction = "numeric"),
-          function(x, withReplacement, fraction, seed) {
-            if (fraction < 0.0) stop(cat("Negative fraction value:", fraction))
+          signature(x = "SparkDataFrame"),
+          function(x, withReplacement = FALSE, fraction, seed) {
+            if (!is.numeric(fraction)) {
+              stop(paste("fraction must be numeric; however, got", class(fraction)))
+            }
+            if (!is.logical(withReplacement)) {
+              stop(paste("withReplacement must be logical; however, got", class(withReplacement)))
+            }
+
             if (!missing(seed)) {
+              if (is.null(seed)) {
+                stop("seed must not be NULL or NA; however, got NULL")
+              }
+              if (is.na(seed)) {
+                stop("seed must not be NULL or NA; however, got NA")
+              }
+
               # TODO: Figure out how to send integer as java.lang.Long to JVM so
               # we can send seed as an argument through callJMethod
-              sdf <- callJMethod(x@sdf, "sample", withReplacement, fraction, as.integer(seed))
+              sdf <- handledCallJMethod(x@sdf, "sample", as.logical(withReplacement),
+                                        as.numeric(fraction), as.integer(seed))
             } else {
-              sdf <- callJMethod(x@sdf, "sample", withReplacement, fraction)
+              sdf <- handledCallJMethod(x@sdf, "sample",
+                                        as.logical(withReplacement), as.numeric(fraction))
             }
             dataFrame(sdf)
           })
 
 #' @rdname sample
-#' @aliases sample_frac,SparkDataFrame,logical,numeric-method
+#' @aliases sample_frac,SparkDataFrame-method
 #' @name sample_frac
 #' @note sample_frac since 1.4.0
 setMethod("sample_frac",
-          signature(x = "SparkDataFrame", withReplacement = "logical",
-                    fraction = "numeric"),
-          function(x, withReplacement, fraction, seed) {
+          signature(x = "SparkDataFrame"),
+          function(x, withReplacement = FALSE, fraction, seed) {
             sample(x, withReplacement, fraction, seed)
           })
 
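A quick sketch of the relaxed signature in use (assumes a live SparkR session and an illustrative frame df; the error text comes from the new checks above):

    df <- createDataFrame(data.frame(id = 1:100))
    collect(sample(df, fraction = 0.1))        # withReplacement now defaults to FALSE
    collect(sample(df, TRUE, 0.1, seed = 42))  # a seed keeps the draw reproducible
    sample(df, fraction = "0.1")
    # Error: fraction must be numeric; however, got character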
@@ -1909,13 +1923,15 @@ setMethod("[", signature(x = "SparkDataFrame"),
 #' @param i,subset (Optional) a logical expression to filter on rows.
 #'                 For extract operator [[ and replacement operator [[<-, the indexing parameter for
 #'                 a single Column.
-#' @param j,select expression for the single Column or a list of columns to select from the SparkDataFrame.
+#' @param j,select expression for the single Column or a list of columns to select from the
+#'                 SparkDataFrame.
 #' @param drop if TRUE, a Column will be returned if the resulting dataset has only one column.
 #'             Otherwise, a SparkDataFrame will always be returned.
 #' @param value a Column or an atomic vector in the length of 1 as literal value, or \code{NULL}.
 #'              If \code{NULL}, the specified Column is dropped.
 #' @param ... currently not used.
-#' @return A new SparkDataFrame containing only the rows that meet the condition with selected columns.
+#' @return A new SparkDataFrame containing only the rows that meet the condition with selected
+#'         columns.
 #' @export
 #' @family SparkDataFrame functions
 #' @aliases subset,SparkDataFrame-method
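For context, the operators documented above in action, sketched against a hypothetical frame with columns age and name:

    df[df$age > 21, c("name", "age")]         # i filters rows, j selects columns
    df[["age"]]                               # [[ extracts a single Column
    df[["age"]] <- NULL                       # assigning NULL drops the column
    subset(df, df$age > 21, select = "name")  # same filtering via subset()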
@@ -2594,12 +2610,12 @@ setMethod("merge",
             } else {
               # if by or both by.x and by.y have length 0, use Cartesian Product
               joinRes <- crossJoin(x, y)
-              return (joinRes)
+              return(joinRes)
             }
 
             # sets alias for making colnames unique in dataframes 'x' and 'y'
-            colsX <- generateAliasesForIntersectedCols(x, by, suffixes[1])
-            colsY <- generateAliasesForIntersectedCols(y, by, suffixes[2])
+            colsX <- genAliasesForIntersectedCols(x, by, suffixes[1])
+            colsY <- genAliasesForIntersectedCols(y, by, suffixes[2])
 
             # selects columns with their aliases from dataframes
             # in case same column names are present in both data frames
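The two merge paths touched here, sketched with illustrative frames x and y that share column names:

    merge(x, y, by = NULL)                            # length-0 by: falls through to crossJoin(x, y)
    merge(x, y, by = "id", suffixes = c("_x", "_y"))  # shared columns come back aliased with the suffixes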
@@ -2647,17 +2663,16 @@ setMethod("merge",
 #' @param intersectedColNames a list of intersected column names of the SparkDataFrame
 #' @param suffix a suffix for the column name
 #' @return list of columns
-#'
-#' @note generateAliasesForIntersectedCols since 1.6.0
-generateAliasesForIntersectedCols <- function(x, intersectedColNames, suffix) {
+#' @noRd
+genAliasesForIntersectedCols <- function(x, intersectedColNames, suffix) {
   allColNames <- names(x)
   # sets alias for making colnames unique in dataframe 'x'
   cols <- lapply(allColNames, function(colName) {
     col <- getColumn(x, colName)
     if (colName %in% intersectedColNames) {
       newJoin <- paste(colName, suffix, sep = "")
       if (newJoin %in% allColNames) {
-        stop ("The following column name: ", newJoin, " occurs more than once in the 'DataFrame'.",
+        stop("The following column name: ", newJoin, " occurs more than once in the 'DataFrame'.",
              "Please use different suffixes for the intersected columns.")
       }
       col <- alias(col, newJoin)
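One consequence of the guard above, shown with hypothetical frames: if x already contains a column name_x, merging x and y on "name" with the default "_x" suffix would alias x$name to the existing name_x and stop with the duplicate-name error; distinct suffixes avoid the clash:

    merge(x, y, by = "name", suffixes = c(".x", ".y"))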
@@ -3044,7 +3059,8 @@ setMethod("describe",
 #' summary(select(df, "age", "height"))
 #' }
 #' @note summary(SparkDataFrame) since 1.5.0
-#' @note The statistics provided by \code{summary} were change in 2.3.0 use \link{describe} for previous defaults.
+#' @note The statistics provided by \code{summary} were changed in 2.3.0. Use \link{describe} for
+#'       previous defaults.
 #' @seealso \link{describe}
 setMethod("summary",
           signature(object = "SparkDataFrame"),
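For readers of that note, the practical difference in 2.3.0 (a sketch; df is hypothetical, and the exact statistics follow the Scala-side defaults):

    collect(describe(df))  # previous defaults: count, mean, stddev, min, max
    collect(summary(df))   # 2.3.0+: also approximate percentiles (25%, 50%, 75%)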
@@ -3751,8 +3767,8 @@ setMethod("checkpoint",
 #'
 #' Create a multi-dimensional cube for the SparkDataFrame using the specified columns.
 #'
-#' If grouping expression is missing \code{cube} creates a single global aggregate and is equivalent to
-#' direct application of \link{agg}.
+#' If the grouping expression is missing, \code{cube} creates a single global aggregate and is
+#' equivalent to direct application of \link{agg}.
 #'
 #' @param x a SparkDataFrame.
 #' @param ... character name(s) or Column(s) to group on.
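The equivalence stated above, spelled out with illustrative column names:

    agg(cube(df), avg(df$mpg))                 # no grouping columns: a single global aggregate,
    agg(df, avg(df$mpg))                       # the same as applying agg directly
    agg(cube(df, "cyl", "gear"), avg(df$mpg))  # grouped: all grouping-set combinations of cyl and gear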
@@ -3786,8 +3802,8 @@ setMethod("cube",
 #'
 #' Create a multi-dimensional rollup for the SparkDataFrame using the specified columns.
 #'
-#' If grouping expression is missing \code{rollup} creates a single global aggregate and is equivalent to
-#' direct application of \link{agg}.
+#' If the grouping expression is missing, \code{rollup} creates a single global aggregate and is
+#' equivalent to direct application of \link{agg}.
 #'
 #' @param x a SparkDataFrame.
 #' @param ... character name(s) or Column(s) to group on.
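Likewise for rollup, which unlike cube only generates the hierarchical prefixes of the grouping columns (illustrative names again):

    agg(rollup(df), sum(df$mpg))                 # equivalent to agg(df, sum(df$mpg))
    agg(rollup(df, "cyl", "gear"), sum(df$mpg))  # groups: (cyl, gear), (cyl), and ()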