Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ exportMethods("%in%",
"isNaN",
"isNotNull",
"isNull",
"kurtosis",
"lag",
"last",
"last_day",
Expand Down Expand Up @@ -204,12 +205,17 @@ exportMethods("%in%",
"shiftLeft",
"shiftRight",
"shiftRightUnsigned",
"sd",
"sign",
"signum",
"sin",
"sinh",
"size",
"skewness",
"soundex",
"stddev",
"stddev_pop",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"stddev_pop" before "stddev_samp",

"stddev_samp",
"sqrt",
"startsWith",
"substr",
Expand All @@ -228,6 +234,10 @@ exportMethods("%in%",
"unhex",
"unix_timestamp",
"upper",
"var",
"variance",
"var_pop",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"var_pop" before "var_samp",

"var_samp",
"weekofyear",
"when",
"year")
Expand Down
186 changes: 174 additions & 12 deletions R/pkg/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,22 @@ setMethod("isNaN",
column(jc)
})

#' kurtosis
#'
#' Aggregate function: computes the kurtosis of the values in a group.
#'
#' @param x Column to compute on.
#' @rdname kurtosis
#' @name kurtosis
#' @family agg_funcs
#' @export
#' @examples \dontrun{kurtosis(df$c)}
setMethod(f = "kurtosis",
          signature = signature(x = "Column"),
          definition = function(x) {
            # Hand the column's jc slot to "kurtosis" on the Spark SQL functions
            # object and pass the result straight to column().
            column(
              callJStatic("org.apache.spark.sql.functions", "kurtosis", x@jc)
            )
          })

#' last
#'
#' Aggregate function: returns the last value in a group.
Expand Down Expand Up @@ -861,6 +877,28 @@ setMethod("rtrim",
column(jc)
})

#' sd
#'
#' Aggregate function: alias for \link{stddev_samp}.
#'
#' @param x Column to compute on.
#' @param na.rm present in the signature but not used by this method.
#' @rdname sd
#' @name sd
#' @family agg_funcs
#' @seealso \link{stddev_pop}, \link{stddev_samp}
#' @export
#' @examples
#'\dontrun{
#'stddev(df$c)
#'select(df, stddev(df$age))
#'agg(df, sd(df$age))
#'}
setMethod(f = "sd",
          signature = signature(x = "Column"),
          definition = function(x, na.rm = FALSE) {
            # R's sd() computes the sample standard deviation, so this method
            # simply forwards to stddev_samp().
            stddev_samp(x)
          })

#' second
#'
#' Extracts the seconds as an integer from a given date/timestamp/string.
Expand Down Expand Up @@ -958,6 +996,22 @@ setMethod("size",
column(jc)
})

#' skewness
#'
#' Aggregate function: computes the skewness of the values in a group.
#'
#' @param x Column to compute on.
#' @rdname skewness
#' @name skewness
#' @family agg_funcs
#' @export
#' @examples \dontrun{skewness(df$c)}
setMethod(f = "skewness",
          signature = signature(x = "Column"),
          definition = function(x) {
            # Hand the column's jc slot to "skewness" on the Spark SQL functions
            # object and pass the result straight to column().
            column(
              callJStatic("org.apache.spark.sql.functions", "skewness", x@jc)
            )
          })

#' soundex
#'
#' Return the soundex code for the specified expression.
Expand All @@ -974,6 +1028,49 @@ setMethod("soundex",
column(jc)
})

#' @rdname sd
#' @name stddev
setMethod("stddev",
signature(x = "Column"),
function(x) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

call stddev_samp directly

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an alias on the Scala/Spark side

# Pass the column's jc slot to "stddev" on org.apache.spark.sql.functions via
# callJStatic, then hand the result to column().
# NOTE(review): per the PR discussion "stddev" is an alias on the Scala side -- confirm.
jc <- callJStatic("org.apache.spark.sql.functions", "stddev", x@jc)
column(jc)
})

#' stddev_pop
#'
#' Aggregate function: computes the population standard deviation of the expression in a group.
#'
#' @param x Column to compute on.
#' @rdname stddev_pop
#' @name stddev_pop
#' @family agg_funcs
#' @seealso \link{sd}, \link{stddev_samp}
#' @export
#' @examples \dontrun{stddev_pop(df$c)}
setMethod(f = "stddev_pop",
          signature = signature(x = "Column"),
          definition = function(x) {
            # Hand the column's jc slot to "stddev_pop" on the Spark SQL functions
            # object and pass the result straight to column().
            column(
              callJStatic("org.apache.spark.sql.functions", "stddev_pop", x@jc)
            )
          })

#' stddev_samp
#'
#' Aggregate function: computes the unbiased sample standard deviation of the expression
#' in a group.
#'
#' @param x Column to compute on.
#' @rdname stddev_samp
#' @name stddev_samp
#' @family agg_funcs
#' @seealso \link{stddev_pop}, \link{sd}
#' @export
#' @examples \dontrun{stddev_samp(df$c)}
setMethod(f = "stddev_samp",
          signature = signature(x = "Column"),
          definition = function(x) {
            # Hand the column's jc slot to "stddev_samp" on the Spark SQL functions
            # object and pass the result straight to column().
            column(
              callJStatic("org.apache.spark.sql.functions", "stddev_samp", x@jc)
            )
          })

#' sqrt
#'
#' Computes the square root of the specified float value.
Expand Down Expand Up @@ -1168,6 +1265,71 @@ setMethod("upper",
column(jc)
})

#' var
#'
#' Aggregate function: alias for \link{var_samp}.
#'
#' @param x Column to compute on.
#' @param y present in the signature but not used by this method.
#' @param na.rm present in the signature but not used by this method.
#' @param use present in the signature but not used by this method.
#' @rdname var
#' @name var
#' @family agg_funcs
#' @seealso \link{var_pop}, \link{var_samp}
#' @export
#' @examples
#'\dontrun{
#'variance(df$c)
#'select(df, var_pop(df$age))
#'agg(df, var(df$age))
#'}
setMethod(f = "var",
          signature = signature(x = "Column"),
          definition = function(x, y = NULL, na.rm = FALSE, use) {
            # R's var() computes the sample variance, so this method simply
            # forwards to var_samp().
            var_samp(x)
          })

#' @rdname var
#' @name variance
setMethod("variance",
signature(x = "Column"),
function(x) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

call var_samp() directly

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here.

# Pass the column's jc slot to "variance" on org.apache.spark.sql.functions via
# callJStatic, then hand the result to column().
# NOTE(review): presumably "variance" is an alias on the Scala side, like "stddev" -- confirm.
jc <- callJStatic("org.apache.spark.sql.functions", "variance", x@jc)
column(jc)
})

#' var_pop
#'
#' Aggregate function: computes the population variance of the values in a group.
#'
#' @param x Column to compute on.
#' @rdname var_pop
#' @name var_pop
#' @family agg_funcs
#' @seealso \link{var}, \link{var_samp}
#' @export
#' @examples \dontrun{var_pop(df$c)}
setMethod(f = "var_pop",
          signature = signature(x = "Column"),
          definition = function(x) {
            # Hand the column's jc slot to "var_pop" on the Spark SQL functions
            # object and pass the result straight to column().
            column(
              callJStatic("org.apache.spark.sql.functions", "var_pop", x@jc)
            )
          })

#' var_samp
#'
#' Aggregate function: computes the unbiased variance of the values in a group.
#'
#' @param x Column to compute on.
#' @rdname var_samp
#' @name var_samp
#' @family agg_funcs
#' @seealso \link{var_pop}, \link{var}
#' @export
#' @examples \dontrun{var_samp(df$c)}
setMethod(f = "var_samp",
          signature = signature(x = "Column"),
          definition = function(x) {
            # Hand the column's jc slot to "var_samp" on the Spark SQL functions
            # object and pass the result straight to column().
            column(
              callJStatic("org.apache.spark.sql.functions", "var_samp", x@jc)
            )
          })

#' weekofyear
#'
#' Extracts the week number as an integer from a given date/timestamp/string.
Expand Down Expand Up @@ -2020,10 +2182,10 @@ setMethod("ifelse",
#'
#' Window function: returns the cumulative distribution of values within a window partition,
#' i.e. the fraction of rows that are below the current row.
#'
#'
#' N = total number of rows in the partition
#' cumeDist(x) = number of values before (and including) x / N
#'
#'
#' This is equivalent to the CUME_DIST function in SQL.
#'
#' @rdname cumeDist
Expand All @@ -2039,13 +2201,13 @@ setMethod("cumeDist",
})

#' denseRank
#'
#'
#' Window function: returns the rank of rows within a window partition, without any gaps.
#' The difference between rank and denseRank is that denseRank leaves no gaps in ranking
#' sequence when there are ties. That is, if you were ranking a competition using denseRank
#' and had three people tie for second place, you would say that all three were in second
#' place and that the next person came in third.
#'
#'
#' This is equivalent to the DENSE_RANK function in SQL.
#'
#' @rdname denseRank
Expand All @@ -2065,7 +2227,7 @@ setMethod("denseRank",
#' Window function: returns the value that is `offset` rows before the current row, and
#' `defaultValue` if there are fewer than `offset` rows before the current row. For example,
#' an `offset` of one will return the previous row at any given point in the window partition.
#'
#'
#' This is equivalent to the LAG function in SQL.
#'
#' @rdname lag
Expand All @@ -2092,7 +2254,7 @@ setMethod("lag",
#' Window function: returns the value that is `offset` rows after the current row, and
#' `null` if there are fewer than `offset` rows after the current row. For example,
#' an `offset` of one will return the next row at any given point in the window partition.
#'
#'
#' This is equivalent to the LEAD function in SQL.
#'
#' @rdname lead
Expand All @@ -2119,7 +2281,7 @@ setMethod("lead",
#' Window function: returns the ntile group id (from 1 to `n` inclusive) in an ordered window
#' partition. For example, if `n` is 4, the first quarter of the rows will get value 1, the second
#' quarter will get 2, the third quarter will get 3, and the last quarter will get 4.
#'
#'
#' This is equivalent to the NTILE function in SQL.
#'
#' @rdname ntile
Expand All @@ -2137,9 +2299,9 @@ setMethod("ntile",
#' percentRank
#'
#' Window function: returns the relative rank (i.e. percentile) of rows within a window partition.
#'
#'
#' This is computed by:
#'
#'
#' (rank of row in its partition - 1) / (number of rows in the partition - 1)
#'
#' This is equivalent to the PERCENT_RANK function in SQL.
Expand All @@ -2159,12 +2321,12 @@ setMethod("percentRank",
#' rank
#'
#' Window function: returns the rank of rows within a window partition.
#'
#'
#' The difference between rank and denseRank is that denseRank leaves no gaps in ranking
#' sequence when there are ties. That is, if you were ranking a competition using denseRank
#' and had three people tie for second place, you would say that all three were in second
#' place and that the next person came in third.
#'
#'
#' This is equivalent to the RANK function in SQL.
#'
#' @rdname rank
Expand All @@ -2189,7 +2351,7 @@ setMethod("rank",
#' rowNumber
#'
#' Window function: returns a sequential number starting at 1 within a window partition.
#'
#'
#' This is equivalent to the ROW_NUMBER function in SQL.
#'
#' @rdname rowNumber
Expand Down
40 changes: 40 additions & 0 deletions R/pkg/R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,10 @@ setGeneric("instr", function(y, x) { standardGeneric("instr") })
#' @export
setGeneric("isNaN", function(x) { standardGeneric("isNaN") })

# S4 generic for kurtosis(); methods are attached elsewhere with setMethod().
#' @rdname kurtosis
#' @export
setGeneric(
  name = "kurtosis",
  def = function(x) {
    standardGeneric("kurtosis")
  }
)

#' @rdname lag
#' @export
setGeneric("lag", function(x, offset, defaultValue = NULL) { standardGeneric("lag") })
Expand Down Expand Up @@ -935,6 +939,10 @@ setGeneric("rpad", function(x, len, pad) { standardGeneric("rpad") })
#' @export
setGeneric("rtrim", function(x) { standardGeneric("rtrim") })

# S4 generic for sd(); methods are attached elsewhere with setMethod().
#' @rdname sd
#' @export
setGeneric(
  name = "sd",
  def = function(x, na.rm = FALSE) {
    standardGeneric("sd")
  }
)

#' @rdname second
#' @export
setGeneric("second", function(x) { standardGeneric("second") })
Expand Down Expand Up @@ -967,10 +975,26 @@ setGeneric("signum", function(x) { standardGeneric("signum") })
#' @export
setGeneric("size", function(x) { standardGeneric("size") })

# S4 generic for skewness(); methods are attached elsewhere with setMethod().
#' @rdname skewness
#' @export
setGeneric(
  name = "skewness",
  def = function(x) {
    standardGeneric("skewness")
  }
)

#' @rdname soundex
#' @export
setGeneric("soundex", function(x) { standardGeneric("soundex") })

# S4 generic for stddev(); documented on the same Rd page as sd().
#' @rdname sd
#' @export
setGeneric(
  name = "stddev",
  def = function(x) {
    standardGeneric("stddev")
  }
)

# S4 generic for stddev_pop(); methods are attached elsewhere with setMethod().
#' @rdname stddev_pop
#' @export
setGeneric(
  name = "stddev_pop",
  def = function(x) {
    standardGeneric("stddev_pop")
  }
)

# S4 generic for stddev_samp(); methods are attached elsewhere with setMethod().
#' @rdname stddev_samp
#' @export
setGeneric(
  name = "stddev_samp",
  def = function(x) {
    standardGeneric("stddev_samp")
  }
)

#' @rdname substring_index
#' @export
setGeneric("substring_index", function(x, delim, count) { standardGeneric("substring_index") })
Expand Down Expand Up @@ -1019,6 +1043,22 @@ setGeneric("unix_timestamp", function(x, format) { standardGeneric("unix_timesta
#' @export
setGeneric("upper", function(x) { standardGeneric("upper") })

# S4 generic for var(); methods are attached elsewhere with setMethod().
#' @rdname var
#' @export
setGeneric(
  name = "var",
  def = function(x, y = NULL, na.rm = FALSE, use) {
    standardGeneric("var")
  }
)

# S4 generic for variance(); documented on the same Rd page as var().
#' @rdname var
#' @export
setGeneric(
  name = "variance",
  def = function(x) {
    standardGeneric("variance")
  }
)

# S4 generic for var_pop(); methods are attached elsewhere with setMethod().
#' @rdname var_pop
#' @export
setGeneric(
  name = "var_pop",
  def = function(x) {
    standardGeneric("var_pop")
  }
)

# S4 generic for var_samp(); methods are attached elsewhere with setMethod().
#' @rdname var_samp
#' @export
setGeneric(
  name = "var_samp",
  def = function(x) {
    standardGeneric("var_samp")
  }
)

#' @rdname weekofyear
#' @export
setGeneric("weekofyear", function(x) { standardGeneric("weekofyear") })
Expand Down
Loading