Skip to content

Commit e761c27

Browse files
committed
Let ggplot handle histogran binning. Fix #198
1 parent a9daa28 commit e761c27

File tree

5 files changed

+199
-164
lines changed

5 files changed

+199
-164
lines changed

R/ggplotly.R

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,52 @@ gg2list <- function(p){
242242
if (!all(barmodes == barmodes[1]))
243243
warning(paste0("You have multiple barcharts or histograms with different positions; ",
244244
"Plotly's layout barmode will be '", layout$barmode, "'."))
245+
# for stacked bar charts, plotly cumulates bar heights, but ggplot doesn't
246+
if (layout$barmode == "stack") {
247+
# could speed up this function with environments or C/C++
248+
unStack <- function(vec) {
249+
n <- length(vec)
250+
if (n == 1) return(vec)
251+
seq.n <- seq_len(n)
252+
names(vec) <- seq.n
253+
vec <- sort(vec)
254+
for (k in seq(2, n)) {
255+
vec[k] <- vec[k] - sum(vec[seq(1, k-1)])
256+
}
257+
as.numeric(vec[as.character(seq.n)])
258+
}
259+
ys <- lapply(trace.list, "[[", "y")
260+
xs <- lapply(trace.list, "[[", "x")
261+
x.vals <- unique(unlist(xs))
262+
# if there is more than one y-value (for a particular x value)
263+
# then
264+
#
265+
for (val in x.vals) {
266+
zs <- lapply(xs, function(x) which(x == val))
267+
ys.given.x <- Map(function(x, y) y[x], zs, ys)
268+
if (length(unlist(ys.given.x)) < 2) next
269+
st <- unStack(unlist(ys.given.x))
270+
lens <- sapply(ys.given.x, length)
271+
trace.seq <- seq_along(zs)
272+
ws <- split(st, rep(trace.seq, lens))
273+
for (tr in trace.seq) {
274+
idx <- zs[[tr]]
275+
if (length(idx)) trace.list[[tr]]$y[idx] <- ws[[tr]][idx]
276+
}
277+
}
278+
}
245279
}
280+
281+
# lens <- sapply(ys, length)
282+
# && length(trace.list) > 1 && any(lens > 1)) {
283+
# xs <- unlist(xs)
284+
# trace.seq <- seq_along(trace.list)
285+
# idx <- rep(trace.seq, lens)
286+
#
287+
#
288+
# browser()
289+
# diffs <- tapply(unlist(ys), INDEX = xs, unStack)
290+
# for (k in trace.seq) trace.list[[k]]$y <- as.numeric(sapply(diffs, "[", k))
246291

247292
# Bar Gap for histograms should be 0
248293
bargaps <- do.call(c, lapply(trace.list, function (x) x$bargap))

R/trace_generation.R

Lines changed: 31 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -41,24 +41,14 @@ layer2traces <- function(l, d, misc) {
4141
g$geom <- "smoothLine"
4242
}
4343
}
44-
# Barmode and bargap
45-
barmode <- "group"
46-
if (g$geom == "bar" || g$geom == "histogram") {
47-
if (l$stat$objname == "bin") {
48-
if (g$geom != "histogram") {
49-
warning("You may want to use geom_histogram.")
50-
}
51-
} else {
52-
bargap <- "default"
53-
}
54-
g$geom <- "bar" # histogram is just an alias for geom_bar + stat_bin
55-
pos <- l$position$.super$objname
56-
if (pos == "identity") {
57-
barmode <- "overlay"
58-
} else if (pos == "stack") {
59-
barmode <- "stack"
60-
}
44+
# histogram is essentially a bar chart with no gaps (after stats are computed)
45+
if (g$geom == "histogram") {
46+
g$geom <- "bar"
47+
bargap <- 0
48+
misc$hist <- TRUE
6149
}
50+
51+
# TODO: remove this once we reimplement density as area
6252
if (g$geom == "density") {
6353
bargap <- 0
6454
}
@@ -176,18 +166,19 @@ layer2traces <- function(l, d, misc) {
176166
matched.names <- names(basic$data)[data.i]
177167
name.i <- name.names %in% matched.names
178168
invariable.names <- cbind(name.names, mark.names)[name.i,]
169+
# fill can be variable for histograms
170+
#if (misc$hist)
171+
# invariable.names <- invariable.names[!grepl("fill", invariable.names)]
179172
other.names <- !names(basic$data) %in% invariable.names
180173
vec.list <- basic$data[is.split]
181174
df.list <- split(basic$data, vec.list, drop=TRUE)
182175
lapply(df.list, function(df){
183176
params <- basic$params
184177
params[invariable.names] <- if (ncol(x <- df[1, invariable.names]) > 0) x else NULL
185-
list(data=df[other.names],
186-
params=params)
178+
list(data=df[other.names], params=params)
187179
})
188180
}
189181
}
190-
191182
# Split hline and vline when multiple panels or intercepts:
192183
# Need multiple traces accordingly.
193184
if (g$geom == "hline" || g$geom == "vline") {
@@ -216,7 +207,6 @@ layer2traces <- function(l, d, misc) {
216207
}
217208
traces <- NULL
218209
names.in.legend <- NULL
219-
220210
for (data.i in seq_along(data.list)) {
221211
data.params <- data.list[[data.i]]
222212
data.params$params$stat.type <- l$stat$objname
@@ -260,18 +250,21 @@ layer2traces <- function(l, d, misc) {
260250
if (is.null(tr$name) || tr$name %in% names.in.legend)
261251
tr$showlegend <- FALSE
262252
names.in.legend <- c(names.in.legend, tr$name)
263-
264-
if (g$geom == "bar")
265-
tr$barmode <- barmode
266-
267-
# Bar Gap
268-
if (exists("bargap")) {
269-
tr$bargap <- bargap
253+
254+
# special handling for bars
255+
if (g$geom == "bar") {
256+
tr$bargap <- if (exists("bargap")) bargap else "default"
257+
pos <- l$position$.super$objname
258+
tr$barmode <- if (pos == "identity") {
259+
"overlay"
260+
} else if (pos %in% c("stack", "fill")) {
261+
"stack"
262+
} else "group"
270263
}
264+
271265
traces <- c(traces, list(tr))
272266
}
273267

274-
275268
sort.val <- sapply(traces, function(tr){
276269
rank.val <- unlist(tr$sort)
277270
if(is.null(rank.val)){
@@ -357,16 +350,9 @@ toBasic <- list(
357350
g$data <- g$prestats.data
358351
g
359352
},
360-
bar=function(g) {
361-
if (any(is.na(g$prestats.data$x)))
362-
g$prestats.data$x <- g$prestats.data$x.name
363-
for(a in c("fill", "colour")){
364-
g$prestats.data[[a]] <-
365-
g$data[[a]][match(g$prestats.data$group, g$data$group)]
366-
}
367-
g$params$xstart <- min(g$data$xmin)
368-
g$params$xend <- max(g$data$xmax)
369-
g$data <- g$prestats.data
353+
bar=function(g){
354+
g <- group2NA(g, "bar")
355+
g$data <- g$data[!is.na(g$data$y), ]
370356
g
371357
},
372358
contour=function(g) {
@@ -591,40 +577,19 @@ geom2trace <- list(
591577
L
592578
},
593579
bar=function(data, params) {
594-
L <- list(x=data$x,
580+
#data <- data[order(data$y), ]
581+
x <- if ("x.name" %in% names(data)) data$x.name else data$x
582+
L <- list(x=x,
583+
y=data$y,
584+
type="bar",
595585
name=params$name,
596586
text=data$text,
597587
marker=list(color=toRGB(params$fill)))
598-
599588
if (!is.null(params$colour)) {
600589
L$marker$line <- list(color=toRGB(params$colour))
601590
L$marker$line$width <- if (is.null(params$size)) 1 else params$size
602591
}
603-
604-
if (!is.null(params$alpha))
605-
L$opacity <- params$alpha
606-
607-
if (params$stat.type == "bin") {
608-
L$type <- "histogram"
609-
if (is.null(params$binwidth)) {
610-
L$autobinx <- TRUE
611-
} else {
612-
L$autobinx <- FALSE
613-
L$xbins=list(start=params$xstart,
614-
end=params$xend,
615-
size=params$binwidth)
616-
if (inherits(data$x.name, "POSIXt")) {
617-
# Convert seconds into milliseconds
618-
L$xbins <- lapply(L$xbins, function(x) x * 1000)
619-
} else if (inherits(data$x.name, "Date")) {
620-
# Convert days into milliseconds
621-
L$xbins <- lapply(L$xbins, function(x) x * 24 * 60 * 60 * 1000)
622-
}
623-
}
624-
} else {
625-
L$y <- data$y
626-
L$type <- "bar"
627-
}
592+
if (!is.null(params$alpha)) L$opacity <- params$alpha
628593
L
629594
},
630595
step=function(data, params) {

tests/testthat.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@ library(testthat)
22
save_outputs <- function(gg, name, ignore_ggplot=FALSE) {
33
print(paste("running", name))
44
}
5-
test_check("plotly")
5+
test_check("plotly", filter = "histogram")

tests/testthat/test-ggplot-bar.R

Lines changed: 54 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,20 @@
11
context("bar")
22

3+
expect_traces <- function(gg, n.traces, name){
4+
stopifnot(is.ggplot(gg))
5+
stopifnot(is.numeric(n.traces))
6+
save_outputs(gg, paste0("bar-", name))
7+
L <- gg2list(gg)
8+
is.trace <- names(L) == ""
9+
all.traces <- L[is.trace]
10+
no.data <- sapply(all.traces, function(tr) {
11+
is.null(tr[["x"]]) && is.null(tr[["y"]])
12+
})
13+
has.data <- all.traces[!no.data]
14+
expect_equal(length(has.data), n.traces)
15+
list(traces=has.data, kwargs=L$kwargs)
16+
}
17+
318
researchers <-
419
data.frame(country=c("Canada", "Canada", "Germany", "USA"),
520
name=c("Warren", "Andreanne", "Stefan", "Toby"),
@@ -10,75 +25,46 @@ gg <- ggplot(researchers, aes(country, papers, fill=field))
1025

1126
test_that("position_dodge is translated to barmode=group", {
1227
gg.dodge <- gg + geom_bar(stat="identity", position="dodge")
13-
L <- gg2list(gg.dodge)
14-
expect_equal(length(L), 3)
15-
trace.names <- sapply(L[1:2], "[[", "name")
28+
info <- expect_traces(gg.dodge, 3, "dodge")
29+
trs <- info$traces
30+
trace.names <- sapply(trs[1:2], "[[", "name")
1631
expect_true(all(c("Math", "Bio") %in% trace.names))
17-
expect_identical(L$kwargs$layout$barmode, "group")
32+
expect_identical(info$kwargs$layout$barmode, "group")
1833
# Check x values
19-
expect_identical(as.character(L[[1]]$x[1]), "Canada")
20-
expect_identical(as.character(L[[1]]$x[2]), "Germany")
21-
expect_identical(as.character(L[[2]]$x[1]), "Canada")
22-
expect_identical(as.character(L[[2]]$x[2]), "USA")
23-
24-
save_outputs(gg.dodge, "bar-dodge")
34+
expect_identical(as.character(trs[[1]]$x), c("Canada", "Germany"))
35+
expect_identical(as.character(trs[[2]]$x), c("Canada", "USA"))
2536
})
2637

2738
test_that("position_stack is translated to barmode=stack", {
2839
gg.stack <- gg + geom_bar(stat="identity", position="stack")
29-
L <- gg2list(gg.stack)
30-
expect_equal(length(L), 3)
31-
trace.names <- sapply(L[1:2], "[[", "name")
40+
info <- expect_traces(gg.stack, 3, "stack")
41+
trs <- info$traces
42+
trace.names <- sapply(trs[1:2], "[[", "name")
3243
expect_true(all(c("Math", "Bio") %in% trace.names))
33-
expect_identical(L$kwargs$layout$barmode, "stack")
34-
35-
save_outputs(gg.stack, "bar-stack")
44+
expect_identical(info$kwargs$layout$barmode, "stack")
3645
})
3746

3847
test_that("position_identity is translated to barmode=overlay", {
3948
gg.identity <- gg + geom_bar(stat="identity", position="identity")
40-
L <- gg2list(gg.identity)
41-
expect_equal(length(L), 3)
42-
trace.names <- sapply(L[1:2], "[[", "name")
49+
info <- expect_traces(gg.identity, 3, "identity")
50+
trs <- info$traces
51+
trace.names <- sapply(trs[1:2], "[[", "name")
4352
expect_true(all(c("Math", "Bio") %in% trace.names))
44-
expect_identical(L$kwargs$layout$barmode, "overlay")
45-
46-
save_outputs(gg.identity, "bar-identity")
53+
expect_identical(info$kwargs$layout$barmode, "overlay")
4754
})
4855

4956
test_that("dates work well with bar charts", {
50-
5157
researchers$month <- c("2012-01-01", "2012-01-01", "2012-02-01", "2012-02-01")
5258
researchers$month <- as.Date(researchers$month)
53-
5459
gd <- ggplot(researchers, aes(month, papers, fill=field)) +
5560
geom_bar(stat="identity")
56-
57-
L <- gg2list(gd)
58-
59-
expect_equal(length(L), 3) # 2 traces + layout
60-
expect_identical(L$kwargs$layout$xaxis$type, "date")
61-
expect_identical(L[[1]]$x[1], "2012-01-01 00:00:00")
62-
expect_identical(L[[1]]$x[2], "2012-02-01 00:00:00")
63-
64-
save_outputs(gd, "bar-dates")
61+
info <- expect_traces(gd, 3, "dates")
62+
trs <- info$traces
63+
expect_identical(info$kwargs$layout$xaxis$type, "date")
64+
expect_identical(trs[[1]]$x[1], "2012-01-01 00:00:00")
65+
expect_identical(trs[[1]]$x[2], "2012-02-01 00:00:00")
6566
})
6667

67-
expect_traces <- function(gg, n.traces, name){
68-
stopifnot(is.ggplot(gg))
69-
stopifnot(is.numeric(n.traces))
70-
save_outputs(gg, paste0("bar-", name))
71-
L <- gg2list(gg)
72-
is.trace <- names(L) == ""
73-
all.traces <- L[is.trace]
74-
no.data <- sapply(all.traces, function(tr) {
75-
is.null(tr[["x"]]) && is.null(tr[["y"]])
76-
})
77-
has.data <- all.traces[!no.data]
78-
expect_equal(length(has.data), n.traces)
79-
list(traces=has.data, kwargs=L$kwargs)
80-
}
81-
8268
## http://www.cookbook-r.com/Graphs/Bar_and_line_graphs_%28ggplot2%29/
8369
df <- data.frame(time = factor(c("Lunch","Dinner"), levels=c("Lunch","Dinner")),
8470
total_bill = c(14.89, 17.23))
@@ -182,3 +168,23 @@ test_that("guides(fill=FALSE) does not affect colour legend", {
182168
expect_true(info$kwargs$layout$showlegend)
183169
})
184170

171+
172+
base <- ggplot(mtcars, aes(factor(vs), fill=factor(cyl)))
173+
174+
test_that("geom_bar() stacks counts", {
175+
info <- expect_traces(base + geom_bar(), 3, "position-stack")
176+
expect_identical(info$kwargs$layout$barmode, "stack")
177+
trs <- info$traces
178+
test <- colSums(t(sapply(trs, "[[", "y")), na.rm = TRUE)
179+
true <- as.numeric(colSums(with(mtcars, table(cyl, vs))))
180+
expect_identical(test, true)
181+
})
182+
183+
test_that("geom_bar(position = 'fill') stacks proportions", {
184+
info <- expect_traces(base + geom_bar(position = "fill"), 3, "position-fill")
185+
expect_identical(info$kwargs$layout$barmode, "stack")
186+
trs <- info$traces
187+
props <- colSums(t(sapply(trs, "[[", "y")), na.rm = TRUE)
188+
expect_identical(props, c(1, 1))
189+
})
190+

0 commit comments

Comments
 (0)