cmu-delphi · nmdefries · Apr 26, 2023 · Mar 24, 2023 · Mar 24, 2023 · Mar 24, 2023
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 6.1.0
+current_version = 7.0.0
 commit = False
 tag = False
 

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: forecasteval
 Title: Forecast Evaluation Dashboard
-Version: 6.1.0
+Version: 7.0.0
 Authors@R: c(person("Kate", "Harwood", email = "[email protected]", role = "cre"),
 	   person("Chris", "Scott", role = "ctb"),
 	   person("Jed", "Grabman", role = "ctb"))
@@ -24,7 +24,9 @@ Imports:
  covidcast,
  stringr,
  markdown,
- memoise
+ memoise,
+ purrr,
+ data.table
 Suggests:
  styler,
  lintr,

diff --git a/Makefile b/Makefile
@@ -1,3 +1,6 @@
+SHELL:=/bin/bash
+PWD:=$(shell pwd)
+
 .DEFAULT_GOAL:=build
 S3_URL=https://forecast-eval.s3.us-east-2.amazonaws.com
 S3_BUCKET=s3://forecast-eval
@@ -10,7 +13,7 @@ r_build:
 %.rds: dist
 	test -f dist/$@ || curl -o dist/$@ $(S3_URL)/$@
 
-pull_data: score_cards_state_deaths.rds score_cards_state_cases.rds score_cards_nation_cases.rds score_cards_nation_deaths.rds score_cards_state_hospitalizations.rds score_cards_nation_hospitalizations.rds datetime_created_utc.rds
+pull_data: score_cards_state_deaths.rds score_cards_state_cases.rds score_cards_nation_cases.rds score_cards_nation_deaths.rds score_cards_state_hospitalizations.rds score_cards_nation_hospitalizations.rds datetime_created_utc.rds predictions_cards.rds
 
 dist:
 	mkdir $@

diff --git a/app/R/data.R b/app/R/data.R
@@ -5,7 +5,7 @@ library(aws.s3)
 shinyOptions(cache = cachem::cache_mem(max_size = 1000 * 1024^2, evict = "lru"))
 cache <- getShinyOption("cache")
 
-# Since covidcast data updates about once a day. Add date arg to
+# Since covidcast data updates about once a day, add date arg to
 # covidcast_signal so caches aren't used after that.
 covidcast_signal_mem <- function(..., date = Sys.Date()) {
   return(covidcast_signal(...))
@@ -67,65 +67,74 @@ getCreationDate <- function(loadFile) {
 }
 
 
-getAllData <- function(loadFile) {
-  dfStateCases <- loadFile("score_cards_state_cases.rds")
-  dfStateDeaths <- loadFile("score_cards_state_deaths.rds")
-  dfStateHospitalizations <- loadFile("score_cards_state_hospitalizations.rds")
-  dfNationCases <- loadFile("score_cards_nation_cases.rds")
-  dfNationDeaths <- loadFile("score_cards_nation_deaths.rds")
-  dfNationHospitalizations <- loadFile("score_cards_nation_hospitalizations.rds")
+getAllData <- function(loadFile, targetVariable) {
+  df <- switch(targetVariable,
+    "Deaths" = bind_rows(
+      loadFile("score_cards_state_deaths.rds"),
+      loadFile("score_cards_nation_deaths.rds")
+    ),
+    "Cases" = bind_rows(
+      loadFile("score_cards_state_cases.rds"),
+      loadFile("score_cards_nation_cases.rds")
+    ),
+    "Hospitalizations" = bind_rows(
+      loadFile("score_cards_state_hospitalizations.rds"),
+      loadFile("score_cards_nation_hospitalizations.rds")
+    ),
+    stop("Unknown targetVariable: ", targetVariable)
+  )
 
-  # Pick out expected columns only
-  covCols <- paste0("cov_", COVERAGE_INTERVALS)
-  expectedCols <- c(
+  # `covCols` is a named vector so that `select` below renames the `cov_*` columns.
+  covCols <- setNames(paste0("cov_", COVERAGE_INTERVALS), COVERAGE_INTERVALS)
+  keepCols <- c(
     "ahead", "geo_value", "forecaster", "forecast_date",
     "data_source", "signal", "target_end_date", "incidence_period",
     "actual", "wis", "sharpness", "ae", "value_50",
     covCols
   )
-
-  df <- bind_rows(
-    dfStateCases %>% select(all_of(expectedCols)),
-    dfStateDeaths %>% select(all_of(expectedCols)),
-    dfStateHospitalizations %>% select(all_of(expectedCols)),
-    dfNationCases %>% select(all_of(expectedCols)),
-    dfNationDeaths %>% select(all_of(expectedCols)),
-    dfNationHospitalizations %>% select(all_of(expectedCols))
-  )
-  df <- df %>% rename(
-    "10" = cov_10, "20" = cov_20, "30" = cov_30,
-    "40" = cov_40, "50" = cov_50, "60" = cov_60, "70" = cov_70,
-    "80" = cov_80, "90" = cov_90, "95" = cov_95, "98" = cov_98
-  )
+  df <- select(df, all_of(keepCols))
 
   return(df)
 }
 
 createS3DataLoader <- function() {
+  # Cached connection info
   s3bucket <- getS3Bucket()
-  df <- data.frame()
+  s3DataFetcher <- createS3DataFactory(s3bucket)
+  s3Contents <- s3bucket[attr(s3bucket, "names", exact = TRUE)]
+
+  # Cached data
+  df_list <- list()
   dataCreationDate <- as.Date(NA)
 
-  getRecentData <- function() {
-    newS3bucket <- getS3Bucket()
+  getRecentData <- function(targetVariable = TARGET_OPTIONS) {
+    targetVariable <- match.arg(targetVariable)
 
-    s3Contents <- s3bucket[attr(s3bucket, "names", exact = TRUE)]
+    newS3bucket <- getS3Bucket()
     newS3Contents <- newS3bucket[attr(newS3bucket, "names", exact = TRUE)]
+    s3BucketHasChanged <- !identical(s3Contents, newS3Contents)
 
-    # Fetch new score data if contents of S3 bucket has changed (including file
+    # Refetch when the contents of the S3 bucket have changed (including file
     # names, sizes, and last modified timestamps). Ignores characteristics of
-    # bucket and request, including bucket region, name, content type, request
-    # date, request ID, etc.
-    if (nrow(df) == 0 || !identical(s3Contents, newS3Contents)) {
-      # Save new data and new bucket connection info to vars in env of
-      # `getRecentDataHelper`. They persist between calls to `getRecentData` a
-      # la https://stackoverflow.com/questions/1088639/static-variables-in-r
+    # bucket and request, including bucket region, name, content type,
+    # request date, request ID, etc.
+    # State lives in the env of `createS3DataLoader` and persists between calls
+    # (https://stackoverflow.com/questions/1088639/static-variables-in-r).
+    if (s3BucketHasChanged) {
       s3bucket <<- newS3bucket
-      df <<- getAllData(createS3DataFactory(s3bucket))
-      dataCreationDate <<- getCreationDate(createS3DataFactory(s3bucket))
+      s3DataFetcher <<- createS3DataFactory(newS3bucket)
+      s3Contents <<- newS3Contents
+      # Drop every cached target, not only the requested one; otherwise the
+      # others would be served stale now that `s3Contents` is up to date.
+      df_list <<- list()
+    }
+    if (!(targetVariable %chin% names(df_list)) ||
+      nrow(df_list[[targetVariable]]) == 0) {
+      df_list[[targetVariable]] <<- getAllData(s3DataFetcher, targetVariable)
+      dataCreationDate <<- getCreationDate(s3DataFetcher)
     }
 
-    return(list(df = df, dataCreationDate = dataCreationDate))
+    return(list(df_list = df_list, dataCreationDate = dataCreationDate))
   }
 
   return(getRecentData)
@@ -134,12 +143,17 @@ createS3DataLoader <- function() {
 
 #' create a data loader with fallback data only
 createFallbackDataLoader <- function() {
-  df <- getAllData(getFallbackData)
+  df_list <- list()
+  for (targetVariable in TARGET_OPTIONS) {
+    df_list[[targetVariable]] <- getAllData(getFallbackData, targetVariable)
+  }
+  dataCreationDate <- getCreationDate(getFallbackData)
 
-  dataLoader <- function() {
-    df
+  # `targetVariable` is accepted but unused: it mirrors the S3 loader's signature.
+  dataLoader <- function(targetVariable = TARGET_OPTIONS) {
+    return(list(df_list = df_list, dataCreationDate = dataCreationDate))
   }
-  dataLoader
+  return(dataLoader)
 }
 
 

diff --git a/app/R/data_manipulation.R b/app/R/data_manipulation.R
@@ -14,18 +14,18 @@ renameScoreCol <- function(filteredScoreDf, scoreType, coverageInterval) {
 
 filterOverAllLocations <- function(filteredScoreDf, scoreType, hasAsOfData = FALSE, filterDate) {
   locationsIntersect <- list()
-  filteredScoreDf <- filteredScoreDf %>% filter(!is.na(Score) | target_end_date >= filterDate)
+  filteredScoreDf <- filter(filteredScoreDf, !is.na(Score) | target_end_date >= filterDate)
   # Create df with col for all locations across each unique date, ahead and forecaster combo
   locationDf <- filteredScoreDf %>%
     group_by(forecaster, target_end_date, ahead) %>%
     summarize(location_list = paste(sort(unique(geo_value)), collapse = ","))
-  locationDf <- locationDf %>% filter(location_list != c("us"))
+  locationDf <- filter(locationDf, location_list != c("us"))
   # Create a list containing each row's location list
   locationList <- sapply(locationDf$location_list, function(x) strsplit(x, ","))
   locationList <- lapply(locationList, function(x) x[x != "us"])
   # Get the intersection of all the locations in these lists
   locationsIntersect <- unique(Reduce(intersect, locationList))
-  filteredScoreDf <- filteredScoreDf %>% filter(geo_value %in% locationsIntersect)
+  filteredScoreDf <- filter(filteredScoreDf, geo_value %chin% locationsIntersect)
   if (scoreType == "coverage") {
     if (hasAsOfData) {
       filteredScoreDf <- filteredScoreDf %>%
@@ -56,40 +56,23 @@ filterOverAllLocations <- function(filteredScoreDf, scoreType, hasAsOfData = FAL
 # Only use weekly aheads for hospitalizations
 # May change in the future
 filterHospitalizationsAheads <- function(scoreDf) {
-  scoreDf["weekday"] <- weekdays(as.Date(scoreDf$target_end_date))
-  scoreDf <- scoreDf %>% filter(weekday == HOSPITALIZATIONS_TARGET_DAY)
+  days_list <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
+  # `data.table::wday` is 1 = Sunday (matches `days_list`); `lubridate` also exports a `wday`.
+  scoreDf["weekday"] <- days_list[data.table::wday(as.Date(scoreDf$target_end_date, "%Y-%m-%d"))]
+  scoreDf <- filter(scoreDf, weekday == HOSPITALIZATIONS_TARGET_DAY)
+  scoreDf$ahead_group <- case_when(
+    scoreDf$ahead >= HOSPITALIZATIONS_OFFSET & scoreDf$ahead < 7 + HOSPITALIZATIONS_OFFSET ~ 1L,
+    scoreDf$ahead >= 7 + HOSPITALIZATIONS_OFFSET & scoreDf$ahead < 14 + HOSPITALIZATIONS_OFFSET ~ 2L,
+    scoreDf$ahead >= 14 + HOSPITALIZATIONS_OFFSET & scoreDf$ahead < 21 + HOSPITALIZATIONS_OFFSET ~ 3L,
+    scoreDf$ahead >= 21 + HOSPITALIZATIONS_OFFSET & scoreDf$ahead < 28 + HOSPITALIZATIONS_OFFSET ~ 4L,
+    TRUE ~ NA_integer_
+  )
 
-  oneAheadDf <- scoreDf %>%
-    filter(ahead >= HOSPITALIZATIONS_OFFSET) %>%
-    filter(ahead < 7 + HOSPITALIZATIONS_OFFSET) %>%
-    group_by(target_end_date, forecaster) %>%
-    filter(ahead == min(ahead)) %>%
-    mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[1])
-
-  return(bind_rows(
-    scoreDf %>%
-      filter(ahead >= HOSPITALIZATIONS_OFFSET) %>%
-      filter(ahead < 7 + HOSPITALIZATIONS_OFFSET) %>%
-      group_by(target_end_date, forecaster) %>%
-      filter(ahead == min(ahead)) %>%
-      mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[1]),
-    scoreDf %>%
-      filter(ahead >= 7 + HOSPITALIZATIONS_OFFSET) %>%
-      filter(ahead < 14 + HOSPITALIZATIONS_OFFSET) %>%
-      group_by(target_end_date, forecaster) %>%
-      filter(ahead == min(ahead)) %>%
-      mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[2]),
-    scoreDf %>%
-      filter(ahead >= 14 + HOSPITALIZATIONS_OFFSET) %>%
-      filter(ahead < 21 + HOSPITALIZATIONS_OFFSET) %>%
-      group_by(target_end_date, forecaster) %>%
-      filter(ahead == min(ahead)) %>%
-      mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[3]),
+  return(
     scoreDf %>%
-      filter(ahead >= 21 + HOSPITALIZATIONS_OFFSET) %>%
-      filter(ahead < 28 + HOSPITALIZATIONS_OFFSET) %>%
-      group_by(target_end_date, forecaster) %>%
+      filter(!is.na(ahead_group)) %>%
+      group_by(target_end_date, forecaster, ahead_group) %>%
       filter(ahead == min(ahead)) %>%
-      mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[4])
-  ))
+      mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[ahead_group])
+  )
 }
diff --git a/app/R/exportScores.R b/app/R/exportScores.R
@@ -6,24 +6,20 @@ exportScoresUI <- function(id = "exportScores") {
 }
 
 createExportScoresDataFrame <- function(scoreDf, targetVariable, scoreType, forecasters, loc, coverageInterval) {
-  signalFilter <- CASE_FILTER
-  if (targetVariable == "Deaths") {
-    signalFilter <- DEATH_FILTER
-  } else if (targetVariable == "Hospitalizations") {
-    signalFilter <- HOSPITALIZATIONS_FILTER
-  }
+  scoreDf <- filter(
+    scoreDf[[targetVariable]],
+    forecaster %chin% forecasters
+  )
   scoreDf <- renameScoreCol(scoreDf, scoreType, coverageInterval)
-  scoreDf <- scoreDf %>%
-    filter(signal == signalFilter) %>%
-    filter(forecaster %in% forecasters)
+
   if (loc == TOTAL_LOCATIONS || scoreType == "coverage") {
-    if (signalFilter == HOSPITALIZATIONS_FILTER) {
+    if (targetVariable == "Hospitalizations") {
       scoreDf <- filterHospitalizationsAheads(scoreDf)
     }
     scoreDf <- filterOverAllLocations(scoreDf, scoreType)
     return(scoreDf[[1]])
   } else {
-    scoreDf <- scoreDf %>% filter(geo_value == tolower(loc))
+    scoreDf <- filter(scoreDf, geo_value == tolower(loc))
     scoreDf <- scoreDf[c(
       "ahead", "geo_value", "forecaster", "forecast_date",
       "data_source", "target_end_date", "Score", "actual"

diff --git a/app/global.R b/app/global.R
@@ -2,23 +2,26 @@ library(shiny)
 library(shinyjs)
 library(plotly)
 library(tidyr)
+library(purrr)
 library(dplyr, warn.conflicts = FALSE)
 library(lubridate)
 library(viridis)
 library(tsibble)
 library(covidcast)
+library(data.table)
 
-appVersion <- "6.1.0"
+appVersion <- "7.0.0"
 
 COVERAGE_INTERVALS <- c("10", "20", "30", "40", "50", "60", "70", "80", "90", "95", "98")
-DEATH_FILTER <- "deaths_incidence_num"
-CASE_FILTER <- "confirmed_incidence_num"
 CASES_DEATHS_TARGET_DAY <- "Saturday"
-HOSPITALIZATIONS_FILTER <- "confirmed_admissions_covid_1d"
 HOSPITALIZATIONS_TARGET_DAY <- "Wednesday"
 TOTAL_LOCATIONS <- "Totaled Over States*"
 AHEAD_OPTIONS <- c(1, 2, 3, 4)
 
+INIT_SCORE_TYPE <- "wis"
+INIT_TARGET <- "Hospitalizations"
+TARGET_OPTIONS <- c("Deaths", "Cases", "Hospitalizations")
+
 # Num days to offset the forecast week by
 # Example: if HOSPITALIZATIONS_TARGET_DAY is Wednesday and HOSPITALIZATIONS_OFFSET is 2,
 # ahead 1 has to have forecast date of Monday or earlier,
@@ -29,8 +32,18 @@ HOSPITALIZATIONS_AHEAD_OPTIONS <- c(
   HOSPITALIZATIONS_OFFSET + 14, HOSPITALIZATIONS_OFFSET + 21
 )
 
-# Sets the previous target to be the same as the first one, Deaths
-PREV_TARGET <- "Deaths"
+CURRENT_TAB_SUFFIX <- ""
+ARCHIVE_TAB_SUFFIX <- "_archive"
+
+# Targets per tab, keyed by "evaluations" + suffix; names are presumably UI labels.
+TARGET_VARS_BY_TAB <- list()
+TARGET_VARS_BY_TAB[[paste0("evaluations", CURRENT_TAB_SUFFIX)]] <- c(
+  "Hospital Admissions" = "Hospitalizations"
+)
+TARGET_VARS_BY_TAB[[paste0("evaluations", ARCHIVE_TAB_SUFFIX)]] <- c(
+  "Incident Deaths" = "Deaths",
+  "Incident Cases" = "Cases"
+)
 
 # When RE_RENDER_TRUTH = TRUE
 # summaryPlot will be called only to update TruthPlot