From 19786cb799383c55e0663f920a978ccd5463c361 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 11 Oct 2021 19:53:19 -0400 Subject: [PATCH 01/58] instead of hiding showForecasts button, gray out --- dashboard/app.R | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/dashboard/app.R b/dashboard/app.R index f489412..c922b7d 100644 --- a/dashboard/app.R +++ b/dashboard/app.R @@ -149,13 +149,15 @@ ui <- fluidPage(padding=0, title="Forecast Eval Dashboard", selected = '' ), tags$p(id="missing-data-disclaimer", "Some locations may not have 'as of' data for the chosen 'as of' date"), - hidden(div(id="showForecastsCheckbox", - checkboxInput( - "showForecasts", - "Show Forecasters' Predictions", - value = FALSE, - ) - )), + div(id="showForecastsCheckbox", + disabled( + checkboxInput( + "showForecasts", + "Show Forecasters' Predictions", + value = FALSE, + ) + ) + ), tags$hr(), export_scores_ui, tags$hr(), @@ -800,9 +802,9 @@ server <- function(input, output, session) { } if (input$asOf != '' && input$asOf == CURRENT_WEEK_END_DATE()) { - hideElement("showForecastsCheckbox") + disable("showForecasts") } else { - showElement("showForecastsCheckbox") + enable("showForecasts") } if (input$scoreType == "wis") { show("wisExplanation") @@ -850,9 +852,9 @@ server <- function(input, output, session) { updateAsOfData() # Only show forecast check box option if we are showing as of data if (input$asOf != '' && input$asOf == CURRENT_WEEK_END_DATE()) { - hideElement("showForecastsCheckbox") + disable("showForecasts") } else { - showElement("showForecastsCheckbox") + enable("showForecasts") } }) @@ -860,9 +862,9 @@ server <- function(input, output, session) { updateAsOfData() # Only show forecast check box option if we are showing as of data if (input$asOf != '' && input$asOf == CURRENT_WEEK_END_DATE()) { - hideElement("showForecastsCheckbox") + disable("showForecasts") } else { - 
showElement("showForecastsCheckbox") + enable("showForecasts") } }) From 4e1d27cb71367476b783ac651a5ea7418ee2b5a4 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 12 Oct 2021 14:30:34 -0400 Subject: [PATCH 02/58] small code fixes --- dashboard/app.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dashboard/app.R b/dashboard/app.R index f489412..0b0c08d 100644 --- a/dashboard/app.R +++ b/dashboard/app.R @@ -889,7 +889,7 @@ server <- function(input, output, session) { # Ensure there is always one forecaster selected if(length(input$forecasters) < 1) { updateSelectInput(session, "forecasters", - selected = c("COVIDhub-ensemble")) # Use ensemble rather than baseline bc it has hospitalization scores + selected = c("COVIDhub-baseline")) } # Ensure COVIDhub-baseline is selected when scaling by baseline if(input$scaleByBaseline && !("COVIDhub-baseline" %in% input$forecasters)) { @@ -954,14 +954,12 @@ server <- function(input, output, session) { selectedAsOf = isolate(input$asOf) if (input$targetVariable == "Hospitalizations") { minChoice = MIN_AVAIL_HOSP_AS_OF_DATE - asOfChoices = asOfChoices[asOfChoices >= minChoice] } else if(input$location == 'US' && input$scoreType != 'coverage') { minChoice = MIN_AVAIL_NATION_AS_OF_DATE - asOfChoices = asOfChoices[asOfChoices >= minChoice] } else if(input$location %in% TERRITORIES || input$location == TOTAL_LOCATIONS || input$scoreType == 'coverage') { minChoice = MIN_AVAIL_TERRITORY_AS_OF_DATE - asOfChoices = asOfChoices[asOfChoices >= minChoice] } + asOfChoices = asOfChoices[asOfChoices >= minChoice] asOfChoices = c(asOfChoices, CURRENT_WEEK_END_DATE()) # Make sure we have a valid as of selection nonValidAsOf = selectedAsOf == '' || !(as.Date(selectedAsOf) %in% asOfChoices) From 7ed6856c5e50e0703a4623ef7dd819b189ef4d77 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 13 Oct 2021 18:02:50 -0400 Subject: [PATCH 
03/58] Revert "Merge pull request #196 from cmu-delphi/patch-asof-dates" This reverts commit 2154476fc3c0e54d09f8cf9212aea6e59a595f64, reversing changes made to 3a987674bd22d666f10dd6ae67539ae8c8aee906. --- dashboard/app.R | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dashboard/app.R b/dashboard/app.R index f489412..e058dae 100644 --- a/dashboard/app.R +++ b/dashboard/app.R @@ -334,15 +334,16 @@ server <- function(input, output, session) { # Get most recent target end date # Prev Saturday for Cases and Deaths, prev Wednesday for Hospitalizations - # Since we don't upload new observed data until Sunday: - # Use 7 and 1 for Cases and Deaths so that Sundays will use the Saturday directly beforehand. - # Use 4 and 10 for Hospitalizations since Thurs-Sat should not use the Wednesday directly beforehand. - # (This means that on Sundays until the afternoon when the pipeline completes, the "as of" will show - # the most recent Saturday / Wednesday date even though the actual updated data won't be there yet) - prevWeek <- seq(Sys.Date()-7,Sys.Date()-1,by='day') + # Since we don't upload new observed data until Monday: + # Use 8 and 2 for Cases and Deaths so that Sundays will not use the Saturday directly beforehand + # since we don't have data for it yet. + # Use 5 and 11 for Hospitalizations since Thurs-Sun should also not use the Wednesday directly beforehand. 
+ # (This means that on Mondays until the afternoon when pipeline completes, the "as of" will show + # most recent Saturday / Wednesday date even though the actual updated data won't be there yet) + prevWeek <- seq(Sys.Date()-8,Sys.Date()-2,by='day') CASES_DEATHS_CURRENT = prevWeek[weekdays(prevWeek)=='Saturday'] CURRENT_WEEK_END_DATE = reactiveVal(CASES_DEATHS_CURRENT) - prevHospWeek <- seq(Sys.Date()-10,Sys.Date()-4,by='day') + prevHospWeek <- seq(Sys.Date()-11,Sys.Date()-5,by='day') HOSP_CURRENT = prevHospWeek[weekdays(prevHospWeek)=='Wednesday'] # Get scores From 6ed7d23960b819285c28f3d45c7d8ae40dabe5fc Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 13 Oct 2021 18:19:08 -0400 Subject: [PATCH 04/58] Revert "Merge pull request #193 from cmu-delphi/revert-template-refactor+" This reverts commit 4cd31c25a75ffcf85ade9a561d1df92d9a47175c, reversing changes made to 3da2a05b450d97c7952af0145700af7654b00f2c. --- .bumpversion.cfg | 8 + .gitattributes | 127 ++ .github/release-drafter.yml | 28 + .github/workflows/ci.yaml | 80 ++ .github/workflows/create_release.yml | 43 + .github/workflows/main.yml | 54 - .github/workflows/release_main.yml | 57 + .gitignore | 56 +- .lintr | 9 + DESCRIPTION | 25 +- Makefile | 15 +- Report/create_reports.R | 220 ++-- Report/error_measures.R | 108 +- Report/score.R | 134 ++- app/R/data.R | 128 ++ app/R/data_manipulation.R | 89 ++ app/R/delphiLayout.R | 86 ++ app/R/exportScores.R | 74 ++ {dashboard => app/assets}/about-dashboard.md | 0 {dashboard => app/assets}/about.md | 0 {dashboard => app/assets}/ae.md | 0 {dashboard => app/assets}/coverageplot.md | 0 .../assets}/google-analytics.html | 0 .../assets}/scoring-disclaimer.md | 0 {dashboard => app/assets}/sharpness.md | 0 {dashboard => app/assets}/wis.md | 0 app/global.R | 64 + app/server.R | 780 ++++++++++++ app/ui.R | 187 +++ {dashboard => app}/www/cmu_brand.png | Bin app/www/delphiLayout.css | 122 ++ {dashboard => 
app}/www/forecast-hub-logo.png | Bin app/www/style.css | 45 + csv2rds.R | 2 +- dashboard/app.R | 1060 ----------------- dashboard/arrow-left.svg | 1 - dashboard/common.R | 93 -- dashboard/export_scores.R | 60 - dashboard/github.svg | 1 - dashboard/www/style.css | 154 --- devops/Dockerfile | 15 + .../shiny_server.conf | 9 +- docker_build/dependencies.R | 1 - docker_dashboard/Dockerfile | 25 - 44 files changed, 2254 insertions(+), 1706 deletions(-) create mode 100644 .bumpversion.cfg create mode 100644 .gitattributes create mode 100644 .github/release-drafter.yml create mode 100644 .github/workflows/ci.yaml create mode 100644 .github/workflows/create_release.yml delete mode 100644 .github/workflows/main.yml create mode 100644 .github/workflows/release_main.yml create mode 100644 .lintr create mode 100644 app/R/data.R create mode 100644 app/R/data_manipulation.R create mode 100644 app/R/delphiLayout.R create mode 100644 app/R/exportScores.R rename {dashboard => app/assets}/about-dashboard.md (100%) rename {dashboard => app/assets}/about.md (100%) rename {dashboard => app/assets}/ae.md (100%) rename {dashboard => app/assets}/coverageplot.md (100%) rename {dashboard => app/assets}/google-analytics.html (100%) rename {dashboard => app/assets}/scoring-disclaimer.md (100%) rename {dashboard => app/assets}/sharpness.md (100%) rename {dashboard => app/assets}/wis.md (100%) create mode 100644 app/global.R create mode 100644 app/server.R create mode 100644 app/ui.R rename {dashboard => app}/www/cmu_brand.png (100%) create mode 100644 app/www/delphiLayout.css rename {dashboard => app}/www/forecast-hub-logo.png (100%) create mode 100644 app/www/style.css delete mode 100644 dashboard/app.R delete mode 100644 dashboard/arrow-left.svg delete mode 100644 dashboard/common.R delete mode 100644 dashboard/export_scores.R delete mode 100644 dashboard/github.svg delete mode 100644 dashboard/www/style.css create mode 100644 devops/Dockerfile rename {docker_dashboard => 
devops}/shiny_server.conf (72%) delete mode 100644 docker_dashboard/Dockerfile diff --git a/.bumpversion.cfg b/.bumpversion.cfg new file mode 100644 index 0000000..0fc0a01 --- /dev/null +++ b/.bumpversion.cfg @@ -0,0 +1,8 @@ +[bumpversion] +current_version = 4.0.0 +commit = False +tag = False + +[bumpversion:file:DESCRIPTION] + +[bumpversion:file:app/global.R] diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..d155bfb --- /dev/null +++ b/.gitattributes @@ -0,0 +1,127 @@ +# These settings are for any web project + +# Handle line endings automatically for files detected as text +# and leave all files detected as binary untouched. +* text=auto eol=lf + +# +# The above will handle all files NOT found below +# + +# +## These files are text and should be normalized (Convert crlf => lf) +# + +# source code +*.php text +*.css text +*.sass text +*.scss text +*.less text +*.styl text +*.js text +*.ts text +*.coffee text +*.json text +*.htm text +*.html text +*.xml text +*.txt text +*.ini text +*.inc text +*.pl text +*.rb text +*.py text +*.scm text +*.sql text +*.sh text eof=LF +*.bat text + +# templates +*.hbt text +*.jade text +*.haml text +*.hbs text +*.dot text +*.tmpl text +*.phtml text + +# server config +.htaccess text + +# git config +.gitattributes text +.gitignore text + +# code analysis config +.jshintrc text +.jscsrc text +.jshintignore text +.csslintrc text + +# misc config +*.yaml text +*.yml text +.editorconfig text + +# build config +*.npmignore text +*.bowerrc text +Dockerfile text eof=LF + +# Heroku +Procfile text +.slugignore text + +# Documentation +*.md text +LICENSE text +AUTHORS text + + +# +## These files are binary and should be left untouched +# + +# (binary is a macro for -text -diff) +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.ico binary +*.mov binary +*.mp4 binary +*.mp3 binary +*.flv binary +*.fla binary +*.swf binary +*.gz binary +*.zip binary +*.7z binary +*.ttf binary +*.pyc binary +*.pdf binary + 
+# Source files +# ============ +*.pxd text +*.py text +*.py3 text +*.pyw text +*.pyx text +*.sh text eol=lf +*.json text + +# Binary files +# ============ +*.db binary +*.p binary +*.pkl binary +*.pyc binary +*.pyd binary +*.pyo binary + +# Note: .db, .p, and .pkl files are associated +# with the python modules ``pickle``, ``dbm.*``, +# ``shelve``, ``marshal``, ``anydbm``, & ``bsddb`` +# (among others). diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml new file mode 100644 index 0000000..ea4e539 --- /dev/null +++ b/.github/release-drafter.yml @@ -0,0 +1,28 @@ +name-template: "v$RESOLVED_VERSION" +tag-template: "v$RESOLVED_VERSION" +categories: + - title: "📚 Content Changes" + labels: + - "blog" + - "content" + - title: "🚀 Features" + labels: + - "enhancement" + - "feature" + - title: "🐛 Bugs Fixes" + labels: + - "bug" + - title: "📕 Documentation" + labels: + - "documentation" + - title: "🧰 Development" + labels: + - "chore" + - "documentation" + - "dependencies" +change-template: "- #$NUMBER $TITLE" +change-title-escapes: '\<*_&`#@' +template: | + $CHANGES + + Thanks to $CONTRIBUTORS diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..461e197 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,80 @@ +name: ci + +on: push + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: r-lib/actions/setup-r@v1 + with: + use-public-rspm: true + - uses: r-lib/actions/setup-r-dependencies@v1 + - name: Style / Format + shell: Rscript {0} + run: styler::style_dir(dry="fail") + - name: Lint + shell: Rscript {0} + run: | + lintr::lint_dir('.') + + image: + needs: build + # only on main and dev branch + if: startsWith(github.ref, 'refs/heads/main') || github.ref == 'refs/heads/dev' + runs-on: ubuntu-latest + steps: + - name: Check out code + uses: actions/checkout@v2 + - name: Pull Image Data + run: make pull_data + - name: Login to GitHub Container Registry + uses: 
docker/login-action@v1 + with: + registry: ghcr.io + username: cmu-delphi-deploy-machine + password: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_PAT }} + - name: Build Image + env: + DEVOPS_DOCKER_FILE: ./devops/Dockerfile + run: | + docker build -t repo --file ${DEVOPS_DOCKER_FILE} . + - name: Resolve Tag + id: tagname + run: | + baseRef="${GITHUB_REF#*/}" + imageTag="${baseRef#*/}" + if [ "$imageTag" = "main" ] ; then + imageTag="latest" + fi + echo "::set-output name=tag::$imageTag" + echo "::set-output name=repo::ghcr.io/${{ github.repository }}" + - name: Push Dev Tag + run: | + docker tag repo ${{ steps.tagname.outputs.repo }}:${{ steps.tagname.outputs.tag }} + docker push ${{ steps.tagname.outputs.repo }}:${{ steps.tagname.outputs.tag }} + - name: Set up Python 3.8 + if: startsWith(github.ref, 'refs/heads/main') + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Extract version + if: startsWith(github.ref, 'refs/heads/main') + id: extract_version + run: | + python -m pip install bump2version + echo -n "::set-output name=version::" + bump2version --dry-run --list patch | grep ^current_version | sed -r s,"^.*=",, + - name: Trigger Webhook + run: | + # trigger a webhook update + curl -H "Authorization: Bearer ${{ secrets.DELPHI_DEPLOY_WEBHOOK_TOKEN }}" \ + -X POST ${{ secrets.DELPHI_DEPLOY_WEBHOOK_URL }} \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "repository=${{ steps.tagname.outputs.repo }}&tag=${{ steps.tagname.outputs.tag }}" + - name: Push Version Tag Tag + if: startsWith(github.ref, 'refs/heads/main') + run: | + docker tag repo ${{ steps.tagname.outputs.repo }}:v${{ steps.extract_version.outputs.version }} + docker push ${{ steps.tagname.outputs.repo }}:v${{ steps.extract_version.outputs.version }} diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml new file mode 100644 index 0000000..2d82e6f --- /dev/null +++ b/.github/workflows/create_release.yml @@ -0,0 +1,43 @@ +name: Create 
Release +on: + workflow_dispatch: + inputs: + versionName: + description: "Semantic Version Number (i.e., 5.5.0 or patch, minor, major, prepatch, preminor, premajor, prerelease)" + required: true + default: patch +jobs: + create_release: + runs-on: ubuntu-latest + steps: + - name: Check out code + uses: actions/checkout@v2 + with: + ref: main + ssh-key: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_SSH }} + - name: Reset main branch + run: | + git fetch origin dev:dev + git reset --hard dev + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Change version number + id: version + run: | + python -m pip install bump2version + echo -n "::set-output name=next_tag::" + bump2version --list ${{ github.event.inputs.versionName }} | grep new_version | sed -r s,"^.*=",, + - name: Create pull request into main + uses: peter-evans/create-pull-request@v3 + with: + branch: release/${{ steps.version.outputs.next_tag }} + commit-message: "chore: release ${{ steps.version.outputs.next_tag }}" + base: main + title: Release ${{ steps.version.outputs.next_tag }} + labels: chore + reviewers: krivard + assignees: krivard + body: | + Releasing ${{ steps.version.outputs.next_tag }}. diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index a3815d5..0000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,54 +0,0 @@ -# This is a basic workflow to help you get started with Actions - -name: CI - -# Controls when the action will run. 
-on: - # Triggers the workflow on push to the dev and main branches - push: - branches: [ dev, main ] - pull_request: - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel -jobs: - # This workflow contains a single job called "build" - build: - # The type of runner that the job will run on - runs-on: ubuntu-latest - - # Steps represent a sequence of tasks that will be executed as part of the job - steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v2 - - - name: Login to GitHub Container Registry - uses: docker/login-action@v1 - with: - registry: ghcr.io - username: cmu-delphi-deploy-machine - password: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_PAT }} - - # Runs a single command using the runners shell - - name: Build and deploy dashboard docker image - run: | - baseRef="${GITHUB_REF#*/}" - baseRef="${baseRef#*/}" - case "${baseRef}" in - main) - imageTag="latest" - ;; - *) - imageTag="${baseRef//\//_}" # replace `/` with `_` in branch name - ;; - esac - echo "using tag: --${imageTag}--" - make deploy_dashboard imageTag=$imageTag - - # Trigger a webhook update - curl -H "Authorization: Bearer ${{ secrets.DELPHI_DEPLOY_WEBHOOK_TOKEN }}" \ - -X POST ${{ secrets.DELPHI_DEPLOY_WEBHOOK_URL }} \ - -H "Content-Type: application/x-www-form-urlencoded" \ - -d "repository=ghcr.io/cmu-delphi/forecast-eval&tag=$imageTag" diff --git a/.github/workflows/release_main.yml b/.github/workflows/release_main.yml new file mode 100644 index 0000000..6e756c7 --- /dev/null +++ b/.github/workflows/release_main.yml @@ -0,0 +1,57 @@ +name: Release Main +on: + push: + branches: + - main + +jobs: + correct_repository: + runs-on: ubuntu-latest + steps: + - name: fail on fork + if: github.repository_owner != 'cmu-delphi' + run: exit 1 + + create_release: + needs: correct_repository + runs-on: ubuntu-latest + 
steps: + - name: Check out code + uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Extract version + id: extract_version + run: | + python -m pip install bump2version + echo -n "::set-output name=version::" + bump2version --dry-run --list patch | grep ^current_version | sed -r s,"^.*=",, + - name: Create Release + id: create_release + uses: release-drafter/release-drafter@v5 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + version: ${{ steps.extract_version.outputs.version }} + publish: true + outputs: + version: ${{ steps.extract_version.outputs.version }} + upload_url: ${{ steps.create_release.outputs.upload_url }} + tag_name: ${{ steps.create_release.outputs.tag_name }} + + sync_dev: + needs: correct_repository + runs-on: ubuntu-latest + steps: + - name: Check out code + uses: actions/checkout@v2 + with: + ref: dev + ssh-key: ${{ secrets.CMU_DELPHI_DEPLOY_MACHINE_SSH }} + - name: Reset dev branch + run: | + git fetch origin main:main + git merge main + git push diff --git a/.gitignore b/.gitignore index caaa79c..ec4ceab 100644 --- a/.gitignore +++ b/.gitignore @@ -1,51 +1,11 @@ -# History files +.Rproj.user .Rhistory -.Rapp.history - -# Session Data files .RData - -# User-specific files -.Ruserdata - -# Example code in package build process -*-Ex.R - -# Output files from R CMD build +doc +Meta +.vscode +*.log /*.tar.gz - -# Output files from R CMD check -/*.Rcheck/ - -# RStudio files -.Rproj.user/ - -# produced vignettes -vignettes/*.html -vignettes/*.pdf - -# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 -.httr-oauth - -# knitr and R markdown default cache directories -*_cache/ -/cache/ - -# Build output files -/dist/ - -# Temporary files created by R markdown -*.utf8.md -*.knit.md - -# R Environment Variables -.Renviron - -# Build output -dist/ - -# Data files -.csv -.rds -dashboard/**.csv -dashboard/**.rds +*.Rcheck +/dist +/NAMESPACE \ No 
newline at end of file diff --git a/.lintr b/.lintr new file mode 100644 index 0000000..0971764 --- /dev/null +++ b/.lintr @@ -0,0 +1,9 @@ +linters: with_defaults( + line_length_linter(160), + object_name_linter = NULL, + object_usage_linter = NULL, + cyclocomp_linter = NULL, + open_curly_linter = NULL, + closed_curly_linter = NULL, + object_length_linter = NULL + ) diff --git a/DESCRIPTION b/DESCRIPTION index eabb8d5..6ab3f59 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,4 +1,4 @@ -Package: forecast-eval +Package: forecasteval Title: Forecast Evaluation Dashboard Version: 4.2.0 Authors@R: c(person("Kate", "Harwood", email = "kharwood@andrew.cmu.edu", role = "cre"), @@ -6,3 +6,26 @@ Authors@R: c(person("Kate", "Harwood", email = "kharwood@andrew.cmu.edu", role = person("Jed", "Grabman", role = "ctb")) Description: This app collects and scores COVID-19 forecasts submitted to the CDC and displays the results in an RShiny dashboard. License: MIT License, Copyright (c) 2021 Delphi contributors +URL: https://github.com/cmu-delphi/forecast-eval/ +BugReports: https://github.com/cmu-delphi/forecast-eval/issues +Depends: R (>= 3.6.0) +Encoding: UTF-8 +Type: Shiny +Imports: + shiny, + shinyjs, + bslib, + plotly, + dplyr, + tools, + tsibble, + viridis, + aws.s3, + covidcast, + stringr, + markdown +Suggests: + styler, + lintr, + testthat +RoxygenNote: 7.1.1 diff --git a/Makefile b/Makefile index cf10a80..409df92 100644 --- a/Makefile +++ b/Makefile @@ -7,14 +7,13 @@ build: build_dashboard r_build: docker build --no-cache --pull -t forecast-eval-build docker_build -predictions_cards.rds score_cards_state_deaths.rds score_cards_state_cases.rds score_cards_nation_cases.rds score_cards_nation_deaths.rds: dist - test -f dist/$@ || curl -o dist/$@ $(S3_URL)/$@ +%.rds: dist + test -f dist/$@ || curl -o dist/$@ $(S3_URL)/$@ -pull_data: predictions_cards.rds score_cards_state_deaths.rds score_cards_state_cases.rds score_cards_nation_cases.rds score_cards_nation_deaths.rds 
+pull_data: score_cards_state_deaths.rds score_cards_state_cases.rds score_cards_nation_cases.rds score_cards_nation_deaths.rds score_cards_state_hospitalizations.rds score_cards_nation_hospitalizations.rds dist: mkdir $@ - cp dashboard/www/style.css dist/style-`md5sum dashboard/www/style.css | cut -d ' ' -f 1`.css clean: rm -rf dist @@ -34,19 +33,19 @@ deploy: score_forecast start_dev: r_build docker run --pull=always -ti --rm \ -v ${PWD}/Report:/var/forecast-eval \ - -v ${PWD}/dashboard:/var/forecast-eval-dashboard \ + -v ${PWD}/app:/var/forecast-eval-dashboard \ -v ${PWD}/dist:/var/dist \ -w /var/forecast-eval \ ghcr.io/cmu-delphi/forecast-eval:latest bash build_dashboard_dev: pull_data - docker build --no-cache --pull -t ghcr.io/cmu-delphi/forecast-eval:latest -f docker_dashboard/Dockerfile . + docker build --no-cache --pull -t ghcr.io/cmu-delphi/forecast-eval:latest -f devops/Dockerfile . build_dashboard: pull_data - docker build --no-cache=true --pull -t ghcr.io/cmu-delphi/forecast-eval:$(imageTag) -f docker_dashboard/Dockerfile . + docker build --no-cache=true --pull -t ghcr.io/cmu-delphi/forecast-eval:$(imageTag) -f devops/Dockerfile . 
deploy_dashboard: build_dashboard docker push ghcr.io/cmu-delphi/forecast-eval:$(imageTag) start_dashboard: build_dashboard_dev - docker run --rm -p 3838:3838 ghcr.io/cmu-delphi/forecast-eval:latest + docker run --rm -p 3838:80 ghcr.io/cmu-delphi/forecast-eval:latest diff --git a/Report/create_reports.R b/Report/create_reports.R index 3d7cf39..5ae5f37 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -4,160 +4,180 @@ library("dplyr") library("evalcast") library("lubridate") -option_list = list( - make_option( - c("-d", "--dir"), - type = "character", - default = ".", - help = "Directory to read/write data", - metavar = "character" - ) -); - -opt_parser = OptionParser(option_list = option_list); -opt = parse_args(opt_parser); - -prediction_cards_filename = "predictions_cards.rds" -prediction_cards_filepath = case_when( - !is.null(opt$dir) ~ file.path(opt$dir, prediction_cards_filename), - TRUE~prediction_cards_filename +option_list <- list( + make_option( + c("-d", "--dir"), + type = "character", + default = ".", + help = "Directory to read/write data", + metavar = "character" + ) +) +opt_parser <- OptionParser(option_list = option_list) +opt <- parse_args(opt_parser) +prediction_cards_filename <- "predictions_cards.rds" +prediction_cards_filepath <- case_when( + !is.null(opt$dir) ~ file.path(opt$dir, prediction_cards_filename), + TRUE ~ prediction_cards_filename ) -forecasters = unique(c(get_covidhub_forecaster_names(designations = c("primary", "secondary")), - "COVIDhub-baseline", "COVIDhub-trained_ensemble", "COVIDhub-4_week_ensemble")) -locations = covidHubUtils::hub_locations +forecasters <- unique(c( + get_covidhub_forecaster_names(designations = c("primary", "secondary")), + "COVIDhub-baseline", "COVIDhub-trained_ensemble", "COVIDhub-4_week_ensemble" +)) +locations <- covidHubUtils::hub_locations # also includes "us", which is national level data -state_geos = locations %>% - filter(nchar(.data$geo_value) == 2) %>% - pull(.data$geo_value) 
-signals = c("confirmed_incidence_num", - "deaths_incidence_num", - "confirmed_admissions_covid_1d") - -predictions_cards = get_covidhub_predictions(forecasters, - signal = signals, - ahead = 1:28, - geo_values = state_geos, - verbose = TRUE, - use_disk = TRUE) %>% - filter(!(incidence_period == "epiweek" & ahead > 4)) - -predictions_cards = predictions_cards %>% - filter(!is.na(target_end_date)) %>% - filter(target_end_date < today()) +state_geos <- locations %>% + filter(nchar(.data$geo_value) == 2) %>% + pull(.data$geo_value) +signals <- c( + "confirmed_incidence_num", + "deaths_incidence_num", + "confirmed_admissions_covid_1d" +) + +predictions_cards <- get_covidhub_predictions(forecasters, + signal = signals, + ahead = 1:28, + geo_values = state_geos, + verbose = TRUE, + use_disk = TRUE +) %>% + filter(!(incidence_period == "epiweek" & ahead > 4)) + +predictions_cards <- predictions_cards %>% + filter(!is.na(target_end_date)) %>% + filter(target_end_date < today()) # For hospitalizations, drop all US territories except Puerto Rico and the # Virgin Islands; HHS does not report data for any territories except PR and VI. territories <- c("as", "gu", "mp", "fm", "mh", "pw", "um") -predictions_cards = predictions_cards %>% - filter(!(geo_value %in% territories & data_source == "hhs")) +predictions_cards <- predictions_cards %>% + filter(!(geo_value %in% territories & data_source == "hhs")) # For epiweek predictions, only accept forecasts made Monday or earlier. # target_end_date is the date of the last day (Saturday) in the epiweek # For daily predictions, accept any forecast where the target_end_date is later # than the forecast_date. 
-predictions_cards = predictions_cards %>% - filter( - (incidence_period == "epiweek" & target_end_date - (forecast_date + 7 * ahead) >= -2) | - (incidence_period == "day" & target_end_date > forecast_date) - ) +predictions_cards <- predictions_cards %>% + filter( + (incidence_period == "epiweek" & target_end_date - (forecast_date + 7 * ahead) >= -2) | + (incidence_period == "day" & target_end_date > forecast_date) + ) # And only a forecaster's last forecast if multiple were made -predictions_cards = predictions_cards %>% - group_by(forecaster, geo_value, target_end_date, quantile, ahead, signal) %>% - filter(forecast_date == max(forecast_date)) %>% - ungroup() -class(predictions_cards) = c("predictions_cards", class(predictions_cards)) +predictions_cards <- predictions_cards %>% + group_by(forecaster, geo_value, target_end_date, quantile, ahead, signal) %>% + filter(forecast_date == max(forecast_date)) %>% + ungroup() +class(predictions_cards) <- c("predictions_cards", class(predictions_cards)) print("Saving predictions...") saveRDS(predictions_cards, - file = prediction_cards_filepath, - compress = "xz") + file = prediction_cards_filepath, + compress = "xz" +) print("Predictions saved") # Create error measure functions -central_intervals = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.98) -cov_names = paste0("cov_", central_intervals * 100) -coverage_functions = sapply(central_intervals, - function(coverage) interval_coverage(coverage)) -names(coverage_functions) = cov_names +central_intervals <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.98) +cov_names <- paste0("cov_", central_intervals * 100) +coverage_functions <- sapply( + central_intervals, + function(coverage) interval_coverage(coverage) +) +names(coverage_functions) <- cov_names # TODO: Contains fixed versions of WIS component metrics, to be ported over to evalcast # Redefines overprediction, underprediction and sharpness source("error_measures.R") -err_measures = c(wis = 
weighted_interval_score, - overprediction = overprediction, - underprediction = underprediction, - sharpness = sharpness, - ae = absolute_error, - coverage_functions, - value_20 = get_quantile_prediction_factory(0.2), - value_50 = get_quantile_prediction_factory(0.5), - value_80 = get_quantile_prediction_factory(0.8)) +err_measures <- c( + wis = weighted_interval_score, + overprediction = overprediction, + underprediction = underprediction, + sharpness = sharpness, + ae = absolute_error, + coverage_functions, + value_20 = get_quantile_prediction_factory(0.2), + value_50 = get_quantile_prediction_factory(0.5), + value_80 = get_quantile_prediction_factory(0.8) +) -nation_predictions = predictions_cards %>% filter(geo_value == "us") -state_predictions = predictions_cards %>% filter(geo_value != "us") +nation_predictions <- predictions_cards %>% filter(geo_value == "us") +state_predictions <- predictions_cards %>% filter(geo_value != "us") # predictions_cards not needed beyond this point, try free up the memory rm(predictions_cards) gc() print("Evaluating state forecasts") -state_scores = evaluate_covid_predictions(state_predictions, - err_measures, - geo_type = "state") +state_scores <- evaluate_covid_predictions(state_predictions, + err_measures, + geo_type = "state" +) source("score.R") -if ( "confirmed_incidence_num" %in% unique(state_scores$signal)) { - print("Saving state confirmed incidence...") - save_score_cards(state_scores, "state", signal_name = "confirmed_incidence_num", - output_dir = opt$dir) +if ("confirmed_incidence_num" %in% unique(state_scores$signal)) { + print("Saving state confirmed incidence...") + save_score_cards(state_scores, "state", + signal_name = "confirmed_incidence_num", + output_dir = opt$dir + ) } else { - warning("State confirmed incidence should generally be available. Please + warning("State confirmed incidence should generally be available. 
Please verify that you expect not to have any cases incidence forecasts") } -if ( "deaths_incidence_num" %in% unique(state_scores$signal)) { - print("Saving state deaths incidence...") - save_score_cards(state_scores, "state", signal_name = "deaths_incidence_num", - output_dir = opt$dir) +if ("deaths_incidence_num" %in% unique(state_scores$signal)) { + print("Saving state deaths incidence...") + save_score_cards(state_scores, "state", + signal_name = "deaths_incidence_num", + output_dir = opt$dir + ) } else { - warning("State deaths incidence should generally be available. Please + warning("State deaths incidence should generally be available. Please verify that you expect not to have any deaths incidence forecasts") } -if ( "confirmed_admissions_covid_1d" %in% unique(state_scores$signal)) { - print("Saving state hospitalizations...") - save_score_cards(state_scores, "state", signal_name = "confirmed_admissions_covid_1d", - output_dir = opt$dir) +if ("confirmed_admissions_covid_1d" %in% unique(state_scores$signal)) { + print("Saving state hospitalizations...") + save_score_cards(state_scores, "state", + signal_name = "confirmed_admissions_covid_1d", + output_dir = opt$dir + ) } print("Evaluating national forecasts") # COVIDcast does not return national level data, using CovidHubUtils instead -nation_scores = evaluate_chu(nation_predictions, signals, err_measures) +nation_scores <- evaluate_chu(nation_predictions, signals, err_measures) -if ( "confirmed_incidence_num" %in% unique(state_scores$signal)) { - print("Saving nation confirmed incidence...") - save_score_cards(nation_scores, "nation", - signal_name = "confirmed_incidence_num", output_dir = opt$dir) +if ("confirmed_incidence_num" %in% unique(state_scores$signal)) { + print("Saving nation confirmed incidence...") + save_score_cards(nation_scores, "nation", + signal_name = "confirmed_incidence_num", output_dir = opt$dir + ) } else { - warning("Nation confirmed incidence should generally be available. 
Please + warning("Nation confirmed incidence should generally be available. Please verify that you expect not to have any cases incidence forecasts") } -if ( "deaths_incidence_num" %in% unique(state_scores$signal)) { - print("Saving nation deaths incidence...") - save_score_cards(nation_scores, "nation", signal_name = "deaths_incidence_num", - output_dir = opt$dir) +if ("deaths_incidence_num" %in% unique(state_scores$signal)) { + print("Saving nation deaths incidence...") + save_score_cards(nation_scores, "nation", + signal_name = "deaths_incidence_num", + output_dir = opt$dir + ) } else { - warning("Nation deaths incidence should generally be available. Please + warning("Nation deaths incidence should generally be available. Please verify that you expect not to have any deaths incidence forecasts") } -if ( "confirmed_admissions_covid_1d" %in% unique(state_scores$signal)) { - print("Saving nation hospitalizations...") - save_score_cards(nation_scores, "nation", signal_name = "confirmed_admissions_covid_1d", - output_dir = opt$dir) +if ("confirmed_admissions_covid_1d" %in% unique(state_scores$signal)) { + print("Saving nation hospitalizations...") + save_score_cards(nation_scores, "nation", + signal_name = "confirmed_admissions_covid_1d", + output_dir = opt$dir + ) } print("Done") diff --git a/Report/error_measures.R b/Report/error_measures.R index 522fba0..4298961 100644 --- a/Report/error_measures.R +++ b/Report/error_measures.R @@ -3,16 +3,20 @@ library(assertthat) overprediction <- function(quantile, value, actual_value) { score_func_param_checker(quantile, value, actual_value, "overprediction") if (!is_symmetric(quantile)) { - warning(paste0("overprediction/underprediction/sharpness require", - "symmetric quantile forecasts. Using NA.")) + warning(paste0( + "overprediction/underprediction/sharpness require", + "symmetric quantile forecasts. Using NA." 
+ )) + return(NA) + } + if (all(is.na(actual_value))) { return(NA) } - if (all(is.na(actual_value))) return(NA) actual_value <- unique(actual_value) - + lower <- value[!is.na(quantile) & quantile < .5] med <- value[find_quantile_match(quantile, 0.5)] - + if (length(med) > 1L) { return(NA) } else if (length(med) == 1L) { @@ -20,27 +24,32 @@ overprediction <- function(quantile, value, actual_value) { } else { m <- NULL } - + ans <- mean(c( - rep((lower - actual_value) * (lower > actual_value), 2), m)) - - + rep((lower - actual_value) * (lower > actual_value), 2), m + )) + + return(ans) } underprediction <- function(quantile, value, actual_value) { score_func_param_checker(quantile, value, actual_value, "underprediction") if (!is_symmetric(quantile)) { - warning(paste0("overprediction/underprediction/sharpness require", - "symmetric quantile forecasts. Using NA.")) + warning(paste0( + "overprediction/underprediction/sharpness require", + "symmetric quantile forecasts. Using NA." + )) + return(NA) + } + if (all(is.na(actual_value))) { return(NA) } - if (all(is.na(actual_value))) return(NA) actual_value <- unique(actual_value) - + upper <- value[!is.na(quantile) & quantile > .5] med <- value[find_quantile_match(quantile, 0.5)] - + if (length(med) > 1L) { return(NA) } else if (length(med) == 1L) { @@ -48,63 +57,80 @@ underprediction <- function(quantile, value, actual_value) { } else { m <- NULL } - + ans <- mean(c( - rep((actual_value - upper) * (upper < actual_value), 2), m)) - + rep((actual_value - upper) * (upper < actual_value), 2), m + )) + return(ans) } sharpness <- function(quantile, value, actual_value) { - weighted_interval_score(quantile, value, actual_value) - - overprediction(quantile, value, actual_value) - + weighted_interval_score(quantile, value, actual_value) - + overprediction(quantile, value, actual_value) - underprediction(quantile, value, actual_value) } # Utility functions required from evalcast that are not exported -is_symmetric <- function(x, 
tol=1e-8) { +is_symmetric <- function(x, tol = 1e-8) { x <- sort(x) all(abs(x + rev(x) - 1) < tol) } -find_quantile_match <- function(quantiles, val_to_match, tol=1e-8){ - return(abs(quantiles - val_to_match) < tol & !is.na(quantiles)) +find_quantile_match <- function(quantiles, val_to_match, tol = 1e-8) { + return(abs(quantiles - val_to_match) < tol & !is.na(quantiles)) } -get_quantile_prediction_factory <- function(val_to_match, tol=1e-8) { +get_quantile_prediction_factory <- function(val_to_match, tol = 1e-8) { get_quantile_prediction <- function(quantile, value, actual_value) { - if (all(is.na(quantile))) return(NA) + if (all(is.na(quantile))) { + return(NA) + } value <- value[!is.na(quantile)] quantile <- quantile[!is.na(quantile)] - + val <- value[find_quantile_match(quantile, val_to_match, tol)] - - if (length(val) != 1L) return(NA) - + + if (length(val) != 1L) { + return(NA) + } + return(val) } - + return(get_quantile_prediction) } -score_func_param_checker <- function(quantiles, values, actual_value, id = ""){ - id_str = paste0(id, ": ") +score_func_param_checker <- function(quantiles, values, actual_value, id = "") { + id_str <- paste0(id, ": ") if (length(actual_value) > 1) { assert_that(length(actual_value) == length(values), - msg = paste0(id_str, - "actual_value must be a scalar or the same length", - " as values")) - actual_value = unique(actual_value) + msg = paste0( + id_str, + "actual_value must be a scalar or the same length", + " as values" + ) + ) + actual_value <- unique(actual_value) } assert_that(length(actual_value) == 1, - msg = paste0(id_str, - "actual_value must have exactly 1 unique value")) + msg = paste0( + id_str, + "actual_value must have exactly 1 unique value" + ) + ) assert_that(length(quantiles) == length(values), - msg = paste0(id_str, - "quantiles and values must be of the same length")) + msg = paste0( + id_str, + "quantiles and values must be of the same length" + ) + ) assert_that(!any(duplicated(quantiles)), - msg = 
paste0(id_str, - "quantiles must be unique.")) + msg = paste0( + id_str, + "quantiles must be unique." + ) + ) } diff --git a/Report/score.R b/Report/score.R index ac61312..8b057fe 100644 --- a/Report/score.R +++ b/Report/score.R @@ -1,79 +1,103 @@ library("dplyr") library("assertthat") -save_score_cards = function(score_card, geo_type = c("state", "nation"), - signal_name = c("confirmed_incidence_num", - "deaths_incidence_num", - "confirmed_admissions_covid_1d"), - output_dir = ".") { - signal_name = match.arg(signal_name) - geo_type = match.arg(geo_type) - signals = score_card %>% - distinct(signal) %>% - pull(signal) +save_score_cards <- function(score_card, geo_type = c("state", "nation"), + signal_name = c( + "confirmed_incidence_num", + "deaths_incidence_num", + "confirmed_admissions_covid_1d" + ), + output_dir = ".") { + signal_name <- match.arg(signal_name) + geo_type <- match.arg(geo_type) + signals <- score_card %>% + distinct(signal) %>% + pull(signal) assert_that(signal_name %in% signals, - msg = "signal is not in score_card") - score_card = score_card %>% filter(signal == signal_name) - - type_map <- list("confirmed_incidence_num" = "cases", - "deaths_incidence_num" = "deaths", - "confirmed_admissions_covid_1d" = "hospitalizations") + msg = "signal is not in score_card" + ) + score_card <- score_card %>% filter(signal == signal_name) + + type_map <- list( + "confirmed_incidence_num" = "cases", + "deaths_incidence_num" = "deaths", + "confirmed_admissions_covid_1d" = "hospitalizations" + ) sig_suffix <- type_map[[signal_name]] - output_file_name = file.path(output_dir, - paste0("score_cards_", geo_type, "_", - sig_suffix, ".rds")) + output_file_name <- file.path( + output_dir, + paste0( + "score_cards_", geo_type, "_", + sig_suffix, ".rds" + ) + ) if (geo_type == "state") { - score_card = score_card %>% + score_card <- score_card %>% filter(nchar(geo_value) == 2, geo_value != "us") } else if (geo_type == "nation") { - score_card = score_card %>% + 
score_card <- score_card %>% filter(geo_value == "us") } saveRDS(score_card, - file = output_file_name, - compress = "xz") + file = output_file_name, + compress = "xz" + ) } -evaluate_chu = function(predictions, signals, err_measures) { - allowed_signals = c("confirmed_incidence_num", - "deaths_incidence_num", - "confirmed_admissions_covid_1d") +evaluate_chu <- function(predictions, signals, err_measures) { + allowed_signals <- c( + "confirmed_incidence_num", + "deaths_incidence_num", + "confirmed_admissions_covid_1d" + ) assert_that(all(signals %in% allowed_signals), - msg = paste("Signal not allowed:", - setdiff(signals, allowed_signals))) - - target_map <- list("confirmed_incidence_num" = "inc case", - "deaths_incidence_num" = "inc death", - "confirmed_admissions_covid_1d" = "inc hosp") - source_map <- list("confirmed_incidence_num" = "JHU", - "deaths_incidence_num" = "JHU", - "confirmed_admissions_covid_1d" = "HealthData") - scores = c() + msg = paste( + "Signal not allowed:", + setdiff(signals, allowed_signals) + ) + ) + + target_map <- list( + "confirmed_incidence_num" = "inc case", + "deaths_incidence_num" = "inc death", + "confirmed_admissions_covid_1d" = "inc hosp" + ) + source_map <- list( + "confirmed_incidence_num" = "JHU", + "deaths_incidence_num" = "JHU", + "confirmed_admissions_covid_1d" = "HealthData" + ) + scores <- c() for (signal_name in signals) { - preds_signal = predictions %>% + preds_signal <- predictions %>% filter(signal == signal_name) signal <- target_map[[signal_name]] source <- source_map[[signal_name]] - chu_truth = covidHubUtils::load_truth(source, signal) - chu_truth = chu_truth %>% + chu_truth <- covidHubUtils::load_truth(source, signal) + chu_truth <- chu_truth %>% rename(actual = value) %>% - select(-c(model, - target_variable, - location, - location_name, - population, - geo_type, - abbreviation)) - signal_scores = evaluate_predictions(preds_signal, - truth_data = chu_truth, - err_measures, - grp_vars = c("target_end_date", - 
"geo_value", - "ahead", - "forecaster")) - scores = rbind(scores, signal_scores) + select(-c( + model, + target_variable, + location, + location_name, + population, + geo_type, + abbreviation + )) + signal_scores <- evaluate_predictions(preds_signal, + truth_data = chu_truth, + err_measures, + grp_vars = c( + "target_end_date", + "geo_value", + "ahead", + "forecaster" + ) + ) + scores <- rbind(scores, signal_scores) } return(scores) } diff --git a/app/R/data.R b/app/R/data.R new file mode 100644 index 0000000..b7d76ec --- /dev/null +++ b/app/R/data.R @@ -0,0 +1,128 @@ +library(aws.s3) + +# Get and prepare data +getS3Bucket <- function() { + # Connect to AWS s3bucket + Sys.setenv("AWS_DEFAULT_REGION" = "us-east-2") + s3bucket <- tryCatch( + { + aws.s3::get_bucket(bucket = "forecast-eval") + }, + error = function(e) { + e + return(NULL) + } + ) + + return(s3bucket) +} + +getData <- function(filename, s3bucket) { + if (!is.null(s3bucket)) { + tryCatch( + { + aws.s3::s3readRDS(object = filename, bucket = s3bucket) + }, + error = function(e) { + e + getFallbackData(filename) + } + ) + } else { + getFallbackData(filename) + } +} + +createS3DataFactory <- function(s3bucket) { + function(filename) { + getData(filename, s3bucket) + } +} + +getFallbackData <- function(filename) { + path <- ifelse( + file.exists(filename), + filename, + file.path("../dist/", filename) + ) + readRDS(path) +} + +getAllData <- function(loadFile) { + dfStateCases <- loadFile("score_cards_state_cases.rds") + dfStateDeaths <- loadFile("score_cards_state_deaths.rds") + dfStateHospitalizations <- loadFile("score_cards_state_hospitalizations.rds") + dfNationCases <- loadFile("score_cards_nation_cases.rds") + dfNationDeaths <- loadFile("score_cards_nation_deaths.rds") + dfNationHospitalizations <- loadFile("score_cards_nation_hospitalizations.rds") + + # Pick out expected columns only + covCols <- paste0("cov_", COVERAGE_INTERVALS) + expectedCols <- c( + "ahead", "geo_value", "forecaster", 
"forecast_date", + "data_source", "signal", "target_end_date", "incidence_period", + "actual", "wis", "sharpness", "ae", "value_50", + covCols + ) + + dfStateCases <- dfStateCases %>% select(all_of(expectedCols)) + dfStateDeaths <- dfStateDeaths %>% select(all_of(expectedCols)) + dfStateHospitalizations <- dfStateHospitalizations %>% select(all_of(expectedCols)) + dfNationCases <- dfNationCases %>% select(all_of(expectedCols)) + dfNationDeaths <- dfNationDeaths %>% select(all_of(expectedCols)) + dfNationHospitalizations <- dfNationHospitalizations %>% select(all_of(expectedCols)) + + df <- rbind( + dfStateCases, dfStateDeaths, dfStateHospitalizations, + dfNationCases, dfNationDeaths, dfNationHospitalizations + ) + df <- df %>% rename( + "10" = cov_10, "20" = cov_20, "30" = cov_30, + "40" = cov_40, "50" = cov_50, "60" = cov_60, "70" = cov_70, + "80" = cov_80, "90" = cov_90, "95" = cov_95, "98" = cov_98 + ) + + return(df) +} + +createS3DataLoader <- function() { + s3bucket <- getS3Bucket() + df <- data.frame() + + getRecentData <- function() { + newS3bucket <- getS3Bucket() + + s3Contents <- s3bucket[attr(s3bucket, "names", exact = TRUE)] + newS3Contents <- newS3bucket[attr(newS3bucket, "names", exact = TRUE)] + + # Fetch new score data if contents of S3 bucket has changed (including file + # names, sizes, and last modified timestamps). Ignores characteristics of + # bucket and request, including bucket region, name, content type, request + # date, request ID, etc. + if (nrow(df) == 0 || !identical(s3Contents, newS3Contents)) { + # Save new data and new bucket connection info to vars in env of + # `getRecentDataHelper`. 
They persist between calls to `getRecentData` a + # la https://stackoverflow.com/questions/1088639/static-variables-in-r + s3bucket <<- newS3bucket + df <<- getAllData(createS3DataFactory(s3bucket)) + } + + return(df) + } + + return(getRecentData) +} + + +#' create a data loader with fallback data only +createFallbackDataLoader <- function() { + df <- getAllData(getFallbackData) + + dataLoader <- function() { + df + } + dataLoader +} + + +createDataLoader <- createS3DataLoader diff --git a/app/R/data_manipulation.R b/app/R/data_manipulation.R new file mode 100644 index 0000000..c1f5c43 --- /dev/null +++ b/app/R/data_manipulation.R @@ -0,0 +1,89 @@ + +renameScoreCol <- function(filteredScoreDf, scoreType, coverageInterval) { + if (scoreType == "wis") { + filteredScoreDf <- filteredScoreDf %>% rename(Score = wis) + } else if (scoreType == "sharpness") { + filteredScoreDf <- filteredScoreDf %>% rename(Score = sharpness) + } else if (scoreType == "ae") { + filteredScoreDf <- filteredScoreDf %>% rename(Score = ae) + } else if (scoreType == "coverage") { + filteredScoreDf <- filteredScoreDf %>% rename(Score = !!coverageInterval) + } + return(filteredScoreDf) +} + + +filterOverAllLocations <- function(filteredScoreDf, scoreType, hasAsOfData = FALSE) { + locationsIntersect <- list() + filteredScoreDf <- filteredScoreDf %>% filter(!is.na(Score)) + # Create df with col for all locations across each unique date, ahead and forecaster combo + locationDf <- filteredScoreDf %>% + group_by(forecaster, target_end_date, ahead) %>% + summarize(location_list = paste(sort(unique(geo_value)), collapse = ",")) + locationDf <- locationDf %>% filter(location_list != c("us")) + # Create a list containing each row's location list + locationList <- sapply(locationDf$location_list, function(x) strsplit(x, ",")) + locationList <- lapply(locationList, function(x) x[x != "us"]) + # Get the intersection of all the locations in these lists + locationsIntersect <- unique(Reduce(intersect, 
locationList)) + filteredScoreDf <- filteredScoreDf %>% filter(geo_value %in% locationsIntersect) + if (scoreType == "coverage") { + if (hasAsOfData) { + filteredScoreDf <- filteredScoreDf %>% + group_by(forecaster, forecast_date, target_end_date, ahead) %>% + summarize( + Score = sum(Score) / length(locationsIntersect), + actual = sum(actual), as_of_actual = sum(as_of_actual) + ) + } else { + filteredScoreDf <- filteredScoreDf %>% + group_by(forecaster, forecast_date, target_end_date, ahead) %>% + summarize(Score = sum(Score) / length(locationsIntersect), actual = sum(actual)) + } + } else { + if (hasAsOfData) { + filteredScoreDf <- filteredScoreDf %>% + group_by(forecaster, forecast_date, target_end_date, ahead) %>% + summarize(Score = sum(Score), actual = sum(actual), as_of_actual = sum(as_of_actual)) + } else { + filteredScoreDf <- filteredScoreDf %>% + group_by(forecaster, forecast_date, target_end_date, ahead) %>% + summarize(Score = sum(Score), actual = sum(actual)) + } + } + return(list(filteredScoreDf, locationsIntersect)) +} + +# Only use weekly aheads for hospitalizations +# May change in the future +filterHospitalizationsAheads <- function(scoreDf) { + scoreDf["weekday"] <- weekdays(as.Date(scoreDf$target_end_date)) + scoreDf <- scoreDf %>% filter(weekday == HOSPITALIZATIONS_TARGET_DAY) + + oneAheadDf <- scoreDf %>% + filter(ahead >= HOSPITALIZATIONS_OFFSET) %>% + filter(ahead < 7 + HOSPITALIZATIONS_OFFSET) %>% + group_by(target_end_date, forecaster) %>% + filter(ahead == min(ahead)) %>% + mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[1]) + twoAheadDf <- scoreDf %>% + filter(ahead >= 7 + HOSPITALIZATIONS_OFFSET) %>% + filter(ahead < 14 + HOSPITALIZATIONS_OFFSET) %>% + group_by(target_end_date, forecaster) %>% + filter(ahead == min(ahead)) %>% + mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[2]) + threeAheadDf <- scoreDf %>% + filter(ahead >= 14 + HOSPITALIZATIONS_OFFSET) %>% + filter(ahead < 21 + HOSPITALIZATIONS_OFFSET) %>% + group_by(target_end_date, 
forecaster) %>% + filter(ahead == min(ahead)) %>% + mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[3]) + fourAheadDf <- scoreDf %>% + filter(ahead >= 21 + HOSPITALIZATIONS_OFFSET) %>% + filter(ahead < 28 + HOSPITALIZATIONS_OFFSET) %>% + group_by(target_end_date, forecaster) %>% + filter(ahead == min(ahead)) %>% + mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[4]) + + return(rbind(oneAheadDf, twoAheadDf, threeAheadDf, fourAheadDf)) +} diff --git a/app/R/delphiLayout.R b/app/R/delphiLayout.R new file mode 100644 index 0000000..9c06d5e --- /dev/null +++ b/app/R/delphiLayout.R @@ -0,0 +1,86 @@ + +# create a common delphi header +delphiHeaderUI <- function(id = "delphi-header", title = "My App") { + # Get css file + cssFiles <- list.files(path = "www", pattern = "*.css") + + toCSSLink <- function(f) { + # append a cache buster + md5 <- tools::md5sum(paste0("www/", f)) + shiny::tags$link(rel = "stylesheet", type = "text/css", href = paste0(f, "?md5=", md5)) + } + + shiny::conditionalPanel( + condition = "window.location.search.indexOf('embed') < 0", + id = id, + class = "delphi-header", + tags$head( + lapply(cssFiles, toCSSLink), + includeHTML("assets/google-analytics.html"), + ), + useShinyjs(), + a( + class = "delphi-header-logo", href = "https://delphi.cmu.edu", + img(src = "./cmu_brand.png", alt = "Carnegie Mellon University Delphi Group") + ), + div( + class = "delphi-header-title", + h1(title), + ), + ) +} + +# delphi credits +delphiCredits <- function(title, repo) { + div( + class = "delphi-credits", + a(href = paste0(repo, "/releases/v", appVersion), paste0(title, " v", appVersion)), + tags$br(), + HTML("© 2021 Delphi Group"), + tags$br(), + a(href = repo, "Source Code"), + " under ", + a(href = paste0(repo, "/blob/main/LICENSE"), "MIT license") + ) +} + +delphiLayoutUI <- function(id = "delphi-root", title = "My App", + repo = "https://github.com/cmu-delphi/forecast-eval", + sidebar = list(), + main = list()) { + ns <- shiny::NS(id) + font <- 
bslib::font_google("Open Sans", local = FALSE) + div( + id = id, + class = "delphi-root", + delphiHeaderUI(id = ns("header"), title = title), + fluidPage( + theme = bslib::bs_theme( + version = 4, + bootswatch = "default", + primary = "#0f6ecd", + fg = "#232735", + bg = "#ffffff", + base_font = font, + ), + sidebarLayout( + div( + class = "col-sm-3 delphi-sidebar p-0 px-1", + tags$form( + class = "well", role = "complementary", + div( + class = "delphi-sidebar-container", + sidebar, + ), + delphiCredits(title, repo), + ) + ), + mainPanel( + width = 9, + class = "delphi-main-panel", + main + ), + ), + ) + ) +} diff --git a/app/R/exportScores.R b/app/R/exportScores.R new file mode 100644 index 0000000..9496d95 --- /dev/null +++ b/app/R/exportScores.R @@ -0,0 +1,74 @@ + +exportScoresUI <- function(id = "exportScores") { + ns <- shiny::NS(id) + div( + downloadButton(ns("exportScores"), "Download CSV") + ) +} + +createExportScoresDataFrame <- function(scoreDf, targetVariable, scoreType, forecasters, loc, coverageInterval) { + signalFilter <- CASE_FILTER + if (targetVariable == "Deaths") { + signalFilter <- DEATH_FILTER + } else if (targetVariable == "Hospitalizations") { + signalFilter <- HOSPITALIZATIONS_FILTER + } + scoreDf <- renameScoreCol(scoreDf, scoreType, coverageInterval) + scoreDf <- scoreDf %>% + filter(signal == signalFilter) %>% + filter(forecaster %in% forecasters) + if (loc == TOTAL_LOCATIONS || scoreType == "coverage") { + if (signalFilter == HOSPITALIZATIONS_FILTER) { + scoreDf <- filterHospitalizationsAheads(scoreDf) + } + scoreDf <- filterOverAllLocations(scoreDf, scoreType) + return(scoreDf[[1]]) + } else { + scoreDf <- scoreDf %>% filter(geo_value == tolower(loc)) + scoreDf <- scoreDf[c( + "ahead", "geo_value", "forecaster", "forecast_date", + "data_source", "target_end_date", "Score", "actual" + )] + return(scoreDf) + } +} + +generateExportFilename <- function(input) { + score <- input$scoreType + if (input$scoreType == "sharpness") { + score <- 
"spread" + } + filename <- paste0("forecast-eval-", input$targetVariable, "-", score) + if (input$location != TOTAL_LOCATIONS) { + filename <- paste0(filename, "-", input$location) + } else if (input$scoreType == "coverage") { + filename <- paste0(filename, "-", "averaged-over-common-locations-Coverage-interval-", input$coverageInterval) + } else { + filename <- paste0(filename, "-totaled-over-common-locations") + } + filename +} + +exportScoresServer <- function(id, filenameReactive, dataReactive) { + shiny::moduleServer(id, function(input, output, session) { + output$exportScores <- downloadHandler( + filename = function() { + paste0(filenameReactive(), "-", Sys.Date(), ".csv") + }, + contentType = "text/csv", + content = function(file) { + shiny::withProgress( + message = "Preparing export", + detail = "This may take a while...", + value = 0, + max = 2, + { + shiny::incProgress(1) + write.csv(dataReactive(), file, row.names = FALSE) + shiny::incProgress(2) + } + ) + } + ) + }) +} diff --git a/dashboard/about-dashboard.md b/app/assets/about-dashboard.md similarity index 100% rename from dashboard/about-dashboard.md rename to app/assets/about-dashboard.md diff --git a/dashboard/about.md b/app/assets/about.md similarity index 100% rename from dashboard/about.md rename to app/assets/about.md diff --git a/dashboard/ae.md b/app/assets/ae.md similarity index 100% rename from dashboard/ae.md rename to app/assets/ae.md diff --git a/dashboard/coverageplot.md b/app/assets/coverageplot.md similarity index 100% rename from dashboard/coverageplot.md rename to app/assets/coverageplot.md diff --git a/dashboard/google-analytics.html b/app/assets/google-analytics.html similarity index 100% rename from dashboard/google-analytics.html rename to app/assets/google-analytics.html diff --git a/dashboard/scoring-disclaimer.md b/app/assets/scoring-disclaimer.md similarity index 100% rename from dashboard/scoring-disclaimer.md rename to app/assets/scoring-disclaimer.md diff --git 
a/dashboard/sharpness.md b/app/assets/sharpness.md similarity index 100% rename from dashboard/sharpness.md rename to app/assets/sharpness.md diff --git a/dashboard/wis.md b/app/assets/wis.md similarity index 100% rename from dashboard/wis.md rename to app/assets/wis.md diff --git a/app/global.R b/app/global.R new file mode 100644 index 0000000..222329e --- /dev/null +++ b/app/global.R @@ -0,0 +1,64 @@ +library(shiny) +library(shinyjs) +library(plotly) +library(tidyr) +library(dplyr, warn.conflicts = FALSE) +library(lubridate) +library(viridis) +library(tsibble) +library(covidcast) + +appVersion <- "4.0.0" + +COVERAGE_INTERVALS <- c("10", "20", "30", "40", "50", "60", "70", "80", "90", "95", "98") +DEATH_FILTER <- "deaths_incidence_num" +CASE_FILTER <- "confirmed_incidence_num" +CASES_DEATHS_TARGET_DAY <- "Saturday" +HOSPITALIZATIONS_FILTER <- "confirmed_admissions_covid_1d" +HOSPITALIZATIONS_TARGET_DAY <- "Wednesday" +TOTAL_LOCATIONS <- "Totaled Over States*" +AHEAD_OPTIONS <- c(1, 2, 3, 4) + +# Num days to offset the forecast week by +# Example: if HOSPITALIZATIONS_TARGET_DAY is Wednesday and HOSPITALIZATIONS_OFFSET is 2, +# ahead 1 has to have forecast date of Monday or earlier, +# ahead 2 has to have forecast date of Monday + 7 days or earlier (offset + 7 days or more), etc +HOSPITALIZATIONS_OFFSET <- 2 +HOSPITALIZATIONS_AHEAD_OPTIONS <- c( + HOSPITALIZATIONS_OFFSET, HOSPITALIZATIONS_OFFSET + 7, + HOSPITALIZATIONS_OFFSET + 14, HOSPITALIZATIONS_OFFSET + 21 +) + +# Earliest 'as of' date available from covidcast API +MIN_AVAIL_NATION_AS_OF_DATE <- as.Date("2021-01-09") +MIN_AVAIL_HOSP_AS_OF_DATE <- as.Date("2020-11-11") +MIN_AVAIL_TERRITORY_AS_OF_DATE <- as.Date("2021-02-10") + +TERRITORIES <- c("AS", "GU", "MP", "VI") + +resolveCurrentCasesDeathDay <- function() { + # Get most recent target end date + # Prev Saturday for Cases and Deaths, prev Wednesday for Hospitalizations + # Since we don't upload new observed data until Sunday: + # Use 7 and 1 for Cases and 
Deaths so that Sundays will use the Saturday directly beforehand. + # (This means that on Sundays until the afternoon when the pipeline completes, the "as of" will show + # the most recent Saturday / Wednesday date even though the actual updated data won't be there yet) + prevWeek <- seq(Sys.Date() - 7, Sys.Date() - 1, by = "day") + prevWeek[weekdays(prevWeek) == CASES_DEATHS_TARGET_DAY] +} + +# Use 5 and 11 for Hospitalizations since Thurs-Sun should also not use the Wednesday directly beforehand. +# (This means that on Mondays until the afternoon when pipeline completes, the "as of" will show +# most recent Saturday / Wednesday date even though the actual updated data won't be there yet) + + +resolveCurrentHospDay <- function() { + # Get most recent target end date + # Prev Saturday for Cases and Deaths, prev Wednesday for Hospitalizations + # Since we don't upload new observed data until Sunday: + # Use 4 and 10 for Hospitalizations since Thurs-Sat should not use the Wednesday directly beforehand. 
+ # (This means that on Sundays until the afternoon when the pipeline completes, the "as of" will show + # the most recent Saturday / Wednesday date even though the actual updated data won't be there yet) + prevHospWeek <- seq(Sys.Date() - 10, Sys.Date() - 4, by = "day") + prevHospWeek[weekdays(prevHospWeek) == HOSPITALIZATIONS_TARGET_DAY] +} diff --git a/app/server.R b/app/server.R new file mode 100644 index 0000000..5da60fc --- /dev/null +++ b/app/server.R @@ -0,0 +1,780 @@ +################ +# UTIL FUNCTIONS before server definition +################ +updateForecasterChoices <- function(session, df, forecasterInput, scoreType) { + if (scoreType == "wis") { + df <- df %>% filter(!is.na(wis)) + } + if (scoreType == "ae") { + df <- df %>% filter(!is.na(ae)) + } + forecasterChoices <- unique(df$forecaster) + updateSelectInput(session, "forecasters", + choices = forecasterChoices, + selected = forecasterInput + ) +} + + +updateCoverageChoices <- function(session, df, targetVariable, forecasterChoices, coverageInput, output) { + df <- df %>% filter(forecaster %in% forecasterChoices) + df <- Filter(function(x) !all(is.na(x)), df) + coverageChoices <- intersect(colnames(df), COVERAGE_INTERVALS) + # Ensure previsouly selected options are still allowed + if (coverageInput %in% coverageChoices) { + selectedCoverage <- coverageInput + } else if ("95" %in% coverageChoices) { + selectedCoverage <- "95" + } else { + selectedCoverage <- coverageChoices[1] + } + updateSelectInput(session, "coverageInterval", + choices = coverageChoices, + selected = selectedCoverage + ) +} + + +updateLocationChoices <- function(session, df, targetVariable, forecasterChoices, locationInput) { + df <- df %>% filter(forecaster %in% forecasterChoices) + locationChoices <- unique(toupper(df$geo_value)) + # Move US to front of list + locationChoices <- locationChoices[c(length(locationChoices), seq_len(length(locationChoices) - 1))] + locationChoices <- c(TOTAL_LOCATIONS, locationChoices) + # Ensure 
previously selected options are still allowed + if (locationInput %in% locationChoices) { + selectedLocation <- locationInput + } else { + selectedLocation <- locationChoices[1] + } + updateSelectInput(session, "location", + choices = locationChoices, + selected = selectedLocation + ) +} + +updateAheadChoices <- function(session, df, targetVariable, forecasterChoices, aheads, targetVariableChange) { + df <- df %>% filter(forecaster %in% forecasterChoices) + if (targetVariable == "Hospitalizations") { + aheadOptions <- HOSPITALIZATIONS_AHEAD_OPTIONS + title <- "Forecast Horizon (Days)" + show("horizon-disclaimer") + } else { + aheadOptions <- AHEAD_OPTIONS + title <- "Forecast Horizon (Weeks)" + hide("horizon-disclaimer") + } + aheadChoices <- Filter(function(x) any(unique(df$ahead) %in% x), aheadOptions) + # Ensure previsouly selected options are still allowed + if (!is.null(aheads) && aheads %in% aheadChoices) { + selectedAheads <- aheads + } else { + selectedAheads <- aheadOptions[1] + } + # If we are changing target variable, always reset ahead selection to first option + if (targetVariableChange) { + selectedAheads <- aheadOptions[1] + } + updateCheckboxGroupInput(session, "aheads", + title, + choices = aheadChoices, + selected = selectedAheads, + inline = TRUE + ) +} +# All data is fully loaded from AWS +DATA_LOADED <- FALSE +loadData <- createDataLoader() + +server <- function(input, output, session) { + CASES_DEATHS_CURRENT <- resolveCurrentCasesDeathDay() + HOSP_CURRENT <- resolveCurrentHospDay() + + PREV_AS_OF_DATA <- reactiveVal(NULL) + AS_OF_CHOICES <- reactiveVal(NULL) + SUMMARIZING_OVER_ALL_LOCATIONS <- reactive(input$scoreType == "coverage" || input$location == TOTAL_LOCATIONS) + + + CURRENT_WEEK_END_DATE <- reactiveVal(CASES_DEATHS_CURRENT) + + + # Get scores + df <- loadData() + DATA_LOADED <- TRUE + + # Prepare input choices + forecasterChoices <- sort(unique(df$forecaster)) + updateForecasterChoices(session, df, forecasterChoices, "wis") + + + 
################## + # CREATE MAIN PLOT + ################## + summaryPlot <- function(colorSeed = 100, reRenderTruth = FALSE, asOfData = NULL) { + filteredScoreDf <- filterScoreDf() + dfWithForecasts <- NULL + if (input$showForecasts) { + dfWithForecasts <- filteredScoreDf + } + # Need to do this after setting dfWithForecasts to leave in aheads for forecasts + filteredScoreDf <- filteredScoreDf %>% filter(ahead %in% input$aheads) + if (dim(filteredScoreDf)[1] == 0) { + output$renderWarningText <- renderText(paste0( + "The selected forecasters do not have enough data ", + "to display the selected scoring metric." + )) + return() + } + if (is.null(asOfData)) { + if (!is.null(isolate(PREV_AS_OF_DATA())) && dim(isolate(PREV_AS_OF_DATA()))[1] != 0 && + isolate(input$asOf) != "" && isolate(input$asOf) != isolate(CURRENT_WEEK_END_DATE())) { + asOfData <- isolate(PREV_AS_OF_DATA()) + } + } + if (!is.null(asOfData) && dim(asOfData)[1] != 0) { + asOfData <- asOfData %>% rename(target_end_date = time_value, as_of_actual = value) + asOfData <- asOfData[c("target_end_date", "geo_value", "as_of_actual")] + + # Get the 'as of' dates that are the target_end_dates in the scoring df + dateGroupDf <- asOfData %>% filter(asOfData$target_end_date %in% filteredScoreDf$target_end_date) + if (dim(dateGroupDf)[1] != 0) { + # Since cases and deaths are shown as weekly incidence, but the "as of" data from the covidcast API + # is daily, we need to sum over the days leading up to the target_end_date of each week to get the + # weekly incidence + asOfData <- filterAsOfData(asOfData, dateGroupDf, filteredScoreDf) + filteredScoreDf <- merge(filteredScoreDf, asOfData, by = c("target_end_date", "geo_value"), all = TRUE) + } else { + # Input 'as of' date chosen does not match the available target_end_dates that result from the rest of the selected inputs + # It is too far back or we are switching between hosp and cases/deaths which have different target date days + # As of input will be updated to 
the default (latest) and plot will re-render with the just the normal truth data, no 'as of' + asOfData <- NULL + } + } + + # Totaling over all locations + if (SUMMARIZING_OVER_ALL_LOCATIONS()) { + filteredScoreDfAndIntersections <- filterOverAllLocations(filteredScoreDf, input$scoreType, !is.null(asOfData)) + filteredScoreDf <- filteredScoreDfAndIntersections[[1]] + locationsIntersect <- filteredScoreDfAndIntersections[[2]] + if (input$showForecasts) { + dfWithForecasts <- dfWithForecasts %>% filter(geo_value %in% locationsIntersect) + } + aggregateText <- "*For fair comparison, all displayed forecasters on all displayed dates are compared across a common set of states and territories." + if (input$scoreType == "coverage") { + aggregate <- "Averaged" + output$renderAggregateText <- renderText(paste( + aggregateText, + " Some forecasters may not have any data for the coverage interval chosen. Locations inlcuded: " + )) + } else { + aggregate <- "Totaled" + output$renderAggregateText <- renderText(paste(aggregateText, " Locations included: ")) + } + if (length(locationsIntersect) == 0) { + output$renderWarningText <- renderText("The selected forecasters do not have data for any locations in common on all dates.") + output$renderLocations <- renderText("") + output$renderAggregateText <- renderText("") + hideElement("truthPlot") + hideElement("refresh-colors") + return() + } else { + locationSubtitleText <- paste0(", Location: ", aggregate, " over all states and territories common to these forecasters*") + output$renderLocations <- renderText(toupper(locationsIntersect)) + output$renderWarningText <- renderText("") + showElement("truthPlot") + } + # Not totaling over all locations + } else { + if (!is.null(asOfData)) { + filteredScoreDf <- filteredScoreDf %>% + filter(geo_value == tolower(input$location)) %>% + group_by(forecaster, forecast_date, target_end_date, ahead) %>% + summarize(Score = Score, actual = actual, as_of_actual = as_of_actual) + } else { + 
filteredScoreDf <- filteredScoreDf %>% + filter(geo_value == tolower(input$location)) %>% + group_by(forecaster, forecast_date, target_end_date, ahead) %>% + summarize(Score = Score, actual = actual) + } + locationSubtitleText <- paste0(", Location: ", input$location) + output$renderAggregateText <- renderText("") + output$renderLocations <- renderText("") + output$renderWarningText <- renderText("") + } + + showElement("refresh-colors") + if (nrow(filteredScoreDf) == 0) { + # no data to show + return() + } + + # Rename columns that will be used as labels and for clarity on CSV exports + filteredScoreDf <- filteredScoreDf %>% rename( + Forecaster = forecaster, Forecast_Date = forecast_date, + Week_End_Date = target_end_date + ) + + # Set forecaster colors for plot + set.seed(colorSeed) + forecasterRand <- sample(unique(df$forecaster)) + colorPalette <- setNames(object = viridis(length(unique(df$forecaster))), nm = forecasterRand) + if (!is.null(asOfData)) { + colorPalette["Reported_Incidence"] <- "grey" + colorPalette["Reported_As_Of_Incidence"] <- "black" + } + + # Render truth plot with observed values + truthDf <- filteredScoreDf + output$truthPlot <- renderPlotly({ + truthPlot(truthDf, locationsIntersect, !is.null(asOfData), dfWithForecasts, colorPalette) + }) + + # If we are just re-rendering the truth plot with as of data + # we don't need to re-render the score plot + if (reRenderTruth) { + return() + } + # If we are re-rendering scoring plot with new inputs that were just selected + # we need to make sure the as of input options are valid with those inputs + updateAsOfChoices(session, truthDf) + + # Format and transform data for plot + filteredScoreDf <- filteredScoreDf %>% filter(!is.na(Week_End_Date)) + filteredScoreDf <- filteredScoreDf[c("Forecaster", "Forecast_Date", "Week_End_Date", "Score", "ahead")] + filteredScoreDf <- filteredScoreDf %>% mutate(across(where(is.numeric), ~ round(., 2))) + if (input$scoreType != "coverage") { + if 
(input$scaleByBaseline) { + baselineDf <- filteredScoreDf %>% filter(Forecaster %in% "COVIDhub-baseline") + filteredScoreDfMerged <- merge(filteredScoreDf, baselineDf, by = c("Week_End_Date", "ahead")) + # Scaling score by baseline forecaster + filteredScoreDfMerged$Score.x <- filteredScoreDfMerged$Score.x / filteredScoreDfMerged$Score.y + filteredScoreDf <- filteredScoreDfMerged %>% + rename(Forecaster = Forecaster.x, Score = Score.x, Forecast_Date = Forecast_Date.x) %>% + select(Forecaster, Forecast_Date, Week_End_Date, ahead, Score) + } + if (input$logScale) { + filteredScoreDf$Score <- log10(filteredScoreDf$Score) + } + } + + # Title plot + if (input$scoreType == "wis") { + plotTitle <- "Weighted Interval Score" + } else if (input$scoreType == "sharpness") { + plotTitle <- "Spread" + } else if (input$scoreType == "ae") { + plotTitle <- "Absolute Error" + } else { + plotTitle <- "Coverage" + } + + titleText <- paste0( + "", plotTitle, "", "
", "", + "Target Variable: ", input$targetVariable, + locationSubtitleText, "
", + tags$span(id = "drag-to-zoom", " Drag to zoom"), + "
" + ) + + # Fill gaps so there are line breaks on weeks without data + # This is failing for CU-select on US deaths (https://github.com/cmu-delphi/forecast-eval/issues/157) + filteredScoreDf <- filteredScoreDf %>% + as_tsibble(key = c(Forecaster, ahead), index = Week_End_Date) %>% + group_by(Forecaster, Forecast_Date, ahead) %>% + fill_gaps(.full = TRUE) + # Set labels for faceted horizon plots + horizonOptions <- AHEAD_OPTIONS + horizonLabels <- lapply(AHEAD_OPTIONS, function(x) paste0("Horizon: ", x, " Week(s)")) + if (input$targetVariable == "Hospitalizations") { + horizonOptions <- HOSPITALIZATIONS_AHEAD_OPTIONS + horizonLabels <- lapply(HOSPITALIZATIONS_AHEAD_OPTIONS, function(x) paste0("Horizon: ", x, " Days")) + } + filteredScoreDf$ahead <- factor(filteredScoreDf$ahead, + levels = horizonOptions, + labels = horizonLabels + ) + + p <- ggplot( + filteredScoreDf, + aes(x = Week_End_Date, y = Score, color = Forecaster, shape = Forecaster, label = Forecast_Date) + ) + + geom_line() + + geom_point(size = 2) + + labs(x = "", y = "", title = titleText) + + scale_x_date(date_labels = "%b %Y") + + facet_wrap(~ahead, ncol = 1) + + scale_color_manual(values = colorPalette) + + theme_bw() + + theme(panel.spacing = unit(0.5, "lines")) + + theme(legend.title = element_blank()) + + if (input$scoreType == "coverage") { + p <- p + geom_hline(yintercept = .01 * as.integer(input$coverageInterval)) + } + if (input$logScale) { + p <- p + scale_y_continuous(label = function(x) paste0("10^", x)) + } else { + p <- p + scale_y_continuous(limits = c(0, NA), labels = scales::comma) + } + plotHeight <- 550 + (length(input$aheads) - 1) * 100 + finalPlot <- + ggplotly(p, tooltip = c("x", "y", "shape", "label")) %>% + layout( + height = plotHeight, + legend = list(orientation = "h", y = -0.1), + margin = list(t = 90), + height = 500, + hovermode = "x unified", + xaxis = list( + title = list(text = "Target Date", standoff = 8L), + titlefont = list(size = 12) + ) + ) %>% + 
config(displayModeBar = F) + + return(finalPlot) + } + + ################### + # CREATE TRUTH PLOT + ################### + # Create the plot for target variable ground truth + truthPlot <- function(filteredDf = NULL, locationsIntersect = NULL, hasAsOfData = FALSE, dfWithForecasts = NULL, colorPalette = NULL) { + observation <- paste0("Incident ", input$targetVariable) + if (input$targetVariable == "Hospitalizations") { + observation <- paste0("Hospital Admissions") + } + titleText <- paste0("Observed ", observation, "") + if (SUMMARIZING_OVER_ALL_LOCATIONS()) { + titleText <- paste0("Observed ", observation, "", "
Totaled over all states and territories common to selected forecasters*") + } + + if (hasAsOfData) { + filteredDf <- filteredDf %>% + group_by(Week_End_Date) %>% + summarize(Forecaster = Forecaster, Reported_Incidence = actual, Reported_As_Of_Incidence = as_of_actual) %>% + distinct() + if (input$showForecasts) { + filteredDf <- filterForecastData(filteredDf, dfWithForecasts) + } + } else { + filteredDf <- filteredDf %>% + group_by(Week_End_Date) %>% + summarize(Reported_Incidence = actual) + } + + finalPlot <- ggplot(filteredDf, aes(x = Week_End_Date)) + + labs(x = "", y = "", title = titleText) + + scale_y_continuous(limits = c(0, NA), labels = scales::comma) + + scale_x_date(date_labels = "%b %Y") + + scale_color_manual(values = colorPalette) + + theme_bw() + + theme(legend.title = element_blank()) + + if (hasAsOfData) { + finalPlot <- finalPlot + + geom_line(aes(y = Reported_Incidence, color = "Reported_Incidence")) + + geom_point(aes(y = Reported_Incidence, color = "Reported_Incidence")) + + geom_line(aes(y = Reported_As_Of_Incidence, color = "Reported_As_Of_Incidence")) + + geom_point(aes(y = Reported_As_Of_Incidence, color = "Reported_As_Of_Incidence")) + if (input$showForecasts) { + finalPlot <- finalPlot + + geom_line(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) + + geom_point(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) + } + } else { + finalPlot <- finalPlot + geom_line(aes(y = Reported_Incidence)) + + geom_point(aes(y = Reported_Incidence)) + } + finalPlot <- ggplotly(finalPlot, tooltip = c("shape", "x", "y")) %>% + layout(hovermode = "x unified", legend = list(orientation = "h", y = -0.1)) %>% + config(displayModeBar = F) + # Remove the extra grouping from the legend: "(___,1)" + for (i in seq_along(finalPlot$x$data)) { + if (!is.null(finalPlot$x$data[[i]]$name)) { + finalPlot$x$data[[i]]$name <- gsub("\\(", "", stringr::str_split(finalPlot$x$data[[i]]$name, ",")[[1]][1]) + } + } + return(finalPlot) + } + + 
############# + # PLOT OUTPUT + ############# + output$summaryPlot <- renderPlotly({ + summaryPlot() + }) + + # Filter scoring df by inputs chosen (targetVariable, forecasters, aheads) + filterScoreDf <- function() { + signalFilter <- CASE_FILTER + if (input$targetVariable == "Deaths") { + signalFilter <- DEATH_FILTER + } + if (input$targetVariable == "Hospitalizations") { + signalFilter <- HOSPITALIZATIONS_FILTER + } + filteredScoreDf <- df %>% + filter(signal == signalFilter) %>% + filter(forecaster %in% input$forecasters) + + if (signalFilter == HOSPITALIZATIONS_FILTER) { + filteredScoreDf <- filterHospitalizationsAheads(filteredScoreDf) + } + if (input$scoreType == "wis" || input$scoreType == "sharpness") { + # Only show WIS or Sharpness for forecasts that have all intervals + filteredScoreDf <- filteredScoreDf %>% + filter(!is.na(`50`)) %>% + filter(!is.na(`80`)) %>% + filter(!is.na(`95`)) + if (input$targetVariable == "Deaths") { + filteredScoreDf <- filteredScoreDf %>% + filter(!is.na(`10`)) %>% + filter(!is.na(`20`)) %>% + filter(!is.na(`30`)) %>% + filter(!is.na(`40`)) %>% + filter(!is.na(`60`)) %>% + filter(!is.na(`70`)) %>% + filter(!is.na(`90`)) %>% + filter(!is.na(`98`)) + } + } + filteredScoreDf <- renameScoreCol(filteredScoreDf, input$scoreType, input$coverageInterval) + return(filteredScoreDf) + } + + # Filter as of data so that it matches weekly incidence for the target end dates in the score df + filterAsOfData <- function(asOfData, dateGroupDf, filteredScoreDf) { + # Hospitalization scores are shown as daily incidence, not weekly incidence, no summing necessary + if (input$targetVariable != "Hospitalizations") { + # Create a df to fill in the corresponding target_end_date in a new date_group column for all intervening days + dateGroupDf[, "date_group"] <- NA + dateGroupDf$date_group <- dateGroupDf$target_end_date + asOfData <- merge(asOfData, dateGroupDf, by = c("target_end_date", "geo_value", "as_of_actual"), all = TRUE) + + # Cut off the extra 
days on beginning and end of series so that when we sum the values we are only + # summing over the weeks included in the score plot + asOfData <- asOfData %>% filter(target_end_date >= min(filteredScoreDf$target_end_date) - 6) + asOfData <- asOfData %>% filter(target_end_date <= isolate(input$asOf)) + + # Fill in the date_group column with the target week end days for all intervening days + asOfData <- asOfData %>% + arrange(geo_value) %>% + fill(date_group, .direction = "up") + + # In the case where there are target week end days missing from the scoring or as of data + # we don't want to end up summing values over multiple weeks so we make sure each date_group only spans one week + asOfData <- asOfData %>% filter(asOfData$date_group - asOfData$target_end_date < 7) + + asOfData <- asOfData[c("geo_value", "as_of_actual", "date_group")] + # Sum over preceding week for all weekly target variables + asOfData <- asOfData %>% + group_by(geo_value, date_group) %>% + summarize(as_of_actual = sum(as_of_actual)) + asOfData <- asOfData %>% rename(target_end_date = date_group) + # If targetVariable is Hospitalizations + } else { + asOfData <- dateGroupDf + # Need to make sure that we are only matching the target_end_dates shown in the scoring plot + # and not using fetched data for as of dates before those target_end_dates. + # This is taken care of above for cases and deaths. 
+ minDate <- min(filteredScoreDf$target_end_date) + if (!SUMMARIZING_OVER_ALL_LOCATIONS()) { + chosenLocationDf <- filteredScoreDf %>% filter(geo_value == tolower(input$location)) + minDate <- min(chosenLocationDf$target_end_date) + } + asOfData <- asOfData %>% filter(target_end_date >= minDate) + } + return(asOfData) + } + + filterForecastData <- function(filteredDf, dfWithForecasts) { + dfWithForecasts <- dfWithForecasts %>% + rename(Week_End_Date = target_end_date, Forecaster = forecaster, Quantile_50 = value_50) + if (!SUMMARIZING_OVER_ALL_LOCATIONS()) { + dfWithForecasts <- dfWithForecasts %>% filter(geo_value == tolower(input$location)) + } else { + # Sum the predictions for all included locations + dfWithForecasts <- dfWithForecasts %>% + group_by(Forecaster, forecast_date, Week_End_Date, ahead) %>% + summarize(Quantile_50 = sum(Quantile_50)) + } + # We want the forecasts to be later than latest as of date with data + lastEndDate <- tail(filteredDf %>% filter(!is.na(Reported_As_Of_Incidence)), n = 1)$Week_End_Date[1] + dfWithForecasts <- dfWithForecasts %>% + filter(forecast_date >= lastEndDate) %>% + group_by(Week_End_Date) %>% + summarize(Forecaster, forecast_date, Quantile_50) + + # Get the next as of choice available in dropdown menu + dfWithForecasts <- dfWithForecasts[order(dfWithForecasts$forecast_date), ] + AS_OF_CHOICES(sort(AS_OF_CHOICES() %>% unique())) + nextAsOfInList <- AS_OF_CHOICES()[which.min(abs(AS_OF_CHOICES() - dfWithForecasts$forecast_date[1])) + 1] + + # Take only those forecasts with a forecast date before the next as of date in dropdown + # aka within the week after the current as of shown + if (length(nextAsOfInList) != 0 && !is.na(nextAsOfInList)) { + dfWithForecasts <- dfWithForecasts %>% + filter(forecast_date < nextAsOfInList) + } + + # Hospitalizations will have multiple forecast dates within this target week + # So we want to take the earliest forecast date for each forecaster & week end date pair + if (input$targetVariable == 
"Hospitalizations") { + dfWithForecasts <- dfWithForecasts %>% + group_by(Week_End_Date, Forecaster) %>% + top_n(n = 1, wt = desc(forecast_date)) + dfWithForecasts <- dfWithForecasts %>% + group_by(Forecaster) %>% + filter(forecast_date == first(forecast_date)) + } + filteredDf <- merge(filteredDf, dfWithForecasts, by = c("Week_End_Date", "Forecaster"), all = TRUE) %>% + group_by(Week_End_Date) %>% + select(Quantile_50, Forecaster, Reported_Incidence, Reported_As_Of_Incidence) + # Remove rows of NAs + filteredDf <- filteredDf %>% filter(!is.null(Forecaster)) + filteredDf <- filteredDf %>% + arrange(Week_End_Date) %>% + fill(Reported_Incidence, .direction = "downup") + return(filteredDf) + } + + ################### + # EVENT OBSERVATION + ################### + + observeEvent(input$refreshColors, { + colorSeed <- floor(runif(1, 1, 1000)) + output$summaryPlot <- renderPlotly({ + summaryPlot(colorSeed) + }) + }) + + # When the target variable changes, update available forecasters, locations, and CIs to choose from + observeEvent(input$targetVariable, { + CURRENT_WEEK_END_DATE(CASES_DEATHS_CURRENT) + if (input$targetVariable == "Deaths") { + df <- df %>% filter(signal == DEATH_FILTER) + } else if (input$targetVariable == "Cases") { + df <- df %>% filter(signal == CASE_FILTER) + } else { + df <- df %>% filter(signal == HOSPITALIZATIONS_FILTER) + CURRENT_WEEK_END_DATE(HOSP_CURRENT) + } + + updateAheadChoices(session, df, input$targetVariable, input$forecasters, input$aheads, TRUE) + updateForecasterChoices(session, df, input$forecasters, input$scoreType) + updateLocationChoices(session, df, input$targetVariable, input$forecasters, input$location) + updateCoverageChoices(session, df, input$targetVariable, input$forecasters, input$coverageInterval, output) + updateAsOfData() + }) + + observeEvent(input$scoreType, { + if (input$targetVariable == "Deaths") { + df <- df %>% filter(signal == DEATH_FILTER) + } else if (input$targetVariable == "Cases") { + df <- df %>% 
filter(signal == CASE_FILTER) + } else { + df <- df %>% filter(signal == HOSPITALIZATIONS_FILTER) + } + # Only show forecasters that have data for the score chosen + updateForecasterChoices(session, df, input$forecasters, input$scoreType) + + # If we are switching between coverage and other score types we need to + # update the as of data we have so it matches the correct locations shown + if (input$location == "US") { + updateAsOfData() + } + + if (input$asOf != "" && input$asOf == CURRENT_WEEK_END_DATE()) { + hideElement("showForecastsCheckbox") + } else { + showElement("showForecastsCheckbox") + } + if (input$scoreType == "wis") { + show("wisExplanation") + hide("sharpnessExplanation") + hide("aeExplanation") + hide("coverageExplanation") + } + if (input$scoreType == "sharpness") { + show("sharpnessExplanation") + hide("wisExplanation") + hide("aeExplanation") + hide("coverageExplanation") + } + if (input$scoreType == "ae") { + hide("wisExplanation") + hide("sharpnessExplanation") + show("aeExplanation") + hide("coverageExplanation") + } + if (input$scoreType == "coverage") { + hide("wisExplanation") + hide("sharpnessExplanation") + hide("aeExplanation") + show("coverageExplanation") + } + }) + + # When forecaster selections change, update available aheads, locations, and CIs to choose from + observeEvent(input$forecasters, { + if (input$targetVariable == "Deaths") { + df <- df %>% filter(signal == DEATH_FILTER) + } else if (input$targetVariable == "Cases") { + df <- df %>% filter(signal == CASE_FILTER) + } else { + df <- df %>% filter(signal == HOSPITALIZATIONS_FILTER) + } + df <- df %>% filter(forecaster %in% input$forecasters) + + updateAheadChoices(session, df, input$targetVariable, input$forecasters, input$aheads, FALSE) + updateLocationChoices(session, df, input$targetVariable, input$forecasters, input$location) + updateCoverageChoices(session, df, input$targetVariable, input$forecasters, input$coverageInterval, output) + }) + + 
observeEvent(input$location, { + updateAsOfData() + # Only show forecast check box option if we are showing as of data + if (input$asOf != "" && input$asOf == CURRENT_WEEK_END_DATE()) { + hideElement("showForecastsCheckbox") + } else { + showElement("showForecastsCheckbox") + } + }) + + observeEvent(input$asOf, { + updateAsOfData() + # Only show forecast check box option if we are showing as of data + if (input$asOf != "" && input$asOf == CURRENT_WEEK_END_DATE()) { + hideElement("showForecastsCheckbox") + } else { + showElement("showForecastsCheckbox") + } + }) + + # The following checks ensure the minimum necessary input selections + observe({ + # Show data loading message and hide other messages until all data is loaded + if (DATA_LOADED) { + hide("data-loading-message") + show("refresh-colors") + show("notes") + show("scoreExplanations") + show("scoringDisclaimer") + } + # Ensure there is always one ahead selected + if (length(input$aheads) < 1) { + if (input$targetVariable == "Hospitalizations") { + updateCheckboxGroupInput(session, "aheads", + selected = HOSPITALIZATIONS_AHEAD_OPTIONS[1] + ) + } else { + updateCheckboxGroupInput(session, "aheads", + selected = AHEAD_OPTIONS[1] + ) + } + } + # Ensure there is always one forecaster selected + if (length(input$forecasters) < 1) { + updateSelectInput(session, "forecasters", + selected = c("COVIDhub-ensemble") + ) # Use ensemble rather than baseline bc it has hospitalization scores + } + # Ensure COVIDhub-baseline is selected when scaling by baseline + if (input$scaleByBaseline && !("COVIDhub-baseline" %in% input$forecasters)) { + updateSelectInput(session, "forecasters", selected = c(input$forecasters, "COVIDhub-baseline")) + } + }) + + updateAsOfData <- function() { + dataSource <- "jhu-csse" + if (input$targetVariable == "Cases") { + targetSignal <- "confirmed_incidence_num" + } else if (input$targetVariable == "Deaths") { + targetSignal <- "deaths_incidence_num" + } else if (input$targetVariable == 
"Hospitalizations") { + targetSignal <- "confirmed_admissions_covid_1d" + dataSource <- "hhs" + } + + if (input$location == "US" && input$scoreType != "coverage") { + location <- "nation" + } else { + location <- "state" + } + if (input$asOf == "") { + return() + } + if (input$asOf < CURRENT_WEEK_END_DATE()) { + hideElement("truthPlot") + hideElement("notes") + hideElement("scoringDisclaimer") + hideElement("scoreExplanations") + hideElement("renderAggregateText") + hideElement("renderLocations") + showElement("truth-plot-loading-message") + + # Since as_of matches to the issue date in covidcast (rather than the time_value) + # we need to add one extra day to get the as of we want. + fetchDate <- as.Date(input$asOf) + 1 + + # Covidcast API call + asOfTruthData <- covidcast_signal( + data_source = dataSource, signal = targetSignal, + start_day = "2020-02-15", end_day = fetchDate, + as_of = fetchDate, + geo_type = location + ) + showElement("truthPlot") + showElement("notes") + showElement("scoringDisclaimer") + showElement("scoreExplanations") + showElement("renderAggregateText") + showElement("renderLocations") + hideElement("truth-plot-loading-message") + PREV_AS_OF_DATA(asOfTruthData) + + if (dim(asOfTruthData)[1] == 0) { + return() + } + summaryPlot(reRenderTruth = TRUE, asOfData = asOfTruthData) + } else if (input$asOf == CURRENT_WEEK_END_DATE()) { + summaryPlot(reRenderTruth = TRUE) + } + } + + updateAsOfChoices <- function(session, truthDf) { + asOfChoices <- truthDf$Week_End_Date + selectedAsOf <- isolate(input$asOf) + if (input$targetVariable == "Hospitalizations") { + minChoice <- MIN_AVAIL_HOSP_AS_OF_DATE + asOfChoices <- asOfChoices[asOfChoices >= minChoice] + } else if (input$location == "US" && input$scoreType != "coverage") { + minChoice <- MIN_AVAIL_NATION_AS_OF_DATE + asOfChoices <- asOfChoices[asOfChoices >= minChoice] + } else if (input$location %in% TERRITORIES || input$location == TOTAL_LOCATIONS || input$scoreType == "coverage") { + minChoice 
<- MIN_AVAIL_TERRITORY_AS_OF_DATE + asOfChoices <- asOfChoices[asOfChoices >= minChoice] + } + asOfChoices <- c(asOfChoices, CURRENT_WEEK_END_DATE()) + # Make sure we have a valid as of selection + nonValidAsOf <- selectedAsOf == "" || !(as.Date(selectedAsOf) %in% asOfChoices) + if (length(asOfChoices) != 0 && nonValidAsOf) { + selectedAsOf <- max(asOfChoices, na.rm = TRUE) + } + AS_OF_CHOICES(asOfChoices) + updateSelectInput(session, "asOf", + choices = sort(asOfChoices), + selected = selectedAsOf + ) + } + + exportScoresServer( + "exportScores", shiny::reactive(generateExportFilename(input)), + shiny::reactive(createExportScoresDataFrame( + df, input$targetVariable, input$scoreType, input$forecasters, + input$location, input$coverageInterval + )) + ) +} diff --git a/app/ui.R b/app/ui.R new file mode 100644 index 0000000..6c6293f --- /dev/null +++ b/app/ui.R @@ -0,0 +1,187 @@ +# Score explanations +wisExplanation <- includeMarkdown("assets/wis.md") +sharpnessExplanation <- includeMarkdown("assets/sharpness.md") +aeExplanation <- includeMarkdown("assets/ae.md") +coverageExplanation <- includeMarkdown("assets/coverageplot.md") +scoringDisclaimer <- includeMarkdown("assets/scoring-disclaimer.md") + +# About page content +aboutPageText <- includeMarkdown("assets/about.md") +aboutDashboardText <- includeMarkdown("assets/about-dashboard.md") + +######## +# Layout +######## + +sidebar <- tags$div( + conditionalPanel( + condition = "input.tabset == 'evaluations'", + radioButtons("targetVariable", "Target Variable", + choices = list( + "Incident Deaths" = "Deaths", + "Incident Cases" = "Cases", + "Hospital Admissions" = "Hospitalizations" + ) + ), + radioButtons("scoreType", "Scoring Metric", + choices = list( + "Weighted Interval Score" = "wis", + "Spread" = "sharpness", + "Absolute Error" = "ae", + "Coverage" = "coverage" + ) + ), + conditionalPanel( + condition = "input.scoreType != 'coverage'", + class = "checkbox-grouper", + tags$div(class = "control-label", "Y-Axis 
Score Scale"), + checkboxInput( + "logScale", + "Log Scale", + value = FALSE, + ), + conditionalPanel( + condition = "input.targetVariable != 'Hospitalizations'", + checkboxInput( + "scaleByBaseline", + "Scale by Baseline Forecaster", + value = FALSE, + ) + ), + ), + selectInput( + "forecasters", + tags$div("Forecasters", tags$div(id = "forecaster-input", "Type a name or select from dropdown")), + choices = c("COVIDhub-baseline", "COVIDhub-ensemble"), + multiple = TRUE, + selected = c("COVIDhub-baseline", "COVIDhub-ensemble") + ), + tags$p( + id = "missing-data-disclaimer", + "Some forecasters may not have data for the chosen location or scoring metric" + ), + checkboxGroupInput( + "aheads", + "Forecast Horizon (Weeks)", + choices = AHEAD_OPTIONS, + selected = AHEAD_OPTIONS[1], + inline = TRUE + ), + hidden(tags$p( + id = "horizon-disclaimer", + "Forecasters submitted earlier than Mondays may have longer actual prediction horizons" + )), + conditionalPanel( + condition = "input.scoreType == 'coverage'", + selectInput( + "coverageInterval", + "Coverage Interval", + choices = "", + multiple = FALSE, + selected = "95" + ), + ), + conditionalPanel( + condition = "input.scoreType != 'coverage'", + selectInput( + "location", + "Location", + choices = "", + multiple = FALSE, + selected = "US" + ) + ), + selectInput( + "asOf", + "As Of", + choices = "", + multiple = FALSE, + selected = "" + ), + tags$p(id = "missing-data-disclaimer", "Some locations may not have 'as of' data for the chosen 'as of' date"), + hidden(div( + id = "showForecastsCheckbox", + checkboxInput( + "showForecasts", + "Show Forecasters' Predictions", + value = FALSE, + ) + )), + tags$hr(), + exportScoresUI("exportScores"), + tags$hr() + ), + aboutDashboardText, + tags$hr() +) + +main <- tabsetPanel( + id = "tabset", + selected = "evaluations", + tabPanel( + "About", + fluidRow(column( + 10, + div( + id = "aboutContentArea", + aboutPageText, + tags$br(), + h3("Explanation of Scoring Methods"), + 
h4("Weighted Interval Score"), + wisExplanation, + h4("Spread"), + sharpnessExplanation, + h4("Absolute Error"), + aeExplanation, + h4("Coverage"), + coverageExplanation + ), + tags$br() + )), + ), + tabPanel("Evaluation Plots", + value = "evaluations", + fluidRow(column(11, textOutput("renderWarningText"))), + plotlyOutput(outputId = "summaryPlot", height = "auto"), + fluidRow( + column(11, + offset = 1, + hidden(div(id = "refresh-colors", actionButton(inputId = "refreshColors", label = "Recolor"))) + ) + ), + tags$br(), + plotlyOutput(outputId = "truthPlot", height = "auto"), + fluidRow( + column(11, + offset = 1, + div(id = "data-loading-message", "DATA IS LOADING...(this may take a while)"), + hidden(div(id = "truth-plot-loading-message", "Fetching 'as of' data and loading observed values...")), + hidden(div(id = "notes", "About the Scores")), + hidden(div( + id = "scoreExplanations", + hidden(div(id = "wisExplanation", wisExplanation)), + hidden(div(id = "sharpnessExplanation", sharpnessExplanation)), + hidden(div(id = "aeExplanation", aeExplanation)), + hidden(div(id = "coverageExplanation", coverageExplanation)) + )), + hidden(div(id = "scoringDisclaimer", scoringDisclaimer)) + ) + ), + fluidRow( + column(11, + offset = 1, + textOutput("renderLocationText"), + textOutput("renderAggregateText"), + textOutput("renderLocations"), + tags$br() + ) + ) + ) +) + +ui <- delphiLayoutUI( + title = "Forecast Evaluation Dashboard", + repo = "https://github.com/cmu-delphi/forecast-eval", + sidebar = sidebar, + main = main +) diff --git a/dashboard/www/cmu_brand.png b/app/www/cmu_brand.png similarity index 100% rename from dashboard/www/cmu_brand.png rename to app/www/cmu_brand.png diff --git a/app/www/delphiLayout.css b/app/www/delphiLayout.css new file mode 100644 index 0000000..2e3afe3 --- /dev/null +++ b/app/www/delphiLayout.css @@ -0,0 +1,122 @@ +html { + font-family: Open Sans, Roboto, Arial, sans-serif; + font-size: 16px; + font-weight: normal; + line-height: 
1.5; + background: white; + color: #666666; + padding: 0; + margin: 0; +} + +.delphi-header { + box-shadow: rgba(0, 0, 0, 0.15) 0px 3px 5px -1px; + padding: 20px 15px 0 15px; + margin-bottom: 8px; + display: -webkit-flex; + display: flex; +} + +.delphi-header-logo { + position: relative; + width: 220px; + height: 50px; + margin-right: 30px; + margin-bottom: 5px; +} + +.delphi-header-logo > img { + width: 100%; + height: 100%; + object-fit: contain; + object-position: left; +} + +.delphi-header-title h1 { + text-transform: uppercase; + font-size: 1.5rem; + letter-spacing: 0.05em; + font-weight: 300; + margin: 0; +} + +.delphi-root { + display: -webkit-flex; + display: flex; + min-height: 100vh; + -webkit-flex-direction: column; + flex-direction: column; +} + +.delphi-root > .container-fluid { + -webkit-flex-grow: 1; + flex-grow: 1; + display: -webkit-flex; + display: flex; + margin: 0; +} + +.delphi-root > .container-fluid > .row { + -webkit-flex-grow: 1; + flex-grow: 1; +} + +.delphi-credits { + font-size: 0.75rem; +} + +.delphi-sidebar { + display: -webkit-flex; + display: flex; +} + +.delphi-sidebar > .well { + -webkit-flex-grow: 1; + flex-grow: 1; + display: -webkit-flex; + display: flex; + background: #fafafc; +} + +.delphi-sidebar .delphi-sidebar-container { + -webkit-flex-grow: 1; + flex-grow: 1; +} + +a:active, +a:hover { + outline: none; +} + +a { + color: #1e87f0; + text-decoration: none; + cursor: pointer; +} + +a:hover { + color: #0f6ecd; + text-decoration: underline; +} + +.delphi-main-panel { + padding: 0; + display: -webkit-flex; + display: flex; +} + +.delphi-main-panel > .tabbable { + -webkit-flex-grow: 1; + flex-grow: 1; + display: -webkit-flex; + display: flex; + flex-direction: column; + -webkit-flex-direction: column; +} + +.delphi-main-panel > .tabbable > .tab-content { + padding: 0.5rem; + border-left: 1px solid #bdbec2; + -webkit-flex-grow: 1; + flex-grow: 1; +} diff --git a/dashboard/www/forecast-hub-logo.png b/app/www/forecast-hub-logo.png 
similarity index 100% rename from dashboard/www/forecast-hub-logo.png rename to app/www/forecast-hub-logo.png diff --git a/app/www/style.css b/app/www/style.css new file mode 100644 index 0000000..53e60d3 --- /dev/null +++ b/app/www/style.css @@ -0,0 +1,45 @@ +/* +.shiny-output-error { visibility: hidden; } +.shiny-output-error:before { visibility: hidden; } + */ + +#aboutContentArea{ + max-width: 70rem; +} +#notes { + font-weight: 600; + font-size: 18px; +} +#forecaster-input { + font-weight:normal; + font-size:13px; +} +#missing-data-disclaimer { + margin-top:-20px; + font-size:12px; +} +#horizon-disclaimer { + margin-top:-10px; + font-size:12px; +} +#drag-to-zoom { + font-size:11px; +} +#data-loading-message { + font-style: italic; + font-size: 18px; +} +#truth-plot-loading-message { + margin-top:50px; + font-size: 18px; + font-style:italic; + color: #949494; +} + +.control-label { + font-weight: 600; +} + +.checkbox-grouper > .form-group { + margin-bottom: 0; +} diff --git a/csv2rds.R b/csv2rds.R index ea08b8a..040952b 100644 --- a/csv2rds.R +++ b/csv2rds.R @@ -2,7 +2,7 @@ library(readr) -args = commandArgs(trailingOnly=TRUE) +args <- commandArgs(trailingOnly = TRUE) file_name <- args[1] df <- read_csv(file_name) diff --git a/dashboard/app.R b/dashboard/app.R deleted file mode 100644 index e058dae..0000000 --- a/dashboard/app.R +++ /dev/null @@ -1,1060 +0,0 @@ -library(shiny) -library(tidyr) -library(dplyr) -library(lubridate) -library(ggplot2) -library(viridis) -library(plotly) -library(shinyjs) -library(tsibble) -library(aws.s3) -library(covidcast) -library(stringr) -library(memoise) - -source('./common.R') - -# Set application-level caching location. Stores up to 1GB of caches. Removes -# least recently used objects first. -shinyOptions(cache = cachem::cache_mem(max_size = 1000 * 1024^2, evict="lru")) -cache <- getShinyOption("cache") - -# Since covidcast data updates about once a day. Add date arg to -# covidcast_signal so caches aren't used after that. 
-covidcast_signal_mem <- function(..., date=Sys.Date()) { - return(covidcast_signal(...)) -} -covidcast_signal_mem <- memoise(covidcast_signal_mem, cache = cache) - -# All data is fully loaded from AWS -DATA_LOADED = FALSE - -# Earliest 'as of' date available from covidcast API -MIN_AVAIL_NATION_AS_OF_DATE = as.Date('2021-01-09') -MIN_AVAIL_HOSP_AS_OF_DATE = as.Date('2020-11-11') -MIN_AVAIL_TERRITORY_AS_OF_DATE = as.Date('2021-02-10') - -# Score explanations -wisExplanation = includeMarkdown("wis.md") -sharpnessExplanation = includeMarkdown("sharpness.md") -aeExplanation = includeMarkdown("ae.md") -coverageExplanation = includeMarkdown("coverageplot.md") -scoringDisclaimer = includeMarkdown("scoring-disclaimer.md") - -# About page content -aboutPageText = includeMarkdown("about.md") - -# Get css file -cssFiles = list.files(path="www",pattern="style*") -if(length(cssFiles)!=1){ - cat(file=stderr(),"Error: couldn't load style files\n") -} -cssFile = cssFiles[1] -cat(file=stderr(),"Loaded css file:",cssFile,"\n") - -source('./export_scores.R') - -######## -# Layout -######## - -ui <- fluidPage(padding=0, title="Forecast Eval Dashboard", - tags$head( - tags$link(rel = "stylesheet", type = "text/css", href = cssFile) - ), - tags$head(includeHTML(("google-analytics.html"))), - useShinyjs(), - div(id="header",class="row", - div(id="logo",class="col-sm-2", - a(href="https://delphi.cmu.edu", - img(src="cmu_brand.png",width="220px",heigh="50px",alt="Carnegie Mellon University Delphi Group") - ) - ), - div(id="title", class="col-sm-6", - HTML("FORECAST EVALUATION DASHBOARD ", - includeHTML("arrow-left.svg"), " Back"), - ), - div(id="github-logo-container", class="col-sm-1", - a(id="github-logo",href="https://github.com/cmu-delphi/forecast-eval/", - includeHTML("github.svg"), - HTML(" GitHub") - ) - ), - ), - tags$br(), - sidebarLayout( - sidebarPanel(id = "inputOptions", - conditionalPanel(condition = "input.tabset == 'evaluations'", - radioButtons("targetVariable", 
"Target Variable", - choices = list("Incident Deaths" = "Deaths", - "Incident Cases" = "Cases", - "Hospital Admissions" = "Hospitalizations")), - radioButtons("scoreType", "Scoring Metric", - choices = list("Weighted Interval Score" = "wis", - "Spread" = "sharpness", - "Absolute Error" = "ae", - "Coverage" = "coverage")), - conditionalPanel(condition = "input.scoreType != 'coverage'", - tags$p(id="scale-score", "Y-Axis Score Scale"), - checkboxInput( - "logScale", - "Log Scale", - value = FALSE, - )), - conditionalPanel(condition = "input.scoreType != 'coverage'", - checkboxInput( - "scaleByBaseline", - "Scale by Baseline Forecaster", - value = FALSE, - )), - selectInput( - "forecasters", - p("Forecasters", tags$br(), tags$span(id="forecaster-input", "Type a name or select from dropdown")), - choices = c("COVIDhub-baseline", "COVIDhub-ensemble"), - multiple = TRUE, - selected = c("COVIDhub-baseline", "COVIDhub-ensemble") - ), - tags$p(id="missing-data-disclaimer", "Some forecasters may not have data for the chosen location or scoring metric"), - checkboxGroupInput( - "aheads", - "Forecast Horizon (Weeks)", - choices = AHEAD_OPTIONS, - selected = AHEAD_OPTIONS[1], - inline = TRUE - ), - hidden(tags$p(id="horizon-disclaimer", "Forecasters submitted earlier than Mondays may have longer actual prediction horizons")), - conditionalPanel(condition = "input.scoreType == 'coverage'", - selectInput( - "coverageInterval", - "Coverage Interval", - choices = '', - multiple = FALSE, - selected = "95" - ), - ), - conditionalPanel(condition = "input.scoreType != 'coverage'", - selectInput( - "location", - "Location", - choices = '', - multiple = FALSE, - selected = "US" - ) - ), - selectInput( - "asOf", - "As Of", - choices = '', - multiple = FALSE, - selected = '' - ), - tags$p(id="missing-data-disclaimer", "Some locations may not have 'as of' data for the chosen 'as of' date"), - hidden(div(id="showForecastsCheckbox", - checkboxInput( - "showForecasts", - "Show Forecasters' 
Predictions", - value = FALSE, - ) - )), - tags$hr(), - export_scores_ui, - tags$hr(), - ), - includeMarkdown("about-dashboard.md"), - width=3, - ), - - mainPanel( - width=9, - tabsetPanel(id = "tabset", - selected = "evaluations", - tabPanel("About", - fluidRow(column(10, - div( - id="aboutContentArea", - aboutPageText, - tags$br(), - h3("Explanation of Scoring Methods"), - h4("Weighted Interval Score"), - wisExplanation, - h4("Spread"), - sharpnessExplanation, - h4("Absolute Error"), - aeExplanation, - h4("Coverage"), - coverageExplanation - ), - tags$br() - )), - ), - tabPanel("Evaluation Plots", value = "evaluations", - fluidRow(column(11, textOutput('renderWarningText'))), - plotlyOutput(outputId = "summaryPlot", height="auto"), - fluidRow( - column(11, offset=1, - hidden(div(id="refresh-colors", actionButton(inputId="refreshColors", label= "Recolor"))) - )), - tags$br(), - plotlyOutput(outputId = "truthPlot", height="auto"), - fluidRow( - column(11, offset=1, - div(id="data-loading-message", "DATA IS LOADING...(this may take a while)"), - hidden(div(id="truth-plot-loading-message", "Fetching 'as of' data and loading observed values...")), - hidden(div(id="notes", "About the Scores")), - hidden(div(id="scoreExplanations", - hidden(div(id = "wisExplanation", wisExplanation)), - hidden(div(id = "sharpnessExplanation", sharpnessExplanation)), - hidden(div(id = "aeExplanation", aeExplanation)), - hidden(div(id = "coverageExplanation", coverageExplanation)) - )), - hidden(div(id = "scoringDisclaimer", scoringDisclaimer)) - ) - ), - fluidRow( - column(11,offset=1, - textOutput('renderLocationText'), - textOutput('renderAggregateText'), - textOutput('renderLocations'), - tags$br() - ) - ) - ) - ), - ), - ), -) - - -# Get and prepare data -getS3Bucket <- function() { - # Connect to AWS s3bucket - Sys.setenv("AWS_DEFAULT_REGION" = "us-east-2") - s3bucket = tryCatch( - { - get_bucket(bucket = 'forecast-eval') - }, - error = function(e) { - e - return(NULL) - } - ) - - 
return(s3bucket) -} - -getData <- function(filename, s3bucket){ - if(!is.null(s3bucket)) { - tryCatch( - { - s3readRDS(object = filename, bucket = s3bucket) - }, - error = function(e) { - e - getFallbackData(filename) - } - ) - } else { - getFallbackData(filename) - } -} - -getFallbackData = function(filename) { - path = ifelse( - file.exists(filename), - filename, - file.path("../dist/",filename) - ) - readRDS(path) -} - -getAllData = function(s3bucket) { - dfStateCases <- getData("score_cards_state_cases.rds", s3bucket) - dfStateDeaths <- getData("score_cards_state_deaths.rds", s3bucket) - dfNationCases = getData("score_cards_nation_cases.rds", s3bucket) - dfNationDeaths = getData("score_cards_nation_deaths.rds", s3bucket) - dfStateHospitalizations = getData("score_cards_state_hospitalizations.rds", s3bucket) - dfNationHospitalizations = getData("score_cards_nation_hospitalizations.rds", s3bucket) - - # Pick out expected columns only - covCols = paste0("cov_", COVERAGE_INTERVALS) - expectedCols = c("ahead", "geo_value", "forecaster", "forecast_date", - "data_source", "signal", "target_end_date", "incidence_period", - "actual", "wis", "sharpness", "ae", "value_50", - covCols) - - dfStateCases = dfStateCases %>% select(all_of(expectedCols)) - dfStateDeaths = dfStateDeaths %>% select(all_of(expectedCols)) - dfNationCases = dfNationCases %>% select(all_of(expectedCols)) - dfNationDeaths = dfNationDeaths %>% select(all_of(expectedCols)) - dfStateHospitalizations = dfStateHospitalizations %>% select(all_of(expectedCols)) - dfNationHospitalizations = dfNationHospitalizations %>% select(all_of(expectedCols)) - - df = rbind(dfStateCases, dfStateDeaths, dfNationCases, dfNationDeaths, dfStateHospitalizations, dfNationHospitalizations) - df = df %>% rename("10" = cov_10, "20" = cov_20, "30" = cov_30, "40" = cov_40, "50" = cov_50, "60" = cov_60, "70" = cov_70, "80" = cov_80, "90" = cov_90, "95" = cov_95, "98" = cov_98) - - return(df) -} - -getRecentDataHelper = function() { - 
s3bucket <- getS3Bucket() - df <- data.frame() - - getRecentData = function() { - newS3bucket <- getS3Bucket() - - s3Contents <- s3bucket[attr(s3bucket, "names", exact=TRUE)] - newS3Contents <- newS3bucket[attr(newS3bucket, "names", exact=TRUE)] - - # Fetch new score data if contents of S3 bucket has changed (including file - # names, sizes, and last modified timestamps). Ignores characteristics of - # bucket and request, including bucket region, name, content type, request - # date, request ID, etc. - if ( nrow(df) == 0 || !identical(s3Contents, newS3Contents) ) { - # Save new data and new bucket connection info to vars in env of - # `getRecentDataHelper`. They persist between calls to `getRecentData` a - # la https://stackoverflow.com/questions/1088639/static-variables-in-r - s3bucket <<- newS3bucket - df <<- getAllData(s3bucket) - } - - return(df) - } - - return(getRecentData) -} - -getRecentData <- getRecentDataHelper() - - -server <- function(input, output, session) { - TERRITORIES = c('AS', 'GU', 'MP', 'VI') - PREV_AS_OF_DATA = reactiveVal(NULL) - AS_OF_CHOICES = reactiveVal(NULL) - SUMMARIZING_OVER_ALL_LOCATIONS = reactive(input$scoreType == 'coverage' || input$location == TOTAL_LOCATIONS) - - # Get most recent target end date - # Prev Saturday for Cases and Deaths, prev Wednesday for Hospitalizations - # Since we don't upload new observed data until Monday: - # Use 8 and 2 for Cases and Deaths so that Sundays will not use the Saturday directly beforehand - # since we don't have data for it yet. - # Use 5 and 11 for Hospitalizations since Thurs-Sun should also not use the Wednesday directly beforehand. 
- # (This means that on Mondays until the afternoon when pipeline completes, the "as of" will show - # most recent Saturday / Wednesday date even though the actual updated data won't be there yet) - prevWeek <- seq(Sys.Date()-8,Sys.Date()-2,by='day') - CASES_DEATHS_CURRENT = prevWeek[weekdays(prevWeek)=='Saturday'] - CURRENT_WEEK_END_DATE = reactiveVal(CASES_DEATHS_CURRENT) - prevHospWeek <- seq(Sys.Date()-11,Sys.Date()-5,by='day') - HOSP_CURRENT = prevHospWeek[weekdays(prevHospWeek)=='Wednesday'] - - # Get scores - df = getRecentData() - DATA_LOADED = TRUE - - # Prepare input choices - forecasterChoices = sort(unique(df$forecaster)) - updateForecasterChoices(session, df, forecasterChoices, 'wis') - - - ################## - # CREATE MAIN PLOT - ################## - summaryPlot = function(colorSeed = 100, reRenderTruth = FALSE, asOfData = NULL) { - filteredScoreDf = filterScoreDf() - dfWithForecasts = NULL - if (input$showForecasts) { - dfWithForecasts = filteredScoreDf - } - # Need to do this after setting dfWithForecasts to leave in aheads for forecasts - filteredScoreDf = filteredScoreDf %>% filter(ahead %in% input$aheads) - if (dim(filteredScoreDf)[1] == 0) { - output$renderWarningText <- renderText("The selected forecasters do not have enough data to display the selected scoring metric.") - return() - } - if (is.null(asOfData)) { - if (!is.null(isolate(PREV_AS_OF_DATA())) && dim(isolate(PREV_AS_OF_DATA()))[1] != 0 && - isolate(input$asOf) != '' && isolate(input$asOf) != isolate(CURRENT_WEEK_END_DATE())) { - asOfData = isolate(PREV_AS_OF_DATA()) - } - } - if (!is.null(asOfData) && dim(asOfData)[1] != 0) { - asOfData = asOfData %>% rename(target_end_date = time_value, as_of_actual = value) - asOfData = asOfData[c("target_end_date", "geo_value", "as_of_actual")] - - # Get the 'as of' dates that are the target_end_dates in the scoring df - dateGroupDf = asOfData %>% filter(asOfData$target_end_date %in% filteredScoreDf$target_end_date) - if (dim(dateGroupDf)[1] != 
0) { - # Since cases and deaths are shown as weekly incidence, but the "as of" data from the covidcast API - # is daily, we need to sum over the days leading up to the target_end_date of each week to get the - # weekly incidence - asOfData = filterAsOfData(asOfData, dateGroupDf, filteredScoreDf) - filteredScoreDf = merge(filteredScoreDf, asOfData, by=c("target_end_date", "geo_value"), all = TRUE) - } else { - # Input 'as of' date chosen does not match the available target_end_dates that result from the rest of the selected inputs - # It is too far back or we are switching between hosp and cases/deaths which have different target date days - # As of input will be updated to the default (latest) and plot will re-render with the just the normal truth data, no 'as of' - asOfData = NULL - } - } - - # Totaling over all locations - if (SUMMARIZING_OVER_ALL_LOCATIONS()) { - filteredScoreDfAndIntersections = filterOverAllLocations(filteredScoreDf, input$scoreType, !is.null(asOfData)) - filteredScoreDf = filteredScoreDfAndIntersections[[1]] - locationsIntersect = filteredScoreDfAndIntersections[[2]] - if (input$showForecasts) { - dfWithForecasts = dfWithForecasts %>% filter(geo_value %in% locationsIntersect) - } - aggregateText = "*For fair comparison, all displayed forecasters on all displayed dates are compared across a common set of states and territories." - if (input$scoreType == "coverage") { - aggregate = "Averaged" - output$renderAggregateText = renderText(paste(aggregateText," Some forecasters may not have any data for the coverage interval chosen. 
Locations inlcuded: ")) - } - else { - aggregate = "Totaled" - output$renderAggregateText = renderText(paste(aggregateText, " Locations included: ")) - } - if (length(locationsIntersect) == 0) { - output$renderWarningText <- renderText("The selected forecasters do not have data for any locations in common on all dates.") - output$renderLocations <- renderText("") - output$renderAggregateText = renderText("") - hideElement("truthPlot") - hideElement("refresh-colors") - return() - } - else { - locationSubtitleText = paste0(', Location: ', aggregate ,' over all states and territories common to these forecasters*') - output$renderLocations <- renderText(toupper(locationsIntersect)) - output$renderWarningText = renderText("") - showElement("truthPlot") - } - # Not totaling over all locations - } else { - if (!is.null(asOfData)) { - filteredScoreDf <- filteredScoreDf %>% filter(geo_value == tolower(input$location)) %>% - group_by(forecaster, forecast_date, target_end_date, ahead) %>% - summarize(Score = Score, actual = actual, as_of_actual = as_of_actual) - } else { - filteredScoreDf <- filteredScoreDf %>% filter(geo_value == tolower(input$location)) %>% - group_by(forecaster, forecast_date, target_end_date, ahead) %>% - summarize(Score = Score, actual = actual) - } - locationSubtitleText = paste0(', Location: ', input$location) - output$renderAggregateText = renderText("") - output$renderLocations <- renderText("") - output$renderWarningText <- renderText("") - } - - showElement("refresh-colors") - if(dim(filteredScoreDf)[1] == 0) { - return() - } - - # Rename columns that will be used as labels and for clarity on CSV exports - filteredScoreDf = filteredScoreDf %>% rename(Forecaster = forecaster, Forecast_Date = forecast_date, - Week_End_Date = target_end_date) - - # Set forecaster colors for plot - set.seed(colorSeed) - forecasterRand <- sample(unique(df$forecaster)) - colorPalette = setNames(object = viridis(length(unique(df$forecaster))), nm = forecasterRand) - if 
(!is.null(asOfData)) { - colorPalette['Reported_Incidence'] = 'grey' - colorPalette['Reported_As_Of_Incidence'] = 'black' - } - - # Render truth plot with observed values - truthDf = filteredScoreDf - output$truthPlot <- renderPlotly({ - truthPlot(truthDf, locationsIntersect, !is.null(asOfData), dfWithForecasts, colorPalette) - }) - - # If we are just re-rendering the truth plot with as of data - # we don't need to re-render the score plot - if (reRenderTruth) { - return() - } - # If we are re-rendering scoring plot with new inputs that were just selected - # we need to make sure the as of input options are valid with those inputs - updateAsOfChoices(session, truthDf) - - # Format and transform data for plot - filteredScoreDf = filteredScoreDf %>% filter(!is.na(Week_End_Date)) - filteredScoreDf = filteredScoreDf[c("Forecaster", "Forecast_Date", "Week_End_Date", "Score", "ahead")] - filteredScoreDf = filteredScoreDf %>% mutate(across(where(is.numeric), ~ round(., 2))) - if (input$scoreType != 'coverage') { - if (input$scaleByBaseline) { - baselineDf = filteredScoreDf %>% filter(Forecaster %in% 'COVIDhub-baseline') - filteredScoreDfMerged = merge(filteredScoreDf, baselineDf, by=c("Week_End_Date","ahead")) - # Scaling score by baseline forecaster - filteredScoreDfMerged$Score.x = filteredScoreDfMerged$Score.x / filteredScoreDfMerged$Score.y - filteredScoreDf = filteredScoreDfMerged %>% - rename(Forecaster = Forecaster.x, Score = Score.x, Forecast_Date = Forecast_Date.x) %>% - select(Forecaster, Forecast_Date, Week_End_Date, ahead, Score) - } - if (input$logScale) { - filteredScoreDf$Score = log10(filteredScoreDf$Score) - } - } - - # Title plot - if (input$scoreType == "wis") { - plotTitle = "Weighted Interval Score" - } - else if (input$scoreType == "sharpness") { - plotTitle = "Spread" - } - else if (input$scoreType == "ae") { - plotTitle = "Absolute Error" - } - else { - plotTitle = "Coverage" - } - - titleText = paste0('', plotTitle,'','
', '', - 'Target Variable: ', input$targetVariable, - locationSubtitleText, '
', - tags$span(id="drag-to-zoom", " Drag to zoom"), - '
') - - # Fill gaps so there are line breaks on weeks without data - # This is failing for CU-select on US deaths (https://github.com/cmu-delphi/forecast-eval/issues/157) - filteredScoreDf = filteredScoreDf %>% - as_tsibble(key = c(Forecaster, ahead), index = Week_End_Date) %>% - group_by(Forecaster, Forecast_Date, ahead) %>% - fill_gaps(.full = TRUE) - # Set labels for faceted horizon plots - horizonOptions = AHEAD_OPTIONS - horizonLabels = lapply(AHEAD_OPTIONS, function (x) paste0("Horizon: ", x, " Week(s)")) - if (input$targetVariable == 'Hospitalizations') { - horizonOptions = HOSPITALIZATIONS_AHEAD_OPTIONS - horizonLabels = lapply(HOSPITALIZATIONS_AHEAD_OPTIONS, function (x) paste0("Horizon: ", x, " Days")) - } - filteredScoreDf$ahead = factor(filteredScoreDf$ahead, levels = horizonOptions, - labels = horizonLabels) - - p = ggplot( - filteredScoreDf, - aes(x = Week_End_Date, y = Score, color = Forecaster, shape = Forecaster, label = Forecast_Date) - ) + - geom_line() + - geom_point(size=2) + - labs(x = "", y = "", title=titleText) + - scale_x_date(date_labels = "%b %Y") + - facet_wrap(~ahead, ncol=1) + - scale_color_manual(values = colorPalette) + - theme_bw() + - theme(panel.spacing=unit(0.5, "lines")) + - theme(legend.title = element_blank()) - - if (input$scoreType == "coverage") { - p = p + geom_hline(yintercept = .01 * as.integer(input$coverageInterval)) - } - if (input$logScale) { - p = p + scale_y_continuous(label = function(x) paste0("10^", x)) - } else { - p = p + scale_y_continuous(limits = c(0,NA), labels = scales::comma) - } - plotHeight = 550 + (length(input$aheads)-1)*100 - finalPlot <- - ggplotly(p, tooltip = c("x", "y", "shape", "label")) %>% - layout( - height = plotHeight, - legend = list(orientation = "h", y = -0.1), - margin = list(t=90), - height=500, - hovermode = 'x unified', - xaxis = list( - title = list(text = "Target Date",standoff = 8L), - titlefont = list(size = 12)) - ) %>% - config(displayModeBar = F) - - return(finalPlot) - 
} - - ################### - # CREATE TRUTH PLOT - ################### - # Create the plot for target variable ground truth - truthPlot = function(filteredDf = NULL, locationsIntersect = NULL, hasAsOfData = FALSE, dfWithForecasts = NULL, colorPalette = NULL) { - observation = paste0('Incident ', input$targetVariable) - if (input$targetVariable == "Hospitalizations") { - observation = paste0('Hospital Admissions') - } - titleText = paste0('Observed ', observation, '') - if (SUMMARIZING_OVER_ALL_LOCATIONS()) { - titleText = paste0('Observed ', observation, '', '
Totaled over all states and territories common to selected forecasters*') - } - - if (hasAsOfData) { - filteredDf = filteredDf %>% - group_by(Week_End_Date) %>% summarize(Forecaster = Forecaster, Reported_Incidence = actual, Reported_As_Of_Incidence = as_of_actual) %>% - distinct() - if(input$showForecasts) { - filteredDf = filterForecastData(filteredDf, dfWithForecasts) - } - } else { - filteredDf <- filteredDf %>% - group_by(Week_End_Date) %>% summarize(Reported_Incidence = actual) - } - - finalPlot = ggplot(filteredDf, aes(x = Week_End_Date)) + - labs(x = "", y = "", title = titleText) + - scale_y_continuous(limits = c(0,NA), labels = scales::comma) + - scale_x_date(date_labels = "%b %Y") + - scale_color_manual(values = colorPalette) + - theme_bw() + - theme(legend.title = element_blank()) - - if (hasAsOfData) { - finalPlot = finalPlot + - geom_line(aes(y = Reported_Incidence, color = "Reported_Incidence")) + - geom_point(aes(y = Reported_Incidence, color = "Reported_Incidence")) + - geom_line(aes(y = Reported_As_Of_Incidence, color = "Reported_As_Of_Incidence")) + - geom_point(aes(y = Reported_As_Of_Incidence, color = "Reported_As_Of_Incidence")) - if(input$showForecasts) { - finalPlot = finalPlot + - geom_line(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) + - geom_point(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) - } - } else { - finalPlot = finalPlot + geom_line(aes(y = Reported_Incidence)) + - geom_point(aes(y = Reported_Incidence)) - } - finalPlot = ggplotly(finalPlot, tooltip = c("shape","x", "y")) %>% - layout(hovermode = 'x unified', legend = list(orientation = "h", y = -0.1)) %>% - config(displayModeBar = F) - # Remove the extra grouping from the legend: "(___,1)" - for (i in 1:length(finalPlot$x$data)){ - if (!is.null(finalPlot$x$data[[i]]$name)){ - finalPlot$x$data[[i]]$name = gsub("\\(","",str_split(finalPlot$x$data[[i]]$name,",")[[1]][1]) - } - } - return (finalPlot) - } - - ############# - # PLOT OUTPUT - 
############# - output$summaryPlot <- renderPlotly({ - summaryPlot() - }) - - # Filter scoring df by inputs chosen (targetVariable, forecasters, aheads) - filterScoreDf = function() { - signalFilter = CASE_FILTER - if (input$targetVariable == "Deaths") { - signalFilter = DEATH_FILTER - } - if (input$targetVariable == "Hospitalizations") { - signalFilter = HOSPITALIZATIONS_FILTER - } - filteredScoreDf = df %>% - filter(signal == signalFilter) %>% - filter(forecaster %in% input$forecasters) - - if (signalFilter == HOSPITALIZATIONS_FILTER) { - filteredScoreDf = filterHospitalizationsAheads(filteredScoreDf) - } - if (input$scoreType == "wis" || input$scoreType == "sharpness") { - # Only show WIS or Sharpness for forecasts that have all intervals - filteredScoreDf = filteredScoreDf %>% filter(!is.na(`50`)) %>% filter(!is.na(`80`)) %>% filter(!is.na(`95`)) - if (input$targetVariable == "Deaths") { - filteredScoreDf = filteredScoreDf %>% filter(!is.na(`10`)) %>% filter(!is.na(`20`)) %>% filter(!is.na(`30`)) %>% - filter(!is.na(`40`)) %>% filter(!is.na(`60`)) %>% filter(!is.na(`70`)) %>% filter(!is.na(`90`)) %>% filter(!is.na(`98`)) - } - } - filteredScoreDf = renameScoreCol(filteredScoreDf, input$scoreType, input$coverageInterval) - return(filteredScoreDf) - } - - # Filter as of data so that it matches weekly incidence for the target end dates in the score df - filterAsOfData = function(asOfData, dateGroupDf, filteredScoreDf) { - # Hospitalization scores are shown as daily incidence, not weekly incidence, no summing necessary - if (input$targetVariable != "Hospitalizations") { - # Create a df to fill in the corresponding target_end_date in a new date_group column for all intervening days - dateGroupDf[,"date_group"] <- NA - dateGroupDf$date_group = dateGroupDf$target_end_date - asOfData = merge(asOfData, dateGroupDf, by=c('target_end_date', 'geo_value', 'as_of_actual'), all = TRUE) - - # Cut off the extra days on beginning and end of series so that when we sum the values 
we are only - # summing over the weeks included in the score plot - asOfData = asOfData %>% filter(target_end_date >= min(filteredScoreDf$target_end_date) - 6) - asOfData = asOfData %>% filter(target_end_date <= isolate(input$asOf)) - - # Fill in the date_group column with the target week end days for all intervening days - asOfData = asOfData %>% arrange(geo_value) %>% fill(date_group, .direction = "up") - - # In the case where there are target week end days missing from the scoring or as of data - # we don't want to end up summing values over multiple weeks so we make sure each date_group only spans one week - asOfData = asOfData %>% filter(asOfData$date_group - asOfData$target_end_date < 7) - - asOfData = asOfData[c('geo_value', 'as_of_actual', 'date_group')] - # Sum over preceding week for all weekly target variables - asOfData = asOfData %>% group_by(geo_value, date_group) %>% summarize(as_of_actual = sum(as_of_actual)) - asOfData = asOfData %>% rename(target_end_date = date_group) - # If targetVariable is Hospitalizations - } else { - asOfData = dateGroupDf - # Need to make sure that we are only matching the target_end_dates shown in the scoring plot - # and not using fetched data for as of dates before those target_end_dates. - # This is taken care of above for cases and deaths. 
- minDate = min(filteredScoreDf$target_end_date) - if (!SUMMARIZING_OVER_ALL_LOCATIONS()) { - chosenLocationDf = filteredScoreDf %>% filter(geo_value == tolower(input$location)) - minDate = min(chosenLocationDf$target_end_date) - } - asOfData = asOfData %>% filter(target_end_date >= minDate) - } - return(asOfData) - } - - filterForecastData = function(filteredDf, dfWithForecasts) { - dfWithForecasts = dfWithForecasts %>% - rename(Week_End_Date = target_end_date, Forecaster = forecaster, Quantile_50 = value_50) - if (!SUMMARIZING_OVER_ALL_LOCATIONS()) { - dfWithForecasts = dfWithForecasts %>% filter(geo_value == tolower(input$location)) - } else { - # Sum the predictions for all included locations - dfWithForecasts = dfWithForecasts %>% - group_by(Forecaster, forecast_date, Week_End_Date, ahead) %>% - summarize(Quantile_50 = sum(Quantile_50)) - } - dfWithForecasts = dfWithForecasts %>% - # We want the forecasts to be later than latest as of date with data - filter(forecast_date >= tail(filteredDf %>% filter(!is.na(Reported_As_Of_Incidence)), n=1)$Week_End_Date[1]) %>% - group_by(Week_End_Date) %>% - summarize(Forecaster, forecast_date, Quantile_50) - - # Get the next as of choice available in dropdown menu - dfWithForecasts = dfWithForecasts[order(dfWithForecasts$forecast_date),] - AS_OF_CHOICES(sort(AS_OF_CHOICES() %>% unique())) - nextAsOfInList = AS_OF_CHOICES()[which.min(abs(AS_OF_CHOICES()-dfWithForecasts$forecast_date[1])) + 1] - - # Take only those forecasts with a forecast date before the next as of date in dropdown - # aka within the week after the current as of shown - if(length(nextAsOfInList) != 0 && !is.na(nextAsOfInList)) { - dfWithForecasts = dfWithForecasts %>% - filter(forecast_date < nextAsOfInList) - } - - # Hospitalizations will have multiple forecast dates within this target week - # So we want to take the earliest forecast date for each forecaster & week end date pair - if (input$targetVariable == "Hospitalizations") { - dfWithForecasts = 
dfWithForecasts %>% group_by(Week_End_Date, Forecaster) %>% top_n(n=1, wt=desc(forecast_date)) - dfWithForecasts = dfWithForecasts %>% group_by(Forecaster) %>% filter(forecast_date == first(forecast_date)) - } - filteredDf = merge(filteredDf, dfWithForecasts, by=c('Week_End_Date', 'Forecaster'), all = TRUE) %>% - group_by(Week_End_Date) %>% - select(Quantile_50, Forecaster, Reported_Incidence, Reported_As_Of_Incidence) - # Remove rows of NAs - filteredDf = filteredDf %>% filter(!is.null(Forecaster)) - filteredDf = filteredDf %>% arrange(Week_End_Date) %>% fill(Reported_Incidence, .direction = "downup") - return (filteredDf) - } - - ################### - # EVENT OBSERVATION - ################### - - observeEvent(input$refreshColors, { - colorSeed = floor(runif(1, 1, 1000)) - output$summaryPlot <- renderPlotly({ - summaryPlot(colorSeed) - }) - }) - - # When the target variable changes, update available forecasters, locations, and CIs to choose from - observeEvent(input$targetVariable, { - CURRENT_WEEK_END_DATE(CASES_DEATHS_CURRENT) - if (input$targetVariable == 'Deaths') { - df = df %>% filter(signal == DEATH_FILTER) - } else if (input$targetVariable == 'Cases') { - df = df %>% filter(signal == CASE_FILTER) - } else { - df = df %>% filter(signal == HOSPITALIZATIONS_FILTER) - CURRENT_WEEK_END_DATE(HOSP_CURRENT) - } - - updateAheadChoices(session, df, input$targetVariable, input$forecasters, input$aheads, TRUE) - updateForecasterChoices(session, df, input$forecasters, input$scoreType) - updateLocationChoices(session, df, input$targetVariable, input$forecasters, input$location) - updateCoverageChoices(session, df, input$targetVariable, input$forecasters, input$coverageInterval, output) - updateAsOfData() - }) - - observeEvent(input$scoreType, { - if (input$targetVariable == 'Deaths') { - df = df %>% filter(signal == DEATH_FILTER) - } else if (input$targetVariable == 'Cases') { - df = df %>% filter(signal == CASE_FILTER) - } else { - df = df %>% filter(signal == 
HOSPITALIZATIONS_FILTER) - } - # Only show forecasters that have data for the score chosen - updateForecasterChoices(session, df, input$forecasters, input$scoreType) - - # If we are switching between coverage and other score types we need to - # update the as of data we have so it matches the correct locations shown - if (input$location == 'US') { - updateAsOfData() - } - - if (input$asOf != '' && input$asOf == CURRENT_WEEK_END_DATE()) { - hideElement("showForecastsCheckbox") - } else { - showElement("showForecastsCheckbox") - } - if (input$scoreType == "wis") { - show("wisExplanation") - hide("sharpnessExplanation") - hide("aeExplanation") - hide("coverageExplanation") - } - if (input$scoreType == "sharpness") { - show("sharpnessExplanation") - hide("wisExplanation") - hide("aeExplanation") - hide("coverageExplanation") - } - if (input$scoreType == "ae") { - hide("wisExplanation") - hide("sharpnessExplanation") - show("aeExplanation") - hide("coverageExplanation") - } - if (input$scoreType == "coverage") { - hide("wisExplanation") - hide("sharpnessExplanation") - hide("aeExplanation") - show("coverageExplanation") - } - }) - - # When forecaster selections change, update available aheads, locations, and CIs to choose from - observeEvent(input$forecasters, { - if (input$targetVariable == 'Deaths') { - df = df %>% filter(signal == DEATH_FILTER) - } else if (input$targetVariable == 'Cases') { - df = df %>% filter(signal == CASE_FILTER) - } else { - df = df %>% filter(signal == HOSPITALIZATIONS_FILTER) - } - df = df %>% filter(forecaster %in% input$forecasters) - - updateAheadChoices(session, df, input$targetVariable, input$forecasters, input$aheads, FALSE) - updateLocationChoices(session, df, input$targetVariable, input$forecasters, input$location) - updateCoverageChoices(session, df, input$targetVariable, input$forecasters, input$coverageInterval, output) - }) - - observeEvent(input$location, { - updateAsOfData() - # Only show forecast check box option if we are 
showing as of data - if (input$asOf != '' && input$asOf == CURRENT_WEEK_END_DATE()) { - hideElement("showForecastsCheckbox") - } else { - showElement("showForecastsCheckbox") - } - }) - - observeEvent(input$asOf, { - updateAsOfData() - # Only show forecast check box option if we are showing as of data - if (input$asOf != '' && input$asOf == CURRENT_WEEK_END_DATE()) { - hideElement("showForecastsCheckbox") - } else { - showElement("showForecastsCheckbox") - } - }) - - # The following checks ensure the minimum necessary input selections - observe({ - # Show data loading message and hide other messages until all data is loaded - if (DATA_LOADED) { - hide("data-loading-message") - show("refresh-colors") - show("notes") - show("scoreExplanations") - show("scoringDisclaimer") - } - # Ensure there is always one ahead selected - if(length(input$aheads) < 1) { - if (input$targetVariable == 'Hospitalizations') { - updateCheckboxGroupInput(session, "aheads", - selected = HOSPITALIZATIONS_AHEAD_OPTIONS[1]) - } else { - updateCheckboxGroupInput(session, "aheads", - selected = AHEAD_OPTIONS[1]) - } - } - # Ensure there is always one forecaster selected - if(length(input$forecasters) < 1) { - updateSelectInput(session, "forecasters", - selected = c("COVIDhub-ensemble")) # Use ensemble rather than baseline bc it has hospitalization scores - } - # Ensure COVIDhub-baseline is selected when scaling by baseline - if(input$scaleByBaseline && !("COVIDhub-baseline" %in% input$forecasters)) { - updateSelectInput(session, "forecasters", selected = c(input$forecasters, "COVIDhub-baseline")) - } - }) - - updateAsOfData = function() { - dataSource = "jhu-csse" - if(input$targetVariable == "Cases") { - targetSignal = "confirmed_incidence_num" - } else if (input$targetVariable == "Deaths") { - targetSignal = "deaths_incidence_num" - } else if (input$targetVariable == "Hospitalizations") { - targetSignal = "confirmed_admissions_covid_1d" - dataSource = "hhs" - } - - if (input$location == 'US' && 
input$scoreType != 'coverage') { - location = "nation" - } else { - location = "state" - } - if (input$asOf < CURRENT_WEEK_END_DATE() && input$asOf != '') { - hideElement("truthPlot") - hideElement("notes") - hideElement("scoringDisclaimer") - hideElement("scoreExplanations") - hideElement("renderAggregateText") - hideElement("renderLocations") - showElement("truth-plot-loading-message") - - # Since as_of matches to the issue date in covidcast (rather than the time_value) - # we need to add one extra day to get the as of we want. - fetchDate = as.Date(input$asOf) + 1 - - # Covidcast API call - asOfTruthData = covidcast_signal_mem(data_source = dataSource, signal = targetSignal, - start_day = "2020-02-15", end_day = fetchDate, - as_of = fetchDate, - geo_type = location) - showElement("truthPlot") - showElement("notes") - showElement("scoringDisclaimer") - showElement("scoreExplanations") - showElement("renderAggregateText") - showElement("renderLocations") - hideElement("truth-plot-loading-message") - PREV_AS_OF_DATA(asOfTruthData) - - if(dim(asOfTruthData)[1] == 0) { - return() - } - summaryPlot(reRenderTruth = TRUE, asOfData = asOfTruthData) - } else if(input$asOf == CURRENT_WEEK_END_DATE() && input$asOf != '') { - summaryPlot(reRenderTruth = TRUE) - } - } - - updateAsOfChoices = function(session, truthDf) { - asOfChoices = truthDf$Week_End_Date - selectedAsOf = isolate(input$asOf) - if (input$targetVariable == "Hospitalizations") { - minChoice = MIN_AVAIL_HOSP_AS_OF_DATE - asOfChoices = asOfChoices[asOfChoices >= minChoice] - } else if(input$location == 'US' && input$scoreType != 'coverage') { - minChoice = MIN_AVAIL_NATION_AS_OF_DATE - asOfChoices = asOfChoices[asOfChoices >= minChoice] - } else if(input$location %in% TERRITORIES || input$location == TOTAL_LOCATIONS || input$scoreType == 'coverage') { - minChoice = MIN_AVAIL_TERRITORY_AS_OF_DATE - asOfChoices = asOfChoices[asOfChoices >= minChoice] - } - asOfChoices = c(asOfChoices, CURRENT_WEEK_END_DATE()) - # 
Make sure we have a valid as of selection - nonValidAsOf = selectedAsOf == '' || !(as.Date(selectedAsOf) %in% asOfChoices) - if (length(asOfChoices) != 0 && nonValidAsOf) { - selectedAsOf = max(asOfChoices, na.rm=TRUE) - } - AS_OF_CHOICES(asOfChoices) - updateSelectInput(session, "asOf", - choices = sort(asOfChoices), - selected = selectedAsOf) - } - export_scores_server(input, output, df) -} - -################ -# UTIL FUNCTIONS -################ -updateForecasterChoices = function(session, df, forecasterInput, scoreType) { - if (scoreType == "wis") { - df = df %>% filter(!is.na(wis)) - } - if (scoreType == "ae") { - df = df %>% filter(!is.na(ae)) - } - forecasterChoices = unique(df$forecaster) - updateSelectInput(session, "forecasters", - choices = forecasterChoices, - selected = forecasterInput) -} - - -updateCoverageChoices = function(session, df, targetVariable, forecasterChoices, coverageInput, output) { - df = df %>% filter(forecaster %in% forecasterChoices) - df = Filter(function(x)!all(is.na(x)), df) - coverageChoices = intersect(colnames(df), COVERAGE_INTERVALS) - # Ensure previsouly selected options are still allowed - if (coverageInput %in% coverageChoices) { - selectedCoverage = coverageInput - } else if ("95" %in% coverageChoices) { - selectedCoverage = "95" - } else { - selectedCoverage = coverageChoices[1] - } - updateSelectInput(session, "coverageInterval", - choices = coverageChoices, - selected = selectedCoverage) -} - - -updateLocationChoices = function(session, df, targetVariable, forecasterChoices, locationInput) { - df = df %>% filter(forecaster %in% forecasterChoices) - locationChoices = unique(toupper(df$geo_value)) - locationChoices = locationChoices[c(length(locationChoices), (1:length(locationChoices)-1))] # Move US to front of list - locationChoices = c(TOTAL_LOCATIONS, locationChoices) - # Ensure previously selected options are still allowed - if (locationInput %in% locationChoices) { - selectedLocation = locationInput - } else { - 
selectedLocation = locationChoices[1] - } - updateSelectInput(session, "location", - choices = locationChoices, - selected = selectedLocation) -} - -updateAheadChoices = function(session, df, targetVariable, forecasterChoices, aheads, targetVariableChange) { - df = df %>% filter(forecaster %in% forecasterChoices) - if (targetVariable == 'Hospitalizations') { - aheadOptions = HOSPITALIZATIONS_AHEAD_OPTIONS - title = "Forecast Horizon (Days)" - show("horizon-disclaimer") - } else { - aheadOptions = AHEAD_OPTIONS - title = "Forecast Horizon (Weeks)" - hide("horizon-disclaimer") - } - aheadChoices = Filter(function(x) any(unique(df$ahead) %in% x), aheadOptions) - # Ensure previsouly selected options are still allowed - if (!is.null(aheads) && aheads %in% aheadChoices) { - selectedAheads = aheads - } else { - selectedAheads = aheadOptions[1] - } - # If we are changing target variable, always reset ahead selection to first option - if (targetVariableChange) { - selectedAheads = aheadOptions[1] - } - updateCheckboxGroupInput(session, "aheads", - title, - choices = aheadChoices, - selected = selectedAheads, - inline = TRUE) -} - -shinyApp(ui = ui, server = server) diff --git a/dashboard/arrow-left.svg b/dashboard/arrow-left.svg deleted file mode 100644 index 21db7f7..0000000 --- a/dashboard/arrow-left.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/dashboard/common.R b/dashboard/common.R deleted file mode 100644 index ba763a4..0000000 --- a/dashboard/common.R +++ /dev/null @@ -1,93 +0,0 @@ - -COVERAGE_INTERVALS = c("10", "20", "30", "40", "50", "60", "70", "80", "90", "95", "98") -DEATH_FILTER = "deaths_incidence_num" -CASE_FILTER = "confirmed_incidence_num" -HOSPITALIZATIONS_FILTER = "confirmed_admissions_covid_1d" -HOSPITALIZATIONS_TARGET_DAY = "Wednesday" -TOTAL_LOCATIONS = "Totaled Over States*" -AHEAD_OPTIONS = c(1,2,3,4) - -# Num days to offset the forecast week by -# Example: if HOSPITALIZATIONS_TARGET_DAY is Wednesday and HOSPITALIZATIONS_OFFSET is 2, -# ahead 1 has 
to have forecast date of Monday or earlier, -# ahead 2 has to have forecast date of Monday + 7 days or earlier (offset + 7 days or more), etc -HOSPITALIZATIONS_OFFSET = 2 -HOSPITALIZATIONS_AHEAD_OPTIONS = c(HOSPITALIZATIONS_OFFSET, HOSPITALIZATIONS_OFFSET + 7, HOSPITALIZATIONS_OFFSET + 14, HOSPITALIZATIONS_OFFSET + 21) - - -renameScoreCol = function(filteredScoreDf, scoreType, coverageInterval) { - if (scoreType == "wis") { - filteredScoreDf <- filteredScoreDf %>% rename(Score = wis) - } - else if (scoreType == "sharpness") { - filteredScoreDf <- filteredScoreDf %>% rename(Score = sharpness) - } - else if (scoreType == "ae") { - filteredScoreDf <- filteredScoreDf %>% rename(Score = ae) - } - else if (scoreType == "coverage") { - filteredScoreDf <- filteredScoreDf %>% rename(Score = !!coverageInterval) - } - return (filteredScoreDf) -} - - -filterOverAllLocations = function(filteredScoreDf, scoreType, hasAsOfData = FALSE) { - locationsIntersect = list() - filteredScoreDf = filteredScoreDf %>% filter(!is.na(Score)) - # Create df with col for all locations across each unique date, ahead and forecaster combo - locationDf = filteredScoreDf %>% group_by(forecaster, target_end_date, ahead) %>% - summarize(location_list = paste(sort(unique(geo_value)),collapse=",")) - locationDf = locationDf %>% filter(location_list != c('us')) - # Create a list containing each row's location list - locationList = sapply(locationDf$location_list, function(x) strsplit(x, ",")) - locationList = lapply(locationList, function(x) x[x != 'us']) - # Get the intersection of all the locations in these lists - locationsIntersect = unique(Reduce(intersect, locationList)) - filteredScoreDf = filteredScoreDf %>% filter(geo_value %in% locationsIntersect) - if (scoreType == "coverage") { - if (hasAsOfData) { - filteredScoreDf = filteredScoreDf %>% - group_by(forecaster, forecast_date, target_end_date, ahead) %>% - summarize(Score = sum(Score)/length(locationsIntersect), actual = sum(actual), as_of_actual 
= sum(as_of_actual)) - } else { - filteredScoreDf = filteredScoreDf %>% - group_by(forecaster, forecast_date, target_end_date, ahead) %>% - summarize(Score = sum(Score)/length(locationsIntersect), actual = sum(actual)) - } - } - else { - if (hasAsOfData) { - filteredScoreDf = filteredScoreDf %>% - group_by(forecaster, forecast_date, target_end_date, ahead) %>% - summarize(Score = sum(Score), actual = sum(actual), as_of_actual = sum(as_of_actual)) - } else { - filteredScoreDf = filteredScoreDf %>% - group_by(forecaster, forecast_date, target_end_date, ahead) %>% - summarize(Score = sum(Score), actual = sum(actual)) - } - } - return (list(filteredScoreDf, locationsIntersect)) -} - -# Only use weekly aheads for hospitalizations -# May change in the future -filterHospitalizationsAheads = function(scoreDf) { - scoreDf['weekday'] = weekdays(as.Date(scoreDf$target_end_date)) - scoreDf = scoreDf %>% filter(weekday == HOSPITALIZATIONS_TARGET_DAY) - - oneAheadDf = scoreDf %>% filter(ahead >= HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 7 + HOSPITALIZATIONS_OFFSET) %>% - group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>% - mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[1]) - twoAheadDf = scoreDf %>% filter(ahead >= 7 + HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 14 + HOSPITALIZATIONS_OFFSET) %>% - group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>% - mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[2]) - threeAheadDf = scoreDf %>% filter(ahead >= 14 + HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 21 + HOSPITALIZATIONS_OFFSET) %>% - group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>% - mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[3]) - fourAheadDf = scoreDf %>% filter(ahead >= 21 + HOSPITALIZATIONS_OFFSET) %>% filter(ahead < 28 + HOSPITALIZATIONS_OFFSET) %>% - group_by(target_end_date, forecaster) %>% filter(ahead == min(ahead)) %>% - mutate(ahead = HOSPITALIZATIONS_AHEAD_OPTIONS[4]) - - return(rbind(oneAheadDf, 
twoAheadDf, threeAheadDf, fourAheadDf)) -} \ No newline at end of file diff --git a/dashboard/export_scores.R b/dashboard/export_scores.R deleted file mode 100644 index 9eb1954..0000000 --- a/dashboard/export_scores.R +++ /dev/null @@ -1,60 +0,0 @@ -source('./common.R') - -create_export_df = function(scoreDf, targetVariable, scoreType, forecasters, loc, coverageInterval) { - signalFilter = CASE_FILTER - if (targetVariable == "Deaths") { - signalFilter = DEATH_FILTER - } else if (targetVariable == "Hospitalizations") { - signalFilter = HOSPITALIZATIONS_FILTER - } - scoreDf = renameScoreCol(scoreDf, scoreType, coverageInterval) - scoreDf = scoreDf %>% - filter(signal == signalFilter) %>% - filter(forecaster %in% forecasters) - if (loc == TOTAL_LOCATIONS || scoreType == "coverage") { - if (signalFilter == HOSPITALIZATIONS_FILTER) { - scoreDf = filterHospitalizationsAheads(scoreDf) - } - scoreDf = filterOverAllLocations(scoreDf, scoreType) - return(scoreDf[[1]]) - } else { - scoreDf = scoreDf %>% filter(geo_value == tolower(loc)) - scoreDf = scoreDf[c("ahead", "geo_value", "forecaster", "forecast_date", "data_source", "target_end_date", "Score", "actual")] - return(scoreDf) - } -} - -export_scores_ui = div( - downloadButton("exportScores", "Download CSV") -) - -export_scores_server = function(input, output, df) { - output$exportScores <- downloadHandler( - filename = function() { - score = input$scoreType - if (input$scoreType == 'sharpness') { - score = 'spread' - } - filename = paste0("forecast-eval-", input$targetVariable, "-", score) - if (input$location != TOTAL_LOCATIONS) { - filename = paste0(filename, '-', input$location) - } else if (input$scoreType == 'coverage') { - filename = paste0(filename, '-', 'averaged-over-common-locations-Coverage-interval-', input$coverageInterval) - } else { - filename = paste0(filename, '-totaled-over-common-locations') - } - paste0(filename,'-', Sys.Date(), ".csv") - }, - contentType = 'text/csv', - content = function(file) { - 
withProgress(message = 'Preparing export', - detail = 'This may take a while...', value = 0, max = 2, { - out_df = create_export_df(df, input$targetVariable, input$scoreType, input$forecasters, - input$location, input$coverageInterval) - incProgress(1) - write.csv(out_df, file, row.names=FALSE) - incProgress(2) - }) - } - ) -} diff --git a/dashboard/github.svg b/dashboard/github.svg deleted file mode 100644 index 878e274..0000000 --- a/dashboard/github.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/dashboard/www/style.css b/dashboard/www/style.css deleted file mode 100644 index 87dfcb0..0000000 --- a/dashboard/www/style.css +++ /dev/null @@ -1,154 +0,0 @@ -.shiny-output-error { visibility: hidden; } -.shiny-output-error:before { visibility: hidden; } - -#logo{ - margin-left:35px; - margin-right:60px; - padding-right: 0; - padding-left:0; -} -#aboutContentArea{ - max-width: 70rem; -} -#header{ - display: flex; - align-items: flex-end; - flex-wrap: wrap; - padding-bottom: 5px; - margin-top: 20px; - box-shadow: 0 3px 5px -1px rgb(0 0 0 / 15%); - font-family: "open sans", Roboto, Arial, sans-serif; - font-size: 16px; - font-weight: 400; - color: black; -} - -#title { - white-space: pre-wrap; - font-weight: 300; - font-size: 18px; - padding-left:0; -} -#bold-title { - font-weight: 500; -} -#back-button { - color: #505565; - font-size: 14px; - font-weight:350; - margin-top:10px; - width: 65px; - text-decoration:none; - display: flex; - align-items: center; - flex-wrap: nowrap; - flex-direction: row; -} -#back-button svg { - width:14px; - height:14px; - fill: #505565; - display: flex; - align-items: center; - flex-wrap: nowrap; - flex-direction: row; -} -#github-logo-container { - display: flex; - justify-content: flex-end; - padding-right: 0; - padding-left:0; - margin-bottom:20px; -} -#github-logo { - text-decoration:none; - color: #505565; - display: flex; - align-items: center; - flex-wrap: nowrap; - flex-direction: row; -} -#github-logo 
svg { - width:16px; - height:16px; - fill: #505565; -} -#notes { - font-weight: 600; - font-size: 18px; -} -#forecaster-input { - font-weight:normal; - font-size:13px; -} -#missing-data-disclaimer { - margin-top:-20px; - font-size:12px; -} -#horizon-disclaimer { - margin-top:-10px; - font-size:12px; -} -#drag-to-zoom { - font-size:11px; -} -#refresh-colors { - height: 26px; - font-size: 12px; -} -#scale-score { - font-weight: bold; -} -#data-loading-message { - font-style: italic; - font-size: 18px; -} -#truth-plot-loading-message { - margin-top:50px; - font-size: 18px; - font-style:italic; - color: #949494; -} - -@media (max-width: 1450px) { - #github-logo-container { - margin-left:100px; - } -} - -@media (max-width: 1000px) { - #title { - margin-left: 35px; - } - #github-logo-container { - margin-left:35px; - } -} - -@media (max-width: 767px) { - #title { - width:100%; - margin-top: 10px; - } - #github-logo-container { - justify-content: center; - margin-top: 10px; - } -} - -@media (min-width: 1100px) { - #logo { - margin-left: 5%; - } -} - -@media (min-width: 1450px) { - #logo { - margin-left: 9%; - margin-right: 0; - } -} - - - - diff --git a/devops/Dockerfile b/devops/Dockerfile new file mode 100644 index 0000000..d64a6fa --- /dev/null +++ b/devops/Dockerfile @@ -0,0 +1,15 @@ +FROM rocker/shiny-verse +LABEL org.opencontainers.image.source = "https://github.com/cmu-delphi/forecast-eval" +RUN apt-get update && apt-get install -qq -y \ + libgdal-dev \ + libudunits2-dev && \ + rm -rf /var/cache/apt && \ + apt-get clean + +COPY devops/shiny_server.conf /etc/shiny-server/shiny-server.conf +WORKDIR /srv/shinyapp/ +COPY DESCRIPTION ./ +RUN Rscript -e "devtools::install_deps(dependencies = NA)" +COPY dist/*.rds ./ +COPY app/ ./ +RUN chmod -R a+rw . 
diff --git a/docker_dashboard/shiny_server.conf b/devops/shiny_server.conf similarity index 72% rename from docker_dashboard/shiny_server.conf rename to devops/shiny_server.conf index 43b3a31..e209c64 100644 --- a/docker_dashboard/shiny_server.conf +++ b/devops/shiny_server.conf @@ -1,13 +1,16 @@ run_as shiny; +# allow embedding only iframe with same domain +frame_options sameorigin; + server { - listen 3838; + listen 80; # Define a location at the base URL location / { # Host the directory of Shiny Apps stored in this directory - site_dir /srv/shiny-server; + site_dir /srv/shinyapp; # Log all Shiny output to files in this directory log_dir /var/log/shiny-server; @@ -17,7 +20,7 @@ server { directory_index on; # Disable some network protocols that are causing issues - disable_protocols websocket xdr-streaming xhr-streaming iframe-eventsource iframe-htmlfile; + # disable_protocols websocket xdr-streaming xhr-streaming iframe-eventsource iframe-htmlfile; # Set app timeout threshold to 4 hours app_idle_timeout 14400; diff --git a/docker_build/dependencies.R b/docker_build/dependencies.R index 08fc4dc..171951a 100644 --- a/docker_build/dependencies.R +++ b/docker_build/dependencies.R @@ -1,3 +1,2 @@ # Tidyverse is installed in the base image # Other packages should be installed here - diff --git a/docker_dashboard/Dockerfile b/docker_dashboard/Dockerfile deleted file mode 100644 index 072bd87..0000000 --- a/docker_dashboard/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM rocker/shiny-verse -LABEL org.opencontainers.image.source = "https://github.com/cmu-delphi/forecast-eval" - -RUN apt-get update && apt-get install -qq -y \ - libgdal-dev \ - libudunits2-dev - -ADD docker_dashboard/shiny_server.conf /etc/shiny-server/shiny-server.conf -RUN install2.r --error \ - plotly \ - shinyjs \ - tsibble \ - viridis \ - aws.s3 \ - covidcast \ - stringr \ - markdown \ - memoise - -COPY dist/*rds /srv/shiny-server/ -COPY dashboard/* /srv/shiny-server/ -COPY dashboard/www/*png 
/srv/shiny-server/www/ -COPY dist/*css /srv/shiny-server/www/ - - From 5cf4f34fec3f5fa3df8b1ab0b7d54ef423111490 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 13 Oct 2021 18:23:50 -0400 Subject: [PATCH 05/58] bring up to date with dev --- DESCRIPTION | 3 ++- app/R/data.R | 12 ++++++++++++ app/server.R | 2 +- app/ui.R | 13 +++++-------- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6ab3f59..2d91518 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,7 +23,8 @@ Imports: aws.s3, covidcast, stringr, - markdown + markdown, + memoise Suggests: styler, lintr, diff --git a/app/R/data.R b/app/R/data.R index b7d76ec..8124cc8 100644 --- a/app/R/data.R +++ b/app/R/data.R @@ -1,5 +1,17 @@ library(aws.s3) +# Set application-level caching location. Stores up to 1GB of caches. Removes +# least recently used objects first. +shinyOptions(cache = cachem::cache_mem(max_size = 1000 * 1024^2, evict="lru")) +cache <- getShinyOption("cache") + +# Since covidcast data updates about once a day. Add date arg to +# covidcast_signal so caches aren't used after that. 
+covidcast_signal_mem <- function(..., date=Sys.Date()) { + return(covidcast_signal(...)) +} +covidcast_signal_mem <- memoise::memoise(covidcast_signal_mem, cache = cache) + # Get and prepare data getS3Bucket <- function() { # Connect to AWS s3bucket diff --git a/app/server.R b/app/server.R index 5da60fc..194cf62 100644 --- a/app/server.R +++ b/app/server.R @@ -720,7 +720,7 @@ server <- function(input, output, session) { fetchDate <- as.Date(input$asOf) + 1 # Covidcast API call - asOfTruthData <- covidcast_signal( + asOfTruthData <- covidcast_signal_mem( data_source = dataSource, signal = targetSignal, start_day = "2020-02-15", end_day = fetchDate, as_of = fetchDate, diff --git a/app/ui.R b/app/ui.R index 6c6293f..4c3a337 100644 --- a/app/ui.R +++ b/app/ui.R @@ -40,14 +40,11 @@ sidebar <- tags$div( "Log Scale", value = FALSE, ), - conditionalPanel( - condition = "input.targetVariable != 'Hospitalizations'", - checkboxInput( - "scaleByBaseline", - "Scale by Baseline Forecaster", - value = FALSE, - ) - ), + checkboxInput( + "scaleByBaseline", + "Scale by Baseline Forecaster", + value = FALSE, + ) ), selectInput( "forecasters", From 150329de794a9fc0349cdeb022c6d9678a952ca4 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 13 Oct 2021 19:46:07 -0400 Subject: [PATCH 06/58] styling --- app/R/data.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/R/data.R b/app/R/data.R index 8124cc8..06bd512 100644 --- a/app/R/data.R +++ b/app/R/data.R @@ -2,12 +2,12 @@ library(aws.s3) # Set application-level caching location. Stores up to 1GB of caches. Removes # least recently used objects first. -shinyOptions(cache = cachem::cache_mem(max_size = 1000 * 1024^2, evict="lru")) +shinyOptions(cache = cachem::cache_mem(max_size = 1000 * 1024^2, evict = "lru")) cache <- getShinyOption("cache") # Since covidcast data updates about once a day. Add date arg to # covidcast_signal so caches aren't used after that. 
-covidcast_signal_mem <- function(..., date=Sys.Date()) { +covidcast_signal_mem <- function(..., date = Sys.Date()) { return(covidcast_signal(...)) } covidcast_signal_mem <- memoise::memoise(covidcast_signal_mem, cache = cache) From 09616c0b67e0f402a29bb5d2f1c86edad782d539 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 13 Oct 2021 18:59:35 -0400 Subject: [PATCH 07/58] make sure score info appears at the right time --- app/server.R | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/app/server.R b/app/server.R index 194cf62..c1d278c 100644 --- a/app/server.R +++ b/app/server.R @@ -58,11 +58,11 @@ updateAheadChoices <- function(session, df, targetVariable, forecasterChoices, a if (targetVariable == "Hospitalizations") { aheadOptions <- HOSPITALIZATIONS_AHEAD_OPTIONS title <- "Forecast Horizon (Days)" - show("horizon-disclaimer") + showElement("horizon-disclaimer") } else { aheadOptions <- AHEAD_OPTIONS title <- "Forecast Horizon (Weeks)" - hide("horizon-disclaimer") + hideElement("horizon-disclaimer") } aheadChoices <- Filter(function(x) any(unique(df$ahead) %in% x), aheadOptions) # Ensure previsouly selected options are still allowed @@ -592,28 +592,28 @@ server <- function(input, output, session) { showElement("showForecastsCheckbox") } if (input$scoreType == "wis") { - show("wisExplanation") - hide("sharpnessExplanation") - hide("aeExplanation") - hide("coverageExplanation") + showElement("wisExplanation") + hideElement("sharpnessExplanation") + hideElement("aeExplanation") + hideElement("coverageExplanation") } if (input$scoreType == "sharpness") { - show("sharpnessExplanation") - hide("wisExplanation") - hide("aeExplanation") - hide("coverageExplanation") + showElement("sharpnessExplanation") + hideElement("wisExplanation") + hideElement("aeExplanation") + hideElement("coverageExplanation") } if (input$scoreType == "ae") { - hide("wisExplanation") 
- hide("sharpnessExplanation") - show("aeExplanation") - hide("coverageExplanation") + hideElement("wisExplanation") + hideElement("sharpnessExplanation") + showElement("aeExplanation") + hideElement("coverageExplanation") } if (input$scoreType == "coverage") { - hide("wisExplanation") - hide("sharpnessExplanation") - hide("aeExplanation") - show("coverageExplanation") + hideElement("wisExplanation") + hideElement("sharpnessExplanation") + hideElement("aeExplanation") + showElement("coverageExplanation") } }) @@ -657,11 +657,11 @@ server <- function(input, output, session) { observe({ # Show data loading message and hide other messages until all data is loaded if (DATA_LOADED) { - hide("data-loading-message") - show("refresh-colors") - show("notes") - show("scoreExplanations") - show("scoringDisclaimer") + hideElement("data-loading-message") + showElement("refresh-colors") + showElement("notes") + showElement("scoreExplanations") + showElement("scoringDisclaimer") } # Ensure there is always one ahead selected if (length(input$aheads) < 1) { From 0c2e20d843d23fac0d40a01568eebc0484784fa4 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 15 Oct 2021 10:39:47 -0400 Subject: [PATCH 08/58] track colorseed in session var --- app/server.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/app/server.R b/app/server.R index 194cf62..9c59654 100644 --- a/app/server.R +++ b/app/server.R @@ -94,6 +94,7 @@ server <- function(input, output, session) { AS_OF_CHOICES <- reactiveVal(NULL) SUMMARIZING_OVER_ALL_LOCATIONS <- reactive(input$scoreType == "coverage" || input$location == TOTAL_LOCATIONS) + COLOR_SEED <- reactiveVal(100) CURRENT_WEEK_END_DATE <- reactiveVal(CASES_DEATHS_CURRENT) @@ -110,7 +111,7 @@ server <- function(input, output, session) { ################## # CREATE MAIN PLOT ################## - summaryPlot <- function(colorSeed = 100, reRenderTruth = FALSE, asOfData = NULL) { + summaryPlot <- 
function(reRenderTruth = FALSE, asOfData = NULL) { filteredScoreDf <- filterScoreDf() dfWithForecasts <- NULL if (input$showForecasts) { @@ -215,7 +216,7 @@ server <- function(input, output, session) { ) # Set forecaster colors for plot - set.seed(colorSeed) + set.seed(COLOR_SEED()) forecasterRand <- sample(unique(df$forecaster)) colorPalette <- setNames(object = viridis(length(unique(df$forecaster))), nm = forecasterRand) if (!is.null(asOfData)) { @@ -544,9 +545,9 @@ server <- function(input, output, session) { ################### observeEvent(input$refreshColors, { - colorSeed <- floor(runif(1, 1, 1000)) + COLOR_SEED(floor(runif(1, 1, 1000))) output$summaryPlot <- renderPlotly({ - summaryPlot(colorSeed) + summaryPlot() }) }) From e39e0d4c0c653b19e4198f101fb7d9a378533db0 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 15 Oct 2021 12:23:15 -0400 Subject: [PATCH 09/58] styling --- app/ui.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/ui.R b/app/ui.R index 7d94796..3be22a7 100644 --- a/app/ui.R +++ b/app/ui.R @@ -96,7 +96,8 @@ sidebar <- tags$div( selected = "" ), tags$p(id = "missing-data-disclaimer", "Some locations may not have 'as of' data for the chosen 'as of' date"), - div(id="showForecastsCheckbox", + div( + id = "showForecastsCheckbox", disabled( checkboxInput( "showForecasts", From 4307e85a6c6cdcfc3a6f8843ba4da18af9f7b25f Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 15 Oct 2021 14:36:15 -0400 Subject: [PATCH 10/58] simplify and clarify filters, dim, joins --- app/server.R | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/app/server.R b/app/server.R index c8a08ee..6c1df8e 100644 --- a/app/server.R +++ b/app/server.R @@ -118,7 +118,7 @@ server <- function(input, output, session) { } # Need to do this after setting dfWithForecasts to leave in aheads for forecasts filteredScoreDf 
<- filteredScoreDf %>% filter(ahead %in% input$aheads) - if (dim(filteredScoreDf)[1] == 0) { + if (nrow(filteredScoreDf) == 0) { output$renderWarningText <- renderText(paste0( "The selected forecasters do not have enough data ", "to display the selected scoring metric." @@ -126,23 +126,23 @@ server <- function(input, output, session) { return() } if (is.null(asOfData)) { - if (!is.null(isolate(PREV_AS_OF_DATA())) && dim(isolate(PREV_AS_OF_DATA()))[1] != 0 && + if (!is.null(isolate(PREV_AS_OF_DATA())) && nrow(isolate(PREV_AS_OF_DATA())) != 0 && isolate(input$asOf) != "" && isolate(input$asOf) != isolate(CURRENT_WEEK_END_DATE())) { asOfData <- isolate(PREV_AS_OF_DATA()) } } - if (!is.null(asOfData) && dim(asOfData)[1] != 0) { + if (!is.null(asOfData) && nrow(asOfData) != 0) { asOfData <- asOfData %>% rename(target_end_date = time_value, as_of_actual = value) asOfData <- asOfData[c("target_end_date", "geo_value", "as_of_actual")] # Get the 'as of' dates that are the target_end_dates in the scoring df dateGroupDf <- asOfData %>% filter(asOfData$target_end_date %in% filteredScoreDf$target_end_date) - if (dim(dateGroupDf)[1] != 0) { + if (nrow(dateGroupDf) != 0) { # Since cases and deaths are shown as weekly incidence, but the "as of" data from the covidcast API # is daily, we need to sum over the days leading up to the target_end_date of each week to get the # weekly incidence asOfData <- filterAsOfData(asOfData, dateGroupDf, filteredScoreDf) - filteredScoreDf <- merge(filteredScoreDf, asOfData, by = c("target_end_date", "geo_value"), all = TRUE) + filteredScoreDf <- full_join(filteredScoreDf, asOfData, by = c("target_end_date", "geo_value")) } else { # Input 'as of' date chosen does not match the available target_end_dates that result from the rest of the selected inputs # It is too far back or we are switching between hosp and cases/deaths which have different target date days @@ -239,9 +239,10 @@ server <- function(input, output, session) { updateAsOfChoices(session, 
truthDf) # Format and transform data for plot - filteredScoreDf <- filteredScoreDf %>% filter(!is.na(Week_End_Date)) - filteredScoreDf <- filteredScoreDf[c("Forecaster", "Forecast_Date", "Week_End_Date", "Score", "ahead")] - filteredScoreDf <- filteredScoreDf %>% mutate(across(where(is.numeric), ~ round(., 2))) + filteredScoreDf <- filteredScoreDf %>% + filter(!is.na(Week_End_Date)) %>% + select(Forecaster, Forecast_Date, Week_End_Date, Score, ahead) %>% + mutate(across(where(is.numeric), ~ round(., 2))) if (input$scoreType != "coverage") { if (input$scaleByBaseline) { baselineDf <- filteredScoreDf %>% filter(Forecaster %in% "COVIDhub-baseline") @@ -450,12 +451,13 @@ server <- function(input, output, session) { # Create a df to fill in the corresponding target_end_date in a new date_group column for all intervening days dateGroupDf[, "date_group"] <- NA dateGroupDf$date_group <- dateGroupDf$target_end_date - asOfData <- merge(asOfData, dateGroupDf, by = c("target_end_date", "geo_value", "as_of_actual"), all = TRUE) + asOfData <- full_join(asOfData, dateGroupDf, by = c("target_end_date", "geo_value", "as_of_actual")) # Cut off the extra days on beginning and end of series so that when we sum the values we are only # summing over the weeks included in the score plot - asOfData <- asOfData %>% filter(target_end_date >= min(filteredScoreDf$target_end_date) - 6) - asOfData <- asOfData %>% filter(target_end_date <= isolate(input$asOf)) + asOfData <- asOfData %>% + filter(target_end_date >= min(filteredScoreDf$target_end_date) - 6) %>% + filter(target_end_date <= isolate(input$asOf)) # Fill in the date_group column with the target week end days for all intervening days asOfData <- asOfData %>% @@ -735,7 +737,7 @@ server <- function(input, output, session) { hideElement("truth-plot-loading-message") PREV_AS_OF_DATA(asOfTruthData) - if (dim(asOfTruthData)[1] == 0) { + if (nrow(asOfTruthData) == 0) { return() } summaryPlot(reRenderTruth = TRUE, asOfData = asOfTruthData) @@ 
-749,12 +751,14 @@ server <- function(input, output, session) { selectedAsOf <- isolate(input$asOf) if (input$targetVariable == "Hospitalizations") { minChoice <- MIN_AVAIL_HOSP_AS_OF_DATE + asOfChoices <- asOfChoices[asOfChoices >= minChoice] } else if (input$location == "US" && input$scoreType != "coverage") { minChoice <- MIN_AVAIL_NATION_AS_OF_DATE + asOfChoices <- asOfChoices[asOfChoices >= minChoice] } else if (input$location %in% TERRITORIES || input$location == TOTAL_LOCATIONS || input$scoreType == "coverage") { minChoice <- MIN_AVAIL_TERRITORY_AS_OF_DATE + asOfChoices <- asOfChoices[asOfChoices >= minChoice] } - asOfChoices <- asOfChoices[asOfChoices >= minChoice] asOfChoices <- c(asOfChoices, CURRENT_WEEK_END_DATE()) # Make sure we have a valid as of selection nonValidAsOf <- selectedAsOf == "" || !(as.Date(selectedAsOf) %in% asOfChoices) From 6b1d0067cef69ba0b5eef95fd62ad0106f44a0b2 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 15 Oct 2021 16:15:16 -0400 Subject: [PATCH 11/58] move height setting, remove shape setting to fix plotting warnings --- app/server.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/app/server.R b/app/server.R index 6c1df8e..ed1846e 100644 --- a/app/server.R +++ b/app/server.R @@ -319,12 +319,10 @@ server <- function(input, output, session) { } plotHeight <- 550 + (length(input$aheads) - 1) * 100 finalPlot <- - ggplotly(p, tooltip = c("x", "y", "shape", "label")) %>% + ggplotly(p, tooltip = c("x", "y", "shape", "label"), height = plotHeight) %>% layout( - height = plotHeight, legend = list(orientation = "h", y = -0.1), margin = list(t = 90), - height = 500, hovermode = "x unified", xaxis = list( title = list(text = "Target Date", standoff = 8L), @@ -380,7 +378,7 @@ server <- function(input, output, session) { geom_point(aes(y = Reported_As_Of_Incidence, color = "Reported_As_Of_Incidence")) if (input$showForecasts) { finalPlot <- finalPlot + - 
geom_line(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) + + geom_line(aes(y = Quantile_50, color = Forecaster)) + geom_point(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) } } else { From 0a6b7a9e20b1894115bd5b689933021924240e1b Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 15 Oct 2021 17:23:07 -0400 Subject: [PATCH 12/58] display full location names --- app/global.R | 2 ++ app/server.R | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/app/global.R b/app/global.R index 222329e..8c40f4f 100644 --- a/app/global.R +++ b/app/global.R @@ -35,6 +35,8 @@ MIN_AVAIL_HOSP_AS_OF_DATE <- as.Date("2020-11-11") MIN_AVAIL_TERRITORY_AS_OF_DATE <- as.Date("2021-02-10") TERRITORIES <- c("AS", "GU", "MP", "VI") +STATE_ABB <- c(state.abb, TERRITORIES, "PR", "DC") +STATE_NAME <- c(state.name, "American Samoa", "Guam", "Northern Mariana Islands", "US Virgin Islands", "Puerto Rico", "District of Columbia") resolveCurrentCasesDeathDay <- function() { # Get most recent target end date diff --git a/app/server.R b/app/server.R index 194cf62..3ab8092 100644 --- a/app/server.R +++ b/app/server.R @@ -38,9 +38,18 @@ updateCoverageChoices <- function(session, df, targetVariable, forecasterChoices updateLocationChoices <- function(session, df, targetVariable, forecasterChoices, locationInput) { df <- df %>% filter(forecaster %in% forecasterChoices) locationChoices <- unique(toupper(df$geo_value)) + # Move US to front of list locationChoices <- locationChoices[c(length(locationChoices), seq_len(length(locationChoices) - 1))] + # Add totaled states option to front of list locationChoices <- c(TOTAL_LOCATIONS, locationChoices) + + # Display full names for subset of locations + longnames <- STATE_NAME[match(locationChoices, STATE_ABB)] + names(locationChoices) <- paste(locationChoices, "-", longnames) + unmatched <- which(is.na(longnames)) + names(locationChoices)[unmatched] <- locationChoices[unmatched] + 
# Ensure previously selected options are still allowed if (locationInput %in% locationChoices) { selectedLocation <- locationInput From 0f493640f292c5e1120cc40a4c01f261c8ff8e19 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 18 Oct 2021 16:46:16 -0400 Subject: [PATCH 13/58] modify layout css to support resizing --- app/www/delphiLayout.css | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/www/delphiLayout.css b/app/www/delphiLayout.css index 2e3afe3..aba03de 100644 --- a/app/www/delphiLayout.css +++ b/app/www/delphiLayout.css @@ -53,12 +53,14 @@ html { flex-grow: 1; display: -webkit-flex; display: flex; + min-width: 0; margin: 0; } .delphi-root > .container-fluid > .row { -webkit-flex-grow: 1; flex-grow: 1; + min-width: 0; } .delphi-credits { @@ -100,7 +102,7 @@ a:hover { } .delphi-main-panel { - padding: 0; + padding: 10px; display: -webkit-flex; display: flex; } @@ -112,6 +114,7 @@ a:hover { display: flex; flex-direction: column; -webkit-flex-direction: column; + min-width: 0; } .delphi-main-panel > .tabbable > .tab-content { From 8c1c0560d581b5f6b42a45f40518b58800ea8399 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 1 Dec 2021 10:14:42 -0500 Subject: [PATCH 14/58] move legend title setting to plotly::layout --- app/server.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/app/server.R b/app/server.R index 2526ac1..31e3ac2 100644 --- a/app/server.R +++ b/app/server.R @@ -316,8 +316,7 @@ server <- function(input, output, session) { facet_wrap(~ahead, ncol = 1) + scale_color_manual(values = colorPalette) + theme_bw() + - theme(panel.spacing = unit(0.5, "lines")) + - theme(legend.title = element_blank()) + theme(panel.spacing = unit(0.5, "lines")) if (input$scoreType == "coverage") { p <- p + geom_hline(yintercept = .01 * as.integer(input$coverageInterval)) @@ -331,7 +330,7 @@ server <- function(input, 
output, session) { finalPlot <- ggplotly(p, tooltip = c("x", "y", "shape", "label"), height = plotHeight) %>% layout( - legend = list(orientation = "h", y = -0.1), + legend = list(orientation = "h", y = -0.1, title=list(text = NULL)), margin = list(t = 90), hovermode = "x unified", xaxis = list( @@ -377,8 +376,7 @@ server <- function(input, output, session) { scale_y_continuous(limits = c(0, NA), labels = scales::comma) + scale_x_date(date_labels = "%b %Y") + scale_color_manual(values = colorPalette) + - theme_bw() + - theme(legend.title = element_blank()) + theme_bw() if (hasAsOfData) { finalPlot <- finalPlot + @@ -396,7 +394,10 @@ server <- function(input, output, session) { geom_point(aes(y = Reported_Incidence)) } finalPlot <- ggplotly(finalPlot, tooltip = c("shape", "x", "y")) %>% - layout(hovermode = "x unified", legend = list(orientation = "h", y = -0.1)) %>% + layout( + hovermode = "x unified", + legend = list(orientation = "h", y = -0.1, title=list(text = NULL)) + ) %>% config(displayModeBar = F) # Remove the extra grouping from the legend: "(___,1)" for (i in seq_along(finalPlot$x$data)) { From efaf9dc34592f63876a0134dd95161fa84210841 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 1 Dec 2021 16:25:37 -0500 Subject: [PATCH 15/58] move height setting back to layout because it interferes with subplot spacing --- app/server.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/app/server.R b/app/server.R index 31e3ac2..7c7a560 100644 --- a/app/server.R +++ b/app/server.R @@ -328,9 +328,10 @@ server <- function(input, output, session) { } plotHeight <- 550 + (length(input$aheads) - 1) * 100 finalPlot <- - ggplotly(p, tooltip = c("x", "y", "shape", "label"), height = plotHeight) %>% + ggplotly(p, tooltip = c("x", "y", "shape", "label")) %>% layout( - legend = list(orientation = "h", y = -0.1, title=list(text = NULL)), + height = plotHeight, + legend = list(orientation = "h", y = -0.1, 
title = list(text=NULL)), margin = list(t = 90), hovermode = "x unified", xaxis = list( @@ -396,7 +397,7 @@ server <- function(input, output, session) { finalPlot <- ggplotly(finalPlot, tooltip = c("shape", "x", "y")) %>% layout( hovermode = "x unified", - legend = list(orientation = "h", y = -0.1, title=list(text = NULL)) + legend = list(orientation = "h", y = -0.1, title = list(text=NULL)) ) %>% config(displayModeBar = F) # Remove the extra grouping from the legend: "(___,1)" From 8c517609efa5309b11eb90ac6f933a7792c7a14c Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 1 Dec 2021 16:46:11 -0500 Subject: [PATCH 16/58] styling --- app/server.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/server.R b/app/server.R index 7c7a560..9c80413 100644 --- a/app/server.R +++ b/app/server.R @@ -331,7 +331,7 @@ server <- function(input, output, session) { ggplotly(p, tooltip = c("x", "y", "shape", "label")) %>% layout( height = plotHeight, - legend = list(orientation = "h", y = -0.1, title = list(text=NULL)), + legend = list(orientation = "h", y = -0.1, title = list(text = NULL)), margin = list(t = 90), hovermode = "x unified", xaxis = list( @@ -397,7 +397,7 @@ server <- function(input, output, session) { finalPlot <- ggplotly(finalPlot, tooltip = c("shape", "x", "y")) %>% layout( hovermode = "x unified", - legend = list(orientation = "h", y = -0.1, title = list(text=NULL)) + legend = list(orientation = "h", y = -0.1, title = list(text = NULL)) ) %>% config(displayModeBar = F) # Remove the extra grouping from the legend: "(___,1)" From 6340c6cf1bf6f08f4c6c51353951afff90c32c3b Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 2 Dec 2021 17:29:31 -0500 Subject: [PATCH 17/58] extend as-of dates --- app/global.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/global.R b/app/global.R index 8c40f4f..50a420b 100644 --- 
a/app/global.R +++ b/app/global.R @@ -30,9 +30,9 @@ HOSPITALIZATIONS_AHEAD_OPTIONS <- c( ) # Earliest 'as of' date available from covidcast API -MIN_AVAIL_NATION_AS_OF_DATE <- as.Date("2021-01-09") -MIN_AVAIL_HOSP_AS_OF_DATE <- as.Date("2020-11-11") -MIN_AVAIL_TERRITORY_AS_OF_DATE <- as.Date("2021-02-10") +MIN_AVAIL_NATION_AS_OF_DATE <- as.Date("2020-04-02") +MIN_AVAIL_HOSP_AS_OF_DATE <- as.Date("2020-11-16") +MIN_AVAIL_TERRITORY_AS_OF_DATE <- as.Date("2020-04-02") TERRITORIES <- c("AS", "GU", "MP", "VI") STATE_ABB <- c(state.abb, TERRITORIES, "PR", "DC") From 8024a48f5ba05a3017e300c6e7cadf11e92860cf Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 14 Dec 2021 16:36:47 -0500 Subject: [PATCH 18/58] force scoring pipeline to use old stable covidcast image --- docker_build/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_build/Dockerfile b/docker_build/Dockerfile index af88c80..539a6e1 100644 --- a/docker_build/Dockerfile +++ b/docker_build/Dockerfile @@ -1,2 +1,2 @@ # docker image for setting up an R environment -FROM ghcr.io/cmu-delphi/covidcast:latest \ No newline at end of file +FROM ghcr.io/cmu-delphi/covidcast@sha256:5c1324d2a9b67e557214ad7e1d5effee7229083d46e3c405a63505b8a3a6427b \ No newline at end of file From 12af7f6fc8f3b52a38a067bca435b4c087d1a1ff Mon Sep 17 00:00:00 2001 From: nmdefries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 16 Dec 2021 12:22:02 -0500 Subject: [PATCH 19/58] Revert "Force scoring pipeline to use old stable covidcast image" --- docker_build/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_build/Dockerfile b/docker_build/Dockerfile index 539a6e1..af88c80 100644 --- a/docker_build/Dockerfile +++ b/docker_build/Dockerfile @@ -1,2 +1,2 @@ # docker image for setting up an R environment -FROM ghcr.io/cmu-delphi/covidcast@sha256:5c1324d2a9b67e557214ad7e1d5effee7229083d46e3c405a63505b8a3a6427b \ No newline at 
end of file +FROM ghcr.io/cmu-delphi/covidcast:latest \ No newline at end of file From 6bb65370ca30478920635d53c6270ceedaada9ef Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 10 Jan 2022 12:27:55 -0500 Subject: [PATCH 20/58] increase workflow timeout to 12 hours --- .github/workflows/s3_upload_ec2.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/s3_upload_ec2.yml b/.github/workflows/s3_upload_ec2.yml index 1f62cda..63b7b24 100644 --- a/.github/workflows/s3_upload_ec2.yml +++ b/.github/workflows/s3_upload_ec2.yml @@ -16,6 +16,8 @@ jobs: deploy: # The type of runner that the job will run on runs-on: self-hosted + # Time out after 12 hours. + timeout-minutes: 720 # Steps represent a sequence of tasks that will be executed as part of the job steps: From 580e47fceeff3912675b9b2bf77eb3a7bf3bc125 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 10 Jan 2022 16:03:41 -0500 Subject: [PATCH 21/58] actually just make that 24 hours --- .github/workflows/s3_upload_ec2.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/s3_upload_ec2.yml b/.github/workflows/s3_upload_ec2.yml index 63b7b24..06fef8c 100644 --- a/.github/workflows/s3_upload_ec2.yml +++ b/.github/workflows/s3_upload_ec2.yml @@ -16,8 +16,8 @@ jobs: deploy: # The type of runner that the job will run on runs-on: self-hosted - # Time out after 12 hours. - timeout-minutes: 720 + # Time out after 24 hours. 
+ timeout-minutes: 1440 # Steps represent a sequence of tasks that will be executed as part of the job steps: From a77be45df11779bee577da9d695475f96c64518d Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 14 Jan 2022 16:32:18 -0500 Subject: [PATCH 22/58] display warnings as they happen --- Report/create_reports.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Report/create_reports.R b/Report/create_reports.R index 5ae5f37..ad403ed 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -4,6 +4,8 @@ library("dplyr") library("evalcast") library("lubridate") +options(warn = 1) + option_list <- list( make_option( c("-d", "--dir"), From a03c227581e6d759af9cc1b8e9aa8a0604e7b813 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 28 Jan 2022 13:16:53 -0500 Subject: [PATCH 23/58] note CDDEP-ABM omission --- Report/create_reports.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Report/create_reports.R b/Report/create_reports.R index ad403ed..701ab3e 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -23,6 +23,9 @@ prediction_cards_filepath <- case_when( TRUE ~ prediction_cards_filename ) +# Note: CDDEP-ABM is not longer available and causes some warnings when trying +# to download its data. Defer to `get_covidhub_forecaster_names` and underlying +# Reich Lab utilities as to which forecasters to include. 
forecasters <- unique(c( get_covidhub_forecaster_names(designations = c("primary", "secondary")), "COVIDhub-baseline", "COVIDhub-trained_ensemble", "COVIDhub-4_week_ensemble" From c9f7ac249b25456d6ddbd3adef0357684e4e2401 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 31 Jan 2022 08:51:45 -0500 Subject: [PATCH 24/58] turn standard warn setting back on after pulling data to make logs less verbose --- Report/create_reports.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index ad403ed..f092c2e 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -4,7 +4,6 @@ library("dplyr") library("evalcast") library("lubridate") -options(warn = 1) option_list <- list( make_option( @@ -23,6 +22,8 @@ prediction_cards_filepath <- case_when( TRUE ~ prediction_cards_filename ) +options(warn = 1) + forecasters <- unique(c( get_covidhub_forecaster_names(designations = c("primary", "secondary")), "COVIDhub-baseline", "COVIDhub-trained_ensemble", "COVIDhub-4_week_ensemble" @@ -48,6 +49,8 @@ predictions_cards <- get_covidhub_predictions(forecasters, ) %>% filter(!(incidence_period == "epiweek" & ahead > 4)) +options(warn = 0) + predictions_cards <- predictions_cards %>% filter(!is.na(target_end_date)) %>% filter(target_end_date < today()) From 53a365e7edd2f784d37f07c3440f3f0c56b972fd Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 28 Jan 2022 16:53:08 -0500 Subject: [PATCH 25/58] check if expected or previous forecasters are missing --- Report/create_reports.R | 16 ++++++++++++---- Report/score.R | 18 ++++++++++++++++++ Report/utils.R | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 4 deletions(-) create mode 100644 Report/utils.R diff --git a/Report/create_reports.R b/Report/create_reports.R index 701ab3e..fdb8dc4 100644 --- a/Report/create_reports.R +++ 
b/Report/create_reports.R @@ -4,6 +4,12 @@ library("dplyr") library("evalcast") library("lubridate") +# TODO: Contains fixed versions of WIS component metrics, to be ported over to evalcast +# Redefines overprediction, underprediction and sharpness +source("error_measures.R") +source("score.R") +source("utils.R") + options(warn = 1) option_list <- list( @@ -94,9 +100,6 @@ coverage_functions <- sapply( ) names(coverage_functions) <- cov_names -# TODO: Contains fixed versions of WIS component metrics, to be ported over to evalcast -# Redefines overprediction, underprediction and sharpness -source("error_measures.R") err_measures <- c( wis = weighted_interval_score, @@ -117,13 +120,18 @@ state_predictions <- predictions_cards %>% filter(geo_value != "us") rm(predictions_cards) gc() +## Check if nation and state predictions objects contain the expected forecasters +for (signal_name in signals) { + check_for_missing_forecasters(nation_predictions, forecasters, "nation", signal_name, opt$dir) + check_for_missing_forecasters(state_predictions, forecasters, "state", signal_name, opt$dir) +} + print("Evaluating state forecasts") state_scores <- evaluate_covid_predictions(state_predictions, err_measures, geo_type = "state" ) -source("score.R") if ("confirmed_incidence_num" %in% unique(state_scores$signal)) { print("Saving state confirmed incidence...") save_score_cards(state_scores, "state", diff --git a/Report/score.R b/Report/score.R index 8b057fe..3d20366 100644 --- a/Report/score.R +++ b/Report/score.R @@ -1,6 +1,24 @@ library("dplyr") library("assertthat") +type_map <- list( + "confirmed_incidence_num" = "cases", + "deaths_incidence_num" = "deaths", + "confirmed_admissions_covid_1d" = "hospitalizations" +) + +generate_score_card_file_path <- function(geo_type, signal_name, output_dir) { + sig_suffix <- type_map[[signal_name]] + output_file_name <- file.path( + output_dir, + paste0( + "score_cards_", geo_type, "_", + sig_suffix, ".rds" + ) + ) + return(output_file_name) 
+} + save_score_cards <- function(score_card, geo_type = c("state", "nation"), signal_name = c( "confirmed_incidence_num", diff --git a/Report/utils.R b/Report/utils.R new file mode 100644 index 0000000..22644fe --- /dev/null +++ b/Report/utils.R @@ -0,0 +1,32 @@ +check_for_missing_forecasters <- function(predictions_cards, forecasters_list, geo_type, signal_name, output_dir) { + output_file_name <- generate_score_card_file_path(geo_type, signal_name, output_dir) + previous_run_forecasters <- readRDS(output_file_name) %>% + filter(signal == signal_name) %>% + distinct(forecaster) %>% + pull() + current_run_forecasters <- predictions_cards %>% + filter(signal == signal_name) %>% + distinct(forecaster) %>% + pull() + + # Find forecasters we asked for that weren't produced. This just prints a + # message because we already know that some forecasters, like CDDEP-ABM, + # aren't available. + missing_forecasters <- setdiff(forecasters_list, current_run_forecasters) + if (length(missing_forecasters) != 0) { + print(paste( + paste(missing_forecasters, collapse = ", "), + "were asked for but not generated") + ) + } + + # Find forecasters included in the previous run (based on which ones are + # included in the relevant score file downloaded from the S3 bucket) that are + # not in the current run. 
+ missing_forecasters <- setdiff(previous_run_forecasters, current_run_forecasters) + assert_that(length(missing_forecasters) == 0, + msg = paste( + paste(missing_forecasters, collapse = ", "), + "were available in the most recent pipeline run but are no longer present") + ) +} From 44b1a8a2a4aac07efa05c253062f531770fa645f Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 2 Feb 2022 17:50:17 -0500 Subject: [PATCH 26/58] styler --- Report/utils.R | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/Report/utils.R b/Report/utils.R index 22644fe..cc25c3f 100644 --- a/Report/utils.R +++ b/Report/utils.R @@ -4,29 +4,30 @@ check_for_missing_forecasters <- function(predictions_cards, forecasters_list, g filter(signal == signal_name) %>% distinct(forecaster) %>% pull() - current_run_forecasters <- predictions_cards %>% + current_run_forecasters <- predictions_cards %>% filter(signal == signal_name) %>% distinct(forecaster) %>% pull() - - # Find forecasters we asked for that weren't produced. This just prints a + + # Find forecasters we asked for that weren't produced. This just prints a # message because we already know that some forecasters, like CDDEP-ABM, # aren't available. missing_forecasters <- setdiff(forecasters_list, current_run_forecasters) if (length(missing_forecasters) != 0) { print(paste( - paste(missing_forecasters, collapse = ", "), - "were asked for but not generated") - ) + paste(missing_forecasters, collapse = ", "), + "were asked for but not generated" + )) } - + # Find forecasters included in the previous run (based on which ones are # included in the relevant score file downloaded from the S3 bucket) that are # not in the current run. 
missing_forecasters <- setdiff(previous_run_forecasters, current_run_forecasters) assert_that(length(missing_forecasters) == 0, - msg = paste( - paste(missing_forecasters, collapse = ", "), - "were available in the most recent pipeline run but are no longer present") + msg = paste( + paste(missing_forecasters, collapse = ", "), + "were available in the most recent pipeline run but are no longer present" + ) ) } From ad7fe2c328195a6e839683bbf42cd0609bcdb5c0 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 28 Jan 2022 15:48:27 -0500 Subject: [PATCH 27/58] correct score objects in checks define wrapper for saving score files + alerting check if expected forecasters or previous forecasters are missing --- Report/create_reports.R | 85 ++++++++++++----------------------------- Report/score.R | 27 ++++++------- 2 files changed, 37 insertions(+), 75 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 43eb58f..d91f795 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -21,14 +21,22 @@ option_list <- list( ) opt_parser <- OptionParser(option_list = option_list) opt <- parse_args(opt_parser) +output_dir <- opt$dir prediction_cards_filename <- "predictions_cards.rds" prediction_cards_filepath <- case_when( - !is.null(opt$dir) ~ file.path(opt$dir, prediction_cards_filename), + !is.null(output_dir) ~ file.path(output_dir, prediction_cards_filename), TRUE ~ prediction_cards_filename ) + options(warn = 1) +# TODO: Contains fixed versions of WIS component metrics, to be ported over to evalcast +# Redefines overprediction, underprediction and sharpness +source("error_measures.R") +source("score.R") +source("utils.R") + # Note: CDDEP-ABM is not longer available and causes some warnings when trying # to download its data. Defer to `get_covidhub_forecaster_names` and underlying # Reich Lab utilities as to which forecasters to include. 
@@ -93,7 +101,7 @@ saveRDS(predictions_cards, ) print("Predictions saved") -# Create error measure functions +## Create error measure functions central_intervals <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.98) cov_names <- paste0("cov_", central_intervals * 100) coverage_functions <- sapply( @@ -102,7 +110,6 @@ coverage_functions <- sapply( ) names(coverage_functions) <- cov_names - err_measures <- c( wis = weighted_interval_score, overprediction = overprediction, @@ -124,75 +131,33 @@ gc() ## Check if nation and state predictions objects contain the expected forecasters for (signal_name in signals) { - check_for_missing_forecasters(nation_predictions, forecasters, "nation", signal_name, opt$dir) - check_for_missing_forecasters(state_predictions, forecasters, "state", signal_name, opt$dir) + check_for_missing_forecasters(nation_predictions, forecasters, "nation", signal_name, output_dir) + check_for_missing_forecasters(state_predictions, forecasters, "state", signal_name, output_dir) } +## Score predictions print("Evaluating state forecasts") +geo_type <- "state" state_scores <- evaluate_covid_predictions(state_predictions, err_measures, - geo_type = "state" + geo_type = geo_type ) -if ("confirmed_incidence_num" %in% unique(state_scores$signal)) { - print("Saving state confirmed incidence...") - save_score_cards(state_scores, "state", - signal_name = "confirmed_incidence_num", - output_dir = opt$dir - ) -} else { - warning("State confirmed incidence should generally be available. Please - verify that you expect not to have any cases incidence forecasts") -} -if ("deaths_incidence_num" %in% unique(state_scores$signal)) { - print("Saving state deaths incidence...") - save_score_cards(state_scores, "state", - signal_name = "deaths_incidence_num", - output_dir = opt$dir - ) -} else { - warning("State deaths incidence should generally be available. 
Please - verify that you expect not to have any deaths incidence forecasts") -} -if ("confirmed_admissions_covid_1d" %in% unique(state_scores$signal)) { - print("Saving state hospitalizations...") - save_score_cards(state_scores, "state", - signal_name = "confirmed_admissions_covid_1d", - output_dir = opt$dir - ) +for (signal_name in signals) { + save_score_cards_wrapper(state_scores, geo_type, signal_name, output_dir) } - print("Evaluating national forecasts") -# COVIDcast does not return national level data, using CovidHubUtils instead - +# TODO: When this function was created, COVIDcast did not return national level +# data, and CovidHubUtils was used instead. We could now switch to COVIDcast, +# but COVIDcast and CovidHubUtils don't produce exactly the same data. This +# requires more investigation. Also using CovidHubUtils might be faster. +geo_type <- "nation" nation_scores <- evaluate_chu(nation_predictions, signals, err_measures) -if ("confirmed_incidence_num" %in% unique(state_scores$signal)) { - print("Saving nation confirmed incidence...") - save_score_cards(nation_scores, "nation", - signal_name = "confirmed_incidence_num", output_dir = opt$dir - ) -} else { - warning("Nation confirmed incidence should generally be available. Please - verify that you expect not to have any cases incidence forecasts") -} -if ("deaths_incidence_num" %in% unique(state_scores$signal)) { - print("Saving nation deaths incidence...") - save_score_cards(nation_scores, "nation", - signal_name = "deaths_incidence_num", - output_dir = opt$dir - ) -} else { - warning("Nation deaths incidence should generally be available. 
Please - verify that you expect not to have any deaths incidence forecasts") -} -if ("confirmed_admissions_covid_1d" %in% unique(state_scores$signal)) { - print("Saving nation hospitalizations...") - save_score_cards(nation_scores, "nation", - signal_name = "confirmed_admissions_covid_1d", - output_dir = opt$dir - ) +for (signal_name in signals) { + save_score_cards_wrapper(nation_scores, geo_type, signal_name, output_dir) } + print("Done") diff --git a/Report/score.R b/Report/score.R index 3d20366..e115dac 100644 --- a/Report/score.R +++ b/Report/score.R @@ -35,21 +35,6 @@ save_score_cards <- function(score_card, geo_type = c("state", "nation"), msg = "signal is not in score_card" ) score_card <- score_card %>% filter(signal == signal_name) - - type_map <- list( - "confirmed_incidence_num" = "cases", - "deaths_incidence_num" = "deaths", - "confirmed_admissions_covid_1d" = "hospitalizations" - ) - sig_suffix <- type_map[[signal_name]] - output_file_name <- file.path( - output_dir, - paste0( - "score_cards_", geo_type, "_", - sig_suffix, ".rds" - ) - ) - if (geo_type == "state") { score_card <- score_card %>% filter(nchar(geo_value) == 2, geo_value != "us") @@ -58,12 +43,24 @@ save_score_cards <- function(score_card, geo_type = c("state", "nation"), filter(geo_value == "us") } + output_file_name <- generate_score_card_file_path(geo_type, signal_name, output_dir) saveRDS(score_card, file = output_file_name, compress = "xz" ) } +save_score_cards_wrapper <- function(score_card, geo_type, signal_name, output_dir) { + if (signal_name %in% unique(score_card["signal"])) { + print(paste("Saving", geo, type_map[[signal_name]], "...")) + save_score_cards(score_card, geo_type, + signal_name = signal_name, output_dir = output_dir + ) + } else { + stop(paste("No", signal_name, "available at the", geo_type, "level")) + } +} + evaluate_chu <- function(predictions, signals, err_measures) { allowed_signals <- c( "confirmed_incidence_num", From 
182f791730cf8de6f3d878362bc18565d6c32c98 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 2 Feb 2022 18:18:23 -0500 Subject: [PATCH 28/58] styler --- Report/create_reports.R | 9 +-------- Report/score.R | 2 +- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index d91f795..f6b04e4 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -28,15 +28,8 @@ prediction_cards_filepath <- case_when( TRUE ~ prediction_cards_filename ) - options(warn = 1) -# TODO: Contains fixed versions of WIS component metrics, to be ported over to evalcast -# Redefines overprediction, underprediction and sharpness -source("error_measures.R") -source("score.R") -source("utils.R") - # Note: CDDEP-ABM is not longer available and causes some warnings when trying # to download its data. Defer to `get_covidhub_forecaster_names` and underlying # Reich Lab utilities as to which forecasters to include. 
@@ -110,6 +103,7 @@ coverage_functions <- sapply( ) names(coverage_functions) <- cov_names + err_measures <- c( wis = weighted_interval_score, overprediction = overprediction, @@ -159,5 +153,4 @@ for (signal_name in signals) { save_score_cards_wrapper(nation_scores, geo_type, signal_name, output_dir) } - print("Done") diff --git a/Report/score.R b/Report/score.R index e115dac..0d7bd50 100644 --- a/Report/score.R +++ b/Report/score.R @@ -54,7 +54,7 @@ save_score_cards_wrapper <- function(score_card, geo_type, signal_name, output_d if (signal_name %in% unique(score_card["signal"])) { print(paste("Saving", geo, type_map[[signal_name]], "...")) save_score_cards(score_card, geo_type, - signal_name = signal_name, output_dir = output_dir + signal_name = signal_name, output_dir = output_dir ) } else { stop(paste("No", signal_name, "available at the", geo_type, "level")) From ff4d2342a858fb27b76a89d2971c885199753d4d Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 2 Feb 2022 18:38:08 -0500 Subject: [PATCH 29/58] typo --- Report/score.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Report/score.R b/Report/score.R index 0d7bd50..7bfb18f 100644 --- a/Report/score.R +++ b/Report/score.R @@ -52,7 +52,7 @@ save_score_cards <- function(score_card, geo_type = c("state", "nation"), save_score_cards_wrapper <- function(score_card, geo_type, signal_name, output_dir) { if (signal_name %in% unique(score_card["signal"])) { - print(paste("Saving", geo, type_map[[signal_name]], "...")) + print(paste("Saving", geo_type, type_map[[signal_name]], "...")) save_score_cards(score_card, geo_type, signal_name = signal_name, output_dir = output_dir ) From 55eeb3501eb688bebb07334f20f819790d2e6157 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 2 Feb 2022 18:57:34 -0500 Subject: [PATCH 30/58] add geo, signal to error; drop warning because most models only forecast for a single 
outcome variable, and certain signals are very unpopular, e.g. few models predict hospitalizations --- Report/utils.R | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/Report/utils.R b/Report/utils.R index cc25c3f..311bb38 100644 --- a/Report/utils.R +++ b/Report/utils.R @@ -9,17 +9,6 @@ check_for_missing_forecasters <- function(predictions_cards, forecasters_list, g distinct(forecaster) %>% pull() - # Find forecasters we asked for that weren't produced. This just prints a - # message because we already know that some forecasters, like CDDEP-ABM, - # aren't available. - missing_forecasters <- setdiff(forecasters_list, current_run_forecasters) - if (length(missing_forecasters) != 0) { - print(paste( - paste(missing_forecasters, collapse = ", "), - "were asked for but not generated" - )) - } - # Find forecasters included in the previous run (based on which ones are # included in the relevant score file downloaded from the S3 bucket) that are # not in the current run. 
@@ -27,7 +16,8 @@ check_for_missing_forecasters <- function(predictions_cards, forecasters_list, g assert_that(length(missing_forecasters) == 0, msg = paste( paste(missing_forecasters, collapse = ", "), - "were available in the most recent pipeline run but are no longer present" + "were available in the most recent pipeline run but are no longer present for", + geo_type, signal_name ) ) } From 2acbaaf86d6d71427534f98c609736dc815b9fda Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 17 Feb 2022 16:19:53 -0500 Subject: [PATCH 31/58] bundle score obj errors --- Report/create_reports.R | 16 ++++++++++++++-- Report/score.R | 3 ++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index f6b04e4..1336eef 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -137,8 +137,13 @@ state_scores <- evaluate_covid_predictions(state_predictions, geo_type = geo_type ) +save_score_errors <- list() + for (signal_name in signals) { - save_score_cards_wrapper(state_scores, geo_type, signal_name, output_dir) + status <- save_score_cards_wrapper(state_scores, geo_type, signal_name, output_dir) + if (status != 0) { + save_score_errors[paste(signal_name, geo_type)] <- status + } } print("Evaluating national forecasts") @@ -150,7 +155,14 @@ geo_type <- "nation" nation_scores <- evaluate_chu(nation_predictions, signals, err_measures) for (signal_name in signals) { - save_score_cards_wrapper(nation_scores, geo_type, signal_name, output_dir) + status <- save_score_cards_wrapper(nation_scores, geo_type, signal_name, output_dir) + if (status != 0) { + save_score_errors[paste(signal_name, geo_type)] <- status + } +} + +if ( length(save_score_errors) > 0 ) { + stop(paste(save_score_errors, collapse="\n")) } print("Done") diff --git a/Report/score.R b/Report/score.R index 7bfb18f..746b9ec 100644 --- a/Report/score.R +++ b/Report/score.R @@ -56,8 +56,9 @@ 
save_score_cards_wrapper <- function(score_card, geo_type, signal_name, output_d save_score_cards(score_card, geo_type, signal_name = signal_name, output_dir = output_dir ) + return(0) } else { - stop(paste("No", signal_name, "available at the", geo_type, "level")) + return(paste("No", signal_name, "available at the", geo_type, "level")) } } From 57f65dccf01b459e6eb969c190efb79fa7360ca9 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 17 Feb 2022 16:27:54 -0500 Subject: [PATCH 32/58] styler --- Report/create_reports.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 1336eef..db255e7 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -161,8 +161,8 @@ for (signal_name in signals) { } } -if ( length(save_score_errors) > 0 ) { - stop(paste(save_score_errors, collapse="\n")) +if (length(save_score_errors) > 0) { + stop(paste(save_score_errors, collapse = "\n")) } print("Done") From 844400d1284e0c478e328b791aa6057d26349d40 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 18 Feb 2022 18:12:16 -0500 Subject: [PATCH 33/58] document why particular forecasters aren't included --- Report/create_reports.R | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index db255e7..0f86414 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -30,9 +30,18 @@ prediction_cards_filepath <- case_when( options(warn = 1) -# Note: CDDEP-ABM is not longer available and causes some warnings when trying -# to download its data. Defer to `get_covidhub_forecaster_names` and underlying -# Reich Lab utilities as to which forecasters to include. +# Requested forecasters that do not get included in final scores: +# Auquan-SEIR: Only predicts cumulative deaths +# CDDEP-ABM: No longer on Forecast Hub. 
Causes some warnings when trying to download. +# CDDEP-SEIR_MCMC: County-level predictions only +# CUBoulder-COVIDLSTM: County-level predictions only +# FAIR-NRAR: County-level predictions only +# HKUST-DNN: Only predicts cumulative deaths +# ISUandPKU-vSEIdR: Folder but no forecasts on Forecast Hub +# PandemicCentral-COVIDForest: County-level predictions only +# UT_GISAG-SPDM: County-level predictions only +# WalmartLabsML-LogForecasting: Only predicts cumulative deaths +# Yu_Group-CLEP: County-level predictions only forecasters <- unique(c( get_covidhub_forecaster_names(designations = c("primary", "secondary")), "COVIDhub-baseline", "COVIDhub-trained_ensemble", "COVIDhub-4_week_ensemble" From 9f0501aaca67cc44a8328e92412f4884217a4070 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 18 Feb 2022 18:16:50 -0500 Subject: [PATCH 34/58] styler --- Report/create_reports.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 0f86414..10e31f9 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -41,7 +41,7 @@ options(warn = 1) # PandemicCentral-COVIDForest: County-level predictions only # UT_GISAG-SPDM: County-level predictions only # WalmartLabsML-LogForecasting: Only predicts cumulative deaths -# Yu_Group-CLEP: County-level predictions only +# Yu_Group-CLEP: County-level predictions only forecasters <- unique(c( get_covidhub_forecaster_names(designations = c("primary", "secondary")), "COVIDhub-baseline", "COVIDhub-trained_ensemble", "COVIDhub-4_week_ensemble" From 19e0fdf6d9a0d1b7268a922bc4fe114a398192fb Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Sun, 20 Feb 2022 16:11:47 -0500 Subject: [PATCH 35/58] add brackets so signal col is returned as char not df --- Report/score.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Report/score.R b/Report/score.R index 
746b9ec..364677a 100644 --- a/Report/score.R +++ b/Report/score.R @@ -51,7 +51,7 @@ save_score_cards <- function(score_card, geo_type = c("state", "nation"), } save_score_cards_wrapper <- function(score_card, geo_type, signal_name, output_dir) { - if (signal_name %in% unique(score_card["signal"])) { + if (signal_name %in% unique(score_card[["signal"]])) { print(paste("Saving", geo_type, type_map[[signal_name]], "...")) save_score_cards(score_card, geo_type, signal_name = signal_name, output_dir = output_dir From c855375b5f58428f41476aa6c4d7971aaf18f39d Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 23 Feb 2022 16:56:10 -0500 Subject: [PATCH 36/58] stop pipeline from removing future forecasts --- Report/create_reports.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 10e31f9..b74e97b 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -69,9 +69,9 @@ predictions_cards <- get_covidhub_predictions(forecasters, options(warn = 0) +# Includes predictions for future dates, which will not be scored. predictions_cards <- predictions_cards %>% - filter(!is.na(target_end_date)) %>% - filter(target_end_date < today()) + filter(!is.na(target_end_date)) # For hospitalizations, drop all US territories except Puerto Rico and the # Virgin Islands; HHS does not report data for any territories except PR and VI. 
From a9d28f313ec3078c51b774732cd12361d56afe3b Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 24 Feb 2022 10:39:49 -0500 Subject: [PATCH 37/58] dashboard to keep future predictions and always enable showForecasts button --- app/R/data_manipulation.R | 2 +- app/server.R | 48 +++++++++++++++------------------------ app/ui.R | 10 ++++---- 3 files changed, 23 insertions(+), 37 deletions(-) diff --git a/app/R/data_manipulation.R b/app/R/data_manipulation.R index c1f5c43..1367cb7 100644 --- a/app/R/data_manipulation.R +++ b/app/R/data_manipulation.R @@ -15,7 +15,7 @@ renameScoreCol <- function(filteredScoreDf, scoreType, coverageInterval) { filterOverAllLocations <- function(filteredScoreDf, scoreType, hasAsOfData = FALSE) { locationsIntersect <- list() - filteredScoreDf <- filteredScoreDf %>% filter(!is.na(Score)) + filteredScoreDf <- filteredScoreDf %>% filter(!(is.na(Score) && target_end_date < today())) # Create df with col for all locations across each unique date, ahead and forecaster combo locationDf <- filteredScoreDf %>% group_by(forecaster, target_end_date, ahead) %>% diff --git a/app/server.R b/app/server.R index 9c80413..4bbf4ff 100644 --- a/app/server.R +++ b/app/server.R @@ -433,21 +433,23 @@ server <- function(input, output, session) { filteredScoreDf <- filterHospitalizationsAheads(filteredScoreDf) } if (input$scoreType == "wis" || input$scoreType == "sharpness") { - # Only show WIS or Sharpness for forecasts that have all intervals + # Only show WIS or Sharpness for forecasts that have all intervals unless they are for future dates filteredScoreDf <- filteredScoreDf %>% - filter(!is.na(`50`)) %>% - filter(!is.na(`80`)) %>% - filter(!is.na(`95`)) + filter(!(is.na(`50`) && + is.na(`80`) && + is.na(`95`) && + target_end_date < today())) if (input$targetVariable == "Deaths") { filteredScoreDf <- filteredScoreDf %>% - filter(!is.na(`10`)) %>% - filter(!is.na(`20`)) %>% - filter(!is.na(`30`)) %>% - 
filter(!is.na(`40`)) %>% - filter(!is.na(`60`)) %>% - filter(!is.na(`70`)) %>% - filter(!is.na(`90`)) %>% - filter(!is.na(`98`)) + filter(!(is.na(`10`) && + is.na(`20`) && + is.na(`30`) && + is.na(`40`) && + is.na(`60`) && + is.na(`70`) && + is.na(`90`) && + is.na(`98`) && + target_end_date < today())) } } filteredScoreDf <- renameScoreCol(filteredScoreDf, input$scoreType, input$coverageInterval) @@ -598,11 +600,6 @@ server <- function(input, output, session) { updateAsOfData() } - if (input$asOf != "" && input$asOf == CURRENT_WEEK_END_DATE()) { - disable("showForecasts") - } else { - enable("showForecasts") - } if (input$scoreType == "wis") { showElement("wisExplanation") hideElement("sharpnessExplanation") @@ -647,22 +644,10 @@ server <- function(input, output, session) { observeEvent(input$location, { updateAsOfData() - # Only show forecast check box option if we are showing as of data - if (input$asOf != "" && input$asOf == CURRENT_WEEK_END_DATE()) { - disable("showForecasts") - } else { - enable("showForecasts") - } }) observeEvent(input$asOf, { updateAsOfData() - # Only show forecast check box option if we are showing as of data - if (input$asOf != "" && input$asOf == CURRENT_WEEK_END_DATE()) { - disable("showForecasts") - } else { - enable("showForecasts") - } }) # The following checks ensure the minimum necessary input selections @@ -757,7 +742,10 @@ server <- function(input, output, session) { } updateAsOfChoices <- function(session, truthDf) { - asOfChoices <- truthDf$Week_End_Date + asOfChoices <- truthDf %>% + select(Week_End_Date) %>% + filter(Week_End_Date <= CURRENT_WEEK_END_DATE()) %>% + pull() selectedAsOf <- isolate(input$asOf) if (input$targetVariable == "Hospitalizations") { minChoice <- MIN_AVAIL_HOSP_AS_OF_DATE diff --git a/app/ui.R b/app/ui.R index 3be22a7..0204b92 100644 --- a/app/ui.R +++ b/app/ui.R @@ -98,12 +98,10 @@ sidebar <- tags$div( tags$p(id = "missing-data-disclaimer", "Some locations may not have 'as of' data for the chosen 'as 
of' date"), div( id = "showForecastsCheckbox", - disabled( - checkboxInput( - "showForecasts", - "Show Forecasters' Predictions", - value = FALSE, - ) + checkboxInput( + "showForecasts", + "Show Forecasters' Predictions", + value = FALSE ) ), tags$hr(), From 558caa699deab418adee903df87e98ac2d0a1ed5 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 24 Feb 2022 12:14:24 -0500 Subject: [PATCH 38/58] allow truthPlot to show predictions for most recent as-of --- app/server.R | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/app/server.R b/app/server.R index 4bbf4ff..19843f8 100644 --- a/app/server.R +++ b/app/server.R @@ -238,7 +238,6 @@ server <- function(input, output, session) { output$truthPlot <- renderPlotly({ truthPlot(truthDf, locationsIntersect, !is.null(asOfData), dfWithForecasts, colorPalette) }) - # If we are just re-rendering the truth plot with as of data # we don't need to re-render the score plot if (reRenderTruth) { @@ -268,7 +267,7 @@ server <- function(input, output, session) { } } - # Title plot + # Set plot title if (input$scoreType == "wis") { plotTitle <- "Weighted Interval Score" } else if (input$scoreType == "sharpness") { @@ -363,13 +362,14 @@ server <- function(input, output, session) { group_by(Week_End_Date) %>% summarize(Forecaster = Forecaster, Reported_Incidence = actual, Reported_As_Of_Incidence = as_of_actual) %>% distinct() - if (input$showForecasts) { - filteredDf <- filterForecastData(filteredDf, dfWithForecasts) - } } else { filteredDf <- filteredDf %>% group_by(Week_End_Date) %>% - summarize(Reported_Incidence = actual) + summarize(Forecaster = Forecaster, Reported_Incidence = actual) %>% + distinct() + } + if (input$showForecasts) { + filteredDf <- filterForecastData(filteredDf, dfWithForecasts, hasAsOfData) } finalPlot <- ggplot(filteredDf, aes(x = Week_End_Date)) + @@ -385,15 +385,15 @@ server <- function(input, output, 
session) { geom_point(aes(y = Reported_Incidence, color = "Reported_Incidence")) + geom_line(aes(y = Reported_As_Of_Incidence, color = "Reported_As_Of_Incidence")) + geom_point(aes(y = Reported_As_Of_Incidence, color = "Reported_As_Of_Incidence")) - if (input$showForecasts) { - finalPlot <- finalPlot + - geom_line(aes(y = Quantile_50, color = Forecaster)) + - geom_point(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) - } } else { finalPlot <- finalPlot + geom_line(aes(y = Reported_Incidence)) + geom_point(aes(y = Reported_Incidence)) } + if (input$showForecasts) { + finalPlot <- finalPlot + + geom_line(aes(y = Quantile_50, color = Forecaster)) + + geom_point(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) + } finalPlot <- ggplotly(finalPlot, tooltip = c("shape", "x", "y")) %>% layout( hovermode = "x unified", @@ -502,7 +502,7 @@ server <- function(input, output, session) { return(asOfData) } - filterForecastData <- function(filteredDf, dfWithForecasts) { + filterForecastData <- function(filteredDf, dfWithForecasts, hasAsOfData) { dfWithForecasts <- dfWithForecasts %>% rename(Week_End_Date = target_end_date, Forecaster = forecaster, Quantile_50 = value_50) if (!SUMMARIZING_OVER_ALL_LOCATIONS()) { @@ -514,7 +514,11 @@ server <- function(input, output, session) { summarize(Quantile_50 = sum(Quantile_50)) } # We want the forecasts to be later than latest as of date with data - lastEndDate <- tail(filteredDf %>% filter(!is.na(Reported_As_Of_Incidence)), n = 1)$Week_End_Date[1] + if (hasAsOfData) { + lastEndDate <- tail(filteredDf %>% filter(!is.na(Reported_As_Of_Incidence)), n = 1)$Week_End_Date[1] + } else { + lastEndDate <- tail(filteredDf %>% filter(!is.na(Reported_Incidence)), n = 1)$Week_End_Date[1] + } dfWithForecasts <- dfWithForecasts %>% filter(forecast_date >= lastEndDate) %>% group_by(Week_End_Date) %>% @@ -542,9 +546,14 @@ server <- function(input, output, session) { group_by(Forecaster) %>% filter(forecast_date == 
first(forecast_date)) } + + keepCols <- c("Quantile_50", "Forecaster", "Reported_Incidence") + if (hasAsOfData) { + keepCols <- c(keepCols, "Reported_As_Of_Incidence") + } filteredDf <- merge(filteredDf, dfWithForecasts, by = c("Week_End_Date", "Forecaster"), all = TRUE) %>% group_by(Week_End_Date) %>% - select(Quantile_50, Forecaster, Reported_Incidence, Reported_As_Of_Incidence) + select(keepCols) # Remove rows of NAs filteredDf <- filteredDf %>% filter(!is.null(Forecaster)) filteredDf <- filteredDf %>% From 8f60b151fce552665ee302be45db0e6a4434b504 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 24 Feb 2022 12:37:51 -0500 Subject: [PATCH 39/58] make sure score and truth plots have same x scale --- app/server.R | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/app/server.R b/app/server.R index 19843f8..36d38d2 100644 --- a/app/server.R +++ b/app/server.R @@ -317,6 +317,13 @@ server <- function(input, output, session) { theme_bw() + theme(panel.spacing = unit(0.5, "lines")) + if (input$showForecasts) { + maxLim <- max( + as.Date(input$asOf) + 7 * 4, + filteredScoreDf %>% filter(!is.na(Score)) %>% pull(Week_End_Date) %>% max() + 7 * 2 + ) + p <- p + scale_x_date(limits = c(as.Date(NA), maxLim), date_labels = "%b %Y") + } if (input$scoreType == "coverage") { p <- p + geom_hline(yintercept = .01 * as.integer(input$coverageInterval)) } @@ -390,9 +397,14 @@ server <- function(input, output, session) { geom_point(aes(y = Reported_Incidence)) } if (input$showForecasts) { + maxLim <- max( + as.Date(input$asOf) + 7 * 4, + filteredDf %>% filter(!is.na(Reported_Incidence)) %>% pull(Week_End_Date) %>% max() + 7 * 2 + ) finalPlot <- finalPlot + geom_line(aes(y = Quantile_50, color = Forecaster)) + - geom_point(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) + geom_point(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) + + scale_x_date(limits = c(as.Date(NA), maxLim), 
date_labels = "%b %Y") } finalPlot <- ggplotly(finalPlot, tooltip = c("shape", "x", "y")) %>% layout( From aa986903a4f1c3af32ddcc61aa8602e7ec5e357c Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 24 Feb 2022 12:41:09 -0500 Subject: [PATCH 40/58] styler --- app/server.R | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/app/server.R b/app/server.R index 36d38d2..f74e906 100644 --- a/app/server.R +++ b/app/server.R @@ -448,20 +448,20 @@ server <- function(input, output, session) { # Only show WIS or Sharpness for forecasts that have all intervals unless they are for future dates filteredScoreDf <- filteredScoreDf %>% filter(!(is.na(`50`) && - is.na(`80`) && - is.na(`95`) && - target_end_date < today())) + is.na(`80`) && + is.na(`95`) && + target_end_date < today())) if (input$targetVariable == "Deaths") { filteredScoreDf <- filteredScoreDf %>% filter(!(is.na(`10`) && - is.na(`20`) && - is.na(`30`) && - is.na(`40`) && - is.na(`60`) && - is.na(`70`) && - is.na(`90`) && - is.na(`98`) && - target_end_date < today())) + is.na(`20`) && + is.na(`30`) && + is.na(`40`) && + is.na(`60`) && + is.na(`70`) && + is.na(`90`) && + is.na(`98`) && + target_end_date < today())) } } filteredScoreDf <- renameScoreCol(filteredScoreDf, input$scoreType, input$coverageInterval) From b814c8c4dd84a1c2370c339bdda1ae27d2a2f006 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 24 Feb 2022 15:49:14 -0500 Subject: [PATCH 41/58] save ts to S3 and have dashboard fetch it --- Report/create_reports.R | 2 ++ app/R/data.R | 11 ++++++++++- app/R/data_manipulation.R | 4 ++-- app/server.R | 10 ++++++---- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index b74e97b..4c7b458 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -58,6 +58,7 @@ signals <- c( "confirmed_admissions_covid_1d" ) 
+data_pull_timestamp <- now(tzone = "UTC") predictions_cards <- get_covidhub_predictions(forecasters, signal = signals, ahead = 1:28, @@ -174,4 +175,5 @@ if (length(save_score_errors) > 0) { stop(paste(save_score_errors, collapse = "\n")) } +saveRDS(data.frame(datetime = c(data_pull_timestamp)), file=file.path(output_dir, "datetime_created_utc.rds")) print("Done") diff --git a/app/R/data.R b/app/R/data.R index 06bd512..92b4ba1 100644 --- a/app/R/data.R +++ b/app/R/data.R @@ -60,6 +60,13 @@ getFallbackData <- function(filename) { readRDS(path) } + +getCreationDate <- function(loadFile) { + dataCreationDate <- loadFile("datetime_created_utc.rds") + return(dataCreationDate %>% pull(datetime) %>% as.Date()) +} + + getAllData <- function(loadFile) { dfStateCases <- loadFile("score_cards_state_cases.rds") dfStateDeaths <- loadFile("score_cards_state_deaths.rds") @@ -100,6 +107,7 @@ getAllData <- function(loadFile) { createS3DataLoader <- function() { s3bucket <- getS3Bucket() df <- data.frame() + dataCreationDate <- as.Date(NA) getRecentData <- function() { newS3bucket <- getS3Bucket() @@ -117,9 +125,10 @@ createS3DataLoader <- function() { # la https://stackoverflow.com/questions/1088639/static-variables-in-r s3bucket <<- newS3bucket df <<- getAllData(createS3DataFactory(s3bucket)) + dataCreationDate <<- getCreationDate(createS3DataFactory(s3bucket)) } - return(df) + return(list(df = df, dataCreationDate = dataCreationDate)) } return(getRecentData) diff --git a/app/R/data_manipulation.R b/app/R/data_manipulation.R index 1367cb7..dfaa42c 100644 --- a/app/R/data_manipulation.R +++ b/app/R/data_manipulation.R @@ -13,9 +13,9 @@ renameScoreCol <- function(filteredScoreDf, scoreType, coverageInterval) { } -filterOverAllLocations <- function(filteredScoreDf, scoreType, hasAsOfData = FALSE) { +filterOverAllLocations <- function(filteredScoreDf, scoreType, hasAsOfData = FALSE, filterDate) { locationsIntersect <- list() - filteredScoreDf <- filteredScoreDf %>% 
filter(!(is.na(Score) && target_end_date < today())) + filteredScoreDf <- filteredScoreDf %>% filter(!(is.na(Score) && target_end_date < filterDate)) # Create df with col for all locations across each unique date, ahead and forecaster combo locationDf <- filteredScoreDf %>% group_by(forecaster, target_end_date, ahead) %>% diff --git a/app/server.R b/app/server.R index f74e906..44de237 100644 --- a/app/server.R +++ b/app/server.R @@ -109,7 +109,9 @@ server <- function(input, output, session) { # Get scores - df <- loadData() + loaded <- loadData() + df <- loaded$df + dataCreationDate <- loaded$dataCreationDate DATA_LOADED <- TRUE # Prepare input choices @@ -163,7 +165,7 @@ server <- function(input, output, session) { # Totaling over all locations if (SUMMARIZING_OVER_ALL_LOCATIONS()) { - filteredScoreDfAndIntersections <- filterOverAllLocations(filteredScoreDf, input$scoreType, !is.null(asOfData)) + filteredScoreDfAndIntersections <- filterOverAllLocations(filteredScoreDf, input$scoreType, !is.null(asOfData), filterDate = dataCreationDate) filteredScoreDf <- filteredScoreDfAndIntersections[[1]] locationsIntersect <- filteredScoreDfAndIntersections[[2]] if (input$showForecasts) { @@ -450,7 +452,7 @@ server <- function(input, output, session) { filter(!(is.na(`50`) && is.na(`80`) && is.na(`95`) && - target_end_date < today())) + target_end_date < dataCreationDate)) if (input$targetVariable == "Deaths") { filteredScoreDf <- filteredScoreDf %>% filter(!(is.na(`10`) && @@ -461,7 +463,7 @@ server <- function(input, output, session) { is.na(`70`) && is.na(`90`) && is.na(`98`) && - target_end_date < today())) + target_end_date < dataCreationDate)) } } filteredScoreDf <- renameScoreCol(filteredScoreDf, input$scoreType, input$coverageInterval) From af51e54cac38200a35d6a61d1b2237ecddbfec47 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 24 Feb 2022 18:41:59 -0500 Subject: [PATCH 42/58] pipeline to save data_pull ts to S3 
bucket --- Report/create_reports.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Report/create_reports.R b/Report/create_reports.R index 10e31f9..a8eaee4 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -58,6 +58,7 @@ signals <- c( "confirmed_admissions_covid_1d" ) +data_pull_timestamp <- now(tzone = "UTC") predictions_cards <- get_covidhub_predictions(forecasters, signal = signals, ahead = 1:28, @@ -174,4 +175,5 @@ if (length(save_score_errors) > 0) { stop(paste(save_score_errors, collapse = "\n")) } +saveRDS(data.frame(datetime = c(data_pull_timestamp)), file=file.path(output_dir, "datetime_created_utc.rds")) print("Done") From 1380645f5568f12d8f1cf61c533afab8bd876dfd Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 24 Feb 2022 18:46:53 -0500 Subject: [PATCH 43/58] keep predictions for future dates --- Report/create_reports.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index a8eaee4..4c7b458 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -70,9 +70,9 @@ predictions_cards <- get_covidhub_predictions(forecasters, options(warn = 0) +# Includes predictions for future dates, which will not be scored. predictions_cards <- predictions_cards %>% - filter(!is.na(target_end_date)) %>% - filter(target_end_date < today()) + filter(!is.na(target_end_date)) # For hospitalizations, drop all US territories except Puerto Rico and the # Virgin Islands; HHS does not report data for any territories except PR and VI. 
From 4518a4fa4831aa0848c2de7a271bdde78f8bf428 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 24 Feb 2022 18:48:11 -0500 Subject: [PATCH 44/58] styler --- Report/create_reports.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 4c7b458..f17a868 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -175,5 +175,5 @@ if (length(save_score_errors) > 0) { stop(paste(save_score_errors, collapse = "\n")) } -saveRDS(data.frame(datetime = c(data_pull_timestamp)), file=file.path(output_dir, "datetime_created_utc.rds")) +saveRDS(data.frame(datetime = c(data_pull_timestamp)), file = file.path(output_dir, "datetime_created_utc.rds")) print("Done") From e7726b526eb03cb69fb5d99f04d68e5feb1644c7 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Thu, 24 Feb 2022 18:49:04 -0500 Subject: [PATCH 45/58] styler --- Report/create_reports.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 4c7b458..f17a868 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -175,5 +175,5 @@ if (length(save_score_errors) > 0) { stop(paste(save_score_errors, collapse = "\n")) } -saveRDS(data.frame(datetime = c(data_pull_timestamp)), file=file.path(output_dir, "datetime_created_utc.rds")) +saveRDS(data.frame(datetime = c(data_pull_timestamp)), file = file.path(output_dir, "datetime_created_utc.rds")) print("Done") From 0d2dbcab0e923a84528e5f320023530a9ae8e2d2 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 25 Feb 2022 09:11:10 -0500 Subject: [PATCH 46/58] pull datetime file in makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 409df92..64d570a 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ r_build: %.rds: dist test 
-f dist/$@ || curl -o dist/$@ $(S3_URL)/$@ -pull_data: score_cards_state_deaths.rds score_cards_state_cases.rds score_cards_nation_cases.rds score_cards_nation_deaths.rds score_cards_state_hospitalizations.rds score_cards_nation_hospitalizations.rds +pull_data: score_cards_state_deaths.rds score_cards_state_cases.rds score_cards_nation_cases.rds score_cards_nation_deaths.rds score_cards_state_hospitalizations.rds score_cards_nation_hospitalizations.rds datetime_created_utc.rds dist: mkdir $@ From 87e5daa26726c2acc5dab1e37762be27b0641c3e Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 25 Feb 2022 09:42:35 -0500 Subject: [PATCH 47/58] use current week end date to set plot axis range --- app/server.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/server.R b/app/server.R index 44de237..d1fee70 100644 --- a/app/server.R +++ b/app/server.R @@ -322,7 +322,7 @@ server <- function(input, output, session) { if (input$showForecasts) { maxLim <- max( as.Date(input$asOf) + 7 * 4, - filteredScoreDf %>% filter(!is.na(Score)) %>% pull(Week_End_Date) %>% max() + 7 * 2 + CURRENT_WEEK_END_DATE() + 7 * 1 ) p <- p + scale_x_date(limits = c(as.Date(NA), maxLim), date_labels = "%b %Y") } @@ -401,7 +401,7 @@ server <- function(input, output, session) { if (input$showForecasts) { maxLim <- max( as.Date(input$asOf) + 7 * 4, - filteredDf %>% filter(!is.na(Reported_Incidence)) %>% pull(Week_End_Date) %>% max() + 7 * 2 + CURRENT_WEEK_END_DATE() + 7 * 1 ) finalPlot <- finalPlot + geom_line(aes(y = Quantile_50, color = Forecaster)) + From 1b005b1c8924c62b8b17c214ddf77634ece73cd8 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 25 Feb 2022 16:44:48 -0500 Subject: [PATCH 48/58] remove duplicate forecaster legend entries in truth plot --- app/server.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/app/server.R b/app/server.R index d1fee70..d789351 100644 --- 
a/app/server.R +++ b/app/server.R @@ -417,7 +417,11 @@ server <- function(input, output, session) { # Remove the extra grouping from the legend: "(___,1)" for (i in seq_along(finalPlot$x$data)) { if (!is.null(finalPlot$x$data[[i]]$name)) { + if (endsWith(finalPlot$x$data[[i]]$name, ",1)") && finalPlot$x$data[[i]]$mode != "lines+markers") { + finalPlot$x$data[[i]]$showlegend <- FALSE + } finalPlot$x$data[[i]]$name <- gsub("\\(", "", stringr::str_split(finalPlot$x$data[[i]]$name, ",")[[1]][1]) + finalPlot$x$data[[i]]$mode <- "lines+markers" } } return(finalPlot) From 8402fcfcd9ff6812c6dfe30820e200f69ce3c209 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 1 Mar 2022 11:47:47 -0500 Subject: [PATCH 49/58] if predictions aren't in the future, leave truth plot autoscaling on --- app/server.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/app/server.R b/app/server.R index d789351..5c4dc08 100644 --- a/app/server.R +++ b/app/server.R @@ -320,9 +320,10 @@ server <- function(input, output, session) { theme(panel.spacing = unit(0.5, "lines")) if (input$showForecasts) { - maxLim <- max( + maxLim <- if_else( + as.Date(input$asOf) + 7 * 4 > CURRENT_WEEK_END_DATE(), as.Date(input$asOf) + 7 * 4, - CURRENT_WEEK_END_DATE() + 7 * 1 + as.Date(NA) ) p <- p + scale_x_date(limits = c(as.Date(NA), maxLim), date_labels = "%b %Y") } @@ -399,9 +400,10 @@ server <- function(input, output, session) { geom_point(aes(y = Reported_Incidence)) } if (input$showForecasts) { - maxLim <- max( + maxLim <- if_else( + as.Date(input$asOf) + 7 * 4 > CURRENT_WEEK_END_DATE(), as.Date(input$asOf) + 7 * 4, - CURRENT_WEEK_END_DATE() + 7 * 1 + as.Date(NA) ) finalPlot <- finalPlot + geom_line(aes(y = Quantile_50, color = Forecaster)) + From a5233f6a8693294d6785a8fbe241daa1eeb849ec Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 1 Mar 2022 12:46:12 -0500 Subject: [PATCH 50/58] 
correct exclusion logic --- app/R/data_manipulation.R | 2 +- app/server.R | 30 +++++++++++++++++------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/app/R/data_manipulation.R b/app/R/data_manipulation.R index dfaa42c..e7eb3a8 100644 --- a/app/R/data_manipulation.R +++ b/app/R/data_manipulation.R @@ -15,7 +15,7 @@ renameScoreCol <- function(filteredScoreDf, scoreType, coverageInterval) { filterOverAllLocations <- function(filteredScoreDf, scoreType, hasAsOfData = FALSE, filterDate) { locationsIntersect <- list() - filteredScoreDf <- filteredScoreDf %>% filter(!(is.na(Score) && target_end_date < filterDate)) + filteredScoreDf <- filteredScoreDf %>% filter(!(is.na(Score) & target_end_date < filterDate)) # Create df with col for all locations across each unique date, ahead and forecaster combo locationDf <- filteredScoreDf %>% group_by(forecaster, target_end_date, ahead) %>% diff --git a/app/server.R b/app/server.R index 5c4dc08..4295a85 100644 --- a/app/server.R +++ b/app/server.R @@ -455,21 +455,25 @@ server <- function(input, output, session) { if (input$scoreType == "wis" || input$scoreType == "sharpness") { # Only show WIS or Sharpness for forecasts that have all intervals unless they are for future dates filteredScoreDf <- filteredScoreDf %>% - filter(!(is.na(`50`) && - is.na(`80`) && - is.na(`95`) && - target_end_date < dataCreationDate)) + filter(!( + (is.na(`50`) | + is.na(`80`) | + is.na(`95`) + ) & + target_end_date < dataCreationDate)) if (input$targetVariable == "Deaths") { filteredScoreDf <- filteredScoreDf %>% - filter(!(is.na(`10`) && - is.na(`20`) && - is.na(`30`) && - is.na(`40`) && - is.na(`60`) && - is.na(`70`) && - is.na(`90`) && - is.na(`98`) && - target_end_date < dataCreationDate)) + filter(!( + (is.na(`10`) | + is.na(`20`) | + is.na(`30`) | + is.na(`40`) | + is.na(`60`) | + is.na(`70`) | + is.na(`90`) | + is.na(`98`) + ) & + target_end_date < dataCreationDate)) } } filteredScoreDf <- renameScoreCol(filteredScoreDf, 
input$scoreType, input$coverageInterval) From 2c71344d5c1067408d7a3d520fe6e7e6efa85b30 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 1 Mar 2022 17:25:07 -0500 Subject: [PATCH 51/58] generalize showing-predictions logic since some forecasters upload earlier than others --- app/server.R | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/app/server.R b/app/server.R index 4295a85..ec56199 100644 --- a/app/server.R +++ b/app/server.R @@ -406,7 +406,6 @@ server <- function(input, output, session) { as.Date(NA) ) finalPlot <- finalPlot + - geom_line(aes(y = Quantile_50, color = Forecaster)) + geom_point(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) + scale_x_date(limits = c(as.Date(NA), maxLim), date_labels = "%b %Y") } @@ -537,21 +536,32 @@ server <- function(input, output, session) { group_by(Forecaster, forecast_date, Week_End_Date, ahead) %>% summarize(Quantile_50 = sum(Quantile_50)) } - # We want the forecasts to be later than latest as of date with data + if (hasAsOfData) { + # We want the forecasts to be later than latest as of date with data lastEndDate <- tail(filteredDf %>% filter(!is.na(Reported_As_Of_Incidence)), n = 1)$Week_End_Date[1] + dfWithForecasts <- dfWithForecasts %>% + filter(forecast_date >= lastEndDate) %>% + group_by(Week_End_Date) %>% + summarize(Forecaster, forecast_date, Quantile_50) } else { - lastEndDate <- tail(filteredDf %>% filter(!is.na(Reported_Incidence)), n = 1)$Week_End_Date[1] + # Get the latest predictions for each forecaster + dfWithForecasts <- dfWithForecasts %>% + group_by(Forecaster) %>% + filter(forecast_date == max(forecast_date)) %>% + ungroup() %>% + group_by(Week_End_Date) %>% + summarize(Forecaster, forecast_date, Quantile_50) %>% + filter(Week_End_Date > CURRENT_WEEK_END_DATE()) } - dfWithForecasts <- dfWithForecasts %>% - filter(forecast_date >= lastEndDate) %>% - group_by(Week_End_Date) %>% - 
summarize(Forecaster, forecast_date, Quantile_50) # Get the next as of choice available in dropdown menu dfWithForecasts <- dfWithForecasts[order(dfWithForecasts$forecast_date), ] AS_OF_CHOICES(sort(AS_OF_CHOICES() %>% unique())) - nextAsOfInList <- AS_OF_CHOICES()[which.min(abs(AS_OF_CHOICES() - dfWithForecasts$forecast_date[1])) + 1] + nextAsOfInList <- AS_OF_CHOICES()[which.min(abs(AS_OF_CHOICES() - min(dfWithForecasts$forecast_date))) + 1] + if (!hasAsOfData) { + nextAsOfInList <- NA + } # Take only those forecasts with a forecast date before the next as of date in dropdown # aka within the week after the current as of shown From 873b23609b80788bdf644d7c2ea48f692cf86bd9 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 2 Mar 2022 11:55:16 -0500 Subject: [PATCH 52/58] revert truthplot legend formatting --- app/server.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/app/server.R b/app/server.R index ec56199..b18d771 100644 --- a/app/server.R +++ b/app/server.R @@ -405,7 +405,9 @@ server <- function(input, output, session) { as.Date(input$asOf) + 7 * 4, as.Date(NA) ) + finalPlot <- finalPlot + + geom_line(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) + geom_point(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) + scale_x_date(limits = c(as.Date(NA), maxLim), date_labels = "%b %Y") } @@ -418,11 +420,7 @@ server <- function(input, output, session) { # Remove the extra grouping from the legend: "(___,1)" for (i in seq_along(finalPlot$x$data)) { if (!is.null(finalPlot$x$data[[i]]$name)) { - if (endsWith(finalPlot$x$data[[i]]$name, ",1)") && finalPlot$x$data[[i]]$mode != "lines+markers") { - finalPlot$x$data[[i]]$showlegend <- FALSE - } finalPlot$x$data[[i]]$name <- gsub("\\(", "", stringr::str_split(finalPlot$x$data[[i]]$name, ",")[[1]][1]) - finalPlot$x$data[[i]]$mode <- "lines+markers" } } return(finalPlot) From 9eed7d7690c0d6b7fc3eef99d60d5eafc3e57473 Mon Sep 17 
00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 2 Mar 2022 12:37:37 -0500 Subject: [PATCH 53/58] styler --- app/server.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/server.R b/app/server.R index b18d771..4b9eda0 100644 --- a/app/server.R +++ b/app/server.R @@ -405,7 +405,7 @@ server <- function(input, output, session) { as.Date(input$asOf) + 7 * 4, as.Date(NA) ) - + finalPlot <- finalPlot + geom_line(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) + geom_point(aes(y = Quantile_50, color = Forecaster, shape = Forecaster)) + From 40ac6184af16daa2813006f373560f141ba7f4b0 Mon Sep 17 00:00:00 2001 From: nmdefries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 4 Mar 2022 12:53:26 -0500 Subject: [PATCH 54/58] Switch to keep logic for 50/80/95 Co-authored-by: Katie Mazaitis --- app/server.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/app/server.R b/app/server.R index 4b9eda0..b454ab2 100644 --- a/app/server.R +++ b/app/server.R @@ -452,12 +452,12 @@ server <- function(input, output, session) { if (input$scoreType == "wis" || input$scoreType == "sharpness") { # Only show WIS or Sharpness for forecasts that have all intervals unless they are for future dates filteredScoreDf <- filteredScoreDf %>% - filter(!( - (is.na(`50`) | - is.na(`80`) | - is.na(`95`) - ) & - target_end_date < dataCreationDate)) + filter(( + !is.na(`50`) & + !is.na(`80`) & + !is.na(`95`) + ) | + target_end_date >= dataCreationDate) if (input$targetVariable == "Deaths") { filteredScoreDf <- filteredScoreDf %>% filter(!( From 0c2c2116fd89c74584ad82c154870491a102b925 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 4 Mar 2022 12:58:49 -0500 Subject: [PATCH 55/58] switch to keep logic for all score/date filters --- app/R/data_manipulation.R | 2 +- app/server.R | 28 +++++++++++++++------------- 2 files changed, 16 insertions(+), 14 
deletions(-) diff --git a/app/R/data_manipulation.R b/app/R/data_manipulation.R index e7eb3a8..c3ef4c7 100644 --- a/app/R/data_manipulation.R +++ b/app/R/data_manipulation.R @@ -15,7 +15,7 @@ renameScoreCol <- function(filteredScoreDf, scoreType, coverageInterval) { filterOverAllLocations <- function(filteredScoreDf, scoreType, hasAsOfData = FALSE, filterDate) { locationsIntersect <- list() - filteredScoreDf <- filteredScoreDf %>% filter(!(is.na(Score) & target_end_date < filterDate)) + filteredScoreDf <- filteredScoreDf %>% filter(!is.na(Score) | target_end_date >= filterDate) # Create df with col for all locations across each unique date, ahead and forecaster combo locationDf <- filteredScoreDf %>% group_by(forecaster, target_end_date, ahead) %>% diff --git a/app/server.R b/app/server.R index b454ab2..51506a5 100644 --- a/app/server.R +++ b/app/server.R @@ -450,27 +450,29 @@ server <- function(input, output, session) { filteredScoreDf <- filterHospitalizationsAheads(filteredScoreDf) } if (input$scoreType == "wis" || input$scoreType == "sharpness") { - # Only show WIS or Sharpness for forecasts that have all intervals unless they are for future dates + # Only show WIS or Sharpness for forecasts that have all intervals or are for future dates filteredScoreDf <- filteredScoreDf %>% filter(( !is.na(`50`) & !is.na(`80`) & !is.na(`95`) ) | - target_end_date >= dataCreationDate) + target_end_date >= dataCreationDate + ) if (input$targetVariable == "Deaths") { filteredScoreDf <- filteredScoreDf %>% - filter(!( - (is.na(`10`) | - is.na(`20`) | - is.na(`30`) | - is.na(`40`) | - is.na(`60`) | - is.na(`70`) | - is.na(`90`) | - is.na(`98`) - ) & - target_end_date < dataCreationDate)) + filter(( + !is.na(`10`) & + !is.na(`20`) & + !is.na(`30`) & + !is.na(`40`) & + !is.na(`60`) & + !is.na(`70`) & + !is.na(`90`) & + !is.na(`98`) + ) | + target_end_date >= dataCreationDate + ) } } filteredScoreDf <- renameScoreCol(filteredScoreDf, input$scoreType, input$coverageInterval) From 
6326cf3f2cf7c409c1e39b8d4e3760e1666b700a Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Fri, 4 Mar 2022 13:02:04 -0500 Subject: [PATCH 56/58] styler --- app/server.R | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/app/server.R b/app/server.R index 51506a5..f083dac 100644 --- a/app/server.R +++ b/app/server.R @@ -453,16 +453,15 @@ server <- function(input, output, session) { # Only show WIS or Sharpness for forecasts that have all intervals or are for future dates filteredScoreDf <- filteredScoreDf %>% filter(( - !is.na(`50`) & + !is.na(`50`) & !is.na(`80`) & !is.na(`95`) - ) | - target_end_date >= dataCreationDate - ) + ) | + target_end_date >= dataCreationDate) if (input$targetVariable == "Deaths") { filteredScoreDf <- filteredScoreDf %>% filter(( - !is.na(`10`) & + !is.na(`10`) & !is.na(`20`) & !is.na(`30`) & !is.na(`40`) & @@ -470,9 +469,8 @@ server <- function(input, output, session) { !is.na(`70`) & !is.na(`90`) & !is.na(`98`) - ) | - target_end_date >= dataCreationDate - ) + ) | + target_end_date >= dataCreationDate) } } filteredScoreDf <- renameScoreCol(filteredScoreDf, input$scoreType, input$coverageInterval) From 0ae89b2aa5d92942f86c0736ab0299437fe0b037 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 30 Mar 2022 11:42:06 -0400 Subject: [PATCH 57/58] set description, bumpversion, and app/global.R versions all to 4.2.0 --- .bumpversion.cfg | 2 +- app/global.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 0fc0a01..948d3f1 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 4.0.0 +current_version = 4.2.0 commit = False tag = False diff --git a/app/global.R b/app/global.R index 50a420b..1f125ae 100644 --- a/app/global.R +++ b/app/global.R @@ -8,7 +8,7 @@ library(viridis) library(tsibble) library(covidcast) -appVersion 
<- "4.0.0" +appVersion <- "4.2.0" COVERAGE_INTERVALS <- c("10", "20", "30", "40", "50", "60", "70", "80", "90", "95", "98") DEATH_FILTER <- "deaths_incidence_num" From ea9fc00eb0e604fcb8a2aaf91074ad7962ea63bf Mon Sep 17 00:00:00 2001 From: nmdefries Date: Fri, 1 Apr 2022 15:24:09 +0000 Subject: [PATCH 58/58] chore: release 5.0.0 --- .bumpversion.cfg | 2 +- DESCRIPTION | 2 +- app/global.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 948d3f1..d04ca18 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 4.2.0 +current_version = 5.0.0 commit = False tag = False diff --git a/DESCRIPTION b/DESCRIPTION index 2d91518..48e2f2e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: forecasteval Title: Forecast Evaluation Dashboard -Version: 4.2.0 +Version: 5.0.0 Authors@R: c(person("Kate", "Harwood", email = "kharwood@andrew.cmu.edu", role = "cre"), person("Chris", "Scott", role = "ctb"), person("Jed", "Grabman", role = "ctb")) diff --git a/app/global.R b/app/global.R index 1f125ae..b2f5d14 100644 --- a/app/global.R +++ b/app/global.R @@ -8,7 +8,7 @@ library(viridis) library(tsibble) library(covidcast) -appVersion <- "4.2.0" +appVersion <- "5.0.0" COVERAGE_INTERVALS <- c("10", "20", "30", "40", "50", "60", "70", "80", "90", "95", "98") DEATH_FILTER <- "deaths_incidence_num"