From 3f63b80d611ff22a609dc595a3b8b098dcf8469a Mon Sep 17 00:00:00 2001 From: Jed Grabman Date: Fri, 12 Feb 2021 15:57:59 -0500 Subject: [PATCH 1/2] Use covidHubUtils as source when data not provided by covidcast --- Report/create_reports.R | 8 +++---- Report/score.R | 51 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/Report/create_reports.R b/Report/create_reports.R index 3519129..3bf4bd2 100644 --- a/Report/create_reports.R +++ b/Report/create_reports.R @@ -26,12 +26,12 @@ create_prediction_cards(prediction_cards_filepath) source("score.R") print("Scoring state confirmed incidence...") -create_score_cards(prediction_cards_filepath, "state", signal = "confirmed_incidence_num", output_dir=opt$dir) +create_score_cards(prediction_cards_filepath, "state", signal_name = "confirmed_incidence_num", output_dir=opt$dir) print("Scoring state deaths incidence...") -create_score_cards(prediction_cards_filepath, "state", signal = "deaths_incidence_num", output_dir=opt$dir) +create_score_cards(prediction_cards_filepath, "state", signal_name = "deaths_incidence_num", output_dir=opt$dir) print("Scoring nation confirmed incidence...") -create_score_cards(prediction_cards_filepath, "nation", signal = "confirmed_incidence_num", output_dir=opt$dir) +create_score_cards(prediction_cards_filepath, "nation", signal_name = "confirmed_incidence_num", output_dir=opt$dir) print("Scoring nation deaths incidence...") -create_score_cards(prediction_cards_filepath, "nation", signal = "deaths_incidence_num", output_dir=opt$dir) +create_score_cards(prediction_cards_filepath, "nation", signal_name = "deaths_incidence_num", output_dir=opt$dir) print("Done") diff --git a/Report/score.R b/Report/score.R index 59cd530..4482302 100644 --- a/Report/score.R +++ b/Report/score.R @@ -56,8 +56,6 @@ create_score_cards = function(prediction_cards_filepath, geo_type, signal_name = } if (file.exists(output_file_name)) { score_cards = 
readRDS(output_file_name) - } - if(exists("score_cards")){ preds_to_eval = anti_join(preds_to_eval, score_cards, by = c("ahead", "forecaster", "forecast_date")) @@ -69,12 +67,17 @@ create_score_cards = function(prediction_cards_filepath, geo_type, signal_name = summarize(num_quantiles = n_distinct(quantile)) %>% filter(num_quantiles > 2) %>% select(-c(num_quantiles)) + preds_to_eval = semi_join(preds_to_eval, quantile_forecasts) if(nrow(preds_to_eval) > 0){ score_cards_new = evaluate_predictions(preds_to_eval, err_measures, backfill_buffer = 0, geo_type = geo_type) + # filter out scores that couldn't be evaluated to try to evaluate with + # covidHubUtils. + na_scores = score_cards_new %>% filter(is.na(actual)) + score_cards_new = score_cards_new %>% filter(!is.na(actual)) } else { score_cards_new = data.frame() } @@ -84,7 +87,49 @@ create_score_cards = function(prediction_cards_filepath, geo_type, signal_name = } else { score_cards = score_cards_new } - # score_cards = score_cards %>% filter(forecast_date >= start_date) + + if (exists("na_scores", inherits = FALSE) && nrow(na_scores) > 0){ + warning(msg = + paste("covidcast could not provide truth data for some", + "predictions. 
Attempting to use covidHubUtils instead.")) + has_jhu_signal = TRUE + if (signal_name == "confirmed_incidence_num"){ + jhu_signal = "inc case" + } else if (signal_name == "deaths_incidence_num"){ + jhu_signal = "inc death" + } else if (signal_name == "deaths_cumulative_num"){ + jhu_signal = "cum deaths" + } else { + has_jhu_signal = FALSE + warning(msg = paste("covidHubUtils cannot process provided signal:", + signal_name)) + } + if(has_jhu_signal){ + na_preds = semi_join(preds_to_eval, na_scores, by = c("forecaster", "ahead", "geo_value", "forecast_date")) + chu_truth = covidHubUtils::load_truth("JHU", jhu_signal) + chu_truth = chu_truth %>% + rename(actual = value) %>% + select(-c(model, + target_variable, + location, + location_name, + population, + geo_type, + abbreviation)) + chu_scores = evaluate_predictions(na_preds, + err_measures, + backfill_buffer = 0, + geo_type = geo_type, + side_truth = chu_truth) + if (any(is.na(chu_scores$actual))){ + warning(msg = + paste("covidHubUtils could not provide actual data for some", + "predictions. 
These predictions have not been scored.")) + chu_scores = chu_scores %>% filter(!is.na(actual)) + } + score_cards = rbind(score_cards, chu_scores) + } + } saveRDS(score_cards, file = output_file_name, From b36508c653a0110159b769be9ba54c4737bc61a0 Mon Sep 17 00:00:00 2001 From: Jed Grabman Date: Fri, 12 Feb 2021 16:02:31 -0500 Subject: [PATCH 2/2] typo, deaths -> death --- Report/score.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Report/score.R b/Report/score.R index 4482302..c9abd70 100644 --- a/Report/score.R +++ b/Report/score.R @@ -98,7 +98,7 @@ create_score_cards = function(prediction_cards_filepath, geo_type, signal_name = } else if (signal_name == "deaths_incidence_num"){ jhu_signal = "inc death" } else if (signal_name == "deaths_cumulative_num"){ - jhu_signal = "cum deaths" + jhu_signal = "cum death" } else { has_jhu_signal = FALSE warning(msg = paste("covidHubUtils cannot process provided signal:",