@@ -35,4 +35,75 @@ nssp <- pub_covidcast(
3535 signal = " pct_ed_visits_influenza" ,
3636 time_type = " week" ,
3737 geo_type = " state" ,
38- geo_values = " *" )
38+ geo_values = " *"
39+ ) | >
40+ select(geo_value , time_value , nssp = value )
41+
42+
43+ empty_data <- tibble(time_value = seq(max()))
44+
45+ flu_data <- hhs_v_nhsn | >
46+ select(time_value , geo_value , hhs = old_source ) | >
47+ left_join(nssp | > mutate(time_value = time_value + 6 ),
48+ by = join_by(geo_value , time_value )
49+ )
50+
51+ n_geos <- n_distinct(flu_data $ geo_value )
52+ max_time_value <- max(flu_data $ time_value )
53+ empty_data <- tibble(
54+ time_value = rep(max_time_value + days(1 : 3 * 7 ), each = n_geos ),
55+ geo_value = rep(unique(flu_data $ geo_value ), times = 3 ),
56+ nssp = NA ,
57+ hhs = NA
58+ )
59+
60+ flu_data <- flu_data | >
61+ add_row(empty_data ) | >
62+ mutate(epiweek = epiweek(time_value )) | >
63+ left_join(climate , by = join_by(geo_value , epiweek )) | >
64+ select(! epiweek ) | >
65+ filter(geo_value %nin % c(" as" , " vi" , " gu" , " mp" , " usa" )) | >
66+ arrange(geo_value , time_value ) | >
67+ as_epi_df()
68+
69+ r <- epi_recipe(flu_data ) | >
70+ step_population_scaling(
71+ hhs , nssp ,
72+ df = epidatasets :: state_census ,
73+ df_pop_col = " pop" ,
74+ create_new = FALSE ,
75+ rate_rescaling = 1e5 ,
76+ by = c(" geo_value" = " abbr" )) | >
77+ step_mutate(hhs = hhs ^ (1 / 4 ), nssp = nssp ^ (1 / 4 ), climate_pred = climate_pred ^ (1 / 4 )) | >
78+ step_epi_lag(hhs , lag = c(0 , 7 , 14 )) | >
79+ step_epi_lag(nssp , lag = c(0 , 7 , 14 )) | >
80+ step_epi_ahead(hhs , ahead = 14 ) | >
81+ step_epi_ahead(climate_pred , ahead = 14 , role = " predictor" ) | >
82+ step_epi_naomit()
83+
84+ # Training engine
85+ e <- quantile_reg(quantile_levels = c(0.01 , 0.025 , 1 : 19 / 20 , 0.975 , 0.99 )) # 23 ForecastHub quantiles
86+
87+ # A post-processing routine describing what to do to the predictions
88+ f <- frosting() | >
89+ layer_predict() | >
90+ layer_threshold(.pred , lower = 0 )
91+
92+
93+ # Bundle up the preprocessor, training engine, and postprocessor
94+ # We use quantile regression
95+ ewf <- epi_workflow(r , e , f )
96+
97+ # Fit it to data (we could fit this to ANY data that has the same format)
98+ trained_ewf <- ewf | > fit(flu_data )
99+
100+ # we could make predictions using the same model on ANY test data
101+ preds <- forecast(trained_ewf ) | >
102+ left_join(epidatasets :: state_census | > select(pop , abbr ), join_by(geo_value == abbr )) | >
103+ mutate(
104+ .pred = .pred ^ 4 * pop / 1e5 ,
105+ forecast_date = time_value ,
106+ target_date = forecast_date + days(14 ),
107+ time_value = NULL ,
108+ pop = NULL
109+ )
0 commit comments