Skip to content

Commit 6ee92cc

Browse files
authored
adding claims_hosp (hospital-admissions) flu signal (#2159)
1 parent 3149c13 commit 6ee92cc

File tree

9 files changed: 88 additions (+88) and 54 deletions (-54)

claims_hosp/delphi_claims_hosp/backfill.py

Lines changed: 19 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -36,23 +36,29 @@ def store_backfill_file(claims_filepath, _end_date, backfill_dir):
3636
dtype=Config.CLAIMS_DTYPES,
3737
parse_dates=[Config.CLAIMS_DATE_COL],
3838
)
39-
backfilldata.rename({"ServiceDate": "time_value",
40-
"PatCountyFIPS": "fips",
41-
"Denominator": "den",
42-
"Covid_like": "num"},
43-
axis=1, inplace=True)
44-
backfilldata = gmpr.add_geocode(backfilldata, from_code="fips", new_code="state_id",
45-
from_col="fips", new_col="state_id")
39+
backfilldata.rename(
40+
{
41+
"ServiceDate": "time_value",
42+
"PatCountyFIPS": "fips",
43+
"Denominator": "den",
44+
"Covid_like": "num",
45+
"Flu1": "num_flu",
46+
},
47+
axis=1,
48+
inplace=True,
49+
)
50+
backfilldata = gmpr.add_geocode(
51+
backfilldata, from_code="fips", new_code="state_id", from_col="fips", new_col="state_id"
52+
)
4653
#Store one year's backfill data
4754
if _end_date.day == 29 and _end_date.month == 2:
4855
_start_date = datetime(_end_date.year-1, 2, 28)
4956
else:
50-
_start_date = _end_date.replace(year=_end_date.year-1)
51-
selected_columns = ['time_value', 'fips', 'state_id',
52-
'den', 'num']
53-
backfilldata = backfilldata.loc[(backfilldata["time_value"] >= _start_date)
54-
& (~backfilldata["fips"].isnull()),
55-
selected_columns]
57+
_start_date = _end_date.replace(year=_end_date.year - 1)
58+
selected_columns = ["time_value", "fips", "state_id", "den", "num", "num_flu"]
59+
backfilldata = backfilldata.loc[
60+
(backfilldata["time_value"] >= _start_date) & (~backfilldata["fips"].isnull()), selected_columns
61+
]
5662

5763
backfilldata["lag"] = [(_end_date - x).days for x in backfilldata["time_value"]]
5864
backfilldata["time_value"] = backfilldata.time_value.dt.strftime("%Y-%m-%d")

claims_hosp/delphi_claims_hosp/config.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,14 @@
1313
class Config:
1414
"""Static configuration variables."""
1515

16-
signal_name = "smoothed_covid19_from_claims"
17-
signal_weekday_name = "smoothed_adj_covid19_from_claims"
16+
signal_name = {
17+
"Covid_like": "smoothed_covid19_from_claims",
18+
"Flu1": "smoothed_flu_from_claims",
19+
}
20+
signal_weekday_name = {
21+
"Covid_like": "smoothed_adj_covid19_from_claims",
22+
"Flu1": "smoothed_adj_flu_from_claims",
23+
}
1824

1925
# max number of CPUs available for pool
2026
MAX_CPU_POOL = 10
@@ -30,7 +36,7 @@ class Config:
3036
DAY_SHIFT = timedelta(days=0)
3137

3238
# data columns
33-
CLAIMS_COUNT_COLS = ["Denominator", "Covid_like"]
39+
CLAIMS_COUNT_COLS = ["Denominator", "Covid_like", "Flu1"]
3440
CLAIMS_DATE_COL = "ServiceDate"
3541
FIPS_COL = "fips"
3642
DATE_COL = "timestamp"
@@ -44,6 +50,7 @@ class Config:
4450
"PatCountyFIPS": str,
4551
"Denominator": float,
4652
"Covid_like": float,
53+
"Flu1": float,
4754
"PatAgeGroup": str,
4855
"Pat HRR ID": str,
4956
}

claims_hosp/delphi_claims_hosp/load_data.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ def load_claims_data(claims_filepath, dropdate, base_geo):
5252

5353
return claims_data
5454

55-
def load_data(input_filepath, dropdate, base_geo):
55+
def load_data(input_filepath, dropdate, base_geo, numerator_name):
5656
"""
5757
Load in claims data, and combine them.
5858
@@ -71,7 +71,7 @@ def load_data(input_filepath, dropdate, base_geo):
7171

7272
# rename numerator and denominator
7373
data.fillna(0, inplace=True)
74-
data["num"] = data["Covid_like"]
74+
data["num"] = data[numerator_name]
7575
data["den"] = data["Denominator"]
7676
data = data[['num', 'den']]
7777
data.reset_index(inplace=True)

claims_hosp/delphi_claims_hosp/run.py

Lines changed: 31 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -120,30 +120,37 @@ def run_module(params):
120120
else:
121121
logger.info("Starting no weekday adj", geo_type=geo)
122122

123-
signal_name = Config.signal_weekday_name if weekday else Config.signal_name
124-
if params["indicator"]["write_se"]:
125-
assert params["indicator"]["obfuscated_prefix"] is not None, \
126-
"supply obfuscated prefix in params.json"
127-
signal_name = params["indicator"]["obfuscated_prefix"] + "_" + signal_name
128-
129-
logger.info("Updating signal name", signal=signal_name)
130-
updater = ClaimsHospIndicatorUpdater(
131-
startdate,
132-
enddate,
133-
dropdate,
134-
geo,
135-
params["indicator"]["parallel"],
136-
weekday,
137-
params["indicator"]["write_se"],
138-
signal_name,
139-
logger,
140-
)
141-
updater.update_indicator(
142-
claims_file,
143-
params["common"]["export_dir"],
144-
)
145-
max_dates.append(updater.output_dates[-1])
146-
n_csv_export.append(len(updater.output_dates))
123+
124+
for numerator_name in ["Covid_like", "Flu1"]:
125+
126+
signal_name = (
127+
Config.signal_weekday_name[numerator_name] if weekday else Config.signal_name[numerator_name]
128+
)
129+
if params["indicator"]["write_se"]:
130+
assert (
131+
params["indicator"]["obfuscated_prefix"] is not None
132+
), "supply obfuscated prefix in params.json"
133+
signal_name = params["indicator"]["obfuscated_prefix"] + "_" + signal_name
134+
135+
logger.info("Updating signal name", signal=signal_name)
136+
updater = ClaimsHospIndicatorUpdater(
137+
startdate,
138+
enddate,
139+
dropdate,
140+
geo,
141+
params["indicator"]["parallel"],
142+
weekday,
143+
params["indicator"]["write_se"],
144+
signal_name,
145+
numerator_name,
146+
logger,
147+
)
148+
updater.update_indicator(
149+
claims_file,
150+
params["common"]["export_dir"],
151+
)
152+
max_dates.append(updater.output_dates[-1])
153+
n_csv_export.append(len(updater.output_dates))
147154
logger.info("Finished updating", geo_type=geo)
148155

149156
# Remove all the raw files

claims_hosp/delphi_claims_hosp/update_indicator.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,9 @@ class ClaimsHospIndicatorUpdater:
2727
# pylint: disable=too-many-instance-attributes, too-many-arguments
2828
# all variables are used
2929

30-
def __init__(self, startdate, enddate, dropdate, geo, parallel, weekday, write_se, signal_name, logger):
30+
def __init__(
31+
self, startdate, enddate, dropdate, geo, parallel, weekday, write_se, signal_name, numerator_name, logger
32+
):
3133
"""
3234
Initialize updater for the claims-based hospitalization indicator.
3335
@@ -45,8 +47,14 @@ def __init__(self, startdate, enddate, dropdate, geo, parallel, weekday, write_s
4547
self.startdate, self.enddate, self.dropdate = [pd.to_datetime(t) for t in
4648
(startdate, enddate, dropdate)]
4749

48-
self.geo, self.parallel, self.weekday, self.write_se, self.signal_name = \
49-
geo.lower(), parallel, weekday, write_se, signal_name
50+
self.geo, self.parallel, self.weekday, self.write_se, self.signal_name, self.numerator_name = (
51+
geo.lower(),
52+
parallel,
53+
weekday,
54+
write_se,
55+
signal_name,
56+
numerator_name,
57+
)
5058

5159
# init in shift_dates, declared here for pylint
5260
self.burnindate, self.fit_dates, self.burn_in_dates, self.output_dates = \
@@ -147,7 +155,7 @@ def update_indicator(self, input_filepath, outpath):
147155

148156
# load data
149157
base_geo = Config.HRR_COL if self.geo == Config.HRR_COL else Config.FIPS_COL
150-
data = load_data(input_filepath, self.dropdate, base_geo)
158+
data = load_data(input_filepath, self.dropdate, base_geo, self.numerator_name)
151159
data_frame = self.geo_reindex(data)
152160

153161
# handle if we need to adjust by weekday

claims_hosp/tests/test_backfill.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ def test_store_backfill_file(self):
3737
backfill_df = pd.read_parquet(backfill_dir + "/"+ fn, engine='pyarrow')
3838

3939
selected_columns = ['time_value', 'fips', 'state_id',
40-
'num', 'den', 'lag', 'issue_date']
40+
'num', 'den', 'lag', 'issue_date', 'num_flu']
4141
assert set(selected_columns) == set(backfill_df.columns)
4242

4343
os.remove(backfill_dir + "/" + fn)

claims_hosp/tests/test_indicator.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@
2020

2121

2222
class TestLoadData:
23-
fips_data = load_data(DATA_FILEPATH, DROP_DATE, "fips")
24-
hrr_data = load_data(DATA_FILEPATH, DROP_DATE, "hrr")
23+
fips_data = load_data(DATA_FILEPATH, DROP_DATE, "fips", "Covid_like")
24+
hrr_data = load_data(DATA_FILEPATH, DROP_DATE, "hrr", "Covid_like")
2525

2626
def test_backwards_pad(self):
2727
num0 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=float).reshape(-1, 1)

claims_hosp/tests/test_load_data.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,23 +21,23 @@
2121
class TestLoadData:
2222
fips_claims_data = load_claims_data(DATA_FILEPATH, DROP_DATE, "fips")
2323
hrr_claims_data = load_claims_data(DATA_FILEPATH, DROP_DATE, "hrr")
24-
fips_data = load_data(DATA_FILEPATH, DROP_DATE, "fips")
25-
hrr_data = load_data(DATA_FILEPATH, DROP_DATE, "hrr")
24+
fips_data = load_data(DATA_FILEPATH, DROP_DATE, "fips", "Covid_like")
25+
hrr_data = load_data(DATA_FILEPATH, DROP_DATE, "hrr", "Covid_like")
2626

2727
def test_base_unit(self):
2828
with pytest.raises(AssertionError):
2929
load_claims_data(DATA_FILEPATH, DROP_DATE, "foo")
3030

3131
with pytest.raises(AssertionError):
32-
load_data(DATA_FILEPATH, DROP_DATE, "foo")
32+
load_data(DATA_FILEPATH, DROP_DATE, "foo", "Covid_like")
3333

3434
def test_claims_columns(self):
3535
assert "hrr" in self.hrr_claims_data.index.names
3636
assert "fips" in self.fips_claims_data.index.names
3737
assert "timestamp" in self.hrr_claims_data.index.names
3838
assert "timestamp" in self.fips_claims_data.index.names
3939

40-
expected_claims_columns = ["Denominator", "Covid_like"]
40+
expected_claims_columns = ["Denominator", "Covid_like", "Flu1"]
4141
for col in expected_claims_columns:
4242
assert col in self.fips_claims_data.columns
4343
assert col in self.hrr_claims_data.columns

claims_hosp/tests/test_update_indicator.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ def test_shift_dates(self):
5252
self.weekday,
5353
self.write_se,
5454
Config.signal_name,
55+
"Covid_like",
5556
TEST_LOGGER
5657
)
5758
## Test init
@@ -74,6 +75,7 @@ def test_geo_reindex(self):
7475
self.weekday,
7576
self.write_se,
7677
Config.signal_name,
78+
"Covid_like",
7779
TEST_LOGGER
7880
)
7981
updater.shift_dates()
@@ -93,6 +95,7 @@ def test_update_indicator(self):
9395
self.weekday,
9496
self.write_se,
9597
Config.signal_name,
98+
"Covid_like",
9699
TEST_LOGGER
97100
)
98101

@@ -115,6 +118,7 @@ def test_write_to_csv_results(self):
115118
self.weekday,
116119
self.write_se,
117120
Config.signal_name,
121+
"Covid_like",
118122
TEST_LOGGER
119123
)
120124

@@ -186,7 +190,7 @@ def test_write_to_csv_results(self):
186190

187191
def test_write_to_csv_with_se_results(self):
188192
obfuscated_name = PARAMS["indicator"]["obfuscated_prefix"]
189-
signal_name = obfuscated_name + "_" + Config.signal_weekday_name
193+
signal_name = obfuscated_name + "_" + Config.signal_weekday_name["Covid_like"]
190194
updater = ClaimsHospIndicatorUpdater(
191195
"02-01-2020",
192196
"06-01-2020",
@@ -196,6 +200,7 @@ def test_write_to_csv_with_se_results(self):
196200
True,
197201
True,
198202
signal_name,
203+
"Covid_like",
199204
TEST_LOGGER
200205
)
201206

@@ -248,6 +253,7 @@ def test_write_to_csv_wrong_results(self):
248253
self.weekday,
249254
self.write_se,
250255
Config.signal_name,
256+
"Covid_like",
251257
TEST_LOGGER
252258
)
253259

0 commit comments

Comments
 (0)