diff --git a/epidatpy/_endpoints.py b/epidatpy/_endpoints.py index 1caf928..fd308e1 100644 --- a/epidatpy/_endpoints.py +++ b/epidatpy/_endpoints.py @@ -109,7 +109,7 @@ def pub_covid_hosp_facility_lookup( EpidataFieldInfo("city", EpidataFieldType.text), EpidataFieldInfo("zip", EpidataFieldType.text), EpidataFieldInfo("hospital_subtype", EpidataFieldType.text), - EpidataFieldInfo("fip_code", EpidataFieldType.text), + EpidataFieldInfo("fips_code", EpidataFieldType.text), EpidataFieldInfo("is_metro_micro", EpidataFieldType.int), ], ) @@ -553,7 +553,7 @@ def pub_ecdc_ili( [ EpidataFieldInfo("region", EpidataFieldType.text), EpidataFieldInfo("release_date", EpidataFieldType.date), - EpidataFieldInfo("issue", EpidataFieldType.date), + EpidataFieldInfo("issue", EpidataFieldType.epiweek), EpidataFieldInfo("epiweek", EpidataFieldType.epiweek), EpidataFieldInfo("lag", EpidataFieldType.int), EpidataFieldInfo("incidence_rate", EpidataFieldType.float), @@ -584,15 +584,15 @@ def pub_flusurv( [ EpidataFieldInfo("release_date", EpidataFieldType.text), EpidataFieldInfo("location", EpidataFieldType.text), - EpidataFieldInfo("issue", EpidataFieldType.date), + EpidataFieldInfo("issue", EpidataFieldType.date_or_epiweek), EpidataFieldInfo("epiweek", EpidataFieldType.epiweek), EpidataFieldInfo("lag", EpidataFieldType.int), - EpidataFieldInfo("rage_age_0", EpidataFieldType.float), - EpidataFieldInfo("rage_age_1", EpidataFieldType.float), - EpidataFieldInfo("rage_age_2", EpidataFieldType.float), - EpidataFieldInfo("rage_age_3", EpidataFieldType.float), - EpidataFieldInfo("rage_age_4", EpidataFieldType.float), - EpidataFieldInfo("rage_overall", EpidataFieldType.float), + EpidataFieldInfo("rate_age_0", EpidataFieldType.float), + EpidataFieldInfo("rate_age_1", EpidataFieldType.float), + EpidataFieldInfo("rate_age_2", EpidataFieldType.float), + EpidataFieldInfo("rate_age_3", EpidataFieldType.float), + EpidataFieldInfo("rate_age_4", EpidataFieldType.float), + EpidataFieldInfo("rate_overall", 
def parse_api_date(value: Union[str, int, float, None]) -> Optional[date]:
    """Convert a date value returned by the API into a ``datetime.date``.

    Two textual layouts are understood: the compact ``YYYYMMDD`` form
    (e.g. ``20200101``) and the dashed ``YYYY-MM-DD`` form (e.g. ``2020-01-01``).

    Args:
        value: The raw value. ``None`` is passed through unchanged. Integral
            floats such as ``20200101.0`` are treated like the equivalent int.

    Returns:
        The parsed date, or ``None`` when *value* is ``None``.

    Raises:
        ValueError: If *value* is a non-integral (or non-finite) float, or its
            text does not match either supported layout.
    """
    if value is None:
        return None
    if isinstance(value, float):
        # str(20200101.0) is "20200101.0" - ten characters, the same length as
        # "2020-01-01" - so floats must be normalised before choosing a format.
        if not value.is_integer():
            raise ValueError(f"not a valid API date: {value!r}")
        value = int(value)
    text = str(value)
    # Pick the layout from the separator rather than from the string length,
    # which is ambiguous (see the float case above).
    fmt = "%Y-%m-%d" if "-" in text else "%Y%m%d"
    return datetime.strptime(text, fmt).date()
"""
Requirements to run these:
- DELPHI_EPIDATA_KEY environment variable is set https://api.delphi.cmu.edu/epidata/admin/registration_form
- it has access to the private endpoints being tested
"""

import os
from typing import Any, Dict

import pytest

from epidatpy.request import Epidata, EpiRange

auth = os.environ.get("DELPHI_EPIDATA_KEY", "")
secret_cdc = os.environ.get("SECRET_API_AUTH_CDC", "")
secret_fluview = os.environ.get("SECRET_API_AUTH_FLUVIEW", "")
secret_ght = os.environ.get("SECRET_API_AUTH_GHT", "")
secret_norostat = os.environ.get("SECRET_API_AUTH_NOROSTAT", "")
secret_quidel = os.environ.get("SECRET_API_AUTH_QUIDEL", "")
secret_sensors = os.environ.get("SECRET_API_AUTH_SENSORS", "")
secret_twitter = os.environ.get("SECRET_API_AUTH_TWITTER", "")


def _assert_dtypes(frame: Any, expected: Dict[str, str]) -> None:
    """Assert that *frame* is non-empty and its columns have the expected dtypes.

    Args:
        frame: The data frame returned by an API call's ``df()``.
        expected: Mapping of column name to the expected ``str(dtype)``.

    All listed columns are compared in a single assertion so that a failure
    reports every mismatching column instead of stopping at the first one.
    """
    assert len(frame) > 0
    actual = {column: str(frame[column].dtype) for column in expected}
    assert actual == expected


@pytest.mark.skipif(not auth, reason="DELPHI_EPIDATA_KEY not available.")
class TestEpidataCalls:
    """Make network call tests for Epidata."""

    @pytest.mark.skipif(not secret_cdc, reason="CDC key not available.")
    def test_pvt_cdc(self) -> None:
        apicall = Epidata.pvt_cdc(auth=secret_cdc, locations="fl,ca", epiweeks=EpiRange(201501, 201601))
        _assert_dtypes(
            apicall.df(),
            {
                "location": "string",
                "epiweek": "string",
                "num1": "Int64",
                "num2": "Int64",
                "num3": "Int64",
                "num4": "Int64",
                "num5": "Int64",
                "num6": "Int64",
                "num7": "Int64",
                "num8": "Int64",
                "total": "Int64",
                "value": "Float64",
            },
        )

    def test_pub_covid_hosp_facility_lookup(self) -> None:
        apicall = Epidata.pub_covid_hosp_facility_lookup(state="fl")
        data = apicall.df()
        assert len(data) > 0

        apicall = Epidata.pub_covid_hosp_facility_lookup(city="southlake")
        _assert_dtypes(
            apicall.df(),
            {
                "hospital_pk": "string",
                "state": "string",
                "ccn": "string",
                "hospital_name": "string",
                "address": "string",
                "city": "string",
                "zip": "string",
                "hospital_subtype": "string",
                "fips_code": "string",
                "is_metro_micro": "Int64",
            },
        )

    @pytest.mark.filterwarnings("ignore:`collection_weeks` is in week format")
    def test_pub_covid_hosp_facility(self) -> None:
        apicall = Epidata.pub_covid_hosp_facility(hospital_pks="100075", collection_weeks=EpiRange(20200101, 20200501))
        _assert_dtypes(
            apicall.df(),
            {
                "hospital_pk": "string",
                "state": "string",
                "ccn": "string",
                "hospital_name": "string",
                "address": "string",
                "city": "string",
                "zip": "string",
                "hospital_subtype": "string",
                "fips_code": "string",
                "publication_date": "datetime64[ns]",
                "collection_week": "datetime64[ns]",
                "is_metro_micro": "bool",
            },
        )

        # The same endpoint must also accept an epiweek-formatted range.
        apicall2 = Epidata.pub_covid_hosp_facility(hospital_pks="100075", collection_weeks=EpiRange(202001, 202030))
        data2 = apicall2.df()
        assert len(data2) > 0

    def test_pub_covid_hosp_state_timeseries(self) -> None:
        apicall = Epidata.pub_covid_hosp_state_timeseries(states="fl", dates=EpiRange(20200101, 20200501))
        _assert_dtypes(
            apicall.df(),
            {
                "state": "string",
                "issue": "datetime64[ns]",
                "date": "datetime64[ns]",
            },
        )

    def test_pub_covidcast_meta(self) -> None:
        apicall = Epidata.pub_covidcast_meta()
        _assert_dtypes(
            apicall.df(),
            {
                "data_source": "string",
                "signal": "string",
                "time_type": "category",
                "min_time": "string",
                "max_time": "datetime64[ns]",
                "num_locations": "Int64",
                "min_value": "Float64",
                "max_value": "Float64",
                "mean_value": "Float64",
                "stdev_value": "Float64",
                "last_update": "Int64",
                "max_issue": "datetime64[ns]",
                "min_lag": "Int64",
                "max_lag": "Int64",
            },
        )

    def test_pub_covidcast(self) -> None:
        apicall = Epidata.pub_covidcast(
            data_source="jhu-csse",
            signals="confirmed_7dav_incidence_prop",
            geo_type="state",
            time_type="day",
            geo_values=["ca", "fl"],
            time_values=EpiRange(20200601, 20200801),
        )
        data = apicall.df()
        assert len(data) > 0

        apicall = Epidata.pub_covidcast(
            data_source="jhu-csse",
            signals="confirmed_7dav_incidence_prop",
            geo_type="state",
            time_type="day",
            geo_values="*",
            time_values=EpiRange(20200601, 20200801),
        )
        _assert_dtypes(
            apicall.df(),
            {
                "source": "string",
                "signal": "string",
                "geo_type": "category",
                "geo_value": "string",
                "time_type": "category",
                "time_value": "datetime64[ns]",
                "issue": "datetime64[ns]",
                "lag": "Int64",
                "value": "Float64",
                "missing_value": "Int64",
                "missing_stderr": "Int64",
                "missing_sample_size": "Int64",
            },
        )

    def test_pub_delphi(self) -> None:
        apicall = Epidata.pub_delphi(system="ec", epiweek=201501)
        data = apicall.classic()  # only supports classic
        assert len(data) > 0

    def test_pub_dengue_nowcast(self) -> None:
        apicall = Epidata.pub_dengue_nowcast(locations="pr", epiweeks=EpiRange(201401, 202301))
        _assert_dtypes(
            apicall.df(),
            {
                "location": "string",
                "epiweek": "string",
                "value": "Float64",
                "std": "Float64",
            },
        )

    @pytest.mark.skipif(not secret_sensors, reason="Dengue sensors key not available.")
    def test_pvt_dengue_sensors(self) -> None:
        apicall = Epidata.pvt_dengue_sensors(
            auth=secret_sensors, names="ght", locations="ag", epiweeks=EpiRange(201501, 202001)
        )
        _assert_dtypes(
            apicall.df(),
            {
                "location": "string",
                "epiweek": "string",
                "value": "Float64",
            },
        )

    def test_pub_ecdc_ili(self) -> None:
        apicall = Epidata.pub_ecdc_ili(regions="austria", epiweeks=EpiRange(201901, 202001))
        _assert_dtypes(
            apicall.df(),
            {
                "release_date": "datetime64[ns]",
                "issue": "string",
                "epiweek": "string",
            },
        )

    def test_pub_flusurv(self) -> None:
        apicall = Epidata.pub_flusurv(locations="CA", epiweeks=EpiRange(201701, 201801))
        _assert_dtypes(
            apicall.df(),
            {
                "release_date": "string",
                "location": "string",
                "issue": "string",
                "epiweek": "string",
                "lag": "Int64",
                "rate_age_0": "Float64",
                "rate_age_1": "Float64",
                "rate_age_2": "Float64",
                "rate_age_3": "Float64",
                "rate_age_4": "Float64",
                "rate_overall": "Float64",
            },
        )

    def test_pub_fluview_clinical(self) -> None:
        apicall = Epidata.pub_fluview_clinical(regions="nat", epiweeks=EpiRange(201601, 201701))
        _assert_dtypes(
            apicall.df(),
            {
                "release_date": "datetime64[ns]",
                "region": "string",
                "issue": "string",
                "epiweek": "string",
                "lag": "Int64",
                "total_specimens": "Int64",
                "total_a": "Int64",
                "total_b": "Int64",
                "percent_positive": "Float64",
                "percent_a": "Float64",
                "percent_b": "Float64",
            },
        )

    def test_pub_fluview_meta(self) -> None:
        apicall = Epidata.pub_fluview_meta()
        _assert_dtypes(
            apicall.df(),
            {
                "latest_update": "datetime64[ns]",
                "latest_issue": "datetime64[ns]",
                "table_rows": "Int64",
            },
        )

    def test_pub_fluview(self) -> None:
        apicall = Epidata.pub_fluview(regions="nat", epiweeks=EpiRange(201201, 202005))
        _assert_dtypes(
            apicall.df(),
            {
                "release_date": "datetime64[ns]",
                "region": "string",
                "issue": "string",
                "epiweek": "string",
                "lag": "Int64",
                "num_ili": "Int64",
                "num_patients": "Int64",
                "wili": "Float64",
                "ili": "Float64",
            },
        )

    def test_pub_gft(self) -> None:
        apicall = Epidata.pub_gft(locations="hhs1", epiweeks=EpiRange(201201, 202001))
        _assert_dtypes(
            apicall.df(),
            {
                "location": "string",
                "epiweek": "string",
                "num": "Int64",
            },
        )

    @pytest.mark.skipif(not secret_ght, reason="GHT key not available.")
    def test_pvt_ght(self) -> None:
        apicall = Epidata.pvt_ght(
            auth=secret_ght, locations="ma", epiweeks=EpiRange(199301, 202304), query="how to get over the flu"
        )
        _assert_dtypes(
            apicall.df(),
            {
                "location": "string",
                "epiweek": "string",
                "value": "Float64",
            },
        )

    def test_pub_kcdc_ili(self) -> None:
        apicall = Epidata.pub_kcdc_ili(regions="ROK", epiweeks=200436)
        _assert_dtypes(
            apicall.df(),
            {
                "release_date": "datetime64[ns]",
                "region": "string",
                "issue": "string",
                "epiweek": "string",
                "lag": "Int64",
                "ili": "Float64",
            },
        )

    @pytest.mark.skipif(not secret_norostat, reason="Norostat key not available.")
    def test_pvt_meta_norostat(self) -> None:
        apicall = Epidata.pvt_meta_norostat(auth=secret_norostat)
        data = apicall.classic()
        assert len(data) > 0

    def test_pub_meta(self) -> None:
        apicall = Epidata.pub_meta()
        data = apicall.classic()  # only supports classic
        assert len(data) > 0

    def test_pub_nidss_dengue(self) -> None:
        apicall = Epidata.pub_nidss_dengue(locations="taipei", epiweeks=EpiRange(201201, 201301))
        _assert_dtypes(
            apicall.df(),
            {
                "location": "string",
                "epiweek": "string",
                "count": "Int64",
            },
        )

    def test_pub_nidss_flu(self) -> None:
        apicall = Epidata.pub_nidss_flu(regions="taipei", epiweeks=EpiRange(201501, 201601))
        _assert_dtypes(
            apicall.df(),
            {
                "release_date": "datetime64[ns]",
                "region": "string",
                "issue": "string",
                "epiweek": "string",
                "lag": "Int64",
                "visits": "Int64",
                "ili": "Float64",
            },
        )

    @pytest.mark.skipif(not secret_norostat, reason="Norostat key not available.")
    def test_pvt_norostat(self) -> None:
        apicall = Epidata.pvt_norostat(auth=secret_norostat, location="1", epiweeks=201233)
        # TODO: Need a non-trivial query for Norostat
        _assert_dtypes(
            apicall.df(),
            {
                "release_date": "datetime64[ns]",
                "epiweek": "string",
                "value": "Int64",
            },
        )

    def test_pub_nowcast(self) -> None:
        apicall = Epidata.pub_nowcast(locations="ca", epiweeks=EpiRange(201201, 201301))
        _assert_dtypes(
            apicall.df(),
            {
                "location": "string",
                "epiweek": "string",
                "value": "Float64",
                "std": "Float64",
            },
        )

    def test_pub_paho_dengue(self) -> None:
        apicall = Epidata.pub_paho_dengue(regions="ca", epiweeks=EpiRange(201401, 201501))
        _assert_dtypes(
            apicall.df(),
            {
                "release_date": "datetime64[ns]",
                "region": "string",
                "serotype": "string",
                "epiweek": "string",
                "issue": "string",
                "lag": "Int64",
                "total_pop": "Int64",
                "num_dengue": "Int64",
                "num_severe": "Int64",
                "num_deaths": "Int64",
                "incidence_rate": "Float64",
            },
        )

    @pytest.mark.skipif(not secret_quidel, reason="Quidel key not available.")
    def test_pvt_quidel(self) -> None:
        apicall = Epidata.pvt_quidel(auth=secret_quidel, locations="hhs1", epiweeks=EpiRange(201201, 202001))
        _assert_dtypes(
            apicall.df(),
            {
                "location": "string",
                "epiweek": "string",
                "value": "Float64",
            },
        )

    @pytest.mark.skipif(not secret_sensors, reason="Sensors key not available.")
    def test_pvt_sensors(self) -> None:
        apicall = Epidata.pvt_sensors(
            auth=secret_sensors, names="sar3", locations="nat", epiweeks=EpiRange(201501, 202001)
        )
        _assert_dtypes(
            apicall.df(),
            {
                "name": "string",
                "location": "string",
                "epiweek": "string",
                "value": "Float64",
            },
        )

    @pytest.mark.skipif(not secret_twitter, reason="Twitter key not available.")
    def test_pvt_twitter(self) -> None:
        apicall = Epidata.pvt_twitter(
            auth=secret_twitter, locations="CA", time_type="week", time_values=EpiRange(201501, 202001)
        )
        _assert_dtypes(
            apicall.df(),
            {
                "location": "string",
                "epiweek": "string",
                "num": "Int64",
                "total": "Int64",
                "percent": "Float64",
            },
        )

    def test_pub_wiki(self) -> None:
        apicall = Epidata.pub_wiki(articles="avian_influenza", time_type="week", time_values=EpiRange(201501, 201601))
        _assert_dtypes(
            apicall.df(),
            {
                "article": "string",
                "epiweek": "string",
                "count": "Int64",
                "total": "Int64",
                "hour": "Int64",
                "value": "Float64",
            },
        )