From 3dae260a848647d768e3d33fe7a872e6e3952cbf Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Tue, 11 Jun 2024 16:56:25 +0300 Subject: [PATCH 01/12] Port public R queries --- tests/test_epidata_calls.py | 174 ++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 tests/test_epidata_calls.py diff --git a/tests/test_epidata_calls.py b/tests/test_epidata_calls.py new file mode 100644 index 0000000..ac15aeb --- /dev/null +++ b/tests/test_epidata_calls.py @@ -0,0 +1,174 @@ +from epidatpy.request import Epidata, EpiRange + +# Requirements to run these: +# DELPHI_EPIDATA_KEY environment variable is set https://api.delphi.cmu.edu/epidata/admin/registration_form + +def test_pub_covid_hosp_facility_lookup() -> None: + apicall = Epidata.pub_covid_hosp_facility_lookup(state="fl") + data = apicall.json() + assert(len(data) > 0) + + apicall = Epidata.pub_covid_hosp_facility_lookup(city="southlake") + data = apicall.json() + assert(len(data) > 0) + +def test_pub_covid_hosp_facility() -> None: + apicall = Epidata.pub_covid_hosp_facility( + hospital_pks = "100075", + collection_weeks = EpiRange(20200101, 20200501)) + data = apicall.json() + assert(len(data) > 0) + + apicall = Epidata.pub_covid_hosp_facility( + hospital_pks = "100075", + collection_weeks = EpiRange(202001, 202005)) + data = apicall.json() + assert(len(data) > 0) # fails + +def test_pub_covid_hosp_state_timeseries() -> None: + apicall = Epidata.pub_covid_hosp_state_timeseries( + states = "fl", + dates = EpiRange(20200101, 20200501)) + data = apicall.json() + assert(len(data) > 0) + +def test_pub_covidcast_meta() -> None: + apicall = Epidata.pub_covidcast_meta() + data = apicall.json() + assert(len(data) > 0) + +def test_pub_covidcast() -> None: + apicall = Epidata.pub_covidcast( + data_source = "jhu-csse", + signals = "confirmed_7dav_incidence_prop", + geo_type = "state", + time_type = "day", + geo_values = ["ca", "fl"], + time_values = EpiRange(20200601, 20200801)) + data = apicall.json() + assert(len(data) > 0) + + apicall = Epidata.pub_covidcast( + data_source = "jhu-csse", + signals = "confirmed_7dav_incidence_prop", + geo_type = "state", + time_type = "day", + geo_values = "*", + time_values = EpiRange(20200601, 20200801)) + data = apicall.json() + assert(len(data) > 0) + +def test_pub_delphi() -> None: + apicall = Epidata.pub_delphi( + system = "ec", + epiweek = 201501 + ) + data = apicall.classic() # only supports classic + assert(len(data) > 0) + +def test_pub_dengue_nowcast() -> None: + apicall = Epidata.pub_dengue_nowcast( + locations = "pr", + epiweeks = EpiRange(201401, 202301) + ) + data = apicall.json() + assert(len(data) > 0) + +def test_pub_ecdc_ili() -> None: + apicall = Epidata.pub_ecdc_ili( + regions = "austria", + epiweeks = EpiRange(201901, 202001) + ) + data = apicall.json(disable_date_parsing=True) + assert(len(data) > 0) + +def test_pub_flusurv() -> None: + apicall = Epidata.pub_flusurv( + locations = "CA", + epiweeks = EpiRange(201701, 201801) + ) + data = apicall.json(disable_date_parsing=True) + assert(len(data) > 0) + +def test_pub_fluview_clinical() -> None: + apicall = Epidata.pub_fluview_clinical( + regions = "nat", + epiweeks = EpiRange(201601, 201701) + ) + data = apicall.json(disable_date_parsing=True) + assert(len(data) > 0) + +def test_pub_fluview_meta() -> None: + apicall = Epidata.pub_fluview_meta() + data = apicall.json(disable_date_parsing=True) + assert(len(data) > 0) + +def test_pub_fluview() -> None: + apicall = Epidata.pub_fluview( + regions = "nat", + epiweeks = EpiRange(201201, 202005) + ) + data = apicall.json(disable_date_parsing=True) + assert(len(data) > 0) + +def test_pub_gft() -> None: + apicall = Epidata.pub_gft( + locations = "hhs1", + epiweeks = EpiRange(201201, 202001) + ) + data = apicall.json() + assert(len(data) > 0) + +def test_pub_kcdc_ili() -> None: + apicall = Epidata.pub_kcdc_ili( + regions = "ROK", + epiweeks = 200436 + ) + data = apicall.json(disable_date_parsing=True) + assert(len(data) > 0) + +def test_pub_meta() -> None: + apicall = Epidata.pub_meta() + data = apicall.classic() # only supports classic + assert(len(data) > 0) + +def test_pub_nidss_dengue() -> None: + apicall = Epidata.pub_nidss_dengue( + locations = "taipei", + epiweeks = EpiRange(201201, 201301) + ) + data = apicall.json() + assert(len(data) > 0) + +def test_pub_nidss_flu() -> None: + apicall = Epidata.pub_nidss_flu( + regions = "taipei", + epiweeks = EpiRange(201501, 201601) + ) + data = apicall.json(disable_date_parsing=True) + assert(len(data) > 0) + +def test_pub_nowcast() -> None: + apicall = Epidata.pub_nowcast( + locations = "ca", + epiweeks = EpiRange(201201, 201301) + ) + data = apicall.json() + assert(len(data) > 0) + +def test_pub_paho_dengue() -> None: + apicall = Epidata.pub_paho_dengue( + regions = "ca", + epiweeks = EpiRange(201401, 201501) + ) + data = apicall.json(disable_date_parsing=True) + assert(len(data) > 0) + +def test_pub_wiki() -> None: + apicall = Epidata.pub_wiki( + articles = "avian_influenza", + time_type = "week", + time_values = EpiRange(201501, 201601) + ) + data = apicall.json() + assert(len(data) > 0) From 3adce47647ddf62ce892fd402765d93ed7fb9299 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Mon, 17 Jun 2024 17:45:34 +0300 Subject: [PATCH 02/12] Add private calls --- tests/test_epidata_calls.py | 80 ++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/tests/test_epidata_calls.py b/tests/test_epidata_calls.py index ac15aeb..b126691 100644 --- a/tests/test_epidata_calls.py +++ b/tests/test_epidata_calls.py @@ -1,7 +1,20 @@ +import os from epidatpy.request import Epidata, EpiRange # Requirements to run these: -# DELPHI_EPIDATA_KEY environment variable is set https://api.delphi.cmu.edu/epidata/admin/registration_form +# - DELPHI_EPIDATA_KEY environment variable is set https://api.delphi.cmu.edu/epidata/admin/registration_form +# - it has access to the private endpoints being tested + +auth = os.environ.get("DELPHI_EPIDATA_KEY") + +def test_pvt_cdc() -> None: + apicall = Epidata.pvt_cdc( + auth = auth, + locations = "fl,ca", + epiweeks = EpiRange(201501, 201601) + ) + data = apicall.json() + assert(len(data) > 0) def test_pub_covid_hosp_facility_lookup() -> None: apicall = Epidata.pub_covid_hosp_facility_lookup(state="fl") @@ -74,6 +87,16 @@ def test_pub_dengue_nowcast() -> None: data = apicall.json() assert(len(data) > 0) +def test_pvt_dengue_sensors() -> None: + apicall = Epidata.pvt_dengue_sensors( + auth = auth, + names = "ght", + locations = "ag", + epiweeks = EpiRange(201501, 202001) + ) + data = apicall.json() + assert(len(data) > 0) + def test_pub_ecdc_ili() -> None: apicall = Epidata.pub_ecdc_ili( regions = "austria", @@ -119,6 +142,16 @@ def test_pub_gft() -> None: data = apicall.json() assert(len(data) > 0) +def test_pvt_ght() -> None: + apicall = Epidata.pvt_ght( + auth = auth, + locations = "ma", + epiweeks = EpiRange(199301, 202304), + query = "how to get over the flu" + ) + data = apicall.json() + assert(len(data) > 0) + def test_pub_kcdc_ili() -> None: apicall = Epidata.pub_kcdc_ili( regions = "ROK", @@ -127,6 +160,13 @@ def test_pub_kcdc_ili() -> None: data = apicall.json(disable_date_parsing=True) assert(len(data) > 0) +def test_pvt_meta_norostat() -> None: + apicall = Epidata.pvt_meta_norostat( + auth = auth + ) + data = apicall.classic() + assert(len(data) > 0) + def test_pub_meta() -> None: apicall = Epidata.pub_meta() data = apicall.classic() # only supports classic @@ -148,6 +188,15 @@ def test_pub_nidss_flu() -> None: data = apicall.json(disable_date_parsing=True) assert(len(data) > 0) +def test_pvt_norostat() -> None: + apicall = Epidata.pvt_norostat( + auth = auth, + location = "1", + epiweeks = 201233 + ) + data = apicall.json() + assert(len(data) > 0) + def test_pub_nowcast() -> None: apicall = Epidata.pub_nowcast( locations = "ca", @@ -164,6 +213,35 @@ def test_pub_paho_dengue() -> None: data = apicall.json(disable_date_parsing=True) assert(len(data) > 0) +def test_pvt_quidel() -> None: + apicall = Epidata.pvt_quidel( + auth = auth, + locations = "hhs1", + epiweeks = EpiRange(201201, 202001) + ) + data = apicall.json() + assert(len(data) > 0) + +def test_pvt_sensors() -> None: + apicall = Epidata.pvt_sensors( + auth = auth, + names = "sar3", + locations = "nat", + epiweeks = EpiRange(201501, 202001) + ) + data = apicall.json() + assert(len(data) > 0) + +def test_pvt_twitter() -> None: + apicall = Epidata.pvt_twitter( + auth = auth, + locations = "CA", + time_type = "week", + time_values = EpiRange(201501, 202001) + ) + data = apicall.json() + assert(len(data) > 0) + def test_pub_wiki() -> None: apicall = Epidata.pub_wiki( articles = "avian_influenza", From 0aea6a810b7b211aad3265c698701b3221317873 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 21 Jun 2024 21:49:36 -0700 Subject: [PATCH 03/12] test: add secrets, convert dfs --- tests/test_epidata_calls.py | 66 +++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/tests/test_epidata_calls.py b/tests/test_epidata_calls.py index b126691..7440ad7 100644 --- a/tests/test_epidata_calls.py +++ b/tests/test_epidata_calls.py @@ -6,48 +6,55 @@ # - it has access to the private endpoints being tested auth = os.environ.get("DELPHI_EPIDATA_KEY") +secret_cdc = os.environ.get("SECRET_API_AUTH_CDC") +secret_fluview = os.environ.get("SECRET_API_AUTH_FLUVIEW") +secret_ght = os.environ.get("SECRET_API_AUTH_GHT") +secret_norostat = os.environ.get("SECRET_API_AUTH_NOROSTAT") +secret_quidel = os.environ.get("SECRET_API_AUTH_QUIDEL") +secret_sensors = os.environ.get("SECRET_API_AUTH_SENSORS") +secret_twitter = os.environ.get("SECRET_API_AUTH_TWITTER") def test_pvt_cdc() -> None: apicall = Epidata.pvt_cdc( - auth = auth, + auth = secret_cdc, locations = "fl,ca", epiweeks = EpiRange(201501, 201601) ) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pub_covid_hosp_facility_lookup() -> None: apicall = Epidata.pub_covid_hosp_facility_lookup(state="fl") - data = apicall.json() + data = apicall.df() assert(len(data) > 0) apicall = Epidata.pub_covid_hosp_facility_lookup(city="southlake") - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pub_covid_hosp_facility() -> None: apicall = Epidata.pub_covid_hosp_facility( hospital_pks = "100075", collection_weeks = EpiRange(20200101, 20200501)) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) apicall = Epidata.pub_covid_hosp_facility( hospital_pks = "100075", collection_weeks = EpiRange(202001, 202005)) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) # fails def test_pub_covid_hosp_state_timeseries() -> None: apicall = Epidata.pub_covid_hosp_state_timeseries( states = "fl", dates = EpiRange(20200101, 20200501)) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pub_covidcast_meta() -> None: apicall = Epidata.pub_covidcast_meta() - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pub_covidcast() -> None: @@ -58,7 +65,7 @@ def test_pub_covidcast() -> None: time_type = "day", geo_values = ["ca", "fl"], time_values = EpiRange(20200601, 20200801)) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) apicall = Epidata.pub_covidcast( @@ -68,7 +75,7 @@ def test_pub_covidcast() -> None: time_type = "day", geo_values = "*", time_values = EpiRange(20200601, 20200801)) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pub_delphi() -> None: @@ -84,17 +91,17 @@ def test_pub_dengue_nowcast() -> None: locations = "pr", epiweeks = EpiRange(201401, 202301) ) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pvt_dengue_sensors() -> None: apicall = Epidata.pvt_dengue_sensors( - auth = auth, + auth = secret_norostat, names = "ght", locations = "ag", epiweeks = EpiRange(201501, 202001) ) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pub_ecdc_ili() -> None: @@ -139,17 +146,17 @@ def test_pub_gft() -> None: locations = "hhs1", epiweeks = EpiRange(201201, 202001) ) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pvt_ght() -> None: apicall = Epidata.pvt_ght( - auth = auth, + auth = secret_ght, locations = "ma", epiweeks = EpiRange(199301, 202304), query = "how to get over the flu" ) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pub_kcdc_ili() -> None: @@ -162,7 +169,7 @@ def test_pub_kcdc_ili() -> None: def test_pvt_meta_norostat() -> None: apicall = Epidata.pvt_meta_norostat( - auth = auth + auth = secret_norostat ) data = apicall.classic() assert(len(data) > 0) @@ -177,7 +184,7 @@ def test_pub_nidss_dengue() -> None: locations = "taipei", epiweeks = EpiRange(201201, 201301) ) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pub_nidss_flu() -> None: @@ -190,19 +197,20 @@ def test_pub_nidss_flu() -> None: def test_pvt_norostat() -> None: apicall = Epidata.pvt_norostat( - auth = auth, + auth = secret_norostat, location = "1", epiweeks = 201233 ) - data = apicall.json() - assert(len(data) > 0) + data = apicall.df() + # TODO: Norostat is known to not return data + # assert(len(data) > 0) def test_pub_nowcast() -> None: apicall = Epidata.pub_nowcast( locations = "ca", epiweeks = EpiRange(201201, 201301) ) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pub_paho_dengue() -> None: @@ -215,31 +223,31 @@ def test_pub_paho_dengue() -> None: def test_pvt_quidel() -> None: apicall = Epidata.pvt_quidel( - auth = auth, + auth = secret_quidel, locations = "hhs1", epiweeks = EpiRange(201201, 202001) ) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pvt_sensors() -> None: apicall = Epidata.pvt_sensors( - auth = auth, + auth = secret_sensors, names = "sar3", locations = "nat", epiweeks = EpiRange(201501, 202001) ) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pvt_twitter() -> None: apicall = Epidata.pvt_twitter( - auth = auth, + auth = secret_twitter, locations = "CA", time_type = "week", time_values = EpiRange(201501, 202001) ) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) def test_pub_wiki() -> None: @@ -248,5 +256,5 @@ def test_pub_wiki() -> None: time_type = "week", time_values = EpiRange(201501, 201601) ) - data = apicall.json() + data = apicall.df() assert(len(data) > 0) From 75bace17505c946766b0c0fd4c98f2ae895476c4 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Fri, 21 Jun 2024 21:51:35 -0700 Subject: [PATCH 04/12] test: convert more test calls to df --- tests/test_epidata_calls.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_epidata_calls.py b/tests/test_epidata_calls.py index 7440ad7..bbf8a64 100644 --- a/tests/test_epidata_calls.py +++ b/tests/test_epidata_calls.py @@ -109,7 +109,7 @@ def test_pub_ecdc_ili() -> None: regions = "austria", epiweeks = EpiRange(201901, 202001) ) - data = apicall.json(disable_date_parsing=True) + data = apicall.df(disable_date_parsing=True) assert(len(data) > 0) def test_pub_flusurv() -> None: @@ -117,7 +117,7 @@ def test_pub_flusurv() -> None: locations = "CA", epiweeks = EpiRange(201701, 201801) ) - data = apicall.json(disable_date_parsing=True) + data = apicall.df(disable_date_parsing=True) assert(len(data) > 0) def test_pub_fluview_clinical() -> None: @@ -125,12 +125,12 @@ def test_pub_fluview_clinical() -> None: regions = "nat", epiweeks = EpiRange(201601, 201701) ) - data = apicall.json(disable_date_parsing=True) + data = apicall.df(disable_date_parsing=True) assert(len(data) > 0) def test_pub_fluview_meta() -> None: apicall = Epidata.pub_fluview_meta() - data = apicall.json(disable_date_parsing=True) + data = apicall.df(disable_date_parsing=True) assert(len(data) > 0) def test_pub_fluview() -> None: @@ -138,7 +138,7 @@ def test_pub_fluview() -> None: regions = "nat", epiweeks = EpiRange(201201, 202005) ) - data = apicall.json(disable_date_parsing=True) + data = apicall.df(disable_date_parsing=True) assert(len(data) > 0) def test_pub_gft() -> None: @@ -164,7 +164,7 @@ def test_pub_kcdc_ili() -> None: regions = "ROK", epiweeks = 200436 ) - data = apicall.json(disable_date_parsing=True) + data = apicall.df(disable_date_parsing=True) assert(len(data) > 0) def test_pvt_meta_norostat() -> None: @@ -192,7 +192,7 @@ def test_pub_nidss_flu() -> None: regions = "taipei", epiweeks = EpiRange(201501, 201601) ) - data = apicall.json(disable_date_parsing=True) + data = apicall.df(disable_date_parsing=True) assert(len(data) > 0) def test_pvt_norostat() -> None: @@ -218,7 +218,7 @@ def test_pub_paho_dengue() -> None: regions = "ca", epiweeks = EpiRange(201401, 201501) ) - data = apicall.json(disable_date_parsing=True) + data = apicall.df(disable_date_parsing=True) assert(len(data) > 0) def test_pvt_quidel() -> None: From 563c58e65b9b95da7f4974b98000abb16b1abfa4 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 8 Jul 2024 19:19:57 -0700 Subject: [PATCH 05/12] lint --- tests/test_epidata_calls.py | 62 ++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/tests/test_epidata_calls.py b/tests/test_epidata_calls.py index bbf8a64..0ac2ed8 100644 --- a/tests/test_epidata_calls.py +++ b/tests/test_epidata_calls.py @@ -21,41 +21,41 @@ def test_pvt_cdc() -> None: epiweeks = EpiRange(201501, 201601) ) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pub_covid_hosp_facility_lookup() -> None: apicall = Epidata.pub_covid_hosp_facility_lookup(state="fl") data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 apicall = Epidata.pub_covid_hosp_facility_lookup(city="southlake") data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pub_covid_hosp_facility() -> None: apicall = Epidata.pub_covid_hosp_facility( hospital_pks = "100075", collection_weeks = EpiRange(20200101, 20200501)) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 apicall = Epidata.pub_covid_hosp_facility( hospital_pks = "100075", collection_weeks = EpiRange(202001, 202005)) data = apicall.df() - assert(len(data) > 0) # fails + assert len(data) > 0 # fails def test_pub_covid_hosp_state_timeseries() -> None: apicall = Epidata.pub_covid_hosp_state_timeseries( states = "fl", dates = EpiRange(20200101, 20200501)) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pub_covidcast_meta() -> None: apicall = Epidata.pub_covidcast_meta() data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pub_covidcast() -> None: apicall = Epidata.pub_covidcast( @@ -66,7 +66,7 @@ def test_pub_covidcast() -> None: geo_values = ["ca", "fl"], time_values = EpiRange(20200601, 20200801)) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 apicall = Epidata.pub_covidcast( data_source = "jhu-csse", @@ -76,7 +76,7 @@ def test_pub_covidcast() -> None: geo_values = "*", time_values = EpiRange(20200601, 20200801)) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pub_delphi() -> None: apicall = Epidata.pub_delphi( @@ -84,7 +84,7 @@ def test_pub_delphi() -> None: epiweek = 201501 ) data = apicall.classic() # only supports classic - assert(len(data) > 0) + assert len(data) > 0 def test_pub_dengue_nowcast() -> None: apicall = Epidata.pub_dengue_nowcast( @@ -92,7 +92,7 @@ def test_pub_dengue_nowcast() -> None: epiweeks = EpiRange(201401, 202301) ) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pvt_dengue_sensors() -> None: apicall = Epidata.pvt_dengue_sensors( @@ -102,7 +102,7 @@ def test_pvt_dengue_sensors() -> None: epiweeks = EpiRange(201501, 202001) ) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pub_ecdc_ili() -> None: apicall = Epidata.pub_ecdc_ili( @@ -110,7 +110,7 @@ def test_pub_ecdc_ili() -> None: epiweeks = EpiRange(201901, 202001) ) data = apicall.df(disable_date_parsing=True) - assert(len(data) > 0) + assert len(data) > 0 def test_pub_flusurv() -> None: apicall = Epidata.pub_flusurv( @@ -118,7 +118,7 @@ def test_pub_flusurv() -> None: epiweeks = EpiRange(201701, 201801) ) data = apicall.df(disable_date_parsing=True) - assert(len(data) > 0) + assert len(data) > 0 def test_pub_fluview_clinical() -> None: apicall = Epidata.pub_fluview_clinical( @@ -126,12 +126,12 @@ def test_pub_fluview_clinical() -> None: epiweeks = EpiRange(201601, 201701) ) data = apicall.df(disable_date_parsing=True) - assert(len(data) > 0) + assert len(data) > 0 def test_pub_fluview_meta() -> None: apicall = Epidata.pub_fluview_meta() data = apicall.df(disable_date_parsing=True) - assert(len(data) > 0) + assert len(data) > 0 def test_pub_fluview() -> None: apicall = Epidata.pub_fluview( @@ -139,7 +139,7 @@ def test_pub_fluview() -> None: epiweeks = EpiRange(201201, 202005) ) data = apicall.df(disable_date_parsing=True) - assert(len(data) > 0) + assert len(data) > 0 def test_pub_gft() -> None: apicall = Epidata.pub_gft( @@ -147,7 +147,7 @@ def test_pub_gft() -> None: epiweeks = EpiRange(201201, 202001) ) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pvt_ght() -> None: apicall = Epidata.pvt_ght( @@ -157,7 +157,7 @@ def test_pvt_ght() -> None: query = "how to get over the flu" ) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pub_kcdc_ili() -> None: apicall = Epidata.pub_kcdc_ili( @@ -165,19 +165,19 @@ def test_pub_kcdc_ili() -> None: epiweeks = 200436 ) data = apicall.df(disable_date_parsing=True) - assert(len(data) > 0) + assert len(data) > 0 def test_pvt_meta_norostat() -> None: apicall = Epidata.pvt_meta_norostat( auth = secret_norostat ) data = apicall.classic() - assert(len(data) > 0) + assert len(data) > 0 def test_pub_meta() -> None: apicall = Epidata.pub_meta() data = apicall.classic() # only supports classic - assert(len(data) > 0) + assert len(data) > 0 def test_pub_nidss_dengue() -> None: apicall = Epidata.pub_nidss_dengue( @@ -185,7 +185,7 @@ def test_pub_nidss_dengue() -> None: epiweeks = EpiRange(201201, 201301) ) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pub_nidss_flu() -> None: apicall = Epidata.pub_nidss_flu( @@ -193,7 +193,7 @@ def test_pub_nidss_flu() -> None: epiweeks = EpiRange(201501, 201601) ) data = apicall.df(disable_date_parsing=True) - assert(len(data) > 0) + assert len(data) > 0 def test_pvt_norostat() -> None: apicall = Epidata.pvt_norostat( @@ -203,7 +203,7 @@ def test_pvt_norostat() -> None: ) data = apicall.df() # TODO: Norostat is known to not return data - # assert(len(data) > 0) + # assert len(data) > 0 def test_pub_nowcast() -> None: apicall = Epidata.pub_nowcast( @@ -211,7 +211,7 @@ def test_pub_nowcast() -> None: epiweeks = EpiRange(201201, 201301) ) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pub_paho_dengue() -> None: apicall = Epidata.pub_paho_dengue( @@ -219,7 +219,7 @@ def test_pub_paho_dengue() -> None: epiweeks = EpiRange(201401, 201501) ) data = apicall.df(disable_date_parsing=True) - assert(len(data) > 0) + assert len(data) > 0 def test_pvt_quidel() -> None: apicall = Epidata.pvt_quidel( @@ -228,7 +228,7 @@ def test_pvt_quidel() -> None: epiweeks = EpiRange(201201, 202001) ) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pvt_sensors() -> None: apicall = Epidata.pvt_sensors( @@ -238,7 +238,7 @@ def test_pvt_sensors() -> None: epiweeks = EpiRange(201501, 202001) ) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pvt_twitter() -> None: apicall = Epidata.pvt_twitter( @@ -248,7 +248,7 @@ def test_pvt_twitter() -> None: time_values = EpiRange(201501, 202001) ) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 def test_pub_wiki() -> None: apicall = Epidata.pub_wiki( @@ -257,4 +257,4 @@ def test_pub_wiki() -> None: time_values = EpiRange(201501, 201601) ) data = apicall.df() - assert(len(data) > 0) + assert len(data) > 0 From 722c8dfcce01a44596984b0f7512075f0d28a4f4 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Tue, 9 Jul 2024 17:13:49 +0300 Subject: [PATCH 06/12] Test datetime cols and improve datetime handling --- epidatpy/_endpoints.py | 2 +- epidatpy/_parse.py | 9 +++++- tests/test_epidata_calls.py | 56 +++++++++++++++++++++++++++++++------ 3 files changed, 57 insertions(+), 10 deletions(-) diff --git a/epidatpy/_endpoints.py b/epidatpy/_endpoints.py index 2459870..7970043 100644 --- a/epidatpy/_endpoints.py +++ b/epidatpy/_endpoints.py @@ -663,7 +663,7 @@ def pub_flusurv( [ EpidataFieldInfo("release_date", EpidataFieldType.text), EpidataFieldInfo("location", EpidataFieldType.text), - EpidataFieldInfo("issue", EpidataFieldType.date), + EpidataFieldInfo("issue", EpidataFieldType.date_or_epiweek), EpidataFieldInfo("epiweek", EpidataFieldType.epiweek), EpidataFieldInfo("lag", EpidataFieldType.int), EpidataFieldInfo("rage_age_0", EpidataFieldType.float), diff --git a/epidatpy/_parse.py b/epidatpy/_parse.py index 1ffcfa9..2a5114f 100644 --- a/epidatpy/_parse.py +++ b/epidatpy/_parse.py @@ -9,7 +9,11 @@ def parse_api_date(value: Union[str, int, float, None]) -> Optional[date]: if value is None: return value v = str(value) - return datetime.strptime(v, "%Y%m%d").date() + if len(v) == 10: # yyyy-mm-dd + d = datetime.strptime(v, "%Y-%m-%d").date() + else: + d = datetime.strptime(v, "%Y%m%d").date() + return d def parse_api_week(value: Union[str, int, float, None]) -> Optional[date]: @@ -22,8 +26,11 @@ def parse_api_date_or_week(value: Union[str, int, float, None]) -> Optional[date if value is None: return None v = str(value) + print(len(v)) if len(v) == 6: d = cast(date, Week.fromstring(v).startdate()) + elif len(v) == 10: # yyyy-mm-dd + d = datetime.strptime(v, "%Y-%m-%d").date() else: d = datetime.strptime(v, "%Y%m%d").date() return d diff --git a/tests/test_epidata_calls.py b/tests/test_epidata_calls.py index 0ac2ed8..1003e9f 100644 --- a/tests/test_epidata_calls.py +++ b/tests/test_epidata_calls.py @@ -1,3 +1,4 @@ +import numpy as np import os from epidatpy.request import Epidata, EpiRange @@ -22,6 +23,7 @@ def test_pvt_cdc() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pub_covid_hosp_facility_lookup() -> None: apicall = Epidata.pub_covid_hosp_facility_lookup(state="fl") @@ -51,6 +53,8 @@ def test_pub_covid_hosp_state_timeseries() -> None: dates = EpiRange(20200101, 20200501)) data = apicall.df() assert len(data) > 0 + assert str(data['issue'].dtype) == 'datetime64[ns]' + assert str(data['date'].dtype) == 'datetime64[ns]' def test_pub_covidcast_meta() -> None: apicall = Epidata.pub_covidcast_meta() @@ -76,6 +80,8 @@ def test_pub_covidcast() -> None: geo_values = "*", time_values = EpiRange(20200601, 20200801)) data = apicall.df() + assert str(data['time_value'].dtype) == 'datetime64[ns]' + assert str(data['issue'].dtype) == 'datetime64[ns]' assert len(data) > 0 def test_pub_delphi() -> None: @@ -93,6 +99,7 @@ def test_pub_dengue_nowcast() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pvt_dengue_sensors() -> None: apicall = Epidata.pvt_dengue_sensors( @@ -103,43 +110,57 @@ def test_pvt_dengue_sensors() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pub_ecdc_ili() -> None: apicall = Epidata.pub_ecdc_ili( regions = "austria", epiweeks = EpiRange(201901, 202001) ) - data = apicall.df(disable_date_parsing=True) + data = apicall.df() assert len(data) > 0 + assert str(data['release_date'].dtype) == 'datetime64[ns]' + assert str(data['issue'].dtype) == 'datetime64[ns]' + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pub_flusurv() -> None: apicall = Epidata.pub_flusurv( locations = "CA", epiweeks = EpiRange(201701, 201801) ) - data = apicall.df(disable_date_parsing=True) + data = apicall.df() assert len(data) > 0 + assert str(data['issue'].dtype) == 'datetime64[ns]' + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pub_fluview_clinical() -> None: apicall = Epidata.pub_fluview_clinical( regions = "nat", epiweeks = EpiRange(201601, 201701) ) - data = apicall.df(disable_date_parsing=True) + data = apicall.df() assert len(data) > 0 + assert str(data['release_date'].dtype) == 'datetime64[ns]' + assert str(data['issue'].dtype) == 'datetime64[ns]' + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pub_fluview_meta() -> None: apicall = Epidata.pub_fluview_meta() - data = apicall.df(disable_date_parsing=True) + data = apicall.df() assert len(data) > 0 + assert str(data['latest_update'].dtype) == 'datetime64[ns]' + assert str(data['latest_issue'].dtype) == 'datetime64[ns]' def test_pub_fluview() -> None: apicall = Epidata.pub_fluview( regions = "nat", epiweeks = EpiRange(201201, 202005) ) - data = apicall.df(disable_date_parsing=True) + data = apicall.df() assert len(data) > 0 + assert str(data['release_date'].dtype) == 'datetime64[ns]' + assert str(data['issue'].dtype) == 'datetime64[ns]' + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pub_gft() -> None: apicall = Epidata.pub_gft( @@ -148,6 +169,7 @@ def test_pub_gft() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pvt_ght() -> None: apicall = Epidata.pvt_ght( @@ -158,14 +180,18 @@ def test_pvt_ght() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pub_kcdc_ili() -> None: apicall = Epidata.pub_kcdc_ili( regions = "ROK", epiweeks = 200436 ) - data = apicall.df(disable_date_parsing=True) + data = apicall.df() assert len(data) > 0 + assert str(data['release_date'].dtype) == 'datetime64[ns]' + assert str(data['issue'].dtype) == 'datetime64[ns]' + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pvt_meta_norostat() -> None: apicall = Epidata.pvt_meta_norostat( @@ -186,14 +212,18 @@ def test_pub_nidss_dengue() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pub_nidss_flu() -> None: apicall = Epidata.pub_nidss_flu( regions = "taipei", epiweeks = EpiRange(201501, 201601) ) - data = apicall.df(disable_date_parsing=True) + data = apicall.df() assert len(data) > 0 + assert str(data['release_date'].dtype) == 'datetime64[ns]' + assert str(data['issue'].dtype) == 'datetime64[ns]' + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pvt_norostat() -> None: apicall = Epidata.pvt_norostat( @@ -204,6 +234,8 @@ def test_pvt_norostat() -> None: data = apicall.df() # TODO: Norostat is known to not return data # assert len(data) > 0 + # assert str(data['release_date'].dtype) == 'datetime64[ns]' + # assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pub_nowcast() -> None: apicall = Epidata.pub_nowcast( @@ -212,14 +244,18 @@ def test_pub_nowcast() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pub_paho_dengue() -> None: apicall = Epidata.pub_paho_dengue( regions = "ca", epiweeks = EpiRange(201401, 201501) ) - data = apicall.df(disable_date_parsing=True) + data = apicall.df() assert len(data) > 0 + assert str(data['release_date'].dtype) == 'datetime64[ns]' + assert str(data['issue'].dtype) == 'datetime64[ns]' + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pvt_quidel() -> None: apicall = Epidata.pvt_quidel( @@ -229,6 +265,7 @@ def test_pvt_quidel() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pvt_sensors() -> None: apicall = Epidata.pvt_sensors( @@ -239,6 +276,7 @@ def test_pvt_sensors() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pvt_twitter() -> None: apicall = Epidata.pvt_twitter( @@ -249,6 +287,7 @@ def test_pvt_twitter() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['epiweek'].dtype) == 'datetime64[ns]' def test_pub_wiki() -> None: apicall = Epidata.pub_wiki( @@ -258,3 +297,4 @@ def test_pub_wiki() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['epiweek'].dtype) == 'datetime64[ns]' From 78d353208d5e7666d54f969037f9e2a9a53c9697 Mon Sep 17 00:00:00 2001 From: Rostyslav Zatserkovnyi Date: Mon, 15 Jul 2024 16:13:06 +0300 Subject: [PATCH 07/12] dtype handling in tests --- epidatpy/_model.py | 2 +- tests/test_epidata_calls.py | 129 +++++++++++++++++++++++++++++++++++- 2 files changed, 128 insertions(+), 3 deletions(-) diff --git a/epidatpy/_model.py b/epidatpy/_model.py index b6db10e..906ced3 100644 --- a/epidatpy/_model.py +++ b/epidatpy/_model.py @@ -305,7 +305,7 @@ def _as_df( elif info.type == EpidataFieldType.float: data_types[info.name] = float else: - data_types[info.name] = str + data_types[info.name] = "string" if data_types: df = df.astype(data_types) return df diff --git a/tests/test_epidata_calls.py b/tests/test_epidata_calls.py index 1003e9f..eb8a4a0 100644 --- a/tests/test_epidata_calls.py +++ b/tests/test_epidata_calls.py @@ -23,7 +23,17 @@ def test_pvt_cdc() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['location'].dtype) == 'string' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['num1'].dtype) == 'int64' + assert str(data['num2'].dtype) == 'int64' + assert str(data['num3'].dtype) == 'int64' + assert str(data['num4'].dtype) == 'int64' + assert str(data['num5'].dtype) == 'int64' + assert str(data['num6'].dtype) == 'int64' + assert str(data['num7'].dtype) == 'int64' + assert str(data['num8'].dtype) == 'int64' + assert str(data['total'].dtype) == 'int64' def test_pub_covid_hosp_facility_lookup() -> None: apicall = Epidata.pub_covid_hosp_facility_lookup(state="fl") @@ -33,6 +43,16 @@ def test_pub_covid_hosp_facility_lookup() -> None: apicall = Epidata.pub_covid_hosp_facility_lookup(city="southlake") data = apicall.df() assert len(data) > 0 + assert str(data['hospital_pk'].dtype) == 'string' + assert str(data['state'].dtype) == 'string' + assert str(data['ccn'].dtype) == 'string' + assert str(data['hospital_name'].dtype) == 'string' + assert str(data['address'].dtype) == 'string' + assert str(data['city'].dtype) == 'string' + assert str(data['zip'].dtype) == 'string' + assert str(data['hospital_subtype'].dtype) == 'string' + assert str(data['fip_code'].dtype) == 'float64' + assert str(data['is_metro_micro'].dtype) == 'int64' def test_pub_covid_hosp_facility() -> None: apicall = Epidata.pub_covid_hosp_facility( @@ -40,6 +60,18 @@ def test_pub_covid_hosp_facility() -> None: collection_weeks = EpiRange(20200101, 20200501)) data = apicall.df() assert len(data) > 0 + assert str(data['hospital_pk'].dtype) == 'string' + assert str(data['state'].dtype) == 'string' + assert str(data['ccn'].dtype) == 'string' + assert str(data['hospital_name'].dtype) == 'string' + assert str(data['address'].dtype) == 'string' + assert str(data['city'].dtype) == 'string' + assert str(data['zip'].dtype) == 'string' + assert str(data['hospital_subtype'].dtype) == 'string' + assert str(data['fips_code'].dtype) == 'string' + assert str(data['publication_date'].dtype) == 'datetime64[ns]' + assert str(data['collection_week'].dtype) == 'datetime64[ns]' + assert str(data['is_metro_micro'].dtype) == 'bool' apicall = Epidata.pub_covid_hosp_facility( hospital_pks = "100075", @@ -53,6 +85,7 @@ def test_pub_covid_hosp_state_timeseries() -> None: dates = EpiRange(20200101, 20200501)) data = apicall.df() assert len(data) > 0 + assert str(data['state'].dtype) == 'string' assert str(data['issue'].dtype) == 'datetime64[ns]' assert str(data['date'].dtype) == 'datetime64[ns]' @@ -60,6 +93,20 @@ def test_pub_covidcast_meta() -> None: apicall = Epidata.pub_covidcast_meta() data = apicall.df() assert len(data) > 0 + assert str(data['data_source'].dtype) == 'string' + assert str(data['signal'].dtype) == 'string' + assert str(data['time_type'].dtype) == 'category' + assert str(data['min_time'].dtype) == 'datetime64[ns]' + assert str(data['max_time'].dtype) == 'datetime64[ns]' + assert str(data['num_locations'].dtype) == 'int64' + assert str(data['min_value'].dtype) == 'float64' + assert str(data['max_value'].dtype) == 'float64' + assert str(data['mean_value'].dtype) == 'float64' + assert str(data['stdev_value'].dtype) == 'float64' + assert str(data['last_update'].dtype) == 'int64' + assert str(data['max_issue'].dtype) == 'datetime64[ns]' + assert str(data['min_lag'].dtype) == 'int64' + assert str(data['max_lag'].dtype) == 'int64' def test_pub_covidcast() -> None: apicall = Epidata.pub_covidcast( @@ -80,9 +127,20 @@ def test_pub_covidcast() -> None: geo_values = "*", time_values = EpiRange(20200601, 20200801)) data = apicall.df() + print(data.dtypes) + + assert str(data['source'].dtype) == 'string' + assert str(data['signal'].dtype) == 'string' + assert str(data['geo_type'].dtype) == 'category' + assert str(data['geo_value'].dtype) == 'string' + assert str(data['time_type'].dtype) == 'category' assert str(data['time_value'].dtype) == 'datetime64[ns]' assert str(data['issue'].dtype) == 'datetime64[ns]' - assert len(data) > 0 + assert str(data['lag'].dtype) == 'int64' + assert str(data['value'].dtype) == 'float64' + assert str(data['missing_value'].dtype) == 'int64' + assert str(data['missing_stderr'].dtype) == 'int64' + assert str(data['missing_sample_size'].dtype) == 'int64' def test_pub_delphi() -> None: apicall = Epidata.pub_delphi( @@ -99,7 +157,10 @@ def test_pub_dengue_nowcast() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['location'].dtype) == 'string' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['value'].dtype) == 'float64' + assert str(data['std'].dtype) == 'float64' def test_pvt_dengue_sensors() -> None: apicall = Epidata.pvt_dengue_sensors( @@ -110,7 +171,9 @@ def test_pvt_dengue_sensors() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['location'].dtype) == 'string' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['value'].dtype) == 'float64' def test_pub_ecdc_ili() -> None: apicall = Epidata.pub_ecdc_ili( @@ -130,8 +193,17 @@ def test_pub_flusurv() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['release_date'].dtype) == 'string' + assert str(data['location'].dtype) == 'string' assert str(data['issue'].dtype) == 'datetime64[ns]' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['lag'].dtype) == 'int64' + assert str(data['rage_age_0'].dtype) == 'float64' + assert str(data['rage_age_1'].dtype) == 'float64' + assert str(data['rage_age_2'].dtype) == 'float64' + assert str(data['rage_age_3'].dtype) == 'float64' + assert str(data['rage_age_4'].dtype) == 'float64' + assert str(data['rage_overall'].dtype) == 'float64' def test_pub_fluview_clinical() -> None: apicall = Epidata.pub_fluview_clinical( @@ -141,8 +213,16 @@ def test_pub_fluview_clinical() -> None: data = apicall.df() assert len(data) > 0 assert str(data['release_date'].dtype) == 'datetime64[ns]' + assert str(data['region'].dtype) == 'string' assert str(data['issue'].dtype) == 'datetime64[ns]' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['lag'].dtype) == 'int64' + assert str(data['total_specimens'].dtype) == 'int64' + assert str(data['total_a'].dtype) == 'int64' + assert str(data['total_b'].dtype) == 'int64' + assert str(data['percent_positive'].dtype) == 'float64' + assert str(data['percent_a'].dtype) == 'float64' + assert str(data['percent_b'].dtype) == 'float64' def test_pub_fluview_meta() -> None: apicall = Epidata.pub_fluview_meta() @@ -150,6 +230,7 @@ def test_pub_fluview_meta() -> None: assert len(data) > 0 assert str(data['latest_update'].dtype) == 'datetime64[ns]' assert str(data['latest_issue'].dtype) == 'datetime64[ns]' + assert str(data['table_rows'].dtype) == 'int64' def test_pub_fluview() -> None: apicall = Epidata.pub_fluview( @@ -159,8 +240,14 @@ def test_pub_fluview() -> None: data = apicall.df() assert len(data) > 0 assert str(data['release_date'].dtype) == 'datetime64[ns]' + assert str(data['region'].dtype) == 'string' assert str(data['issue'].dtype) == 'datetime64[ns]' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['lag'].dtype) == 'int64' + assert str(data['num_ili'].dtype) == 'int64' + assert str(data['num_patients'].dtype) == 'int64' + assert str(data['wili'].dtype) == 'float64' + assert str(data['ili'].dtype) == 'float64' def test_pub_gft() -> None: apicall = Epidata.pub_gft( @@ -169,7 +256,9 @@ def test_pub_gft() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['location'].dtype) == 'string' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['num'].dtype) == 'int64' def test_pvt_ght() -> None: apicall = Epidata.pvt_ght( @@ -180,7 +269,9 @@ def test_pvt_ght() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['location'].dtype) == 'string' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['value'].dtype) == 'float64' def test_pub_kcdc_ili() -> None: apicall = Epidata.pub_kcdc_ili( @@ -190,8 +281,11 @@ def test_pub_kcdc_ili() -> None: data = apicall.df() assert len(data) > 0 assert str(data['release_date'].dtype) == 'datetime64[ns]' + assert str(data['region'].dtype) == 'string' assert str(data['issue'].dtype) == 'datetime64[ns]' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['lag'].dtype) == 'int64' + assert str(data['ili'].dtype) == 'float64' def test_pvt_meta_norostat() -> None: apicall = Epidata.pvt_meta_norostat( @@ -212,7 +306,9 @@ def test_pub_nidss_dengue() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['location'].dtype) == 'string' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['count'].dtype) == 'int64' def test_pub_nidss_flu() -> None: apicall = Epidata.pub_nidss_flu( @@ -222,8 +318,12 @@ def test_pub_nidss_flu() -> None: data = apicall.df() assert len(data) > 0 assert str(data['release_date'].dtype) == 'datetime64[ns]' + assert str(data['region'].dtype) == 'string' assert str(data['issue'].dtype) == 'datetime64[ns]' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['lag'].dtype) == 'int64' + assert str(data['visits'].dtype) == 'int64' + assert str(data['ili'].dtype) == 'float64' def test_pvt_norostat() -> None: apicall = Epidata.pvt_norostat( @@ -244,7 +344,10 @@ def test_pub_nowcast() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['location'].dtype) == 'string' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['value'].dtype) == 'float64' + assert str(data['std'].dtype) == 'float64' def test_pub_paho_dengue() -> None: apicall = Epidata.pub_paho_dengue( @@ -254,8 +357,16 @@ def test_pub_paho_dengue() -> None: data = apicall.df() assert len(data) > 0 assert str(data['release_date'].dtype) == 'datetime64[ns]' - assert str(data['issue'].dtype) == 'datetime64[ns]' + assert str(data['region'].dtype) == 'string' + assert str(data['serotype'].dtype) == 'string' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['issue'].dtype) == 'datetime64[ns]' + assert str(data['lag'].dtype) == 'int64' + assert str(data['total_pop'].dtype) == 'int64' + assert str(data['num_dengue'].dtype) == 'int64' + assert str(data['num_severe'].dtype) == 'int64' + assert str(data['num_deaths'].dtype) == 'int64' + assert str(data['incidence_rate'].dtype) == 'float64' def test_pvt_quidel() -> None: apicall = Epidata.pvt_quidel( @@ -265,7 +376,9 @@ def test_pvt_quidel() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['location'].dtype) == 'string' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['value'].dtype) == 'float64' def test_pvt_sensors() -> None: apicall = Epidata.pvt_sensors( @@ -276,7 +389,10 @@ def test_pvt_sensors() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['name'].dtype) == 'string' + assert str(data['location'].dtype) == 'string' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['value'].dtype) == 'float64' def test_pvt_twitter() -> None: apicall = Epidata.pvt_twitter( @@ -287,7 +403,11 @@ def test_pvt_twitter() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['location'].dtype) == 'string' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['num'].dtype) == 'int64' + assert str(data['total'].dtype) == 'int64' + assert str(data['percent'].dtype) == 'float64' def test_pub_wiki() -> None: apicall = Epidata.pub_wiki( @@ -297,4 +417,9 @@ def test_pub_wiki() -> None: ) data = apicall.df() assert len(data) > 0 + assert str(data['article'].dtype) == 'string' assert str(data['epiweek'].dtype) == 'datetime64[ns]' + assert str(data['count'].dtype) == 'int64' + assert str(data['total'].dtype) == 'int64' + assert str(data['hour'].dtype) == 'int64' + assert str(data['value'].dtype) == 'float64' From 2943320a28349a8223a21d6ab4bac6fb04553b26 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 15 Jul 2024 13:05:45 -0700 Subject: [PATCH 08/12] fix: remove print statement --- epidatpy/_parse.py | 1 - 1 file changed, 1 deletion(-) diff --git a/epidatpy/_parse.py b/epidatpy/_parse.py index a0c0394..4e9f95e 100644 --- a/epidatpy/_parse.py +++ b/epidatpy/_parse.py @@ -25,7 +25,6 @@ def parse_api_date_or_week(value: Union[str, int, float, None]) -> Optional[date if value is None: return None v = str(value) - print(len(v)) if len(v) == 6: d = Week.fromstring(v).startdate() elif len(v) == 10: # yyyy-mm-dd From b46d128433bd35312d942d4655e2277c64c25815 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 15 Jul 2024 17:35:38 -0700 Subject: [PATCH 09/12] fix: typos in endpoint fields --- epidatpy/_endpoints.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/epidatpy/_endpoints.py b/epidatpy/_endpoints.py index ae71a2b..fd308e1 100644 --- a/epidatpy/_endpoints.py +++ b/epidatpy/_endpoints.py @@ -109,7 +109,7 @@ def pub_covid_hosp_facility_lookup( EpidataFieldInfo("city", EpidataFieldType.text), EpidataFieldInfo("zip", EpidataFieldType.text), EpidataFieldInfo("hospital_subtype", EpidataFieldType.text), - EpidataFieldInfo("fip_code", EpidataFieldType.text), + EpidataFieldInfo("fips_code", EpidataFieldType.text), EpidataFieldInfo("is_metro_micro", EpidataFieldType.int), ], ) @@ -553,7 +553,7 @@ def pub_ecdc_ili( [ EpidataFieldInfo("region", EpidataFieldType.text), EpidataFieldInfo("release_date", EpidataFieldType.date), - EpidataFieldInfo("issue", EpidataFieldType.date), + EpidataFieldInfo("issue", EpidataFieldType.epiweek), EpidataFieldInfo("epiweek", EpidataFieldType.epiweek), EpidataFieldInfo("lag", EpidataFieldType.int), EpidataFieldInfo("incidence_rate", EpidataFieldType.float), @@ -587,12 +587,12 @@ def pub_flusurv( EpidataFieldInfo("issue", EpidataFieldType.date_or_epiweek), EpidataFieldInfo("epiweek", EpidataFieldType.epiweek), EpidataFieldInfo("lag", EpidataFieldType.int), - EpidataFieldInfo("rage_age_0", EpidataFieldType.float), - EpidataFieldInfo("rage_age_1", EpidataFieldType.float), - EpidataFieldInfo("rage_age_2", EpidataFieldType.float), - EpidataFieldInfo("rage_age_3", EpidataFieldType.float), - EpidataFieldInfo("rage_age_4", EpidataFieldType.float), - EpidataFieldInfo("rage_overall", EpidataFieldType.float), + EpidataFieldInfo("rate_age_0", EpidataFieldType.float), + EpidataFieldInfo("rate_age_1", EpidataFieldType.float), + EpidataFieldInfo("rate_age_2", EpidataFieldType.float), + EpidataFieldInfo("rate_age_3", EpidataFieldType.float), + EpidataFieldInfo("rate_age_4", EpidataFieldType.float), + EpidataFieldInfo("rate_overall", EpidataFieldType.float), ], ) From a1acdbad79351bf9707125bcde3ee28650fc6387 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 15 Jul 2024 17:36:06 -0700 Subject: [PATCH 10/12] fix: update tests and df type casting --- epidatpy/request.py | 22 +- tests/test_epidata_calls.py | 813 +++++++++++++++++------------------- 2 files changed, 407 insertions(+), 428 deletions(-) diff --git a/epidatpy/request.py b/epidatpy/request.py index 60e0212..c0c58a3 100644 --- a/epidatpy/request.py +++ b/epidatpy/request.py @@ -137,9 +137,9 @@ def df( df = DataFrame(rows, columns=columns or None) data_types: Dict[str, Any] = {} - time_fields: List[str] = [] + time_fields: List[EpidataFieldInfo] = [] for info in self.meta: - if not pred(info.name) or df[info.name].isnull().all(): + if not pred(info.name): continue if info.type == EpidataFieldType.bool: data_types[info.name] = bool @@ -154,8 +154,8 @@ def df( EpidataFieldType.epiweek, EpidataFieldType.date_or_epiweek, ): - data_types[info.name] = "Int64" - time_fields.append(info.name) + data_types[info.name] = "string" + time_fields.append(info) elif info.type == EpidataFieldType.float: data_types[info.name] = "Float64" else: @@ -163,8 +163,18 @@ def df( if data_types: df = df.astype(data_types) if not disable_date_parsing: - for field in time_fields: - df[field] = to_datetime(df[field], format="%Y%m%d", errors="ignore") + for info in time_fields: + if info.type == EpidataFieldType.epiweek: + continue + try: + df[info.name] = to_datetime(df[info.name], format="%Y-%m-%d") + continue + except ValueError: + pass + try: + df[info.name] = to_datetime(df[info.name], format="%Y%m%d") + except ValueError: + pass return df diff --git a/tests/test_epidata_calls.py b/tests/test_epidata_calls.py index eb8a4a0..cf2b12f 100644 --- a/tests/test_epidata_calls.py +++ b/tests/test_epidata_calls.py @@ -1,425 +1,394 @@ -import numpy as np +""" +Requirements to run these: +- DELPHI_EPIDATA_KEY environment variable is set https://api.delphi.cmu.edu/epidata/admin/registration_form +- it has access to the private endpoints being tested +""" + import os + +import pytest + from epidatpy.request import Epidata, EpiRange -# Requirements to run these: -# - DELPHI_EPIDATA_KEY environment variable is set https://api.delphi.cmu.edu/epidata/admin/registration_form -# - it has access to the private endpoints being tested - -auth = os.environ.get("DELPHI_EPIDATA_KEY") -secret_cdc = os.environ.get("SECRET_API_AUTH_CDC") -secret_fluview = os.environ.get("SECRET_API_AUTH_FLUVIEW") -secret_ght = os.environ.get("SECRET_API_AUTH_GHT") -secret_norostat = os.environ.get("SECRET_API_AUTH_NOROSTAT") -secret_quidel = os.environ.get("SECRET_API_AUTH_QUIDEL") -secret_sensors = os.environ.get("SECRET_API_AUTH_SENSORS") -secret_twitter = os.environ.get("SECRET_API_AUTH_TWITTER") - -def test_pvt_cdc() -> None: - apicall = Epidata.pvt_cdc( - auth = secret_cdc, - locations = "fl,ca", - epiweeks = EpiRange(201501, 201601) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['location'].dtype) == 'string' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['num1'].dtype) == 'int64' - assert str(data['num2'].dtype) == 'int64' - assert str(data['num3'].dtype) == 'int64' - assert str(data['num4'].dtype) == 'int64' - assert str(data['num5'].dtype) == 'int64' - assert str(data['num6'].dtype) == 'int64' - assert str(data['num7'].dtype) == 'int64' - assert str(data['num8'].dtype) == 'int64' - assert str(data['total'].dtype) == 'int64' - -def test_pub_covid_hosp_facility_lookup() -> None: - apicall = Epidata.pub_covid_hosp_facility_lookup(state="fl") - data = apicall.df() - assert len(data) > 0 - - apicall = Epidata.pub_covid_hosp_facility_lookup(city="southlake") - data = apicall.df() - assert len(data) > 0 - assert str(data['hospital_pk'].dtype) == 'string' - assert str(data['state'].dtype) == 'string' - assert str(data['ccn'].dtype) == 'string' - assert str(data['hospital_name'].dtype) == 'string' - assert str(data['address'].dtype) == 'string' - assert str(data['city'].dtype) == 'string' - assert str(data['zip'].dtype) == 'string' - assert str(data['hospital_subtype'].dtype) == 'string' - assert str(data['fip_code'].dtype) == 'float64' - assert str(data['is_metro_micro'].dtype) == 'int64' - -def test_pub_covid_hosp_facility() -> None: - apicall = Epidata.pub_covid_hosp_facility( - hospital_pks = "100075", - collection_weeks = EpiRange(20200101, 20200501)) - data = apicall.df() - assert len(data) > 0 - assert str(data['hospital_pk'].dtype) == 'string' - assert str(data['state'].dtype) == 'string' - assert str(data['ccn'].dtype) == 'string' - assert str(data['hospital_name'].dtype) == 'string' - assert str(data['address'].dtype) == 'string' - assert str(data['city'].dtype) == 'string' - assert str(data['zip'].dtype) == 'string' - assert str(data['hospital_subtype'].dtype) == 'string' - assert str(data['fips_code'].dtype) == 'string' - assert str(data['publication_date'].dtype) == 'datetime64[ns]' - assert str(data['collection_week'].dtype) == 'datetime64[ns]' - assert str(data['is_metro_micro'].dtype) == 'bool' - - apicall = Epidata.pub_covid_hosp_facility( - hospital_pks = "100075", - collection_weeks = EpiRange(202001, 202005)) - data = apicall.df() - assert len(data) > 0 # fails - -def test_pub_covid_hosp_state_timeseries() -> None: - apicall = Epidata.pub_covid_hosp_state_timeseries( - states = "fl", - dates = EpiRange(20200101, 20200501)) - data = apicall.df() - assert len(data) > 0 - assert str(data['state'].dtype) == 'string' - assert str(data['issue'].dtype) == 'datetime64[ns]' - assert str(data['date'].dtype) == 'datetime64[ns]' - -def test_pub_covidcast_meta() -> None: - apicall = Epidata.pub_covidcast_meta() - data = apicall.df() - assert len(data) > 0 - assert str(data['data_source'].dtype) == 'string' - assert str(data['signal'].dtype) == 'string' - assert str(data['time_type'].dtype) == 'category' - assert str(data['min_time'].dtype) == 'datetime64[ns]' - assert str(data['max_time'].dtype) == 'datetime64[ns]' - assert str(data['num_locations'].dtype) == 'int64' - assert str(data['min_value'].dtype) == 'float64' - assert str(data['max_value'].dtype) == 'float64' - assert str(data['mean_value'].dtype) == 'float64' - assert str(data['stdev_value'].dtype) == 'float64' - assert str(data['last_update'].dtype) == 'int64' - assert str(data['max_issue'].dtype) == 'datetime64[ns]' - assert str(data['min_lag'].dtype) == 'int64' - assert str(data['max_lag'].dtype) == 'int64' - -def test_pub_covidcast() -> None: - apicall = Epidata.pub_covidcast( - data_source = "jhu-csse", - signals = "confirmed_7dav_incidence_prop", - geo_type = "state", - time_type = "day", - geo_values = ["ca", "fl"], - time_values = EpiRange(20200601, 20200801)) - data = apicall.df() - assert len(data) > 0 - - apicall = Epidata.pub_covidcast( - data_source = "jhu-csse", - signals = "confirmed_7dav_incidence_prop", - geo_type = "state", - time_type = "day", - geo_values = "*", - time_values = EpiRange(20200601, 20200801)) - data = apicall.df() - print(data.dtypes) - - assert str(data['source'].dtype) == 'string' - assert str(data['signal'].dtype) == 'string' - assert str(data['geo_type'].dtype) == 'category' - assert str(data['geo_value'].dtype) == 'string' - assert str(data['time_type'].dtype) == 'category' - assert str(data['time_value'].dtype) == 'datetime64[ns]' - assert str(data['issue'].dtype) == 'datetime64[ns]' - assert str(data['lag'].dtype) == 'int64' - assert str(data['value'].dtype) == 'float64' - assert str(data['missing_value'].dtype) == 'int64' - assert str(data['missing_stderr'].dtype) == 'int64' - assert str(data['missing_sample_size'].dtype) == 'int64' - -def test_pub_delphi() -> None: - apicall = Epidata.pub_delphi( - system = "ec", - epiweek = 201501 - ) - data = apicall.classic() # only supports classic - assert len(data) > 0 - -def test_pub_dengue_nowcast() -> None: - apicall = Epidata.pub_dengue_nowcast( - locations = "pr", - epiweeks = EpiRange(201401, 202301) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['location'].dtype) == 'string' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['value'].dtype) == 'float64' - assert str(data['std'].dtype) == 'float64' - -def test_pvt_dengue_sensors() -> None: - apicall = Epidata.pvt_dengue_sensors( - auth = secret_norostat, - names = "ght", - locations = "ag", - epiweeks = EpiRange(201501, 202001) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['location'].dtype) == 'string' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['value'].dtype) == 'float64' - -def test_pub_ecdc_ili() -> None: - apicall = Epidata.pub_ecdc_ili( - regions = "austria", - epiweeks = EpiRange(201901, 202001) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['release_date'].dtype) == 'datetime64[ns]' - assert str(data['issue'].dtype) == 'datetime64[ns]' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - -def test_pub_flusurv() -> None: - apicall = Epidata.pub_flusurv( - locations = "CA", - epiweeks = EpiRange(201701, 201801) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['release_date'].dtype) == 'string' - assert str(data['location'].dtype) == 'string' - assert str(data['issue'].dtype) == 'datetime64[ns]' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['lag'].dtype) == 'int64' - assert str(data['rage_age_0'].dtype) == 'float64' - assert str(data['rage_age_1'].dtype) == 'float64' - assert str(data['rage_age_2'].dtype) == 'float64' - assert str(data['rage_age_3'].dtype) == 'float64' - assert str(data['rage_age_4'].dtype) == 'float64' - assert str(data['rage_overall'].dtype) == 'float64' - -def test_pub_fluview_clinical() -> None: - apicall = Epidata.pub_fluview_clinical( - regions = "nat", - epiweeks = EpiRange(201601, 201701) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['release_date'].dtype) == 'datetime64[ns]' - assert str(data['region'].dtype) == 'string' - assert str(data['issue'].dtype) == 'datetime64[ns]' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['lag'].dtype) == 'int64' - assert str(data['total_specimens'].dtype) == 'int64' - assert str(data['total_a'].dtype) == 'int64' - assert str(data['total_b'].dtype) == 'int64' - assert str(data['percent_positive'].dtype) == 'float64' - assert str(data['percent_a'].dtype) == 'float64' - assert str(data['percent_b'].dtype) == 'float64' - -def test_pub_fluview_meta() -> None: - apicall = Epidata.pub_fluview_meta() - data = apicall.df() - assert len(data) > 0 - assert str(data['latest_update'].dtype) == 'datetime64[ns]' - assert str(data['latest_issue'].dtype) == 'datetime64[ns]' - assert str(data['table_rows'].dtype) == 'int64' - -def test_pub_fluview() -> None: - apicall = Epidata.pub_fluview( - regions = "nat", - epiweeks = EpiRange(201201, 202005) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['release_date'].dtype) == 'datetime64[ns]' - assert str(data['region'].dtype) == 'string' - assert str(data['issue'].dtype) == 'datetime64[ns]' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['lag'].dtype) == 'int64' - assert str(data['num_ili'].dtype) == 'int64' - assert str(data['num_patients'].dtype) == 'int64' - assert str(data['wili'].dtype) == 'float64' - assert str(data['ili'].dtype) == 'float64' - -def test_pub_gft() -> None: - apicall = Epidata.pub_gft( - locations = "hhs1", - epiweeks = EpiRange(201201, 202001) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['location'].dtype) == 'string' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['num'].dtype) == 'int64' - -def test_pvt_ght() -> None: - apicall = Epidata.pvt_ght( - auth = secret_ght, - locations = "ma", - epiweeks = EpiRange(199301, 202304), - query = "how to get over the flu" - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['location'].dtype) == 'string' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['value'].dtype) == 'float64' - -def test_pub_kcdc_ili() -> None: - apicall = Epidata.pub_kcdc_ili( - regions = "ROK", - epiweeks = 200436 - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['release_date'].dtype) == 'datetime64[ns]' - assert str(data['region'].dtype) == 'string' - assert str(data['issue'].dtype) == 'datetime64[ns]' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['lag'].dtype) == 'int64' - assert str(data['ili'].dtype) == 'float64' - -def test_pvt_meta_norostat() -> None: - apicall = Epidata.pvt_meta_norostat( - auth = secret_norostat - ) - data = apicall.classic() - assert len(data) > 0 - -def test_pub_meta() -> None: - apicall = Epidata.pub_meta() - data = apicall.classic() # only supports classic - assert len(data) > 0 - -def test_pub_nidss_dengue() -> None: - apicall = Epidata.pub_nidss_dengue( - locations = "taipei", - epiweeks = EpiRange(201201, 201301) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['location'].dtype) == 'string' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['count'].dtype) == 'int64' - -def test_pub_nidss_flu() -> None: - apicall = Epidata.pub_nidss_flu( - regions = "taipei", - epiweeks = EpiRange(201501, 201601) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['release_date'].dtype) == 'datetime64[ns]' - assert str(data['region'].dtype) == 'string' - assert str(data['issue'].dtype) == 'datetime64[ns]' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['lag'].dtype) == 'int64' - assert str(data['visits'].dtype) == 'int64' - assert str(data['ili'].dtype) == 'float64' - -def test_pvt_norostat() -> None: - apicall = Epidata.pvt_norostat( - auth = secret_norostat, - location = "1", - epiweeks = 201233 - ) - data = apicall.df() - # TODO: Norostat is known to not return data - # assert len(data) > 0 - # assert str(data['release_date'].dtype) == 'datetime64[ns]' - # assert str(data['epiweek'].dtype) == 'datetime64[ns]' - -def test_pub_nowcast() -> None: - apicall = Epidata.pub_nowcast( - locations = "ca", - epiweeks = EpiRange(201201, 201301) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['location'].dtype) == 'string' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['value'].dtype) == 'float64' - assert str(data['std'].dtype) == 'float64' - -def test_pub_paho_dengue() -> None: - apicall = Epidata.pub_paho_dengue( - regions = "ca", - epiweeks = EpiRange(201401, 201501) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['release_date'].dtype) == 'datetime64[ns]' - assert str(data['region'].dtype) == 'string' - assert str(data['serotype'].dtype) == 'string' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['issue'].dtype) == 'datetime64[ns]' - assert str(data['lag'].dtype) == 'int64' - assert str(data['total_pop'].dtype) == 'int64' - assert str(data['num_dengue'].dtype) == 'int64' - assert str(data['num_severe'].dtype) == 'int64' - assert str(data['num_deaths'].dtype) == 'int64' - assert str(data['incidence_rate'].dtype) == 'float64' - -def test_pvt_quidel() -> None: - apicall = Epidata.pvt_quidel( - auth = secret_quidel, - locations = "hhs1", - epiweeks = EpiRange(201201, 202001) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['location'].dtype) == 'string' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['value'].dtype) == 'float64' - -def test_pvt_sensors() -> None: - apicall = Epidata.pvt_sensors( - auth = secret_sensors, - names = "sar3", - locations = "nat", - epiweeks = EpiRange(201501, 202001) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['name'].dtype) == 'string' - assert str(data['location'].dtype) == 'string' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['value'].dtype) == 'float64' - -def test_pvt_twitter() -> None: - apicall = Epidata.pvt_twitter( - auth = secret_twitter, - locations = "CA", - time_type = "week", - time_values = EpiRange(201501, 202001) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['location'].dtype) == 'string' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['num'].dtype) == 'int64' - assert str(data['total'].dtype) == 'int64' - assert str(data['percent'].dtype) == 'float64' - -def test_pub_wiki() -> None: - apicall = Epidata.pub_wiki( - articles = "avian_influenza", - time_type = "week", - time_values = EpiRange(201501, 201601) - ) - data = apicall.df() - assert len(data) > 0 - assert str(data['article'].dtype) == 'string' - assert str(data['epiweek'].dtype) == 'datetime64[ns]' - assert str(data['count'].dtype) == 'int64' - assert str(data['total'].dtype) == 'int64' - assert str(data['hour'].dtype) == 'int64' - assert str(data['value'].dtype) == 'float64' +auth = os.environ.get("DELPHI_EPIDATA_KEY", "") +secret_cdc = os.environ.get("SECRET_API_AUTH_CDC", "") +secret_fluview = os.environ.get("SECRET_API_AUTH_FLUVIEW", "") +secret_ght = os.environ.get("SECRET_API_AUTH_GHT", "") +secret_norostat = os.environ.get("SECRET_API_AUTH_NOROSTAT", "") +secret_quidel = os.environ.get("SECRET_API_AUTH_QUIDEL", "") +secret_sensors = os.environ.get("SECRET_API_AUTH_SENSORS", "") +secret_twitter = os.environ.get("SECRET_API_AUTH_TWITTER", "") + + +@pytest.mark.skipif(not auth, reason="DELPHI_EPIDATA_KEY not available.") +class TestEpidataCalls: + @pytest.mark.skipif(not secret_cdc, reason="CDC key not available.") + def test_pvt_cdc(self) -> None: + apicall = Epidata.pvt_cdc(auth=secret_cdc, locations="fl,ca", epiweeks=EpiRange(201501, 201601)) + data = apicall.df() + assert len(data) > 0 + assert str(data["location"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["num1"].dtype) == "Int64" + assert str(data["num2"].dtype) == "Int64" + assert str(data["num3"].dtype) == "Int64" + assert str(data["num4"].dtype) == "Int64" + assert str(data["num5"].dtype) == "Int64" + assert str(data["num6"].dtype) == "Int64" + assert str(data["num7"].dtype) == "Int64" + assert str(data["num8"].dtype) == "Int64" + assert str(data["total"].dtype) == "Int64" + assert str(data["value"].dtype) == "Float64" + + def test_pub_covid_hosp_facility_lookup(self) -> None: + apicall = Epidata.pub_covid_hosp_facility_lookup(state="fl") + data = apicall.df() + assert len(data) > 0 + + apicall = Epidata.pub_covid_hosp_facility_lookup(city="southlake") + data = apicall.df() + assert len(data) > 0 + assert str(data["hospital_pk"].dtype) == "string" + assert str(data["state"].dtype) == "string" + assert str(data["ccn"].dtype) == "string" + assert str(data["hospital_name"].dtype) == "string" + assert str(data["address"].dtype) == "string" + assert str(data["city"].dtype) == "string" + assert str(data["zip"].dtype) == "string" + assert str(data["hospital_subtype"].dtype) == "string" + assert str(data["fips_code"].dtype) == "string" + assert str(data["is_metro_micro"].dtype) == "Int64" + + @pytest.mark.filterwarnings("ignore:`collection_weeks` is in week format") + def test_pub_covid_hosp_facility(self) -> None: + apicall = Epidata.pub_covid_hosp_facility(hospital_pks="100075", collection_weeks=EpiRange(20200101, 20200501)) + data = apicall.df() + assert len(data) > 0 + assert str(data["hospital_pk"].dtype) == "string" + assert str(data["state"].dtype) == "string" + assert str(data["ccn"].dtype) == "string" + assert str(data["hospital_name"].dtype) == "string" + assert str(data["address"].dtype) == "string" + assert str(data["city"].dtype) == "string" + assert str(data["zip"].dtype) == "string" + assert str(data["hospital_subtype"].dtype) == "string" + assert str(data["fips_code"].dtype) == "string" + assert str(data["publication_date"].dtype) == "datetime64[ns]" + assert str(data["collection_week"].dtype) == "datetime64[ns]" + assert str(data["is_metro_micro"].dtype) == "bool" + + apicall2 = Epidata.pub_covid_hosp_facility(hospital_pks="100075", collection_weeks=EpiRange(202001, 202030)) + data2 = apicall2.df() + assert len(data2) > 0 + + def test_pub_covid_hosp_state_timeseries(self) -> None: + apicall = Epidata.pub_covid_hosp_state_timeseries(states="fl", dates=EpiRange(20200101, 20200501)) + data = apicall.df() + assert len(data) > 0 + assert str(data["state"].dtype) == "string" + assert str(data["issue"].dtype) == "datetime64[ns]" + assert str(data["date"].dtype) == "datetime64[ns]" + + def test_pub_covidcast_meta(self) -> None: + apicall = Epidata.pub_covidcast_meta() + data = apicall.df() + + assert len(data) > 0 + assert str(data["data_source"].dtype) == "string" + assert str(data["signal"].dtype) == "string" + assert str(data["time_type"].dtype) == "category" + assert str(data["min_time"].dtype) == "string" + assert str(data["max_time"].dtype) == "datetime64[ns]" + assert str(data["num_locations"].dtype) == "Int64" + assert str(data["min_value"].dtype) == "Float64" + assert str(data["max_value"].dtype) == "Float64" + assert str(data["mean_value"].dtype) == "Float64" + assert str(data["stdev_value"].dtype) == "Float64" + assert str(data["last_update"].dtype) == "Int64" + assert str(data["max_issue"].dtype) == "datetime64[ns]" + assert str(data["min_lag"].dtype) == "Int64" + assert str(data["max_lag"].dtype) == "Int64" + + def test_pub_covidcast(self) -> None: + apicall = Epidata.pub_covidcast( + data_source="jhu-csse", + signals="confirmed_7dav_incidence_prop", + geo_type="state", + time_type="day", + geo_values=["ca", "fl"], + time_values=EpiRange(20200601, 20200801), + ) + data = apicall.df() + + assert len(data) > 0 + + apicall = Epidata.pub_covidcast( + data_source="jhu-csse", + signals="confirmed_7dav_incidence_prop", + geo_type="state", + time_type="day", + geo_values="*", + time_values=EpiRange(20200601, 20200801), + ) + data = apicall.df() + + print(data.dtypes) + + assert str(data["source"].dtype) == "string" + assert str(data["signal"].dtype) == "string" + assert str(data["geo_type"].dtype) == "category" + assert str(data["geo_value"].dtype) == "string" + assert str(data["time_type"].dtype) == "category" + assert str(data["time_value"].dtype) == "datetime64[ns]" + assert str(data["issue"].dtype) == "datetime64[ns]" + assert str(data["lag"].dtype) == "Int64" + assert str(data["value"].dtype) == "Float64" + assert str(data["missing_value"].dtype) == "Int64" + assert str(data["missing_stderr"].dtype) == "Int64" + assert str(data["missing_sample_size"].dtype) == "Int64" + + def test_pub_delphi(self) -> None: + apicall = Epidata.pub_delphi(system="ec", epiweek=201501) + data = apicall.classic() # only supports classic + assert len(data) > 0 + + def test_pub_dengue_nowcast(self) -> None: + apicall = Epidata.pub_dengue_nowcast(locations="pr", epiweeks=EpiRange(201401, 202301)) + data = apicall.df() + + assert len(data) > 0 + assert str(data["location"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["value"].dtype) == "Float64" + assert str(data["std"].dtype) == "Float64" + + @pytest.mark.skipif(not secret_sensors, reason="Dengue sensors key not available.") + def test_pvt_dengue_sensors(self) -> None: + apicall = Epidata.pvt_dengue_sensors( + auth=secret_sensors, names="ght", locations="ag", epiweeks=EpiRange(201501, 202001) + ) + data = apicall.df() + + assert len(data) > 0 + assert str(data["location"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["value"].dtype) == "Float64" + + def test_pub_ecdc_ili(self) -> None: + apicall = Epidata.pub_ecdc_ili(regions="austria", epiweeks=EpiRange(201901, 202001)) + data = apicall.df() + + assert len(data) > 0 + assert str(data["release_date"].dtype) == "datetime64[ns]" + assert str(data["issue"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + + def test_pub_flusurv(self) -> None: + apicall = Epidata.pub_flusurv(locations="CA", epiweeks=EpiRange(201701, 201801)) + data = apicall.df() + + assert len(data) > 0 + assert str(data["release_date"].dtype) == "string" + assert str(data["location"].dtype) == "string" + assert str(data["issue"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["lag"].dtype) == "Int64" + assert str(data["rate_age_0"].dtype) == "Float64" + assert str(data["rate_age_1"].dtype) == "Float64" + assert str(data["rate_age_2"].dtype) == "Float64" + assert str(data["rate_age_3"].dtype) == "Float64" + assert str(data["rate_age_4"].dtype) == "Float64" + assert str(data["rate_overall"].dtype) == "Float64" + + def test_pub_fluview_clinical(self) -> None: + apicall = Epidata.pub_fluview_clinical(regions="nat", epiweeks=EpiRange(201601, 201701)) + data = apicall.df() + + assert len(data) > 0 + assert str(data["release_date"].dtype) == "datetime64[ns]" + assert str(data["region"].dtype) == "string" + assert str(data["issue"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["lag"].dtype) == "Int64" + assert str(data["total_specimens"].dtype) == "Int64" + assert str(data["total_a"].dtype) == "Int64" + assert str(data["total_b"].dtype) == "Int64" + assert str(data["percent_positive"].dtype) == "Float64" + assert str(data["percent_a"].dtype) == "Float64" + assert str(data["percent_b"].dtype) == "Float64" + + def test_pub_fluview_meta(self) -> None: + apicall = Epidata.pub_fluview_meta() + data = apicall.df() + + assert len(data) > 0 + assert str(data["latest_update"].dtype) == "datetime64[ns]" + assert str(data["latest_issue"].dtype) == "datetime64[ns]" + assert str(data["table_rows"].dtype) == "Int64" + + def test_pub_fluview(self) -> None: + apicall = Epidata.pub_fluview(regions="nat", epiweeks=EpiRange(201201, 202005)) + data = apicall.df() + + assert len(data) > 0 + assert str(data["release_date"].dtype) == "datetime64[ns]" + assert str(data["region"].dtype) == "string" + assert str(data["issue"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["lag"].dtype) == "Int64" + assert str(data["num_ili"].dtype) == "Int64" + assert str(data["num_patients"].dtype) == "Int64" + assert str(data["wili"].dtype) == "Float64" + assert str(data["ili"].dtype) == "Float64" + + def test_pub_gft(self) -> None: + apicall = Epidata.pub_gft(locations="hhs1", epiweeks=EpiRange(201201, 202001)) + data = apicall.df() + + assert len(data) > 0 + assert str(data["location"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["num"].dtype) == "Int64" + + @pytest.mark.skipif(not secret_ght, reason="GHT key not available.") + def test_pvt_ght(self) -> None: + apicall = Epidata.pvt_ght( + auth=secret_ght, locations="ma", epiweeks=EpiRange(199301, 202304), query="how to get over the flu" + ) + data = apicall.df() + + assert len(data) > 0 + assert str(data["location"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["value"].dtype) == "Float64" + + def test_pub_kcdc_ili(self) -> None: + apicall = Epidata.pub_kcdc_ili(regions="ROK", epiweeks=200436) + data = apicall.df() + + assert len(data) > 0 + assert str(data["release_date"].dtype) == "datetime64[ns]" + assert str(data["region"].dtype) == "string" + assert str(data["issue"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["lag"].dtype) == "Int64" + assert str(data["ili"].dtype) == "Float64" + + @pytest.mark.skipif(not secret_norostat, reason="Norostat key not available.") + def test_pvt_meta_norostat(self) -> None: + apicall = Epidata.pvt_meta_norostat(auth=secret_norostat) + data = apicall.classic() + assert len(data) > 0 + + def test_pub_meta(self) -> None: + apicall = Epidata.pub_meta() + data = apicall.classic() # only supports classic + assert len(data) > 0 + + def test_pub_nidss_dengue(self) -> None: + apicall = Epidata.pub_nidss_dengue(locations="taipei", epiweeks=EpiRange(201201, 201301)) + data = apicall.df() + + assert len(data) > 0 + assert str(data["location"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["count"].dtype) == "Int64" + + def test_pub_nidss_flu(self) -> None: + apicall = Epidata.pub_nidss_flu(regions="taipei", epiweeks=EpiRange(201501, 201601)) + data = apicall.df() + + assert len(data) > 0 + assert str(data["release_date"].dtype) == "datetime64[ns]" + assert str(data["region"].dtype) == "string" + assert str(data["issue"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["lag"].dtype) == "Int64" + assert str(data["visits"].dtype) == "Int64" + assert str(data["ili"].dtype) == "Float64" + + @pytest.mark.skipif(not secret_norostat, reason="Norostat key not available.") + def test_pvt_norostat(self) -> None: + apicall = Epidata.pvt_norostat(auth=secret_norostat, location="1", epiweeks=201233) + data = apicall.df() + + # TODO: Need a non-trivial query for Norostat + # assert len(data) > 0 + # assert str(data['release_date'].dtype) == 'datetime64[ns]' + # assert str(data['epiweek'].dtype) == 'string' + + def test_pub_nowcast(self) -> None: + apicall = Epidata.pub_nowcast(locations="ca", epiweeks=EpiRange(201201, 201301)) + data = apicall.df() + + assert len(data) > 0 + assert str(data["location"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["value"].dtype) == "Float64" + assert str(data["std"].dtype) == "Float64" + + def test_pub_paho_dengue(self) -> None: + apicall = Epidata.pub_paho_dengue(regions="ca", epiweeks=EpiRange(201401, 201501)) + data = apicall.df() + + assert len(data) > 0 + assert str(data["release_date"].dtype) == "datetime64[ns]" + assert str(data["region"].dtype) == "string" + assert str(data["serotype"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["issue"].dtype) == "string" + assert str(data["lag"].dtype) == "Int64" + assert str(data["total_pop"].dtype) == "Int64" + assert str(data["num_dengue"].dtype) == "Int64" + assert str(data["num_severe"].dtype) == "Int64" + assert str(data["num_deaths"].dtype) == "Int64" + assert str(data["incidence_rate"].dtype) == "Float64" + + @pytest.mark.skipif(not secret_quidel, reason="Quidel key not available.") + def test_pvt_quidel(self) -> None: + apicall = Epidata.pvt_quidel(auth=secret_quidel, locations="hhs1", epiweeks=EpiRange(201201, 202001)) + data = apicall.df() + + assert len(data) > 0 + assert str(data["location"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["value"].dtype) == "Float64" + + @pytest.mark.skipif(not secret_sensors, reason="Sensors key not available.") + def test_pvt_sensors(self) -> None: + apicall = Epidata.pvt_sensors( + auth=secret_sensors, names="sar3", locations="nat", epiweeks=EpiRange(201501, 202001) + ) + data = apicall.df() + + assert len(data) > 0 + assert str(data["name"].dtype) == "string" + assert str(data["location"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["value"].dtype) == "Float64" + + @pytest.mark.skipif(not secret_twitter, reason="Twitter key not available.") + def test_pvt_twitter(self) -> None: + apicall = Epidata.pvt_twitter( + auth=secret_twitter, locations="CA", time_type="week", time_values=EpiRange(201501, 202001) + ) + data = apicall.df() + + assert len(data) > 0 + assert str(data["location"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["num"].dtype) == "Int64" + assert str(data["total"].dtype) == "Int64" + assert str(data["percent"].dtype) == "Float64" + + def test_pub_wiki(self) -> None: + apicall = Epidata.pub_wiki(articles="avian_influenza", time_type="week", time_values=EpiRange(201501, 201601)) + data = apicall.df() + + assert len(data) > 0 + assert str(data["article"].dtype) == "string" + assert str(data["epiweek"].dtype) == "string" + assert str(data["count"].dtype) == "Int64" + assert str(data["total"].dtype) == "Int64" + assert str(data["hour"].dtype) == "Int64" + assert str(data["value"].dtype) == "Float64" From 56c6a6e3ad8e2079e60f57542aec1cba5c41b7c2 Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 15 Jul 2024 17:36:32 -0700 Subject: [PATCH 11/12] lint: format --- epidatpy/_parse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/epidatpy/_parse.py b/epidatpy/_parse.py index 4e9f95e..2f7b10c 100644 --- a/epidatpy/_parse.py +++ b/epidatpy/_parse.py @@ -8,7 +8,7 @@ def parse_api_date(value: Union[str, int, float, None]) -> Optional[date]: if value is None: return value v = str(value) - if len(v) == 10: # yyyy-mm-dd + if len(v) == 10: # yyyy-mm-dd d = datetime.strptime(v, "%Y-%m-%d").date() else: d = datetime.strptime(v, "%Y%m%d").date() @@ -27,7 +27,7 @@ def parse_api_date_or_week(value: Union[str, int, float, None]) -> Optional[date v = str(value) if len(v) == 6: d = Week.fromstring(v).startdate() - elif len(v) == 10: # yyyy-mm-dd + elif len(v) == 10: # yyyy-mm-dd d = datetime.strptime(v, "%Y-%m-%d").date() else: d = datetime.strptime(v, "%Y%m%d").date() From 3bb41f4b794de5ee55a7d28bd12feab3350cd23c Mon Sep 17 00:00:00 2001 From: Dmitry Shemetov Date: Mon, 15 Jul 2024 17:39:45 -0700 Subject: [PATCH 12/12] lint: lint --- tests/test_epidata_calls.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_epidata_calls.py b/tests/test_epidata_calls.py index cf2b12f..cf7f072 100644 --- a/tests/test_epidata_calls.py +++ b/tests/test_epidata_calls.py @@ -22,6 +22,8 @@ @pytest.mark.skipif(not auth, reason="DELPHI_EPIDATA_KEY not available.") class TestEpidataCalls: + """Make network call tests for Epidata.""" + @pytest.mark.skipif(not secret_cdc, reason="CDC key not available.") def test_pvt_cdc(self) -> None: apicall = Epidata.pvt_cdc(auth=secret_cdc, locations="fl,ca", epiweeks=EpiRange(201501, 201601)) @@ -313,9 +315,10 @@ def test_pvt_norostat(self) -> None: data = apicall.df() # TODO: Need a non-trivial query for Norostat - # assert len(data) > 0 - # assert str(data['release_date'].dtype) == 'datetime64[ns]' - # assert str(data['epiweek'].dtype) == 'string' + assert len(data) > 0 + assert str(data["release_date"].dtype) == "datetime64[ns]" + assert str(data["epiweek"].dtype) == "string" + assert str(data["value"].dtype) == "Int64" def test_pub_nowcast(self) -> None: apicall = Epidata.pub_nowcast(locations="ca", epiweeks=EpiRange(201201, 201301))