diff --git a/src/acquisition/covidcast/signal_dash_data_generator.py b/src/acquisition/covidcast/signal_dash_data_generator.py
index 34c90b441..81d95728a 100644
--- a/src/acquisition/covidcast/signal_dash_data_generator.py
+++ b/src/acquisition/covidcast/signal_dash_data_generator.py
@@ -9,6 +9,7 @@
 import pandas as pd
 
 from dataclasses import dataclass
+from epiweeks import Week
 from typing import List
 
 # first party
@@ -17,7 +18,9 @@
 from delphi.epidata.acquisition.covidcast.logger import get_structured_logger
 
 
-LOOKBACK_DAYS_FOR_COVERAGE = 28
+LOOKBACK_DAYS_FOR_COVERAGE = 56
+BASE_COVIDCAST = covidcast.covidcast.Epidata.BASE_URL[:-len("api.php")] + "covidcast"
+COVERAGE_URL = f"{BASE_COVIDCAST}/coverage?format=csv&signal={{source}}:{{signal}}&days={LOOKBACK_DAYS_FOR_COVERAGE}"
 
 @dataclass
 class DashboardSignal:
@@ -195,19 +198,17 @@ def get_latest_time_value_from_metadata(dashboard_signal, metadata):
 def get_coverage(dashboard_signal: DashboardSignal,
                  metadata) -> List[DashboardSignalCoverage]:
     """Get the most recent coverage for the signal."""
-    latest_time_value = get_latest_time_value_from_metadata(
-        dashboard_signal, metadata)
-    start_day = latest_time_value - datetime.timedelta(days = LOOKBACK_DAYS_FOR_COVERAGE)
-    latest_data = covidcast.signal(
-        dashboard_signal.source,
-        dashboard_signal.covidcast_signal,
-        end_day = latest_time_value,
-        start_day = start_day)
-    latest_data_without_megacounties = latest_data[~latest_data['geo_value'].str.endswith(
-        '000')]
-    count_by_geo_type_df = latest_data_without_megacounties.groupby(
-        ['geo_type', 'data_source', 'time_value', 'signal']).size().to_frame(
-        'count').reset_index()
+    count_by_geo_type_df = pd.read_csv(
+        COVERAGE_URL.format(source=dashboard_signal.source,
+                            signal=dashboard_signal.covidcast_signal))
+    try:
+        # First attempt: interpret every time_value as a YYYYMMDD date.
+        count_by_geo_type_df["time_value"] = count_by_geo_type_df["time_value"].apply(
+            lambda x: pd.to_datetime(str(x), format="%Y%m%d"))
+    except ValueError:
+        # Not YYYYMMDD: treat the value as a YYYYWW epiweek and use its start date.
+        count_by_geo_type_df["time_value"] = count_by_geo_type_df["time_value"].apply(
+            lambda x: pd.to_datetime(Week(x // 100, x % 100).startdate()))
 
     signal_coverage_list = []
 
@@ -215,7 +216,7 @@ def get_coverage(dashboard_signal: DashboardSignal,
         signal_coverage = DashboardSignalCoverage(
             signal_id=dashboard_signal.db_id,
             date=row['time_value'].date(),
-            geo_type=row['geo_type'],
+            geo_type='county',
             count=row['count'])
         signal_coverage_list.append(signal_coverage)
 
diff --git a/tests/acquisition/covidcast/test_signal_dash_data_generator.py b/tests/acquisition/covidcast/test_signal_dash_data_generator.py
index cd43e0ec3..ff1797b35 100644
--- a/tests/acquisition/covidcast/test_signal_dash_data_generator.py
+++ b/tests/acquisition/covidcast/test_signal_dash_data_generator.py
@@ -182,7 +182,7 @@ def test_get_latest_time_value_from_metadata(self):
         data_date = get_latest_time_value_from_metadata(signal, metadata)
         self.assertEqual(data_date, date(2021, 1, 1))
 
-    @patch("covidcast.signal")
+    @patch("pandas.read_csv")
     def test_get_coverage(self, mock_signal):
         signal = DashboardSignal(
             db_id=1, name="Change", source="chng",
@@ -198,18 +198,16 @@ def test_get_coverage(self, mock_signal):
                 'signal'])
 
         epidata_data = [
-            ['chng', 'chng-sig', pd.Timestamp("2020-01-01"), "state", "PA"],
-            ['chng', 'chng-sig', pd.Timestamp("2020-01-01"), "state", "NY"],
-            ['chng', 'chng-sig', pd.Timestamp("2020-01-02"), "state", "NY"],
+            ['chng', 'chng-sig', 20200101, 2],
+            ['chng', 'chng-sig', 20200102, 1],
         ]
         epidata_df = pd.DataFrame(
             epidata_data,
             columns=[
-                'data_source',
+                'source',
                 'signal',
                 'time_value',
-                'geo_type',
-                'geo_value'])
+                'count'])
 
         mock_signal.return_value = epidata_df
 
@@ -222,7 +220,7 @@ def test_get_coverage(self, mock_signal):
                     2020,
                     1,
                     1),
-                geo_type='state',
+                geo_type='county',
                 count=2),
             DashboardSignalCoverage(
                 signal_id=1,
@@ -230,51 +228,6 @@ def test_get_coverage(self, mock_signal):
                     2020,
                     1,
                     2),
-                geo_type='state',
-                count=1),
-        ]
-
-        self.assertListEqual(coverage, expected_coverage)
-
-    @patch("covidcast.signal")
-    def test_get_coverage_megacounties_dropped(self, mock_signal):
-        signal = DashboardSignal(
-            db_id=1, name="Change", source="chng",
-            covidcast_signal="chng-sig",
-            latest_coverage_update=date(2021, 1, 1),
-            latest_status_update=date(2021, 1, 1))
-        data = [['chng', pd.Timestamp("2020-01-01"), "chng-sig"]]
-        metadata = pd.DataFrame(
-            data,
-            columns=[
-                'data_source',
-                'max_time',
-                'signal'])
-
-        epidata_data = [
-            ['chng', 'chng-sig', pd.Timestamp("2020-01-01"), "county", "11111"],
-            ['chng', 'chng-sig', pd.Timestamp("2020-01-01"), "county", "10000"],
-        ]
-        epidata_df = pd.DataFrame(
-            epidata_data,
-            columns=[
-                'data_source',
-                'signal',
-                'time_value',
-                'geo_type',
-                'geo_value'])
-
-        mock_signal.return_value = epidata_df
-
-        coverage = get_coverage(signal, metadata)
-
-        expected_coverage = [
-            DashboardSignalCoverage(
-                signal_id=1,
-                date=date(
-                    2020,
-                    1,
-                    1),
                 geo_type='county',
                 count=1),
         ]