Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions src/acquisition/covid_hosp/common/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Database:
def __init__(self,
connection,
table_name=None,
dataset_name=None,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps add a docstring entry for the new `dataset_name` parameter?

columns_and_types=None,
key_columns=None,
additional_fields=None):
Expand All @@ -40,6 +41,7 @@ def __init__(self,

self.connection = connection
self.table_name = table_name
self.dataset_name = dataset_name
self.publication_col_name = "issue" if table_name == 'covid_hosp_state_timeseries' else \
'publication_date'
self.columns_and_types = {
Expand Down Expand Up @@ -116,7 +118,7 @@ def contains_revision(self, revision):
`covid_hosp_meta`
WHERE
`dataset_name` = %s AND `revision_timestamp` = %s
''', (self.table_name, revision))
''', (self.dataset_name, revision))
for (result,) in cursor:
return bool(result)

Expand Down Expand Up @@ -145,7 +147,7 @@ def insert_metadata(self, publication_date, revision, meta_json):
)
VALUES
(%s, %s, %s, %s, NOW())
''', (self.table_name, publication_date, revision, meta_json))
''', (self.dataset_name, publication_date, revision, meta_json))

def insert_dataset(self, publication_date, dataframe):
"""Add a dataset to the database.
Expand Down Expand Up @@ -232,7 +234,7 @@ def get_max_issue(self):
from
`covid_hosp_meta`
WHERE
dataset_name = "{self.table_name}"
dataset_name = "{self.dataset_name}"
''')
for (result,) in cursor:
if result is not None:
Expand Down
15 changes: 8 additions & 7 deletions src/acquisition/covid_hosp/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,19 +169,20 @@ def update_dataset(database, network, newer_than=None, older_than=None):
# download the dataset and add it to the database
dataset = Utils.merge_by_key_cols([network.fetch_dataset(url) for url, _ in revisions],
db.KEY_COLS)
# add metadata to the database using the last revision seen.
last_url, last_index = revisions[-1]
metadata_json = metadata.loc[last_index].reset_index().to_json()
# add metadata to the database
all_metadata = []
for url, index in revisions:
all_metadata.append((url, metadata.loc[index].reset_index().to_json()))
datasets.append((
issue_int,
dataset,
last_url,
metadata_json
all_metadata
))
with database.connect() as db:
for issue_int, dataset, last_url, metadata_json in datasets:
for issue_int, dataset, all_metadata in datasets:
db.insert_dataset(issue_int, dataset)
db.insert_metadata(issue_int, last_url, metadata_json)
for url, metadata_json in all_metadata:
db.insert_metadata(issue_int, url, metadata_json)
print(f'successfully acquired {len(dataset)} rows')

# note that the transaction is committed by exiting the `with` block
Expand Down
2 changes: 2 additions & 0 deletions src/acquisition/covid_hosp/facility/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from delphi.epidata.acquisition.covid_hosp.common.database import Database as BaseDatabase
from delphi.epidata.acquisition.covid_hosp.common.database import Columndef
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils
from delphi.epidata.acquisition.covid_hosp.facility.network import Network


class Database(BaseDatabase):
Expand Down Expand Up @@ -213,5 +214,6 @@ def __init__(self, *args, **kwargs):
*args,
**kwargs,
table_name=Database.TABLE_NAME,
dataset_name=Network.DATASET_ID,
key_columns=Database.KEY_COLS,
columns_and_types=Database.ORDERED_CSV_COLUMNS)
2 changes: 2 additions & 0 deletions src/acquisition/covid_hosp/state_daily/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from delphi.epidata.acquisition.covid_hosp.common.database import Database as BaseDatabase
from delphi.epidata.acquisition.covid_hosp.common.database import Columndef
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils
from delphi.epidata.acquisition.covid_hosp.state_daily.network import Network


class Database(BaseDatabase):
Expand Down Expand Up @@ -223,6 +224,7 @@ def __init__(self, *args, **kwargs):
*args,
**kwargs,
table_name=Database.TABLE_NAME,
dataset_name=Network.DATASET_ID,
columns_and_types=Database.ORDERED_CSV_COLUMNS,
key_columns=Database.KEY_COLS,
additional_fields=[Columndef('D', 'record_type', None)])
2 changes: 2 additions & 0 deletions src/acquisition/covid_hosp/state_timeseries/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from delphi.epidata.acquisition.covid_hosp.common.database import Database as BaseDatabase
from delphi.epidata.acquisition.covid_hosp.common.database import Columndef
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils
from delphi.epidata.acquisition.covid_hosp.state_timeseries.network import Network


class Database(BaseDatabase):
Expand Down Expand Up @@ -222,6 +223,7 @@ def __init__(self, *args, **kwargs):
*args,
**kwargs,
table_name=Database.TABLE_NAME,
dataset_name=Network.DATASET_ID,
columns_and_types=Database.ORDERED_CSV_COLUMNS,
key_columns=Database.KEY_COLS,
additional_fields=[Columndef('T', 'record_type', None)])
3 changes: 3 additions & 0 deletions src/ddl/migrations/covid_hosp_meta_v0.4.4-v0.4.5.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Backfill `dataset_name` in `covid_hosp_meta`: every row whose
-- `revision_timestamp` contains one of the dataset ids below gets that id as
-- its `dataset_name`.
-- String literals are single-quoted (standard SQL) so the statements also run
-- when the server's ANSI_QUOTES mode is enabled, where double quotes delimit
-- identifiers rather than strings.
UPDATE covid_hosp_meta SET dataset_name = 'g62h-syeh' WHERE revision_timestamp LIKE '%g62h-syeh%';
UPDATE covid_hosp_meta SET dataset_name = '6xf2-c3ie' WHERE revision_timestamp LIKE '%6xf2-c3ie%';
UPDATE covid_hosp_meta SET dataset_name = 'anag-cw7u' WHERE revision_timestamp LIKE '%anag-cw7u%';
8 changes: 4 additions & 4 deletions tests/acquisition/covid_hosp/common/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_contains_revision(self):

mock_connection = MagicMock()
mock_cursor = mock_connection.cursor()
database = Database(mock_connection, table_name=sentinel.table_name)
database = Database(mock_connection, table_name=sentinel.table_name, dataset_name=sentinel.dataset_name)

with self.subTest(name='new revision'):
mock_cursor.__iter__.return_value = [(0,)]
Expand All @@ -78,7 +78,7 @@ def test_contains_revision(self):
# compare with boolean literal to test the type cast
self.assertIs(result, False)
query_values = mock_cursor.execute.call_args[0][-1]
self.assertEqual(query_values, (sentinel.table_name, sentinel.revision))
self.assertEqual(query_values, (sentinel.dataset_name, sentinel.revision))

with self.subTest(name='old revision'):
mock_cursor.__iter__.return_value = [(1,)]
Expand All @@ -88,7 +88,7 @@ def test_contains_revision(self):
# compare with boolean literal to test the type cast
self.assertIs(result, True)
query_values = mock_cursor.execute.call_args[0][-1]
self.assertEqual(query_values, (sentinel.table_name, sentinel.revision))
self.assertEqual(query_values, (sentinel.dataset_name, sentinel.revision))

def test_insert_metadata(self):
"""Add new metadata to the database."""
Expand All @@ -98,7 +98,7 @@ def test_insert_metadata(self):

mock_connection = MagicMock()
mock_cursor = mock_connection.cursor()
database = Database(mock_connection, table_name=sentinel.dataset_name)
database = Database(mock_connection, table_name=sentinel.table_name, dataset_name=sentinel.dataset_name)

result = database.insert_metadata(
sentinel.publication_date,
Expand Down
5 changes: 4 additions & 1 deletion tests/acquisition/covid_hosp/common/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,10 @@ def test_run_acquire_new_dataset(self):

self.assertTrue(result)

mock_connection.insert_metadata.assert_called_once()
# should have been called twice
mock_connection.insert_metadata.assert_called()
assert mock_connection.insert_metadata.call_count == 2
# most recent call should be for the final revision at url2
args = mock_connection.insert_metadata.call_args[0]
self.assertEqual(args[:2], (20210315, "url2"))
pd.testing.assert_frame_equal(
Expand Down