diff --git a/.bumpversion.cfg b/.bumpversion.cfg index c7ad53d42..2ddd10260 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.12 +current_version = 0.3.13 commit = False tag = False diff --git a/src/acquisition/covidcast/csv_importer.py b/src/acquisition/covidcast/csv_importer.py index 05669cb82..cce25cd34 100644 --- a/src/acquisition/covidcast/csv_importer.py +++ b/src/acquisition/covidcast/csv_importer.py @@ -43,9 +43,9 @@ class CsvImporter: "val": float, "se": float, "sample_size": float, - "missing_val": int, - "missing_se": int, - "missing_sample_size": int + "missing_val": "Int64", + "missing_se": "Int64", + "missing_sample_size": "Int64" } # NOTE: this should be a Python 3.7+ `dataclass`, but the server is on 3.4 diff --git a/src/acquisition/covidcast/database.py b/src/acquisition/covidcast/database.py index 70660b60c..24597c8e1 100644 --- a/src/acquisition/covidcast/database.py +++ b/src/acquisition/covidcast/database.py @@ -326,6 +326,7 @@ def delete_batch(self, cc_deletions): ''' drop_tmp_table_sql = f'DROP TABLE {tmp_table_name}' + total = None try: self._cursor.execute(create_tmp_table_sql) self._cursor.execute(amend_tmp_table_sql) @@ -337,12 +338,18 @@ def delete_batch(self, cc_deletions): raise Exception(f"Bad deletions argument: need a filename or a list of tuples; got a {type(cc_deletions)}") self._cursor.execute(add_id_sql) self._cursor.execute(delete_sql) + total = self._cursor.rowcount self._cursor.execute(fix_latest_issue_sql) self._connection.commit() + + if total == -1: + # the SQL connector does not support returning number of rows affected (see PEP 249) + total = None except Exception as e: raise e finally: self._cursor.execute(drop_tmp_table_sql) + return total def compute_covidcast_meta(self, table_name='covidcast', use_index=True): """Compute and return metadata on all non-WIP COVIDcast signals.""" diff --git a/src/acquisition/covidcast/delete_batch.py b/src/acquisition/covidcast/delete_batch.py index 33bc6751d..fe40897fd 100644 --- a/src/acquisition/covidcast/delete_batch.py +++ b/src/acquisition/covidcast/delete_batch.py @@ -2,6 +2,7 @@ # standard library import argparse +import glob import os import time @@ -22,19 +23,21 @@ def get_argument_parser(): help="filename for log output (defaults to stdout)") return parser -def handle_file(deletion_file, database): +def handle_file(deletion_file, database, logger): logger.info("Deleting from csv file", filename=deletion_file) rows = [] with open(deletion_file) as f: for line in f: - rows.append(line.strip().split(",")) + fields = line.strip().split(",") + if len(fields) < 9: continue + rows.append(fields + ["day"]) rows = rows[1:] try: n = database.delete_batch(rows) logger.info("Deleted database rows", row_count=n) return n except Exception as e: - logger.exception('Exception while deleting rows:', e) + logger.exception('Exception while deleting rows', exception=e) database.rollback() return 0 @@ -49,7 +52,11 @@ def main(args): try: for deletion_file in sorted(glob.glob(os.path.join(args.deletion_dir, '*.csv'))): - all_n += handle_file(deletion_file) + n = handle_file(deletion_file, database, logger) + if n is not None: + all_n += n + else: + all_n = "rowcount unsupported" finally: database.disconnect(True) diff --git a/src/client/delphi_epidata.R b/src/client/delphi_epidata.R index af837e998..70270ebba 100644 --- a/src/client/delphi_epidata.R +++ b/src/client/delphi_epidata.R @@ -15,7 +15,7 @@ Epidata <- (function() { # API base url BASE_URL <- 'https://delphi.cmu.edu/epidata/api.php' - client_version <- '0.3.12' + client_version <- '0.3.13' # Helper function to cast values and/or ranges to strings .listitem <- function(value) { diff --git a/src/client/delphi_epidata.js b/src/client/delphi_epidata.js index d0a76cf6c..734e94cb2 100644 --- a/src/client/delphi_epidata.js +++ b/src/client/delphi_epidata.js @@ -22,7 +22,7 @@ } })(this, function (exports, fetchImpl, jQuery) { const BASE_URL = "https://delphi.cmu.edu/epidata/"; - const client_version = "0.3.12"; + const client_version = "0.3.13"; // Helper function to cast values and/or ranges to strings function _listitem(value) { diff --git a/src/client/packaging/npm/package.json b/src/client/packaging/npm/package.json index 906071310..2da1b9252 100644 --- a/src/client/packaging/npm/package.json +++ b/src/client/packaging/npm/package.json @@ -2,7 +2,7 @@ "name": "delphi_epidata", "description": "Delphi Epidata API Client", "authors": "Delphi Group", - "version": "0.3.12", + "version": "0.3.13", "license": "MIT", "homepage": "https://github.com/cmu-delphi/delphi-epidata", "bugs": { diff --git a/src/client/packaging/pypi/delphi_epidata/__init__.py b/src/client/packaging/pypi/delphi_epidata/__init__.py index b1f527f5d..0be20a6cc 100644 --- a/src/client/packaging/pypi/delphi_epidata/__init__.py +++ b/src/client/packaging/pypi/delphi_epidata/__init__.py @@ -1,4 +1,4 @@ from .delphi_epidata import Epidata name = 'delphi_epidata' -__version__ = '0.3.12' +__version__ = '0.3.13' diff --git a/src/client/packaging/pypi/setup.py b/src/client/packaging/pypi/setup.py index 7b77e137e..451b8974c 100644 --- a/src/client/packaging/pypi/setup.py +++ b/src/client/packaging/pypi/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="delphi_epidata", - version="0.3.12", + version="0.3.13", author="David Farrow", author_email="dfarrow0@gmail.com", description="A programmatic interface to Delphi's Epidata API.", diff --git a/src/server/_config.py b/src/server/_config.py index 123970100..f31f16164 100644 --- a/src/server/_config.py +++ b/src/server/_config.py @@ -5,7 +5,7 @@ load_dotenv() -VERSION = "0.3.12" +VERSION = "0.3.13" MAX_RESULTS = int(10e6) MAX_COMPATIBILITY_RESULTS = int(3650) diff --git a/src/server/endpoints/covidcast_utils/db_signals.csv b/src/server/endpoints/covidcast_utils/db_signals.csv index f7d7eba65..6c275d50a 100644 --- a/src/server/endpoints/covidcast_utils/db_signals.csv +++ b/src/server/endpoints/covidcast_utils/db_signals.csv @@ -782,7 +782,7 @@ National provisional death counts is based on death certificate data received an nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,TRUE,deaths_pneumonia_or_flu_or_covid_incidence_prop,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19, per 100k people","{short_description}. National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -quidel-covid-ag,covid_ag_raw_pct_positive,FALSE,covid_ag_raw_pct_positive,FALSE,COVID-19 Antigen Tests: Percent Positive,TRUE,Percentage of antigen tests that were positive for COVID-19,"When a patient (whether at a doctor’s office, clinic, or hospital) has COVID-like symptoms, doctors may order an antigen test. An antigen test can detect parts of the virus that are present during an active infection. This is in contrast with antibody tests, which detect parts of the immune system that react to the virus, but which persist long after the infection has passed. For this signal, we compute the percentage of antigen tests performed that were positive for COVID-19.",day,Date,Percentage,percent,cases_testing,bad,FALSE,FALSE,FALSE,TRUE,TRUE,[Technical description](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/quidel.html#covid-19-tests) +quidel-covid-ag,covid_ag_raw_pct_positive,FALSE,covid_ag_raw_pct_positive,FALSE,COVID-19 Antigen Tests: Percent Positive,TRUE,Percentage of antigen tests that were positive for COVID-19,"When a patient (whether at a doctor's office, clinic, or hospital) has COVID-like symptoms, doctors may order an antigen test. An antigen test can detect parts of the virus that are present during an active infection. This is in contrast with antibody tests, which detect parts of the immune system that react to the virus, but which persist long after the infection has passed. For this signal, we compute the percentage of antigen tests performed that were positive for COVID-19.",day,Date,Percentage,percent,cases_testing,bad,FALSE,FALSE,FALSE,TRUE,TRUE,[Technical description](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/quidel.html#covid-19-tests) quidel-covid-ag,covid_ag_raw_pct_positive,TRUE,covid_ag_smoothed_pct_positive,FALSE,{base_name} (7-day average),TRUE,,,day,Date,Percentage,percent,cases_testing,bad,TRUE,FALSE,FALSE,TRUE,TRUE, quidel-covid-ag,covid_ag_raw_pct_positive_age_0_17,FALSE,covid_ag_raw_pct_positive_age_0_17,FALSE,"COVID-19 Antigen Tests: Percent Positive, Ages 0-17",TRUE,Percentage of antigen tests that were positive for COVID-19 among people ages 0-17,"When a patient (whether at a doctor’s office, clinic, or hospital) has COVID-like symptoms, doctors may order an antigen test. An antigen test can detect parts of the virus that are present during an active infection. This is in contrast with antibody tests, which detect parts of the immune system that react to the virus, but which persist long after the infection has passed. For this signal, we compute the percentage of antigen tests performed that were positive for COVID-19 among people ages 0-17",day,Date,Percentage,percent,cases_testing,bad,FALSE,FALSE,FALSE,TRUE,TRUE,[Technical description](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/quidel.html#covid-19-tests) quidel-covid-ag,covid_ag_raw_pct_positive_age_0_17,TRUE,covid_ag_smoothed_pct_positive_age_0_17,FALSE,{base_name} (Smoothed),TRUE,,"{base_description}, smoothed using a 7-day moving average and geographical pooling",day,Date,Percentage,percent,cases_testing,bad,TRUE,FALSE,FALSE,TRUE,TRUE, diff --git a/src/server/endpoints/covidcast_utils/db_sources.csv b/src/server/endpoints/covidcast_utils/db_sources.csv index 2036e01c2..da7a7fbdc 100644 --- a/src/server/endpoints/covidcast_utils/db_sources.csv +++ b/src/server/endpoints/covidcast_utils/db_sources.csv @@ -7,7 +7,7 @@ fb-survey,fb-survey,Delphi US COVID-19 Trends and Impact Survey,"We conduct the [Question text](https://cmu-delphi.github.io/delphi-epidata/symptom-survey/coding.html)" google-symptoms,google-symptoms,Google Symptoms Search Trends,"Google's [COVID-19 Search Trends symptoms dataset](http://goo.gle/covid19symptomdataset) reflects the relative volume of Google searches for a broad set of symptoms, signs and health conditions. This source includes signals for 7 symptom sets: 6 sets of COVID-related symptoms, and 1 set of control symptoms unrelated to COVID-19. -Because of the way this dataset is constructed, values are comparable across signals in the same location, but not across geographic regions, even within the same signal. Use caution in any geographic analyses.",anosmia_smoothed_search,"To download or use the data, you must agree to the Google [Terms of Service](https://policies.google.com/terms)",,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/google-symptoms.html) +Because of the way this dataset is constructed, values are comparable across signals in the same location, but not across geographic regions, even within the same signal. Use caution in any geographic analyses.",s05_smoothed_search,"To download or use the data, you must agree to the Google [Terms of Service](https://policies.google.com/terms)",,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/google-symptoms.html) hhs,hhs,U.S. Department of Health & Human Services,The US Department of Health & Human Services (HHS) publishes several datasets on patient impact and hospital capacity. This source includes only adult and pediatric hospital admissions with confirmed and suspected COVID-19 or confirmed influenza.,confirmed_admissions_1d,[Public Domain US Government](https://www.usa.gov/government-works),,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/hhs.html) hospital-admissions,hospital-admissions,Hospital Admissions From Claims,"Information about hospital admissions, provided to us by health system partners. Using inpatient claim counts, we estimate the percentage of new hospital admissions with a COVID-associated diagnosis code in a given location, on a given day. diff --git a/tests/acquisition/covidcast/test_csv_importer.py b/tests/acquisition/covidcast/test_csv_importer.py index 3e7224d19..a99eb3a1a 100644 --- a/tests/acquisition/covidcast/test_csv_importer.py +++ b/tests/acquisition/covidcast/test_csv_importer.py @@ -278,13 +278,13 @@ def test_load_csv_with_valid_header(self): # now with missing values! data = { - 'geo_id': ['ca', 'tx', 'fl', 'ak'], - 'val': [np.nan, '1.2', '1.3', '1.4'], - 'se': ['2.1', "na", '2.3', '2.4'], - 'sample_size': ['301', '302', None, '304'], - 'missing_value': [Nans.NOT_APPLICABLE] + [Nans.NOT_MISSING] * 3, - 'missing_stderr': [Nans.NOT_MISSING, Nans.REGION_EXCEPTION, Nans.NOT_MISSING, Nans.NOT_MISSING], - 'missing_sample_size': [Nans.NOT_MISSING] * 2 + [Nans.REGION_EXCEPTION] * 2 + 'geo_id': ['ca', 'tx', 'fl', 'ak', 'wa'], + 'val': [np.nan, '1.2', '1.3', '1.4', '1.5'], + 'se': ['2.1', "na", '2.3', '2.4', '2.5'], + 'sample_size': ['301', '302', None, '304', None], + 'missing_value': [Nans.NOT_APPLICABLE] + [Nans.NOT_MISSING] * 3 + [None], + 'missing_stderr': [Nans.NOT_MISSING, Nans.REGION_EXCEPTION, Nans.NOT_MISSING, Nans.NOT_MISSING] + [None], + 'missing_sample_size': [Nans.NOT_MISSING] * 2 + [Nans.REGION_EXCEPTION] * 2 + [None] } mock_pandas = MagicMock() mock_pandas.read_csv.return_value = pandas.DataFrame(data=data) @@ -295,7 +295,7 @@ def test_load_csv_with_valid_header(self): self.assertTrue(mock_pandas.read_csv.called) self.assertTrue(mock_pandas.read_csv.call_args[0][0], filepath) - self.assertEqual(len(rows), 4) + self.assertEqual(len(rows), 5) self.assertEqual(rows[0].geo_value, 'ca') self.assertIsNone(rows[0].value) @@ -328,3 +328,11 @@ def test_load_csv_with_valid_header(self): self.assertEqual(rows[3].missing_value, Nans.NOT_MISSING) self.assertEqual(rows[3].missing_stderr, Nans.NOT_MISSING) self.assertEqual(rows[3].missing_sample_size, Nans.NOT_MISSING) + + self.assertEqual(rows[4].geo_value, 'wa') + self.assertEqual(rows[4].value, 1.5) + self.assertEqual(rows[4].stderr, 2.5) + self.assertEqual(rows[4].sample_size, None) + self.assertEqual(rows[4].missing_value, Nans.NOT_MISSING) + self.assertEqual(rows[4].missing_stderr, Nans.NOT_MISSING) + self.assertEqual(rows[4].missing_sample_size, Nans.OTHER) \ No newline at end of file