cmu-delphi · krivard · Mar 8, 2022 · Feb 28, 2022 · Feb 28, 2022 · Feb 28, 2022
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.12
+current_version = 0.3.13
 commit = False
 tag = False
 

diff --git a/src/acquisition/covidcast/csv_importer.py b/src/acquisition/covidcast/csv_importer.py
@@ -43,9 +43,9 @@ class CsvImporter:
     "val": float,
     "se": float,
     "sample_size": float,
-    "missing_val": int,
-    "missing_se": int,
-    "missing_sample_size": int
+    "missing_val": "Int64",
+    "missing_se": "Int64",
+    "missing_sample_size": "Int64"
   }
 
   # NOTE: this should be a Python 3.7+ `dataclass`, but the server is on 3.4

diff --git a/src/acquisition/covidcast/database.py b/src/acquisition/covidcast/database.py
@@ -326,6 +326,7 @@ def delete_batch(self, cc_deletions):
 '''
 
     drop_tmp_table_sql = f'DROP TABLE {tmp_table_name}'
+    total = None
     try:
       self._cursor.execute(create_tmp_table_sql)
       self._cursor.execute(amend_tmp_table_sql)
@@ -337,12 +338,18 @@ def delete_batch(self, cc_deletions):
         raise Exception(f"Bad deletions argument: need a filename or a list of tuples; got a {type(cc_deletions)}")
       self._cursor.execute(add_id_sql)
       self._cursor.execute(delete_sql)
+      total = self._cursor.rowcount
       self._cursor.execute(fix_latest_issue_sql)
       self._connection.commit()
+
+      if total == -1:
+        # the SQL connector does not support returning number of rows affected (see PEP 249)
+        total = None
     except Exception as e:
       raise e
     finally:
       self._cursor.execute(drop_tmp_table_sql)
+    return total
 
   def compute_covidcast_meta(self, table_name='covidcast', use_index=True):
     """Compute and return metadata on all non-WIP COVIDcast signals."""

diff --git a/src/acquisition/covidcast/delete_batch.py b/src/acquisition/covidcast/delete_batch.py
@@ -2,6 +2,7 @@
 
 # standard library
 import argparse
+import glob
 import os
 import time
 
@@ -22,19 +23,21 @@ def get_argument_parser():
       help="filename for log output (defaults to stdout)")
     return parser
 
-def handle_file(deletion_file, database):
+def handle_file(deletion_file, database, logger):
     logger.info("Deleting from csv file", filename=deletion_file)
     rows = []
     with open(deletion_file) as f:
         for line in f:
-            rows.append(line.strip().split(","))
+            fields = line.strip().split(",")
+            if len(fields) < 9: continue
+            rows.append(fields + ["day"])
     rows = rows[1:]
     try:
         n = database.delete_batch(rows)
         logger.info("Deleted database rows", row_count=n)
         return n
     except Exception as e:
-        logger.exception('Exception while deleting rows:', e)
+        logger.exception('Exception while deleting rows', exception=e)
         database.rollback()
     return 0
 
@@ -49,7 +52,11 @@ def main(args):
 
     try:
         for deletion_file in sorted(glob.glob(os.path.join(args.deletion_dir, '*.csv'))):
-            all_n += handle_file(deletion_file)
+            n = handle_file(deletion_file, database, logger)
+            if n is not None:
+                all_n += n
+            else:
+                all_n = "rowcount unsupported"
     finally:
         database.disconnect(True)
 

diff --git a/src/client/delphi_epidata.R b/src/client/delphi_epidata.R
@@ -15,7 +15,7 @@ Epidata <- (function() {
   # API base url
   BASE_URL <- 'https://delphi.cmu.edu/epidata/api.php'
 
-  client_version <- '0.3.12'
+  client_version <- '0.3.13'
 
   # Helper function to cast values and/or ranges to strings
   .listitem <- function(value) {

diff --git a/src/client/delphi_epidata.js b/src/client/delphi_epidata.js
@@ -22,7 +22,7 @@
   }
 })(this, function (exports, fetchImpl, jQuery) {
   const BASE_URL = "https://delphi.cmu.edu/epidata/";
-  const client_version = "0.3.12";
+  const client_version = "0.3.13";
 
   // Helper function to cast values and/or ranges to strings
   function _listitem(value) {

diff --git a/src/client/packaging/npm/package.json b/src/client/packaging/npm/package.json
@@ -2,7 +2,7 @@
   "name": "delphi_epidata",
   "description": "Delphi Epidata API Client",
   "authors": "Delphi Group",
-  "version": "0.3.12",
+  "version": "0.3.13",
   "license": "MIT",
   "homepage": "https://github.com/cmu-delphi/delphi-epidata",
   "bugs": {

diff --git a/src/client/packaging/pypi/delphi_epidata/__init__.py b/src/client/packaging/pypi/delphi_epidata/__init__.py
@@ -1,4 +1,4 @@
 from .delphi_epidata import Epidata
 
 name = 'delphi_epidata'
-__version__ = '0.3.12'
+__version__ = '0.3.13'
diff --git a/src/client/packaging/pypi/setup.py b/src/client/packaging/pypi/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="delphi_epidata",
-    version="0.3.12",
+    version="0.3.13",
     author="David Farrow",
     author_email="[email protected]",
     description="A programmatic interface to Delphi's Epidata API.",

diff --git a/src/server/_config.py b/src/server/_config.py
@@ -5,7 +5,7 @@
 
 load_dotenv()
 
-VERSION = "0.3.12"
+VERSION = "0.3.13"
 
 MAX_RESULTS = int(10e6)
 MAX_COMPATIBILITY_RESULTS = int(3650)

diff --git a/src/server/endpoints/covidcast_utils/db_signals.csv b/src/server/endpoints/covidcast_utils/db_signals.csv
@@ -782,7 +782,7 @@ National provisional death counts is based on death certificate data received an
 nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,TRUE,deaths_pneumonia_or_flu_or_covid_incidence_prop,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19, per 100k people","{short_description}.
 
 National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE,
-quidel-covid-ag,covid_ag_raw_pct_positive,FALSE,covid_ag_raw_pct_positive,FALSE,COVID-19 Antigen Tests: Percent Positive,TRUE,Percentage of antigen tests that were positive for COVID-19,"When a patient (whether at a doctorâs office, clinic, or hospital) has COVID-like symptoms, doctors may order an antigen test. An antigen test can detect parts of the virus that are present during an active infection. This is in contrast with antibody tests, which detect parts of the immune system that react to the virus, but which persist long after the infection has passed. For this signal, we compute the percentage of antigen tests performed that were positive for COVID-19.",day,Date,Percentage,percent,cases_testing,bad,FALSE,FALSE,FALSE,TRUE,TRUE,[Technical description](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/quidel.html#covid-19-tests)
+quidel-covid-ag,covid_ag_raw_pct_positive,FALSE,covid_ag_raw_pct_positive,FALSE,COVID-19 Antigen Tests: Percent Positive,TRUE,Percentage of antigen tests that were positive for COVID-19,"When a patient (whether at a doctor's office, clinic, or hospital) has COVID-like symptoms, doctors may order an antigen test. An antigen test can detect parts of the virus that are present during an active infection. This is in contrast with antibody tests, which detect parts of the immune system that react to the virus, but which persist long after the infection has passed. For this signal, we compute the percentage of antigen tests performed that were positive for COVID-19.",day,Date,Percentage,percent,cases_testing,bad,FALSE,FALSE,FALSE,TRUE,TRUE,[Technical description](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/quidel.html#covid-19-tests)
 quidel-covid-ag,covid_ag_raw_pct_positive,TRUE,covid_ag_smoothed_pct_positive,FALSE,{base_name} (7-day average),TRUE,,,day,Date,Percentage,percent,cases_testing,bad,TRUE,FALSE,FALSE,TRUE,TRUE,
 quidel-covid-ag,covid_ag_raw_pct_positive_age_0_17,FALSE,covid_ag_raw_pct_positive_age_0_17,FALSE,"COVID-19 Antigen Tests: Percent Positive, Ages 0-17",TRUE,Percentage of antigen tests that were positive for COVID-19 among people ages 0-17,"When a patient (whether at a doctor's office, clinic, or hospital) has COVID-like symptoms, doctors may order an antigen test. An antigen test can detect parts of the virus that are present during an active infection. This is in contrast with antibody tests, which detect parts of the immune system that react to the virus, but which persist long after the infection has passed. For this signal, we compute the percentage of antigen tests performed that were positive for COVID-19 among people ages 0-17",day,Date,Percentage,percent,cases_testing,bad,FALSE,FALSE,FALSE,TRUE,TRUE,[Technical description](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/quidel.html#covid-19-tests)
 quidel-covid-ag,covid_ag_raw_pct_positive_age_0_17,TRUE,covid_ag_smoothed_pct_positive_age_0_17,FALSE,{base_name} (Smoothed),TRUE,,"{base_description}, smoothed using a 7-day moving average and geographical pooling",day,Date,Percentage,percent,cases_testing,bad,TRUE,FALSE,FALSE,TRUE,TRUE,

diff --git a/src/server/endpoints/covidcast_utils/db_sources.csv b/src/server/endpoints/covidcast_utils/db_sources.csv
@@ -7,7 +7,7 @@ fb-survey,fb-survey,Delphi US COVID-19 Trends and Impact Survey,"We conduct the
 [Question text](https://cmu-delphi.github.io/delphi-epidata/symptom-survey/coding.html)"
 google-symptoms,google-symptoms,Google Symptoms Search Trends,"Google's [COVID-19 Search Trends symptoms dataset](http://goo.gle/covid19symptomdataset) reflects the relative volume of Google searches for a broad set of symptoms, signs and health conditions. This source includes signals for 7 symptom sets: 6 sets of COVID-related symptoms, and 1 set of control symptoms unrelated to COVID-19.
 
-Because of the way this dataset is constructed, values are comparable across signals in the same location, but not across geographic regions, even within the same signal. Use caution in any geographic analyses.",anosmia_smoothed_search,"To download or use the data, you must agree to the Google [Terms of Service](https://policies.google.com/terms)",,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/google-symptoms.html)
+Because of the way this dataset is constructed, values are comparable across signals in the same location, but not across geographic regions, even within the same signal. Use caution in any geographic analyses.",s05_smoothed_search,"To download or use the data, you must agree to the Google [Terms of Service](https://policies.google.com/terms)",,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/google-symptoms.html)
 hhs,hhs,U.S. Department of Health & Human Services,The US Department of Health & Human Services (HHS) publishes several datasets on patient impact and hospital capacity. This source includes only adult and pediatric hospital admissions with confirmed and suspected COVID-19 or confirmed influenza.,confirmed_admissions_1d,[Public Domain US Government](https://www.usa.gov/government-works),,[API Documentation](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/hhs.html)
 hospital-admissions,hospital-admissions,Hospital Admissions From Claims,"Information about hospital admissions, provided to us by health system partners. Using inpatient claim counts, we estimate the percentage of new hospital admissions with a COVID-associated diagnosis code in a given location, on a given day.
 

diff --git a/tests/acquisition/covidcast/test_csv_importer.py b/tests/acquisition/covidcast/test_csv_importer.py
@@ -278,13 +278,13 @@ def test_load_csv_with_valid_header(self):
 
     # now with missing values!
     data = {
-      'geo_id': ['ca', 'tx', 'fl', 'ak'],
-      'val': [np.nan, '1.2', '1.3', '1.4'],
-      'se': ['2.1', "na", '2.3', '2.4'],
-      'sample_size': ['301', '302', None, '304'],
-      'missing_value': [Nans.NOT_APPLICABLE] + [Nans.NOT_MISSING] * 3,
-      'missing_stderr': [Nans.NOT_MISSING, Nans.REGION_EXCEPTION, Nans.NOT_MISSING, Nans.NOT_MISSING],
-      'missing_sample_size': [Nans.NOT_MISSING] * 2 + [Nans.REGION_EXCEPTION] * 2
+      'geo_id': ['ca', 'tx', 'fl', 'ak', 'wa'],
+      'val': [np.nan, '1.2', '1.3', '1.4', '1.5'],
+      'se': ['2.1', "na", '2.3', '2.4', '2.5'],
+      'sample_size': ['301', '302', None, '304', None],
+      'missing_value': [Nans.NOT_APPLICABLE] + [Nans.NOT_MISSING] * 3 + [None],
+      'missing_stderr': [Nans.NOT_MISSING, Nans.REGION_EXCEPTION, Nans.NOT_MISSING, Nans.NOT_MISSING] + [None],
+      'missing_sample_size': [Nans.NOT_MISSING] * 2 + [Nans.REGION_EXCEPTION] * 2 + [None]
     }
     mock_pandas = MagicMock()
     mock_pandas.read_csv.return_value = pandas.DataFrame(data=data)
@@ -295,7 +295,7 @@ def test_load_csv_with_valid_header(self):
 
     self.assertTrue(mock_pandas.read_csv.called)
     self.assertTrue(mock_pandas.read_csv.call_args[0][0], filepath)
-    self.assertEqual(len(rows), 4)
+    self.assertEqual(len(rows), 5)
 
     self.assertEqual(rows[0].geo_value, 'ca')
     self.assertIsNone(rows[0].value)
@@ -328,3 +328,11 @@ def test_load_csv_with_valid_header(self):
     self.assertEqual(rows[3].missing_value, Nans.NOT_MISSING)
     self.assertEqual(rows[3].missing_stderr, Nans.NOT_MISSING)
     self.assertEqual(rows[3].missing_sample_size, Nans.NOT_MISSING)
+
+    self.assertEqual(rows[4].geo_value, 'wa')
+    self.assertEqual(rows[4].value, 1.5)
+    self.assertEqual(rows[4].stderr, 2.5)
+    self.assertEqual(rows[4].sample_size, None)
+    self.assertEqual(rows[4].missing_value, Nans.NOT_MISSING)
+    self.assertEqual(rows[4].missing_stderr, Nans.NOT_MISSING)
+    self.assertEqual(rows[4].missing_sample_size, Nans.OTHER)