2 changes: 2 additions & 0 deletions R-packages/evalcast/NAMESPACE
@@ -51,6 +51,8 @@ importFrom(covidcast,covidcast_signal)
 importFrom(data.table,"%chin%")
 importFrom(data.table,fread)
 importFrom(dplyr,bind_rows)
+importFrom(httr,GET)
+importFrom(httr,RETRY)
 importFrom(magrittr,"%>%")
 importFrom(purrr,pmap)
 importFrom(rlang,":=")
14 changes: 14 additions & 0 deletions R-packages/evalcast/NEWS.md
@@ -4,6 +4,20 @@
   returns the most recent value within a given time period for either a `day`
   or `epiweek` incidence period. For incidence signals, `get_target_response`
   still sums all values within the time period.
+- Raise non-successful HTTP statuses as errors in
+  `get_covidhub_forecast_dates`. This is especially useful for debugging
+  issues with GitHub API authentication.
+- Stop `get_forecast_dates` from swallowing all errors raised by
+  `get_covidhub_forecast_dates` and silently returning bogus results, which
+  can cause mysterious, hard-to-debug failures downstream. This means that
+  `get_forecast_dates` may fail more often, but the tradeoff is worthwhile.
+  Without valid forecast dates, downstream calls can't fetch valid forecast
+  data anyway; fetching forecast dates is fast, so the cost of rerunning is
+  low, while downloading forecasts is time-consuming. The change also lets
+  us verify GitHub API authentication upfront, which the later forecast
+  downloads require.
+- Retry HTTP requests in `get_forecast_dates` if they don't succeed
+  initially.
 
 # evalcast 0.3.4
 
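The two NEWS entries about retries and HTTP errors boil down to a small httr pattern. Here is a minimal standalone sketch of that pattern (the function and its names are illustrative, not code from the package):

```r
library(httr)

# Fetch JSON from an API, retrying transient failures a few times and
# turning any remaining non-2xx status into an R error instead of
# silently returning a broken response.
fetch_json <- function(url, ...) {
  resp <- RETRY("GET", url, ..., times = 3) # retries on failed requests
  stop_for_status(resp)                     # raises e.g. 401/403/404 as errors
  content(resp)
}
```

With this shape, an authentication problem surfaces immediately as an error at the date-fetching step rather than propagating invalid data into the slow forecast-download step.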
19 changes: 10 additions & 9 deletions R-packages/evalcast/R/get_covidhub_predictions.R
@@ -148,16 +148,12 @@ get_forecast_dates <- function(forecasters,
                                start_date,
                                end_date,
                                date_filtering_function) {
-  forecast_dates <- as_date(forecast_dates)
   forecaster_dates <- vector("list", length = length(forecasters))
   for (i in seq_len(length(forecasters))) {
-    forecaster_dates[[i]] <- tryCatch({
-      lubridate::as_date(get_covidhub_forecast_dates(forecasters[i]))
-    },
-    error = function(e) cat(sprintf("%i. %s\n", i, e$message))
-    )
+    forecaster_dates[[i]] <- lubridate::as_date(get_covidhub_forecast_dates(forecasters[i]))

Comment (Collaborator): Mostly out of scope, but if you know off the top of your head, I'm curious: why are we using `lubridate::` here instead of just `as_date`?

Reply (Contributor, author): I'm not sure. (All the other `as_date`s should really be loaded directly from `lubridate`, since we're not importing the function.)

   }
   if (length(forecast_dates) != 0) {
+    forecast_dates <- as_date(forecast_dates)
     # Intersect acts oddly with dates. If foo = as_date(bar), then foo == bar is
     # true, but (foo %in% bar) is false and intersect(foo, bar) is an empty
     # vector. Additionally, intersect returns a numeric object instead of a
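The class-dropping behavior noted in the comment above is easy to reproduce. This standalone snippet (not part of the diff) shows it; in base R, `intersect()` calls `as.vector()` on its arguments, which strips the `Date` class:

```r
# Intersecting two Date vectors yields bare day-count numbers, because
# base::intersect() strips the Date class via as.vector().
d1 <- as.Date(c("2021-01-01", "2021-01-02"))
d2 <- as.Date(c("2021-01-02", "2021-01-03"))

common <- intersect(d1, d2)
class(common)                           # "numeric", not "Date"

# The class (and readable dates) can be restored explicitly:
as.Date(common, origin = "1970-01-01")  # "2021-01-02"
```

This is why code that intersects date vectors has to convert the result back to `Date` before using it.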
@@ -431,6 +427,8 @@ get_forecaster_predictions_alt <- function(covidhub_forecaster_name,
 #'
 #' @return vector of forecast dates
 #'
+#' @importFrom httr GET RETRY
+#'
 #' @export
 get_covidhub_forecast_dates <- function(forecaster_name) {
   url <- "https://api.github.com/repos/reichlab/covid19-forecast-hub/git/trees/master"
@@ -451,25 +449,28 @@ get_covidhub_forecast_dates <- function(forecaster_name) {

   # Get the URL for the submissions folder "data-processed".
   submissions_folder <- url %>%
-    httr::GET(auth_header) %>%
+    RETRY("GET", url = ., auth_header) %>%
     is_rate_limit_exceeded() %>%
+    httr::stop_for_status() %>%
     httr::content() %>%
     purrr::pluck("tree") %>%
     magrittr::extract2(which(purrr::map_chr(., "path") == "data-processed"))
 
   # Get the URL for the specified forecaster folder.
   forecaster_folder <- submissions_folder$url %>%
-    httr::GET(auth_header) %>%
+    RETRY("GET", url = ., auth_header) %>%
     is_rate_limit_exceeded() %>%
+    httr::stop_for_status() %>%
     httr::content() %>%
     purrr::pluck("tree") %>%
     magrittr::extract2(which(purrr::map_chr(., "path") == forecaster_name))
 
   # Get the forecaster submission files.
   submission_file_pattern <- sprintf("^(20\\d{2}-\\d{2}-\\d{2})-%s.csv$", forecaster_name)
   submission_files <- forecaster_folder$url %>%
-    httr::GET(auth_header) %>%
+    RETRY("GET", url = ., auth_header) %>%
     is_rate_limit_exceeded() %>%
+    httr::stop_for_status() %>%
     httr::content() %>%
     purrr::pluck("tree") %>%
     purrr::map_chr("path") %>%
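The `submission_file_pattern` regex in the last hunk does the real work of recovering forecast dates from Hub file names. A standalone sketch of that step (the file names are invented for illustration):

```r
forecaster_name <- "COVIDhub-ensemble"  # illustrative forecaster name
pattern <- sprintf("^(20\\d{2}-\\d{2}-\\d{2})-%s.csv$", forecaster_name)

paths <- c("2021-05-03-COVIDhub-ensemble.csv",
           "2021-05-10-COVIDhub-ensemble.csv",
           "metadata-COVIDhub-ensemble.txt")

# Keep only matching files and capture the leading YYYY-MM-DD date.
sub(pattern, "\\1", paths[grepl(pattern, paths)])
# "2021-05-03" "2021-05-10"
```

Files that don't match the `<date>-<forecaster>.csv` naming scheme are filtered out before the date is extracted.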