diff --git a/gvfs-helper-client.c b/gvfs-helper-client.c index 1564610aae594b..3bcec53cfe2acc 100644 --- a/gvfs-helper-client.c +++ b/gvfs-helper-client.c @@ -13,7 +13,6 @@ static struct oidset gh_client__oidset_queued = OIDSET_INIT; static unsigned long gh_client__oidset_count; -static int gh_client__includes_immediate; struct gh_server__process { struct subprocess_entry subprocess; /* must be first */ @@ -24,13 +23,20 @@ static int gh_server__subprocess_map_initialized; static struct hashmap gh_server__subprocess_map; static struct object_directory *gh_client__chosen_odb; -#define CAP_GET (1u<<1) +/* + * The "objects" capability has 2 verbs: "get" and "post". + */ +#define CAP_OBJECTS (1u<<1) +#define CAP_OBJECTS_NAME "objects" + +#define CAP_OBJECTS__VERB_GET1_NAME "get" +#define CAP_OBJECTS__VERB_POST_NAME "post" static int gh_client__start_fn(struct subprocess_entry *subprocess) { static int versions[] = {1, 0}; static struct subprocess_capability capabilities[] = { - { "get", CAP_GET }, + { CAP_OBJECTS_NAME, CAP_OBJECTS }, { NULL, 0 } }; @@ -42,14 +48,16 @@ static int gh_client__start_fn(struct subprocess_entry *subprocess) } /* - * Send: + * Send the queued OIDs in the OIDSET to gvfs-helper for it to + * fetch from the cache-server or main Git server using "/gvfs/objects" + * POST semantics. * - * get LF + * objects.post LF * ( LF)* * * */ -static int gh_client__get__send_command(struct child_process *process) +static int gh_client__send__objects_post(struct child_process *process) { struct oidset_iter iter; struct object_id *oid; @@ -60,7 +68,9 @@ static int gh_client__get__send_command(struct child_process *process) * so that we don't have to. */ - err = packet_write_fmt_gently(process->in, "get\n"); + err = packet_write_fmt_gently( + process->in, + (CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_POST_NAME "\n")); if (err) return err; @@ -79,6 +89,46 @@ static int gh_client__get__send_command(struct child_process *process) return 0; } +/* + * Send the given OID to gvfs-helper for it to fetch from the + * cache-server or main Git server using "/gvfs/objects" GET + * semantics. + * + * This ignores any queued OIDs. + * + * objects.get LF + * LF + * + * + */ +static int gh_client__send__objects_get(struct child_process *process, + const struct object_id *oid) +{ + int err; + + /* + * We assume that all of the packet_ routines call error() + * so that we don't have to. + */ + + err = packet_write_fmt_gently( + process->in, + (CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_GET1_NAME "\n")); + if (err) + return err; + + err = packet_write_fmt_gently(process->in, "%s\n", + oid_to_hex(oid)); + if (err) + return err; + + err = packet_flush_gently(process->in); + if (err) + return err; + + return 0; +} + /* * Verify that the pathname found in the "odb" response line matches * what we requested. @@ -148,7 +198,7 @@ static void gh_client__update_packed_git(const char *line) } /* - * We expect: + * Both CAP_OBJECTS verbs return the same format response: * * * * @@ -179,7 +229,7 @@ static void gh_client__update_packed_git(const char *line) * grouped with a queued request for a blob. The tree-walk *might* be * able to continue and let the 404 blob be handled later. */ -static int gh_client__get__receive_response( +static int gh_client__objects__receive_response( struct child_process *process, enum gh_client__created *p_ghc, int *p_nr_loose, int *p_nr_packfile) @@ -259,17 +309,12 @@ static void gh_client__choose_odb(void) } } -static int gh_client__get(enum gh_client__created *p_ghc) +static struct gh_server__process *gh_client__find_long_running_process( + unsigned int cap_needed) { struct gh_server__process *entry; - struct child_process *process; struct argv_array argv = ARGV_ARRAY_INIT; struct strbuf quoted = STRBUF_INIT; - int nr_loose = 0; - int nr_packfile = 0; - int err = 0; - - trace2_region_enter("gh-client", "get", the_repository); gh_client__choose_odb(); @@ -285,6 +330,11 @@ static int gh_client__get(enum gh_client__created *p_ghc) sq_quote_argv_pretty("ed, argv.argv); + /* + * Find an existing long-running process with the above command + * line -or- create a new long-running process for this and + * subsequent 'get' requests. + */ if (!gh_server__subprocess_map_initialized) { gh_server__subprocess_map_initialized = 1; hashmap_init(&gh_server__subprocess_map, @@ -298,70 +348,24 @@ static int gh_client__get(enum gh_client__created *p_ghc) entry = xmalloc(sizeof(*entry)); entry->supported_capabilities = 0; - err = subprocess_start_argv( - &gh_server__subprocess_map, &entry->subprocess, 1, - &argv, gh_client__start_fn); - if (err) { - free(entry); - goto leave_region; - } + if (subprocess_start_argv(&gh_server__subprocess_map, + &entry->subprocess, 1, + &argv, gh_client__start_fn)) + FREE_AND_NULL(entry); } - process = &entry->subprocess.process; - - if (!(CAP_GET & entry->supported_capabilities)) { - error("gvfs-helper: does not support GET"); + if (entry && + (entry->supported_capabilities & cap_needed) != cap_needed) { + error("gvfs-helper: does not support needed capabilities"); subprocess_stop(&gh_server__subprocess_map, (struct subprocess_entry *)entry); - free(entry); - err = -1; - goto leave_region; + FREE_AND_NULL(entry); } - sigchain_push(SIGPIPE, SIG_IGN); - - err = gh_client__get__send_command(process); - if (!err) - err = gh_client__get__receive_response(process, p_ghc, - &nr_loose, &nr_packfile); - - sigchain_pop(SIGPIPE); - - if (err) { - subprocess_stop(&gh_server__subprocess_map, - (struct subprocess_entry *)entry); - free(entry); - } - -leave_region: argv_array_clear(&argv); strbuf_release("ed); - trace2_data_intmax("gh-client", the_repository, - "get/immediate", gh_client__includes_immediate); - - trace2_data_intmax("gh-client", the_repository, - "get/nr_objects", gh_client__oidset_count); - - if (nr_loose) - trace2_data_intmax("gh-client", the_repository, - "get/nr_loose", nr_loose); - - if (nr_packfile) - trace2_data_intmax("gh-client", the_repository, - "get/nr_packfile", nr_packfile); - - if (err) - trace2_data_intmax("gh-client", the_repository, - "get/error", err); - - trace2_region_leave("gh-client", "get", the_repository); - - oidset_clear(&gh_client__oidset_queued); - gh_client__oidset_count = 0; - gh_client__includes_immediate = 0; - - return err; + return entry; } void gh_client__queue_oid(const struct object_id *oid) @@ -388,28 +392,97 @@ void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr) gh_client__queue_oid(&oids[k]); } +/* + * Bulk fetch all of the queued OIDs in the OIDSET. + */ int gh_client__drain_queue(enum gh_client__created *p_ghc) { + struct gh_server__process *entry; + struct child_process *process; + int nr_loose = 0; + int nr_packfile = 0; + int err = 0; + *p_ghc = GHC__CREATED__NOTHING; if (!gh_client__oidset_count) return 0; - return gh_client__get(p_ghc); + entry = gh_client__find_long_running_process(CAP_OBJECTS); + if (!entry) + return -1; + + trace2_region_enter("gh-client", "objects/post", the_repository); + + process = &entry->subprocess.process; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = gh_client__send__objects_post(process); + if (!err) + err = gh_client__objects__receive_response( + process, p_ghc, &nr_loose, &nr_packfile); + + sigchain_pop(SIGPIPE); + + if (err) { + subprocess_stop(&gh_server__subprocess_map, + (struct subprocess_entry *)entry); + FREE_AND_NULL(entry); + } + + trace2_data_intmax("gh-client", the_repository, + "objects/post/nr_objects", gh_client__oidset_count); + trace2_region_leave("gh-client", "objects/post", the_repository); + + oidset_clear(&gh_client__oidset_queued); + gh_client__oidset_count = 0; + + return err; } +/* + * Get exactly 1 object immediately. + * Ignore any queued objects. + */ int gh_client__get_immediate(const struct object_id *oid, enum gh_client__created *p_ghc) { - gh_client__includes_immediate = 1; + struct gh_server__process *entry; + struct child_process *process; + int nr_loose = 0; + int nr_packfile = 0; + int err = 0; // TODO consider removing this trace2. it is useful for interactive // TODO debugging, but may generate way too much noise for a data // TODO event. trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid)); - if (!oidset_insert(&gh_client__oidset_queued, oid)) - gh_client__oidset_count++; + entry = gh_client__find_long_running_process(CAP_OBJECTS); + if (!entry) + return -1; + + trace2_region_enter("gh-client", "objects/get", the_repository); - return gh_client__drain_queue(p_ghc); + process = &entry->subprocess.process; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = gh_client__send__objects_get(process, oid); + if (!err) + err = gh_client__objects__receive_response( + process, p_ghc, &nr_loose, &nr_packfile); + + sigchain_pop(SIGPIPE); + + if (err) { + subprocess_stop(&gh_server__subprocess_map, + (struct subprocess_entry *)entry); + FREE_AND_NULL(entry); + } + + trace2_region_leave("gh-client", "objects/get", the_repository); + + return err; } diff --git a/gvfs-helper-client.h b/gvfs-helper-client.h index 5f5e824bb0771a..a5a951ff5b5bfe 100644 --- a/gvfs-helper-client.h +++ b/gvfs-helper-client.h @@ -32,13 +32,14 @@ enum gh_client__created { }; /* - * Ask `gvfs-helper server` to immediately fetch an object. - * Wait for the response. + * Ask `gvfs-helper server` to immediately fetch a single object + * using "/gvfs/objects" GET semantics. * - * This may also fetch any queued (non-immediate) objects and - * so may create one or more loose objects and/or packfiles. - * It is undefined whether the requested OID will be loose or - * in a packfile. + * A long-running background process is used to make subsequent + * requests more efficient. + * + * A loose object will be created in the shared-cache ODB and + * in-memory cache updated. */ int gh_client__get_immediate(const struct object_id *oid, enum gh_client__created *p_ghc); @@ -47,16 +48,21 @@ int gh_client__get_immediate(const struct object_id *oid, * Queue this OID for a future fetch using `gvfs-helper service`. * It does not wait. * - * The GHC layer is free to process this queue in any way it wants, - * including individual fetches, bulk fetches, and batching. And - * it may add queued objects to immediate requests. - * * Callers should not rely on the queued object being on disk until * the queue has been drained. */ void gh_client__queue_oid(const struct object_id *oid); void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr); +/* + * Ask `gvfs-helper server` to fetch the set of queued OIDs using + * "/gvfs/objects" POST semantics. + * + * A long-running background process is used to subsequent requests + * more efficient. + * + * One or more packfiles will be created in the shared-cache ODB. + */ int gh_client__drain_queue(enum gh_client__created *p_ghc); #endif /* GVFS_HELPER_CLIENT_H */ diff --git a/gvfs-helper.c b/gvfs-helper.c index 8561a3fcf900df..34d98ff3807774 100644 --- a/gvfs-helper.c +++ b/gvfs-helper.c @@ -46,7 +46,28 @@ // // get // -// Fetch 1 or more objects. If a cache-server is configured, +// Fetch 1 or more objects one at a time using a "/gvfs/objects" +// GET request. +// +// If a cache-server is configured, +// try it first. Optionally fallback to the main Git server. +// +// The set of objects is given on stdin and is assumed to be +// a list of , one per line. +// +// : +// +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. +// Set to zero to disable such retries. +// +// post +// +// Fetch 1 or more objects in bulk using a "/gvfs/objects" POST +// request. +// +// If a cache-server is configured, // try it first. Optionally fallback to the main Git server. // // The set of objects is given on stdin and is assumed to be @@ -61,6 +82,11 @@ // // --depth= // defaults to "1" // +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. +// Set to zero to disable such retries. +// // server // // Interactive/sub-process mode. Listen for a series of commands @@ -73,21 +99,37 @@ // --block-size= // defaults to "4000" // // Request objects from server in batches of at -// most n objects (not bytes). +// most n objects (not bytes) when using POST +// requests. // // --depth= // defaults to "1" // -// Interactive verb: get +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. +// Set to zero to disable such retries. // -// Fetch 1 or more objects. If a cache-server is configured, -// try it first. Optionally fallback to the main Git server. +// Interactive verb: objects.get +// +// Fetch 1 or more objects, one at a time, using a +// "/gvfs/ojbects" GET requests. +// +// Each object will be created as a loose object in the ODB. +// +// Interactive verb: objects.post +// +// Fetch 1 or more objects, in bulk, using one or more +// "/gvfs/objects" POST requests. +// +// For both verbs, if a cache-server is configured, try it first. +// Optionally fallback to the main Git server. // // Create 1 or more loose objects and/or packfiles in the // shared-cache ODB. (The pathname of the selected ODB is // reported at the beginning of the response; this should // match the pathname given on the command line). // -// git> get +// git> objects.get | objects.post // git> // git> // git> ... @@ -106,20 +148,6 @@ // [2] Documentation/technical/long-running-process-protocol.txt // [3] See GIT_TRACE_PACKET // -// Example: -// -// $ git -c core.virtualizeobjects=false -c core.usegvfshelper=false -// rev-list --objects --no-walk --missing=print HEAD -// | grep "^?" -// | sed 's/^?//' -// | git gvfs-helper get-missing -// -// Note: In this example, we need to turn off "core.virtualizeobjects" and -// "core.usegvfshelper" when building the list of objects. This prevents -// rev-list (in oid_object_info_extended() from automatically fetching -// them with read-object-hook or "gvfs-helper server" sub-process (and -// defeating the whole purpose of this example). -// ////////////////////////////////////////////////////////////////// #include "cache.h" @@ -152,21 +180,45 @@ static const char * const main_usage[] = { N_("git gvfs-helper [] config []"), N_("git gvfs-helper [] get []"), + N_("git gvfs-helper [] post []"), N_("git gvfs-helper [] server []"), NULL }; -static const char *const get_usage[] = { +static const char *const objects_get_usage[] = { N_("git gvfs-helper [] get []"), NULL }; +static const char *const objects_post_usage[] = { + N_("git gvfs-helper [] post []"), + NULL +}; + static const char *const server_usage[] = { N_("git gvfs-helper [] server []"), NULL }; -#define GH__DEFAULT_BLOCK_SIZE 4000 +/* + * "commitDepth" field in gvfs protocol + */ +#define GH__DEFAULT__OBJECTS_POST__COMMIT_DEPTH 1 + +/* + * Chunk/block size in number of objects we request in each packfile + */ +#define GH__DEFAULT__OBJECTS_POST__BLOCK_SIZE 4000 + +/* + * Retry attempts (after the initial request) for transient errors and 429s. + */ +#define GH__DEFAULT_MAX_RETRIES 6 + +/* + * Maximum delay in seconds for transient (network) error retries. + */ +#define GH__DEFAULT_MAX_TRANSIENT_BACKOFF_SEC 300 /* * Our exit-codes. @@ -175,16 +227,18 @@ enum gh__error_code { GH__ERROR_CODE__USAGE = -1, /* will be mapped to usage() */ GH__ERROR_CODE__OK = 0, GH__ERROR_CODE__ERROR = 1, /* unspecified */ -// GH__ERROR_CODE__CACHE_SERVER_NOT_FOUND = 2, - GH__ERROR_CODE__CURL_ERROR = 3, - GH__ERROR_CODE__HTTP_401 = 4, - GH__ERROR_CODE__HTTP_404 = 5, - GH__ERROR_CODE__HTTP_UNEXPECTED_CODE = 6, - GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE = 7, + GH__ERROR_CODE__CURL_ERROR = 2, + GH__ERROR_CODE__HTTP_401 = 3, + GH__ERROR_CODE__HTTP_404 = 4, + GH__ERROR_CODE__HTTP_429 = 5, + GH__ERROR_CODE__HTTP_503 = 6, + GH__ERROR_CODE__HTTP_OTHER = 7, + GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE = 8, GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE = 8, - GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE = 9, - GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE = 10, - GH__ERROR_CODE__SUBPROCESS_SYNTAX = 11, + GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE = 10, + GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE = 11, + GH__ERROR_CODE__SUBPROCESS_SYNTAX = 12, + GH__ERROR_CODE__INDEX_PACK_FAILED = 13, }; enum gh__cache_server_mode { @@ -210,6 +264,8 @@ static struct gh__cmd_opts { int depth; int block_size; + int max_retries; + int max_transient_backoff_sec; enum gh__cache_server_mode cache_server_mode; } gh__cmd_opts; @@ -234,6 +290,69 @@ static struct gh__global { } gh__global; +enum gh__server_type { + GH__SERVER_TYPE__MAIN = 0, + GH__SERVER_TYPE__CACHE = 1, + + GH__SERVER_TYPE__NR, +}; + +static const char *gh__server_type_label[GH__SERVER_TYPE__NR] = { + "(main)", + "(cs)" +}; + +enum gh__objects_mode { + /* + * Bulk fetch objects. + * + * But also, force the use of HTTP POST regardless of how many + * objects we are requesting. + * + * The GVFS Protocol treats requests for commit objects + * differently in GET and POST requests WRT whether it + * automatically also fetches the referenced trees. + */ + GH__OBJECTS_MODE__POST, + + /* + * Fetch objects one at a time using HTTP GET. + * + * Force the use of GET (primarily because of the commit + * object treatment). + */ + GH__OBJECTS_MODE__GET, +}; + +struct gh__azure_throttle +{ + unsigned long tstu_limit; + unsigned long tstu_remaining; + + unsigned long reset_sec; + unsigned long retry_after_sec; +}; + +static void gh__azure_throttle__zero(struct gh__azure_throttle *azure) +{ + azure->tstu_limit = 0; + azure->tstu_remaining = 0; + azure->reset_sec = 0; + azure->retry_after_sec = 0; +} + +#define GH__AZURE_THROTTLE_INIT { \ + .tstu_limit = 0, \ + .tstu_remaining = 0, \ + .reset_sec = 0, \ + .retry_after_sec = 0, \ + } + +static struct gh__azure_throttle gh__global_throttle[GH__SERVER_TYPE__NR] = { + GH__AZURE_THROTTLE_INIT, + GH__AZURE_THROTTLE_INIT, +}; + /* * Stolen from http.c */ @@ -260,9 +379,27 @@ enum gh__progress_state { * Parameters to drive an HTTP request (with any necessary retries). */ struct gh__request_params { - int b_is_post; /* POST=1 or GET=0 */ + /* + * b_is_post indicates if the current HTTP request is a POST=1 or + * a GET=0. This is a lower level field used to setup CURL and + * the tempfile used to receive the content. + * + * It is related to, but different from the GH__OBJECTS_MODE__ + * field that we present to the gvfs-helper client or in the CLI + * (which only concerns the semantics of the /gvfs/objects protocol + * on the set of requested OIDs). + * + * For example, we use an HTTP GET to get the /gvfs/config data + * into a buffer. + */ + int b_is_post; int b_write_to_file; /* write to file=1 or strbuf=0 */ - int b_no_cache_server; /* force main server only */ + int b_permit_cache_server_if_defined; + + enum gh__server_type server_type; + + int k_attempt; /* robust retry attempt */ + int k_transient_delay_sec; /* delay before transient error retries */ unsigned long object_count; /* number of objects being fetched */ @@ -271,9 +408,16 @@ struct gh__request_params { struct curl_slist *headers; /* additional http headers to send */ struct tempfile *tempfile; /* for response content when file */ struct strbuf *buffer; /* for response content when strbuf */ - struct strbuf label; /* for trace2 regions */ + struct strbuf tr2_label; /* for trace2 regions */ struct strbuf loose_path; + struct object_id loose_oid; + + struct strbuf temp_path_pack; + struct strbuf temp_path_idx; + struct strbuf final_path_pack; + struct strbuf final_path_idx; + struct strbuf final_packfile_filename; /* * Note that I am putting all of the progress-related instance data @@ -295,24 +439,36 @@ struct gh__request_params { */ struct strbuf progress_msg; struct progress *progress; + + struct strbuf e2eid; }; #define GH__REQUEST_PARAMS_INIT { \ .b_is_post = 0, \ .b_write_to_file = 0, \ - .b_no_cache_server = 0, \ + .b_permit_cache_server_if_defined = 1, \ + .server_type = GH__SERVER_TYPE__MAIN, \ + .k_attempt = 0, \ + .k_transient_delay_sec = 0, \ .object_count = 0, \ .post_payload = NULL, \ .headers = NULL, \ .tempfile = NULL, \ .buffer = NULL, \ - .label = STRBUF_INIT, \ + .tr2_label = STRBUF_INIT, \ .loose_path = STRBUF_INIT, \ + .loose_oid = {{0}}, \ + .temp_path_pack = STRBUF_INIT, \ + .temp_path_idx = STRBUF_INIT, \ + .final_path_pack = STRBUF_INIT, \ + .final_path_idx = STRBUF_INIT, \ + .final_packfile_filename = STRBUF_INIT, \ .progress_state = GH__PROGRESS_STATE__START, \ .progress_base_phase2_msg = STRBUF_INIT, \ .progress_base_phase3_msg = STRBUF_INIT, \ .progress_msg = STRBUF_INIT, \ .progress = NULL, \ + .e2eid = STRBUF_INIT, \ } static void gh__request_params__release(struct gh__request_params *params) @@ -329,8 +485,13 @@ static void gh__request_params__release(struct gh__request_params *params) params->buffer = NULL; /* we do not own this */ - strbuf_release(¶ms->label); + strbuf_release(¶ms->tr2_label); strbuf_release(¶ms->loose_path); + strbuf_release(¶ms->temp_path_pack); + strbuf_release(¶ms->temp_path_idx); + strbuf_release(¶ms->final_path_pack); + strbuf_release(¶ms->final_path_idx); + strbuf_release(¶ms->final_packfile_filename); strbuf_release(¶ms->progress_base_phase2_msg); strbuf_release(¶ms->progress_base_phase3_msg); @@ -338,8 +499,55 @@ static void gh__request_params__release(struct gh__request_params *params) stop_progress(¶ms->progress); params->progress = NULL; + + strbuf_release(¶ms->e2eid); } +/* + * How we handle retries for various unexpected network errors. + */ +enum gh__retry_mode { + /* + * The operation was successful, so no retry is needed. + * Use this for HTTP 200, for example. + */ + GH__RETRY_MODE__SUCCESS = 0, + + /* + * Retry using the normal 401 Auth mechanism. + */ + GH__RETRY_MODE__HTTP_401, + + /* + * Fail because at least one of the requested OIDs does not exist. + */ + GH__RETRY_MODE__FAIL_404, + + /* + * A transient network error, such as dropped connection + * or network IO error. Our belief is that a retry MAY + * succeed. (See Gremlins and Cosmic Rays....) + */ + GH__RETRY_MODE__TRANSIENT, + + /* + * Request was blocked completely because of a 429. + */ + GH__RETRY_MODE__HTTP_429, + + /* + * Request failed because the server was (temporarily?) offline. + */ + GH__RETRY_MODE__HTTP_503, + + /* + * The operation had a hard failure and we have no + * expectation that a second attempt will give a different + * answer, such as a bad hostname or a mal-formed URL. + */ + GH__RETRY_MODE__HARD_FAIL, +}; + /* * Bucket to describe the results of an HTTP requests (may be * overwritten during retries so that it describes the final attempt). @@ -347,29 +555,344 @@ static void gh__request_params__release(struct gh__request_params *params) struct gh__response_status { struct strbuf error_message; struct strbuf content_type; - long response_code; /* http response code */ - CURLcode curl_code; enum gh__error_code ec; + enum gh__retry_mode retry; intmax_t bytes_received; + struct gh__azure_throttle *azure; }; #define GH__RESPONSE_STATUS_INIT { \ .error_message = STRBUF_INIT, \ .content_type = STRBUF_INIT, \ - .response_code = 0, \ - .curl_code = CURLE_OK, \ .ec = GH__ERROR_CODE__OK, \ + .retry = GH__RETRY_MODE__SUCCESS, \ .bytes_received = 0, \ + .azure = NULL, \ } static void gh__response_status__zero(struct gh__response_status *s) { strbuf_setlen(&s->error_message, 0); strbuf_setlen(&s->content_type, 0); - s->response_code = 0; - s->curl_code = CURLE_OK; s->ec = GH__ERROR_CODE__OK; + s->retry = GH__RETRY_MODE__SUCCESS; s->bytes_received = 0; + s->azure = NULL; +} + +static void install_packfile(struct gh__request_params *params, + struct gh__response_status *status); +static void install_loose(struct gh__request_params *params, + struct gh__response_status *status); + +/* + * Log the E2EID for the current request. + * + * Since every HTTP request to the cache-server and to the main Git server + * will send back a unique E2EID (probably a GUID), we don't want to overload + * telemetry with each ID -- rather, only the ones for which there was a + * problem and that may be helpful in a post mortem. + */ +static void log_e2eid(struct gh__request_params *params, + struct gh__response_status *status) +{ + if (!params->e2eid.len) + return; + + switch (status->retry) { + default: + case GH__RETRY_MODE__SUCCESS: + case GH__RETRY_MODE__HTTP_401: + case GH__RETRY_MODE__FAIL_404: + return; + + case GH__RETRY_MODE__HARD_FAIL: + case GH__RETRY_MODE__TRANSIENT: + case GH__RETRY_MODE__HTTP_429: + case GH__RETRY_MODE__HTTP_503: + break; + } + + if (trace2_is_enabled()) { + struct strbuf key = STRBUF_INIT; + + strbuf_addstr(&key, "e2eid"); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + trace2_data_string("gvfs-helper", NULL, key.buf, + params->e2eid.buf); + + strbuf_release(&key); + } +} + +/* + * Normalize a few HTTP response codes before we try to decide + * how to dispatch on them. + */ +static long gh__normalize_odd_codes(struct gh__request_params *params, + long http_response_code) +{ + if (params->server_type == GH__SERVER_TYPE__CACHE && + http_response_code == 400) { + /* + * The cache-server sends a somewhat bogus 400 instead of + * the normal 401 when AUTH is required. Fixup the status + * to hide that. + * + * TODO Technically, the cache-server could send a 400 + * TODO for many reasons, not just for their bogus + * TODO pseudo-401, but we're going to assume it is a + * TODO 401 for now. We should confirm the expected + * TODO error message in the response-body. + */ + return 401; + } + + if (http_response_code == 203) { + /* + * A proxy server transformed a 200 from the origin server + * into a 203. We don't care about the subtle distinction. + */ + return 200; + } + + return http_response_code; +} + +/* + * Map HTTP response codes into a retry strategy. + * See https://en.wikipedia.org/wiki/List_of_HTTP_status_codes + * + * https://docs.microsoft.com/en-us/azure/devops/integrate/concepts/rate-limits?view=azure-devops + */ +static void compute_retry_mode_from_http_response( + struct gh__response_status *status, + long http_response_code) +{ + switch (http_response_code) { + + case 200: + status->retry = GH__RETRY_MODE__SUCCESS; + status->ec = GH__ERROR_CODE__OK; + return; + + case 301: /* all the various flavors of HTTP Redirect */ + case 302: + case 303: + case 304: + case 305: + case 306: + case 307: + case 308: + /* + * TODO Consider a redirected-retry (with or without + * TODO a Retry-After header). + */ + goto hard_fail; + + case 401: + strbuf_addstr(&status->error_message, + "(http:401) Not Authorized"); + status->retry = GH__RETRY_MODE__HTTP_401; + status->ec = GH__ERROR_CODE__HTTP_401; + return; + + case 404: + /* + * TODO if params->object_count > 1, consider + * TODO splitting the request into 2 halves + * TODO and retrying each half in series. + */ + strbuf_addstr(&status->error_message, + "(http:404) Not Found"); + status->retry = GH__RETRY_MODE__FAIL_404; + status->ec = GH__ERROR_CODE__HTTP_404; + return; + + case 429: + /* + * This is a hard block because we've been bad. + */ + strbuf_addstr(&status->error_message, + "(http:429) Too Many Requests [throttled]"); + status->retry = GH__RETRY_MODE__HTTP_429; + status->ec = GH__ERROR_CODE__HTTP_429; + + trace2_data_string("gvfs-helper", NULL, "error/http", + status->error_message.buf); + return; + + case 503: + /* + * We assume that this comes with a "Retry-After" header like 429s. + */ + strbuf_addstr(&status->error_message, + "(http:503) Server Unavailable [throttled]"); + status->retry = GH__RETRY_MODE__HTTP_503; + status->ec = GH__ERROR_CODE__HTTP_503; + + trace2_data_string("gvfs-helper", NULL, "error/http", + status->error_message.buf); + return; + + default: + goto hard_fail; + } + +hard_fail: + strbuf_addf(&status->error_message, "(http:%d) Other [hard_fail]", + (int)http_response_code); + status->retry = GH__RETRY_MODE__HARD_FAIL; + status->ec = GH__ERROR_CODE__HTTP_OTHER; + + trace2_data_string("gvfs-helper", NULL, "error/http", + status->error_message.buf); + return; +} + +/* + * Map CURLE errors code to a retry strategy. + * See and + * https://curl.haxx.se/libcurl/c/libcurl-errors.html + * + * This could be a static table rather than a switch, but + * that is harder to debug and we may want to selectively + * log errors. + * + * I've commented out all of the hard-fail cases for now + * and let the default handle them. This is to indicate + * that I considered them and found them to be not actionable. + * Also, the spelling of some of the CURLE_ symbols seem + * to change between curl releases on different platforms, + * so I'm not going to fight that. + */ +static void compute_retry_mode_from_curl_error( + struct gh__response_status *status, + CURLcode curl_code) +{ + switch (curl_code) { + case CURLE_OK: + status->retry = GH__RETRY_MODE__SUCCESS; + status->ec = GH__ERROR_CODE__OK; + return; + + //se CURLE_UNSUPPORTED_PROTOCOL: goto hard_fail; + //se CURLE_FAILED_INIT: goto hard_fail; + //se CURLE_URL_MALFORMAT: goto hard_fail; + //se CURLE_NOT_BUILT_IN: goto hard_fail; + //se CURLE_COULDNT_RESOLVE_PROXY: goto hard_fail; + //se CURLE_COULDNT_RESOLVE_HOST: goto hard_fail; + case CURLE_COULDNT_CONNECT: goto transient; + //se CURLE_WEIRD_SERVER_REPLY: goto hard_fail; + //se CURLE_REMOTE_ACCESS_DENIED: goto hard_fail; + //se CURLE_FTP_ACCEPT_FAILED: goto hard_fail; + //se CURLE_FTP_WEIRD_PASS_REPLY: goto hard_fail; + //se CURLE_FTP_ACCEPT_TIMEOUT: goto hard_fail; + //se CURLE_FTP_WEIRD_PASV_REPLY: goto hard_fail; + //se CURLE_FTP_WEIRD_227_FORMAT: goto hard_fail; + //se CURLE_FTP_CANT_GET_HOST: goto hard_fail; + case CURLE_HTTP2: goto transient; + //se CURLE_FTP_COULDNT_SET_TYPE: goto hard_fail; + case CURLE_PARTIAL_FILE: goto transient; + //se CURLE_FTP_COULDNT_RETR_FILE: goto hard_fail; + //se CURLE_OBSOLETE20: goto hard_fail; + //se CURLE_QUOTE_ERROR: goto hard_fail; + //se CURLE_HTTP_RETURNED_ERROR: goto hard_fail; + case CURLE_WRITE_ERROR: goto transient; + //se CURLE_OBSOLETE24: goto hard_fail; + case CURLE_UPLOAD_FAILED: goto transient; + //se CURLE_READ_ERROR: goto hard_fail; + //se CURLE_OUT_OF_MEMORY: goto hard_fail; + case CURLE_OPERATION_TIMEDOUT: goto transient; + //se CURLE_OBSOLETE29: goto hard_fail; + //se CURLE_FTP_PORT_FAILED: goto hard_fail; + //se CURLE_FTP_COULDNT_USE_REST: goto hard_fail; + //se CURLE_OBSOLETE32: goto hard_fail; + //se CURLE_RANGE_ERROR: goto hard_fail; + case CURLE_HTTP_POST_ERROR: goto transient; + //se CURLE_SSL_CONNECT_ERROR: goto hard_fail; + //se CURLE_BAD_DOWNLOAD_RESUME: goto hard_fail; + //se CURLE_FILE_COULDNT_READ_FILE: goto hard_fail; + //se CURLE_LDAP_CANNOT_BIND: goto hard_fail; + //se CURLE_LDAP_SEARCH_FAILED: goto hard_fail; + //se CURLE_OBSOLETE40: goto hard_fail; + //se CURLE_FUNCTION_NOT_FOUND: goto hard_fail; + //se CURLE_ABORTED_BY_CALLBACK: goto hard_fail; + //se CURLE_BAD_FUNCTION_ARGUMENT: goto hard_fail; + //se CURLE_OBSOLETE44: goto hard_fail; + //se CURLE_INTERFACE_FAILED: goto hard_fail; + //se CURLE_OBSOLETE46: goto hard_fail; + //se CURLE_TOO_MANY_REDIRECTS: goto hard_fail; + //se CURLE_UNKNOWN_OPTION: goto hard_fail; + //se CURLE_TELNET_OPTION_SYNTAX: goto hard_fail; + //se CURLE_OBSOLETE50: goto hard_fail; + //se CURLE_PEER_FAILED_VERIFICATION: goto hard_fail; + //se CURLE_GOT_NOTHING: goto hard_fail; + //se CURLE_SSL_ENGINE_NOTFOUND: goto hard_fail; + //se CURLE_SSL_ENGINE_SETFAILED: goto hard_fail; + case CURLE_SEND_ERROR: goto transient; + case CURLE_RECV_ERROR: goto transient; + //se CURLE_OBSOLETE57: goto hard_fail; + //se CURLE_SSL_CERTPROBLEM: goto hard_fail; + //se CURLE_SSL_CIPHER: goto hard_fail; + //se CURLE_SSL_CACERT: goto hard_fail; + //se CURLE_BAD_CONTENT_ENCODING: goto hard_fail; + //se CURLE_LDAP_INVALID_URL: goto hard_fail; + //se CURLE_FILESIZE_EXCEEDED: goto hard_fail; + //se CURLE_USE_SSL_FAILED: goto hard_fail; + //se CURLE_SEND_FAIL_REWIND: goto hard_fail; + //se CURLE_SSL_ENGINE_INITFAILED: goto hard_fail; + //se CURLE_LOGIN_DENIED: goto hard_fail; + //se CURLE_TFTP_NOTFOUND: goto hard_fail; + //se CURLE_TFTP_PERM: goto hard_fail; + //se CURLE_REMOTE_DISK_FULL: goto hard_fail; + //se CURLE_TFTP_ILLEGAL: goto hard_fail; + //se CURLE_TFTP_UNKNOWNID: goto hard_fail; + //se CURLE_REMOTE_FILE_EXISTS: goto hard_fail; + //se CURLE_TFTP_NOSUCHUSER: goto hard_fail; + //se CURLE_CONV_FAILED: goto hard_fail; + //se CURLE_CONV_REQD: goto hard_fail; + //se CURLE_SSL_CACERT_BADFILE: goto hard_fail; + //se CURLE_REMOTE_FILE_NOT_FOUND: goto hard_fail; + //se CURLE_SSH: goto hard_fail; + //se CURLE_SSL_SHUTDOWN_FAILED: goto hard_fail; + case CURLE_AGAIN: goto transient; + //se CURLE_SSL_CRL_BADFILE: goto hard_fail; + //se CURLE_SSL_ISSUER_ERROR: goto hard_fail; + //se CURLE_FTP_PRET_FAILED: goto hard_fail; + //se CURLE_RTSP_CSEQ_ERROR: goto hard_fail; + //se CURLE_RTSP_SESSION_ERROR: goto hard_fail; + //se CURLE_FTP_BAD_FILE_LIST: goto hard_fail; + //se CURLE_CHUNK_FAILED: goto hard_fail; + //se CURLE_NO_CONNECTION_AVAILABLE: goto hard_fail; + //se CURLE_SSL_PINNEDPUBKEYNOTMATCH: goto hard_fail; + //se CURLE_SSL_INVALIDCERTSTATUS: goto hard_fail; +#ifdef CURLE_HTTP2_STREAM + case CURLE_HTTP2_STREAM: goto transient; +#endif + default: goto hard_fail; + } + +hard_fail: + strbuf_addf(&status->error_message, "(curl:%d) %s [hard_fail]", + curl_code, curl_easy_strerror(curl_code)); + status->retry = GH__RETRY_MODE__HARD_FAIL; + status->ec = GH__ERROR_CODE__CURL_ERROR; + + trace2_data_string("gvfs-helper", NULL, "error/curl", + status->error_message.buf); + return; + +transient: + strbuf_addf(&status->error_message, "(curl:%d) %s [transient]", + curl_code, curl_easy_strerror(curl_code)); + status->retry = GH__RETRY_MODE__TRANSIENT; + status->ec = GH__ERROR_CODE__CURL_ERROR; + + trace2_data_string("gvfs-helper", NULL, "error/curl", + status->error_message.buf); + return; } /* @@ -382,40 +905,31 @@ static void gh__response_status__set_from_slot( struct gh__response_status *status, const struct active_request_slot *slot) { - status->curl_code = slot->results->curl_result; + long http_response_code; + CURLcode curl_code; + + curl_code = slot->results->curl_result; gh__curlinfo_strbuf(slot->curl, CURLINFO_CONTENT_TYPE, &status->content_type); curl_easy_getinfo(slot->curl, CURLINFO_RESPONSE_CODE, - &status->response_code); + &http_response_code); strbuf_setlen(&status->error_message, 0); - if (status->response_code == 200) - status->ec = GH__ERROR_CODE__OK; - - else if (status->response_code == 401) { - strbuf_addstr(&status->error_message, "401 Not Authorized"); - status->ec = GH__ERROR_CODE__HTTP_401; - - } else if (status->response_code == 404) { - strbuf_addstr(&status->error_message, "404 Not Found"); - status->ec = GH__ERROR_CODE__HTTP_404; - - } else if (status->curl_code != CURLE_OK) { - strbuf_addf(&status->error_message, "%s (curl)", - curl_easy_strerror(status->curl_code)); - status->ec = GH__ERROR_CODE__CURL_ERROR; + http_response_code = gh__normalize_odd_codes(params, + http_response_code); - trace2_data_string("gvfs-helper", NULL, - "error/curl", status->error_message.buf); - } else { - strbuf_addf(&status->error_message, "HTTP %ld Unexpected", - status->response_code); - status->ec = GH__ERROR_CODE__HTTP_UNEXPECTED_CODE; - - trace2_data_string("gvfs-helper", NULL, - "error/http", status->error_message.buf); - } + /* + * Use normalized response/status codes form curl/http to decide + * how to set the error-code we propagate *AND* to decide if we + * we should retry because of transient network problems. + */ + if (curl_code == CURLE_OK || + curl_code == CURLE_HTTP_RETURNED_ERROR) + compute_retry_mode_from_http_response(status, + http_response_code); + else + compute_retry_mode_from_curl_error(status, curl_code); if (status->ec != GH__ERROR_CODE__OK) status->bytes_received = 0; @@ -433,26 +947,6 @@ static void gh__response_status__release(struct gh__response_status *status) strbuf_release(&status->content_type); } -/* - * The cache-server sends a somewhat bogus 400 instead of - * the normal 401 when AUTH is required. Fixup the status - * to hide that. - */ -static void fixup_cache_server_400_to_401(struct gh__response_status *status) -{ - if (status->response_code != 400) - return; - - /* - * TODO Technically, the cache-server could send a 400 - * TODO for many reasons, not just for their bogus - * TODO pseudo-401, but we're going to assume it is a - * TODO 401 for now. We should confirm the expected - * TODO error message in the response-body. - */ - status->response_code = 401; -} - static int gh__curl_progress_cb(void *clientp, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow) @@ -543,8 +1037,13 @@ static int gh__curl_progress_cb(void *clientp, enter_phase_2: strbuf_setlen(¶ms->progress_msg, 0); if (params->progress_base_phase2_msg.len) { - strbuf_addf(¶ms->progress_msg, "%s (bytes sent)", - params->progress_base_phase2_msg.buf); + if (params->k_attempt > 0) + strbuf_addf(¶ms->progress_msg, "%s [retry %d/%d] (bytes sent)", + params->progress_base_phase2_msg.buf, + params->k_attempt, gh__cmd_opts.max_retries); + else + strbuf_addf(¶ms->progress_msg, "%s (bytes sent)", + params->progress_base_phase2_msg.buf); params->progress = start_progress(params->progress_msg.buf, ultotal); display_progress(params->progress, ulnow); } @@ -554,8 +1053,13 @@ static int gh__curl_progress_cb(void *clientp, enter_phase_3: strbuf_setlen(¶ms->progress_msg, 0); if (params->progress_base_phase3_msg.len) { - strbuf_addf(¶ms->progress_msg, "%s (bytes received)", - params->progress_base_phase3_msg.buf); + if (params->k_attempt > 0) + strbuf_addf(¶ms->progress_msg, "%s [retry %d/%d] (bytes received)", + params->progress_base_phase3_msg.buf, + params->k_attempt, gh__cmd_opts.max_retries); + else + strbuf_addf(¶ms->progress_msg, "%s (bytes received)", + params->progress_base_phase3_msg.buf); params->progress = start_progress(params->progress_msg.buf, dltotal); display_progress(params->progress, dlnow); } @@ -572,12 +1076,19 @@ static void gh__run_one_slot(struct active_request_slot *slot, struct gh__request_params *params, struct gh__response_status *status) { - trace2_region_enter("gvfs-helper", params->label.buf, NULL); + struct strbuf key = STRBUF_INIT; + + strbuf_addbuf(&key, ¶ms->tr2_label); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + params->progress_state = GH__PROGRESS_STATE__START; + strbuf_setlen(¶ms->e2eid, 0); + + trace2_region_enter("gvfs-helper", key.buf, NULL); if (!start_active_slot(slot)) { - status->curl_code = CURLE_FAILED_INIT; /* a bit of a lie */ - strbuf_addstr(&status->error_message, - "failed to start HTTP request"); + compute_retry_mode_from_curl_error(status, + CURLE_FAILED_INIT); } else { run_active_slot(slot); if (params->b_write_to_file) @@ -585,27 +1096,38 @@ static void gh__run_one_slot(struct active_request_slot *slot, gh__response_status__set_from_slot(params, status, slot); - if (status->ec == GH__ERROR_CODE__OK) { - int old_len = params->label.len; + log_e2eid(params, status); - strbuf_addstr(¶ms->label, "/nr_objects"); - trace2_data_intmax("gvfs-helper", NULL, - params->label.buf, - params->object_count); - strbuf_setlen(¶ms->label, old_len); + if (status->ec == GH__ERROR_CODE__OK) { + int old_len = key.len; - strbuf_addstr(¶ms->label, "/nr_bytes"); + /* + * We only log the number of bytes received. + * We do not log the number of objects requested + * because the server may give us more than that + * (such as when we request a commit). + */ + strbuf_addstr(&key, "/nr_bytes"); trace2_data_intmax("gvfs-helper", NULL, - params->label.buf, + key.buf, status->bytes_received); - strbuf_setlen(¶ms->label, old_len); + strbuf_setlen(&key, old_len); } } if (params->progress) stop_progress(¶ms->progress); - trace2_region_leave("gvfs-helper", params->label.buf, NULL); + if (status->ec == GH__ERROR_CODE__OK && params->b_write_to_file) { + if (params->b_is_post && params->object_count > 1) + install_packfile(params, status); + else + install_loose(params, status); + } + + trace2_region_leave("gvfs-helper", key.buf, NULL); + + strbuf_release(&key); } static int option_parse_cache_server_mode(const struct option *opt, @@ -753,8 +1275,8 @@ static void lookup_main_url(void) trace2_data_string("gvfs-helper", NULL, "remote/url", gh__global.main_url); } -static void do__gvfs_config(struct gh__response_status *status, - struct strbuf *config_data); +static void do__http_get__gvfs_config(struct gh__response_status *status, + struct strbuf *config_data); /* * Find the URL of the cache-server, if we have one. @@ -813,7 +1335,7 @@ static void select_cache_server(void) * well-known by the main Git server. */ - do__gvfs_config(&status, &config_data); + do__http_get__gvfs_config(&status, &config_data); if (status.ec == GH__ERROR_CODE__OK) { /* @@ -871,13 +1393,10 @@ static void select_cache_server(void) * Read stdin until EOF (or a blank line) and add the desired OIDs * to the oidset. * - * Stdin should contain a list of OIDs. It may have additional - * decoration that we need to strip out. - * - * We expect: - * [] // present OIDs + * Stdin should contain a list of OIDs. Lines may have additional + * text following the OID that we ignore. */ -static unsigned long read_stdin_from_rev_list(struct oidset *oids) +static unsigned long read_stdin_for_oids(struct oidset *oids) { struct object_id oid; struct strbuf buf_stdin = STRBUF_INIT; @@ -899,17 +1418,23 @@ static unsigned long read_stdin_from_rev_list(struct oidset *oids) /* * Build a complete JSON payload for a gvfs/objects POST request - * containing the first n OIDs in an OIDSET index by the iterator. + * containing the first `nr_in_block` OIDs found in the OIDSET + * indexed by the given iterator. * * https://github.com/microsoft/VFSForGit/blob/master/Protocol.md + * + * Return the number of OIDs we actually put into the payload. + * If only 1 OID was found, also return it. */ static unsigned long build_json_payload__gvfs_objects( struct json_writer *jw_req, struct oidset_iter *iter, - unsigned long nr_in_block) + unsigned long nr_in_block, + struct object_id *oid_out) { unsigned long k; const struct object_id *oid; + const struct object_id *oid_prev = NULL; k = 0; @@ -920,10 +1445,18 @@ static unsigned long build_json_payload__gvfs_objects( while (k < nr_in_block && (oid = oidset_iter_next(iter))) { jw_array_string(jw_req, oid_to_hex(oid)); k++; + oid_prev = oid; } jw_end(jw_req); jw_end(jw_req); + if (oid_out) { + if (k == 1) + oidcpy(oid_out, oid_prev); + else + oidclr(oid_out); + } + return k; } @@ -1053,45 +1586,146 @@ static void select_odb(void) * * TODO Consider using lockfile for this rather than naked tempfile. */ -static struct tempfile *create_tempfile_for_packfile(void) +static void create_tempfile_for_packfile( + struct gh__request_params *params, + struct gh__response_status *status) { static unsigned int nth = 0; static struct timeval tv = {0}; static struct tm tm = {0}; static time_t secs = 0; - static char tbuf[32] = {0}; + static char date[32] = {0}; - struct tempfile *tempfile = NULL; - struct strbuf buf_path = STRBUF_INIT; + struct strbuf basename = STRBUF_INIT; + struct strbuf buf = STRBUF_INIT; + int len_p; + enum scld_error scld; + + gh__response_status__zero(status); if (!nth) { + /* + * Create a string to use in the name of all packfiles + * created by this process. + */ gettimeofday(&tv, NULL); secs = tv.tv_sec; gmtime_r(&secs, &tm); - xsnprintf(tbuf, sizeof(tbuf), "%4d%02d%02d-%02d%02d%02d-%06ld", + xsnprintf(date, sizeof(date), "%4d%02d%02d-%02d%02d%02d-%06ld", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, (long)tv.tv_usec); } - // TODO should this be in the "/pack/tempPacks/" - // TODO directory instead? YES + /* + * Create a for this packfile using a series number , + * so that all of the chunks we download will group together. + */ + strbuf_addf(&basename, "vfs-%s-%04d", date, nth++); - strbuf_addbuf(&buf_path, &gh__global.buf_odb_path); - strbuf_complete(&buf_path, '/'); - strbuf_addf(&buf_path, "pack/vfs-%s-%04d.temp", tbuf, nth++); + /* + * We will stream the data into a managed tempfile() in: + * + * "/pack/tempPacks/vfs--.temp" + */ + strbuf_setlen(&buf, 0); + strbuf_addbuf(&buf, &gh__global.buf_odb_path); + strbuf_complete(&buf, '/'); + strbuf_addstr(&buf, "pack/"); + len_p = buf.len; + strbuf_addstr(&buf, "tempPacks/"); + strbuf_addbuf(&buf, &basename); + strbuf_addstr(&buf, ".temp"); + + scld = safe_create_leading_directories(buf.buf); + if (scld != SCLD_OK && scld != SCLD_EXISTS) { + strbuf_addf(&status->error_message, + "could not create directory for packfile: '%s'", + buf.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + goto cleanup; + } - tempfile = create_tempfile(buf_path.buf); - fdopen_tempfile(tempfile, "w"); + params->tempfile = create_tempfile(buf.buf); + if (!params->tempfile) { + strbuf_addf(&status->error_message, + "could not create tempfile for packfile: '%s'", + buf.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + goto cleanup; + } - strbuf_release(&buf_path); + fdopen_tempfile(params->tempfile, "w"); - return tempfile; -} + /* + * After the download is complete, we will need to steal the file + * from the tempfile() class (so that it doesn't magically delete + * it when we close the file handle) and then index it. + * + * We do this into the tempPacks directory to avoid contaminating + * the real pack directory until we know there is no corruption. + * + * "/pack/tempPacks/vfs--.temp.pack" + * "/pack/tempPacks/vfs--.temp.idx" + */ + strbuf_setlen(¶ms->temp_path_pack, 0); + strbuf_addf(¶ms->temp_path_pack, "%s.pack", buf.buf); -/* - * Create a tempfile to stream a loose object into. + strbuf_setlen(¶ms->temp_path_idx, 0); + strbuf_addf(¶ms->temp_path_idx, "%s.idx", buf.buf); + + /* + * Later, if all goes well, we will install them as: + * + * "/pack/vfs--.pack" + * "/pack/vfs--.idx" + */ + strbuf_setlen(&buf, len_p); + strbuf_setlen(¶ms->final_path_pack, 0); + strbuf_addf(¶ms->final_path_pack, "%s%s.pack", + buf.buf, basename.buf); + strbuf_setlen(¶ms->final_path_idx, 0); + strbuf_addf(¶ms->final_path_idx, "%s%s.idx", + buf.buf, basename.buf); + strbuf_setlen(¶ms->final_packfile_filename, 0); + strbuf_addf(¶ms->final_packfile_filename, "%s.pack", + basename.buf); + +cleanup: + strbuf_release(&buf); + strbuf_release(&basename); +} + +/* + * Create a pathname to the loose object in the shared-cache ODB + * with the given OID. Try to "mkdir -p" to ensure the parent + * directories exist. + */ +static int create_loose_pathname_in_odb(struct strbuf *buf_path, + const struct object_id *oid) +{ + enum scld_error scld; + const char *hex; + + hex = oid_to_hex(oid); + + strbuf_setlen(buf_path, 0); + strbuf_addbuf(buf_path, &gh__global.buf_odb_path); + strbuf_complete(buf_path, '/'); + strbuf_add(buf_path, hex, 2); + strbuf_addch(buf_path, '/'); + strbuf_addstr(buf_path, hex+2); + + scld = safe_create_leading_directories(buf_path->buf); + if (scld != SCLD_OK && scld != SCLD_EXISTS) + return -1; + + return 0; +} + +/* + * Create a tempfile to stream a loose object into. * * We create a tempfile in the chosen ODB directory and let CURL * automatically stream data to the file. @@ -1100,23 +1734,14 @@ static struct tempfile *create_tempfile_for_packfile(void) */ static void create_tempfile_for_loose( struct gh__request_params *params, - struct gh__response_status *status, - const struct object_id *oid) + struct gh__response_status *status) { + static int nth = 0; struct strbuf buf_path = STRBUF_INIT; - const char *hex; gh__response_status__zero(status); - hex = oid_to_hex(oid); - - strbuf_addbuf(&buf_path, &gh__global.buf_odb_path); - strbuf_complete(&buf_path, '/'); - strbuf_add(&buf_path, hex, 2); - - if (!file_exists(buf_path.buf) && - mkdir(buf_path.buf, 0777) == -1 && - !file_exists(buf_path.buf)) { + if (create_loose_pathname_in_odb(&buf_path, ¶ms->loose_oid)) { strbuf_addf(&status->error_message, "cannot create directory for loose object '%s'", buf_path.buf); @@ -1124,9 +1749,6 @@ static void create_tempfile_for_loose( goto cleanup; } - strbuf_addch(&buf_path, '/'); - strbuf_addstr(&buf_path, hex+2); - /* Remember the full path of the final destination. */ strbuf_setlen(¶ms->loose_path, 0); strbuf_addbuf(¶ms->loose_path, &buf_path); @@ -1136,7 +1758,7 @@ static void create_tempfile_for_loose( * using lockfiles to avoid issues with stale locks after * crashes. */ - strbuf_addf(&buf_path, ".%08u.temp", getpid()); + strbuf_addf(&buf_path, ".%08u.%.06u.temp", getpid(), nth++); params->tempfile = create_tempfile(buf_path.buf); if (!params->tempfile) { @@ -1153,85 +1775,34 @@ static void create_tempfile_for_loose( } /* - * Extract the filename portion of the given pathname. - * - * TODO Wish I could find a strbuf_filename() function for this. - */ -static void extract_filename(struct strbuf *filename, - const struct strbuf *pathname) -{ - size_t len = pathname->len; - - strbuf_setlen(filename, 0); - - while (len > 0 && !is_dir_sep(pathname->buf[len - 1])) - len--; - - strbuf_addstr(filename, &pathname->buf[len]); -} - -/* - * Convert the tempfile into a permanent .pack packfile in the ODB. - * Create the corresponding .idx file. - * - * Return the filename (not pathname) of the resulting packfile. + * Convert the tempfile into a temporary .pack, index it into a temporary .idx + * file, and then install the pair into ODB. */ -static void install_packfile(struct gh__response_status *status, - struct tempfile **pp_tempfile, - struct strbuf *packfile_filename) +static void install_packfile(struct gh__request_params *params, + struct gh__response_status *status) { struct child_process ip = CHILD_PROCESS_INIT; - struct strbuf pack_name_tmp = STRBUF_INIT; - struct strbuf pack_name_dst = STRBUF_INIT; - struct strbuf idx_name_tmp = STRBUF_INIT; - struct strbuf idx_name_dst = STRBUF_INIT; - size_t len_base; - - gh__response_status__zero(status); - - strbuf_setlen(packfile_filename, 0); /* - * start with ".temp" (that is owned by tempfile class). - * rename to ".pack.temp" to break ownership. - * - * create ".idx.temp" on provisional packfile. - * - * officially install both ".{pack,idx}.temp" as - * ".{pack,idx}". + * When we request more than 1 object, the server should always + * send us a packfile. */ - - strbuf_addstr(&pack_name_tmp, get_tempfile_path(*pp_tempfile)); - if (!strip_suffix(pack_name_tmp.buf, ".temp", &len_base)) { - /* - * This is more of a BUG(), but I want the error - * code propagated. - */ + if (strcmp(status->content_type.buf, + "application/x-git-packfile")) { strbuf_addf(&status->error_message, - "packfile tempfile does not end in '.temp': '%s'", - pack_name_tmp.buf); - status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + "install_packfile: received unknown content-type '%s'", + status->content_type.buf); + status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; goto cleanup; } - strbuf_setlen(&pack_name_tmp, (int)len_base); - strbuf_addbuf(&pack_name_dst, &pack_name_tmp); - strbuf_addbuf(&idx_name_tmp, &pack_name_tmp); - strbuf_addbuf(&idx_name_dst, &pack_name_tmp); - - strbuf_addstr(&pack_name_tmp, ".pack.temp"); - strbuf_addstr(&pack_name_dst, ".pack"); - strbuf_addstr(&idx_name_tmp, ".idx.temp"); - strbuf_addstr(&idx_name_dst, ".idx"); - - // TODO if either pack_name_dst or idx_name_dst already - // TODO exists in the ODB, create alternate names so that - // TODO we don't step on them. + gh__response_status__zero(status); - if (rename_tempfile(pp_tempfile, pack_name_tmp.buf) == -1) { + if (rename_tempfile(¶ms->tempfile, + params->temp_path_pack.buf) == -1) { strbuf_addf(&status->error_message, "could not rename packfile to '%s'", - pack_name_tmp.buf); + params->temp_path_pack.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; goto cleanup; } @@ -1239,59 +1810,54 @@ static void install_packfile(struct gh__response_status *status, argv_array_push(&ip.args, "index-pack"); if (gh__cmd_opts.show_progress) argv_array_push(&ip.args, "-v"); - argv_array_pushl(&ip.args, "-o", idx_name_tmp.buf, NULL); - argv_array_push(&ip.args, pack_name_tmp.buf); + argv_array_pushl(&ip.args, "-o", params->temp_path_idx.buf, NULL); + argv_array_push(&ip.args, params->temp_path_pack.buf); ip.git_cmd = 1; ip.no_stdin = 1; ip.no_stdout = 1; - // TODO consider capturing stdout from index-pack because - // TODO it will contain the SHA of the packfile and we can - // TODO (should?) add it to the .pack and .idx pathnames - // TODO when we install them. - // TODO - // TODO See pipe_command() rather than run_command(). - // TODO - // TODO Or should be SHA-it ourselves (or read the last 20 bytes)? - /* - * Note that I DO NOT have a region around the index-pack process. - * The region in gh__run_one_slot() currently only covers the - * download time. This index-pack is a separate step not covered - * in the above region. Later, if/when we have CURL directly stream - * to index-pack, that region will be the combined download+index - * time. So, I'm not going to introduce it here. + * Note that I DO NOT have a trace2 region around the + * index-pack process by itself. Currently, we are inside the + * trace2 region for running the request and that's fine. + * Later, if/when we stream the download directly to + * index-pack, it will be inside under the same region anyway. + * So, I'm not going to introduce it here. */ if (run_command(&ip)) { - unlink(pack_name_tmp.buf); - unlink(idx_name_tmp.buf); + unlink(params->temp_path_pack.buf); + unlink(params->temp_path_idx.buf); strbuf_addf(&status->error_message, - "index-pack failed on '%s'", pack_name_tmp.buf); - status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + "index-pack failed on '%s'", + params->temp_path_pack.buf); + /* + * Lets assume that index-pack failed because the + * downloaded file is corrupt (truncated). + * + * Retry it as if the network had dropped. + */ + status->retry = GH__RETRY_MODE__TRANSIENT; + status->ec = GH__ERROR_CODE__INDEX_PACK_FAILED; goto cleanup; } - if (finalize_object_file(pack_name_tmp.buf, pack_name_dst.buf) || - finalize_object_file(idx_name_tmp.buf, idx_name_dst.buf)) { - unlink(pack_name_tmp.buf); - unlink(pack_name_dst.buf); - unlink(idx_name_tmp.buf); - unlink(idx_name_dst.buf); + if (finalize_object_file(params->temp_path_pack.buf, + params->final_path_pack.buf) || + finalize_object_file(params->temp_path_idx.buf, + params->final_path_idx.buf)) { + unlink(params->temp_path_pack.buf); + unlink(params->temp_path_idx.buf); + unlink(params->final_path_pack.buf); + unlink(params->final_path_idx.buf); strbuf_addf(&status->error_message, "could not install packfile '%s'", - pack_name_dst.buf); + params->final_path_pack.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; goto cleanup; } - extract_filename(packfile_filename, &pack_name_dst); - cleanup: child_process_clear(&ip); - strbuf_release(&pack_name_tmp); - strbuf_release(&pack_name_dst); - strbuf_release(&idx_name_tmp); - strbuf_release(&idx_name_dst); } /* @@ -1302,6 +1868,19 @@ static void install_loose(struct gh__request_params *params, { struct strbuf tmp_path = STRBUF_INIT; + /* + * We expect a loose object when we do a GET -or- when we + * do a POST with only 1 object. + */ + if (strcmp(status->content_type.buf, + "application/x-git-loose-object")) { + strbuf_addf(&status->error_message, + "install_loose: received unknown content-type '%s'", + status->content_type.buf); + status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; + return; + } + gh__response_status__zero(status); /* @@ -1361,7 +1940,248 @@ static void gh_http_cleanup(void) } /* - * Do a single HTTP request without auth-retry or fallback. + * buffer has ": [\r]\n" + */ +static void parse_resp_hdr_1(const char *buffer, size_t size, size_t nitems, + struct strbuf *key, struct strbuf *value) +{ + const char *end = buffer + (size * nitems); + const char *p; + + p = strchr(buffer, ':'); + + strbuf_setlen(key, 0); + strbuf_add(key, buffer, (p - buffer)); + + p++; /* skip ':' */ + p++; /* skip ' ' */ + + strbuf_setlen(value, 0); + strbuf_add(value, p, (end - p)); + strbuf_trim_trailing_newline(value); +} + +static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems, + void *void_params) +{ + struct gh__request_params *params = void_params; + struct gh__azure_throttle *azure = &gh__global_throttle[params->server_type]; + + if (starts_with(buffer, "X-RateLimit-")) { + struct strbuf key = STRBUF_INIT; + struct strbuf val = STRBUF_INIT; + + parse_resp_hdr_1(buffer, size, nitems, &key, &val); + + /* + * The following X- headers are specific to AzureDevOps. + * Other servers have similar sets of values, but I haven't + * compared them in depth. + */ + // trace2_printf("Throttle: %s %s", key.buf, val.buf); + + if (!strcmp(key.buf, "X-RateLimit-Resource")) { + /* + * The name of the resource that is complaining. + * Just log it because we can't do anything with it. + */ + strbuf_setlen(&key, 0); + strbuf_addstr(&key, "ratelimit/resource"); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + trace2_data_string("gvfs-helper", NULL, key.buf, val.buf); + } + + else if (!strcmp(key.buf, "X-RateLimit-Delay")) { + /* + * The amount of delay added to our response. + * Just log it because we can't do anything with it. + */ + unsigned long tarpit_delay_ms; + + strbuf_setlen(&key, 0); + strbuf_addstr(&key, "ratelimit/delay_ms"); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + git_parse_ulong(val.buf, &tarpit_delay_ms); + + trace2_data_intmax("gvfs-helper", NULL, key.buf, tarpit_delay_ms); + } + + else if (!strcmp(key.buf, "X-RateLimit-Limit")) { + /* + * The resource limit/quota before we get a 429. + */ + git_parse_ulong(val.buf, &azure->tstu_limit); + } + + else if (!strcmp(key.buf, "X-RateLimit-Remaining")) { + /* + * The amount of our quota remaining. When zero, we + * should get 429s on futher requests until the reset + * time. + */ + git_parse_ulong(val.buf, &azure->tstu_remaining); + } + + else if (!strcmp(key.buf, "X-RateLimit-Reset")) { + /* + * The server gave us a time-in-seconds-since-the-epoch + * for when our quota will be reset (if we stop all + * activity right now). + * + * Checkpoint the local system clock so we can do some + * sanity checks on any clock skew. Also, since we get + * the headers before we get the content, we can adjust + * our delay to compensate for the full download time. + */ + unsigned long now = time(NULL); + unsigned long reset_time; + + git_parse_ulong(val.buf, &reset_time); + if (reset_time > now) + azure->reset_sec = reset_time - now; + } + + strbuf_release(&key); + strbuf_release(&val); + } + + else if (starts_with(buffer, "Retry-After")) { + struct strbuf key = STRBUF_INIT; + struct strbuf val = STRBUF_INIT; + + parse_resp_hdr_1(buffer, size, nitems, &key, &val); + + /* + * We get this header with a 429 and 503 and possibly a 30x. + * + * Curl does have CURLINFO_RETRY_AFTER that nicely parses and + * normalizes the value (and supports HTTP/1.1 usage), but it + * is not present yet in the version shipped with the Mac, so + * we do it directly here. + */ + git_parse_ulong(val.buf, &azure->retry_after_sec); + + strbuf_release(&key); + strbuf_release(&val); + } + + else if (starts_with(buffer, "X-VSS-E2EID")) { + struct strbuf key = STRBUF_INIT; + + /* + * Capture the E2EID as it goes by, but don't log it until we + * know the request result. + */ + parse_resp_hdr_1(buffer, size, nitems, &key, ¶ms->e2eid); + + strbuf_release(&key); + } + + return nitems * size; +} + +/* + * Wait "duration" seconds and drive the progress mechanism. + * + * We spin slightly faster than we need to to keep the progress bar + * drawn (especially if the user presses return while waiting) and to + * compensate for delay factors built into the progress class (which + * might wait for 2 seconds before drawing the first message). + */ +static void do_throttle_spin(struct gh__request_params *params, + const char *tr2_label, + const char *progress_msg, + int duration) +{ + struct strbuf region = STRBUF_INIT; + struct progress *progress = NULL; + unsigned long begin = time(NULL); + unsigned long now = begin; + unsigned long end = begin + duration; + + strbuf_addstr(®ion, tr2_label); + strbuf_addstr(®ion, gh__server_type_label[params->server_type]); + trace2_region_enter("gvfs-helper", region.buf, NULL); + + progress = start_progress(progress_msg, duration); + while (now < end) { + display_progress(progress, (now - begin)); + + sleep_millisec(100); + + now = time(NULL); + } + display_progress(progress, duration); + stop_progress(&progress); + + trace2_region_leave("gvfs-helper", region.buf, NULL); + strbuf_release(®ion); +} + +/* + * Delay the outbound request if necessary in response to previous throttle + * blockages or hints. Throttle data is somewhat orthogonal to the status + * results from any previous request and/or the request params of the next + * request. + * + * Note that the throttle info also is cross-process information, such as + * 2 concurrent fetches in 2 different terminal windows to the same server + * will be sharing the same server quota. These could be coordinated too, + * so that a blockage received in one process would prevent the other + * process from starting another request (and also blocked or extending + * the delay interval). We're NOT going to do that level of integration. + * We will let both processes independently attempt the next request. + * This may cause us to miss the end-of-quota boundary if the server + * extends it because of the second request. + * + * TODO Should we have a max-wait option and then return a hard-error + * TODO of some type? + */ +static void do_throttle_wait(struct gh__request_params *params, + struct gh__response_status *status) +{ + struct gh__azure_throttle *azure = + &gh__global_throttle[params->server_type]; + + if (azure->retry_after_sec) { + /* + * We were given a hard delay (such as after a 429). + * Spin until the requested time. + */ + do_throttle_spin(params, "throttle/hard", + "Waiting on hard throttle (sec)", + azure->retry_after_sec); + return; + } + + if (azure->reset_sec > 0) { + /* + * We were given a hint that we are overloading + * the server. Voluntarily backoff (before we + * get tarpitted or blocked). + */ + do_throttle_spin(params, "throttle/soft", + "Waiting on soft throttle (sec)", + azure->reset_sec); + return; + } + + if (params->k_transient_delay_sec) { + /* + * Insert an arbitrary delay before retrying after a + * transient (network) failure. + */ + do_throttle_spin(params, "throttle/transient", + "Waiting to retry after network error (sec)", + params->k_transient_delay_sec); + return; + } +} + +/* + * Do a single HTTP request WITHOUT robust-retry, auth-retry or fallback. */ static void do_req(const char *url_base, const char *url_component, @@ -1376,14 +2196,28 @@ static void do_req(const char *url_base, gh__response_status__zero(status); if (params->b_write_to_file) { - // TODO ftruncate tempfile ?? + /* Delete dirty tempfile from a previous attempt. */ + if (params->tempfile) + delete_tempfile(¶ms->tempfile); + + if (params->b_is_post && params->object_count > 1) + create_tempfile_for_packfile(params, status); + else + create_tempfile_for_loose(params, status); + + if (!params->tempfile || status->ec != GH__ERROR_CODE__OK) + return; } else { + /* Guard against caller using dirty buffer */ strbuf_setlen(params->buffer, 0); } end_url_with_slash(&rest_url, url_base); strbuf_addstr(&rest_url, url_component); + do_throttle_wait(params, status); + gh__azure_throttle__zero(&gh__global_throttle[params->server_type]); + slot = get_active_slot(); slot->results = &results; @@ -1412,6 +2246,9 @@ static void do_req(const char *url_base, curl_easy_setopt(slot->curl, CURLOPT_FILE, params->buffer); } + curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, parse_resp_hdr); + curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, params); + if (creds && creds->username) { /* * Force CURL to respect the username/password we provide by @@ -1438,8 +2275,8 @@ static void do_req(const char *url_base, * with the main Git server (because the cache-server * doesn't handle 401s). * - * TODO Think about if we really need to handle this case - * TODO and if so, add an .is_main to params. + * TODO Think about if we really need to handle this case. + * TODO Guard with "if (params->sever_type == __MAIN)" */ curl_easy_setopt(slot->curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY); } @@ -1457,23 +2294,126 @@ static void do_req(const char *url_base, gh__run_one_slot(slot, params, status); } +/* + * Compute the delay for the nth attempt. + * + * No delay for the first attempt. Then use a normal exponential backoff + * starting from 8. + */ +static int compute_transient_delay(int attempt) +{ + int v; + + if (attempt < 1) + return 0; + + /* + * Let 8K be our hard limit (for integer overflow protection). + * That's over 2 hours. This is 8<<10. + */ + if (attempt > 10) + attempt = 10; + + v = 8 << (attempt - 1); + + if (v > gh__cmd_opts.max_transient_backoff_sec) + v = gh__cmd_opts.max_transient_backoff_sec; + + return v; +} + +/* + * Robustly make an HTTP request. Retry if necessary to hide common + * transient network errors and/or 429 blockages. + * + * For a transient (network) failure (where we do not have a throttle + * delay factor), we should insert a small delay to let the network + * recover. The outage might be because the VPN dropped, or the + * machine went to sleep or something and we want to give the network + * time to come back up. Insert AI here :-) + */ +static void do_req__with_robust_retry(const char *url_base, + const char *url_component, + const struct credential *creds, + struct gh__request_params *params, + struct gh__response_status *status) +{ + for (params->k_attempt = 0; + params->k_attempt < gh__cmd_opts.max_retries + 1; + params->k_attempt++) { + + do_req(url_base, url_component, creds, params, status); + + switch (status->retry) { + default: + case GH__RETRY_MODE__SUCCESS: + case GH__RETRY_MODE__HTTP_401: /* caller does auth-retry */ + case GH__RETRY_MODE__HARD_FAIL: + case GH__RETRY_MODE__FAIL_404: + return; + + case GH__RETRY_MODE__HTTP_429: + case GH__RETRY_MODE__HTTP_503: + /* + * We should have gotten a "Retry-After" header with + * these and that gives us the wait time. If not, + * fallthru and use the backoff delay. + */ + if (gh__global_throttle[params->server_type].retry_after_sec) + continue; + /*fallthru*/ + + case GH__RETRY_MODE__TRANSIENT: + params->k_transient_delay_sec = + compute_transient_delay(params->k_attempt); + continue; + } + } +} + static void do_req__to_main(const char *url_component, struct gh__request_params *params, struct gh__response_status *status) { -// lookup_main_creds(); + params->server_type = GH__SERVER_TYPE__MAIN; - do_req(gh__global.main_url, url_component, &gh__global.main_creds, - params, status); + /* + * When talking to the main Git server, we do allow non-auth'd + * initial requests (mainly for testing, since production servers + * will usually always want creds), so we DO NOT force load the + * user's creds here and let the normal 401 mechanism handle things. + * This has a slight perf penalty -- if we're bulk fetching 4000 + * objects from a production server, we're going to do a 100K+ byte + * POST just to get a 401 and then repeat the 100K+ byte POST + * with the creds. + * + * TODO Consider making a test or runtime flag to alter this. + * TODO If set/not-set, add a call here to pre-populate the creds. + * TODO + * TODO lookup_main_creds(); + * TODO + * + * TODO Testing with GIT_TRACE_CURL shows that curl will *SOMETIME* + * TODO send an "Expect: 100-continue" and silently handle/eat the + * TODO "HTTP/1.1 100 Continue" before the POST-body is sent, + * TODO so maybe this isn't an issue. (Other than the extra + * TODO roundtrip for the first set of headers.) More testing here + * TODO should be performed. + */ - if (status->response_code == 401) { + do_req__with_robust_retry(gh__global.main_url, url_component, + &gh__global.main_creds, + params, status); + + if (status->retry == GH__RETRY_MODE__HTTP_401) { refresh_main_creds(); - do_req(gh__global.main_url, url_component, &gh__global.main_creds, - params, status); + do_req__with_robust_retry(gh__global.main_url, url_component, + &gh__global.main_creds, + params, status); } - if (status->response_code == 200) + if (status->retry == GH__RETRY_MODE__SUCCESS) approve_main_creds(); } @@ -1481,32 +2421,39 @@ static void do_req__to_cache_server(const char *url_component, struct gh__request_params *params, struct gh__response_status *status) { + params->server_type = GH__SERVER_TYPE__CACHE; + synthesize_cache_server_creds(); - do_req(gh__global.cache_server_url, url_component, &gh__global.cache_creds, - params, status); - fixup_cache_server_400_to_401(status); + do_req__with_robust_retry(gh__global.cache_server_url, url_component, + &gh__global.cache_creds, + params, status); - if (status->response_code == 401) { + if (status->retry == GH__RETRY_MODE__HTTP_401) { refresh_cache_server_creds(); - do_req(gh__global.cache_server_url, url_component, - &gh__global.cache_creds, params, status); - fixup_cache_server_400_to_401(status); + do_req__with_robust_retry(gh__global.cache_server_url, + url_component, + &gh__global.cache_creds, + params, status); } - if (status->response_code == 200) + if (status->retry == GH__RETRY_MODE__SUCCESS) approve_cache_server_creds(); } +/* + * Try the cache-server (if configured) then fall-back to the main Git server. + */ static void do_req__with_fallback(const char *url_component, struct gh__request_params *params, struct gh__response_status *status) { - if (gh__global.cache_server_url && !params->b_no_cache_server) { + if (gh__global.cache_server_url && + params->b_permit_cache_server_if_defined) { do_req__to_cache_server(url_component, params, status); - if (status->response_code == 200) + if (status->retry == GH__RETRY_MODE__SUCCESS) return; if (!gh__cmd_opts.try_fallback) @@ -1521,7 +2468,7 @@ static void do_req__with_fallback(const char *url_component, * Falling-back would likely just cause the 3rd (or maybe * 4th) cred prompt. */ - if (status->response_code == 401) + if (status->retry == GH__RETRY_MODE__HTTP_401) return; } @@ -1533,16 +2480,17 @@ static void do_req__with_fallback(const char *url_component, * * Return server's response buffer. This is probably a raw JSON string. */ -static void do__gvfs_config(struct gh__response_status *status, - struct strbuf *config_data) +static void do__http_get__gvfs_config(struct gh__response_status *status, + struct strbuf *config_data) { struct gh__request_params params = GH__REQUEST_PARAMS_INIT; - strbuf_addstr(¶ms.label, "GET/config"); + strbuf_addstr(¶ms.tr2_label, "GET/config"); params.b_is_post = 0; params.b_write_to_file = 0; - params.b_no_cache_server = 1; /* they don't handle gvfs/config API */ + /* cache-servers do not handle gvfs/config REST calls */ + params.b_permit_cache_server_if_defined = 0; params.buffer = config_data; params.object_count = 1; /* a bit of a lie */ @@ -1573,12 +2521,34 @@ static void do__gvfs_config(struct gh__response_status *status, gh__request_params__release(¶ms); } +static void setup_gvfs_objects_progress(struct gh__request_params *params, + unsigned long num, unsigned long den) +{ + if (!gh__cmd_opts.show_progress) + return; + + if (params->b_is_post && params->object_count > 1) { + strbuf_addf(¶ms->progress_base_phase2_msg, + "Requesting packfile %ld/%ld with %ld objects", + num, den, params->object_count); + strbuf_addf(¶ms->progress_base_phase3_msg, + "Receiving packfile %ld/%ld with %ld objects", + num, den, params->object_count); + } else { + strbuf_addf(¶ms->progress_base_phase3_msg, + "Receiving %ld/%ld loose object", + num, den); + } +} + /* * Call "gvfs/objects/" REST API to fetch a loose object * and write it to the ODB. */ -static void do__loose__gvfs_object(struct gh__response_status *status, - const struct object_id *oid) +static void do__http_get__gvfs_object(struct gh__response_status *status, + const struct object_id *oid, + unsigned long l_num, unsigned long l_den, + struct string_list *result_list) { struct gh__request_params params = GH__REQUEST_PARAMS_INIT; struct strbuf component_url = STRBUF_INIT; @@ -1587,11 +2557,11 @@ static void do__loose__gvfs_object(struct gh__response_status *status, strbuf_addf(&component_url, "gvfs/objects/%s", oid_to_hex(oid)); - strbuf_addstr(¶ms.label, "GET/objects"); + strbuf_addstr(¶ms.tr2_label, "GET/objects"); params.b_is_post = 0; params.b_write_to_file = 1; - params.b_no_cache_server = 0; + params.b_permit_cache_server_if_defined = 1; params.object_count = 1; @@ -1601,41 +2571,46 @@ static void do__loose__gvfs_object(struct gh__response_status *status, params.headers = curl_slist_append(params.headers, "Pragma: no-cache"); - create_tempfile_for_loose(¶ms, status, oid); - if (!params.tempfile) - goto cleanup; + oidcpy(¶ms.loose_oid, oid); - if (gh__cmd_opts.show_progress) { - /* - * Likewise, a gvfs/objects/{oid} has a very small reqest - * payload, so I don't see any need to report progress on - * the upload side of the GET. So just report progress - * on the download side. - */ - strbuf_addstr(¶ms.progress_base_phase3_msg, - "Receiving 1 loose object"); - } + setup_gvfs_objects_progress(¶ms, l_num, l_den); do_req__with_fallback(component_url.buf, ¶ms, status); - if (status->ec == GH__ERROR_CODE__OK) - install_loose(¶ms, status); + if (status->ec == GH__ERROR_CODE__OK) { + struct strbuf msg = STRBUF_INIT; + + strbuf_addf(&msg, "loose %s", + oid_to_hex(¶ms.loose_oid)); + + string_list_append(result_list, msg.buf); + strbuf_release(&msg); + } -cleanup: gh__request_params__release(¶ms); strbuf_release(&component_url); } /* - * Call "gvfs/objects" POST REST API to fetch a packfile containing - * the objects in the requested OIDSET. Returns the filename (not - * pathname) to the new packfile. + * Call "gvfs/objects" POST REST API to fetch a batch of objects + * from the OIDSET. Normal, this is results in a packfile containing + * `nr_wanted_in_block` objects. And we return the number actually + * consumed (along with the filename of the resulting packfile). + * + * However, if we only have 1 oid (remaining) in the OIDSET, the + * server will respond to our POST with a loose object rather than + * a packfile with 1 object. + * + * Append a message to the result_list describing the result. + * + * Return the number of OIDs consumed from the OIDSET. */ -static void do__packfile__gvfs_objects(struct gh__response_status *status, - struct oidset_iter *iter, - unsigned long nr_wanted_in_block, - struct strbuf *output_filename, - unsigned long *nr_taken) +static void do__http_post__gvfs_objects(struct gh__response_status *status, + struct oidset_iter *iter, + unsigned long nr_wanted_in_block, + int j_pack_num, int j_pack_den, + struct string_list *result_list, + unsigned long *nr_oid_taken) { struct json_writer jw_req = JSON_WRITER_INIT; struct gh__request_params params = GH__REQUEST_PARAMS_INIT; @@ -1643,14 +2618,14 @@ static void do__packfile__gvfs_objects(struct gh__response_status *status, gh__response_status__zero(status); params.object_count = build_json_payload__gvfs_objects( - &jw_req, iter, nr_wanted_in_block); - *nr_taken = params.object_count; + &jw_req, iter, nr_wanted_in_block, ¶ms.loose_oid); + *nr_oid_taken = params.object_count; - strbuf_addstr(¶ms.label, "POST/objects"); + strbuf_addstr(¶ms.tr2_label, "POST/objects"); params.b_is_post = 1; params.b_write_to_file = 1; - params.b_no_cache_server = 0; + params.b_permit_cache_server_if_defined = 1; params.post_payload = &jw_req.json; @@ -1663,7 +2638,7 @@ static void do__packfile__gvfs_objects(struct gh__response_status *status, "Content-Type: application/json"); /* * We really always want a packfile. But if the payload only - * requests 1 OID, the server will/may send us a single loose + * requests 1 OID, the server will send us a single loose * objects instead. (Apparently the server ignores us when we * only send application/x-git-packfile and does it anyway.) * @@ -1675,199 +2650,172 @@ static void do__packfile__gvfs_objects(struct gh__response_status *status, params.headers = curl_slist_append(params.headers, "Accept: application/x-git-loose-object"); - params.tempfile = create_tempfile_for_packfile(); - if (!params.tempfile) { - strbuf_addstr(&status->error_message, - "could not create tempfile for packfile"); - status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; - goto cleanup; - } - - if (gh__cmd_opts.show_progress) { - strbuf_addf(¶ms.progress_base_phase2_msg, - "Requesting packfile with %ld objects", - params.object_count); - strbuf_addf(¶ms.progress_base_phase3_msg, - "Receiving packfile with %ld objects", - params.object_count); - } + setup_gvfs_objects_progress(¶ms, j_pack_num, j_pack_den); do_req__with_fallback("gvfs/objects", ¶ms, status); if (status->ec == GH__ERROR_CODE__OK) { - if (!strcmp(status->content_type.buf, - "application/x-git-packfile")) { - - // TODO Consider having a worker thread to manage - // TODO running index-pack and then install the - // TODO resulting .idx and .pack files. This would - // TODO let us interleave those steps with our thread - // TODO fetching the next block of objects from the - // TODO server. (Need to think about how progress - // TODO messages from our thread and index-pack - // TODO would mesh.) - // TODO - // TODO But then again, if we hack index-pack to write - // TODO to our alternate and stream the data thru it, - // TODO it won't matter. - - install_packfile(status, ¶ms.tempfile, - output_filename); - goto cleanup; - } + struct strbuf msg = STRBUF_INIT; - if (!strcmp(status->content_type.buf, - "application/x-git-loose-object")) - { - /* - * This should not happen (when we request - * more than one object). The server can send - * us a loose object (even when we use the - * POST form) if there is only one object in - * the payload (and despite the set of accept - * headers we send), so I'm going to leave - * this here. - */ - strbuf_addstr(&status->error_message, - "received loose object when packfile expected"); - status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; - goto cleanup; - } + if (params.object_count > 1) + strbuf_addf(&msg, "packfile %s", + params.final_packfile_filename.buf); + else + strbuf_addf(&msg, "loose %s", + oid_to_hex(¶ms.loose_oid)); - strbuf_addf(&status->error_message, - "received unknown content-type '%s'", - status->content_type.buf); - status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; - goto cleanup; + string_list_append(result_list, msg.buf); + strbuf_release(&msg); } -cleanup: gh__request_params__release(¶ms); jw_release(&jw_req); } /* - * Bulk or individually fetch a list of objects in one or more http requests. - * Create one or more packfiles and/or loose objects. + * Drive one or more HTTP GET requests to fetch the objects + * in the given OIDSET. These are received into loose objects. * - * We accumulate results for each request in `result_list` until we get a + * Accumulate results for each request in `result_list` until we get a * hard error and have to stop. */ -static void do_fetch_oidset(struct gh__response_status *status, - struct oidset *oids, - unsigned long nr_total, - struct string_list *result_list) +static void do__http_get__fetch_oidset(struct gh__response_status *status, + struct oidset *oids, + unsigned long nr_oid_total, + struct string_list *result_list) { struct oidset_iter iter; - struct strbuf output_filename = STRBUF_INIT; - struct strbuf msg = STRBUF_INIT; struct strbuf err404 = STRBUF_INIT; const struct object_id *oid; unsigned long k; - unsigned long nr_taken; int had_404 = 0; gh__response_status__zero(status); - if (!nr_total) + if (!nr_oid_total) return; oidset_iter_init(oids, &iter); - for (k = 0; k < nr_total; k += nr_taken) { - if (nr_total - k == 1 || gh__cmd_opts.block_size == 1) { - oid = oidset_iter_next(&iter); - nr_taken = 1; + for (k = 0; k < nr_oid_total; k++) { + oid = oidset_iter_next(&iter); - do__loose__gvfs_object(status, oid); + do__http_get__gvfs_object(status, oid, k+1, nr_oid_total, + result_list); + /* + * If we get a 404 for an individual object, ignore + * it and get the rest. We'll fixup the 'ec' later. + */ + if (status->ec == GH__ERROR_CODE__HTTP_404) { + if (!err404.len) + strbuf_addf(&err404, "%s: from GET %s", + status->error_message.buf, + oid_to_hex(oid)); /* - * If we get a 404 for an individual object, ignore - * it and get the rest. We'll fixup the 'ec' later. + * Mark the fetch as "incomplete", but don't + * stop trying to get other chunks. */ - if (status->ec == GH__ERROR_CODE__HTTP_404) { - if (!err404.len) - strbuf_addf(&err404, "%s: loose object %s", - status->error_message.buf, - oid_to_hex(oid)); - /* - * Mark the fetch as "incomplete", but don't - * stop trying to get other chunks. - */ - had_404 = 1; - continue; - } + had_404 = 1; + continue; + } - if (status->ec != GH__ERROR_CODE__OK) { - /* Stop at the first hard error. */ - strbuf_addf(&status->error_message, ": loose %s", - oid_to_hex(oid)); - goto cleanup; - } + if (status->ec != GH__ERROR_CODE__OK) { + /* Stop at the first hard error. */ + strbuf_addf(&status->error_message, ": from GET %s", + oid_to_hex(oid)); + goto cleanup; + } + } - strbuf_setlen(&msg, 0); - strbuf_addf(&msg, "loose %s", oid_to_hex(oid)); - string_list_append(result_list, msg.buf); +cleanup: + if (had_404 && status->ec == GH__ERROR_CODE__OK) { + strbuf_setlen(&status->error_message, 0); + strbuf_addbuf(&status->error_message, &err404); + status->ec = GH__ERROR_CODE__HTTP_404; + } - } else { - strbuf_setlen(&output_filename, 0); + strbuf_release(&err404); +} - do__packfile__gvfs_objects(status, &iter, - gh__cmd_opts.block_size, - &output_filename, - &nr_taken); +/* + * Drive one or more HTTP POST requests to bulk fetch the objects in + * the given OIDSET. Create one or more packfiles and/or loose objects. + * + * Accumulate results for each request in `result_list` until we get a + * hard error and have to stop. + */ +static void do__http_post__fetch_oidset(struct gh__response_status *status, + struct oidset *oids, + unsigned long nr_oid_total, + struct string_list *result_list) +{ + struct oidset_iter iter; + struct strbuf err404 = STRBUF_INIT; + unsigned long k; + unsigned long nr_oid_taken; + int j_pack_den = 0; + int j_pack_num = 0; + int had_404 = 0; + + gh__response_status__zero(status); + if (!nr_oid_total) + return; + + oidset_iter_init(oids, &iter); + + j_pack_den = ((nr_oid_total + gh__cmd_opts.block_size - 1) + / gh__cmd_opts.block_size); + for (k = 0; k < nr_oid_total; k += nr_oid_taken) { + j_pack_num++; + + do__http_post__gvfs_objects(status, &iter, + gh__cmd_opts.block_size, + j_pack_num, j_pack_den, + result_list, + &nr_oid_taken); + + /* + * Because the oidset iterator has random + * order, it does no good to say the k-th or + * n-th chunk was incomplete; the client + * cannot use that index for anything. + * + * We get a 404 when at least one object in + * the chunk was not found. + * + * For now, ignore the 404 and go on to the + * next chunk and then fixup the 'ec' later. + */ + if (status->ec == GH__ERROR_CODE__HTTP_404) { + if (!err404.len) + strbuf_addf(&err404, + "%s: from POST", + status->error_message.buf); /* - * Because the oidset iterator has random - * order, it does no good to say the k-th or - * n-th chunk was incomplete; the client - * cannot use that index for anything. - * - * We get a 404 when at least one object in - * the chunk was not found. - * - * TODO Consider various retry strategies (such as - * TODO loose or bisect) on the members within this - * TODO chunk to reduce the impact of the miss. - * - * For now, ignore the 404 and go on to the - * next chunk and then fixup the 'ec' later. + * Mark the fetch as "incomplete", but don't + * stop trying to get other chunks. */ - if (status->ec == GH__ERROR_CODE__HTTP_404) { - if (!err404.len) - strbuf_addf(&err404, - "%s: packfile object", - status->error_message.buf); - /* - * Mark the fetch as "incomplete", but don't - * stop trying to get other chunks. - */ - had_404 = 1; - continue; - } - - if (status->ec != GH__ERROR_CODE__OK) { - /* Stop at the first hard error. */ - strbuf_addstr(&status->error_message, - ": in packfile"); - goto cleanup; - } + had_404 = 1; + continue; + } - strbuf_setlen(&msg, 0); - strbuf_addf(&msg, "packfile %s", output_filename.buf); - string_list_append(result_list, msg.buf); + if (status->ec != GH__ERROR_CODE__OK) { + /* Stop at the first hard error. */ + strbuf_addstr(&status->error_message, + ": from POST"); + goto cleanup; } } cleanup: - strbuf_release(&msg); - strbuf_release(&err404); - strbuf_release(&output_filename); - if (had_404 && status->ec == GH__ERROR_CODE__OK) { strbuf_setlen(&status->error_message, 0); - strbuf_addstr(&status->error_message, "404 Not Found"); + strbuf_addbuf(&status->error_message, &err404); status->ec = GH__ERROR_CODE__HTTP_404; } + + strbuf_release(&err404); } /* @@ -1905,7 +2853,7 @@ static enum gh__error_code do_sub_cmd__config(int argc, const char **argv) finish_init(0); - do__gvfs_config(&status, &config_data); + do__http_get__gvfs_config(&status, &config_data); ec = status.ec; if (ec == GH__ERROR_CODE__OK) @@ -1920,16 +2868,67 @@ static enum gh__error_code do_sub_cmd__config(int argc, const char **argv) } /* - * Read a list of objects from stdin and fetch them in a single request (or - * multiple block-size requests). + * Read a list of objects from stdin and fetch them as a series of + * single object HTTP GET requests. */ static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) { static struct option get_options[] = { + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), + OPT_END(), + }; + + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct oidset oids = OIDSET_INIT; + struct string_list result_list = STRING_LIST_INIT_DUP; + enum gh__error_code ec = GH__ERROR_CODE__OK; + unsigned long nr_oid_total; + int k; + + trace2_cmd_mode("get"); + + if (argc > 1 && !strcmp(argv[1], "-h")) + usage_with_options(objects_get_usage, get_options); + + argc = parse_options(argc, argv, NULL, get_options, objects_get_usage, 0); + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; + + finish_init(1); + + nr_oid_total = read_stdin_for_oids(&oids); + + do__http_get__fetch_oidset(&status, &oids, nr_oid_total, &result_list); + + ec = status.ec; + + for (k = 0; k < result_list.nr; k++) + printf("%s\n", result_list.items[k].string); + + if (ec != GH__ERROR_CODE__OK) + error("get: %s", status.error_message.buf); + + gh__response_status__release(&status); + oidset_clear(&oids); + string_list_clear(&result_list, 0); + + return ec; +} + +/* + * Read a list of objects from stdin and fetch them in a single request (or + * multiple block-size requests) using one or more HTTP POST requests. + */ +static enum gh__error_code do_sub_cmd__post(int argc, const char **argv) +{ + static struct option post_options[] = { OPT_MAGNITUDE('b', "block-size", &gh__cmd_opts.block_size, N_("number of objects to request at a time")), OPT_INTEGER('d', "depth", &gh__cmd_opts.depth, N_("Commit depth")), + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), OPT_END(), }; @@ -1937,26 +2936,25 @@ static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) struct oidset oids = OIDSET_INIT; struct string_list result_list = STRING_LIST_INIT_DUP; enum gh__error_code ec = GH__ERROR_CODE__OK; - unsigned long nr_total; + unsigned long nr_oid_total; int k; - trace2_cmd_mode("get"); + trace2_cmd_mode("post"); if (argc > 1 && !strcmp(argv[1], "-h")) - usage_with_options(get_usage, get_options); + usage_with_options(objects_post_usage, post_options); - argc = parse_options(argc, argv, NULL, get_options, get_usage, 0); + argc = parse_options(argc, argv, NULL, post_options, objects_post_usage, 0); if (gh__cmd_opts.depth < 1) gh__cmd_opts.depth = 1; + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; finish_init(1); - nr_total = read_stdin_from_rev_list(&oids); + nr_oid_total = read_stdin_for_oids(&oids); - trace2_region_enter("gvfs-helper", "get", NULL); - trace2_data_intmax("gvfs-helper", NULL, "get/nr_objects", nr_total); - do_fetch_oidset(&status, &oids, nr_total, &result_list); - trace2_region_leave("gvfs-helper", "get", NULL); + do__http_post__fetch_oidset(&status, &oids, nr_oid_total, &result_list); ec = status.ec; @@ -1964,7 +2962,7 @@ static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) printf("%s\n", result_list.items[k].string); if (ec != GH__ERROR_CODE__OK) - error("get: %s", status.error_message.buf); + error("post: %s", status.error_message.buf); gh__response_status__release(&status); oidset_clear(&oids); @@ -1974,12 +2972,14 @@ static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) } /* - * Handle the 'get' command when in "server mode". Only call error() and set ec - * for hard errors where we cannot communicate correctly with the foreground - * client process. Pass any actual data errors (such as 404's or 401's from - * the fetch back to the client process. + * Handle the 'objects.get' and 'objects.post' verbs in "server mode". + * + * Only call error() and set ec for hard errors where we cannot + * communicate correctly with the foreground client process. Pass any + * actual data errors (such as 404's or 401's from the fetch) back to + * the client process. */ -static enum gh__error_code do_server_subprocess_get(void) +static enum gh__error_code do_server_subprocess__objects(const char *verb_line) { struct gh__response_status status = GH__RESPONSE_STATUS_INIT; struct oidset oids = OIDSET_INIT; @@ -1990,12 +2990,19 @@ static enum gh__error_code do_server_subprocess_get(void) int len; int err; int k; - unsigned long nr_total = 0; + enum gh__objects_mode objects_mode; + unsigned long nr_oid_total = 0; + + if (!strcmp(verb_line, "objects.get")) + objects_mode = GH__OBJECTS_MODE__GET; + else if (!strcmp(verb_line, "objects.post")) + objects_mode = GH__OBJECTS_MODE__POST; + else { + error("server: unexpected objects-mode verb '%s'", verb_line); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } - /* - * Inside the "get" command, we expect a list of OIDs - * and a flush. - */ while (1) { len = packet_read_line_gently(0, NULL, &line); if (len < 0 || !line) @@ -2008,10 +3015,10 @@ static enum gh__error_code do_server_subprocess_get(void) } if (!oidset_insert(&oids, &oid)) - nr_total++; + nr_oid_total++; } - if (!nr_total) { + if (!nr_oid_total) { if (packet_write_fmt_gently(1, "ok\n")) { error("server: cannot write 'get' result to client"); ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; @@ -2020,10 +3027,10 @@ static enum gh__error_code do_server_subprocess_get(void) goto cleanup; } - trace2_region_enter("gvfs-helper", "server/get", NULL); - trace2_data_intmax("gvfs-helper", NULL, "server/get/nr_objects", nr_total); - do_fetch_oidset(&status, &oids, nr_total, &result_list); - trace2_region_leave("gvfs-helper", "server/get", NULL); + if (objects_mode == GH__OBJECTS_MODE__GET) + do__http_get__fetch_oidset(&status, &oids, nr_oid_total, &result_list); + else + do__http_post__fetch_oidset(&status, &oids, nr_oid_total, &result_list); /* * Write pathname of the ODB where we wrote all of the objects @@ -2078,7 +3085,7 @@ static enum gh__error_code do_server_subprocess_get(void) return ec; } -typedef enum gh__error_code (fn_subprocess_cmd)(void); +typedef enum gh__error_code (fn_subprocess_cmd)(const char *verb_line); struct subprocess_capability { const char *name; @@ -2087,7 +3094,7 @@ struct subprocess_capability { }; static struct subprocess_capability caps[] = { - { "get", 0, do_server_subprocess_get }, + { "objects", 0, do_server_subprocess__objects }, { NULL, 0, NULL }, }; @@ -2180,6 +3187,8 @@ static enum gh__error_code do_sub_cmd__server(int argc, const char **argv) N_("number of objects to request at a time")), OPT_INTEGER('d', "depth", &gh__cmd_opts.depth, N_("Commit depth")), + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), OPT_END(), }; @@ -2196,6 +3205,8 @@ static enum gh__error_code do_sub_cmd__server(int argc, const char **argv) argc = parse_options(argc, argv, NULL, server_options, server_usage, 0); if (gh__cmd_opts.depth < 1) gh__cmd_opts.depth = 1; + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; finish_init(1); @@ -2214,8 +3225,9 @@ static enum gh__error_code do_sub_cmd__server(int argc, const char **argv) } for (k = 0; caps[k].name; k++) { - if (caps[k].client_has && !strcmp(line, caps[k].name)) { - ec = (caps[k].pfn)(); + if (caps[k].client_has && + starts_with(line, caps[k].name)) { + ec = (caps[k].pfn)(line); if (ec != GH__ERROR_CODE__OK) goto cleanup; goto top_of_loop; @@ -2236,6 +3248,9 @@ static enum gh__error_code do_sub_cmd(int argc, const char **argv) if (!strcmp(argv[0], "get")) return do_sub_cmd__get(argc, argv); + if (!strcmp(argv[0], "post")) + return do_sub_cmd__post(argc, argv); + if (!strcmp(argv[0], "config")) return do_sub_cmd__config(argc, argv); @@ -2285,13 +3300,23 @@ int cmd_main(int argc, const char **argv) setup_git_directory_gently(NULL); - git_config(git_default_config, NULL); - /* Set any non-zero initial values in gh__cmd_opts. */ - gh__cmd_opts.depth = 1; - gh__cmd_opts.block_size = GH__DEFAULT_BLOCK_SIZE; + gh__cmd_opts.depth = GH__DEFAULT__OBJECTS_POST__COMMIT_DEPTH; + gh__cmd_opts.block_size = GH__DEFAULT__OBJECTS_POST__BLOCK_SIZE; + gh__cmd_opts.max_retries = GH__DEFAULT_MAX_RETRIES; + gh__cmd_opts.max_transient_backoff_sec = + GH__DEFAULT_MAX_TRANSIENT_BACKOFF_SEC; + gh__cmd_opts.show_progress = !!isatty(2); + // TODO use existing gvfs config settings to override our GH__DEFAULT_ + // TODO values in gh__cmd_opts. (And maybe add/remove our command line + // TODO options for them.) + // TODO + // TODO See "scalar.max-retries" (and maybe "gvfs.max-retries") + + git_config(git_default_config, NULL); + argc = parse_options(argc, argv, NULL, main_options, main_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (argc == 0)