From bd71c8d79b3cc34651bab6b1e5fc44579585ad98 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 10 Oct 2019 10:58:07 -0400 Subject: [PATCH 1/2] gvfs-helper: V2 robust retry and throttling Add robust-retry mechanism to automatically retry a request after network errors. This includes retry after: [] transient network problems reported by CURL. [] http 429 throttling (with associated Retry-After) [] http 503 server unavailable (with associated Retry-After) Add voluntary throttling using Azure X-RateLimit-* hints to avoid being soft-throttled (tarpitted) or hard-throttled (429) on later requests. Add global (outside of a single request) azure-throttle data to track the rate limit hints from the cache-server and main Git server independently. Add exponential retry backoff. This is used for transient network problems when we don't have a Retry-After hint. Move the call to index-pack earlier in the response/error handling sequence so that if we receive a 200 but yet the packfile is truncated/corrupted, we can use the regular retry logic to get it again. Refactor the way we create tempfiles for packfiles to use /pack/tempPacks/ rather than working directly in the /pack/ directory. Move the code to create a new tempfile to the start of a single request attempt (initial and retry attempts), rather than at the overall start of a request. This gives us a fresh tempfile for each network request attempt. This simplifies the retry mechanism and isolates us from the file ownership issues hidden within the tempfile class. And avoids the need to truncate previous incomplete results. This was necessary because index-pack was pulled into the retry loop. Minor: Add support for logging X-VSS-E2EID to telemetry on network errors. Minor: rename variable: params.b_no_cache_server --> params.b_permit_cache_server_if_defined. This variable is used to indicate whether we should try to use the cache-server when it is defined. Got rid of double-negative logic. Minor: rename variable: params.label --> params.tr2_label Clarify that this variable is only used with trace2 logging. Minor: Move the code to automatically map cache-server 400 responses to normal 401 response earlier in the response/error handling sequence to simplify later retry logic. Minor: Decorate trace2 messages with "(cs)" or "(main)" to identify the server in log messages. Add params->server_type to simplify this. Signed-off-by: Jeff Hostetler --- gvfs-helper.c | 1429 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 1126 insertions(+), 303 deletions(-) diff --git a/gvfs-helper.c b/gvfs-helper.c index 8561a3fcf900df..f13eace2c3cca0 100644 --- a/gvfs-helper.c +++ b/gvfs-helper.c @@ -61,6 +61,11 @@ // // --depth= // defaults to "1" // +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. +// Set to zero to disable such retries. +// // server // // Interactive/sub-process mode. Listen for a series of commands @@ -77,6 +82,11 @@ // // --depth= // defaults to "1" // +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. +// Set to zero to disable such retries. +// // Interactive verb: get // // Fetch 1 or more objects. If a cache-server is configured, @@ -166,8 +176,26 @@ static const char *const server_usage[] = { NULL }; +/* + * "commitDepth" field in gvfs protocol + */ +#define GH__DEFAULT_COMMIT_DEPTH 1 + +/* + * Chunk/block size in number of objects we request in each packfile + */ #define GH__DEFAULT_BLOCK_SIZE 4000 +/* + * Retry attempts (after the initial request) for transient errors and 429s. + */ +#define GH__DEFAULT_MAX_RETRIES 6 + +/* + * Maximum delay in seconds for transient (network) error retries. + */ +#define GH__DEFAULT_MAX_TRANSIENT_BACKOFF_SEC 300 + /* * Our exit-codes. */ @@ -175,16 +203,18 @@ enum gh__error_code { GH__ERROR_CODE__USAGE = -1, /* will be mapped to usage() */ GH__ERROR_CODE__OK = 0, GH__ERROR_CODE__ERROR = 1, /* unspecified */ -// GH__ERROR_CODE__CACHE_SERVER_NOT_FOUND = 2, - GH__ERROR_CODE__CURL_ERROR = 3, - GH__ERROR_CODE__HTTP_401 = 4, - GH__ERROR_CODE__HTTP_404 = 5, - GH__ERROR_CODE__HTTP_UNEXPECTED_CODE = 6, - GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE = 7, + GH__ERROR_CODE__CURL_ERROR = 2, + GH__ERROR_CODE__HTTP_401 = 3, + GH__ERROR_CODE__HTTP_404 = 4, + GH__ERROR_CODE__HTTP_429 = 5, + GH__ERROR_CODE__HTTP_503 = 6, + GH__ERROR_CODE__HTTP_OTHER = 7, + GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE = 8, GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE = 8, - GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE = 9, - GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE = 10, - GH__ERROR_CODE__SUBPROCESS_SYNTAX = 11, + GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE = 10, + GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE = 11, + GH__ERROR_CODE__SUBPROCESS_SYNTAX = 12, + GH__ERROR_CODE__INDEX_PACK_FAILED = 13, }; enum gh__cache_server_mode { @@ -210,6 +240,8 @@ static struct gh__cmd_opts { int depth; int block_size; + int max_retries; + int max_transient_backoff_sec; enum gh__cache_server_mode cache_server_mode; } gh__cmd_opts; @@ -234,6 +266,47 @@ static struct gh__global { } gh__global; +enum gh__server_type { + GH__SERVER_TYPE__MAIN = 0, + GH__SERVER_TYPE__CACHE = 1, + + GH__SERVER_TYPE__NR, +}; + +static const char *gh__server_type_label[GH__SERVER_TYPE__NR] = { + "(main)", + "(cs)" +}; + +struct gh__azure_throttle +{ + unsigned long tstu_limit; + unsigned long tstu_remaining; + + unsigned long reset_sec; + unsigned long retry_after_sec; +}; + +static void gh__azure_throttle__zero(struct gh__azure_throttle *azure) +{ + azure->tstu_limit = 0; + azure->tstu_remaining = 0; + azure->reset_sec = 0; + azure->retry_after_sec = 0; +} + +#define GH__AZURE_THROTTLE_INIT { \ + .tstu_limit = 0, \ + .tstu_remaining = 0, \ + .reset_sec = 0, \ + .retry_after_sec = 0, \ + } + +static struct gh__azure_throttle gh__global_throttle[GH__SERVER_TYPE__NR] = { + GH__AZURE_THROTTLE_INIT, + GH__AZURE_THROTTLE_INIT, +}; + /* * Stolen from http.c */ @@ -262,7 +335,12 @@ enum gh__progress_state { struct gh__request_params { int b_is_post; /* POST=1 or GET=0 */ int b_write_to_file; /* write to file=1 or strbuf=0 */ - int b_no_cache_server; /* force main server only */ + int b_permit_cache_server_if_defined; + + enum gh__server_type server_type; + + int k_attempt; /* robust retry attempt */ + int k_transient_delay_sec; /* delay before transient error retries */ unsigned long object_count; /* number of objects being fetched */ @@ -271,9 +349,16 @@ struct gh__request_params { struct curl_slist *headers; /* additional http headers to send */ struct tempfile *tempfile; /* for response content when file */ struct strbuf *buffer; /* for response content when strbuf */ - struct strbuf label; /* for trace2 regions */ + struct strbuf tr2_label; /* for trace2 regions */ struct strbuf loose_path; + struct object_id loose_oid; + + struct strbuf temp_path_pack; + struct strbuf temp_path_idx; + struct strbuf final_path_pack; + struct strbuf final_path_idx; + struct strbuf final_packfile_filename; /* * Note that I am putting all of the progress-related instance data @@ -295,24 +380,36 @@ struct gh__request_params { */ struct strbuf progress_msg; struct progress *progress; + + struct strbuf e2eid; }; #define GH__REQUEST_PARAMS_INIT { \ .b_is_post = 0, \ .b_write_to_file = 0, \ - .b_no_cache_server = 0, \ + .b_permit_cache_server_if_defined = 1, \ + .server_type = GH__SERVER_TYPE__MAIN, \ + .k_attempt = 0, \ + .k_transient_delay_sec = 0, \ .object_count = 0, \ .post_payload = NULL, \ .headers = NULL, \ .tempfile = NULL, \ .buffer = NULL, \ - .label = STRBUF_INIT, \ + .tr2_label = STRBUF_INIT, \ .loose_path = STRBUF_INIT, \ + .loose_oid = {{0}}, \ + .temp_path_pack = STRBUF_INIT, \ + .temp_path_idx = STRBUF_INIT, \ + .final_path_pack = STRBUF_INIT, \ + .final_path_idx = STRBUF_INIT, \ + .final_packfile_filename = STRBUF_INIT, \ .progress_state = GH__PROGRESS_STATE__START, \ .progress_base_phase2_msg = STRBUF_INIT, \ .progress_base_phase3_msg = STRBUF_INIT, \ .progress_msg = STRBUF_INIT, \ .progress = NULL, \ + .e2eid = STRBUF_INIT, \ } static void gh__request_params__release(struct gh__request_params *params) @@ -329,8 +426,13 @@ static void gh__request_params__release(struct gh__request_params *params) params->buffer = NULL; /* we do not own this */ - strbuf_release(¶ms->label); + strbuf_release(¶ms->tr2_label); strbuf_release(¶ms->loose_path); + strbuf_release(¶ms->temp_path_pack); + strbuf_release(¶ms->temp_path_idx); + strbuf_release(¶ms->final_path_pack); + strbuf_release(¶ms->final_path_idx); + strbuf_release(¶ms->final_packfile_filename); strbuf_release(¶ms->progress_base_phase2_msg); strbuf_release(¶ms->progress_base_phase3_msg); @@ -338,8 +440,55 @@ static void gh__request_params__release(struct gh__request_params *params) stop_progress(¶ms->progress); params->progress = NULL; + + strbuf_release(¶ms->e2eid); } +/* + * How we handle retries for various unexpected network errors. + */ +enum gh__retry_mode { + /* + * The operation was successful, so no retry is needed. + * Use this for HTTP 200, for example. + */ + GH__RETRY_MODE__SUCCESS = 0, + + /* + * Retry using the normal 401 Auth mechanism. + */ + GH__RETRY_MODE__HTTP_401, + + /* + * Fail because at least one of the requested OIDs does not exist. + */ + GH__RETRY_MODE__FAIL_404, + + /* + * A transient network error, such as dropped connection + * or network IO error. Our belief is that a retry MAY + * succeed. (See Gremlins and Cosmic Rays....) + */ + GH__RETRY_MODE__TRANSIENT, + + /* + * Request was blocked completely because of a 429. + */ + GH__RETRY_MODE__HTTP_429, + + /* + * Request failed because the server was (temporarily?) offline. + */ + GH__RETRY_MODE__HTTP_503, + + /* + * The operation had a hard failure and we have no + * expectation that a second attempt will give a different + * answer, such as a bad hostname or a mal-formed URL. + */ + GH__RETRY_MODE__HARD_FAIL, +}; + /* * Bucket to describe the results of an HTTP requests (may be * overwritten during retries so that it describes the final attempt). @@ -350,7 +499,9 @@ struct gh__response_status { long response_code; /* http response code */ CURLcode curl_code; enum gh__error_code ec; + enum gh__retry_mode retry; intmax_t bytes_received; + struct gh__azure_throttle *azure; }; #define GH__RESPONSE_STATUS_INIT { \ @@ -359,7 +510,9 @@ struct gh__response_status { .response_code = 0, \ .curl_code = CURLE_OK, \ .ec = GH__ERROR_CODE__OK, \ + .retry = GH__RETRY_MODE__SUCCESS, \ .bytes_received = 0, \ + .azure = NULL, \ } static void gh__response_status__zero(struct gh__response_status *s) @@ -369,7 +522,323 @@ static void gh__response_status__zero(struct gh__response_status *s) s->response_code = 0; s->curl_code = CURLE_OK; s->ec = GH__ERROR_CODE__OK; + s->retry = GH__RETRY_MODE__SUCCESS; s->bytes_received = 0; + s->azure = NULL; +} + +static void install_packfile(struct gh__request_params *params, + struct gh__response_status *status); +static void install_loose(struct gh__request_params *params, + struct gh__response_status *status); + +/* + * Log the E2EID for the current request. + * + * Since every HTTP request to the cache-server and to the main Git server + * will send back a unique E2EID (probably a GUID), we don't want to overload + * telemetry with each ID -- rather, only the ones for which there was a + * problem and that may be helpful in a post mortem. + */ +static void log_e2eid(struct gh__request_params *params, + struct gh__response_status *status) +{ + if (!params->e2eid.len) + return; + + switch (status->retry) { + default: + case GH__RETRY_MODE__SUCCESS: + case GH__RETRY_MODE__HTTP_401: + case GH__RETRY_MODE__FAIL_404: + return; + + case GH__RETRY_MODE__HARD_FAIL: + case GH__RETRY_MODE__TRANSIENT: + case GH__RETRY_MODE__HTTP_429: + case GH__RETRY_MODE__HTTP_503: + break; + } + + if (trace2_is_enabled()) { + struct strbuf key = STRBUF_INIT; + + strbuf_addstr(&key, "e2eid"); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + trace2_data_string("gvfs-helper", NULL, key.buf, + params->e2eid.buf); + + strbuf_release(&key); + } +} + +/* + * Normalize a few error codes before we try to decide + * how to dispatch on them. + */ +static void gh__response_status__normalize_odd_codes( + struct gh__request_params *params, + struct gh__response_status *status) +{ + if (params->server_type == GH__SERVER_TYPE__CACHE && + status->response_code == 400) { + /* + * The cache-server sends a somewhat bogus 400 instead of + * the normal 401 when AUTH is required. Fixup the status + * to hide that. + * + * TODO Technically, the cache-server could send a 400 + * TODO for many reasons, not just for their bogus + * TODO pseudo-401, but we're going to assume it is a + * TODO 401 for now. We should confirm the expected + * TODO error message in the response-body. + */ + status->response_code = 401; + } + + if (status->response_code == 203) { + /* + * A proxy server transformed a 200 from the origin server + * into a 203. We don't care about the subtle distinction. + */ + status->response_code = 200; + } +} + +/* + * Map HTTP response codes into a retry strategy. + * See https://en.wikipedia.org/wiki/List_of_HTTP_status_codes + * + * https://docs.microsoft.com/en-us/azure/devops/integrate/concepts/rate-limits?view=azure-devops + */ +static void compute_retry_mode_from_http_response( + struct gh__response_status *status) +{ + switch (status->response_code) { + + case 200: + status->retry = GH__RETRY_MODE__SUCCESS; + status->ec = GH__ERROR_CODE__OK; + return; + + case 301: /* all the various flavors of HTTP Redirect */ + case 302: + case 303: + case 304: + case 305: + case 306: + case 307: + case 308: + /* + * TODO Consider a redirected-retry (with or without + * TODO a Retry-After header). + */ + goto hard_fail; + + case 401: + strbuf_addstr(&status->error_message, + "(http:401) Not Authorized"); + status->retry = GH__RETRY_MODE__HTTP_401; + status->ec = GH__ERROR_CODE__HTTP_401; + return; + + case 404: + /* + * TODO if params->object_count > 1, consider + * TODO splitting the request into 2 halves + * TODO and retrying each half in series. + */ + strbuf_addstr(&status->error_message, + "(http:404) Not Found"); + status->retry = GH__RETRY_MODE__FAIL_404; + status->ec = GH__ERROR_CODE__HTTP_404; + return; + + case 429: + /* + * This is a hard block because we've been bad. + */ + strbuf_addstr(&status->error_message, + "(http:429) Too Many Requests [throttled]"); + status->retry = GH__RETRY_MODE__HTTP_429; + status->ec = GH__ERROR_CODE__HTTP_429; + + trace2_data_string("gvfs-helper", NULL, "error/http", + status->error_message.buf); + return; + + case 503: + /* + * We assume that this comes with a "Retry-After" header like 429s. + */ + strbuf_addstr(&status->error_message, + "(http:503) Server Unavailable [throttled]"); + status->retry = GH__RETRY_MODE__HTTP_503; + status->ec = GH__ERROR_CODE__HTTP_503; + + trace2_data_string("gvfs-helper", NULL, "error/http", + status->error_message.buf); + return; + + default: + goto hard_fail; + } + +hard_fail: + strbuf_addf(&status->error_message, "(http:%d) Other [hard_fail]", + (int)status->response_code); + status->retry = GH__RETRY_MODE__HARD_FAIL; + status->ec = GH__ERROR_CODE__HTTP_OTHER; + + trace2_data_string("gvfs-helper", NULL, "error/http", + status->error_message.buf); + return; +} + +/* + * Map CURLE errors code to a retry strategy. + * See and + * https://curl.haxx.se/libcurl/c/libcurl-errors.html + * + * This could be a static table rather than a switch, but + * that is harder to debug and we may want to selectively + * log errors. + * + * I've commented out all of the hard-fail cases for now + * and let the default handle them. This is to indicate + * that I considered them and found them to be not actionable. + * Also, the spelling of some of the CURLE_ symbols seem + * to change between curl releases on different platforms, + * so I'm not going to fight that. + */ +static void compute_retry_mode_from_curl_error( + struct gh__response_status *status) +{ + switch (status->curl_code) { + case CURLE_OK: + status->retry = GH__RETRY_MODE__SUCCESS; + status->ec = GH__ERROR_CODE__OK; + return; + + //se CURLE_UNSUPPORTED_PROTOCOL: goto hard_fail; + //se CURLE_FAILED_INIT: goto hard_fail; + //se CURLE_URL_MALFORMAT: goto hard_fail; + //se CURLE_NOT_BUILT_IN: goto hard_fail; + //se CURLE_COULDNT_RESOLVE_PROXY: goto hard_fail; + //se CURLE_COULDNT_RESOLVE_HOST: goto hard_fail; + case CURLE_COULDNT_CONNECT: goto transient; + //se CURLE_WEIRD_SERVER_REPLY: goto hard_fail; + //se CURLE_REMOTE_ACCESS_DENIED: goto hard_fail; + //se CURLE_FTP_ACCEPT_FAILED: goto hard_fail; + //se CURLE_FTP_WEIRD_PASS_REPLY: goto hard_fail; + //se CURLE_FTP_ACCEPT_TIMEOUT: goto hard_fail; + //se CURLE_FTP_WEIRD_PASV_REPLY: goto hard_fail; + //se CURLE_FTP_WEIRD_227_FORMAT: goto hard_fail; + //se CURLE_FTP_CANT_GET_HOST: goto hard_fail; + case CURLE_HTTP2: goto transient; + //se CURLE_FTP_COULDNT_SET_TYPE: goto hard_fail; + case CURLE_PARTIAL_FILE: goto transient; + //se CURLE_FTP_COULDNT_RETR_FILE: goto hard_fail; + //se CURLE_OBSOLETE20: goto hard_fail; + //se CURLE_QUOTE_ERROR: goto hard_fail; + //se CURLE_HTTP_RETURNED_ERROR: goto hard_fail; + case CURLE_WRITE_ERROR: goto transient; + //se CURLE_OBSOLETE24: goto hard_fail; + case CURLE_UPLOAD_FAILED: goto transient; + //se CURLE_READ_ERROR: goto hard_fail; + //se CURLE_OUT_OF_MEMORY: goto hard_fail; + case CURLE_OPERATION_TIMEDOUT: goto transient; + //se CURLE_OBSOLETE29: goto hard_fail; + //se CURLE_FTP_PORT_FAILED: goto hard_fail; + //se CURLE_FTP_COULDNT_USE_REST: goto hard_fail; + //se CURLE_OBSOLETE32: goto hard_fail; + //se CURLE_RANGE_ERROR: goto hard_fail; + case CURLE_HTTP_POST_ERROR: goto transient; + //se CURLE_SSL_CONNECT_ERROR: goto hard_fail; + //se CURLE_BAD_DOWNLOAD_RESUME: goto hard_fail; + //se CURLE_FILE_COULDNT_READ_FILE: goto hard_fail; + //se CURLE_LDAP_CANNOT_BIND: goto hard_fail; + //se CURLE_LDAP_SEARCH_FAILED: goto hard_fail; + //se CURLE_OBSOLETE40: goto hard_fail; + //se CURLE_FUNCTION_NOT_FOUND: goto hard_fail; + //se CURLE_ABORTED_BY_CALLBACK: goto hard_fail; + //se CURLE_BAD_FUNCTION_ARGUMENT: goto hard_fail; + //se CURLE_OBSOLETE44: goto hard_fail; + //se CURLE_INTERFACE_FAILED: goto hard_fail; + //se CURLE_OBSOLETE46: goto hard_fail; + //se CURLE_TOO_MANY_REDIRECTS: goto hard_fail; + //se CURLE_UNKNOWN_OPTION: goto hard_fail; + //se CURLE_TELNET_OPTION_SYNTAX: goto hard_fail; + //se CURLE_OBSOLETE50: goto hard_fail; + //se CURLE_PEER_FAILED_VERIFICATION: goto hard_fail; + //se CURLE_GOT_NOTHING: goto hard_fail; + //se CURLE_SSL_ENGINE_NOTFOUND: goto hard_fail; + //se CURLE_SSL_ENGINE_SETFAILED: goto hard_fail; + case CURLE_SEND_ERROR: goto transient; + case CURLE_RECV_ERROR: goto transient; + //se CURLE_OBSOLETE57: goto hard_fail; + //se CURLE_SSL_CERTPROBLEM: goto hard_fail; + //se CURLE_SSL_CIPHER: goto hard_fail; + //se CURLE_SSL_CACERT: goto hard_fail; + //se CURLE_BAD_CONTENT_ENCODING: goto hard_fail; + //se CURLE_LDAP_INVALID_URL: goto hard_fail; + //se CURLE_FILESIZE_EXCEEDED: goto hard_fail; + //se CURLE_USE_SSL_FAILED: goto hard_fail; + //se CURLE_SEND_FAIL_REWIND: goto hard_fail; + //se CURLE_SSL_ENGINE_INITFAILED: goto hard_fail; + //se CURLE_LOGIN_DENIED: goto hard_fail; + //se CURLE_TFTP_NOTFOUND: goto hard_fail; + //se CURLE_TFTP_PERM: goto hard_fail; + //se CURLE_REMOTE_DISK_FULL: goto hard_fail; + //se CURLE_TFTP_ILLEGAL: goto hard_fail; + //se CURLE_TFTP_UNKNOWNID: goto hard_fail; + //se CURLE_REMOTE_FILE_EXISTS: goto hard_fail; + //se CURLE_TFTP_NOSUCHUSER: goto hard_fail; + //se CURLE_CONV_FAILED: goto hard_fail; + //se CURLE_CONV_REQD: goto hard_fail; + //se CURLE_SSL_CACERT_BADFILE: goto hard_fail; + //se CURLE_REMOTE_FILE_NOT_FOUND: goto hard_fail; + //se CURLE_SSH: goto hard_fail; + //se CURLE_SSL_SHUTDOWN_FAILED: goto hard_fail; + case CURLE_AGAIN: goto transient; + //se CURLE_SSL_CRL_BADFILE: goto hard_fail; + //se CURLE_SSL_ISSUER_ERROR: goto hard_fail; + //se CURLE_FTP_PRET_FAILED: goto hard_fail; + //se CURLE_RTSP_CSEQ_ERROR: goto hard_fail; + //se CURLE_RTSP_SESSION_ERROR: goto hard_fail; + //se CURLE_FTP_BAD_FILE_LIST: goto hard_fail; + //se CURLE_CHUNK_FAILED: goto hard_fail; + //se CURLE_NO_CONNECTION_AVAILABLE: goto hard_fail; + //se CURLE_SSL_PINNEDPUBKEYNOTMATCH: goto hard_fail; + //se CURLE_SSL_INVALIDCERTSTATUS: goto hard_fail; +#ifdef CURLE_HTTP2_STREAM + case CURLE_HTTP2_STREAM: goto transient; +#endif + default: goto hard_fail; + } + +hard_fail: + strbuf_addf(&status->error_message, "(curl:%d) %s [hard_fail]", + status->curl_code, + curl_easy_strerror(status->curl_code)); + status->retry = GH__RETRY_MODE__HARD_FAIL; + status->ec = GH__ERROR_CODE__CURL_ERROR; + + trace2_data_string("gvfs-helper", NULL, "error/curl", + status->error_message.buf); + return; + +transient: + strbuf_addf(&status->error_message, "(curl:%d) %s [transient]", + status->curl_code, + curl_easy_strerror(status->curl_code)); + status->retry = GH__RETRY_MODE__TRANSIENT; + status->ec = GH__ERROR_CODE__CURL_ERROR; + + trace2_data_string("gvfs-helper", NULL, "error/curl", + status->error_message.buf); + return; } /* @@ -390,32 +859,18 @@ static void gh__response_status__set_from_slot( strbuf_setlen(&status->error_message, 0); - if (status->response_code == 200) - status->ec = GH__ERROR_CODE__OK; - - else if (status->response_code == 401) { - strbuf_addstr(&status->error_message, "401 Not Authorized"); - status->ec = GH__ERROR_CODE__HTTP_401; - - } else if (status->response_code == 404) { - strbuf_addstr(&status->error_message, "404 Not Found"); - status->ec = GH__ERROR_CODE__HTTP_404; - - } else if (status->curl_code != CURLE_OK) { - strbuf_addf(&status->error_message, "%s (curl)", - curl_easy_strerror(status->curl_code)); - status->ec = GH__ERROR_CODE__CURL_ERROR; - - trace2_data_string("gvfs-helper", NULL, - "error/curl", status->error_message.buf); - } else { - strbuf_addf(&status->error_message, "HTTP %ld Unexpected", - status->response_code); - status->ec = GH__ERROR_CODE__HTTP_UNEXPECTED_CODE; + gh__response_status__normalize_odd_codes(params, status); - trace2_data_string("gvfs-helper", NULL, - "error/http", status->error_message.buf); - } + /* + * Use normalized response/status codes form curl/http to decide + * how to set the error-code we propagate *AND* to decide if we + * we should retry because of transient network problems. + */ + if (status->curl_code == CURLE_OK || + status->curl_code == CURLE_HTTP_RETURNED_ERROR) + compute_retry_mode_from_http_response(status); + else + compute_retry_mode_from_curl_error(status); if (status->ec != GH__ERROR_CODE__OK) status->bytes_received = 0; @@ -433,26 +888,6 @@ static void gh__response_status__release(struct gh__response_status *status) strbuf_release(&status->content_type); } -/* - * The cache-server sends a somewhat bogus 400 instead of - * the normal 401 when AUTH is required. Fixup the status - * to hide that. - */ -static void fixup_cache_server_400_to_401(struct gh__response_status *status) -{ - if (status->response_code != 400) - return; - - /* - * TODO Technically, the cache-server could send a 400 - * TODO for many reasons, not just for their bogus - * TODO pseudo-401, but we're going to assume it is a - * TODO 401 for now. We should confirm the expected - * TODO error message in the response-body. - */ - status->response_code = 401; -} - static int gh__curl_progress_cb(void *clientp, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow) @@ -543,8 +978,13 @@ static int gh__curl_progress_cb(void *clientp, enter_phase_2: strbuf_setlen(¶ms->progress_msg, 0); if (params->progress_base_phase2_msg.len) { - strbuf_addf(¶ms->progress_msg, "%s (bytes sent)", - params->progress_base_phase2_msg.buf); + if (params->k_attempt > 0) + strbuf_addf(¶ms->progress_msg, "%s [retry %d/%d] (bytes sent)", + params->progress_base_phase2_msg.buf, + params->k_attempt, gh__cmd_opts.max_retries); + else + strbuf_addf(¶ms->progress_msg, "%s (bytes sent)", + params->progress_base_phase2_msg.buf); params->progress = start_progress(params->progress_msg.buf, ultotal); display_progress(params->progress, ulnow); } @@ -554,8 +994,13 @@ static int gh__curl_progress_cb(void *clientp, enter_phase_3: strbuf_setlen(¶ms->progress_msg, 0); if (params->progress_base_phase3_msg.len) { - strbuf_addf(¶ms->progress_msg, "%s (bytes received)", - params->progress_base_phase3_msg.buf); + if (params->k_attempt > 0) + strbuf_addf(¶ms->progress_msg, "%s [retry %d/%d] (bytes received)", + params->progress_base_phase3_msg.buf, + params->k_attempt, gh__cmd_opts.max_retries); + else + strbuf_addf(¶ms->progress_msg, "%s (bytes received)", + params->progress_base_phase3_msg.buf); params->progress = start_progress(params->progress_msg.buf, dltotal); display_progress(params->progress, dlnow); } @@ -572,12 +1017,19 @@ static void gh__run_one_slot(struct active_request_slot *slot, struct gh__request_params *params, struct gh__response_status *status) { - trace2_region_enter("gvfs-helper", params->label.buf, NULL); + struct strbuf key = STRBUF_INIT; + + strbuf_addbuf(&key, ¶ms->tr2_label); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + params->progress_state = GH__PROGRESS_STATE__START; + strbuf_setlen(¶ms->e2eid, 0); + + trace2_region_enter("gvfs-helper", key.buf, NULL); if (!start_active_slot(slot)) { status->curl_code = CURLE_FAILED_INIT; /* a bit of a lie */ - strbuf_addstr(&status->error_message, - "failed to start HTTP request"); + compute_retry_mode_from_curl_error(status); } else { run_active_slot(slot); if (params->b_write_to_file) @@ -585,27 +1037,38 @@ static void gh__run_one_slot(struct active_request_slot *slot, gh__response_status__set_from_slot(params, status, slot); - if (status->ec == GH__ERROR_CODE__OK) { - int old_len = params->label.len; + log_e2eid(params, status); - strbuf_addstr(¶ms->label, "/nr_objects"); - trace2_data_intmax("gvfs-helper", NULL, - params->label.buf, - params->object_count); - strbuf_setlen(¶ms->label, old_len); + if (status->ec == GH__ERROR_CODE__OK) { + int old_len = key.len; - strbuf_addstr(¶ms->label, "/nr_bytes"); + /* + * We only log the number of bytes received. + * We do not log the number of objects requested + * because the server may give us more than that + * (such as when we request a commit). + */ + strbuf_addstr(&key, "/nr_bytes"); trace2_data_intmax("gvfs-helper", NULL, - params->label.buf, + key.buf, status->bytes_received); - strbuf_setlen(¶ms->label, old_len); + strbuf_setlen(&key, old_len); } } if (params->progress) stop_progress(¶ms->progress); - trace2_region_leave("gvfs-helper", params->label.buf, NULL); + if (status->ec == GH__ERROR_CODE__OK && params->b_write_to_file) { + if (params->b_is_post) + install_packfile(params, status); + else + install_loose(params, status); + } + + trace2_region_leave("gvfs-helper", key.buf, NULL); + + strbuf_release(&key); } static int option_parse_cache_server_mode(const struct option *opt, @@ -1053,41 +1516,115 @@ static void select_odb(void) * * TODO Consider using lockfile for this rather than naked tempfile. */ -static struct tempfile *create_tempfile_for_packfile(void) +static void create_tempfile_for_packfile( + struct gh__request_params *params, + struct gh__response_status *status) { static unsigned int nth = 0; static struct timeval tv = {0}; static struct tm tm = {0}; static time_t secs = 0; - static char tbuf[32] = {0}; + static char date[32] = {0}; - struct tempfile *tempfile = NULL; - struct strbuf buf_path = STRBUF_INIT; + struct strbuf basename = STRBUF_INIT; + struct strbuf buf = STRBUF_INIT; + int len_p; + enum scld_error scld; + + gh__response_status__zero(status); if (!nth) { + /* + * Create a string to use in the name of all packfiles + * created by this process. + */ gettimeofday(&tv, NULL); secs = tv.tv_sec; gmtime_r(&secs, &tm); - xsnprintf(tbuf, sizeof(tbuf), "%4d%02d%02d-%02d%02d%02d-%06ld", + xsnprintf(date, sizeof(date), "%4d%02d%02d-%02d%02d%02d-%06ld", tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, (long)tv.tv_usec); } - // TODO should this be in the "/pack/tempPacks/" - // TODO directory instead? YES + /* + * Create a for this packfile using a series number , + * so that all of the chunks we download will group together. + */ + strbuf_addf(&basename, "vfs-%s-%04d", date, nth++); - strbuf_addbuf(&buf_path, &gh__global.buf_odb_path); - strbuf_complete(&buf_path, '/'); - strbuf_addf(&buf_path, "pack/vfs-%s-%04d.temp", tbuf, nth++); + /* + * We will stream the data into a managed tempfile() in: + * + * "/pack/tempPacks/vfs--.temp" + */ + strbuf_setlen(&buf, 0); + strbuf_addbuf(&buf, &gh__global.buf_odb_path); + strbuf_complete(&buf, '/'); + strbuf_addstr(&buf, "pack/"); + len_p = buf.len; + strbuf_addstr(&buf, "tempPacks/"); + strbuf_addbuf(&buf, &basename); + strbuf_addstr(&buf, ".temp"); + + scld = safe_create_leading_directories(buf.buf); + if (scld != SCLD_OK && scld != SCLD_EXISTS) { + strbuf_addf(&status->error_message, + "could not create directory for packfile: '%s'", + buf.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + goto cleanup; + } + + params->tempfile = create_tempfile(buf.buf); + if (!params->tempfile) { + strbuf_addf(&status->error_message, + "could not create tempfile for packfile: '%s'", + buf.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + goto cleanup; + } - tempfile = create_tempfile(buf_path.buf); - fdopen_tempfile(tempfile, "w"); + fdopen_tempfile(params->tempfile, "w"); - strbuf_release(&buf_path); + /* + * After the download is complete, we will need to steal the file + * from the tempfile() class (so that it doesn't magically delete + * it when we close the file handle) and then index it. + * + * We do this into the tempPacks directory to avoid contaminating + * the real pack directory until we know there is no corruption. + * + * "/pack/tempPacks/vfs--.temp.pack" + * "/pack/tempPacks/vfs--.temp.idx" + */ + strbuf_setlen(¶ms->temp_path_pack, 0); + strbuf_addf(¶ms->temp_path_pack, "%s.pack", buf.buf); + + strbuf_setlen(¶ms->temp_path_idx, 0); + strbuf_addf(¶ms->temp_path_idx, "%s.idx", buf.buf); + + /* + * Later, if all goes well, we will install them as: + * + * "/pack/vfs--.pack" + * "/pack/vfs--.idx" + */ + strbuf_setlen(&buf, len_p); + strbuf_setlen(¶ms->final_path_pack, 0); + strbuf_addf(¶ms->final_path_pack, "%s%s.pack", + buf.buf, basename.buf); + strbuf_setlen(¶ms->final_path_idx, 0); + strbuf_addf(¶ms->final_path_idx, "%s%s.idx", + buf.buf, basename.buf); + strbuf_setlen(¶ms->final_packfile_filename, 0); + strbuf_addf(¶ms->final_packfile_filename, "%s.pack", + basename.buf); - return tempfile; +cleanup: + strbuf_release(&buf); + strbuf_release(&basename); } /* @@ -1100,15 +1637,15 @@ static struct tempfile *create_tempfile_for_packfile(void) */ static void create_tempfile_for_loose( struct gh__request_params *params, - struct gh__response_status *status, - const struct object_id *oid) + struct gh__response_status *status) { + static int nth = 0; struct strbuf buf_path = STRBUF_INIT; const char *hex; gh__response_status__zero(status); - hex = oid_to_hex(oid); + hex = oid_to_hex(¶ms->loose_oid); strbuf_addbuf(&buf_path, &gh__global.buf_odb_path); strbuf_complete(&buf_path, '/'); @@ -1136,7 +1673,7 @@ static void create_tempfile_for_loose( * using lockfiles to avoid issues with stale locks after * crashes. */ - strbuf_addf(&buf_path, ".%08u.temp", getpid()); + strbuf_addf(&buf_path, ".%08u.%.06u.temp", getpid(), nth++); params->tempfile = create_tempfile(buf_path.buf); if (!params->tempfile) { @@ -1153,85 +1690,34 @@ static void create_tempfile_for_loose( } /* - * Extract the filename portion of the given pathname. - * - * TODO Wish I could find a strbuf_filename() function for this. + * Convert the tempfile into a temporary .pack, index it into a temporary .idx + * file, and then install the pair into ODB. */ -static void extract_filename(struct strbuf *filename, - const struct strbuf *pathname) -{ - size_t len = pathname->len; - - strbuf_setlen(filename, 0); - - while (len > 0 && !is_dir_sep(pathname->buf[len - 1])) - len--; - - strbuf_addstr(filename, &pathname->buf[len]); -} - -/* - * Convert the tempfile into a permanent .pack packfile in the ODB. - * Create the corresponding .idx file. - * - * Return the filename (not pathname) of the resulting packfile. - */ -static void install_packfile(struct gh__response_status *status, - struct tempfile **pp_tempfile, - struct strbuf *packfile_filename) +static void install_packfile(struct gh__request_params *params, + struct gh__response_status *status) { struct child_process ip = CHILD_PROCESS_INIT; - struct strbuf pack_name_tmp = STRBUF_INIT; - struct strbuf pack_name_dst = STRBUF_INIT; - struct strbuf idx_name_tmp = STRBUF_INIT; - struct strbuf idx_name_dst = STRBUF_INIT; - size_t len_base; - - gh__response_status__zero(status); - - strbuf_setlen(packfile_filename, 0); /* - * start with ".temp" (that is owned by tempfile class). - * rename to ".pack.temp" to break ownership. - * - * create ".idx.temp" on provisional packfile. - * - * officially install both ".{pack,idx}.temp" as - * ".{pack,idx}". + * When we request more than 1 object, the server should always + * send us a packfile. */ - - strbuf_addstr(&pack_name_tmp, get_tempfile_path(*pp_tempfile)); - if (!strip_suffix(pack_name_tmp.buf, ".temp", &len_base)) { - /* - * This is more of a BUG(), but I want the error - * code propagated. - */ + if (strcmp(status->content_type.buf, + "application/x-git-packfile")) { strbuf_addf(&status->error_message, - "packfile tempfile does not end in '.temp': '%s'", - pack_name_tmp.buf); - status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + "received unknown content-type '%s'", + status->content_type.buf); + status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; goto cleanup; } - strbuf_setlen(&pack_name_tmp, (int)len_base); - strbuf_addbuf(&pack_name_dst, &pack_name_tmp); - strbuf_addbuf(&idx_name_tmp, &pack_name_tmp); - strbuf_addbuf(&idx_name_dst, &pack_name_tmp); - - strbuf_addstr(&pack_name_tmp, ".pack.temp"); - strbuf_addstr(&pack_name_dst, ".pack"); - strbuf_addstr(&idx_name_tmp, ".idx.temp"); - strbuf_addstr(&idx_name_dst, ".idx"); - - // TODO if either pack_name_dst or idx_name_dst already - // TODO exists in the ODB, create alternate names so that - // TODO we don't step on them. + gh__response_status__zero(status); - if (rename_tempfile(pp_tempfile, pack_name_tmp.buf) == -1) { + if (rename_tempfile(¶ms->tempfile, + params->temp_path_pack.buf) == -1) { strbuf_addf(&status->error_message, "could not rename packfile to '%s'", - pack_name_tmp.buf); + params->temp_path_pack.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; goto cleanup; } @@ -1239,59 +1725,54 @@ static void install_packfile(struct gh__response_status *status, argv_array_push(&ip.args, "index-pack"); if (gh__cmd_opts.show_progress) argv_array_push(&ip.args, "-v"); - argv_array_pushl(&ip.args, "-o", idx_name_tmp.buf, NULL); - argv_array_push(&ip.args, pack_name_tmp.buf); + argv_array_pushl(&ip.args, "-o", params->temp_path_idx.buf, NULL); + argv_array_push(&ip.args, params->temp_path_pack.buf); ip.git_cmd = 1; ip.no_stdin = 1; ip.no_stdout = 1; - // TODO consider capturing stdout from index-pack because - // TODO it will contain the SHA of the packfile and we can - // TODO (should?) add it to the .pack and .idx pathnames - // TODO when we install them. - // TODO - // TODO See pipe_command() rather than run_command(). - // TODO - // TODO Or should be SHA-it ourselves (or read the last 20 bytes)? - /* - * Note that I DO NOT have a region around the index-pack process. - * The region in gh__run_one_slot() currently only covers the - * download time. This index-pack is a separate step not covered - * in the above region. Later, if/when we have CURL directly stream - * to index-pack, that region will be the combined download+index - * time. So, I'm not going to introduce it here. + * Note that I DO NOT have a trace2 region around the + * index-pack process by itself. Currently, we are inside the + * trace2 region for running the request and that's fine. + * Later, if/when we stream the download directly to + * index-pack, it will be inside under the same region anyway. + * So, I'm not going to introduce it here. */ if (run_command(&ip)) { - unlink(pack_name_tmp.buf); - unlink(idx_name_tmp.buf); + unlink(params->temp_path_pack.buf); + unlink(params->temp_path_idx.buf); strbuf_addf(&status->error_message, - "index-pack failed on '%s'", pack_name_tmp.buf); - status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + "index-pack failed on '%s'", + params->temp_path_pack.buf); + /* + * Lets assume that index-pack failed because the + * downloaded file is corrupt (truncated). + * + * Retry it as if the network had dropped. + */ + status->retry = GH__RETRY_MODE__TRANSIENT; + status->ec = GH__ERROR_CODE__INDEX_PACK_FAILED; goto cleanup; } - if (finalize_object_file(pack_name_tmp.buf, pack_name_dst.buf) || - finalize_object_file(idx_name_tmp.buf, idx_name_dst.buf)) { - unlink(pack_name_tmp.buf); - unlink(pack_name_dst.buf); - unlink(idx_name_tmp.buf); - unlink(idx_name_dst.buf); + if (finalize_object_file(params->temp_path_pack.buf, + params->final_path_pack.buf) || + finalize_object_file(params->temp_path_idx.buf, + params->final_path_idx.buf)) { + unlink(params->temp_path_pack.buf); + unlink(params->temp_path_idx.buf); + unlink(params->final_path_pack.buf); + unlink(params->final_path_idx.buf); strbuf_addf(&status->error_message, "could not install packfile '%s'", - pack_name_dst.buf); + params->final_path_pack.buf); status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; goto cleanup; } - extract_filename(packfile_filename, &pack_name_dst); - cleanup: child_process_clear(&ip); - strbuf_release(&pack_name_tmp); - strbuf_release(&pack_name_dst); - strbuf_release(&idx_name_tmp); - strbuf_release(&idx_name_dst); } /* @@ -1361,7 +1842,250 @@ static void gh_http_cleanup(void) } /* - * Do a single HTTP request without auth-retry or fallback. + * buffer has ": [\r]\n" + */ +static void parse_resp_hdr_1(const char *buffer, size_t size, size_t nitems, + struct strbuf *key, struct strbuf *value) +{ + const char *end = buffer + (size * nitems); + const char *p; + + p = strchr(buffer, ':'); + + strbuf_setlen(key, 0); + strbuf_add(key, buffer, (p - buffer)); + + p++; /* skip ':' */ + p++; /* skip ' ' */ + + strbuf_setlen(value, 0); + strbuf_add(value, p, (end - p)); + strbuf_trim_trailing_newline(value); +} + +static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems, + void *void_params) +{ + struct gh__request_params *params = void_params; + struct gh__azure_throttle *azure = &gh__global_throttle[params->server_type]; + + if (starts_with(buffer, "X-RateLimit-")) { + struct strbuf key = STRBUF_INIT; + struct strbuf val = STRBUF_INIT; + + parse_resp_hdr_1(buffer, size, nitems, &key, &val); + + /* + * The following X- headers are specific to AzureDevOps. + * Other servers have similar sets of values, but I haven't + * compared them in depth. + * + * TODO Remove this. + */ + trace2_printf("Throttle: %s %s", key.buf, val.buf); + + if (!strcmp(key.buf, "X-RateLimit-Resource")) { + /* + * The name of the resource that is complaining. + * Just log it because we can't do anything with it. + */ + strbuf_setlen(&key, 0); + strbuf_addstr(&key, "ratelimit/resource"); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + trace2_data_string("gvfs-helper", NULL, key.buf, val.buf); + } + + else if (!strcmp(key.buf, "X-RateLimit-Delay")) { + /* + * The amount of delay added to our response. + * Just log it because we can't do anything with it. + */ + unsigned long tarpit_delay_ms; + + strbuf_setlen(&key, 0); + strbuf_addstr(&key, "ratelimit/delay_ms"); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + git_parse_ulong(val.buf, &tarpit_delay_ms); + + trace2_data_intmax("gvfs-helper", NULL, key.buf, tarpit_delay_ms); + } + + else if (!strcmp(key.buf, "X-RateLimit-Limit")) { + /* + * The resource limit/quota before we get a 429. + */ + git_parse_ulong(val.buf, &azure->tstu_limit); + } + + else if (!strcmp(key.buf, "X-RateLimit-Remaining")) { + /* + * The amount of our quota remaining. When zero, we + * should get 429s on futher requests until the reset + * time. + */ + git_parse_ulong(val.buf, &azure->tstu_remaining); + } + + else if (!strcmp(key.buf, "X-RateLimit-Reset")) { + /* + * The server gave us a time-in-seconds-since-the-epoch + * for when our quota will be reset (if we stop all + * activity right now). + * + * Checkpoint the local system clock so we can do some + * sanity checks on any clock skew. Also, since we get + * the headers before we get the content, we can adjust + * our delay to compensate for the full download time. + */ + unsigned long now = time(NULL); + unsigned long reset_time; + + git_parse_ulong(val.buf, &reset_time); + if (reset_time > now) + azure->reset_sec = reset_time - now; + } + + strbuf_release(&key); + strbuf_release(&val); + } + + else if (starts_with(buffer, "Retry-After")) { + struct strbuf key = STRBUF_INIT; + struct strbuf val = STRBUF_INIT; + + parse_resp_hdr_1(buffer, size, nitems, &key, &val); + + /* + * We get this header with a 429 and 503 and possibly a 30x. + * + * Curl does have CURLINFO_RETRY_AFTER that nicely parses and + * normalizes the value (and supports HTTP/1.1 usage), but it + * is not present yet in the version shipped with the Mac, so + * we do it directly here. + */ + git_parse_ulong(val.buf, &azure->retry_after_sec); + + strbuf_release(&key); + strbuf_release(&val); + } + + else if (starts_with(buffer, "X-VSS-E2EID")) { + struct strbuf key = STRBUF_INIT; + + /* + * Capture the E2EID as it goes by, but don't log it until we + * know the request result. + */ + parse_resp_hdr_1(buffer, size, nitems, &key, ¶ms->e2eid); + + strbuf_release(&key); + } + + return nitems * size; +} + +/* + * Wait "duration" seconds and drive the progress mechanism. + * + * We spin slightly faster than we need to to keep the progress bar + * drawn (especially if the user presses return while waiting) and to + * compensate for delay factors built into the progress class (which + * might wait for 2 seconds before drawing the first message). + */ +static void do_throttle_spin(struct gh__request_params *params, + const char *tr2_label, + const char *progress_msg, + int duration) +{ + struct strbuf region = STRBUF_INIT; + struct progress *progress = NULL; + unsigned long begin = time(NULL); + unsigned long now = begin; + unsigned long end = begin + duration; + + strbuf_addstr(®ion, tr2_label); + strbuf_addstr(®ion, gh__server_type_label[params->server_type]); + trace2_region_enter("gvfs-helper", region.buf, NULL); + + progress = start_progress(progress_msg, duration); + while (now < end) { + display_progress(progress, (now - begin)); + + sleep_millisec(100); + + now = time(NULL); + } + display_progress(progress, duration); + stop_progress(&progress); + + trace2_region_leave("gvfs-helper", region.buf, NULL); + strbuf_release(®ion); +} + +/* + * Delay the outbound request if necessary in response to previous throttle + * blockages or hints. Throttle data is somewhat orthogonal to the status + * results from any previous request and/or the request params of the next + * request. + * + * Note that the throttle info also is cross-process information, such as + * 2 concurrent fetches in 2 different terminal windows to the same server + * will be sharing the same server quota. These could be coordinated too, + * so that a blockage received in one process would prevent the other + * process from starting another request (and also blocked or extending + * the delay interval). We're NOT going to do that level of integration. + * We will let both processes independently attempt the next request. + * This may cause us to miss the end-of-quota boundary if the server + * extends it because of the second request. + * + * TODO Should we have a max-wait option and then return a hard-error + * TODO of some type? + */ +static void do_throttle_wait(struct gh__request_params *params, + struct gh__response_status *status) +{ + struct gh__azure_throttle *azure = + &gh__global_throttle[params->server_type]; + + if (azure->retry_after_sec) { + /* + * We were given a hard delay (such as after a 429). + * Spin until the requested time. + */ + do_throttle_spin(params, "throttle/hard", + "Waiting on hard throttle (sec)", + azure->retry_after_sec); + return; + } + + if (azure->reset_sec > 0) { + /* + * We were given a hint that we are overloading + * the server. Voluntarily backoff (before we + * get tarpitted or blocked). + */ + do_throttle_spin(params, "throttle/soft", + "Waiting on soft throttle (sec)", + azure->reset_sec); + return; + } + + if (params->k_transient_delay_sec) { + /* + * Insert an arbitrary delay before retrying after a + * transient (network) failure. + */ + do_throttle_spin(params, "throttle/transient", + "Waiting to retry after network error (sec)", + params->k_transient_delay_sec); + return; + } +} + +/* + * Do a single HTTP request WITHOUT robust-retry, auth-retry or fallback. */ static void do_req(const char *url_base, const char *url_component, @@ -1376,14 +2100,27 @@ static void do_req(const char *url_base, gh__response_status__zero(status); if (params->b_write_to_file) { - // TODO ftruncate tempfile ?? + /* Delete dirty tempfile from a previous attempt. */ + if (params->tempfile) + delete_tempfile(¶ms->tempfile); + + if (params->b_is_post) + create_tempfile_for_packfile(params, status); + else + create_tempfile_for_loose(params, status); + if (!params->tempfile || status->ec != GH__ERROR_CODE__OK) + return; } else { + /* Guard against caller using dirty buffer */ strbuf_setlen(params->buffer, 0); } end_url_with_slash(&rest_url, url_base); strbuf_addstr(&rest_url, url_component); + do_throttle_wait(params, status); + gh__azure_throttle__zero(&gh__global_throttle[params->server_type]); + slot = get_active_slot(); slot->results = &results; @@ -1412,6 +2149,9 @@ static void do_req(const char *url_base, curl_easy_setopt(slot->curl, CURLOPT_FILE, params->buffer); } + curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, parse_resp_hdr); + curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, params); + if (creds && creds->username) { /* * Force CURL to respect the username/password we provide by @@ -1438,8 +2178,8 @@ static void do_req(const char *url_base, * with the main Git server (because the cache-server * doesn't handle 401s). * - * TODO Think about if we really need to handle this case - * TODO and if so, add an .is_main to params. + * TODO Think about if we really need to handle this case. + * TODO Guard with "if (params->sever_type == __MAIN)" */ curl_easy_setopt(slot->curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY); } @@ -1457,20 +2197,123 @@ static void do_req(const char *url_base, gh__run_one_slot(slot, params, status); } +/* + * Compute the delay for the nth attempt. + * + * No delay for the first attempt. Then use a normal exponential backoff + * starting from 8. + */ +static int compute_transient_delay(int attempt) +{ + int v; + + if (attempt < 1) + return 0; + + /* + * Let 8K be our hard limit (for integer overflow protection). + * That's over 2 hours. This is 8<<10. + */ + if (attempt > 10) + attempt = 10; + + v = 8 << (attempt - 1); + + if (v > gh__cmd_opts.max_transient_backoff_sec) + v = gh__cmd_opts.max_transient_backoff_sec; + + return v; +} + +/* + * Robustly make an HTTP request. Retry if necessary to hide common + * transient network errors and/or 429 blockages. + * + * For a transient (network) failure (where we do not have a throttle + * delay factor), we should insert a small delay to let the network + * recover. The outage might be because the VPN dropped, or the + * machine went to sleep or something and we want to give the network + * time to come back up. Insert AI here :-) + */ +static void do_req__with_robust_retry(const char *url_base, + const char *url_component, + const struct credential *creds, + struct gh__request_params *params, + struct gh__response_status *status) +{ + for (params->k_attempt = 0; + params->k_attempt < gh__cmd_opts.max_retries + 1; + params->k_attempt++) { + + do_req(url_base, url_component, creds, params, status); + + switch (status->retry) { + default: + case GH__RETRY_MODE__SUCCESS: + case GH__RETRY_MODE__HTTP_401: /* caller does auth-retry */ + case GH__RETRY_MODE__HARD_FAIL: + case GH__RETRY_MODE__FAIL_404: + return; + + case GH__RETRY_MODE__HTTP_429: + case GH__RETRY_MODE__HTTP_503: + /* + * We should have gotten a "Retry-After" header with + * these and that gives us the wait time. If not, + * fallthru and use the backoff delay. + */ + if (gh__global_throttle[params->server_type].retry_after_sec) + continue; + /*fallthru*/ + + case GH__RETRY_MODE__TRANSIENT: + params->k_transient_delay_sec = + compute_transient_delay(params->k_attempt); + continue; + } + } +} + static void do_req__to_main(const char *url_component, struct gh__request_params *params, struct gh__response_status *status) { -// lookup_main_creds(); + params->server_type = GH__SERVER_TYPE__MAIN; - do_req(gh__global.main_url, url_component, &gh__global.main_creds, - params, status); + /* + * When talking to the main Git server, we do allow non-auth'd + * initial requests (mainly for testing, since production servers + * will usually always want creds), so we DO NOT force load the + * user's creds here and let the normal 401 mechanism handle things. + * This has a slight perf penalty -- if we're bulk fetching 4000 + * objects from a production server, we're going to do a 100K+ byte + * POST just to get a 401 and then repeat the 100K+ byte POST + * with the creds. + * + * TODO Consider making a test or runtime flag to alter this. + * TODO If set/not-set, add a call here to pre-populate the creds. + * TODO + * TODO lookup_main_creds(); + * TODO + * + * TODO Testing with GIT_TRACE_CURL shows that curl will *SOMETIME* + * TODO send an "Expect: 100-continue" and silently handle/eat the + * TODO "HTTP/1.1 100 Continue" before the POST-body is sent, + * TODO so maybe this isn't an issue. (Other than the extra + * TODO roundtrip for the first set of headers.) More testing here + * TODO should be performed. + */ + + do_req__with_robust_retry(gh__global.main_url, url_component, + &gh__global.main_creds, + params, status); if (status->response_code == 401) { refresh_main_creds(); - do_req(gh__global.main_url, url_component, &gh__global.main_creds, - params, status); + do_req__with_robust_retry(gh__global.main_url, url_component, + &gh__global.main_creds, + params, status); } if (status->response_code == 200) @@ -1481,29 +2324,36 @@ static void do_req__to_cache_server(const char *url_component, struct gh__request_params *params, struct gh__response_status *status) { + params->server_type = GH__SERVER_TYPE__CACHE; + synthesize_cache_server_creds(); - do_req(gh__global.cache_server_url, url_component, &gh__global.cache_creds, - params, status); - fixup_cache_server_400_to_401(status); + do_req__with_robust_retry(gh__global.cache_server_url, url_component, + &gh__global.cache_creds, + params, status); if (status->response_code == 401) { refresh_cache_server_creds(); - do_req(gh__global.cache_server_url, url_component, - &gh__global.cache_creds, params, status); - fixup_cache_server_400_to_401(status); + do_req__with_robust_retry(gh__global.cache_server_url, + url_component, + &gh__global.cache_creds, + params, status); } if (status->response_code == 200) approve_cache_server_creds(); } +/* + * Try the cache-server (if configured) then fall-back to the main Git server. + */ static void do_req__with_fallback(const char *url_component, struct gh__request_params *params, struct gh__response_status *status) { - if (gh__global.cache_server_url && !params->b_no_cache_server) { + if (gh__global.cache_server_url && + params->b_permit_cache_server_if_defined) { do_req__to_cache_server(url_component, params, status); if (status->response_code == 200) @@ -1538,11 +2388,12 @@ static void do__gvfs_config(struct gh__response_status *status, { struct gh__request_params params = GH__REQUEST_PARAMS_INIT; - strbuf_addstr(¶ms.label, "GET/config"); + strbuf_addstr(¶ms.tr2_label, "GET/config"); params.b_is_post = 0; params.b_write_to_file = 0; - params.b_no_cache_server = 1; /* they don't handle gvfs/config API */ + /* cache-servers do not handle gvfs/config REST calls */ + params.b_permit_cache_server_if_defined = 0; params.buffer = config_data; params.object_count = 1; /* a bit of a lie */ @@ -1587,11 +2438,11 @@ static void do__loose__gvfs_object(struct gh__response_status *status, strbuf_addf(&component_url, "gvfs/objects/%s", oid_to_hex(oid)); - strbuf_addstr(¶ms.label, "GET/objects"); + strbuf_addstr(¶ms.tr2_label, "GET/objects"); params.b_is_post = 0; params.b_write_to_file = 1; - params.b_no_cache_server = 0; + params.b_permit_cache_server_if_defined = 1; params.object_count = 1; @@ -1601,9 +2452,7 @@ static void do__loose__gvfs_object(struct gh__response_status *status, params.headers = curl_slist_append(params.headers, "Pragma: no-cache"); - create_tempfile_for_loose(¶ms, status, oid); - if (!params.tempfile) - goto cleanup; + oidcpy(¶ms.loose_oid, oid); if (gh__cmd_opts.show_progress) { /* @@ -1618,10 +2467,6 @@ static void do__loose__gvfs_object(struct gh__response_status *status, do_req__with_fallback(component_url.buf, ¶ms, status); - if (status->ec == GH__ERROR_CODE__OK) - install_loose(¶ms, status); - -cleanup: gh__request_params__release(¶ms); strbuf_release(&component_url); } @@ -1634,23 +2479,26 @@ static void do__loose__gvfs_object(struct gh__response_status *status, static void do__packfile__gvfs_objects(struct gh__response_status *status, struct oidset_iter *iter, unsigned long nr_wanted_in_block, + int j_pack_num, int j_pack_den, struct strbuf *output_filename, - unsigned long *nr_taken) + unsigned long *nr_oid_taken) { struct json_writer jw_req = JSON_WRITER_INIT; struct gh__request_params params = GH__REQUEST_PARAMS_INIT; + strbuf_setlen(output_filename, 0); + gh__response_status__zero(status); params.object_count = build_json_payload__gvfs_objects( &jw_req, iter, nr_wanted_in_block); - *nr_taken = params.object_count; + *nr_oid_taken = params.object_count; - strbuf_addstr(¶ms.label, "POST/objects"); + strbuf_addstr(¶ms.tr2_label, "POST/objects"); params.b_is_post = 1; params.b_write_to_file = 1; - params.b_no_cache_server = 0; + params.b_permit_cache_server_if_defined = 1; params.post_payload = &jw_req.json; @@ -1675,73 +2523,21 @@ static void do__packfile__gvfs_objects(struct gh__response_status *status, params.headers = curl_slist_append(params.headers, "Accept: application/x-git-loose-object"); - params.tempfile = create_tempfile_for_packfile(); - if (!params.tempfile) { - strbuf_addstr(&status->error_message, - "could not create tempfile for packfile"); - status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; - goto cleanup; - } - if (gh__cmd_opts.show_progress) { strbuf_addf(¶ms.progress_base_phase2_msg, - "Requesting packfile with %ld objects", + "Requesting packfile %d/%d with %ld objects", + j_pack_num, j_pack_den, params.object_count); strbuf_addf(¶ms.progress_base_phase3_msg, - "Receiving packfile with %ld objects", + "Receiving packfile %d/%d with %ld objects", + j_pack_num, j_pack_den, params.object_count); } do_req__with_fallback("gvfs/objects", ¶ms, status); + if (status->ec == GH__ERROR_CODE__OK) + strbuf_addbuf(output_filename, ¶ms.final_packfile_filename); - if (status->ec == GH__ERROR_CODE__OK) { - if (!strcmp(status->content_type.buf, - "application/x-git-packfile")) { - - // TODO Consider having a worker thread to manage - // TODO running index-pack and then install the - // TODO resulting .idx and .pack files. This would - // TODO let us interleave those steps with our thread - // TODO fetching the next block of objects from the - // TODO server. (Need to think about how progress - // TODO messages from our thread and index-pack - // TODO would mesh.) - // TODO - // TODO But then again, if we hack index-pack to write - // TODO to our alternate and stream the data thru it, - // TODO it won't matter. - - install_packfile(status, ¶ms.tempfile, - output_filename); - goto cleanup; - } - - if (!strcmp(status->content_type.buf, - "application/x-git-loose-object")) - { - /* - * This should not happen (when we request - * more than one object). The server can send - * us a loose object (even when we use the - * POST form) if there is only one object in - * the payload (and despite the set of accept - * headers we send), so I'm going to leave - * this here. - */ - strbuf_addstr(&status->error_message, - "received loose object when packfile expected"); - status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; - goto cleanup; - } - - strbuf_addf(&status->error_message, - "received unknown content-type '%s'", - status->content_type.buf); - status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; - goto cleanup; - } - -cleanup: gh__request_params__release(¶ms); jw_release(&jw_req); } @@ -1755,7 +2551,7 @@ static void do__packfile__gvfs_objects(struct gh__response_status *status, */ static void do_fetch_oidset(struct gh__response_status *status, struct oidset *oids, - unsigned long nr_total, + unsigned long nr_oid_total, struct string_list *result_list) { struct oidset_iter iter; @@ -1764,19 +2560,25 @@ static void do_fetch_oidset(struct gh__response_status *status, struct strbuf err404 = STRBUF_INIT; const struct object_id *oid; unsigned long k; - unsigned long nr_taken; + unsigned long nr_oid_taken; int had_404 = 0; + int j_pack_den = 0; + int j_pack_num = 0; gh__response_status__zero(status); - if (!nr_total) + if (!nr_oid_total) return; + if (nr_oid_total > 1) + j_pack_den = ((nr_oid_total + gh__cmd_opts.block_size - 1) + / gh__cmd_opts.block_size); + oidset_iter_init(oids, &iter); - for (k = 0; k < nr_total; k += nr_taken) { - if (nr_total - k == 1 || gh__cmd_opts.block_size == 1) { + for (k = 0; k < nr_oid_total; k += nr_oid_taken) { + if (nr_oid_total - k == 1 || gh__cmd_opts.block_size == 1) { oid = oidset_iter_next(&iter); - nr_taken = 1; + nr_oid_taken = 1; do__loose__gvfs_object(status, oid); @@ -1811,10 +2613,13 @@ static void do_fetch_oidset(struct gh__response_status *status, } else { strbuf_setlen(&output_filename, 0); + j_pack_num++; + do__packfile__gvfs_objects(status, &iter, gh__cmd_opts.block_size, + j_pack_num, j_pack_den, &output_filename, - &nr_taken); + &nr_oid_taken); /* * Because the oidset iterator has random @@ -1930,6 +2735,8 @@ static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) N_("number of objects to request at a time")), OPT_INTEGER('d', "depth", &gh__cmd_opts.depth, N_("Commit depth")), + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), OPT_END(), }; @@ -1937,7 +2744,7 @@ static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) struct oidset oids = OIDSET_INIT; struct string_list result_list = STRING_LIST_INIT_DUP; enum gh__error_code ec = GH__ERROR_CODE__OK; - unsigned long nr_total; + unsigned long nr_oid_total; int k; trace2_cmd_mode("get"); @@ -1948,14 +2755,16 @@ static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) argc = parse_options(argc, argv, NULL, get_options, get_usage, 0); if (gh__cmd_opts.depth < 1) gh__cmd_opts.depth = 1; + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; finish_init(1); - nr_total = read_stdin_from_rev_list(&oids); + nr_oid_total = read_stdin_from_rev_list(&oids); trace2_region_enter("gvfs-helper", "get", NULL); - trace2_data_intmax("gvfs-helper", NULL, "get/nr_objects", nr_total); - do_fetch_oidset(&status, &oids, nr_total, &result_list); + trace2_data_intmax("gvfs-helper", NULL, "get/nr_objects", nr_oid_total); + do_fetch_oidset(&status, &oids, nr_oid_total, &result_list); trace2_region_leave("gvfs-helper", "get", NULL); ec = status.ec; @@ -1990,7 +2799,7 @@ static enum gh__error_code do_server_subprocess_get(void) int len; int err; int k; - unsigned long nr_total = 0; + unsigned long nr_oid_total = 0; /* * Inside the "get" command, we expect a list of OIDs @@ -2008,10 +2817,10 @@ static enum gh__error_code do_server_subprocess_get(void) } if (!oidset_insert(&oids, &oid)) - nr_total++; + nr_oid_total++; } - if (!nr_total) { + if (!nr_oid_total) { if (packet_write_fmt_gently(1, "ok\n")) { error("server: cannot write 'get' result to client"); ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; @@ -2021,8 +2830,8 @@ static enum gh__error_code do_server_subprocess_get(void) } trace2_region_enter("gvfs-helper", "server/get", NULL); - trace2_data_intmax("gvfs-helper", NULL, "server/get/nr_objects", nr_total); - do_fetch_oidset(&status, &oids, nr_total, &result_list); + trace2_data_intmax("gvfs-helper", NULL, "server/get/nr_objects", nr_oid_total); + do_fetch_oidset(&status, &oids, nr_oid_total, &result_list); trace2_region_leave("gvfs-helper", "server/get", NULL); /* @@ -2180,6 +2989,8 @@ static enum gh__error_code do_sub_cmd__server(int argc, const char **argv) N_("number of objects to request at a time")), OPT_INTEGER('d', "depth", &gh__cmd_opts.depth, N_("Commit depth")), + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), OPT_END(), }; @@ -2196,6 +3007,8 @@ static enum gh__error_code do_sub_cmd__server(int argc, const char **argv) argc = parse_options(argc, argv, NULL, server_options, server_usage, 0); if (gh__cmd_opts.depth < 1) gh__cmd_opts.depth = 1; + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; finish_init(1); @@ -2285,13 +3098,23 @@ int cmd_main(int argc, const char **argv) setup_git_directory_gently(NULL); - git_config(git_default_config, NULL); - /* Set any non-zero initial values in gh__cmd_opts. */ - gh__cmd_opts.depth = 1; + gh__cmd_opts.depth = GH__DEFAULT_COMMIT_DEPTH; gh__cmd_opts.block_size = GH__DEFAULT_BLOCK_SIZE; + gh__cmd_opts.max_retries = GH__DEFAULT_MAX_RETRIES; + gh__cmd_opts.max_transient_backoff_sec = + GH__DEFAULT_MAX_TRANSIENT_BACKOFF_SEC; + gh__cmd_opts.show_progress = !!isatty(2); + // TODO use existing gvfs config settings to override our GH__DEFAULT_ + // TODO values in gh__cmd_opts. (And maybe add/remove our command line + // TODO options for them.) + // TODO + // TODO See "scalar.max-retries" (and maybe "gvfs.max-retries") + + git_config(git_default_config, NULL); + argc = parse_options(argc, argv, NULL, main_options, main_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (argc == 0) From 5c65e9a00a765524c3adf42e3d3ddcd1af4d60e1 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 21 Oct 2019 12:29:19 -0400 Subject: [PATCH 2/2] gvfs-helper: expose gvfs/objects GET and POST semantics Expose the differences in the semantics of GET and POST for the "gvfs/objects" API: HTTP GET: fetches a single loose object over the network. When a commit object is requested, it just returns the single object. HTTP POST: fetches a batch of objects over the network. When the oid-set contains a commit object, all referenced trees are also included in the response. gvfs-helper is updated to take "get" and "post" command line options. the gvfs-helper "server" mode is updated to take "objects.get" and "objects.post" verbs. For convenience, the "get" option and the "objects.get" verb do allow more than one object to be requested. gvfs-helper will automatically issue a series of (single object) HTTP GET requests and creating a series of loose objects. The "post" option and the "objects.post" verb will perform bulk object fetching using the batch-size chunking. Individual HTTP POST requests containing more than one object will be created as a packfile. A HTTP POST for a single object will create a loose object. This commit also contains some refactoring to eliminate the assumption that POST is always associated with packfiles. In gvfs-helper-client.c, gh_client__get_immediate() now uses the "objects.get" verb and ignores any currently queued objects. In gvfs-helper-client.c, the OIDSET built by gh_client__queue_oid() is only processed when gh_client__drain_queue() is called. The queue is processed using the "object.post" verb. Signed-off-by: Jeff Hostetler --- gvfs-helper-client.c | 225 +++++++++----- gvfs-helper-client.h | 26 +- gvfs-helper.c | 712 +++++++++++++++++++++++++++---------------- 3 files changed, 622 insertions(+), 341 deletions(-) diff --git a/gvfs-helper-client.c b/gvfs-helper-client.c index 1564610aae594b..3bcec53cfe2acc 100644 --- a/gvfs-helper-client.c +++ b/gvfs-helper-client.c @@ -13,7 +13,6 @@ static struct oidset gh_client__oidset_queued = OIDSET_INIT; static unsigned long gh_client__oidset_count; -static int gh_client__includes_immediate; struct gh_server__process { struct subprocess_entry subprocess; /* must be first */ @@ -24,13 +23,20 @@ static int gh_server__subprocess_map_initialized; static struct hashmap gh_server__subprocess_map; static struct object_directory *gh_client__chosen_odb; -#define CAP_GET (1u<<1) +/* + * The "objects" capability has 2 verbs: "get" and "post". + */ +#define CAP_OBJECTS (1u<<1) +#define CAP_OBJECTS_NAME "objects" + +#define CAP_OBJECTS__VERB_GET1_NAME "get" +#define CAP_OBJECTS__VERB_POST_NAME "post" static int gh_client__start_fn(struct subprocess_entry *subprocess) { static int versions[] = {1, 0}; static struct subprocess_capability capabilities[] = { - { "get", CAP_GET }, + { CAP_OBJECTS_NAME, CAP_OBJECTS }, { NULL, 0 } }; @@ -42,14 +48,16 @@ static int gh_client__start_fn(struct subprocess_entry *subprocess) } /* - * Send: + * Send the queued OIDs in the OIDSET to gvfs-helper for it to + * fetch from the cache-server or main Git server using "/gvfs/objects" + * POST semantics. * - * get LF + * objects.post LF * ( LF)* * * */ -static int gh_client__get__send_command(struct child_process *process) +static int gh_client__send__objects_post(struct child_process *process) { struct oidset_iter iter; struct object_id *oid; @@ -60,7 +68,9 @@ static int gh_client__get__send_command(struct child_process *process) * so that we don't have to. */ - err = packet_write_fmt_gently(process->in, "get\n"); + err = packet_write_fmt_gently( + process->in, + (CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_POST_NAME "\n")); if (err) return err; @@ -79,6 +89,46 @@ static int gh_client__get__send_command(struct child_process *process) return 0; } +/* + * Send the given OID to gvfs-helper for it to fetch from the + * cache-server or main Git server using "/gvfs/objects" GET + * semantics. + * + * This ignores any queued OIDs. + * + * objects.get LF + * LF + * + * + */ +static int gh_client__send__objects_get(struct child_process *process, + const struct object_id *oid) +{ + int err; + + /* + * We assume that all of the packet_ routines call error() + * so that we don't have to. + */ + + err = packet_write_fmt_gently( + process->in, + (CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_GET1_NAME "\n")); + if (err) + return err; + + err = packet_write_fmt_gently(process->in, "%s\n", + oid_to_hex(oid)); + if (err) + return err; + + err = packet_flush_gently(process->in); + if (err) + return err; + + return 0; +} + /* * Verify that the pathname found in the "odb" response line matches * what we requested. @@ -148,7 +198,7 @@ static void gh_client__update_packed_git(const char *line) } /* - * We expect: + * Both CAP_OBJECTS verbs return the same format response: * * * * @@ -179,7 +229,7 @@ static void gh_client__update_packed_git(const char *line) * grouped with a queued request for a blob. The tree-walk *might* be * able to continue and let the 404 blob be handled later. */ -static int gh_client__get__receive_response( +static int gh_client__objects__receive_response( struct child_process *process, enum gh_client__created *p_ghc, int *p_nr_loose, int *p_nr_packfile) @@ -259,17 +309,12 @@ static void gh_client__choose_odb(void) } } -static int gh_client__get(enum gh_client__created *p_ghc) +static struct gh_server__process *gh_client__find_long_running_process( + unsigned int cap_needed) { struct gh_server__process *entry; - struct child_process *process; struct argv_array argv = ARGV_ARRAY_INIT; struct strbuf quoted = STRBUF_INIT; - int nr_loose = 0; - int nr_packfile = 0; - int err = 0; - - trace2_region_enter("gh-client", "get", the_repository); gh_client__choose_odb(); @@ -285,6 +330,11 @@ static int gh_client__get(enum gh_client__created *p_ghc) sq_quote_argv_pretty("ed, argv.argv); + /* + * Find an existing long-running process with the above command + * line -or- create a new long-running process for this and + * subsequent 'get' requests. + */ if (!gh_server__subprocess_map_initialized) { gh_server__subprocess_map_initialized = 1; hashmap_init(&gh_server__subprocess_map, @@ -298,70 +348,24 @@ static int gh_client__get(enum gh_client__created *p_ghc) entry = xmalloc(sizeof(*entry)); entry->supported_capabilities = 0; - err = subprocess_start_argv( - &gh_server__subprocess_map, &entry->subprocess, 1, - &argv, gh_client__start_fn); - if (err) { - free(entry); - goto leave_region; - } + if (subprocess_start_argv(&gh_server__subprocess_map, + &entry->subprocess, 1, + &argv, gh_client__start_fn)) + FREE_AND_NULL(entry); } - process = &entry->subprocess.process; - - if (!(CAP_GET & entry->supported_capabilities)) { - error("gvfs-helper: does not support GET"); + if (entry && + (entry->supported_capabilities & cap_needed) != cap_needed) { + error("gvfs-helper: does not support needed capabilities"); subprocess_stop(&gh_server__subprocess_map, (struct subprocess_entry *)entry); - free(entry); - err = -1; - goto leave_region; + FREE_AND_NULL(entry); } - sigchain_push(SIGPIPE, SIG_IGN); - - err = gh_client__get__send_command(process); - if (!err) - err = gh_client__get__receive_response(process, p_ghc, - &nr_loose, &nr_packfile); - - sigchain_pop(SIGPIPE); - - if (err) { - subprocess_stop(&gh_server__subprocess_map, - (struct subprocess_entry *)entry); - free(entry); - } - -leave_region: argv_array_clear(&argv); strbuf_release("ed); - trace2_data_intmax("gh-client", the_repository, - "get/immediate", gh_client__includes_immediate); - - trace2_data_intmax("gh-client", the_repository, - "get/nr_objects", gh_client__oidset_count); - - if (nr_loose) - trace2_data_intmax("gh-client", the_repository, - "get/nr_loose", nr_loose); - - if (nr_packfile) - trace2_data_intmax("gh-client", the_repository, - "get/nr_packfile", nr_packfile); - - if (err) - trace2_data_intmax("gh-client", the_repository, - "get/error", err); - - trace2_region_leave("gh-client", "get", the_repository); - - oidset_clear(&gh_client__oidset_queued); - gh_client__oidset_count = 0; - gh_client__includes_immediate = 0; - - return err; + return entry; } void gh_client__queue_oid(const struct object_id *oid) @@ -388,28 +392,97 @@ void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr) gh_client__queue_oid(&oids[k]); } +/* + * Bulk fetch all of the queued OIDs in the OIDSET. + */ int gh_client__drain_queue(enum gh_client__created *p_ghc) { + struct gh_server__process *entry; + struct child_process *process; + int nr_loose = 0; + int nr_packfile = 0; + int err = 0; + *p_ghc = GHC__CREATED__NOTHING; if (!gh_client__oidset_count) return 0; - return gh_client__get(p_ghc); + entry = gh_client__find_long_running_process(CAP_OBJECTS); + if (!entry) + return -1; + + trace2_region_enter("gh-client", "objects/post", the_repository); + + process = &entry->subprocess.process; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = gh_client__send__objects_post(process); + if (!err) + err = gh_client__objects__receive_response( + process, p_ghc, &nr_loose, &nr_packfile); + + sigchain_pop(SIGPIPE); + + if (err) { + subprocess_stop(&gh_server__subprocess_map, + (struct subprocess_entry *)entry); + FREE_AND_NULL(entry); + } + + trace2_data_intmax("gh-client", the_repository, + "objects/post/nr_objects", gh_client__oidset_count); + trace2_region_leave("gh-client", "objects/post", the_repository); + + oidset_clear(&gh_client__oidset_queued); + gh_client__oidset_count = 0; + + return err; } +/* + * Get exactly 1 object immediately. + * Ignore any queued objects. + */ int gh_client__get_immediate(const struct object_id *oid, enum gh_client__created *p_ghc) { - gh_client__includes_immediate = 1; + struct gh_server__process *entry; + struct child_process *process; + int nr_loose = 0; + int nr_packfile = 0; + int err = 0; // TODO consider removing this trace2. it is useful for interactive // TODO debugging, but may generate way too much noise for a data // TODO event. trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid)); - if (!oidset_insert(&gh_client__oidset_queued, oid)) - gh_client__oidset_count++; + entry = gh_client__find_long_running_process(CAP_OBJECTS); + if (!entry) + return -1; + + trace2_region_enter("gh-client", "objects/get", the_repository); - return gh_client__drain_queue(p_ghc); + process = &entry->subprocess.process; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = gh_client__send__objects_get(process, oid); + if (!err) + err = gh_client__objects__receive_response( + process, p_ghc, &nr_loose, &nr_packfile); + + sigchain_pop(SIGPIPE); + + if (err) { + subprocess_stop(&gh_server__subprocess_map, + (struct subprocess_entry *)entry); + FREE_AND_NULL(entry); + } + + trace2_region_leave("gh-client", "objects/get", the_repository); + + return err; } diff --git a/gvfs-helper-client.h b/gvfs-helper-client.h index 5f5e824bb0771a..a5a951ff5b5bfe 100644 --- a/gvfs-helper-client.h +++ b/gvfs-helper-client.h @@ -32,13 +32,14 @@ enum gh_client__created { }; /* - * Ask `gvfs-helper server` to immediately fetch an object. - * Wait for the response. + * Ask `gvfs-helper server` to immediately fetch a single object + * using "/gvfs/objects" GET semantics. * - * This may also fetch any queued (non-immediate) objects and - * so may create one or more loose objects and/or packfiles. - * It is undefined whether the requested OID will be loose or - * in a packfile. + * A long-running background process is used to make subsequent + * requests more efficient. + * + * A loose object will be created in the shared-cache ODB and + * in-memory cache updated. */ int gh_client__get_immediate(const struct object_id *oid, enum gh_client__created *p_ghc); @@ -47,16 +48,21 @@ int gh_client__get_immediate(const struct object_id *oid, * Queue this OID for a future fetch using `gvfs-helper service`. * It does not wait. * - * The GHC layer is free to process this queue in any way it wants, - * including individual fetches, bulk fetches, and batching. And - * it may add queued objects to immediate requests. - * * Callers should not rely on the queued object being on disk until * the queue has been drained. */ void gh_client__queue_oid(const struct object_id *oid); void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr); +/* + * Ask `gvfs-helper server` to fetch the set of queued OIDs using + * "/gvfs/objects" POST semantics. + * + * A long-running background process is used to subsequent requests + * more efficient. + * + * One or more packfiles will be created in the shared-cache ODB. + */ int gh_client__drain_queue(enum gh_client__created *p_ghc); #endif /* GVFS_HELPER_CLIENT_H */ diff --git a/gvfs-helper.c b/gvfs-helper.c index f13eace2c3cca0..34d98ff3807774 100644 --- a/gvfs-helper.c +++ b/gvfs-helper.c @@ -46,7 +46,28 @@ // // get // -// Fetch 1 or more objects. If a cache-server is configured, +// Fetch 1 or more objects one at a time using a "/gvfs/objects" +// GET request. +// +// If a cache-server is configured, +// try it first. Optionally fallback to the main Git server. +// +// The set of objects is given on stdin and is assumed to be +// a list of , one per line. +// +// : +// +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. +// Set to zero to disable such retries. +// +// post +// +// Fetch 1 or more objects in bulk using a "/gvfs/objects" POST +// request. +// +// If a cache-server is configured, // try it first. Optionally fallback to the main Git server. // // The set of objects is given on stdin and is assumed to be @@ -78,7 +99,8 @@ // --block-size= // defaults to "4000" // // Request objects from server in batches of at -// most n objects (not bytes). +// most n objects (not bytes) when using POST +// requests. // // --depth= // defaults to "1" // @@ -87,17 +109,27 @@ // Number of retries after transient network errors. // Set to zero to disable such retries. // -// Interactive verb: get +// Interactive verb: objects.get +// +// Fetch 1 or more objects, one at a time, using a +// "/gvfs/ojbects" GET requests. +// +// Each object will be created as a loose object in the ODB. +// +// Interactive verb: objects.post // -// Fetch 1 or more objects. If a cache-server is configured, -// try it first. Optionally fallback to the main Git server. +// Fetch 1 or more objects, in bulk, using one or more +// "/gvfs/objects" POST requests. +// +// For both verbs, if a cache-server is configured, try it first. +// Optionally fallback to the main Git server. // // Create 1 or more loose objects and/or packfiles in the // shared-cache ODB. (The pathname of the selected ODB is // reported at the beginning of the response; this should // match the pathname given on the command line). // -// git> get +// git> objects.get | objects.post // git> // git> // git> ... @@ -116,20 +148,6 @@ // [2] Documentation/technical/long-running-process-protocol.txt // [3] See GIT_TRACE_PACKET // -// Example: -// -// $ git -c core.virtualizeobjects=false -c core.usegvfshelper=false -// rev-list --objects --no-walk --missing=print HEAD -// | grep "^?" -// | sed 's/^?//' -// | git gvfs-helper get-missing -// -// Note: In this example, we need to turn off "core.virtualizeobjects" and -// "core.usegvfshelper" when building the list of objects. This prevents -// rev-list (in oid_object_info_extended() from automatically fetching -// them with read-object-hook or "gvfs-helper server" sub-process (and -// defeating the whole purpose of this example). -// ////////////////////////////////////////////////////////////////// #include "cache.h" @@ -162,15 +180,21 @@ static const char * const main_usage[] = { N_("git gvfs-helper [] config []"), N_("git gvfs-helper [] get []"), + N_("git gvfs-helper [] post []"), N_("git gvfs-helper [] server []"), NULL }; -static const char *const get_usage[] = { +static const char *const objects_get_usage[] = { N_("git gvfs-helper [] get []"), NULL }; +static const char *const objects_post_usage[] = { + N_("git gvfs-helper [] post []"), + NULL +}; + static const char *const server_usage[] = { N_("git gvfs-helper [] server []"), NULL @@ -179,12 +203,12 @@ static const char *const server_usage[] = { /* * "commitDepth" field in gvfs protocol */ -#define GH__DEFAULT_COMMIT_DEPTH 1 +#define GH__DEFAULT__OBJECTS_POST__COMMIT_DEPTH 1 /* * Chunk/block size in number of objects we request in each packfile */ -#define GH__DEFAULT_BLOCK_SIZE 4000 +#define GH__DEFAULT__OBJECTS_POST__BLOCK_SIZE 4000 /* * Retry attempts (after the initial request) for transient errors and 429s. @@ -278,6 +302,28 @@ static const char *gh__server_type_label[GH__SERVER_TYPE__NR] = { "(cs)" }; +enum gh__objects_mode { + /* + * Bulk fetch objects. + * + * But also, force the use of HTTP POST regardless of how many + * objects we are requesting. + * + * The GVFS Protocol treats requests for commit objects + * differently in GET and POST requests WRT whether it + * automatically also fetches the referenced trees. + */ + GH__OBJECTS_MODE__POST, + + /* + * Fetch objects one at a time using HTTP GET. + * + * Force the use of GET (primarily because of the commit + * object treatment). + */ + GH__OBJECTS_MODE__GET, +}; + struct gh__azure_throttle { unsigned long tstu_limit; @@ -333,7 +379,20 @@ enum gh__progress_state { * Parameters to drive an HTTP request (with any necessary retries). */ struct gh__request_params { - int b_is_post; /* POST=1 or GET=0 */ + /* + * b_is_post indicates if the current HTTP request is a POST=1 or + * a GET=0. This is a lower level field used to setup CURL and + * the tempfile used to receive the content. + * + * It is related to, but different from the GH__OBJECTS_MODE__ + * field that we present to the gvfs-helper client or in the CLI + * (which only concerns the semantics of the /gvfs/objects protocol + * on the set of requested OIDs). + * + * For example, we use an HTTP GET to get the /gvfs/config data + * into a buffer. + */ + int b_is_post; int b_write_to_file; /* write to file=1 or strbuf=0 */ int b_permit_cache_server_if_defined; @@ -496,8 +555,6 @@ enum gh__retry_mode { struct gh__response_status { struct strbuf error_message; struct strbuf content_type; - long response_code; /* http response code */ - CURLcode curl_code; enum gh__error_code ec; enum gh__retry_mode retry; intmax_t bytes_received; @@ -507,8 +564,6 @@ struct gh__response_status { #define GH__RESPONSE_STATUS_INIT { \ .error_message = STRBUF_INIT, \ .content_type = STRBUF_INIT, \ - .response_code = 0, \ - .curl_code = CURLE_OK, \ .ec = GH__ERROR_CODE__OK, \ .retry = GH__RETRY_MODE__SUCCESS, \ .bytes_received = 0, \ @@ -519,8 +574,6 @@ static void gh__response_status__zero(struct gh__response_status *s) { strbuf_setlen(&s->error_message, 0); strbuf_setlen(&s->content_type, 0); - s->response_code = 0; - s->curl_code = CURLE_OK; s->ec = GH__ERROR_CODE__OK; s->retry = GH__RETRY_MODE__SUCCESS; s->bytes_received = 0; @@ -574,15 +627,14 @@ static void log_e2eid(struct gh__request_params *params, } /* - * Normalize a few error codes before we try to decide + * Normalize a few HTTP response codes before we try to decide * how to dispatch on them. */ -static void gh__response_status__normalize_odd_codes( - struct gh__request_params *params, - struct gh__response_status *status) +static long gh__normalize_odd_codes(struct gh__request_params *params, + long http_response_code) { if (params->server_type == GH__SERVER_TYPE__CACHE && - status->response_code == 400) { + http_response_code == 400) { /* * The cache-server sends a somewhat bogus 400 instead of * the normal 401 when AUTH is required. Fixup the status @@ -594,16 +646,18 @@ static void gh__response_status__normalize_odd_codes( * TODO 401 for now. We should confirm the expected * TODO error message in the response-body. */ - status->response_code = 401; + return 401; } - if (status->response_code == 203) { + if (http_response_code == 203) { /* * A proxy server transformed a 200 from the origin server * into a 203. We don't care about the subtle distinction. */ - status->response_code = 200; + return 200; } + + return http_response_code; } /* @@ -613,9 +667,10 @@ static void gh__response_status__normalize_odd_codes( * https://docs.microsoft.com/en-us/azure/devops/integrate/concepts/rate-limits?view=azure-devops */ static void compute_retry_mode_from_http_response( - struct gh__response_status *status) + struct gh__response_status *status, + long http_response_code) { - switch (status->response_code) { + switch (http_response_code) { case 200: status->retry = GH__RETRY_MODE__SUCCESS; @@ -687,7 +742,7 @@ static void compute_retry_mode_from_http_response( hard_fail: strbuf_addf(&status->error_message, "(http:%d) Other [hard_fail]", - (int)status->response_code); + (int)http_response_code); status->retry = GH__RETRY_MODE__HARD_FAIL; status->ec = GH__ERROR_CODE__HTTP_OTHER; @@ -713,9 +768,10 @@ static void compute_retry_mode_from_http_response( * so I'm not going to fight that. */ static void compute_retry_mode_from_curl_error( - struct gh__response_status *status) + struct gh__response_status *status, + CURLcode curl_code) { - switch (status->curl_code) { + switch (curl_code) { case CURLE_OK: status->retry = GH__RETRY_MODE__SUCCESS; status->ec = GH__ERROR_CODE__OK; @@ -820,8 +876,7 @@ static void compute_retry_mode_from_curl_error( hard_fail: strbuf_addf(&status->error_message, "(curl:%d) %s [hard_fail]", - status->curl_code, - curl_easy_strerror(status->curl_code)); + curl_code, curl_easy_strerror(curl_code)); status->retry = GH__RETRY_MODE__HARD_FAIL; status->ec = GH__ERROR_CODE__CURL_ERROR; @@ -831,8 +886,7 @@ static void compute_retry_mode_from_curl_error( transient: strbuf_addf(&status->error_message, "(curl:%d) %s [transient]", - status->curl_code, - curl_easy_strerror(status->curl_code)); + curl_code, curl_easy_strerror(curl_code)); status->retry = GH__RETRY_MODE__TRANSIENT; status->ec = GH__ERROR_CODE__CURL_ERROR; @@ -851,26 +905,31 @@ static void gh__response_status__set_from_slot( struct gh__response_status *status, const struct active_request_slot *slot) { - status->curl_code = slot->results->curl_result; + long http_response_code; + CURLcode curl_code; + + curl_code = slot->results->curl_result; gh__curlinfo_strbuf(slot->curl, CURLINFO_CONTENT_TYPE, &status->content_type); curl_easy_getinfo(slot->curl, CURLINFO_RESPONSE_CODE, - &status->response_code); + &http_response_code); strbuf_setlen(&status->error_message, 0); - gh__response_status__normalize_odd_codes(params, status); + http_response_code = gh__normalize_odd_codes(params, + http_response_code); /* * Use normalized response/status codes form curl/http to decide * how to set the error-code we propagate *AND* to decide if we * we should retry because of transient network problems. */ - if (status->curl_code == CURLE_OK || - status->curl_code == CURLE_HTTP_RETURNED_ERROR) - compute_retry_mode_from_http_response(status); + if (curl_code == CURLE_OK || + curl_code == CURLE_HTTP_RETURNED_ERROR) + compute_retry_mode_from_http_response(status, + http_response_code); else - compute_retry_mode_from_curl_error(status); + compute_retry_mode_from_curl_error(status, curl_code); if (status->ec != GH__ERROR_CODE__OK) status->bytes_received = 0; @@ -1028,8 +1087,8 @@ static void gh__run_one_slot(struct active_request_slot *slot, trace2_region_enter("gvfs-helper", key.buf, NULL); if (!start_active_slot(slot)) { - status->curl_code = CURLE_FAILED_INIT; /* a bit of a lie */ - compute_retry_mode_from_curl_error(status); + compute_retry_mode_from_curl_error(status, + CURLE_FAILED_INIT); } else { run_active_slot(slot); if (params->b_write_to_file) @@ -1060,7 +1119,7 @@ static void gh__run_one_slot(struct active_request_slot *slot, stop_progress(¶ms->progress); if (status->ec == GH__ERROR_CODE__OK && params->b_write_to_file) { - if (params->b_is_post) + if (params->b_is_post && params->object_count > 1) install_packfile(params, status); else install_loose(params, status); @@ -1216,8 +1275,8 @@ static void lookup_main_url(void) trace2_data_string("gvfs-helper", NULL, "remote/url", gh__global.main_url); } -static void do__gvfs_config(struct gh__response_status *status, - struct strbuf *config_data); +static void do__http_get__gvfs_config(struct gh__response_status *status, + struct strbuf *config_data); /* * Find the URL of the cache-server, if we have one. @@ -1276,7 +1335,7 @@ static void select_cache_server(void) * well-known by the main Git server. */ - do__gvfs_config(&status, &config_data); + do__http_get__gvfs_config(&status, &config_data); if (status.ec == GH__ERROR_CODE__OK) { /* @@ -1334,13 +1393,10 @@ static void select_cache_server(void) * Read stdin until EOF (or a blank line) and add the desired OIDs * to the oidset. * - * Stdin should contain a list of OIDs. It may have additional - * decoration that we need to strip out. - * - * We expect: - * [] // present OIDs + * Stdin should contain a list of OIDs. Lines may have additional + * text following the OID that we ignore. */ -static unsigned long read_stdin_from_rev_list(struct oidset *oids) +static unsigned long read_stdin_for_oids(struct oidset *oids) { struct object_id oid; struct strbuf buf_stdin = STRBUF_INIT; @@ -1362,17 +1418,23 @@ static unsigned long read_stdin_from_rev_list(struct oidset *oids) /* * Build a complete JSON payload for a gvfs/objects POST request - * containing the first n OIDs in an OIDSET index by the iterator. + * containing the first `nr_in_block` OIDs found in the OIDSET + * indexed by the given iterator. * * https://github.com/microsoft/VFSForGit/blob/master/Protocol.md + * + * Return the number of OIDs we actually put into the payload. + * If only 1 OID was found, also return it. */ static unsigned long build_json_payload__gvfs_objects( struct json_writer *jw_req, struct oidset_iter *iter, - unsigned long nr_in_block) + unsigned long nr_in_block, + struct object_id *oid_out) { unsigned long k; const struct object_id *oid; + const struct object_id *oid_prev = NULL; k = 0; @@ -1383,10 +1445,18 @@ static unsigned long build_json_payload__gvfs_objects( while (k < nr_in_block && (oid = oidset_iter_next(iter))) { jw_array_string(jw_req, oid_to_hex(oid)); k++; + oid_prev = oid; } jw_end(jw_req); jw_end(jw_req); + if (oid_out) { + if (k == 1) + oidcpy(oid_out, oid_prev); + else + oidclr(oid_out); + } + return k; } @@ -1627,6 +1697,33 @@ static void create_tempfile_for_packfile( strbuf_release(&basename); } +/* + * Create a pathname to the loose object in the shared-cache ODB + * with the given OID. Try to "mkdir -p" to ensure the parent + * directories exist. + */ +static int create_loose_pathname_in_odb(struct strbuf *buf_path, + const struct object_id *oid) +{ + enum scld_error scld; + const char *hex; + + hex = oid_to_hex(oid); + + strbuf_setlen(buf_path, 0); + strbuf_addbuf(buf_path, &gh__global.buf_odb_path); + strbuf_complete(buf_path, '/'); + strbuf_add(buf_path, hex, 2); + strbuf_addch(buf_path, '/'); + strbuf_addstr(buf_path, hex+2); + + scld = safe_create_leading_directories(buf_path->buf); + if (scld != SCLD_OK && scld != SCLD_EXISTS) + return -1; + + return 0; +} + /* * Create a tempfile to stream a loose object into. * @@ -1641,19 +1738,10 @@ static void create_tempfile_for_loose( { static int nth = 0; struct strbuf buf_path = STRBUF_INIT; - const char *hex; gh__response_status__zero(status); - hex = oid_to_hex(¶ms->loose_oid); - - strbuf_addbuf(&buf_path, &gh__global.buf_odb_path); - strbuf_complete(&buf_path, '/'); - strbuf_add(&buf_path, hex, 2); - - if (!file_exists(buf_path.buf) && - mkdir(buf_path.buf, 0777) == -1 && - !file_exists(buf_path.buf)) { + if (create_loose_pathname_in_odb(&buf_path, ¶ms->loose_oid)) { strbuf_addf(&status->error_message, "cannot create directory for loose object '%s'", buf_path.buf); @@ -1661,9 +1749,6 @@ static void create_tempfile_for_loose( goto cleanup; } - strbuf_addch(&buf_path, '/'); - strbuf_addstr(&buf_path, hex+2); - /* Remember the full path of the final destination. */ strbuf_setlen(¶ms->loose_path, 0); strbuf_addbuf(¶ms->loose_path, &buf_path); @@ -1705,7 +1790,7 @@ static void install_packfile(struct gh__request_params *params, if (strcmp(status->content_type.buf, "application/x-git-packfile")) { strbuf_addf(&status->error_message, - "received unknown content-type '%s'", + "install_packfile: received unknown content-type '%s'", status->content_type.buf); status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; goto cleanup; @@ -1783,6 +1868,19 @@ static void install_loose(struct gh__request_params *params, { struct strbuf tmp_path = STRBUF_INIT; + /* + * We expect a loose object when we do a GET -or- when we + * do a POST with only 1 object. + */ + if (strcmp(status->content_type.buf, + "application/x-git-loose-object")) { + strbuf_addf(&status->error_message, + "install_loose: received unknown content-type '%s'", + status->content_type.buf); + status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; + return; + } + gh__response_status__zero(status); /* @@ -1879,10 +1977,8 @@ static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems, * The following X- headers are specific to AzureDevOps. * Other servers have similar sets of values, but I haven't * compared them in depth. - * - * TODO Remove this. */ - trace2_printf("Throttle: %s %s", key.buf, val.buf); + // trace2_printf("Throttle: %s %s", key.buf, val.buf); if (!strcmp(key.buf, "X-RateLimit-Resource")) { /* @@ -2104,10 +2200,11 @@ static void do_req(const char *url_base, if (params->tempfile) delete_tempfile(¶ms->tempfile); - if (params->b_is_post) + if (params->b_is_post && params->object_count > 1) create_tempfile_for_packfile(params, status); else create_tempfile_for_loose(params, status); + if (!params->tempfile || status->ec != GH__ERROR_CODE__OK) return; } else { @@ -2308,7 +2405,7 @@ static void do_req__to_main(const char *url_component, &gh__global.main_creds, params, status); - if (status->response_code == 401) { + if (status->retry == GH__RETRY_MODE__HTTP_401) { refresh_main_creds(); do_req__with_robust_retry(gh__global.main_url, url_component, @@ -2316,7 +2413,7 @@ static void do_req__to_main(const char *url_component, params, status); } - if (status->response_code == 200) + if (status->retry == GH__RETRY_MODE__SUCCESS) approve_main_creds(); } @@ -2332,7 +2429,7 @@ static void do_req__to_cache_server(const char *url_component, &gh__global.cache_creds, params, status); - if (status->response_code == 401) { + if (status->retry == GH__RETRY_MODE__HTTP_401) { refresh_cache_server_creds(); do_req__with_robust_retry(gh__global.cache_server_url, @@ -2341,7 +2438,7 @@ static void do_req__to_cache_server(const char *url_component, params, status); } - if (status->response_code == 200) + if (status->retry == GH__RETRY_MODE__SUCCESS) approve_cache_server_creds(); } @@ -2356,7 +2453,7 @@ static void do_req__with_fallback(const char *url_component, params->b_permit_cache_server_if_defined) { do_req__to_cache_server(url_component, params, status); - if (status->response_code == 200) + if (status->retry == GH__RETRY_MODE__SUCCESS) return; if (!gh__cmd_opts.try_fallback) @@ -2371,7 +2468,7 @@ static void do_req__with_fallback(const char *url_component, * Falling-back would likely just cause the 3rd (or maybe * 4th) cred prompt. */ - if (status->response_code == 401) + if (status->retry == GH__RETRY_MODE__HTTP_401) return; } @@ -2383,8 +2480,8 @@ static void do_req__with_fallback(const char *url_component, * * Return server's response buffer. This is probably a raw JSON string. */ -static void do__gvfs_config(struct gh__response_status *status, - struct strbuf *config_data) +static void do__http_get__gvfs_config(struct gh__response_status *status, + struct strbuf *config_data) { struct gh__request_params params = GH__REQUEST_PARAMS_INIT; @@ -2424,12 +2521,34 @@ static void do__gvfs_config(struct gh__response_status *status, gh__request_params__release(¶ms); } +static void setup_gvfs_objects_progress(struct gh__request_params *params, + unsigned long num, unsigned long den) +{ + if (!gh__cmd_opts.show_progress) + return; + + if (params->b_is_post && params->object_count > 1) { + strbuf_addf(¶ms->progress_base_phase2_msg, + "Requesting packfile %ld/%ld with %ld objects", + num, den, params->object_count); + strbuf_addf(¶ms->progress_base_phase3_msg, + "Receiving packfile %ld/%ld with %ld objects", + num, den, params->object_count); + } else { + strbuf_addf(¶ms->progress_base_phase3_msg, + "Receiving %ld/%ld loose object", + num, den); + } +} + /* * Call "gvfs/objects/" REST API to fetch a loose object * and write it to the ODB. */ -static void do__loose__gvfs_object(struct gh__response_status *status, - const struct object_id *oid) +static void do__http_get__gvfs_object(struct gh__response_status *status, + const struct object_id *oid, + unsigned long l_num, unsigned long l_den, + struct string_list *result_list) { struct gh__request_params params = GH__REQUEST_PARAMS_INIT; struct strbuf component_url = STRBUF_INIT; @@ -2454,44 +2573,52 @@ static void do__loose__gvfs_object(struct gh__response_status *status, oidcpy(¶ms.loose_oid, oid); - if (gh__cmd_opts.show_progress) { - /* - * Likewise, a gvfs/objects/{oid} has a very small reqest - * payload, so I don't see any need to report progress on - * the upload side of the GET. So just report progress - * on the download side. - */ - strbuf_addstr(¶ms.progress_base_phase3_msg, - "Receiving 1 loose object"); - } + setup_gvfs_objects_progress(¶ms, l_num, l_den); do_req__with_fallback(component_url.buf, ¶ms, status); + if (status->ec == GH__ERROR_CODE__OK) { + struct strbuf msg = STRBUF_INIT; + + strbuf_addf(&msg, "loose %s", + oid_to_hex(¶ms.loose_oid)); + + string_list_append(result_list, msg.buf); + strbuf_release(&msg); + } + gh__request_params__release(¶ms); strbuf_release(&component_url); } /* - * Call "gvfs/objects" POST REST API to fetch a packfile containing - * the objects in the requested OIDSET. Returns the filename (not - * pathname) to the new packfile. + * Call "gvfs/objects" POST REST API to fetch a batch of objects + * from the OIDSET. Normal, this is results in a packfile containing + * `nr_wanted_in_block` objects. And we return the number actually + * consumed (along with the filename of the resulting packfile). + * + * However, if we only have 1 oid (remaining) in the OIDSET, the + * server will respond to our POST with a loose object rather than + * a packfile with 1 object. + * + * Append a message to the result_list describing the result. + * + * Return the number of OIDs consumed from the OIDSET. */ -static void do__packfile__gvfs_objects(struct gh__response_status *status, - struct oidset_iter *iter, - unsigned long nr_wanted_in_block, - int j_pack_num, int j_pack_den, - struct strbuf *output_filename, - unsigned long *nr_oid_taken) +static void do__http_post__gvfs_objects(struct gh__response_status *status, + struct oidset_iter *iter, + unsigned long nr_wanted_in_block, + int j_pack_num, int j_pack_den, + struct string_list *result_list, + unsigned long *nr_oid_taken) { struct json_writer jw_req = JSON_WRITER_INIT; struct gh__request_params params = GH__REQUEST_PARAMS_INIT; - strbuf_setlen(output_filename, 0); - gh__response_status__zero(status); params.object_count = build_json_payload__gvfs_objects( - &jw_req, iter, nr_wanted_in_block); + &jw_req, iter, nr_wanted_in_block, ¶ms.loose_oid); *nr_oid_taken = params.object_count; strbuf_addstr(¶ms.tr2_label, "POST/objects"); @@ -2511,7 +2638,7 @@ static void do__packfile__gvfs_objects(struct gh__response_status *status, "Content-Type: application/json"); /* * We really always want a packfile. But if the payload only - * requests 1 OID, the server will/may send us a single loose + * requests 1 OID, the server will send us a single loose * objects instead. (Apparently the server ignores us when we * only send application/x-git-packfile and does it anyway.) * @@ -2523,156 +2650,172 @@ static void do__packfile__gvfs_objects(struct gh__response_status *status, params.headers = curl_slist_append(params.headers, "Accept: application/x-git-loose-object"); - if (gh__cmd_opts.show_progress) { - strbuf_addf(¶ms.progress_base_phase2_msg, - "Requesting packfile %d/%d with %ld objects", - j_pack_num, j_pack_den, - params.object_count); - strbuf_addf(¶ms.progress_base_phase3_msg, - "Receiving packfile %d/%d with %ld objects", - j_pack_num, j_pack_den, - params.object_count); - } + setup_gvfs_objects_progress(¶ms, j_pack_num, j_pack_den); do_req__with_fallback("gvfs/objects", ¶ms, status); - if (status->ec == GH__ERROR_CODE__OK) - strbuf_addbuf(output_filename, ¶ms.final_packfile_filename); + + if (status->ec == GH__ERROR_CODE__OK) { + struct strbuf msg = STRBUF_INIT; + + if (params.object_count > 1) + strbuf_addf(&msg, "packfile %s", + params.final_packfile_filename.buf); + else + strbuf_addf(&msg, "loose %s", + oid_to_hex(¶ms.loose_oid)); + + string_list_append(result_list, msg.buf); + strbuf_release(&msg); + } gh__request_params__release(¶ms); jw_release(&jw_req); } /* - * Bulk or individually fetch a list of objects in one or more http requests. - * Create one or more packfiles and/or loose objects. + * Drive one or more HTTP GET requests to fetch the objects + * in the given OIDSET. These are received into loose objects. * - * We accumulate results for each request in `result_list` until we get a + * Accumulate results for each request in `result_list` until we get a * hard error and have to stop. */ -static void do_fetch_oidset(struct gh__response_status *status, - struct oidset *oids, - unsigned long nr_oid_total, - struct string_list *result_list) +static void do__http_get__fetch_oidset(struct gh__response_status *status, + struct oidset *oids, + unsigned long nr_oid_total, + struct string_list *result_list) { struct oidset_iter iter; - struct strbuf output_filename = STRBUF_INIT; - struct strbuf msg = STRBUF_INIT; struct strbuf err404 = STRBUF_INIT; const struct object_id *oid; unsigned long k; - unsigned long nr_oid_taken; int had_404 = 0; - int j_pack_den = 0; - int j_pack_num = 0; gh__response_status__zero(status); if (!nr_oid_total) return; - if (nr_oid_total > 1) - j_pack_den = ((nr_oid_total + gh__cmd_opts.block_size - 1) - / gh__cmd_opts.block_size); - oidset_iter_init(oids, &iter); - for (k = 0; k < nr_oid_total; k += nr_oid_taken) { - if (nr_oid_total - k == 1 || gh__cmd_opts.block_size == 1) { - oid = oidset_iter_next(&iter); - nr_oid_taken = 1; + for (k = 0; k < nr_oid_total; k++) { + oid = oidset_iter_next(&iter); - do__loose__gvfs_object(status, oid); + do__http_get__gvfs_object(status, oid, k+1, nr_oid_total, + result_list); + /* + * If we get a 404 for an individual object, ignore + * it and get the rest. We'll fixup the 'ec' later. + */ + if (status->ec == GH__ERROR_CODE__HTTP_404) { + if (!err404.len) + strbuf_addf(&err404, "%s: from GET %s", + status->error_message.buf, + oid_to_hex(oid)); /* - * If we get a 404 for an individual object, ignore - * it and get the rest. We'll fixup the 'ec' later. + * Mark the fetch as "incomplete", but don't + * stop trying to get other chunks. */ - if (status->ec == GH__ERROR_CODE__HTTP_404) { - if (!err404.len) - strbuf_addf(&err404, "%s: loose object %s", - status->error_message.buf, - oid_to_hex(oid)); - /* - * Mark the fetch as "incomplete", but don't - * stop trying to get other chunks. - */ - had_404 = 1; - continue; - } + had_404 = 1; + continue; + } - if (status->ec != GH__ERROR_CODE__OK) { - /* Stop at the first hard error. */ - strbuf_addf(&status->error_message, ": loose %s", - oid_to_hex(oid)); - goto cleanup; - } + if (status->ec != GH__ERROR_CODE__OK) { + /* Stop at the first hard error. */ + strbuf_addf(&status->error_message, ": from GET %s", + oid_to_hex(oid)); + goto cleanup; + } + } - strbuf_setlen(&msg, 0); - strbuf_addf(&msg, "loose %s", oid_to_hex(oid)); - string_list_append(result_list, msg.buf); +cleanup: + if (had_404 && status->ec == GH__ERROR_CODE__OK) { + strbuf_setlen(&status->error_message, 0); + strbuf_addbuf(&status->error_message, &err404); + status->ec = GH__ERROR_CODE__HTTP_404; + } - } else { - strbuf_setlen(&output_filename, 0); + strbuf_release(&err404); +} - j_pack_num++; +/* + * Drive one or more HTTP POST requests to bulk fetch the objects in + * the given OIDSET. Create one or more packfiles and/or loose objects. + * + * Accumulate results for each request in `result_list` until we get a + * hard error and have to stop. + */ +static void do__http_post__fetch_oidset(struct gh__response_status *status, + struct oidset *oids, + unsigned long nr_oid_total, + struct string_list *result_list) +{ + struct oidset_iter iter; + struct strbuf err404 = STRBUF_INIT; + unsigned long k; + unsigned long nr_oid_taken; + int j_pack_den = 0; + int j_pack_num = 0; + int had_404 = 0; + + gh__response_status__zero(status); + if (!nr_oid_total) + return; - do__packfile__gvfs_objects(status, &iter, - gh__cmd_opts.block_size, - j_pack_num, j_pack_den, - &output_filename, - &nr_oid_taken); + oidset_iter_init(oids, &iter); + + j_pack_den = ((nr_oid_total + gh__cmd_opts.block_size - 1) + / gh__cmd_opts.block_size); + + for (k = 0; k < nr_oid_total; k += nr_oid_taken) { + j_pack_num++; + do__http_post__gvfs_objects(status, &iter, + gh__cmd_opts.block_size, + j_pack_num, j_pack_den, + result_list, + &nr_oid_taken); + + /* + * Because the oidset iterator has random + * order, it does no good to say the k-th or + * n-th chunk was incomplete; the client + * cannot use that index for anything. + * + * We get a 404 when at least one object in + * the chunk was not found. + * + * For now, ignore the 404 and go on to the + * next chunk and then fixup the 'ec' later. + */ + if (status->ec == GH__ERROR_CODE__HTTP_404) { + if (!err404.len) + strbuf_addf(&err404, + "%s: from POST", + status->error_message.buf); /* - * Because the oidset iterator has random - * order, it does no good to say the k-th or - * n-th chunk was incomplete; the client - * cannot use that index for anything. - * - * We get a 404 when at least one object in - * the chunk was not found. - * - * TODO Consider various retry strategies (such as - * TODO loose or bisect) on the members within this - * TODO chunk to reduce the impact of the miss. - * - * For now, ignore the 404 and go on to the - * next chunk and then fixup the 'ec' later. + * Mark the fetch as "incomplete", but don't + * stop trying to get other chunks. */ - if (status->ec == GH__ERROR_CODE__HTTP_404) { - if (!err404.len) - strbuf_addf(&err404, - "%s: packfile object", - status->error_message.buf); - /* - * Mark the fetch as "incomplete", but don't - * stop trying to get other chunks. - */ - had_404 = 1; - continue; - } - - if (status->ec != GH__ERROR_CODE__OK) { - /* Stop at the first hard error. */ - strbuf_addstr(&status->error_message, - ": in packfile"); - goto cleanup; - } + had_404 = 1; + continue; + } - strbuf_setlen(&msg, 0); - strbuf_addf(&msg, "packfile %s", output_filename.buf); - string_list_append(result_list, msg.buf); + if (status->ec != GH__ERROR_CODE__OK) { + /* Stop at the first hard error. */ + strbuf_addstr(&status->error_message, + ": from POST"); + goto cleanup; } } cleanup: - strbuf_release(&msg); - strbuf_release(&err404); - strbuf_release(&output_filename); - if (had_404 && status->ec == GH__ERROR_CODE__OK) { strbuf_setlen(&status->error_message, 0); - strbuf_addstr(&status->error_message, "404 Not Found"); + strbuf_addbuf(&status->error_message, &err404); status->ec = GH__ERROR_CODE__HTTP_404; } + + strbuf_release(&err404); } /* @@ -2710,7 +2853,7 @@ static enum gh__error_code do_sub_cmd__config(int argc, const char **argv) finish_init(0); - do__gvfs_config(&status, &config_data); + do__http_get__gvfs_config(&status, &config_data); ec = status.ec; if (ec == GH__ERROR_CODE__OK) @@ -2725,12 +2868,61 @@ static enum gh__error_code do_sub_cmd__config(int argc, const char **argv) } /* - * Read a list of objects from stdin and fetch them in a single request (or - * multiple block-size requests). + * Read a list of objects from stdin and fetch them as a series of + * single object HTTP GET requests. */ static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) { static struct option get_options[] = { + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), + OPT_END(), + }; + + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct oidset oids = OIDSET_INIT; + struct string_list result_list = STRING_LIST_INIT_DUP; + enum gh__error_code ec = GH__ERROR_CODE__OK; + unsigned long nr_oid_total; + int k; + + trace2_cmd_mode("get"); + + if (argc > 1 && !strcmp(argv[1], "-h")) + usage_with_options(objects_get_usage, get_options); + + argc = parse_options(argc, argv, NULL, get_options, objects_get_usage, 0); + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; + + finish_init(1); + + nr_oid_total = read_stdin_for_oids(&oids); + + do__http_get__fetch_oidset(&status, &oids, nr_oid_total, &result_list); + + ec = status.ec; + + for (k = 0; k < result_list.nr; k++) + printf("%s\n", result_list.items[k].string); + + if (ec != GH__ERROR_CODE__OK) + error("get: %s", status.error_message.buf); + + gh__response_status__release(&status); + oidset_clear(&oids); + string_list_clear(&result_list, 0); + + return ec; +} + +/* + * Read a list of objects from stdin and fetch them in a single request (or + * multiple block-size requests) using one or more HTTP POST requests. + */ +static enum gh__error_code do_sub_cmd__post(int argc, const char **argv) +{ + static struct option post_options[] = { OPT_MAGNITUDE('b', "block-size", &gh__cmd_opts.block_size, N_("number of objects to request at a time")), OPT_INTEGER('d', "depth", &gh__cmd_opts.depth, @@ -2747,12 +2939,12 @@ static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) unsigned long nr_oid_total; int k; - trace2_cmd_mode("get"); + trace2_cmd_mode("post"); if (argc > 1 && !strcmp(argv[1], "-h")) - usage_with_options(get_usage, get_options); + usage_with_options(objects_post_usage, post_options); - argc = parse_options(argc, argv, NULL, get_options, get_usage, 0); + argc = parse_options(argc, argv, NULL, post_options, objects_post_usage, 0); if (gh__cmd_opts.depth < 1) gh__cmd_opts.depth = 1; if (gh__cmd_opts.max_retries < 0) @@ -2760,12 +2952,9 @@ static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) finish_init(1); - nr_oid_total = read_stdin_from_rev_list(&oids); + nr_oid_total = read_stdin_for_oids(&oids); - trace2_region_enter("gvfs-helper", "get", NULL); - trace2_data_intmax("gvfs-helper", NULL, "get/nr_objects", nr_oid_total); - do_fetch_oidset(&status, &oids, nr_oid_total, &result_list); - trace2_region_leave("gvfs-helper", "get", NULL); + do__http_post__fetch_oidset(&status, &oids, nr_oid_total, &result_list); ec = status.ec; @@ -2773,7 +2962,7 @@ static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) printf("%s\n", result_list.items[k].string); if (ec != GH__ERROR_CODE__OK) - error("get: %s", status.error_message.buf); + error("post: %s", status.error_message.buf); gh__response_status__release(&status); oidset_clear(&oids); @@ -2783,12 +2972,14 @@ static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) } /* - * Handle the 'get' command when in "server mode". Only call error() and set ec - * for hard errors where we cannot communicate correctly with the foreground - * client process. Pass any actual data errors (such as 404's or 401's from - * the fetch back to the client process. + * Handle the 'objects.get' and 'objects.post' verbs in "server mode". + * + * Only call error() and set ec for hard errors where we cannot + * communicate correctly with the foreground client process. Pass any + * actual data errors (such as 404's or 401's from the fetch) back to + * the client process. */ -static enum gh__error_code do_server_subprocess_get(void) +static enum gh__error_code do_server_subprocess__objects(const char *verb_line) { struct gh__response_status status = GH__RESPONSE_STATUS_INIT; struct oidset oids = OIDSET_INIT; @@ -2799,12 +2990,19 @@ static enum gh__error_code do_server_subprocess_get(void) int len; int err; int k; + enum gh__objects_mode objects_mode; unsigned long nr_oid_total = 0; - /* - * Inside the "get" command, we expect a list of OIDs - * and a flush. - */ + if (!strcmp(verb_line, "objects.get")) + objects_mode = GH__OBJECTS_MODE__GET; + else if (!strcmp(verb_line, "objects.post")) + objects_mode = GH__OBJECTS_MODE__POST; + else { + error("server: unexpected objects-mode verb '%s'", verb_line); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + while (1) { len = packet_read_line_gently(0, NULL, &line); if (len < 0 || !line) @@ -2829,10 +3027,10 @@ static enum gh__error_code do_server_subprocess_get(void) goto cleanup; } - trace2_region_enter("gvfs-helper", "server/get", NULL); - trace2_data_intmax("gvfs-helper", NULL, "server/get/nr_objects", nr_oid_total); - do_fetch_oidset(&status, &oids, nr_oid_total, &result_list); - trace2_region_leave("gvfs-helper", "server/get", NULL); + if (objects_mode == GH__OBJECTS_MODE__GET) + do__http_get__fetch_oidset(&status, &oids, nr_oid_total, &result_list); + else + do__http_post__fetch_oidset(&status, &oids, nr_oid_total, &result_list); /* * Write pathname of the ODB where we wrote all of the objects @@ -2887,7 +3085,7 @@ static enum gh__error_code do_server_subprocess_get(void) return ec; } -typedef enum gh__error_code (fn_subprocess_cmd)(void); +typedef enum gh__error_code (fn_subprocess_cmd)(const char *verb_line); struct subprocess_capability { const char *name; @@ -2896,7 +3094,7 @@ struct subprocess_capability { }; static struct subprocess_capability caps[] = { - { "get", 0, do_server_subprocess_get }, + { "objects", 0, do_server_subprocess__objects }, { NULL, 0, NULL }, }; @@ -3027,8 +3225,9 @@ static enum gh__error_code do_sub_cmd__server(int argc, const char **argv) } for (k = 0; caps[k].name; k++) { - if (caps[k].client_has && !strcmp(line, caps[k].name)) { - ec = (caps[k].pfn)(); + if (caps[k].client_has && + starts_with(line, caps[k].name)) { + ec = (caps[k].pfn)(line); if (ec != GH__ERROR_CODE__OK) goto cleanup; goto top_of_loop; @@ -3049,6 +3248,9 @@ static enum gh__error_code do_sub_cmd(int argc, const char **argv) if (!strcmp(argv[0], "get")) return do_sub_cmd__get(argc, argv); + if (!strcmp(argv[0], "post")) + return do_sub_cmd__post(argc, argv); + if (!strcmp(argv[0], "config")) return do_sub_cmd__config(argc, argv); @@ -3099,8 +3301,8 @@ int cmd_main(int argc, const char **argv) setup_git_directory_gently(NULL); /* Set any non-zero initial values in gh__cmd_opts. */ - gh__cmd_opts.depth = GH__DEFAULT_COMMIT_DEPTH; - gh__cmd_opts.block_size = GH__DEFAULT_BLOCK_SIZE; + gh__cmd_opts.depth = GH__DEFAULT__OBJECTS_POST__COMMIT_DEPTH; + gh__cmd_opts.block_size = GH__DEFAULT__OBJECTS_POST__BLOCK_SIZE; gh__cmd_opts.max_retries = GH__DEFAULT_MAX_RETRIES; gh__cmd_opts.max_transient_backoff_sec = GH__DEFAULT_MAX_TRANSIENT_BACKOFF_SEC;