@@ -439,6 +439,106 @@ service ContentAddressableStorage {
439439 rpc GetTree (GetTreeRequest ) returns (stream GetTreeResponse ) {
440440 option (google.api.http ) = { get : "/v2/{instance_name=**}/blobs/{root_digest.hash}/{root_digest.size_bytes}:getTree" };
441441 }
442+
443+ // Split a blob into chunks.
444+ //
445+ // This call splits a blob into chunks, stores the chunks in the CAS, and
446+ // returns a list of the chunk digests. Using this list, a client can check
447+ // which chunks are locally available and just fetch the missing ones. The
448+ // desired blob can be assembled by concatenating the fetched chunks in the
449+ // order of the digests from the list.
450+ //
451+ // This rpc can be used to reduce the amount of data that needs to be
452+ // downloaded from the CAS for a large blob if chunks from earlier downloads
453+ // of a different version of this blob are locally available. For this
454+ // procedure to work properly, blobs need to be split in a content-defined
455+ // way rather than with fixed-size chunking.
456+ //
457+ // If a split request is answered successfully, a client can expect the
458+ // following guarantees from the server:
459+ // 1. The blob chunks are stored in CAS.
460+ // 2. Concatenating the blob chunks in the order of the digest list returned
461+ // by the server results in the original blob.
462+ //
463+ // Servers are free to implement this functionality, but they need to declare
464+ // whether they support it or not by setting the
465+ // [CacheCapabilities.blob_split_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_split_support]
466+ // field accordingly.
467+ //
468+ // Clients are free to use this functionality; it is just an optimization to
469+ // reduce network traffic when downloading large blobs from the CAS. However,
470+ // clients need to check the server capabilities first to determine whether
471+ // blob splitting is supported by the server.
472+ //
473+ // Hints:
474+ //
475+ // * It is recommended that clients verify that the digest of the blob
476+ // assembled from the fetched chunks matches the requested blob digest.
477+ //
478+ // * Since the generated chunks are stored as blobs, they are subject to the
479+ // same lifetimes as other blobs. However, their lifetimes are extended if
480+ // they are part of the result of a split blob request.
481+ //
482+ // * When blob splitting and splicing are used at the same time, the clients
483+ // and the server should agree out of band on a chunking algorithm used by
484+ // all parties to benefit from each other's chunk data and avoid unnecessary
485+ // data duplication.
486+ //
487+ // Errors:
488+ //
489+ // * `NOT_FOUND`: The requested blob is not present in the CAS.
490+ // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the blob
491+ // chunks.
492+ rpc SplitBlob (SplitBlobRequest ) returns (SplitBlobResponse ) {
493+ option (google.api.http ) = { get : "/v2/{instance_name=**}/blobs/{blob_digest.hash}/{blob_digest.size_bytes}:splitBlob" };
494+ }
495+
496+ // Splice a blob from chunks.
497+ //
498+ // This is the complementary operation to
499+ // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob],
500+ // handling the chunked upload of large blobs to save upload
501+ // traffic.
502+ //
503+ // If a client needs to upload a large blob and is able to split it into
504+ // chunks in such a way that reusable chunks are obtained, e.g., by means of
505+ // content-defined chunking, it can first determine which parts of the blob
506+ // are already available in the remote CAS, upload the missing chunks, and
507+ // then use this API to instruct the server to splice the original blob from
508+ // the remotely available blob chunks.
509+ //
510+ // Servers are free to implement this functionality, but they need to declare
511+ // whether they support it or not by setting the
512+ // [CacheCapabilities.blob_splice_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_splice_support]
513+ // field accordingly.
514+ //
515+ // Clients are free to use this functionality; it is just an optimization to
516+ // reduce network traffic when uploading large blobs to the CAS. However,
517+ // clients need to check the server capabilities first to determine whether
518+ // blob splicing is supported by the server.
519+ //
520+ // Hints:
521+ //
522+ // * In order to ensure data consistency of the CAS, the server will verify
523+ // that the digest of the spliced result matches the digest provided in
524+ // the request and will reject the splice request if this check
525+ // fails.
526+ //
527+ // * When blob splitting and splicing are used at the same time, the clients
528+ // and the server should agree out of band on a chunking algorithm used by
529+ // all parties to benefit from each other's chunk data and avoid unnecessary
530+ // data duplication.
531+ //
532+ // Errors:
533+ //
534+ // * `NOT_FOUND`: At least one of the blob chunks is not present in the CAS.
535+ // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
536+ // spliced blob.
537+ // * `INVALID_ARGUMENT`: The digest of the spliced blob is different from the
538+ // provided expected digest.
539+ rpc SpliceBlob (SpliceBlobRequest ) returns (SpliceBlobResponse ) {
540+ option (google.api.http ) = { post : "/v2/{instance_name=**}/blobs:spliceBlob" body: "*" };
541+ }
442542}
443543
444544// The Capabilities service may be used by remote execution clients to query
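
As a concrete illustration of the download flow described in the SplitBlob comment above, here is a minimal client-side sketch in Python, assuming gRPC stubs generated from this proto (remote_execution_pb2 / remote_execution_pb2_grpc). The local_store object and the read_blob helper are hypothetical, introduced only for illustration; actual chunk reads would typically go through ByteStream or BatchReadBlobs.

import hashlib

from build.bazel.remote.execution.v2 import remote_execution_pb2 as re_pb2
from build.bazel.remote.execution.v2 import remote_execution_pb2_grpc as re_grpc


def download_via_split(channel, instance_name, blob_digest, local_store):
    """Download a large blob by fetching only the locally missing chunks."""
    cas = re_grpc.ContentAddressableStorageStub(channel)

    # Ask the server to split the blob. The response lists the chunk digests
    # in concatenation order, and the chunks are guaranteed to be in the CAS.
    response = cas.SplitBlob(re_pb2.SplitBlobRequest(
        instance_name=instance_name,
        blob_digest=blob_digest,
        digest_function=re_pb2.DigestFunction.SHA256,
    ))

    # Fetch only the chunks that are not already available locally.
    # read_blob is a hypothetical helper, e.g. wrapping ByteStream reads.
    parts = []
    for chunk_digest in response.chunk_digests:
        data = local_store.get(chunk_digest.hash)
        if data is None:
            data = read_blob(channel, instance_name, chunk_digest)
            local_store.put(chunk_digest.hash, data)
        parts.append(data)

    # Assemble the blob in the order given by the digest list and, as the
    # client hint above recommends, verify it against the requested digest.
    blob = b"".join(parts)
    assert hashlib.sha256(blob).hexdigest() == blob_digest.hash
    assert len(blob) == blob_digest.size_bytes
    return blob
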
@@ -1846,6 +1946,91 @@ message GetTreeResponse {
18461946 string next_page_token = 2 ;
18471947}
18481948
1949+ // A request message for
1950+ // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
1951+ message SplitBlobRequest {
1952+ // The instance of the execution system to operate against. A server may
1953+ // support multiple instances of the execution system (with their own workers,
1954+ // storage, caches, etc.). The server MAY require use of this field to select
1955+ // between them in an implementation-defined fashion, otherwise it can be
1956+ // omitted.
1957+ string instance_name = 1 ;
1958+
1959+ // The digest of the blob to be split.
1960+ Digest blob_digest = 2 ;
1961+
1962+ // The digest function of the blob to be split.
1963+ //
1964+ // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
1965+ // SHA384, SHA512, or VSO, the client MAY leave this field unset. In
1966+ // that case the server SHOULD infer the digest function using the
1967+ // length of the blob digest hashes and the digest functions announced
1968+ // in the server's capabilities.
1969+ DigestFunction.Value digest_function = 4 ;
1970+ }
1971+
1972+ // A response message for
1973+ // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
1974+ message SplitBlobResponse {
1975+ // The ordered list of digests of the chunks into which the blob was split.
1976+ // The original blob is assembled by concatenating the chunk data according to
1977+ // the order of the digests given by this list.
1978+ repeated Digest chunk_digests = 1 ;
1979+
1980+ // The digest function of the chunks.
1981+ //
1982+ // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
1983+ // SHA384, SHA512, or VSO, the server MAY leave this field unset. In
1984+ // that case the client SHOULD infer the digest function using the
1985+ // length of the chunk digest hashes and the digest functions announced
1986+ // in the server's capabilities.
1987+ DigestFunction.Value digest_function = 2 ;
1988+ }
1989+
1990+ // A request message for
1991+ // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
1992+ message SpliceBlobRequest {
1993+ // The instance of the execution system to operate against. A server may
1994+ // support multiple instances of the execution system (with their own workers,
1995+ // storage, caches, etc.). The server MAY require use of this field to select
1996+ // between them in an implementation-defined fashion, otherwise it can be
1997+ // omitted.
1998+ string instance_name = 1 ;
1999+
2000+ // Expected digest of the spliced blob.
2001+ Digest blob_digest = 2 ;
2002+
2003+ // The ordered list of digests of the chunks which need to be concatenated to
2004+ // assemble the original blob.
2005+ repeated Digest chunk_digests = 3 ;
2006+
2007+ // The digest function of the blob to be spliced as well as of the chunks to
2008+ // be concatenated.
2009+ //
2010+ // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
2011+ // SHA384, SHA512, or VSO, the client MAY leave this field unset. In
2012+ // that case the server SHOULD infer the digest function using the
2013+ // length of the blob digest hashes and the digest functions announced
2014+ // in the server's capabilities.
2015+ DigestFunction.Value digest_function = 4 ;
2016+ }
2017+
2018+ // A response message for
2019+ // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
2020+ message SpliceBlobResponse {
2021+ // Computed digest of the spliced blob.
2022+ Digest blob_digest = 1 ;
2023+
2024+ // The digest function of the spliced blob.
2025+ //
2026+ // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
2027+ // SHA384, SHA512, or VSO, the server MAY leave this field unset. In
2028+ // that case the client SHOULD infer the digest function using the
2029+ // length of the blob digest hash and the digest functions announced
2030+ // in the server's capabilities.
2031+ DigestFunction.Value digest_function = 2 ;
2032+ }
2033+
18492034// A request message for
18502035// [Capabilities.GetCapabilities][build.bazel.remote.execution.v2.Capabilities.GetCapabilities].
18512036message GetCapabilitiesRequest {
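
To complement the splice-related messages above, the following sketch shows the upload direction under the same stub assumptions as the earlier download sketch. The split_into_chunks argument stands in for a client-side content-defined chunking implementation (for example FastCDC) and is an assumption, not part of this API.

import hashlib

from build.bazel.remote.execution.v2 import remote_execution_pb2 as re_pb2
from build.bazel.remote.execution.v2 import remote_execution_pb2_grpc as re_grpc


def upload_via_splice(channel, instance_name, blob, split_into_chunks):
    """Upload a large blob as chunks and splice it remotely."""
    cas = re_grpc.ContentAddressableStorageStub(channel)

    # Chunk the blob locally with a content-defined algorithm so that chunk
    # boundaries (and thus digests) stay reusable across blob versions.
    chunks = split_into_chunks(blob)
    chunk_digests = [
        re_pb2.Digest(hash=hashlib.sha256(c).hexdigest(), size_bytes=len(c))
        for c in chunks
    ]

    # Determine which chunks the remote CAS is still missing.
    missing = cas.FindMissingBlobs(re_pb2.FindMissingBlobsRequest(
        instance_name=instance_name,
        blob_digests=chunk_digests,
    )).missing_blob_digests
    missing_hashes = {d.hash for d in missing}

    # Upload only the missing chunks. For brevity this uses a single batch; a
    # real client would respect the server's batch size limits and check the
    # per-blob status in the response.
    cas.BatchUpdateBlobs(re_pb2.BatchUpdateBlobsRequest(
        instance_name=instance_name,
        requests=[
            re_pb2.BatchUpdateBlobsRequest.Request(digest=d, data=c)
            for d, c in zip(chunk_digests, chunks)
            if d.hash in missing_hashes
        ],
    ))

    # Ask the server to splice the original blob from the chunks. The server
    # verifies the result against blob_digest and rejects it on mismatch.
    blob_digest = re_pb2.Digest(
        hash=hashlib.sha256(blob).hexdigest(), size_bytes=len(blob))
    return cas.SpliceBlob(re_pb2.SpliceBlobRequest(
        instance_name=instance_name,
        blob_digest=blob_digest,
        chunk_digests=chunk_digests,
        digest_function=re_pb2.DigestFunction.SHA256,
    ))
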
@@ -2076,6 +2261,20 @@ message CacheCapabilities {
20762261 // - If the cache implementation returns a given limit, it MAY still serve
20772262 // blobs larger than this limit.
20782263 int64 max_cas_blob_size_bytes = 8 ;
2264+
2265+ // Whether blob splitting is supported for the particular server/instance. If
2266+ // yes, the server/instance implements the specified behavior for blob
2267+ // splitting and a meaningful result can be expected from the
2268+ // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
2269+ // operation.
2270+ bool blob_split_support = 9 ;
2271+
2272+ // Whether blob splicing is supported for the particular server/instance. If
2273+ // yes, the server/instance implements the specified behavior for blob
2274+ // splicing and a meaningful result can be expected from the
2275+ // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob]
2276+ // operation.
2277+ bool blob_splice_support = 10 ;
20792278}
20802279
20812280// Capabilities of the remote execution system.
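
Since both RPCs are optional server features, the capability check that the comments above require could look like the following sketch, again assuming stubs generated from this proto. A client would fall back to the regular ByteStream or batch APIs whenever the relevant flag is false.

from build.bazel.remote.execution.v2 import remote_execution_pb2 as re_pb2
from build.bazel.remote.execution.v2 import remote_execution_pb2_grpc as re_grpc


def chunking_support(channel, instance_name):
    """Return (split_supported, splice_supported) for the given instance."""
    caps = re_grpc.CapabilitiesStub(channel).GetCapabilities(
        re_pb2.GetCapabilitiesRequest(instance_name=instance_name))
    cache = caps.cache_capabilities
    return cache.blob_split_support, cache.blob_splice_support
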