@@ -439,6 +439,115 @@ service ContentAddressableStorage {
   rpc GetTree(GetTreeRequest) returns (stream GetTreeResponse) {
     option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{root_digest.hash}/{root_digest.size_bytes}:getTree" };
   }
+
+  // Split a blob into chunks.
+  //
+  // This call splits a blob into chunks, stores the chunks in the CAS, and
+  // returns a list of the chunk digests. Using this list, a client can check
+  // which chunks are locally available and just fetch the missing ones. The
+  // desired blob can be assembled by concatenating the fetched chunks in the
+  // order of the digests from the list.
+  // This RPC can be used to reduce the amount of data that must be
+  // downloaded from the CAS for a large blob when chunks from earlier
+  // downloads of a different version of this blob are locally available.
+  // For this procedure to work properly, blobs SHOULD be split in a
+  // content-defined way, rather than with fixed-size chunking.
+  //
+  // If a split request is answered successfully, a client can expect the
+  // following guarantees from the server:
+  // 1. The blob chunks are stored in CAS.
+  // 2. Concatenating the blob chunks in the order of the digest list returned
+  //    by the server results in the original blob.
+  //
+  // Servers MAY implement this functionality, but MUST declare whether they
+  // support it or not by setting the
+  // [CacheCapabilities.blob_split_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_split_support]
+  // field accordingly.
+  //
+  // Clients MAY use this functionality; it is merely an optimization to
+  // reduce network traffic when downloading large blobs from the CAS.
+  // However, clients MUST first check the server capabilities to
+  // determine whether the server supports blob splitting.
+  //
+  // Clients SHOULD verify that the digest of the blob assembled from the
+  // fetched chunks matches the requested blob digest.
+  //
+  // Since the generated chunks are stored as blobs, they are subject to
+  // the same lifetime semantics as other blobs. In particular, the chunk
+  // lifetimes are independent from the lifetime of the original blob:
+  // * A blob and any chunk derived from it may be evicted from the CAS at
+  //   different times.
+  // * A call to Split extends the lifetime of the original blob, and sets
+  //   the lifetimes of the resulting chunks (or extends the lifetimes of
+  //   already-existing chunks).
+  // * Touching a chunk extends its lifetime, but does not extend the
+  //   lifetime of the original blob.
+  // * Touching the original blob extends its lifetime, but does not extend
+  //   the lifetimes of chunks derived from it.
+  //
+  // When blob splitting and splicing are used together, the client and the
+  // server SHOULD agree out-of-band upon a chunking algorithm used by both
+  // parties to benefit from each other's chunk data and avoid unnecessary
+  // data duplication.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: The requested blob is not present in the CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+  //   blob chunks.
+  rpc SplitBlob(SplitBlobRequest) returns (SplitBlobResponse) {
+    option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{blob_digest.hash}/{blob_digest.size_bytes}:splitBlob" };
+  }
+
+  // Splice a blob from chunks.
+  //
+  // This is the complementary operation to the
+  // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+  // function to handle the chunked upload of large blobs to save upload
+  // traffic.
+  //
+  // If a client needs to upload a large blob and is able to split it into
+  // chunks in such a way that reusable chunks are obtained, e.g., by means
+  // of content-defined chunking, it can first determine which parts of the
+  // blob are already available in the remote CAS, upload the missing
+  // chunks, and then use this API to instruct the server to splice the
+  // original blob together from the remotely available blob chunks.
+  //
+  // Servers MAY implement this functionality, but MUST declare whether they
+  // support it or not by setting the
+  // [CacheCapabilities.blob_splice_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_splice_support]
+  // field accordingly.
+  //
+  // Clients MAY use this functionality; it is merely an optimization to
+  // reduce network traffic when uploading large blobs to the CAS. However,
+  // clients MUST first check the server capabilities to determine whether
+  // the server supports blob splicing.
+  //
+  // In order to ensure data consistency of the CAS, the server MUST only
+  // add entries to the CAS under a hash the server has verified itself. In
+  // particular, it MUST NOT trust the result hash provided by the client.
+  // The server MAY accept a request as a no-op if the client-provided
+  // result hash is already in the CAS; the lifetime of that blob is then
+  // extended as usual. If the client-provided result is not in the CAS,
+  // the server SHOULD verify the result hash sent by the client and reject
+  // requests where a different splice result is obtained.
+  //
+  // When blob splitting and splicing are used together, the client and the
+  // server SHOULD agree out-of-band upon a chunking algorithm used by both
+  // parties to benefit from each other's chunk data and avoid unnecessary
+  // data duplication.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: At least one of the blob chunks is not present in the
+  //   CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+  //   spliced blob.
+  // * `INVALID_ARGUMENT`: The digest of the spliced blob is different from
+  //   the provided expected digest.
+  rpc SpliceBlob(SpliceBlobRequest) returns (SpliceBlobResponse) {
+    option (google.api.http) = { post: "/v2/{instance_name=**}/blobs:spliceBlob" body: "*" };
+  }
 }
 
 // The Capabilities service may be used by remote execution clients to query
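
A minimal sketch of the chunked download flow that the SplitBlob comments above describe, written in Go. The `Digest` struct, the `casClient` interface, and the in-memory `local` chunk store are hypothetical stand-ins for the generated gRPC stubs and a real on-disk cache; SHA-256 is assumed as the digest function.

```go
package casutil

import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// Digest mirrors the proto message: a hex-encoded hash plus a size.
type Digest struct {
	Hash      string
	SizeBytes int64
}

// casClient is a hypothetical wrapper around the generated gRPC stubs.
type casClient interface {
	// SplitBlob returns the ordered chunk digests for a stored blob.
	SplitBlob(blob Digest) ([]Digest, error)
	// ReadBlob fetches one blob (here: one chunk) from the CAS.
	ReadBlob(d Digest) ([]byte, error)
}

// DownloadViaSplit assembles a large blob from its chunks, reusing any
// chunks already present in the local store and fetching only the rest.
func DownloadViaSplit(c casClient, local map[string][]byte, want Digest) ([]byte, error) {
	chunks, err := c.SplitBlob(want)
	if err != nil {
		return nil, err
	}
	var buf bytes.Buffer
	for _, cd := range chunks {
		data, ok := local[cd.Hash]
		if !ok { // chunk not locally available: fetch it
			if data, err = c.ReadBlob(cd); err != nil {
				return nil, err
			}
			local[cd.Hash] = data
		}
		// Concatenate strictly in the order returned by the server.
		buf.Write(data)
	}
	// Clients SHOULD verify the digest of the assembled blob
	// (SHA-256 assumed here).
	sum := sha256.Sum256(buf.Bytes())
	if got := hex.EncodeToString(sum[:]); got != want.Hash {
		return nil, fmt.Errorf("assembled blob digest %s != requested %s", got, want.Hash)
	}
	return buf.Bytes(), nil
}
```
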
@@ -1846,6 +1955,78 @@ message GetTreeResponse {
   string next_page_token = 2;
 }
 
+// A request message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own workers,
+  // storage, caches, etc.). The server MAY require use of this field to select
+  // between them in an implementation-defined fashion, otherwise it can be
+  // omitted.
+  string instance_name = 1;
+
+  // The digest of the blob to be split.
+  Digest blob_digest = 2;
+
+  // The digest function of the blob to be split.
+  //
+  // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
+  // SHA384, SHA512, or VSO, the client MAY leave this field unset. In
+  // that case the server SHOULD infer the digest function using the
+  // length of the blob digest hashes and the digest functions announced
+  // in the server's capabilities.
+  DigestFunction.Value digest_function = 3;
+}
+
+// A response message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobResponse {
+  // The ordered list of digests of the chunks into which the blob was split.
+  // The original blob is assembled by concatenating the chunk data according
+  // to the order of the digests given by this list.
+  //
+  // The server MUST use the same digest function as the one explicitly or
+  // implicitly (through hash length) specified in the split request.
+  repeated Digest chunk_digests = 1;
+}
+
+// A request message for
+// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+message SpliceBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own workers,
+  // storage, caches, etc.). The server MAY require use of this field to select
+  // between them in an implementation-defined fashion, otherwise it can be
+  // omitted.
+  string instance_name = 1;
+
+  // Expected digest of the spliced blob.
+  Digest blob_digest = 2;
+
+  // The ordered list of digests of the chunks which need to be concatenated to
+  // assemble the original blob.
+  repeated Digest chunk_digests = 3;
+
+  // The digest function of all chunks to be concatenated and of the blob to be
+  // spliced. The server MUST use the same digest function for both cases.
+  //
+  // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256, SHA384,
+  // SHA512, or VSO, the client MAY leave this field unset. In that case the
+  // server SHOULD infer the digest function using the length of the blob digest
+  // hashes and the digest functions announced in the server's capabilities.
+  DigestFunction.Value digest_function = 4;
+}
2019+
2020+ // A response message for
2021+ // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
2022+ message SpliceBlobResponse {
2023+ // Computed digest of the spliced blob.
2024+ //
2025+ // The server MUST use the same digest function as the one explicitly or
2026+ // implicitly (through hash length) specified in the splice request.
2027+ Digest blob_digest = 1 ;
2028+ }
2029+
18492030// A request message for
18502031// [Capabilities.GetCapabilities][build.bazel.remote.execution.v2.Capabilities.GetCapabilities].
18512032message GetCapabilitiesRequest {
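
Both request messages allow `digest_function` to be left unset for the fixed-length hash families. Below is a minimal sketch, continuing the Go sketch above, of the inference a server might perform: matching the hex-encoded hash length against the digest functions announced in its own capabilities. The length table is illustrative and intentionally incomplete (VSO is omitted); MD5 and MURMUR3 share a length, so which one wins depends on the order of the announced functions.

```go
// inferDigestFunction resolves an unset digest_function the way the
// request comments suggest: by matching the hash length against the
// digest functions this server announced in its capabilities.
func inferDigestFunction(hexHash string, announced []string) (string, error) {
	hexLen := map[string]int{
		"MD5":     32,  // 128-bit
		"MURMUR3": 32,  // 128-bit: ambiguous with MD5 by length alone
		"SHA1":    40,  // 160-bit
		"SHA256":  64,  // 256-bit
		"SHA384":  96,  // 384-bit
		"SHA512":  128, // 512-bit
	}
	for _, fn := range announced {
		if n, ok := hexLen[fn]; ok && n == len(hexHash) {
			return fn, nil
		}
	}
	return "", fmt.Errorf("no announced digest function matches hash length %d", len(hexHash))
}
```
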
@@ -2076,6 +2257,20 @@ message CacheCapabilities {
   // - If the cache implementation returns a given limit, it MAY still serve
   //   blobs larger than this limit.
   int64 max_cas_blob_size_bytes = 8;
+
+  // Whether blob splitting is supported for the particular server/instance.
+  // If yes, the server/instance implements the specified behavior for blob
+  // splitting and a meaningful result can be expected from the
+  // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+  // operation.
+  bool blob_split_support = 9;
+
+  // Whether blob splicing is supported for the particular server/instance.
+  // If yes, the server/instance implements the specified behavior for blob
+  // splicing and a meaningful result can be expected from the
+  // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob]
+  // operation.
+  bool blob_splice_support = 10;
 }
 
 // Capabilities of the remote execution system.
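
To mirror the download sketch after the first hunk, here is a minimal Go sketch of the capability-gated upload path: check `blob_splice_support`, upload only the chunks the server reports missing via the existing FindMissingBlobs RPC, then splice. `CacheCapabilities` and the `casUploader` methods here are hypothetical stand-ins for the generated types, and the `Digest` type is reused from the first sketch.

```go
// CacheCapabilities stands in for the generated capabilities message.
type CacheCapabilities struct {
	BlobSplitSupport  bool
	BlobSpliceSupport bool
}

// casUploader lists the hypothetical client methods the upload path needs.
type casUploader interface {
	FindMissingBlobs(digests []Digest) ([]Digest, error)
	WriteBlob(d Digest, data []byte) error
	// SpliceBlob returns the server-computed digest of the spliced blob.
	SpliceBlob(expected Digest, chunks []Digest) (Digest, error)
}

// UploadViaSplice uploads a large blob chunk-by-chunk and splices it
// remotely. chunks must be in blob order; chunkData maps hash -> bytes.
func UploadViaSplice(c casUploader, caps CacheCapabilities, blob Digest, chunks []Digest, chunkData map[string][]byte) error {
	// Clients MUST check the capability before using the RPC.
	if !caps.BlobSpliceSupport {
		return fmt.Errorf("server does not support blob splicing; fall back to a plain upload")
	}
	missing, err := c.FindMissingBlobs(chunks)
	if err != nil {
		return err
	}
	for _, d := range missing { // upload only the chunks the CAS lacks
		if err := c.WriteBlob(d, chunkData[d.Hash]); err != nil {
			return err
		}
	}
	got, err := c.SpliceBlob(blob, chunks)
	if err != nil {
		return err
	}
	// The server verifies the spliced result itself; the client can still
	// cross-check the returned digest against its expectation.
	if got.Hash != blob.Hash {
		return fmt.Errorf("spliced digest %s != expected %s", got.Hash, blob.Hash)
	}
	return nil
}
```
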