Skip to content

Commit 7f1eaf4

Browse files
Harden ZIP streaming to reject repeated entries and other malformed ZIP files (#15136)
## Summary uv will now reject ZIP files that meet any of the following conditions: - Multiple local header entries exist for the same file with different contents. - A local header entry exists for a file that isn't included in the end-of-central directory record. - An entry exists in the end-of-central directory record that does not have a corresponding local header. - The ZIP file contains contents after the first end-of-central directory record. - The CRC32 doesn't match between the local file header and the end-of-central directory record. - The compressed size doesn't match between the local file header and the end-of-central directory record. - The uncompressed size doesn't match between the local file header and the end-of-central directory record. - The reported central directory offset (in the end-of-central-directory header) does not match the actual offset. - The reported ZIP64 end of central directory locator offset does not match the actual offset. We also validate the above for files with data descriptors, which we previously ignored. Wheels from the most recent releases of the top 15,000 packages on PyPI have been confirmed to pass these checks, and PyPI will also reject ZIPs under many of the same conditions (at upload time) in the future. In rare cases, this validation can be disabled by setting `UV_INSECURE_NO_ZIP_VALIDATION=1`. Any validations should be reported to the uv issue tracker and to the upstream package maintainer.
1 parent 038bf56 commit 7f1eaf4

File tree

15 files changed

+1147
-73
lines changed

15 files changed

+1147
-73
lines changed

Cargo.lock

Lines changed: 6 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ async-channel = { version = "2.3.1" }
8282
async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] }
8383
async-trait = { version = "0.1.82" }
8484
async_http_range_reader = { version = "0.9.1" }
85-
async_zip = { git = "https://github.com/astral-sh/rs-async-zip", rev = "c909fda63fcafe4af496a07bfda28a5aae97e58d", features = ["bzip2", "deflate", "lzma", "tokio", "xz", "zstd"] }
85+
async_zip = { git = "https://github.com/astral-sh/rs-async-zip", rev = "285e48742b74ab109887d62e1ae79e7c15fd4878", features = ["bzip2", "deflate", "lzma", "tokio", "xz", "zstd"] }
8686
axoupdater = { version = "0.9.0", default-features = false }
8787
backon = { version = "1.3.0" }
8888
base64 = { version = "0.22.1" }

crates/uv-dev/Cargo.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ uv-client = { workspace = true }
2222
uv-configuration = { workspace = true }
2323
uv-distribution-filename = { workspace = true }
2424
uv-distribution-types = { workspace = true }
25-
uv-extract = { workspace = true, optional = true }
25+
uv-extract = { workspace = true }
2626
uv-installer = { workspace = true }
2727
uv-macros = { workspace = true }
2828
uv-options-metadata = { workspace = true }
@@ -39,20 +39,23 @@ anstream = { workspace = true }
3939
anyhow = { workspace = true }
4040
clap = { workspace = true, features = ["derive", "wrap_help"] }
4141
fs-err = { workspace = true, features = ["tokio"] }
42+
futures = { workspace = true }
4243
itertools = { workspace = true }
4344
markdown = { version = "1.0.0" }
4445
owo-colors = { workspace = true }
4546
poloto = { version = "19.1.2", optional = true }
4647
pretty_assertions = { version = "1.4.1" }
47-
reqwest = { workspace = true }
48+
reqwest = { workspace = true, features = ["stream"] }
4849
resvg = { version = "0.29.0", optional = true }
4950
schemars = { workspace = true }
5051
serde = { workspace = true }
5152
serde_json = { workspace = true }
5253
serde_yaml = { version = "0.9.34" }
5354
tagu = { version = "0.1.6", optional = true }
55+
tempfile = { workspace = true }
5456
textwrap = { workspace = true }
5557
tokio = { workspace = true }
58+
tokio-util = { workspace = true }
5659
tracing = { workspace = true }
5760
tracing-durations-export = { workspace = true, features = ["plot"] }
5861
tracing-subscriber = { workspace = true }

crates/uv-dev/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use crate::generate_options_reference::Args as GenerateOptionsReferenceArgs;
1414
use crate::generate_sysconfig_mappings::Args as GenerateSysconfigMetadataArgs;
1515
#[cfg(feature = "render")]
1616
use crate::render_benchmarks::RenderBenchmarksArgs;
17+
use crate::validate_zip::ValidateZipArgs;
1718
use crate::wheel_metadata::WheelMetadataArgs;
1819

1920
mod clear_compile;
@@ -25,6 +26,7 @@ mod generate_json_schema;
2526
mod generate_options_reference;
2627
mod generate_sysconfig_mappings;
2728
mod render_benchmarks;
29+
mod validate_zip;
2830
mod wheel_metadata;
2931

3032
const ROOT_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../");
@@ -33,6 +35,8 @@ const ROOT_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../");
3335
enum Cli {
3436
/// Display the metadata for a `.whl` at a given URL.
3537
WheelMetadata(WheelMetadataArgs),
38+
/// Validate that a `.whl` or `.zip` file at a given URL is a valid ZIP file.
39+
ValidateZip(ValidateZipArgs),
3640
/// Compile all `.py` to `.pyc` files in the tree.
3741
Compile(CompileArgs),
3842
/// Remove all `.pyc` in the tree.
@@ -59,6 +63,7 @@ pub async fn run() -> Result<()> {
5963
let cli = Cli::parse();
6064
match cli {
6165
Cli::WheelMetadata(args) => wheel_metadata::wheel_metadata(args).await?,
66+
Cli::ValidateZip(args) => validate_zip::validate_zip(args).await?,
6267
Cli::Compile(args) => compile::compile(args).await?,
6368
Cli::ClearCompile(args) => clear_compile::clear_compile(&args)?,
6469
Cli::GenerateAll(args) => generate_all::main(&args).await?,

crates/uv-dev/src/validate_zip.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
use std::ops::Deref;
2+
3+
use anyhow::{Result, bail};
4+
use clap::Parser;
5+
use futures::TryStreamExt;
6+
use tokio_util::compat::FuturesAsyncReadCompatExt;
7+
8+
use uv_cache::{Cache, CacheArgs};
9+
use uv_client::RegistryClientBuilder;
10+
use uv_pep508::VerbatimUrl;
11+
use uv_pypi_types::ParsedUrl;
12+
13+
#[derive(Parser)]
14+
pub(crate) struct ValidateZipArgs {
15+
url: VerbatimUrl,
16+
#[command(flatten)]
17+
cache_args: CacheArgs,
18+
}
19+
20+
pub(crate) async fn validate_zip(args: ValidateZipArgs) -> Result<()> {
21+
let cache = Cache::try_from(args.cache_args)?.init()?;
22+
let client = RegistryClientBuilder::new(cache).build();
23+
24+
let ParsedUrl::Archive(archive) = ParsedUrl::try_from(args.url.to_url())? else {
25+
bail!("Only archive URLs are supported");
26+
};
27+
28+
let response = client
29+
.uncached_client(&archive.url)
30+
.get(archive.url.deref().clone())
31+
.send()
32+
.await?;
33+
let reader = response
34+
.bytes_stream()
35+
.map_err(std::io::Error::other)
36+
.into_async_read();
37+
38+
let target = tempfile::TempDir::new()?;
39+
40+
uv_extract::stream::unzip(reader.compat(), target.path()).await?;
41+
42+
Ok(())
43+
}

crates/uv-extract/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ workspace = true
1919
uv-configuration = { workspace = true }
2020
uv-distribution-filename = { workspace = true }
2121
uv-pypi-types = { workspace = true }
22+
uv-static = { workspace = true }
2223

2324
astral-tokio-tar = { workspace = true }
2425
async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd", "xz"] }

crates/uv-extract/src/error.rs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,79 @@ pub enum Error {
1414
NonSingularArchive(Vec<OsString>),
1515
#[error("The top-level of the archive must only contain a list directory, but it's empty")]
1616
EmptyArchive,
17+
#[error("ZIP local header filename at offset {offset} does not use UTF-8 encoding")]
18+
LocalHeaderNotUtf8 { offset: u64 },
19+
#[error("ZIP central directory entry filename at index {index} does not use UTF-8 encoding")]
20+
CentralDirectoryEntryNotUtf8 { index: u64 },
1721
#[error("Bad CRC (got {computed:08x}, expected {expected:08x}) for file: {}", path.display())]
1822
BadCrc32 {
1923
path: PathBuf,
2024
computed: u32,
2125
expected: u32,
2226
},
27+
#[error("Bad uncompressed size (got {computed:08x}, expected {expected:08x}) for file: {}", path.display())]
28+
BadUncompressedSize {
29+
path: PathBuf,
30+
computed: u64,
31+
expected: u64,
32+
},
33+
#[error("Bad compressed size (got {computed:08x}, expected {expected:08x}) for file: {}", path.display())]
34+
BadCompressedSize {
35+
path: PathBuf,
36+
computed: u64,
37+
expected: u64,
38+
},
39+
#[error("ZIP file contains multiple entries with different contents for: {}", path.display())]
40+
DuplicateLocalFileHeader { path: PathBuf },
41+
#[error("ZIP file contains a local file header without a corresponding central-directory record entry for: {} ({offset})", path.display())]
42+
MissingCentralDirectoryEntry { path: PathBuf, offset: u64 },
43+
#[error("ZIP file contains an end-of-central-directory record entry, but no local file header for: {} ({offset}", path.display())]
44+
MissingLocalFileHeader { path: PathBuf, offset: u64 },
45+
#[error("ZIP file uses conflicting paths for the local file header at {} (got {}, expected {})", offset, local_path.display(), central_directory_path.display())]
46+
ConflictingPaths {
47+
offset: u64,
48+
local_path: PathBuf,
49+
central_directory_path: PathBuf,
50+
},
51+
#[error("ZIP file uses conflicting checksums for the local file header and central-directory record (got {local_crc32}, expected {central_directory_crc32}) for: {} ({offset})", path.display())]
52+
ConflictingChecksums {
53+
path: PathBuf,
54+
offset: u64,
55+
local_crc32: u32,
56+
central_directory_crc32: u32,
57+
},
58+
#[error("ZIP file uses conflicting compressed sizes for the local file header and central-directory record (got {local_compressed_size}, expected {central_directory_compressed_size}) for: {} ({offset})", path.display())]
59+
ConflictingCompressedSizes {
60+
path: PathBuf,
61+
offset: u64,
62+
local_compressed_size: u64,
63+
central_directory_compressed_size: u64,
64+
},
65+
#[error("ZIP file uses conflicting uncompressed sizes for the local file header and central-directory record (got {local_uncompressed_size}, expected {central_directory_uncompressed_size}) for: {} ({offset})", path.display())]
66+
ConflictingUncompressedSizes {
67+
path: PathBuf,
68+
offset: u64,
69+
local_uncompressed_size: u64,
70+
central_directory_uncompressed_size: u64,
71+
},
72+
#[error("ZIP file contains trailing contents after the end-of-central-directory record")]
73+
TrailingContents,
74+
#[error(
75+
"ZIP file reports a number of entries in the central directory that conflicts with the actual number of entries (got {actual}, expected {expected})"
76+
)]
77+
ConflictingNumberOfEntries { actual: u64, expected: u64 },
78+
#[error("Data descriptor is missing for file: {}", path.display())]
79+
MissingDataDescriptor { path: PathBuf },
80+
#[error("File contains an unexpected data descriptor: {}", path.display())]
81+
UnexpectedDataDescriptor { path: PathBuf },
82+
#[error(
83+
"ZIP file end-of-central-directory record contains a comment that appears to be an embedded ZIP file"
84+
)]
85+
ZipInZip,
86+
#[error("ZIP64 end-of-central-directory record contains unsupported extensible data")]
87+
ExtensibleData,
88+
#[error("ZIP file end-of-central-directory record contains multiple entries with the same path, but conflicting modes: {}", path.display())]
89+
DuplicateExecutableFileHeader { path: PathBuf },
2390
}
2491

2592
impl Error {

0 commit comments

Comments
 (0)