diff --git a/Cargo.lock b/Cargo.lock index da834d7287bfc..5ef911246d5e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3523,6 +3523,17 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eddb6b06d20fba9ed21fca3d696ee1b6e870bca0bcf9fa2971f6ae2436de576a" +[[package]] +name = "tar" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c65998313f8e17d0d553d28f91a0df93e4dbbbf770279c7bc21ca0f09ea1a1f6" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "target-lexicon" version = "0.12.16" @@ -4322,14 +4333,17 @@ name = "uv-build-backend" version = "0.1.0" dependencies = [ "csv", + "flate2", "fs-err", "glob", + "globset", "indoc", "insta", "itertools 0.13.0", "serde", "sha2", "spdx", + "tar", "tempfile", "thiserror", "toml", diff --git a/Cargo.toml b/Cargo.toml index 1ad189a41e623..5bd8e9491fb16 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -103,6 +103,7 @@ fs-err = { version = "2.11.0" } fs2 = { version = "0.4.3" } futures = { version = "0.3.30" } glob = { version = "0.3.1" } +globset = { version = "0.4.15" } globwalk = { version = "0.9.1" } goblin = { version = "0.9.0", default-features = false, features = ["std", "elf32", "elf64", "endian_fd"] } hex = { version = "0.4.3" } @@ -126,7 +127,7 @@ path-slash = { version = "0.2.1" } pathdiff = { version = "0.2.1" } petgraph = { version = "0.6.5" } platform-info = { version = "2.0.3" } -procfs = { version = "0.17.0" , default-features = false, features = ["flate2"] } +procfs = { version = "0.17.0", default-features = false, features = ["flate2"] } proc-macro2 = { version = "1.0.86" } pubgrub = { git = "https://github.com/astral-sh/pubgrub", rev = "95e1390399cdddee986b658be19587eb1fdb2d79" } version-ranges = { git = "https://github.com/astral-sh/pubgrub", rev = "95e1390399cdddee986b658be19587eb1fdb2d79" } @@ -153,6 +154,7 @@ smallvec = { version = "1.13.2" } spdx = { version = "0.10.6" } syn = { version = "2.0.77" } sys-info = { version = "0.9.1" } +tar = { version = "0.4.43" } target-lexicon = { version = "0.12.16" } tempfile = { version = "3.12.0" } textwrap = { version = "0.16.1" } diff --git a/crates/uv-build-backend/Cargo.toml b/crates/uv-build-backend/Cargo.toml index b1e924c0eef41..962dcaf5d8de5 100644 --- a/crates/uv-build-backend/Cargo.toml +++ b/crates/uv-build-backend/Cargo.toml @@ -21,13 +21,16 @@ uv-pep508 = { workspace = true } uv-pypi-types = { workspace = true } uv-warnings = { workspace = true } -csv = { workspace = true} +csv = { workspace = true } +flate2 = { workspace = true } fs-err = { workspace = true } glob = { workspace = true } +globset = { workspace = true } itertools = { workspace = true } serde = { workspace = true } sha2 = { workspace = true } spdx = { workspace = true } +tar = { workspace = true } thiserror = { workspace = true } toml = { workspace = true } tracing = { workspace = true } diff --git a/crates/uv-build-backend/src/lib.rs b/crates/uv-build-backend/src/lib.rs index 37915a7027757..51c8d95bd7f91 100644 --- a/crates/uv-build-backend/src/lib.rs +++ b/crates/uv-build-backend/src/lib.rs @@ -3,17 +3,21 @@ mod pep639_glob; use crate::metadata::{PyProjectToml, ValidationError}; use crate::pep639_glob::Pep639GlobError; +use flate2::write::GzEncoder; +use flate2::Compression; use fs_err::File; use glob::{GlobError, PatternError}; +use globset::{Glob, GlobSetBuilder}; use itertools::Itertools; use sha2::{Digest, Sha256}; use std::fs::FileType; -use std::io::{BufReader, Read, Write}; +use std::io::{BufReader, Cursor, Read, Write}; use std::path::{Path, PathBuf, StripPrefixError}; use std::{io, mem}; +use tar::{EntryType, Header}; use thiserror::Error; use tracing::{debug, trace}; -use uv_distribution_filename::WheelFilename; +use uv_distribution_filename::{SourceDistExtension, SourceDistFilename, WheelFilename}; use uv_fs::Simplified; use walkdir::WalkDir; use zip::{CompressionMethod, ZipWriter}; @@ -33,6 +37,9 @@ pub enum Error { /// [`GlobError`] is a wrapped io error. #[error(transparent)] Glob(#[from] GlobError), + /// [`globset::Error`] shows the glob that failed to parse. + #[error(transparent)] + GlobSet(#[from] globset::Error), #[error("Failed to walk source tree: `{}`", root.user_display())] WalkDir { root: PathBuf, @@ -43,8 +50,8 @@ pub enum Error { NotUtf8Path(PathBuf), #[error("Failed to walk source tree")] StripPrefix(#[from] StripPrefixError), - #[error("Unsupported file type: {0:?}")] - UnsupportedFileType(FileType), + #[error("Unsupported file type {1:?}: `{}`", _0.user_display())] + UnsupportedFileType(PathBuf, FileType), #[error("Failed to write wheel zip archive")] Zip(#[from] zip::result::ZipError), #[error("Failed to write RECORD file")] @@ -53,6 +60,8 @@ pub enum Error { MissingModule(PathBuf), #[error("Inconsistent metadata between prepare and build step: `{0}`")] InconsistentSteps(&'static str), + #[error("Failed to write to {}", _0.user_display())] + TarWrite(PathBuf, #[source] io::Error), } /// Allow dispatching between writing to a directory, writing to zip and writing to a `.tar.gz`. @@ -276,7 +285,7 @@ fn write_hashed( } /// Build a wheel from the source tree and place it in the output directory. -pub fn build( +pub fn build_wheel( source_tree: &Path, wheel_dir: &Path, metadata_directory: Option<&Path>, @@ -323,7 +332,10 @@ pub fn build( wheel_writer.write_file(relative_path_str, entry.path())?; } else { // TODO(konsti): We may want to support symlinks, there is support for installing them. - return Err(Error::UnsupportedFileType(entry.file_type())); + return Err(Error::UnsupportedFileType( + entry.path().to_path_buf(), + entry.file_type(), + )); } entry.path(); @@ -342,6 +354,126 @@ pub fn build( Ok(filename) } +/// Build a source distribution from the source tree and place it in the output directory. +pub fn build_source_dist( + source_tree: &Path, + source_dist_directory: &Path, + uv_version: &str, +) -> Result { + let contents = fs_err::read_to_string(source_tree.join("pyproject.toml"))?; + let pyproject_toml = PyProjectToml::parse(&contents)?; + pyproject_toml.check_build_system(uv_version); + + let filename = SourceDistFilename { + name: pyproject_toml.name().clone(), + version: pyproject_toml.version().clone(), + extension: SourceDistExtension::TarGz, + }; + + let top_level = format!("{}-{}", pyproject_toml.name(), pyproject_toml.version()); + + let source_dist_path = source_dist_directory.join(filename.to_string()); + let tar_gz = File::create(&source_dist_path)?; + let enc = GzEncoder::new(tar_gz, Compression::default()); + let mut tar = tar::Builder::new(enc); + + let metadata = pyproject_toml + .to_metadata(source_tree)? + .core_metadata_format(); + + let mut header = Header::new_gnu(); + header.set_size(metadata.bytes().len() as u64); + header.set_mode(0o644); + header.set_cksum(); + tar.append_data( + &mut header, + Path::new(&top_level).join("PKG-INFO"), + Cursor::new(metadata), + ) + .map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?; + + let includes = ["src/**/*", "pyproject.toml"]; + let mut include_builder = GlobSetBuilder::new(); + for include in includes { + include_builder.add(Glob::new(include)?); + } + let include_matcher = include_builder.build()?; + + let excludes = ["__pycache__", "*.pyc", "*.pyo"]; + let mut exclude_builder = GlobSetBuilder::new(); + for exclude in excludes { + exclude_builder.add(Glob::new(exclude)?); + } + let exclude_matcher = exclude_builder.build()?; + + // TODO(konsti): Add files linked by pyproject.toml + + for file in WalkDir::new(source_tree).into_iter().filter_entry(|dir| { + let relative = dir + .path() + .strip_prefix(source_tree) + .expect("walkdir starts with root"); + // TODO(konsti): Also check that we're matching at least a prefix of an include matcher. + !exclude_matcher.is_match(relative) + }) { + let entry = file.map_err(|err| Error::WalkDir { + root: source_tree.to_path_buf(), + err, + })?; + let relative = entry + .path() + .strip_prefix(source_tree) + .expect("walkdir starts with root"); + if !include_matcher.is_match(relative) { + trace!("Excluding {}", relative.user_display()); + continue; + } + debug!("Including {}", relative.user_display()); + + let metadata = fs_err::metadata(entry.path())?; + let mut header = Header::new_gnu(); + #[cfg(unix)] + { + header.set_mode(std::os::unix::fs::MetadataExt::mode(&metadata)); + } + #[cfg(not(unix))] + { + header.set_mode(0o644); + } + + if entry.file_type().is_dir() { + header.set_entry_type(EntryType::Directory); + header + .set_path(Path::new(&top_level).join(relative)) + .map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?; + header.set_size(0); + header.set_cksum(); + tar.append(&header, io::empty()) + .map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?; + continue; + } else if entry.file_type().is_file() { + header.set_size(metadata.len()); + header.set_cksum(); + tar.append_data( + &mut header, + Path::new(&top_level).join(relative), + BufReader::new(File::open(entry.path())?), + ) + .map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?; + } else { + return Err(Error::UnsupportedFileType( + relative.to_path_buf(), + entry.file_type(), + )); + } + } + + tar.finish() + .map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?; + + Ok(filename) +} + /// Write the dist-info directory to the output directory without building the wheel. pub fn metadata( source_tree: &Path, @@ -350,7 +482,7 @@ pub fn metadata( ) -> Result { let contents = fs_err::read_to_string(source_tree.join("pyproject.toml"))?; let pyproject_toml = PyProjectToml::parse(&contents)?; - pyproject_toml.check_build_system("1.0.0+test"); + pyproject_toml.check_build_system(uv_version); let filename = WheelFilename { name: pyproject_toml.name().clone(), diff --git a/crates/uv-build-backend/src/tests.rs b/crates/uv-build-backend/src/tests.rs index f90d7d63211c0..265e083bda687 100644 --- a/crates/uv-build-backend/src/tests.rs +++ b/crates/uv-build-backend/src/tests.rs @@ -46,7 +46,7 @@ fn test_record() { fn test_determinism() { let temp1 = TempDir::new().unwrap(); let uv_backend = Path::new("../../scripts/packages/uv_backend"); - build(uv_backend, temp1.path(), None, "1.0.0+test").unwrap(); + build_wheel(uv_backend, temp1.path(), None, "1.0.0+test").unwrap(); // Touch the file to check that we don't serialize the last modified date. fs_err::write( @@ -56,7 +56,7 @@ fn test_determinism() { .unwrap(); let temp2 = TempDir::new().unwrap(); - build(uv_backend, temp2.path(), None, "1.0.0+test").unwrap(); + build_wheel(uv_backend, temp2.path(), None, "1.0.0+test").unwrap(); let wheel_filename = "uv_backend-0.1.0-py3-none-any.whl"; assert_eq!( diff --git a/crates/uv/src/commands/build_backend.rs b/crates/uv/src/commands/build_backend.rs index a49a389e64e21..475c592525af9 100644 --- a/crates/uv/src/commands/build_backend.rs +++ b/crates/uv/src/commands/build_backend.rs @@ -5,14 +5,20 @@ use anyhow::Result; use std::env; use std::path::Path; -pub(crate) fn build_sdist(_sdist_directory: &Path) -> Result { - todo!() +pub(crate) fn build_sdist(sdist_directory: &Path) -> Result { + let filename = uv_build_backend::build_source_dist( + &env::current_dir()?, + sdist_directory, + uv_version::version(), + )?; + println!("{filename}"); + Ok(ExitStatus::Success) } pub(crate) fn build_wheel( wheel_directory: &Path, metadata_directory: Option<&Path>, ) -> Result { - let filename = uv_build_backend::build( + let filename = uv_build_backend::build_wheel( &env::current_dir()?, wheel_directory, metadata_directory,