Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build basic source distributions #8886

Merged
merged 2 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ fs-err = { version = "2.11.0" }
fs2 = { version = "0.4.3" }
futures = { version = "0.3.30" }
glob = { version = "0.3.1" }
globset = { version = "0.4.15" }
globwalk = { version = "0.9.1" }
goblin = { version = "0.9.0", default-features = false, features = ["std", "elf32", "elf64", "endian_fd"] }
hex = { version = "0.4.3" }
Expand All @@ -126,7 +127,7 @@ path-slash = { version = "0.2.1" }
pathdiff = { version = "0.2.1" }
petgraph = { version = "0.6.5" }
platform-info = { version = "2.0.3" }
procfs = { version = "0.17.0" , default-features = false, features = ["flate2"] }
procfs = { version = "0.17.0", default-features = false, features = ["flate2"] }
proc-macro2 = { version = "1.0.86" }
pubgrub = { git = "https://github.com/astral-sh/pubgrub", rev = "95e1390399cdddee986b658be19587eb1fdb2d79" }
version-ranges = { git = "https://github.com/astral-sh/pubgrub", rev = "95e1390399cdddee986b658be19587eb1fdb2d79" }
Expand All @@ -153,6 +154,7 @@ smallvec = { version = "1.13.2" }
spdx = { version = "0.10.6" }
syn = { version = "2.0.77" }
sys-info = { version = "0.9.1" }
tar = { version = "0.4.43" }
target-lexicon = { version = "0.12.16" }
tempfile = { version = "3.12.0" }
textwrap = { version = "0.16.1" }
Expand Down
5 changes: 4 additions & 1 deletion crates/uv-build-backend/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,16 @@ uv-pep508 = { workspace = true }
uv-pypi-types = { workspace = true }
uv-warnings = { workspace = true }

csv = { workspace = true}
csv = { workspace = true }
flate2 = { workspace = true }
fs-err = { workspace = true }
glob = { workspace = true }
globset = { workspace = true }
itertools = { workspace = true }
serde = { workspace = true }
sha2 = { workspace = true }
spdx = { workspace = true }
tar = { workspace = true }
thiserror = { workspace = true }
toml = { workspace = true }
tracing = { workspace = true }
Expand Down
143 changes: 136 additions & 7 deletions crates/uv-build-backend/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,21 @@ mod pep639_glob;

use crate::metadata::{PyProjectToml, ValidationError};
use crate::pep639_glob::Pep639GlobError;
use flate2::write::GzEncoder;
use flate2::Compression;
use fs_err::File;
use glob::{GlobError, PatternError};
use globset::{Glob, GlobSetBuilder};
use itertools::Itertools;
use sha2::{Digest, Sha256};
use std::fs::FileType;
use std::io::{BufReader, Read, Write};
use std::io::{BufReader, Cursor, Read, Write};
use std::path::{Path, PathBuf, StripPrefixError};
use std::{io, mem};
use tar::{EntryType, Header};
use thiserror::Error;
use tracing::{debug, trace};
use uv_distribution_filename::WheelFilename;
use uv_distribution_filename::{SourceDistExtension, SourceDistFilename, WheelFilename};
use uv_fs::Simplified;
use walkdir::WalkDir;
use zip::{CompressionMethod, ZipWriter};
Expand Down Expand Up @@ -43,8 +47,8 @@ pub enum Error {
NotUtf8Path(PathBuf),
#[error("Failed to walk source tree")]
StripPrefix(#[from] StripPrefixError),
#[error("Unsupported file type: {0:?}")]
UnsupportedFileType(FileType),
#[error("Unsupported file type {1:?}: `{}`", _0.user_display())]
UnsupportedFileType(PathBuf, FileType),
#[error("Failed to write wheel zip archive")]
Zip(#[from] zip::result::ZipError),
#[error("Failed to write RECORD file")]
Expand All @@ -53,6 +57,8 @@ pub enum Error {
MissingModule(PathBuf),
#[error("Inconsistent metadata between prepare and build step: `{0}`")]
InconsistentSteps(&'static str),
#[error("Failed to write to {}", _0.user_display())]
TarWrite(PathBuf, #[source] io::Error),
}

/// Allow dispatching between writing to a directory, writing to zip and writing to a `.tar.gz`.
Expand Down Expand Up @@ -276,7 +282,7 @@ fn write_hashed(
}

/// Build a wheel from the source tree and place it in the output directory.
pub fn build(
pub fn build_wheel(
source_tree: &Path,
wheel_dir: &Path,
metadata_directory: Option<&Path>,
Expand Down Expand Up @@ -323,7 +329,10 @@ pub fn build(
wheel_writer.write_file(relative_path_str, entry.path())?;
} else {
// TODO(konsti): We may want to support symlinks, there is support for installing them.
return Err(Error::UnsupportedFileType(entry.file_type()));
return Err(Error::UnsupportedFileType(
entry.path().to_path_buf(),
entry.file_type(),
));
}

entry.path();
Expand All @@ -342,6 +351,126 @@ pub fn build(
Ok(filename)
}

/// Build a source distribution from the source tree and place it in the output directory.
pub fn build_source_dist(
source_tree: &Path,
source_dist_directory: &Path,
uv_version: &str,
) -> Result<SourceDistFilename, Error> {
let contents = fs_err::read_to_string(source_tree.join("pyproject.toml"))?;
let pyproject_toml = PyProjectToml::parse(&contents)?;
pyproject_toml.check_build_system(uv_version);

let filename = SourceDistFilename {
name: pyproject_toml.name().clone(),
version: pyproject_toml.version().clone(),
extension: SourceDistExtension::TarGz,
};

let top_level = format!("{}-{}", pyproject_toml.name(), pyproject_toml.version());

let source_dist_path = source_dist_directory.join(filename.to_string());
let tar_gz = File::create(&source_dist_path)?;
let enc = GzEncoder::new(tar_gz, Compression::default());
let mut tar = tar::Builder::new(enc);

let metadata = pyproject_toml
.to_metadata(source_tree)?
.core_metadata_format();

let mut header = Header::new_gnu();
header.set_size(metadata.bytes().len() as u64);
header.set_mode(0o644);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not fully clear what the right default here is

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this seems fine to me generally speaking. Or is there a more specific concern you have?

header.set_cksum();
tar.append_data(
&mut header,
Path::new(&top_level).join("PKG-INFO"),
Cursor::new(metadata),
)
.map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?;

let includes = ["src/**/*", "pyproject.toml"];
let mut include_builder = GlobSetBuilder::new();
for include in includes {
include_builder.add(Glob::new(include).expect("TODO"));
}
let include_matcher = include_builder.build().expect("TODO");

let excludes = ["__pycache__", "*.pyc", "*.pyo"];
let mut exclude_builder = GlobSetBuilder::new();
for exclude in excludes {
exclude_builder.add(Glob::new(exclude).expect("TODO"));
}
let exclude_matcher = exclude_builder.build().expect("TODO");

// TODO(konsti): Add files linked by pyproject.toml

for file in WalkDir::new(&source_tree).into_iter().filter_entry(|dir| {
let relative = dir
.path()
.strip_prefix(&source_tree)
.expect("walkdir starts with root");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this is correct.

// TODO(konsti): Also check that we're matching at least a prefix of an include matcher.
!exclude_matcher.is_match(relative)
}) {
let entry = file.map_err(|err| Error::WalkDir {
root: source_tree.to_path_buf(),
err,
})?;
let relative = entry
.path()
.strip_prefix(&source_tree)
.expect("walkdir starts with root");
if !include_matcher.is_match(relative) {
trace!("Excluding {}", relative.user_display());
continue;
}
debug!("Including {}", relative.user_display());

let metadata = fs_err::metadata(entry.path())?;
let mut header = Header::new_gnu();
#[cfg(unix)]
{
header.set_mode(std::os::unix::fs::MetadataExt::mode(&metadata));
}
#[cfg(not(unix))]
{
header.set_mode(0o644);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not fully clear what the right default is here, since, for example, we're writing source dists that get used on Unix.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not even sure what this means on non-Unix?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem we want to avoid is writing a .tar.gz on Windows, setting a default 000 mode, and then failing to read the unpacked files on Unix.

There are other odd implications. For example, when you check out a Git repo and build a source dist from it, the result may differ depending on whether it was built on Windows or on Unix: on Windows, we lose the permissions that may be stored in Git (as NTFS does not support them), while on Unix, we copy the permissions to the archive.

In the wild, 644 and 755 seem to be the most popular defaults: https://github.com/search?q=header.set_mode&type=code

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, and I imagine at least on Unix, a user's umask will smooth things out even when 644 is undesirable.

Copy link

@harkabeeparolus harkabeeparolus Nov 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's what bsdtar / libarchive does on Windows. My C is a bit rusty (hehe 🙄), but basically:

  1. Start with read bits for everyone, i.e. user, group and other: 444
  2. If the Windows file/dir is not readonly, add write bits for everyone (with bitwise OR): 222 (resulting in 666)
  3. If it is a directory, add execute bits for everyone: 111
  4. If it is a regular file ending in *.bat, *.cmd, or *.exe, add execute bits for everyone: 111

I feel like there ought to be a portable Rust crate for creating tar files that performs reasonably similar heuristics on Windows, if you don't have a strong desire to roll your own heuristics. 😊

This will work fine while running as a normal user (non-root), since tar by default applies the normal user's umask when extracting a tar archive. From the manual page:

‘--no-same-permissions’
When extracting an archive, subtract the user’s umask from files from the permissions specified in the archive. This is the default behavior for ordinary users.

And:

‘-p’
‘--same-permissions’
‘--preserve-permissions’
Extract all protection information.

This option causes tar to set the modes (access permissions) of extracted files exactly as recorded in the archive. If this option is not used, the current umask setting limits the permissions on extracted files. This option is by default enabled when tar is executed by a superuser.

If someone does extract a random tar file from the Internet as root, they should know to use --no-same-permissions and similar flags, in order not to shoot themselves in the foot.

}

if entry.file_type().is_dir() {
header.set_entry_type(EntryType::Directory);
header
.set_path(Path::new(&top_level).join(relative))
.expect("TODO");
header.set_size(0);
header.set_cksum();
tar.append(&header, io::empty())
.map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?;
continue;
} else if entry.file_type().is_file() {
header.set_size(metadata.len());
header.set_cksum();
tar.append_data(
&mut header,
Path::new(&top_level).join(relative),
BufReader::new(File::open(&entry.path())?),
)
.map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?;
} else {
return Err(Error::UnsupportedFileType(
relative.to_path_buf(),
entry.file_type(),
));
}
}

tar.finish()
.map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?;

Ok(filename)
}

/// Write the dist-info directory to the output directory without building the wheel.
pub fn metadata(
source_tree: &Path,
Expand All @@ -350,7 +479,7 @@ pub fn metadata(
) -> Result<String, Error> {
let contents = fs_err::read_to_string(source_tree.join("pyproject.toml"))?;
let pyproject_toml = PyProjectToml::parse(&contents)?;
pyproject_toml.check_build_system("1.0.0+test");
pyproject_toml.check_build_system(uv_version);

let filename = WheelFilename {
name: pyproject_toml.name().clone(),
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-build-backend/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ fn test_record() {
fn test_determinism() {
let temp1 = TempDir::new().unwrap();
let uv_backend = Path::new("../../scripts/packages/uv_backend");
build(uv_backend, temp1.path(), None, "1.0.0+test").unwrap();
build_wheel(uv_backend, temp1.path(), None, "1.0.0+test").unwrap();

// Touch the file to check that we don't serialize the last modified date.
fs_err::write(
Expand All @@ -56,7 +56,7 @@ fn test_determinism() {
.unwrap();

let temp2 = TempDir::new().unwrap();
build(uv_backend, temp2.path(), None, "1.0.0+test").unwrap();
build_wheel(uv_backend, temp2.path(), None, "1.0.0+test").unwrap();

let wheel_filename = "uv_backend-0.1.0-py3-none-any.whl";
assert_eq!(
Expand Down
12 changes: 9 additions & 3 deletions crates/uv/src/commands/build_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,20 @@ use anyhow::Result;
use std::env;
use std::path::Path;

pub(crate) fn build_sdist(_sdist_directory: &Path) -> Result<ExitStatus> {
todo!()
pub(crate) fn build_sdist(sdist_directory: &Path) -> Result<ExitStatus> {
let filename = uv_build_backend::build_source_dist(
&env::current_dir()?,
sdist_directory,
uv_version::version(),
)?;
println!("{filename}");
Ok(ExitStatus::Success)
}
pub(crate) fn build_wheel(
wheel_directory: &Path,
metadata_directory: Option<&Path>,
) -> Result<ExitStatus> {
let filename = uv_build_backend::build(
let filename = uv_build_backend::build_wheel(
&env::current_dir()?,
wheel_directory,
metadata_directory,
Expand Down
Loading