rustPlatform.fetchCargoVendor: init #349360
fetch-cargo-vendor-util.py (new file)
@@ -0,0 +1,278 @@
import functools
import hashlib
import json
import multiprocessing as mp
import re
import shutil
import subprocess
import sys
import tomllib
from pathlib import Path
from typing import Any, TypedDict

import requests

eprint = functools.partial(print, file=sys.stderr)


def load_toml(path: Path) -> dict[str, Any]:
    with open(path, "rb") as f:
        return tomllib.load(f)
def download_file_with_checksum(url: str, destination_path: Path, expected_checksum: str) -> None:
    """Downloads a file and computes its SHA-256 checksum while writing it to disk."""
    sha256_hash = hashlib.sha256()
    with requests.get(url, stream=True) as response:
        if not response.ok:
            raise Exception(f"Failed to fetch file from {url}. Status code: {response.status_code}")
        with open(destination_path, "wb") as file:
            for chunk in response.iter_content(1024):  # Download in chunks
                if chunk:  # Filter out keep-alive chunks
                    file.write(chunk)
                    sha256_hash.update(chunk)

    # Compute the final checksum
    calculated_checksum = sha256_hash.hexdigest()

    if calculated_checksum != expected_checksum:
        raise Exception(f"Hash mismatch! File fetched from {url} had checksum {calculated_checksum}, expected {expected_checksum}.")
def get_download_url_for_tarball(pkg: dict[str, Any]) -> str:
    # TODO: support other registries
    # maybe fetch config.json from the registry root and get the dl key
    # See: https://doc.rust-lang.org/cargo/reference/registry-index.html#index-configuration
    if pkg["source"] != "registry+https://github.com/rust-lang/crates.io-index":
        raise Exception("Only the default crates.io registry is supported.")

    return f"https://crates.io/api/v1/crates/{pkg['name']}/{pkg['version']}/download"
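For instance, a lockfile entry for a hypothetical crate serde at version 1.0.0 would resolve to https://crates.io/api/v1/crates/serde/1.0.0/download, which is crates.io's stable download endpoint for a pinned version.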
def download_tarball(pkg: dict[str, Any], out_dir: Path) -> None:
    url = get_download_url_for_tarball(pkg)
    filename = f"{pkg['name']}-{pkg['version']}.tar.gz"

    # TODO: allow legacy checksum specification, see importCargoLock for example
    # also, don't forget about the other usage of the checksum
    expected_checksum = pkg["checksum"]

    tarball_out_dir = out_dir / "tarballs" / filename
    eprint(f"Fetching {url} -> tarballs/{filename}")

    download_file_with_checksum(url, tarball_out_dir, expected_checksum)
def download_git_tree(url: str, git_sha_rev: str, out_dir: Path) -> None:
    tree_out_dir = out_dir / "git" / git_sha_rev
    eprint(f"Fetching {url}#{git_sha_rev} -> git/{git_sha_rev}")

    subprocess.check_output(["nix-prefetch-git", "--builder", "--quiet", "--url", url, "--rev", git_sha_rev, "--out", tree_out_dir])
GIT_SOURCE_REGEX = re.compile(r"git\+(?P<url>[^?]+)(\?(?P<type>rev|tag|branch)=(?P<value>.*))?#(?P<git_sha_rev>.*)")


class GitSourceInfo(TypedDict):
    url: str
    type: str | None
    value: str | None
    git_sha_rev: str
def parse_git_source(source: str) -> GitSourceInfo:
    match = GIT_SOURCE_REGEX.match(source)
    if match is None:
        raise Exception(f"Unable to process git source: {source}.")
    # hack: make the type system accept that the function outputs GitSourceInfo
    source_info: Any = match.groupdict(default=None)
    return source_info
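For illustration, here is what the parser yields for a hypothetical branch-pinned source string (URL and revision made up); url stops at the `?`, and the fragment after `#` is the resolved commit:

    >>> parse_git_source("git+https://github.com/owner/repo?branch=main#0123abcd")
    {'url': 'https://github.com/owner/repo', 'type': 'branch', 'value': 'main', 'git_sha_rev': '0123abcd'}

When the query part is absent, type and value come back as None, courtesy of groupdict(default=None).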
def create_vendor_staging(lockfile_path: Path, out_dir: Path) -> None:
    cargo_toml = load_toml(lockfile_path)

    git_packages: list[dict[str, Any]] = []
    registry_packages: list[dict[str, Any]] = []

    for pkg in cargo_toml["package"]:
        # ignore local dependencies
        if "source" not in pkg.keys():
            eprint(f"Skipping local dependency: {pkg['name']}")
            continue
        source = pkg["source"]

        if source.startswith("git+"):
            git_packages.append(pkg)
        elif source.startswith("registry+"):
            registry_packages.append(pkg)
        else:
            raise Exception(f"Can't process source: {source}.")

    git_sha_rev_to_url: dict[str, str] = {}
    for pkg in git_packages:
        source_info = parse_git_source(pkg["source"])
        git_sha_rev_to_url[source_info["git_sha_rev"]] = source_info["url"]

    out_dir.mkdir(exist_ok=True)
    shutil.copy(lockfile_path, out_dir / "Cargo.lock")

    # create a pool with at most 10 concurrent jobs
    with mp.Pool(min(10, mp.cpu_count())) as pool:

        if len(git_packages) != 0:
            (out_dir / "git").mkdir()
            # run download jobs in parallel
            pool.starmap(download_git_tree, ((url, git_sha_rev, out_dir) for git_sha_rev, url in git_sha_rev_to_url.items()))

        if len(registry_packages) != 0:
            (out_dir / "tarballs").mkdir()
            # run download jobs in parallel
            pool.starmap(download_tarball, ((pkg, out_dir) for pkg in registry_packages))
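Assuming a lockfile with both kinds of sources, the staging directory produced above ends up with roughly this layout (names illustrative):

    vendor-staging/
    ├── Cargo.lock
    ├── git/
    │   └── <git_sha_rev>/              # tree fetched by nix-prefetch-git
    └── tarballs/
        └── <name>-<version>.tar.gz     # crate tarball from the registry

The git/ and tarballs/ subdirectories are only created when the lockfile actually contains sources of that kind.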
def get_manifest_metadata(manifest_path: Path) -> dict[str, Any]:
    output = subprocess.check_output(["cargo", "metadata", "--format-version", "1", "--no-deps", "--manifest-path", manifest_path])
    return json.loads(output)
def try_get_crate_manifest_path_from_manifest_path(manifest_path: Path, crate_name: str) -> Path | None:
    metadata = get_manifest_metadata(manifest_path)

    for pkg in metadata["packages"]:
        if pkg["name"] == crate_name:
            return Path(pkg["manifest_path"])

    return None
def find_crate_manifest_in_tree(tree: Path, crate_name: str) -> Path:
    # in some cases Cargo.toml is not located at the top level, so we also look at subdirectories
    manifest_paths = tree.glob("**/Cargo.toml")

    for manifest_path in manifest_paths:
        res = try_get_crate_manifest_path_from_manifest_path(manifest_path, crate_name)
        if res is not None:
            return res

    raise Exception(f"Couldn't find manifest for crate {crate_name} inside {tree}.")
def copy_and_patch_git_crate_subtree(git_tree: Path, crate_name: str, crate_out_dir: Path) -> None:
    crate_manifest_path = find_crate_manifest_in_tree(git_tree, crate_name)
    crate_tree = crate_manifest_path.parent

    eprint(f"Copying to {crate_out_dir}")
    shutil.copytree(crate_tree, crate_out_dir)
    crate_out_dir.chmod(0o755)

    with open(crate_manifest_path, "r") as f:
        manifest_data = f.read()

    if "workspace" in manifest_data:
        crate_manifest_metadata = get_manifest_metadata(crate_manifest_path)
        workspace_root = Path(crate_manifest_metadata["workspace_root"])

        root_manifest_path = workspace_root / "Cargo.toml"
        manifest_path = crate_out_dir / "Cargo.toml"

        manifest_path.chmod(0o644)
        eprint(f"Patching {manifest_path}")
        subprocess.check_output(["replace-workspace-values", manifest_path, root_manifest_path])
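The patching step matters because a crate copied out of a git workspace may rely on Cargo's workspace inheritance, which only resolves against the workspace root. A minimal sketch of the kind of rewrite replace-workspace-values has to perform (contents illustrative, not taken from this PR):

    # member crate Cargo.toml, before patching
    [package]
    name = "foo"
    version.workspace = true

    # workspace root Cargo.toml
    [workspace.package]
    version = "1.2.3"

    # member crate Cargo.toml, after patching
    [package]
    name = "foo"
    version = "1.2.3"

This also explains the crude "workspace" substring check above: it can produce false positives, but those should only cost an unnecessary patch run.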
def extract_crate_tarball_contents(tarball_path: Path, crate_out_dir: Path) -> None:
    eprint(f"Unpacking to {crate_out_dir}")
    crate_out_dir.mkdir()
    subprocess.check_output(["tar", "xf", tarball_path, "-C", crate_out_dir, "--strip-components=1"])
def create_vendor(vendor_staging_dir: Path, out_dir: Path) -> None:
    lockfile_path = vendor_staging_dir / "Cargo.lock"
    out_dir.mkdir(exist_ok=True)
    shutil.copy(lockfile_path, out_dir / "Cargo.lock")

    cargo_toml = load_toml(lockfile_path)

    config_lines = [
        '[source.vendored-sources]',
        'directory = "@vendor@"',
        '[source.crates-io]',
        'replace-with = "vendored-sources"',
    ]

    seen_source_keys = set()
    for pkg in cargo_toml["package"]:

        # ignore local dependencies
        if "source" not in pkg.keys():
            continue

        source: str = pkg["source"]

        dir_name = f"{pkg['name']}-{pkg['version']}"
        crate_out_dir = out_dir / dir_name

        if source.startswith("git+"):

Review comment: I think it would add readability splitting this up:

    for pkg in cargo_toml["package"]:
        # ignore local dependencies
        if "source" not in pkg.keys():
            continue
        source: str = pkg["source"]
        if source.startswith("git+"):  # maybe use str.partition if it always has a `+`
            result = [not_good_at_names]_git(...)
        elif source.startswith("registry+"):
            result = [not_good_at_names]_registry(...)
        else:
            raise ...
        process_common_part(result)

Reply: I factored out a part of the logic.
            source_info = parse_git_source(pkg["source"])
            git_sha_rev = source_info["git_sha_rev"]
            git_tree = vendor_staging_dir / "git" / git_sha_rev

            copy_and_patch_git_crate_subtree(git_tree, pkg["name"], crate_out_dir)

            # git based crates allow having no checksum information
            with open(crate_out_dir / ".cargo-checksum.json", "w") as f:
                json.dump({"files": {}}, f)

            source_key = source[0:source.find("#")]

            if source_key not in seen_source_keys:
                config_lines.append(f'[source."{source_key}"]')
                config_lines.append(f'git = "{source_info["url"]}"')
                if source_info["type"] is not None:
                    config_lines.append(f'{source_info["type"]} = "{source_info["value"]}"')
                config_lines.append('replace-with = "vendored-sources"')

            seen_source_keys.add(source_key)

        elif source.startswith("registry+"):

            filename = f"{pkg['name']}-{pkg['version']}.tar.gz"
            tarball_path = vendor_staging_dir / "tarballs" / filename

            extract_crate_tarball_contents(tarball_path, crate_out_dir)

            # non-git based crates need the package checksum at minimum
            with open(crate_out_dir / ".cargo-checksum.json", "w") as f:
                json.dump({"files": {}, "package": pkg["checksum"]}, f)

        else:
            raise Exception(f"Can't process source: {source}.")

    (out_dir / ".cargo").mkdir()
    with open(out_dir / ".cargo" / "config.toml", "w") as config_file:
        config_file.writelines(line + "\n" for line in config_lines)
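Putting the pieces together, the generated .cargo/config.toml for a project with one branch-pinned git dependency would look roughly like this (URL made up):

    [source.vendored-sources]
    directory = "@vendor@"
    [source.crates-io]
    replace-with = "vendored-sources"
    [source."git+https://github.com/owner/repo?branch=main"]
    git = "https://github.com/owner/repo"
    branch = "main"
    replace-with = "vendored-sources"

The @vendor@ placeholder is presumably substituted with the real vendor path by the consuming build hooks before cargo runs.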
def main() -> None:
    subcommand = sys.argv[1]

    subcommand_func_dict = {
        "create-vendor-staging": lambda: create_vendor_staging(lockfile_path=Path(sys.argv[2]), out_dir=Path(sys.argv[3])),
        "create-vendor": lambda: create_vendor(vendor_staging_dir=Path(sys.argv[2]), out_dir=Path(sys.argv[3])),
    }

    subcommand_func = subcommand_func_dict.get(subcommand)

    if subcommand_func is None:
        raise Exception(f"Unknown subcommand: '{subcommand}'. Must be one of {list(subcommand_func_dict.keys())}")

    subcommand_func()


if __name__ == "__main__":
    main()
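Invoked by hand, the two phases look like this (paths illustrative; the Nix file below drives exactly these two calls):

    fetch-cargo-vendor-util create-vendor-staging ./Cargo.lock ./vendor-staging
    fetch-cargo-vendor-util create-vendor ./vendor-staging ./vendor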
fetchCargoVendor (new Nix file)
@@ -0,0 +1,92 @@
{
  lib,
  stdenvNoCC,
  runCommand,
  writers,
  python3Packages,
  cargo,
  nix-prefetch-git,
  cacert,
}:

let
  replaceWorkspaceValues = writers.writePython3Bin "replace-workspace-values" {
    libraries = with python3Packages; [
      tomli
      tomli-w
    ];
    flakeIgnore = [
      "E501"
      "W503"
    ];
  } (builtins.readFile ./replace-workspace-values.py);

  fetchCargoVendorUtil = writers.writePython3Bin "fetch-cargo-vendor-util" {
    libraries = with python3Packages; [
      requests
    ];
    flakeIgnore = [
      "E501"
    ];
  } (builtins.readFile ./fetch-cargo-vendor-util.py);
in
{
  name ? if args ? pname && args ? version then "${args.pname}-${args.version}" else "cargo-deps",
  hash ? (throw "fetchCargoVendor requires a `hash` value to be set for ${name}"),
  nativeBuildInputs ? [ ],
  ...
}@args:

# TODO: add asserts about pname version and name

let
  removedArgs = [
    "name"
    "pname"
    "version"
    "nativeBuildInputs"
    "hash"
  ];

  vendorStaging = stdenvNoCC.mkDerivation (
    {
      name = "${name}-vendor-staging";

      nativeBuildInputs = [
        fetchCargoVendorUtil
        nix-prefetch-git
        cacert
      ] ++ nativeBuildInputs;

      buildPhase = ''
        runHook preBuild

        fetch-cargo-vendor-util create-vendor-staging ./Cargo.lock "$out"

        runHook postBuild
      '';

      dontInstall = true;
      dontFixup = true;

      outputHash = hash;
      outputHashAlgo = if hash == "" then "sha256" else null;
      outputHashMode = "recursive";
    }
    // builtins.removeAttrs args removedArgs
  );
in
runCommand "${name}-vendor"
  {
    inherit vendorStaging;
    nativeBuildInputs = [
      fetchCargoVendorUtil
      cargo
      replaceWorkspaceValues
    ];
  }
  ''
    fetch-cargo-vendor-util create-vendor "$vendorStaging" "$out"
  ''

Review comment: Would be nice to also type check this:
Suggested change
Review comment: No need to capture output here:
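For context, a call site would presumably follow the usual cargoDeps pattern; everything here except `hash` is an assumption based on the fetcher's pass-through arguments, not something shown in this diff:

    cargoDeps = rustPlatform.fetchCargoVendor {
      inherit pname version src;
      hash = "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=";
    };

Since vendorStaging is a fixed-output derivation (outputHash = hash), the usual workflow of building once with lib.fakeHash and then copying the reported hash into place should apply here as well.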