Rollup merge of #77407 - pietroalbini:less-build-manifest, r=Mark-Simulacrum

Improve build-manifest to work with the improved promote-release

This PR makes some changes to build-manifest to have it work better with the other improvements I'm making to [promote-release](https://github.com/rust-lang/promote-release).

A new way to invoke the tool was added: `./x.py run src/tools/build-manifest`. The new invocation disables the generation of `.sha256` files and the generation of GPG signatures, as those steps are not tied to the Rust version we're building the manifest of: handling them in `promote-release` will improve the maintainability of our release process. Invocations through the old command (`./x.py dist hash-and-sign`) are referred to inside the source code as "legacy". The new invocation also enables internal parallelism, disabled on legacy to avoid overloading our old server.

Improvements were also made on how the checksums included in the manifest are generated:

* The manifest is first generated with placeholder checksums, and then a function walks through the manifest and calculates only the needed hashes. Before this PR, all the hashes were calculated beforehand, including the hashes of unused files.
* Calculating the hashes is now done in parallel with rayon, to better utilize all the available disk bandwidth.
* The `sha2` crate is now used instead of the `sha256sum` CLI tool: this avoids the overhead of calling another process, but more importantly enables hardware acceleration whenever available (the `sha256sum` CLI tool doesn't support it at all).

r? @Mark-Simulacrum
This PR is best reviewed commit-by-commit.
This commit is contained in:
Dylan DPC 2020-10-05 02:29:33 +02:00 committed by GitHub
commit fffeaa7b83
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 332 additions and 128 deletions

View File

@ -183,7 +183,16 @@ dependencies = [
"block-padding",
"byte-tools",
"byteorder",
"generic-array",
"generic-array 0.12.3",
]
[[package]]
name = "block-buffer"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4"
dependencies = [
"generic-array 0.14.4",
]
[[package]]
@ -233,8 +242,11 @@ version = "0.1.0"
dependencies = [
"anyhow",
"flate2",
"hex 0.4.2",
"rayon",
"serde",
"serde_json",
"sha2",
"tar",
"toml",
]
@ -687,6 +699,12 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a21fa21941700a3cd8fcb4091f361a6a712fac632f85d9f487cc892045d55c6"
[[package]]
name = "cpuid-bool"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634"
[[package]]
name = "crates-io"
version = "0.31.1"
@ -884,7 +902,16 @@ version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
dependencies = [
"generic-array",
"generic-array 0.12.3",
]
[[package]]
name = "digest"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
dependencies = [
"generic-array 0.14.4",
]
[[package]]
@ -1166,6 +1193,16 @@ dependencies = [
"typenum",
]
[[package]]
name = "generic-array"
version = "0.14.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getopts"
version = "0.2.21"
@ -1835,9 +1872,9 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a18af3dcaf2b0219366cdb4e2af65a6101457b415c3d1a5c71dd9c2b7c77b9c8"
dependencies = [
"block-buffer",
"digest",
"opaque-debug",
"block-buffer 0.7.3",
"digest 0.8.1",
"opaque-debug 0.2.3",
]
[[package]]
@ -2097,6 +2134,12 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
[[package]]
name = "opaque-debug"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
[[package]]
name = "open"
version = "1.4.0"
@ -4362,10 +4405,23 @@ version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df"
dependencies = [
"block-buffer",
"digest",
"block-buffer 0.7.3",
"digest 0.8.1",
"fake-simd",
"opaque-debug",
"opaque-debug 0.2.3",
]
[[package]]
name = "sha2"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2933378ddfeda7ea26f48c555bdad8bb446bf8a3d17832dc83e380d444cfb8c1"
dependencies = [
"block-buffer 0.9.0",
"cfg-if",
"cpuid-bool",
"digest 0.9.0",
"opaque-debug 0.3.0",
]
[[package]]

View File

@ -477,7 +477,7 @@ impl<'a> Builder<'a> {
install::Src,
install::Rustc
),
Kind::Run => describe!(run::ExpandYamlAnchors,),
Kind::Run => describe!(run::ExpandYamlAnchors, run::BuildManifest,),
}
}

View File

@ -46,7 +46,7 @@ pub fn pkgname(builder: &Builder<'_>, component: &str) -> String {
}
}
fn distdir(builder: &Builder<'_>) -> PathBuf {
pub(crate) fn distdir(builder: &Builder<'_>) -> PathBuf {
builder.out.join("dist")
}
@ -2371,6 +2371,7 @@ impl Step for HashSign {
cmd.arg(addr);
cmd.arg(&builder.config.channel);
cmd.arg(&builder.src);
cmd.env("BUILD_MANIFEST_LEGACY", "1");
builder.create_dir(&distdir(builder));

View File

@ -1,5 +1,7 @@
use crate::builder::{Builder, RunConfig, ShouldRun, Step};
use crate::dist::distdir;
use crate::tool::Tool;
use build_helper::output;
use std::process::Command;
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
@ -41,3 +43,43 @@ fn try_run(builder: &Builder<'_>, cmd: &mut Command) -> bool {
}
true
}
/// Bootstrap step that runs the `build-manifest` tool when
/// `src/tools/build-manifest` is requested through the `run` subcommand.
#[derive(Debug, PartialOrd, Ord, Copy, Clone, Hash, PartialEq, Eq)]
pub struct BuildManifest;

impl Step for BuildManifest {
    type Output = ();
    const ONLY_HOSTS: bool = true;

    /// Selects this step when the `src/tools/build-manifest` path is requested.
    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
        run.path("src/tools/build-manifest")
    }

    /// Schedules the step on the builder.
    fn make_run(run: RunConfig<'_>) {
        run.builder.ensure(BuildManifest);
    }

    /// Invokes the tool with its six positional arguments.
    ///
    /// # Panics
    ///
    /// Panics when `dist.sign-folder` or `dist.upload-addr` is not configured.
    fn run(self, builder: &Builder<'_>) {
        // This gets called by `promote-release`
        // (https://github.com/rust-lang/promote-release).
        let mut cmd = builder.tool_cmd(Tool::BuildManifest);

        let sign = match builder.config.dist_sign_folder.as_ref() {
            Some(folder) => folder,
            None => panic!("\n\nfailed to specify `dist.sign-folder` in `config.toml`\n\n"),
        };
        let addr = match builder.config.dist_upload_addr.as_ref() {
            Some(addr) => addr,
            None => panic!("\n\nfailed to specify `dist.upload-addr` in `config.toml`\n\n"),
        };

        // The manifest date is taken from the host's `date` command.
        let today = output(Command::new("date").arg("+%Y-%m-%d"));
        let dist = distdir(builder);

        // No environment variable is set on the command here, so the tool
        // sees whatever the ambient environment provides.
        cmd.arg(sign)
            .arg(&dist)
            .arg(today.trim())
            .arg(addr)
            .arg(&builder.config.channel)
            .arg(&builder.src);

        builder.create_dir(&dist);
        builder.run(&mut cmd);
    }
}

View File

@ -11,3 +11,6 @@ serde_json = "1.0"
anyhow = "1.0.32"
flate2 = "1.0.16"
tar = "0.4.29"
sha2 = "0.9.1"
rayon = "1.3.1"
hex = "0.4.2"

View File

@ -20,8 +20,7 @@ Then, you can generate the manifest and all the packages from `path/to/dist` to
`path/to/output` with:
```
$ BUILD_MANIFEST_DISABLE_SIGNING=1 cargo +nightly run \
path/to/dist path/to/output 1970-01-01 http://example.com \
$ cargo +nightly run path/to/dist path/to/output 1970-01-01 http://example.com \
CHANNEL path/to/rust/repo
```

View File

@ -4,17 +4,22 @@
//! via `x.py dist hash-and-sign`; the cmdline arguments are set up
//! by rustbuild (in `src/bootstrap/dist.rs`).
mod manifest;
mod versions;
use crate::manifest::{Component, FileHash, Manifest, Package, Rename, Target};
use crate::versions::{PkgType, Versions};
use serde::Serialize;
use std::collections::BTreeMap;
use std::collections::HashMap;
use rayon::prelude::*;
use sha2::Digest;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::env;
use std::error::Error;
use std::fs::{self, File};
use std::io::{self, Read, Write};
use std::io::{self, BufReader, Read, Write};
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::sync::Mutex;
use std::time::Instant;
static HOSTS: &[&str] = &[
"aarch64-unknown-linux-gnu",
@ -167,57 +172,6 @@ static MINGW: &[&str] = &["i686-pc-windows-gnu", "x86_64-pc-windows-gnu"];
static NIGHTLY_ONLY_COMPONENTS: &[&str] = &["miri-preview", "rust-analyzer-preview"];
#[derive(Serialize)]
#[serde(rename_all = "kebab-case")]
struct Manifest {
manifest_version: String,
date: String,
pkg: BTreeMap<String, Package>,
renames: BTreeMap<String, Rename>,
profiles: BTreeMap<String, Vec<String>>,
}
#[derive(Serialize)]
struct Package {
version: String,
git_commit_hash: Option<String>,
target: BTreeMap<String, Target>,
}
#[derive(Serialize)]
struct Rename {
to: String,
}
#[derive(Serialize, Default)]
struct Target {
available: bool,
url: Option<String>,
hash: Option<String>,
xz_url: Option<String>,
xz_hash: Option<String>,
components: Option<Vec<Component>>,
extensions: Option<Vec<Component>>,
}
impl Target {
fn unavailable() -> Self {
Self::default()
}
}
#[derive(Serialize)]
struct Component {
pkg: String,
target: String,
}
impl Component {
fn from_str(pkg: &str, target: &str) -> Self {
Self { pkg: pkg.to_string(), target: target.to_string() }
}
}
macro_rules! t {
($e:expr) => {
match $e {
@ -232,25 +186,33 @@ struct Builder {
input: PathBuf,
output: PathBuf,
gpg_passphrase: String,
digests: BTreeMap<String, String>,
s3_address: String,
date: String,
should_sign: bool,
legacy: bool,
legacy_gpg_passphrase: String,
}
fn main() {
// Avoid signing packages while manually testing
// Do NOT set this envvar in CI
let should_sign = env::var("BUILD_MANIFEST_DISABLE_SIGNING").is_err();
// Up until Rust 1.48 the release process relied on build-manifest to create the SHA256
// checksums of released files and to sign the tarballs. That was moved over to promote-release
// in time for the branching of Rust 1.48, but the old release process still had to work the
// old way.
//
// When running build-manifest through the old ./x.py dist hash-and-sign the environment
// variable will be set, enabling the legacy behavior of generating the .sha256 files and
// signing the tarballs.
//
// Once the old release process is fully decommissioned, the environment variable, all the
// related code in this tool and ./x.py dist hash-and-sign can be removed.
let legacy = env::var("BUILD_MANIFEST_LEGACY").is_ok();
// Safety check to ensure signing is always enabled on CI
// The CI environment variable is set by both Travis and AppVeyor
if !should_sign && env::var("CI").is_ok() {
println!("The 'BUILD_MANIFEST_DISABLE_SIGNING' env var can't be enabled on CI.");
println!("If you're not running this on CI, unset the 'CI' env var.");
panic!();
// Avoid overloading the old server in legacy mode.
if legacy {
rayon::ThreadPoolBuilder::new()
.num_threads(1)
.build_global()
.expect("failed to initialize Rayon");
}
let mut args = env::args().skip(1);
@ -263,7 +225,7 @@ fn main() {
// Do not ask for a passphrase while manually testing
let mut passphrase = String::new();
if should_sign {
if legacy {
// `x.py` passes the passphrase via stdin.
t!(io::stdin().read_to_string(&mut passphrase));
}
@ -273,12 +235,11 @@ fn main() {
input,
output,
gpg_passphrase: passphrase,
digests: BTreeMap::new(),
s3_address,
date,
should_sign,
legacy,
legacy_gpg_passphrase: passphrase,
}
.build();
}
@ -286,7 +247,9 @@ fn main() {
impl Builder {
fn build(&mut self) {
self.check_toolstate();
self.digest_and_sign();
if self.legacy {
self.digest_and_sign();
}
let manifest = self.build_manifest();
let rust_version = self.versions.package_version(&PkgType::Rust).unwrap();
@ -324,10 +287,9 @@ impl Builder {
/// Hash all files and compute their signatures (only called in legacy mode).
fn digest_and_sign(&mut self) {
for file in t!(self.input.read_dir()).map(|e| t!(e).path()) {
let filename = file.file_name().unwrap().to_str().unwrap();
let digest = self.hash(&file);
file.file_name().unwrap().to_str().unwrap();
self.hash(&file);
self.sign(&file);
assert!(self.digests.insert(filename.to_string(), digest).is_none());
}
}
@ -343,6 +305,9 @@ impl Builder {
self.add_profiles_to(&mut manifest);
self.add_renames_to(&mut manifest);
manifest.pkg.insert("rust".to_string(), self.rust_package(&manifest));
self.fill_missing_hashes(&mut manifest);
manifest
}
@ -438,9 +403,12 @@ impl Builder {
fn target_host_combination(&mut self, host: &str, manifest: &Manifest) -> Option<Target> {
let filename = self.versions.tarball_name(&PkgType::Rust, host).unwrap();
let digest = self.digests.remove(&filename)?;
let xz_filename = filename.replace(".tar.gz", ".tar.xz");
let xz_digest = self.digests.remove(&xz_filename);
let mut target = Target::from_compressed_tar(self, &filename);
if !target.available {
return None;
}
let mut components = Vec::new();
let mut extensions = Vec::new();
@ -496,15 +464,9 @@ impl Builder {
extensions.retain(&has_component);
components.retain(&has_component);
Some(Target {
available: true,
url: Some(self.url(&filename)),
hash: Some(digest),
xz_url: xz_digest.as_ref().map(|_| self.url(&xz_filename)),
xz_hash: xz_digest,
components: Some(components),
extensions: Some(extensions),
})
target.components = Some(components);
target.extensions = Some(extensions);
Some(target)
}
fn profile(
@ -542,37 +504,19 @@ impl Builder {
let targets = targets
.iter()
.map(|name| {
if is_present {
// The component generally exists, but it might still be missing for this target.
let target = if is_present {
let filename = self
.versions
.tarball_name(&PkgType::from_component(pkgname), name)
.unwrap();
let digest = match self.digests.remove(&filename) {
Some(digest) => digest,
// This component does not exist for this target -- skip it.
None => return (name.to_string(), Target::unavailable()),
};
let xz_filename = filename.replace(".tar.gz", ".tar.xz");
let xz_digest = self.digests.remove(&xz_filename);
(
name.to_string(),
Target {
available: true,
url: Some(self.url(&filename)),
hash: Some(digest),
xz_url: xz_digest.as_ref().map(|_| self.url(&xz_filename)),
xz_hash: xz_digest,
components: None,
extensions: None,
},
)
Target::from_compressed_tar(self, &filename)
} else {
// If the component is not present for this build add it anyway but mark it as
// unavailable -- this way rustup won't allow upgrades without --force
(name.to_string(), Target::unavailable())
}
Target::unavailable()
};
(name.to_string(), target)
})
.collect();
@ -586,8 +530,9 @@ impl Builder {
);
}
fn url(&self, filename: &str) -> String {
format!("{}/{}/{}", self.s3_address, self.date, filename)
fn url(&self, path: &Path) -> String {
let file_name = path.file_name().unwrap().to_str().unwrap();
format!("{}/{}/{}", self.s3_address, self.date, file_name)
}
fn hash(&self, path: &Path) -> String {
@ -608,7 +553,7 @@ impl Builder {
}
fn sign(&self, path: &Path) {
if !self.should_sign {
if !self.legacy {
return;
}
@ -631,10 +576,45 @@ impl Builder {
.arg(path)
.stdin(Stdio::piped());
let mut child = t!(cmd.spawn());
t!(child.stdin.take().unwrap().write_all(self.gpg_passphrase.as_bytes()));
t!(child.stdin.take().unwrap().write_all(self.legacy_gpg_passphrase.as_bytes()));
assert!(t!(child.wait()).success());
}
/// Replaces every `FileHash::Missing` placeholder in `manifest` with the
/// actual checksum of the file it points to.
///
/// The distinct paths are gathered first, hashed in parallel with rayon,
/// and then written back in a second walk over the manifest.
///
/// # Panics
///
/// Panics if a placeholder is left without a hash, which happens when
/// hashing that file failed (the failure itself is reported on stderr).
fn fill_missing_hashes(&self, manifest: &mut Manifest) {
    // First pass: remember each path that still needs a hash. A set is used
    // so a file referenced more than once is only hashed once.
    let mut pending = HashSet::new();
    crate::manifest::visit_file_hashes(manifest, |entry| match entry {
        FileHash::Missing(path) => {
            pending.insert(path.clone());
        }
        FileHash::Present(_) => {}
    });

    let hashes = Mutex::new(HashMap::new());
    let started = Instant::now();
    let threads = rayon::current_num_threads().min(pending.len());
    println!("collecting hashes for {} tarballs across {} threads", pending.len(), threads);

    // Failures are only logged here; the second pass below turns any hash
    // that is still missing into a panic.
    pending.par_iter().for_each(|path| {
        let outcome = fetch_hash(path);
        match outcome {
            Err(err) => {
                eprintln!("error while fetching the hash for {}: {}", path.display(), err)
            }
            Ok(hash) => {
                hashes.lock().unwrap().insert(path, hash);
            }
        }
    });

    let hashes = hashes.into_inner().unwrap();
    println!("collected {} hashes in {:.2?}", hashes.len(), started.elapsed());

    // Second pass: swap each placeholder for its computed hash.
    crate::manifest::visit_file_hashes(manifest, |entry| {
        let path = match entry {
            FileHash::Missing(path) => &*path,
            FileHash::Present(_) => return,
        };
        let hash = match hashes.get(path) {
            Some(hash) => hash.clone(),
            None => panic!("missing hash for file {}", path.display()),
        };
        *entry = FileHash::Present(hash);
    })
}
fn write_channel_files(&self, channel_name: &str, manifest: &Manifest) {
self.write(&toml::to_string(&manifest).unwrap(), channel_name, ".toml");
self.write(&manifest.date, channel_name, "-date.txt");
@ -648,7 +628,16 @@ impl Builder {
fn write(&self, contents: &str, channel_name: &str, suffix: &str) {
let dst = self.output.join(format!("channel-rust-{}{}", channel_name, suffix));
t!(fs::write(&dst, contents));
self.hash(&dst);
self.sign(&dst);
if self.legacy {
self.hash(&dst);
self.sign(&dst);
}
}
}
fn fetch_hash(path: &Path) -> Result<String, Box<dyn Error>> {
let mut file = BufReader::new(File::open(path)?);
let mut sha256 = sha2::Sha256::default();
std::io::copy(&mut file, &mut sha256)?;
Ok(hex::encode(sha256.finalize()))
}

View File

@ -0,0 +1,114 @@
use crate::Builder;
use serde::{Serialize, Serializer};
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
/// Root of the channel manifest; serialized with kebab-case field names.
#[derive(Serialize)]
#[serde(rename_all = "kebab-case")]
pub(crate) struct Manifest {
// Version of the manifest format itself.
pub(crate) manifest_version: String,
// Date string of the manifest.
pub(crate) date: String,
// Packages keyed by name (the builder inserts e.g. `"rust"`).
pub(crate) pkg: BTreeMap<String, Package>,
// Rename table keyed by string; each entry points at a `Rename`.
pub(crate) renames: BTreeMap<String, Rename>,
// Profiles: each key maps to a list of strings.
// NOTE(review): presumably profile name -> component names; confirm against the builder.
pub(crate) profiles: BTreeMap<String, Vec<String>>,
}
/// A single package entry of the manifest.
#[derive(Serialize)]
pub(crate) struct Package {
// Version string of the package.
pub(crate) version: String,
// Optional git commit hash; `None` when not known.
pub(crate) git_commit_hash: Option<String>,
// Per-target entries.
// NOTE(review): presumably keyed by target triple; confirm against the builder.
pub(crate) target: BTreeMap<String, Target>,
}
/// Value type of `Manifest::renames`.
#[derive(Serialize)]
pub(crate) struct Rename {
// NOTE(review): presumably the name the package is renamed to; confirm against the caller.
pub(crate) to: String,
}
/// Availability and download details of a package for a single target.
///
/// The `Default` value (everything `false`/`None`) represents an
/// unavailable target.
#[derive(Serialize, Default)]
pub(crate) struct Target {
    pub(crate) available: bool,
    pub(crate) url: Option<String>,
    pub(crate) hash: Option<FileHash>,
    pub(crate) xz_url: Option<String>,
    pub(crate) xz_hash: Option<FileHash>,
    pub(crate) components: Option<Vec<Component>>,
    pub(crate) extensions: Option<Vec<Component>>,
}

impl Target {
    /// Builds the entry for the tarball named `base_path` inside the
    /// builder's input directory.
    ///
    /// The `gz` variant is mandatory: when it is not a file on disk the
    /// target is unavailable. The `xz` variant is optional. Hashes are left
    /// as `FileHash::Missing` placeholders holding the tarball path.
    pub(crate) fn from_compressed_tar(builder: &Builder, base_path: &str) -> Self {
        let tarball = builder.input.join(base_path);
        let gz = Self::tarball_variant(&tarball, "gz");
        let xz = Self::tarball_variant(&tarball, "xz");

        let gz = match gz {
            Some(path) => path,
            None => return Self::unavailable(),
        };

        Self {
            available: true,
            // .gz
            url: Some(builder.url(&gz)),
            hash: Some(FileHash::Missing(gz)),
            // .xz
            xz_url: xz.as_deref().map(|path| builder.url(path)),
            xz_hash: xz.map(FileHash::Missing),
            components: None,
            extensions: None,
        }
    }

    /// Returns `base` with its extension replaced by `ext`, but only if the
    /// resulting path is an existing file.
    fn tarball_variant(base: &Path, ext: &str) -> Option<PathBuf> {
        Some(base.with_extension(ext)).filter(|candidate| candidate.is_file())
    }

    /// Entry for a target the package is not available on.
    pub(crate) fn unavailable() -> Self {
        Default::default()
    }
}
/// Reference to a package on a specific target.
#[derive(Serialize)]
pub(crate) struct Component {
    pub(crate) pkg: String,
    pub(crate) target: String,
}

impl Component {
    /// Creates a component from borrowed package and target names.
    pub(crate) fn from_str(pkg: &str, target: &str) -> Self {
        let pkg = pkg.to_owned();
        let target = target.to_owned();
        Component { pkg, target }
    }
}
#[allow(unused)]
pub(crate) enum FileHash {
Missing(PathBuf),
Present(String),
}
impl Serialize for FileHash {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
match self {
FileHash::Missing(path) => Err(serde::ser::Error::custom(format!(
"can't serialize a missing hash for file {}",
path.display()
))),
FileHash::Present(inner) => inner.serialize(serializer),
}
}
}
/// Calls `f` on every hash stored in `manifest`.
///
/// Packages and their targets are visited in map order; within a target the
/// regular `hash` is visited before `xz_hash`. Absent (`None`) hashes are
/// skipped.
pub(crate) fn visit_file_hashes(manifest: &mut Manifest, mut f: impl FnMut(&mut FileHash)) {
    let targets = manifest.pkg.values_mut().flat_map(|pkg| pkg.target.values_mut());
    for target in targets {
        let hashes = target.hash.iter_mut().chain(target.xz_hash.iter_mut());
        for hash in hashes {
            f(hash);
        }
    }
}