Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use BOLT in CI to optimize librustc_driver #102487

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/bootstrap/compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -904,6 +904,11 @@ impl Step for Rustc {
cargo.arg("-p").arg(krate);
}

if compiler.stage == 1 {
// Relocations are required for BOLT to work.
cargo.rustflag(&format!("-Clink-args=-Wl,-q"));
}

let _guard = builder.msg_sysroot_tool(
Kind::Build,
compiler.stage,
Expand Down
3 changes: 2 additions & 1 deletion src/ci/docker/host-x86_64/dist-x86_64-linux/build-gcc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ cd ../gcc-build
hide_output ../gcc-$GCC/configure \
--prefix=/rustroot \
--enable-languages=c,c++ \
--disable-gnu-unique-object
--disable-gnu-unique-object \
--enable-cxx-flags='-fno-reorder-blocks-and-partition'
hide_output make -j$(nproc)
hide_output make install
ln -s gcc /rustroot/bin/cc
Expand Down
14 changes: 10 additions & 4 deletions src/tools/opt-dist/src/bolt.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use anyhow::Context;

use crate::exec::cmd;
use crate::training::LlvmBoltProfile;
use crate::training::BoltProfile;
use camino::{Utf8Path, Utf8PathBuf};

use crate::utils::io::copy_file;

/// Instruments an artifact at the given `path` (in-place) with BOLT and then calls `func`.
/// After this function finishes, the original file will be restored.
pub fn with_bolt_instrumented<F: FnOnce() -> anyhow::Result<R>, R>(
pub fn with_bolt_instrumented<F: FnOnce(&Utf8Path) -> anyhow::Result<R>, R>(
path: &Utf8Path,
func: F,
) -> anyhow::Result<R> {
Expand All @@ -20,10 +20,16 @@ pub fn with_bolt_instrumented<F: FnOnce() -> anyhow::Result<R>, R>(

let instrumented_path = tempfile::NamedTempFile::new()?.into_temp_path();

let profile_dir =
tempfile::TempDir::new().context("Could not create directory for BOLT profiles")?;
let profile_prefix = profile_dir.path().join("prof.fdata");
let profile_prefix = Utf8Path::from_path(&profile_prefix).unwrap();

// Instrument the original file with BOLT, saving the result into `instrumented_path`
cmd(&["llvm-bolt"])
.arg("-instrument")
.arg(path)
.arg(&format!("--instrumentation-file={profile_prefix}"))
// Make sure that each process will write its profiles into a separate file
.arg("--instrumentation-file-append-pid")
.arg("-o")
Expand All @@ -36,11 +42,11 @@ pub fn with_bolt_instrumented<F: FnOnce() -> anyhow::Result<R>, R>(

// Run the function that will make use of the instrumented artifact.
// The original file will be restored when `_backup_file` is dropped.
func()
func(profile_prefix)
}

/// Optimizes the file at `path` with BOLT in-place using the given `profile`.
pub fn bolt_optimize(path: &Utf8Path, profile: &LlvmBoltProfile) -> anyhow::Result<()> {
pub fn bolt_optimize(path: &Utf8Path, profile: &BoltProfile) -> anyhow::Result<()> {
// Copy the artifact to a new location, so that we do not use the same input and output file.
// BOLT cannot handle optimizing when the input and output is the same file, because it performs
// in-place patching.
Expand Down
4 changes: 2 additions & 2 deletions src/tools/opt-dist/src/exec.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::environment::Environment;
use crate::metrics::{load_metrics, record_metrics};
use crate::timer::TimerSection;
use crate::training::{LlvmBoltProfile, LlvmPGOProfile, RustcPGOProfile};
use crate::training::{BoltProfile, LlvmPGOProfile, RustcPGOProfile};
use camino::{Utf8Path, Utf8PathBuf};
use std::collections::BTreeMap;
use std::fs::File;
Expand Down Expand Up @@ -159,7 +159,7 @@ impl Bootstrap {
self
}

pub fn with_bolt_profile(mut self, profile: LlvmBoltProfile) -> Self {
pub fn with_bolt_profile(mut self, profile: BoltProfile) -> Self {
self.cmd = self.cmd.arg("--reproducible-artifact").arg(profile.0.as_str());
self
}
Expand Down
56 changes: 38 additions & 18 deletions src/tools/opt-dist/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ use crate::environment::{Environment, EnvironmentBuilder};
use crate::exec::{cmd, Bootstrap};
use crate::tests::run_tests;
use crate::timer::Timer;
use crate::training::{gather_llvm_bolt_profiles, gather_llvm_profiles, gather_rustc_profiles};
use crate::training::{
gather_bolt_profiles, gather_llvm_profiles, gather_rustc_profiles, llvm_benchmarks,
rustc_benchmarks,
};
use crate::utils::io::{copy_directory, move_directory, reset_directory};
use crate::utils::{
clear_llvm_files, format_env_variables, print_binary_sizes, print_free_disk_space,
Expand Down Expand Up @@ -245,13 +248,13 @@ fn execute_pipeline(
Ok(profile)
})?;

let llvm_bolt_profile = if env.use_bolt() {
let bolt_profiles = if env.use_bolt() {
// Stage 3: Build BOLT instrumented LLVM
// We build a PGO optimized LLVM in this step, then instrument it with BOLT and gather BOLT profiles.
// Note that we don't remove LLVM artifacts after this step, so that they are reused in the final dist build.
// BOLT instrumentation is performed "on-the-fly" when the LLVM library is copied to the sysroot of rustc,
// therefore the LLVM artifacts on disk are not "tainted" with BOLT instrumentation and they can be reused.
timer.section("Stage 3 (LLVM BOLT)", |stage| {
timer.section("Stage 3 (BOLT)", |stage| {
stage.section("Build PGO optimized LLVM", |stage| {
Bootstrap::build(env)
.with_llvm_bolt_ldflags()
Expand All @@ -260,16 +263,17 @@ fn execute_pipeline(
.run(stage)
})?;

// Find the path to the `libLLVM.so` file
let llvm_lib = io::find_file_in_dir(
&env.build_artifacts().join("stage2").join("lib"),
"libLLVM",
".so",
)?;
let libdir = env.build_artifacts().join("stage2").join("lib");
let llvm_lib = io::find_file_in_dir(&libdir, "libLLVM", ".so")?;

// Instrument it and gather profiles
let profile = with_bolt_instrumented(&llvm_lib, || {
stage.section("Gather profiles", |_| gather_llvm_bolt_profiles(env))
log::info!("Optimizing {llvm_lib} with BOLT");

// FIXME(kobzol): try to gather profiles together, at once for LLVM and rustc
// Instrument the libraries and gather profiles
let llvm_profile = with_bolt_instrumented(&llvm_lib, |llvm_profile_dir| {
stage.section("Gather profiles", |_| {
gather_bolt_profiles(env, "LLVM", llvm_benchmarks(env), llvm_profile_dir)
})
})?;
print_free_disk_space()?;

Expand All @@ -278,27 +282,43 @@ fn execute_pipeline(
// the final dist build. However, when BOLT optimizes an artifact, it does so *in-place*,
// therefore it will actually optimize all the hard links, which means that the final
// packaged `libLLVM.so` file *will* be BOLT optimized.
bolt_optimize(&llvm_lib, &profile).context("Could not optimize LLVM with BOLT")?;
bolt_optimize(&llvm_lib, &llvm_profile).context("Could not optimize LLVM with BOLT")?;

let rustc_lib = io::find_file_in_dir(&libdir, "librustc_driver", ".so")?;

log::info!("Optimizing {rustc_lib} with BOLT");

// Instrument it and gather profiles
let rustc_profile = with_bolt_instrumented(&rustc_lib, |rustc_profile_dir| {
stage.section("Gather profiles", |_| {
gather_bolt_profiles(env, "rustc", rustc_benchmarks(env), rustc_profile_dir)
})
})?;
print_free_disk_space()?;

// Now optimize the library with BOLT.
bolt_optimize(&rustc_lib, &rustc_profile)
.context("Could not optimize rustc with BOLT")?;

// LLVM is not being cleared here, we want to use the BOLT-optimized LLVM
Ok(Some(profile))
Ok(vec![llvm_profile, rustc_profile])
})?
} else {
None
vec![]
};

let mut dist = Bootstrap::dist(env, &dist_args)
.llvm_pgo_optimize(&llvm_pgo_profile)
.rustc_pgo_optimize(&rustc_pgo_profile)
.avoid_rustc_rebuild();

if let Some(llvm_bolt_profile) = llvm_bolt_profile {
dist = dist.with_bolt_profile(llvm_bolt_profile);
for bolt_profile in bolt_profiles {
dist = dist.with_bolt_profile(bolt_profile);
}

// Final stage: Assemble the dist artifacts
// The previous PGO optimized rustc build and PGO optimized LLVM builds should be reused.
timer.section("Stage 4 (final build)", |stage| dist.run(stage))?;
timer.section("Stage 5 (final build)", |stage| dist.run(stage))?;

// After dist has finished, run a subset of the test suite on the optimized artifacts to discover
// possible regressions.
Expand Down
53 changes: 34 additions & 19 deletions src/tools/opt-dist/src/training.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ const RUSTC_PGO_CRATES: &[&str] = &[
"bitmaps-3.1.0",
];

const LLVM_BOLT_CRATES: &[&str] = LLVM_PGO_CRATES;

fn init_compiler_benchmarks(
env: &Environment,
profiles: &[&str],
Expand Down Expand Up @@ -113,6 +111,14 @@ fn log_profile_stats(
Ok(())
}

/// Builds the benchmark command shared by the LLVM profile-gathering steps.
///
/// Delegates to `init_compiler_benchmarks`, passing the `Debug` and `Opt` profiles,
/// `["Full"]` as the third argument (presumably the scenario list — confirm against
/// `init_compiler_benchmarks`), and the `LLVM_PGO_CRATES` list.
///
/// The returned `CmdBuilder` is not executed here; callers are expected to call
/// `.run()` on it (optionally after adding further configuration).
pub fn llvm_benchmarks(env: &Environment) -> CmdBuilder {
init_compiler_benchmarks(env, &["Debug", "Opt"], &["Full"], LLVM_PGO_CRATES)
}

/// Builds the benchmark command shared by the rustc profile-gathering steps.
///
/// Delegates to `init_compiler_benchmarks`, passing the `Check`, `Debug` and `Opt`
/// profiles, `["All"]` as the third argument (presumably the scenario list — confirm
/// against `init_compiler_benchmarks`), and the `RUSTC_PGO_CRATES` list.
///
/// The returned `CmdBuilder` is not executed here; callers are expected to call
/// `.run()` on it (optionally after adding environment variables or other configuration).
pub fn rustc_benchmarks(env: &Environment) -> CmdBuilder {
init_compiler_benchmarks(env, &["Check", "Debug", "Opt"], &["All"], RUSTC_PGO_CRATES)
}

pub struct LlvmPGOProfile(pub Utf8PathBuf);

pub fn gather_llvm_profiles(
Expand All @@ -122,9 +128,7 @@ pub fn gather_llvm_profiles(
log::info!("Running benchmarks with PGO instrumented LLVM");

with_log_group("Running benchmarks", || {
init_compiler_benchmarks(env, &["Debug", "Opt"], &["Full"], LLVM_PGO_CRATES)
.run()
.context("Cannot gather LLVM PGO profiles")
llvm_benchmarks(env).run().context("Cannot gather LLVM PGO profiles")
})?;

let merged_profile = env.artifact_dir().join("llvm-pgo.profdata");
Expand Down Expand Up @@ -157,7 +161,7 @@ pub fn gather_rustc_profiles(
// Here we're profiling the `rustc` frontend, so we also include `Check`.
// The benchmark set includes various stress tests that put the frontend under pressure.
with_log_group("Running benchmarks", || {
init_compiler_benchmarks(env, &["Check", "Debug", "Opt"], &["All"], RUSTC_PGO_CRATES)
rustc_benchmarks(env)
.env("LLVM_PROFILE_FILE", profile_template.as_str())
.run()
.context("Cannot gather rustc PGO profiles")
Expand All @@ -176,23 +180,25 @@ pub fn gather_rustc_profiles(
Ok(RustcPGOProfile(merged_profile))
}

pub struct LlvmBoltProfile(pub Utf8PathBuf);
pub struct BoltProfile(pub Utf8PathBuf);

pub fn gather_llvm_bolt_profiles(env: &Environment) -> anyhow::Result<LlvmBoltProfile> {
log::info!("Running benchmarks with BOLT instrumented LLVM");
pub fn gather_bolt_profiles(
env: &Environment,
name: &str,
benchmarks: CmdBuilder,
profile_prefix: &Utf8Path,
) -> anyhow::Result<BoltProfile> {
log::info!("Running benchmarks with BOLT instrumented {name}");

with_log_group("Running benchmarks", || {
init_compiler_benchmarks(env, &["Check", "Debug", "Opt"], &["Full"], LLVM_BOLT_CRATES)
.run()
.context("Cannot gather LLVM BOLT profiles")
benchmarks.run().with_context(|| "Cannot gather {name} BOLT profiles")
})?;

let merged_profile = env.artifact_dir().join("llvm-bolt.profdata");
let profile_root = Utf8PathBuf::from("/tmp/prof.fdata");
log::info!("Merging LLVM BOLT profiles to {merged_profile}");
let merged_profile = env.artifact_dir().join(format!("{name}-bolt.profdata"));
log::info!("Merging {name} BOLT profiles from {profile_prefix} to {merged_profile}");

let profiles: Vec<_> =
glob::glob(&format!("{profile_root}*"))?.collect::<Result<Vec<_>, _>>()?;
glob::glob(&format!("{profile_prefix}*"))?.collect::<Result<Vec<_>, _>>()?;

let mut merge_args = vec!["merge-fdata"];
merge_args.extend(profiles.iter().map(|p| p.to_str().unwrap()));
Expand All @@ -204,7 +210,7 @@ pub fn gather_llvm_bolt_profiles(env: &Environment) -> anyhow::Result<LlvmBoltPr
.context("Cannot merge BOLT profiles")
})?;

log::info!("LLVM BOLT statistics");
log::info!("{name} BOLT statistics");
log::info!(
"{merged_profile}: {}",
humansize::format_size(std::fs::metadata(merged_profile.as_std_path())?.len(), BINARY)
Expand All @@ -216,8 +222,17 @@ pub fn gather_llvm_bolt_profiles(env: &Environment) -> anyhow::Result<LlvmBoltPr
.collect::<Result<Vec<_>, _>>()?
.into_iter()
.sum::<u64>();
log::info!("{profile_root}: {}", humansize::format_size(size, BINARY));
log::info!("{profile_prefix}: {}", humansize::format_size(size, BINARY));
log::info!("Profile file count: {}", profiles.len());

Ok(LlvmBoltProfile(merged_profile))
// Delete the gathered profiles
for profile in glob::glob(&format!("{profile_prefix}*"))?.into_iter() {
if let Ok(profile) = profile {
if let Err(error) = std::fs::remove_file(&profile) {
log::error!("Cannot delete BOLT profile {}: {error:?}", profile.display());
}
}
}

Ok(BoltProfile(merged_profile))
}
Loading