Fetching symlink inputs for top-level output

Top-level output files can be symlinks to non-top-level output files, such as in the case of a cc_binary linking against a .so built in the same build. I reuse the existing mayInsensitivelyPropagateInputs() method, which was originally introduced for action rewinding, but seems to have exactly the intended semantics here as well. Unfortunately, this requires a small change to the BlazeModule API to pass in the full analysis result (so we can access the action graph, so we can read the aformentioned flag). I considered using execution-phase information on whether an output is a symlink. However, at that point it's too late! The current design only allows pulling an output file *when its generating action runs*. It would be better if this was changed to pull output files independently of action execution. That would avoid a lot of problems with the current design, such as #10902. Fixes #11532. Change-Id: Iaf1e48895311fcf52d9e1802d53598288788a921 Closes #11536. Change-Id: Ie1bf49a8d08f0b2422426ecd95fe79b3686f8427 PiperOrigin-RevId: 332939828
bazelbuild · Sep 21, 2020 · 07e152e · 07e152e
1 parent 1511dd2
commit 07e152e
Show file tree

Hide file tree

Showing 5 changed files with 180 additions and 15 deletions.
diff --git a/src/main/java/com/google/devtools/build/lib/actions/ActionExecutionMetadata.java b/src/main/java/com/google/devtools/build/lib/actions/ActionExecutionMetadata.java
@@ -99,7 +99,7 @@ public interface ActionExecutionMetadata extends ActionAnalysisMetadata {
  * contents are equal to their inputs' contents, and a symlink action does not consume its inputs'
  * contents.
  *
- * <p>This property is relevant for action rewinding.
+ * <p>This property is relevant for action rewinding and top-level output fetching.
  */
  default boolean mayInsensitivelyPropagateInputs() {
  return false;

diff --git a/src/main/java/com/google/devtools/build/lib/buildtool/AnalysisPhaseRunner.java b/src/main/java/com/google/devtools/build/lib/buildtool/AnalysisPhaseRunner.java
@@ -129,7 +129,7 @@ public static AnalysisResult execute(
  }
 
  for (BlazeModule module : env.getRuntime().getBlazeModules()) {
- module.afterAnalysis(env, request, buildOptions, analysisResult.getTargetsToBuild());
+ module.afterAnalysis(env, request, buildOptions, analysisResult);
  }
 
  reportTargets(env, analysisResult);

diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java
@@ -27,8 +27,11 @@
 import com.google.common.flogger.GoogleLogger;
 import com.google.common.util.concurrent.ListeningScheduledExecutorService;
 import com.google.common.util.concurrent.MoreExecutors;
+import com.google.devtools.build.lib.actions.ActionExecutionMetadata;
+import com.google.devtools.build.lib.actions.ActionGraph;
 import com.google.devtools.build.lib.actions.ActionInput;
 import com.google.devtools.build.lib.actions.Artifact;
+import com.google.devtools.build.lib.analysis.AnalysisResult;
 import com.google.devtools.build.lib.analysis.ConfiguredTarget;
 import com.google.devtools.build.lib.analysis.FilesToRunProvider;
 import com.google.devtools.build.lib.analysis.RunfilesSupport;
@@ -89,8 +92,10 @@
 import io.grpc.Context;
 import io.grpc.ManagedChannel;
 import java.io.IOException;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Optional;
+import java.util.Set;
 import java.util.concurrent.Executors;
 
 /** RemoteModule provides distributed cache and remote execution for Bazel. */
@@ -584,7 +589,7 @@ private static ImmutableList<ActionInput> getTestOutputs(ConfiguredTarget testTa
  return testProvider.getTestParams().getOutputs();
  }
 
- private static NestedSet<? extends ActionInput> getArtifactsToBuild(
+ private static NestedSet<Artifact> getArtifactsToBuild(
  ConfiguredTarget buildTarget, TopLevelArtifactContext topLevelArtifactContext) {
  return TopLevelArtifactHelper.getAllArtifactsToBuild(buildTarget, topLevelArtifactContext)
  .getImportantArtifacts();
@@ -603,26 +608,67 @@ public void afterAnalysis(
  CommandEnvironment env,
  BuildRequest request,
  BuildOptions buildOptions,
- Iterable<ConfiguredTarget> configuredTargets) {
+ AnalysisResult analysisResult) {
  // The actionContextProvider may be null if remote execution is disabled or if there was an
- // error during
- // initialization.
+ // error during initialization.
  if (remoteOutputsMode != null
  && remoteOutputsMode.downloadToplevelOutputsOnly()
  && actionContextProvider != null) {
  boolean isTestCommand = env.getCommandName().equals("test");
  TopLevelArtifactContext artifactContext = request.getTopLevelArtifactContext();
- ImmutableSet.Builder<ActionInput> filesToDownload = ImmutableSet.builder();
- for (ConfiguredTarget configuredTarget : configuredTargets) {
+ Set<ActionInput> filesToDownload = new HashSet<>();
+ for (ConfiguredTarget configuredTarget : analysisResult.getTargetsToBuild()) {
  if (isTestCommand && isTestRule(configuredTarget)) {
- // When running a test download the test.log and test.xml.
+ // When running a test download the test.log and test.xml. These are never symlinks.
  filesToDownload.addAll(getTestOutputs(configuredTarget));
  } else {
- filesToDownload.addAll(getArtifactsToBuild(configuredTarget, artifactContext).toList());
- filesToDownload.addAll(getRunfiles(configuredTarget));
+ fetchSymlinkDependenciesRecursively(
+ analysisResult.getActionGraph(),
+ filesToDownload,
+ getArtifactsToBuild(configuredTarget, artifactContext).toList());
+ fetchSymlinkDependenciesRecursively(
+ analysisResult.getActionGraph(), filesToDownload, getRunfiles(configuredTarget));
  }
  }
- actionContextProvider.setFilesToDownload(filesToDownload.build());
+ actionContextProvider.setFilesToDownload(ImmutableSet.copyOf(filesToDownload));
+ }
+ }
+
+ // This is a short-term fix for top-level outputs that are symlinks. Unfortunately, we cannot
+ // reliably tell after analysis whether actions will create symlinks (the RE protocol allows any
+ // action to generate and return symlinks), but at least we can handle basic C++ rules with this
+ // change.
+ // TODO(ulfjack): I think we should separate downloading files from action execution. That would
+ // also resolve issues around action invalidation - we currently invalidate actions to trigger
+ // downloads of top-level outputs when the top-level targets change.
+ private static void fetchSymlinkDependenciesRecursively(
+ ActionGraph actionGraph, Set<ActionInput> builder, List<Artifact> inputs) {
+ for (Artifact input : inputs) {
+ // Only fetch recursively if we don't have the file to avoid visiting nodes multiple times.
+ if (builder.add(input)) {
+ fetchSymlinkDependenciesRecursively(actionGraph, builder, input);
+ }
+ }
+ }
+
+ private static void fetchSymlinkDependenciesRecursively(
+ ActionGraph actionGraph, Set<ActionInput> builder, Artifact artifact) {
+ if (!(actionGraph.getGeneratingAction(artifact) instanceof ActionExecutionMetadata)) {
+ // The top-level artifact could be a tree artifact, in which case the generating action may
+ // be an ActionTemplate, which does not implement ActionExecutionMetadata. We don't handle
+ // this case right now, so exit.
+ return;
+ }
+ ActionExecutionMetadata action =
+ (ActionExecutionMetadata) actionGraph.getGeneratingAction(artifact);
+ if (action.mayInsensitivelyPropagateInputs()) {
+ List<Artifact> inputs = action.getInputs().toList();
+ if (inputs.size() > 5) {
+ logger.atWarning().log(
+ "Action with a lot of inputs insensitively propagates them; this could be performance"
+ + " problem");
+ }
+ fetchSymlinkDependenciesRecursively(actionGraph, builder, inputs);
  }
  }
 

diff --git a/src/main/java/com/google/devtools/build/lib/runtime/BlazeModule.java b/src/main/java/com/google/devtools/build/lib/runtime/BlazeModule.java
@@ -16,10 +16,10 @@
 import com.google.auto.value.AutoValue;
 import com.google.common.collect.ImmutableList;
 import com.google.common.eventbus.SubscriberExceptionHandler;
+import com.google.devtools.build.lib.analysis.AnalysisResult;
 import com.google.devtools.build.lib.analysis.BlazeDirectories;
 import com.google.devtools.build.lib.analysis.BlazeVersionInfo;
 import com.google.devtools.build.lib.analysis.ConfiguredRuleClassProvider;
-import com.google.devtools.build.lib.analysis.ConfiguredTarget;
 import com.google.devtools.build.lib.analysis.ServerDirectories;
 import com.google.devtools.build.lib.analysis.ViewCreationFailedException;
 import com.google.devtools.build.lib.analysis.config.BuildOptions;
@@ -281,13 +281,13 @@ public BuildOptions getDefaultBuildOptions(BlazeRuntime runtime) {
  * @param env the command environment
  * @param request the build request
  * @param buildOptions the build's top-level options
- * @param configuredTargets the build's requested top-level targets as {@link ConfiguredTarget}s
+ * @param analysisResult the build's analysis result
  */
  public void afterAnalysis(
  CommandEnvironment env,
  BuildRequest request,
  BuildOptions buildOptions,
- Iterable<ConfiguredTarget> configuredTargets)
+ AnalysisResult analysisResult)
  throws InterruptedException, ViewCreationFailedException {}
 
  /**

diff --git a/src/test/shell/bazel/remote/remote_execution_test.sh b/src/test/shell/bazel/remote/remote_execution_test.sh
@@ -1207,6 +1207,125 @@ EOF
  || fail "Expected runfile bazel-bin/a/create_bar.sh to be downloaded"
 }
 
+# Test that --remote_download_toplevel fetches inputs to symlink actions. In
+# particular, cc_binary links against a symlinked imported .so file, and only
+# the symlink is in the runfiles.
+function test_downloads_toplevel_symlinks() {
+ if [[ "$PLATFORM" == "darwin" ]]; then
+ # TODO(b/37355380): This test is disabled due to RemoteWorker not supporting
+ # setting SDKROOT and DEVELOPER_DIR appropriately, as is required of
+ # action executors in order to select the appropriate Xcode toolchain.
+ return 0
+ fi
+
+ mkdir -p a
+
+ cat > a/bar.cc <<'EOF'
+int f() {
+ return 42;
+}
+EOF
+
+ cat > a/foo.cc <<'EOF'
+extern int f();
+int main() { return f() == 42 ? 0 : 1; }
+EOF
+
+ cat > a/BUILD <<'EOF'
+cc_binary(
+ name = "foo",
+ srcs = ["foo.cc"],
+ deps = [":libbar_lib"],
+)
+
+cc_import(
+ name = "libbar_lib",
+ shared_library = ":libbar.so",
+)
+
+cc_binary(
+ name = "libbar.so",
+ srcs = ["bar.cc"],
+ linkshared = 1,
+ linkstatic = 1,
+)
+EOF
+
+ bazel build \
+ --remote_executor=grpc://localhost:${worker_port} \
+ --remote_download_toplevel \
+ //a:foo || fail "Failed to build //a:foobar"
+
+ ./bazel-bin/a/foo${EXE_EXT} || fail "bazel-bin/a/foo${EXE_EXT} failed to run"
+}
+
+function test_symlink_outputs_not_allowed_with_minimial() {
+ mkdir -p a
+ cat > a/input.txt <<'EOF'
+Input file
+EOF
+ cat > a/BUILD <<'EOF'
+genrule(
+ name = "foo",
+ srcs = ["input.txt"],
+ outs = ["output.txt", "output_symlink"],
+ cmd = "cp $< $(location :output.txt) && ln -s output.txt $(location output_symlink)",
+)
+EOF
+
+ bazel build \
+ --remote_executor=grpc://localhost:${worker_port} \
+ --remote_download_minimal \
+ //a:foo >& $TEST_log && fail "Expected failure to build //a:foo"
+ expect_log "Symlinks in action outputs are not yet supported"
+}
+
+# Regression test that --remote_download_toplevel does not crash when the
+# top-level output is a tree artifact.
+function test_downloads_toplevel_tree_artifact() {
+ if [[ "$PLATFORM" == "darwin" ]]; then
+ # TODO(b/37355380): This test is disabled due to RemoteWorker not supporting
+ # setting SDKROOT and DEVELOPER_DIR appropriately, as is required of
+ # action executors in order to select the appropriate Xcode toolchain.
+ return 0
+ fi
+
+ mkdir -p a
+
+ # We need the top-level output to be a tree artifact generated by a template
+ # action. This is one way to do that: generate a tree artifact of C++ source
+ # files, and then compile them with a cc_library / cc_binary rule.
+ #
+ # The default top-level output of a cc_binary is the final binary, which is
+ # not what we want. Instead, we use --output_groups=compilation_outputs to
+ # fetch the .o files as the top-level outputs.
+
+ cat > a/gentree.bzl <<'EOF'
+def _gentree(ctx):
+ out = ctx.actions.declare_directory("dir.cc")
+ ctx.actions.run_shell(
+ outputs = [out],
+ command = "mkdir -p %s && echo 'int main(int c, char** v){return 1;}' > %s/foo.cc" %
+ (out.path, out.path),
+ )
+ return DefaultInfo(files = depset([out]))
+
+gentree = rule(implementation = _gentree)
+EOF
+
+ cat > a/BUILD <<'EOF'
+load(":gentree.bzl", "gentree")
+gentree(name = "tree")
+cc_binary(name = "main", srcs = [":tree"])
+EOF
+
+ bazel build \
+ --remote_executor=grpc://localhost:${worker_port} \
+ --remote_download_toplevel \
+ --output_groups=compilation_outputs \
+ //a:main || fail "Failed to build //a:main"
+}
+
 function test_downloads_toplevel_src_runfiles() {
  # Test that using --remote_download_toplevel with a non-generated (source)
  # runfile dependency works.