From 4bd2fc342db94a5a31858943b48979a8860f583c Mon Sep 17 00:00:00 2001 From: Zach Yu Date: Mon, 23 Mar 2026 23:51:35 -0700 Subject: [PATCH] Fix case-insensitive header discovery and validation. This change refines how discovered includes are resolved and validated on case-insensitive file systems. Particularly, paths returned by the compiler cannot be trusted to have the correct case. We need to scan action inputs to retrieve the correct artifact. This is important when bazel is running from case-sensitive FS while the the actions run on a case-insensitive FS. Major changes: * `ArtifactFactory`: The `resolveSourceArtifactsAsciiCaseInsensitively` method no longer creates new source artifacts if not found in the cache, as the exact casing is unknown at that point. * `CppCompileAction`: The `validateInclusions` method is refactored to use a map for unvalidated inputs for better efficiency. * `HeaderDiscovery`: The resolution process is rewritten to a multi-stage approach: first checking derived artifacts and the source cache, then action inputs, and finally handling special cases like `cppmap` and tree artifacts. To reduce the memory impact of input scanning, We avoid building map / set from all the inputs - but building from discovered deps instead. --- .../build/lib/actions/ArtifactFactory.java | 9 +- .../build/lib/rules/cpp/CppCompileAction.java | 43 ++--- .../build/lib/rules/cpp/HeaderDiscovery.java | 172 ++++++++++++------ 3 files changed, 138 insertions(+), 86 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/actions/ArtifactFactory.java b/src/main/java/com/google/devtools/build/lib/actions/ArtifactFactory.java index c44d6ca938b2be..4adc1ff337302a 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/ArtifactFactory.java +++ b/src/main/java/com/google/devtools/build/lib/actions/ArtifactFactory.java @@ -608,13 +608,8 @@ public ImmutableList resolveSourceArtifactsAsciiCaseInsensitivel return result; } } - Root sourceRoot = - findSourceRoot(execPath, /* baseExecPath= */ null, /* baseRoot= */ null, repositoryName); - SourceArtifact newArtifact = createArtifactIfNotValid(sourceRoot, execPath); - if (newArtifact == null) { - return ImmutableList.of(); - } - return ImmutableList.of(newArtifact); + // Don't create artifacts that aren't in the cache - we don't know the exact case yet + return ImmutableList.of(); } @Nullable diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java b/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java index 52e4ca9fc0b5a0..87d139071b8037 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java @@ -16,6 +16,7 @@ import static com.google.common.base.MoreObjects.firstNonNull; import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.devtools.build.lib.actions.ActionAnalysisMetadata.mergeMaps; +import static java.util.stream.Collectors.toCollection; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.CharMatcher; @@ -972,7 +973,7 @@ public ImmutableMap getIncompleteEnvironmentForTesting() "failed to generate compile environment variables for rule '%s: %s", getOwner().getLabel(), e.getMessage()); DetailedExitCode code = createDetailedExitCode(message, Code.COMMAND_GENERATION_FAILURE); - throw new ActionExecutionException(message, this, /*catastrophe=*/ false, code); + throw new ActionExecutionException(message, this, /* catastrophe= */ false, code); } } @@ -1061,7 +1062,7 @@ public ExtraActionInfo.Builder getExtraActionInfo(ActionKeyContext actionKeyCont } // TODO(ulfjack): Extra actions currently ignore the client environment. for (Map.Entry envVariable : - getEffectiveEnvironment(/*clientEnv=*/ ImmutableMap.of()).entrySet()) { + getEffectiveEnvironment(/* clientEnv= */ ImmutableMap.of()).entrySet()) { info.addVariable( EnvironmentVariable.newBuilder() .setName(envVariable.getKey()) @@ -1088,8 +1089,7 @@ public ImmutableMap getExecutionInfo() { return mergeMaps(super.getExecutionInfo(), executionInfo); } - private static boolean validateInclude( - Set allowedIncludes, Iterable ignoreDirs, Artifact include) { + private static boolean shouldIgnoreInput(Iterable ignoreDirs, Artifact include) { // Only declared modules are added to an action and so they are always valid. return include.isFileType(CppFileTypes.CPP_MODULE) || @@ -1097,9 +1097,6 @@ private static boolean validateInclude( // local_objc_modules feature. include.isFileType(CppFileTypes.OBJC_MODULE_MAP) || - // It's a declared include/ - allowedIncludes.contains(include) - || // Ignore headers from built-in include directories. FileSystemUtils.startsWithAny(include.getExecPath(), ignoreDirs); } @@ -1122,30 +1119,30 @@ private static boolean validateInclude( * * @throws ActionExecutionException iff there was an undeclared dependency */ - @VisibleForTesting - public void validateInclusions( - ActionExecutionContext actionExecutionContext, NestedSet inputsForValidation) + private void validateInclusions(NestedSet inputsForValidation) throws ActionExecutionException { if (!needsIncludeValidation) { return; } IncludeProblems errors = new IncludeProblems(); - Set allowedIncludes = new HashSet<>(); - allowedIncludes.addAll(mandatoryInputs.toList()); - allowedIncludes.addAll(ccCompilationContext.getDeclaredIncludeSrcs().toList()); - allowedIncludes.addAll(additionalPrunableHeaders.toList()); + var allowedIncludes = + Iterables.concat( + mandatoryInputs.toList(), + ccCompilationContext.getDeclaredIncludeSrcs().toList(), + additionalPrunableHeaders.toList()); Iterable ignoreDirs = cppConfiguration().isStrictSystemIncludes() ? getBuiltInIncludeDirectories() : getValidationIgnoredDirs(); - // Copy the nested sets to hash sets for fast contains checking, but do so lazily. - // Avoid immutable sets here to limit memory churn. - for (Artifact input : inputsForValidation.toList()) { - if (!validateInclude(allowedIncludes, ignoreDirs, input)) { - errors.add(input.getExecPath().toString()); - } + var unvalidated = + inputsForValidation.toList().stream() + .filter(input -> !shouldIgnoreInput(ignoreDirs, input)) + .collect(toCollection(HashSet::new)); + allowedIncludes.forEach(unvalidated::remove); + for (var artifact : unvalidated) { + errors.add(artifact.getExecPathString()); } errors.assertProblemFree( "undeclared inclusion(s) in rule '" @@ -1203,7 +1200,7 @@ void verifyActionIncludePaths( includePath); DetailedExitCode code = createDetailedExitCode(message, Code.INCLUDE_PATH_OUTSIDE_EXEC_ROOT); - throw new ActionExecutionException(message, this, /*catastrophe=*/ false, code); + throw new ActionExecutionException(message, this, /* catastrophe= */ false, code); } } } @@ -1553,7 +1550,7 @@ public ActionResult execute(ActionExecutionContext actionExecutionContext) siblingRepositoryLayout, pathMapper); updateActionInputs(discoveredInputs); - validateInclusions(actionExecutionContext, discoveredInputs); + validateInclusions(discoveredInputs); return ActionResult.create(spawnResults); } @@ -1585,7 +1582,7 @@ public ActionResult execute(ActionExecutionContext actionExecutionContext) // hdrs_check: This cannot be switched off for C++ build actions, // because doing so would allow for incorrect builds. // HeadersCheckingMode.NONE should only be used for ObjC build actions. - validateInclusions(actionExecutionContext, discoveredInputs); + validateInclusions(discoveredInputs); return ActionResult.create(spawnResults); } diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/HeaderDiscovery.java b/src/main/java/com/google/devtools/build/lib/rules/cpp/HeaderDiscovery.java index 5a8bcdb2b163de..a8c97726b5acc5 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/cpp/HeaderDiscovery.java +++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/HeaderDiscovery.java @@ -14,11 +14,11 @@ package com.google.devtools.build.lib.rules.cpp; +import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.devtools.build.lib.analysis.constraints.ConstraintConstants.getOsFromConstraintsOrHost; +import static java.util.Comparator.comparing; -import com.google.common.collect.Collections2; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterables; import com.google.devtools.build.lib.actions.Action; import com.google.devtools.build.lib.actions.ActionExecutionException; import com.google.devtools.build.lib.actions.Artifact; @@ -27,19 +27,22 @@ import com.google.devtools.build.lib.actions.PathMapper; import com.google.devtools.build.lib.cmdline.LabelConstants; import com.google.devtools.build.lib.cmdline.PackageIdentifier; -import com.google.devtools.build.lib.collect.compacthashset.CompactHashSet; import com.google.devtools.build.lib.collect.nestedset.NestedSet; import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder; import com.google.devtools.build.lib.util.OS; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; +import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; +import java.util.TreeMap; import javax.annotation.Nullable; /** @@ -126,8 +129,6 @@ private static NestedSet runDiscovery( boolean siblingRepositoryLayout, PathMapper pathMapper) throws ActionExecutionException { - NestedSetBuilder inputs = NestedSetBuilder.stableOrder(); - // This is a very special case: in certain corner cases (notably, protobuf), the WORKSPACE file // contains a local_repository that has the same name and path as the main repository. In this // case, if sibling repository layout is active, files in that local repository have the same @@ -157,7 +158,16 @@ private static NestedSet runDiscovery( IncludeProblems unresolvablePathProblems = new IncludeProblems(); boolean possiblyCaseInsensitiveFileSystem = getOsFromConstraintsOrHost(action.getExecutionPlatform()) == OS.WINDOWS; - CompactHashSet sourceArtifactInputs = null; + // Keeps unresolved dependencies with their possible artifact candidates. + Map> candidates = + possiblyCaseInsensitiveFileSystem + ? new TreeMap<>( + comparing( + pathFragment -> StringEncoding.internalToUnicode(pathFragment.getPathString()), + String.CASE_INSENSITIVE_ORDER)) + : new HashMap<>(); + + // Process dependencies and initialize resolve state. for (Path execPath : dependencies) { PathFragment execPathFragment = execPath.asFragment(); if (execPathFragment.isAbsolute()) { @@ -191,74 +201,118 @@ private static NestedSet runDiscovery( continue; } } - Collection resolvedArtifacts = ImmutableList.of(); - Artifact derivedArtifact = regularDerivedArtifacts.get(execPathFragment); - if (derivedArtifact == null) { + candidates.putIfAbsent(execPathFragment, ImmutableList.of()); + } + + Set resolvedArtifacts = new HashSet<>(candidates.size()); + + if (!candidates.isEmpty()) { + var iterCandidates = candidates.entrySet().iterator(); + while (iterCandidates.hasNext()) { + var entry = iterCandidates.next(); + PathFragment execPathFragment = entry.getKey(); + + // Try to resolve from regular derived artifacts first. + Artifact derivedArtifact = regularDerivedArtifacts.get(execPathFragment); + if (derivedArtifact != null) { + resolvedArtifacts.add(derivedArtifact); + iterCandidates.remove(); + continue; + } + // Try to resolve from source artifact cache. Optional pkgId = PackageIdentifier.discoverFromExecPath( execPathFragment, false, siblingRepositoryLayout); if (pkgId.isPresent()) { if (possiblyCaseInsensitiveFileSystem) { - resolvedArtifacts = - artifactResolver.resolveSourceArtifactsAsciiCaseInsensitively( - execPathFragment, pkgId.get().getRepository()); + entry.setValue( + artifactResolver + .resolveSourceArtifactsAsciiCaseInsensitively( + execPathFragment, pkgId.get().getRepository()) + .stream() + .map(s -> (Artifact) s) + .collect(toImmutableList())); + // Since we don't know the exact case of the file, we need to keep the candidate in the + // map in case we encounter a different casing later. } else { var sourceArtifact = artifactResolver.resolveSourceArtifact( execPathFragment, pkgId.get().getRepository()); if (sourceArtifact != null) { - resolvedArtifacts = ImmutableList.of(sourceArtifact); + resolvedArtifacts.add(sourceArtifact); + iterCandidates.remove(); } } } - } else { - resolvedArtifacts = ImmutableList.of(derivedArtifact); } - if (!resolvedArtifacts.isEmpty()) { - // We don't need to add the sourceFile itself as it is a mandatory input. - resolvedArtifacts = Collections2.filter(resolvedArtifacts, a -> !a.equals(sourceFile)); - switch (resolvedArtifacts.size()) { - case 0 -> {} - case 1 -> inputs.add(Iterables.getOnlyElement(resolvedArtifacts)); - default -> { - if (sourceArtifactInputs == null) { - sourceArtifactInputs = CompactHashSet.create(); - for (Artifact input : action.getInputs().toList()) { - if (input.isSourceArtifact()) { - sourceArtifactInputs.add(input); - } - } - } - if (Collections.disjoint(resolvedArtifacts, sourceArtifactInputs)) { - inputs.addAll(resolvedArtifacts); - } else { - for (Artifact resolvedArtifact : resolvedArtifacts) { - if (sourceArtifactInputs.contains(resolvedArtifact)) { - inputs.add(resolvedArtifact); - } - } - } + } + + if (!candidates.isEmpty()) { + // Try to resolve from action inputs. + for (var artifact : action.getInputs().toList()) { + if (!artifact.isSourceArtifact()) { + continue; + } + var existingCandidates = candidates.get(artifact.getExecPath()); + if (existingCandidates == null) { + continue; + } + var sourceArtifact = + artifactResolver.getSourceArtifact( + artifact.getExecPath(), artifact.getRoot().getRoot(), artifact.getArtifactOwner()); + resolvedArtifacts.add(sourceArtifact); + if (existingCandidates instanceof ImmutableList) { + candidates.put(sourceArtifact.getExecPath(), new ArrayList<>(existingCandidates)); + } + candidates.get(sourceArtifact.getExecPath()).add(sourceArtifact); + } + // If a path has multiple candidates, only add them all to the inputs if none of them have + // been resolved yet. + var iterCandidates = candidates.entrySet().iterator(); + while (iterCandidates.hasNext()) { + var artifacts = iterCandidates.next().getValue(); + if (artifacts.isEmpty()) { + continue; + } + boolean resolved = false; + for (var artifact : artifacts) { + if (resolvedArtifacts.contains(artifact)) { + resolved = true; + break; } } - continue; - } else if (execPathFragment.getFileExtension().equals("cppmap")) { - // Transitive cppmap files are added to the dotd files of compiles even - // though they are not required for compilation. Since they're not - // explicit inputs to the action this only happens when sandboxing is - // disabled. - continue; + if (!resolved) { + resolvedArtifacts.addAll(artifacts); + } + iterCandidates.remove(); } + } - SpecialArtifact treeArtifact = - findOwningTreeArtifact(execPathFragment, treeArtifacts, pathMapper); - if (treeArtifact != null) { - inputs.add(treeArtifact); - } else { - // Record a problem if we see files that we can't resolve, likely caused by undeclared - // includes or illegal include constructs. - unresolvablePathProblems.add(execPathFragment.getPathString()); + if (!candidates.isEmpty()) { + var iterCandidates = candidates.entrySet().iterator(); + while (iterCandidates.hasNext()) { + var execPathFragment = iterCandidates.next().getKey(); + if (execPathFragment.getFileExtension().equals("cppmap")) { + // Transitive cppmap files are added to the dotd files of compiles even + // though they are not required for compilation. Since they're not + // explicit inputs to the action this only happens when sandboxing is + // disabled. + iterCandidates.remove(); + continue; + } + SpecialArtifact treeArtifact = + findOwningTreeArtifact(execPathFragment, treeArtifacts, pathMapper); + if (treeArtifact != null) { + resolvedArtifacts.add(treeArtifact); + iterCandidates.remove(); + } } } + + // Record a problem if we see files that we can't resolve, likely caused by undeclared + // includes or illegal include constructs. + candidates.keySet().forEach(fragment -> unresolvablePathProblems.add(fragment.getPathString())); + if (shouldValidateInclusions) { absolutePathProblems.assertProblemFree( "absolute path inclusion(s) found in rule '" @@ -279,7 +333,13 @@ private static NestedSet runDiscovery( + "':", action); } - return inputs.build(); + + return NestedSetBuilder.stableOrder() + .addAll( + resolvedArtifacts.stream() + .filter(a -> !a.equals(sourceFile)) + .collect(toImmutableList())) + .build(); } @Nullable