-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Add layout logic to replace duplicate files with links (Linux/MacOS only) #52044
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
7e233d1
Add layout logic to replace duplicate files with links (linux only)
MichaelSimons 4be229e
Use file-scoped namespaces
MichaelSimons ae07321
Merge branch 'main' into file-deduplication
MichaelSimons 248d61f
Update test/Microsoft.NET.TestFramework/TestContext.cs
MichaelSimons 120d4c0
Utilize _testAssetsManager.CreateTestDirectory
MichaelSimons 472b818
Remove unused test code for windows - will be coming later
MichaelSimons 46ba7ca
Apply suggestion from @baronfel
MichaelSimons d13d44d
Change from using OSName to IsOSPlatform
MichaelSimons 3fe029b
Rename 'master' to 'primary' in DeduplicateAssembliesWithLinks
MichaelSimons 2b917a8
Use ordinal comparison for path sorting in deduplication
MichaelSimons 384a151
Merge branch 'main' into file-deduplication
MichaelSimons 15bd60f
Update tests to use RunExeCommand
MichaelSimons 5fb6235
Merge branch 'main' into file-deduplication
MichaelSimons 73d06a4
Update TestAssetsManager references
MichaelSimons File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,182 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
|
|
||
| #if !NETFRAMEWORK | ||
| using System; | ||
| using System.Collections.Generic; | ||
| using System.IO; | ||
| using System.IO.Hashing; | ||
| using System.Linq; | ||
|
|
||
| namespace Microsoft.DotNet.Build.Tasks; | ||
|
|
||
/// <summary>
/// Deduplicates assemblies (.dll and .exe files) in a directory by replacing duplicates with links (hard or symbolic).
/// Assemblies are grouped by content hash and file size, and a deterministic "primary" file is selected (closest to
/// the root, then first in ordinal path order) which duplicates are linked to. Files with any other extension
/// (config, json, xml, etc.) are not deduplicated.
/// </summary>
public sealed class DeduplicateAssembliesWithLinks : Task
{
    /// <summary>
    /// The root directory to scan for duplicate assemblies.
    /// </summary>
    [Required]
    public string LayoutDirectory { get; set; } = null!;

    /// <summary>
    /// If true, creates hard links. If false, creates symbolic links.
    /// </summary>
    public bool UseHardLinks { get; set; } = false;

    /// <summary>
    /// Human-readable name of the configured link kind, used in log messages.
    /// </summary>
    private string LinkType => UseHardLinks ? "hard link" : "symbolic link";

    /// <summary>
    /// Scans <see cref="LayoutDirectory"/> recursively, groups assemblies by content, and replaces every
    /// duplicate with a link to the group's primary file.
    /// </summary>
    /// <returns>
    /// <c>true</c> when every file was hashed and every duplicate was linked; <c>false</c> when the layout
    /// directory is missing or any hashing/linking error was logged.
    /// </returns>
    public override bool Execute()
    {
        if (!Directory.Exists(LayoutDirectory))
        {
            Log.LogError($"LayoutDirectory '{LayoutDirectory}' does not exist.");
            return false;
        }

        Log.LogMessage(MessageImportance.High, $"Scanning for duplicate assemblies in '{LayoutDirectory}' (using {LinkType}s)...");

        // Only deduplicate assemblies - non-assembly files are small and offer minimal ROI.
        // Some non-assembly files such as config files shouldn't be linked (may be edited).
        var files = Directory.EnumerateFiles(LayoutDirectory, "*", SearchOption.AllDirectories)
            .Where(IsAssembly)
            .ToList();

        Log.LogMessage(MessageImportance.Normal, $"Found {files.Count} assemblies eligible for deduplication.");

        var (filesByContent, hashingSuccess) = HashAndGroupFiles(files);
        if (!hashingSuccess)
        {
            return false;
        }

        var duplicateGroups = filesByContent.Values.Where(g => g.Count > 1).ToList();
        Log.LogMessage(MessageImportance.Normal, $"Found {duplicateGroups.Count} groups of duplicate assemblies.");
        return DeduplicateFileGroups(duplicateGroups);
    }

    /// <summary>
    /// Hashes every file and buckets the resulting entries by (hash, size).
    /// </summary>
    /// <param name="files">Full paths of the files to hash.</param>
    /// <returns>
    /// The buckets, plus a flag that is <c>false</c> when at least one file could not be hashed
    /// (each failure is logged as an error and the remaining files are still processed).
    /// </returns>
    private (Dictionary<(string Hash, long Size), List<FileEntry>> filesByContent, bool success) HashAndGroupFiles(List<string> files)
    {
        // The size is part of the key so that a collision of the 64-bit, non-cryptographic hash between
        // files of different lengths can never cause one file to be replaced with a link to another.
        var filesByContent = new Dictionary<(string Hash, long Size), List<FileEntry>>();
        bool hasErrors = false;

        foreach (var filePath in files)
        {
            try
            {
                long size = new FileInfo(filePath).Length;
                string hash = ComputeFileHash(filePath);
                var entry = new FileEntry(filePath, hash, size, GetPathDepth(filePath, LayoutDirectory));

                // Single lookup instead of ContainsKey followed by two indexer accesses.
                if (!filesByContent.TryGetValue((hash, size), out var group))
                {
                    group = new List<FileEntry>();
                    filesByContent[(hash, size)] = group;
                }

                group.Add(entry);
            }
            catch (Exception ex)
            {
                Log.LogError($"Failed to hash file '{filePath}': {ex.Message}");
                hasErrors = true;
            }
        }

        return (filesByContent, !hasErrors);
    }

    /// <summary>
    /// Replaces every non-primary file of each group with a link to that group's primary file.
    /// </summary>
    /// <param name="duplicateGroups">Groups of files with identical content; each group has at least two entries.</param>
    /// <returns><c>true</c> when all links were created; <c>false</c> when any link failed (failures are logged).</returns>
    private bool DeduplicateFileGroups(List<List<FileEntry>> duplicateGroups)
    {
        int totalFilesDeduped = 0;
        long totalBytesSaved = 0;
        bool hasErrors = false;

        foreach (var group in duplicateGroups)
        {
            // Sort deterministically: by depth (ascending), then by path (ordinal for reproducibility)
            var sorted = group.OrderBy(f => f.Depth).ThenBy(f => f.Path, StringComparer.Ordinal).ToList();

            // First file is the "primary" - all duplicates will link to it
            var primary = sorted[0];

            foreach (var duplicate in sorted.Skip(1))
            {
                try
                {
                    CreateLink(duplicate.Path, primary.Path);
                    totalFilesDeduped++;
                    totalBytesSaved += duplicate.Size;
                    Log.LogMessage(MessageImportance.Low, $"  Linked: {duplicate.Path} -> {primary.Path}");
                }
                catch (Exception ex)
                {
                    Log.LogError($"Failed to create {LinkType} from '{duplicate.Path}' to '{primary.Path}': {ex.Message}");
                    hasErrors = true;
                }
            }
        }

        Log.LogMessage(MessageImportance.High,
            $"Deduplication complete: {totalFilesDeduped} files replaced with {LinkType}s, saving {totalBytesSaved / (1024.0 * 1024.0):F2} MB.");

        return !hasErrors;
    }

    /// <summary>
    /// Deletes <paramref name="duplicateFilePath"/> and recreates it as a link to <paramref name="primaryFilePath"/>.
    /// </summary>
    /// <param name="duplicateFilePath">Path of the file to replace with a link.</param>
    /// <param name="primaryFilePath">Path of the file the link points to.</param>
    private void CreateLink(string duplicateFilePath, string primaryFilePath)
    {
        // Delete the duplicate file before creating the link
        File.Delete(duplicateFilePath);

        if (UseHardLinks)
        {
            File.CreateHardLink(duplicateFilePath, primaryFilePath);
        }
        else
        {
            // Create relative symlink so it works when directory is moved/archived
            var duplicateDirectory = Path.GetDirectoryName(duplicateFilePath)!;
            var relativePath = Path.GetRelativePath(duplicateDirectory, primaryFilePath);
            File.CreateSymbolicLink(duplicateFilePath, relativePath);
        }
    }

    /// <summary>
    /// Computes the XxHash64 of a file's content.
    /// </summary>
    /// <param name="filePath">Path of the file to hash.</param>
    /// <returns>The hash as an uppercase hexadecimal string.</returns>
    private static string ComputeFileHash(string filePath)
    {
        var xxHash = new XxHash64();
        using var stream = File.OpenRead(filePath);

        // Append(Stream) reads the stream to its end; this avoids slicing a byte[] with a range,
        // which would allocate and copy a fresh array for every chunk read.
        xxHash.Append(stream);

        return Convert.ToHexString(xxHash.GetCurrentHash());
    }

    /// <summary>
    /// Counts how many directory levels separate <paramref name="filePath"/> from <paramref name="rootDirectory"/>.
    /// </summary>
    /// <param name="filePath">Path of a file.</param>
    /// <param name="rootDirectory">Directory the depth is measured from.</param>
    /// <returns>The number of segments in the relative path minus one (0 for a file directly in the root).</returns>
    private static int GetPathDepth(string filePath, string rootDirectory)
    {
        var relativePath = Path.GetRelativePath(rootDirectory, filePath);
        return relativePath.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar).Length - 1;
    }

    /// <summary>
    /// Returns <c>true</c> when the path has a <c>.dll</c> or <c>.exe</c> extension (case-insensitive).
    /// </summary>
    private static bool IsAssembly(string filePath)
    {
        var extension = Path.GetExtension(filePath);
        return extension.Equals(".dll", StringComparison.OrdinalIgnoreCase) ||
               extension.Equals(".exe", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// A hashed file: its path, content hash, size in bytes, and depth below the layout directory.
    /// </summary>
    private record FileEntry(string Path, string Hash, long Size, int Depth);
}
| #endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
|
|
||
| using System.Runtime.InteropServices; | ||
| using EndToEnd.Tests.Utilities; | ||
|
|
||
| namespace EndToEnd.Tests; | ||
|
|
||
/// <summary>
/// End-to-end checks against the SDK tar.gz archive.
/// </summary>
public class GivenSdkArchives(ITestOutputHelper log) : SdkTest(log)
{
    /// <summary>
    /// Extracts the SDK archive and verifies that its contents contain relative symbolic links,
    /// which is the evidence that deduplication ran. Does nothing on Windows.
    /// </summary>
    [Fact]
    public void ItHasDeduplicatedAssemblies()
    {
        // TODO: Windows is not supported yet - blocked on signing support (https://github.com/dotnet/sdk/issues/52182).
        bool runningOnWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
        if (runningOnWindows)
        {
            return;
        }

        // Locate the archive and unpack it into a fresh test directory
        string sdkArchive = TestContext.FindSdkAcquisitionArtifact("dotnet-sdk-*.tar.gz");
        Log.WriteLine($"Found SDK archive: {Path.GetFileName(sdkArchive)}");
        string sdkRoot = ExtractArchive(sdkArchive);

        // Deduplication is considered successful when the extracted layout contains symbolic links
        SymbolicLinkHelpers.VerifyDirectoryHasRelativeSymlinks(extractedPath: sdkRoot, log: Log, contextName: "archive");
    }

    /// <summary>
    /// Unpacks <paramref name="archivePath"/> into an "sdk-extracted" folder under a new test directory.
    /// </summary>
    /// <param name="archivePath">Path to the .tar.gz archive.</param>
    /// <returns>The directory the archive was extracted into.</returns>
    private string ExtractArchive(string archivePath)
    {
        string destination = Path.Combine(TestAssetsManager.CreateTestDirectory().Path, "sdk-extracted");
        Directory.CreateDirectory(destination);

        Log.WriteLine($"Extracting archive to: {destination}");
        SymbolicLinkHelpers.ExtractTarGz(archivePath, destination, Log);

        return destination;
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,102 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
|
|
||
| namespace EndToEnd.Tests.Utilities; | ||
|
|
||
/// <summary>
/// Shared helpers for verifying symbolic links and extracting archives in tests.
/// </summary>
internal static class SymbolicLinkHelpers
{
    /// <summary>
    /// Minimum number of deduplicated links expected in an SDK layout.
    /// Used by both symbolic link and hard link validation.
    /// </summary>
    public const int MinExpectedDeduplicatedLinks = 100;

    /// <summary>
    /// Extracts a tar.gz archive to a directory using the system <c>tar</c> command.
    /// </summary>
    /// <param name="tarGzPath">Path to the .tar.gz file to extract.</param>
    /// <param name="destinationDirectory">Directory to extract files into.</param>
    /// <param name="log">Test output logger.</param>
    public static void ExtractTarGz(string tarGzPath, string destinationDirectory, ITestOutputHelper log)
    {
        new RunExeCommand(log, "tar")
            .Execute("-xzf", tarGzPath, "-C", destinationDirectory)
            .Should().Pass();
    }

    /// <summary>
    /// Extracts an installer package to a temporary directory, verifies symbolic links, and cleans up.
    /// </summary>
    /// <param name="installerFile">Path to the installer file (used for logging only; extraction is done by <paramref name="extractPackage"/>).</param>
    /// <param name="packageType">Type of package (e.g., "deb", "rpm", "pkg") for logging.</param>
    /// <param name="extractPackage">Action that extracts the package contents to the provided temp directory.</param>
    /// <param name="log">Test output logger.</param>
    public static void VerifyPackageSymlinks(string installerFile, string packageType, Action<string> extractPackage, ITestOutputHelper log)
    {
        log.WriteLine($"Verifying symbolic links in {packageType} package: {installerFile}");

        var tempDir = Path.Combine(Path.GetTempPath(), $"{packageType}-test-{Guid.NewGuid()}");
        Directory.CreateDirectory(tempDir);

        try
        {
            extractPackage(tempDir);
            VerifyDirectoryHasRelativeSymlinks(tempDir, log, $"{packageType} package");
        }
        finally
        {
            // Always remove the extracted contents, even when verification fails.
            if (Directory.Exists(tempDir))
            {
                Directory.Delete(tempDir, recursive: true);
            }
        }
    }

    /// <summary>
    /// Verifies that a directory contains more than <see cref="MinExpectedDeduplicatedLinks"/> symbolic links
    /// and that all of them use relative targets.
    /// </summary>
    /// <param name="directory">The directory to check for symbolic links.</param>
    /// <param name="log">Test output logger.</param>
    /// <param name="contextName">Name of the context being tested (for error messages, e.g., "deb package", "archive").</param>
    public static void VerifyDirectoryHasRelativeSymlinks(string directory, ITestOutputHelper log, string contextName)
    {
        // Find all symbolic links in the directory
        var findResult = new RunExeCommand(log, "find")
            .WithWorkingDirectory(directory)
            .Execute(".", "-type", "l");

        findResult.Should().Pass();

        var symlinkPaths = (findResult.StdOut ?? string.Empty)
            .Split('\n', StringSplitOptions.RemoveEmptyEntries)
            .ToList();

        log.WriteLine($"Found {symlinkPaths.Count} symbolic links in {contextName}");

        Assert.True(symlinkPaths.Count > MinExpectedDeduplicatedLinks,
            $"Expected more than {MinExpectedDeduplicatedLinks} symbolic links in {contextName}, but found only {symlinkPaths.Count}. " +
            "This suggests deduplication did not run correctly.");

        // Verify all symlinks use relative paths (not absolute)
        var absoluteSymlinks = new List<string>();
        foreach (var symlinkPath in symlinkPaths)
        {
            // find was started at "." so every result begins with "./". Remove exactly that prefix;
            // TrimStart('.', '/') would also eat the leading dot of names such as ".hidden".
            var relativePath = symlinkPath.StartsWith("./", StringComparison.Ordinal)
                ? symlinkPath[2..]
                : symlinkPath;
            var fullPath = Path.Combine(directory, relativePath);

            // Read the link target in-process rather than launching one readlink process per link.
            var target = new FileInfo(fullPath).LinkTarget;
            Assert.True(target is not null, $"Could not read the link target of '{fullPath}' in {contextName}.");

            if (target!.StartsWith('/'))
            {
                absoluteSymlinks.Add($"{symlinkPath} -> {target}");
            }
        }

        Assert.Empty(absoluteSymlinks);
        log.WriteLine($"Verified all {symlinkPaths.Count} symbolic links use relative paths");
    }
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.