Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/Layout/redist/targets/GenerateInstallerLayout.targets
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,10 @@
LayoutDnxShim;
CrossgenLayout;
ReplaceBundledRuntimePackFilesWithSymbolicLinks"
AfterTargets="AfterBuild" />
AfterTargets="AfterBuild">
<!-- TODO: This will eventually be done for all platforms as part of the deduplication work (https://github.com/dotnet/sdk/issues/52183). -->
<DeduplicateAssembliesWithLinks Condition="!$([MSBuild]::IsOSPlatform('Windows'))" LayoutDirectory="$(RedistInstallerLayoutPath)sdk\" UseHardLinks="false" />
</Target>

<!-- Copy the sdk layout into a temporary folder so that it's nested under "sdk\$(Version)\" which is
necessary for the msi/pkg to install correctly and put the content under that sub path. -->
Expand Down
182 changes: 182 additions & 0 deletions src/Tasks/sdk-tasks/DeduplicateAssembliesWithLinks.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

#if !NETFRAMEWORK
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Hashing;
using System.Linq;

namespace Microsoft.DotNet.Build.Tasks;

/// <summary>
/// Deduplicates assemblies (.dll and .exe files) in a directory by replacing duplicates with links (hard or symbolic).
/// Assemblies are grouped by content hash, and a deterministic "primary" file is selected (closest to root, alphabetically
/// first) which duplicates are linked to. Text-based files (config, json, xml, etc.) are not deduplicated.
/// </summary>
public sealed class DeduplicateAssembliesWithLinks : Task
{
    /// <summary>
    /// The root directory to scan for duplicate assemblies.
    /// </summary>
    [Required]
    public string LayoutDirectory { get; set; } = null!;

    /// <summary>
    /// If true, creates hard links. If false, creates symbolic links.
    /// </summary>
    public bool UseHardLinks { get; set; } = false;

    /// <summary>
    /// Human-readable name of the configured link kind, used in log messages.
    /// </summary>
    private string LinkType => UseHardLinks ? "hard link" : "symbolic link";

    /// <summary>
    /// Scans <see cref="LayoutDirectory"/> for assemblies, groups them by content hash and replaces every
    /// duplicate with a link to the group's primary file.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the directory exists and every file was hashed and linked without error; otherwise <c>false</c>.
    /// </returns>
    public override bool Execute()
    {
        if (!Directory.Exists(LayoutDirectory))
        {
            Log.LogError($"LayoutDirectory '{LayoutDirectory}' does not exist.");
            return false;
        }

        Log.LogMessage(MessageImportance.High, $"Scanning for duplicate assemblies in '{LayoutDirectory}' (using {LinkType}s)...");

        // Only deduplicate assemblies - non-assembly files are small and offer minimal ROI.
        // Some non-assembly files such as config files shouldn't be linked (may be edited).
        var files = Directory.EnumerateFiles(LayoutDirectory, "*", SearchOption.AllDirectories)
            .Where(IsAssembly)
            .ToList();

        Log.LogMessage(MessageImportance.Normal, $"Found {files.Count} assemblies eligible for deduplication.");

        var (filesByHash, hashingSuccess) = HashAndGroupFiles(files);
        if (!hashingSuccess)
        {
            // Do not link anything when the grouping is incomplete; the errors have already been logged.
            return false;
        }

        var duplicateGroups = filesByHash.Values.Where(g => g.Count > 1).ToList();
        Log.LogMessage(MessageImportance.Normal, $"Found {duplicateGroups.Count} groups of duplicate assemblies.");
        return DeduplicateFileGroups(duplicateGroups);
    }

    /// <summary>
    /// Hashes every file and buckets the resulting entries by hash.
    /// </summary>
    /// <param name="files">Paths of the files to hash.</param>
    /// <returns>
    /// The entries grouped by hash, and a flag that is <c>false</c> when at least one file could not be hashed
    /// (each failure is logged as an error and the remaining files are still processed).
    /// </returns>
    private (Dictionary<string, List<FileEntry>> filesByHash, bool success) HashAndGroupFiles(List<string> files)
    {
        var filesByHash = new Dictionary<string, List<FileEntry>>();
        bool hasErrors = false;

        foreach (var filePath in files)
        {
            try
            {
                var fileInfo = new FileInfo(filePath);
                var hash = ComputeFileHash(filePath);
                var entry = new FileEntry(
                    filePath,
                    hash,
                    fileInfo.Length,
                    GetPathDepth(filePath, LayoutDirectory));

                // Single lookup: fetch the bucket, creating it on first sight of this hash.
                if (!filesByHash.TryGetValue(hash, out var bucket))
                {
                    bucket = new List<FileEntry>();
                    filesByHash[hash] = bucket;
                }

                bucket.Add(entry);
            }
            catch (Exception ex)
            {
                Log.LogError($"Failed to hash file '{filePath}': {ex.Message}");
                hasErrors = true;
            }
        }

        return (filesByHash, !hasErrors);
    }

    /// <summary>
    /// Replaces all but the primary file of each group with a link to the primary.
    /// </summary>
    /// <param name="duplicateGroups">Groups of files with identical hashes; each group has more than one entry.</param>
    /// <returns><c>true</c> when every link was created; <c>false</c> when any link failed (failures are logged).</returns>
    private bool DeduplicateFileGroups(List<List<FileEntry>> duplicateGroups)
    {
        int totalFilesDeduped = 0;
        long totalBytesSaved = 0;
        bool hasErrors = false;

        foreach (var group in duplicateGroups)
        {
            // Sort deterministically: by depth (ascending), then alphabetically (ordinal for reproducibility)
            var sorted = group.OrderBy(f => f.Depth).ThenBy(f => f.Path, StringComparer.Ordinal).ToList();

            // First file is the "primary" - all duplicates will link to it
            var primary = sorted[0];

            foreach (var duplicate in sorted.Skip(1))
            {
                try
                {
                    CreateLink(duplicate.Path, primary.Path);
                    totalFilesDeduped++;
                    totalBytesSaved += duplicate.Size;
                    Log.LogMessage(MessageImportance.Low, $" Linked: {duplicate.Path} -> {primary.Path}");
                }
                catch (Exception ex)
                {
                    Log.LogError($"Failed to create {LinkType} from '{duplicate.Path}' to '{primary.Path}': {ex.Message}");
                    hasErrors = true;
                }
            }
        }

        Log.LogMessage(MessageImportance.High,
            $"Deduplication complete: {totalFilesDeduped} files replaced with {LinkType}s, saving {totalBytesSaved / (1024.0 * 1024.0):F2} MB.");

        return !hasErrors;
    }

    /// <summary>
    /// Replaces <paramref name="duplicateFilePath"/> with a link to <paramref name="primaryFilePath"/>.
    /// </summary>
    /// <remarks>
    /// The link is first created under a temporary name next to the duplicate and then moved over it, so the
    /// duplicate is only replaced once the link exists. If link creation fails, the original file is left in place.
    /// </remarks>
    /// <param name="duplicateFilePath">The file to replace with a link.</param>
    /// <param name="primaryFilePath">The file the link points to.</param>
    private void CreateLink(string duplicateFilePath, string primaryFilePath)
    {
        var duplicateDirectory = Path.GetDirectoryName(duplicateFilePath)!;

        // Same directory as the duplicate, so a relative symlink target resolves identically
        // before and after the move.
        var temporaryLinkPath = Path.Combine(duplicateDirectory, Path.GetRandomFileName());

        try
        {
            if (UseHardLinks)
            {
                File.CreateHardLink(temporaryLinkPath, primaryFilePath);
            }
            else
            {
                // Create relative symlink so it works when directory is moved/archived
                var relativePath = Path.GetRelativePath(duplicateDirectory, primaryFilePath);
                File.CreateSymbolicLink(temporaryLinkPath, relativePath);
            }

            File.Move(temporaryLinkPath, duplicateFilePath, overwrite: true);
        }
        finally
        {
            // No-op after a successful move; otherwise removes a link left behind by a failed or skipped move.
            File.Delete(temporaryLinkPath);
        }
    }

    /// <summary>
    /// Computes the XxHash64 of a file's contents.
    /// </summary>
    /// <param name="filePath">The file to hash.</param>
    /// <returns>The hash as an uppercase hexadecimal string.</returns>
    private static string ComputeFileHash(string filePath)
    {
        var xxHash = new XxHash64();
        using var stream = File.OpenRead(filePath);

        // Let the library stream the file instead of copying each chunk into a fresh array.
        xxHash.Append(stream);

        return Convert.ToHexString(xxHash.GetCurrentHash());
    }

    /// <summary>
    /// Counts the directory separators in <paramref name="filePath"/> relative to <paramref name="rootDirectory"/>;
    /// a file directly inside the root has depth 0.
    /// </summary>
    private static int GetPathDepth(string filePath, string rootDirectory)
    {
        var relativePath = Path.GetRelativePath(rootDirectory, filePath);
        return relativePath.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar).Length - 1;
    }

    /// <summary>
    /// Returns <c>true</c> when the file has a .dll or .exe extension (case-insensitive).
    /// </summary>
    private static bool IsAssembly(string filePath)
    {
        var extension = Path.GetExtension(filePath);
        return extension.Equals(".dll", StringComparison.OrdinalIgnoreCase) ||
               extension.Equals(".exe", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// A hashed file: its path, content hash, size in bytes and depth below the layout root.
    /// </summary>
    private record FileEntry(string Path, string Hash, long Size, int Depth);
}
#endif
2 changes: 1 addition & 1 deletion src/Tasks/sdk-tasks/ReplaceFilesWithSymbolicLinks.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
namespace Microsoft.DotNet.Build.Tasks
{
/// <summary>
/// Replaces files that have the same content with hard links.
/// Replaces files that have the same content with symbolic links.
/// </summary>
public sealed class ReplaceFilesWithSymbolicLinks : Task
Comment thread
MichaelSimons marked this conversation as resolved.
{
Expand Down
5 changes: 5 additions & 0 deletions src/Tasks/sdk-tasks/sdk-tasks.InTree.targets
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@
<UsingTask TaskName="UpdateRuntimeConfig" AssemblyFile="$(SdkTasksAssembly)" TaskFactory="TaskHostFactory" Runtime="NET" />
<UsingTask TaskName="ZipFileCreateFromDirectory" AssemblyFile="$(SdkTasksAssembly)" TaskFactory="TaskHostFactory" Runtime="NET" />

<UsingTask TaskName="DeduplicateAssembliesWithLinks"
Comment thread
MichaelSimons marked this conversation as resolved.
Condition="'$(MSBuildRuntimeType)' == 'Core'"
AssemblyFile="$(SdkTasksAssembly)"
TaskFactory="TaskHostFactory" />

<!-- Tasks from the Arcade SDK -->
<UsingTask TaskName="DownloadFile" AssemblyFile="$(ArcadeSdkBuildTasksAssembly)" />

Expand Down
1 change: 1 addition & 0 deletions src/Tasks/sdk-tasks/sdk-tasks.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
<PackageReference Include="Newtonsoft.Json" />
<PackageReference Include="NuGet.Packaging" />
<PackageReference Include="NuGet.Versioning" />
<PackageReference Include="System.IO.Hashing" />
</ItemGroup>

<!-- Global usings -->
Expand Down
41 changes: 41 additions & 0 deletions test/EndToEnd.Tests/GivenSdkArchives.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.InteropServices;
using EndToEnd.Tests.Utilities;

namespace EndToEnd.Tests;

public class GivenSdkArchives(ITestOutputHelper log) : SdkTest(log)
{
    /// <summary>
    /// Extracts the SDK tar.gz archive and checks that it contains relative symbolic links.
    /// Returns without checking anything on Windows.
    /// </summary>
    [Fact]
    public void ItHasDeduplicatedAssemblies()
    {
        // TODO: Windows is not supported yet - blocked on signing support (https://github.com/dotnet/sdk/issues/52182).
        bool runningOnWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
        if (!runningOnWindows)
        {
            // Locate the archive produced by the build.
            string sdkArchive = TestContext.FindSdkAcquisitionArtifact("dotnet-sdk-*.tar.gz");
            Log.WriteLine($"Found SDK archive: {Path.GetFileName(sdkArchive)}");

            // Unpack it, then confirm deduplication left symbolic links behind.
            string unpackedRoot = ExtractArchive(sdkArchive);
            SymbolicLinkHelpers.VerifyDirectoryHasRelativeSymlinks(unpackedRoot, Log, "archive");
        }
    }

    /// <summary>
    /// Unpacks the archive into an "sdk-extracted" folder inside a new test directory.
    /// </summary>
    /// <param name="archivePath">Path of the tar.gz archive to unpack.</param>
    /// <returns>The directory the archive was extracted into.</returns>
    private string ExtractArchive(string archivePath)
    {
        string destination = Path.Combine(TestAssetsManager.CreateTestDirectory().Path, "sdk-extracted");
        Directory.CreateDirectory(destination);

        Log.WriteLine($"Extracting archive to: {destination}");
        SymbolicLinkHelpers.ExtractTarGz(archivePath, destination, Log);

        return destination;
    }
}
102 changes: 102 additions & 0 deletions test/EndToEnd.Tests/Utilities/SymbolicLinkHelpers.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

namespace EndToEnd.Tests.Utilities;

/// <summary>
/// Shared helpers for verifying symbolic links and extracting archives in tests.
/// </summary>
internal static class SymbolicLinkHelpers
{
    /// <summary>
    /// Minimum number of deduplicated links expected in an SDK layout.
    /// Used by both symbolic link and hard link validation.
    /// </summary>
    public const int MinExpectedDeduplicatedLinks = 100;

    /// <summary>
    /// Extracts a tar.gz archive to a directory using the system <c>tar</c> command.
    /// </summary>
    /// <param name="tarGzPath">Path to the .tar.gz file to extract.</param>
    /// <param name="destinationDirectory">Directory to extract files into.</param>
    /// <param name="log">Test output logger.</param>
    public static void ExtractTarGz(string tarGzPath, string destinationDirectory, ITestOutputHelper log)
    {
        new RunExeCommand(log, "tar")
            .Execute("-xzf", tarGzPath, "-C", destinationDirectory)
            .Should().Pass();
    }

    /// <summary>
    /// Extracts an installer package to a temporary directory, verifies symbolic links, and cleans up.
    /// </summary>
    /// <param name="installerFile">Path to the installer file.</param>
    /// <param name="packageType">Type of package (e.g., "deb", "rpm", "pkg") for logging.</param>
    /// <param name="extractPackage">Action that extracts the package contents to the provided temp directory.</param>
    /// <param name="log">Test output logger.</param>
    public static void VerifyPackageSymlinks(string installerFile, string packageType, Action<string> extractPackage, ITestOutputHelper log)
    {
        var tempDir = Path.Combine(Path.GetTempPath(), $"{packageType}-test-{Guid.NewGuid()}");
        Directory.CreateDirectory(tempDir);

        try
        {
            extractPackage(tempDir);
            VerifyDirectoryHasRelativeSymlinks(tempDir, log, $"{packageType} package");
        }
        finally
        {
            // Remove the extraction directory whether or not verification passed.
            if (Directory.Exists(tempDir))
            {
                Directory.Delete(tempDir, recursive: true);
            }
        }
    }

    /// <summary>
    /// Verifies that a directory contains more than <see cref="MinExpectedDeduplicatedLinks"/> symbolic links
    /// and that all of them use relative targets. Relies on the system <c>find</c> and <c>readlink</c> commands.
    /// </summary>
    /// <param name="directory">The directory to check for symbolic links.</param>
    /// <param name="log">Test output logger.</param>
    /// <param name="contextName">Name of the context being tested (for error messages, e.g., "deb package", "archive").</param>
    public static void VerifyDirectoryHasRelativeSymlinks(string directory, ITestOutputHelper log, string contextName)
    {
        // Find all symbolic links in the directory
        var findResult = new RunExeCommand(log, "find")
            .WithWorkingDirectory(directory)
            .Execute(".", "-type", "l");

        findResult.Should().Pass();

        var symlinkPaths = (findResult.StdOut ?? string.Empty)
            .Split('\n', StringSplitOptions.RemoveEmptyEntries)
            .ToList();

        log.WriteLine($"Found {symlinkPaths.Count} symbolic links in {contextName}");

        Assert.True(symlinkPaths.Count > MinExpectedDeduplicatedLinks,
            $"Expected more than {MinExpectedDeduplicatedLinks} symbolic links in {contextName}, but found only {symlinkPaths.Count}. " +
            "This suggests deduplication did not run correctly.");

        // Verify all symlinks use relative paths (not absolute)
        var absoluteSymlinks = new List<string>();
        foreach (var symlinkPath in symlinkPaths)
        {
            // find prints paths as "./relative/path". Strip only that prefix: trimming every leading
            // '.' and '/' would also eat the dot of entries such as "./.hidden/file".
            var relativePath = symlinkPath.StartsWith("./", StringComparison.Ordinal)
                ? symlinkPath[2..]
                : symlinkPath;
            var fullPath = Path.Combine(directory, relativePath);

            var readlinkResult = new RunExeCommand(log, "readlink")
                .Execute(fullPath);

            readlinkResult.Should().Pass();

            var target = (readlinkResult.StdOut ?? string.Empty).Trim();
            if (target.StartsWith('/'))
            {
                absoluteSymlinks.Add($"{symlinkPath} -> {target}");
            }
        }

        Assert.Empty(absoluteSymlinks);
        log.WriteLine($"Verified all {symlinkPaths.Count} symbolic links use relative paths");
    }
}
Loading