From bfef81393883fd1cfec5d2f13865c2d5ca3d6431 Mon Sep 17 00:00:00 2001
From: Jaap de Haan <261428+jdehaan@users.noreply.github.com>
Date: Sat, 25 Apr 2026 09:56:32 +0000
Subject: [PATCH] feat: Dump
---
PdfInfoTool/Command/Dump.cs | 134 +++++++++++++++++++++++++++++++++++-
PdfInfoTool/Options.cs | 8 +++
2 files changed, 141 insertions(+), 1 deletion(-)
diff --git a/PdfInfoTool/Command/Dump.cs b/PdfInfoTool/Command/Dump.cs
index abd32f2..2c14511 100644
--- a/PdfInfoTool/Command/Dump.cs
+++ b/PdfInfoTool/Command/Dump.cs
@@ -1,10 +1,142 @@
using System;
+using System.Globalization;
+using System.IO;
+using System.Text;
+using SafeRapidPdf;
+using SafeRapidPdf.Objects;
+using SafeRapidPdf.Parsing;
+
namespace PdfInfoTool;
internal static partial class Command
{
internal static int RunDumpAndReturnExitCode(DumpOptions opts)
{
- throw new NotImplementedException();
+ if (opts.Hex && opts.Binary) // The two flags pick different output formats, so the pair is rejected before parsing.
+ {
+ Console.Error.WriteLine("--hex and --binary are mutually exclusive.");
+ return 1;
+ }
+
+ try // Parse the file, then emit it as raw bytes, hex lines, or an indented text tree.
+ {
+ var file = PdfFile.Parse(opts.FileName);
+
+ if (opts.Binary) // Binary mode returns before any header or summary text is written to stdout.
+ {
+ DumpBinary(file);
+ return 0;
+ }
+
+ if (!opts.Quiet) // The version header precedes both hex and text output unless Quiet is set.
+ {
+ Console.WriteLine("PDF Version: {0}", file.Version);
+ }
+
+ if (opts.Hex)
+ {
+ DumpHex(file);
+ }
+ else
+ {
+ WriteObject(file, depth: 0, includeType: opts.Verbose); // Verbose adds each object's ObjectType to its line.
+
+ if (!opts.Quiet) // The item-count summary is printed only in text mode.
+ {
+ Console.WriteLine("Top-level objects: {0}", file.Items.Count);
+ }
+ }
+
+ return 0;
+ }
+ catch (Exception ex) when (
+ ex is ParsingException or IOException or UnauthorizedAccessException) // Only these types are reported as exit code 1; any other exception propagates.
+ {
+ Console.Error.WriteLine(ex.Message);
+ return 1;
+ }
+ }
+
+ /// <summary>
+ /// Walks <paramref name="obj"/> depth-first and prints one line per stream: the current prefix
+ /// (replaced by the object number whenever an indirect object is entered), then the decoded bytes as hex.
+ /// </summary>
+ private static void DumpHex(IPdfObject obj, string prefix = "")
+ {
+ // An indirect object relabels itself and everything nested beneath it.
+ string label = obj is PdfIndirectObject owner
+ ? owner.ObjectNumber.ToString(CultureInfo.InvariantCulture)
+ : prefix;
+ if (obj is PdfStream pdfStream)
+ {
+ // Streams end the walk on this branch: print and do not descend further.
+ Console.WriteLine("{0}: {1}", label, ToHexString(pdfStream.Decode()));
+ return;
+ }
+
+ if (!obj.IsContainer || obj.Items is null)
+ {
+ return;
+ }
+ foreach (var child in obj.Items)
+ {
+ DumpHex(child, label);
+ }
+ }
+
+ /// <summary>
+ /// Writes the decoded bytes of every stream reachable from <paramref name="obj"/> to stdout,
+ /// back to back with no separator. Intended for piping to external tools.
+ /// </summary>
+ private static void DumpBinary(IPdfObject obj)
+ {
+ if (obj is PdfStream stream)
+ {
+ byte[] decoded = stream.Decode();
+ // Raw bytes go straight to the stream; a BinaryWriter and its text encoding add nothing here.
+ using Stream stdout = Console.OpenStandardOutput();
+ stdout.Write(decoded, 0, decoded.Length);
+ return;
+ }
+ if (obj.IsContainer && obj.Items is not null)
+ {
+ foreach (var item in obj.Items)
+ {
+ DumpBinary(item);
+ }
+ }
+ }
+
+ private static string ToHexString(byte[] data) // lowercase hex, two digits per byte, no separators
+ {
+ var hex = new StringBuilder(capacity: data.Length * 2);
+ for (int i = 0; i < data.Length; i++)
+ {
+ _ = hex.Append(data[i].ToString("x2", CultureInfo.InvariantCulture));
+ }
+ return hex.ToString();
+ }
+
+ /// <summary>
+ /// Prints <paramref name="obj"/> on one line, indented by depth, then its items one level deeper.
+ /// </summary>
+ private static void WriteObject(IPdfObject obj, int depth, bool includeType)
+ {
+ // Two spaces of indentation per nesting level.
+ var padding = new string(' ', depth * 2);
+ string line = includeType
+ ? $"{padding}{obj.ObjectType}: {obj.Text}"
+ : $"{padding}{obj.Text}";
+ Console.WriteLine(line);
+
+ // Only containers with a non-null item list have children to print.
+ if (obj.IsContainer && obj.Items is not null)
+ {
+ int childDepth = depth + 1;
+ foreach (var child in obj.Items)
+ {
+ WriteObject(child, childDepth, includeType);
+ }
+ }
}
}
diff --git a/PdfInfoTool/Options.cs b/PdfInfoTool/Options.cs
index 61d9de5..e1cb6fb 100644
--- a/PdfInfoTool/Options.cs
+++ b/PdfInfoTool/Options.cs
@@ -27,6 +27,14 @@ internal sealed class DumpOptions : IOptions
public bool Verbose { get; set; }
public bool Quiet { get; set; }
public string FileName { get; set; }
+
+ [Option('x', "hex",
+ HelpText = "Dump decoded stream data as a hex string (for use in unit tests).")]
+ public bool Hex { get; set; } // Rejected in combination with Binary by RunDumpAndReturnExitCode.
+
+ [Option('b', "binary",
+ HelpText = "Write decoded stream data as raw bytes to stdout (for piping to other tools).")]
+ public bool Binary { get; set; } // Rejected in combination with Hex by RunDumpAndReturnExitCode.
}
[Verb("show", HelpText = "Show object contents in a human readable way.")]