From 1d48924636438322815b298cdbdff65742028d16 Mon Sep 17 00:00:00 2001 From: Elizaveta Selyukovich Date: Mon, 27 Apr 2026 09:03:25 +0300 Subject: [PATCH 1/2] Return list of fonts --- src/core/catalog.js | 194 +++++++++++++++++++++++++++++++++++++++++++ src/core/document.js | 4 + src/core/worker.js | 5 +- 3 files changed, 201 insertions(+), 2 deletions(-) diff --git a/src/core/catalog.js b/src/core/catalog.js index 78c036e649635..8558a0c45fac2 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -1990,6 +1990,200 @@ class ExtendedCatalog extends Catalog { } return shadow(this, "structureTree", structureTree); } + + resolveFontInfo(fontObj, fontName) { + let actualFont = fontObj; + let isComposite = false; + let cidFontType = null; + let normalizedSubtype = null; + + try { + const subtype = fontObj.get("Subtype"); + const subtypeStr = + // eslint-disable-next-line no-nested-ternary + subtype instanceof Name + ? subtype.name + : typeof subtype === "string" + ? subtype + : null; + + if (subtypeStr === "Type0") { + isComposite = true; + + const descendantFonts = fontObj.get("DescendantFonts"); + + if (Array.isArray(descendantFonts) && descendantFonts.length > 0) { + const cidFont = this.xref.fetchIfRef(descendantFonts[0]); + + if (cidFont instanceof Dict) { + actualFont = cidFont; + + const cidSubtype = cidFont.get("Subtype"); + const cidSubtypeStr = + // eslint-disable-next-line no-nested-ternary + cidSubtype instanceof Name + ? cidSubtype.name + : typeof cidSubtype === "string" + ? cidSubtype + : null; + + if (cidSubtypeStr) { + cidFontType = cidSubtypeStr; + + if (cidFontType === "CIDFontType0") { + normalizedSubtype = "Type1 (CID)"; + } else if (cidFontType === "CIDFontType2") { + normalizedSubtype = "TrueType (CID)"; + } else { + normalizedSubtype = cidFontType; + } + } + } + } + + if (!normalizedSubtype) { + normalizedSubtype = subtypeStr; + } + } else if (subtypeStr) { + normalizedSubtype = subtypeStr; + } + + let descriptor = actualFont.get("FontDescriptor"); + + if (!(descriptor instanceof Dict) && !isComposite) { + descriptor = fontObj.get("FontDescriptor"); + } + + return { + descriptor, + isComposite, + cidFontType, + normalizedSubtype, + }; + } catch (e) { + console.error(`Error resolving font info for ${fontName}: ${e.message}`); + + return { + descriptor: fontObj.get("FontDescriptor"), + isComposite: false, + cidFontType: null, + normalizedSubtype: null, + }; + } + } + + collectFonts() { + const fontsList = []; + const seenRefs = new Set(); + + try { + for (let pageIndex = 0; pageIndex < this.pages.length; pageIndex++) { + const pageRef = this.pages[pageIndex]; + const pageObj = this.xref.fetch(pageRef); + + if (!(pageObj instanceof Dict)) { + continue; + } + + let resources = pageObj.get("Resources"); + if (!resources) { + let parent = pageObj.get("Parent"); + while (parent instanceof Dict && !resources) { + resources = parent.get("Resources"); + parent = parent.get("Parent"); + } + } + + if (!(resources instanceof Dict)) { + continue; + } + + const fontDict = resources.get("Font"); + if (!(fontDict instanceof Dict)) { + continue; + } + + for (const [fontName, fontVal] of fontDict) { + try { + const fontRef = fontDict.getRaw(fontName); + const refKey = + fontRef instanceof Ref ? fontRef.toString() : fontName; + + if (seenRefs.has(refKey)) { + continue; + } + seenRefs.add(refKey); + + const fontObj = this.xref.fetchIfRef(fontVal); + if (!(fontObj instanceof Dict)) { + continue; + } + + // Resolve composite fonts and get the actual font info + const { descriptor, isComposite, cidFontType, normalizedSubtype } = + this.resolveFontInfo(fontObj, fontName); + + const fontInfo = { + name: fontName, + pageIndex, + ref: fontRef instanceof Ref ? fontRef : null, + type: isComposite ? "Type0" : normalizedSubtype, + subtype: normalizedSubtype, + cidFontType, + baseFont: null, + encoding: null, + isSubset: false, + isEmbedded: false, + isComposite, + }; + + if (fontObj.has("BaseFont")) { + const baseFont = fontObj.get("BaseFont"); + if (baseFont instanceof Name) { + fontInfo.baseFont = baseFont.name; + } else if (typeof baseFont === "string") { + fontInfo.baseFont = baseFont; + } + } + + if (fontObj.has("Encoding")) { + const encoding = fontObj.get("Encoding"); + if (encoding instanceof Name) { + fontInfo.encoding = encoding.name; + } else if (typeof encoding === "string") { + fontInfo.encoding = encoding; + } + } + + if (descriptor instanceof Dict) { + const fontFile = + descriptor.get("FontFile") || + descriptor.get("FontFile2") || + descriptor.get("FontFile3"); + fontInfo.isEmbedded = !!fontFile; + } + + fontInfo.isSubset = fontInfo.baseFont + ? /^[A-Z0-9]{1,6}\+/.test(fontInfo.baseFont) + : false; + + fontsList.push(fontInfo); + } catch { + continue; + } + } + } + } catch (e) { + console.error(`Failed to collect fonts: ${e.message}`); + } + + return fontsList; + } + + get fonts() { + const fonts = this.collectFonts(); + return shadow(this, "fonts", fonts && fonts.length > 0 ? fonts : null); + } } export { ExtendedCatalog as Catalog }; diff --git a/src/core/document.js b/src/core/document.js index 46a2626a3a814..c62507b4a102a 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -2131,6 +2131,10 @@ class ExtendedPDFDocument extends PDFDocument { get structureTree() { return shadow(this, "structureTree", this.catalog.structureTree); } + + get fonts() { + return shadow(this, "fonts", this.catalog.fonts); + } } export { Page, ExtendedPDFDocument as PDFDocument }; diff --git a/src/core/worker.js b/src/core/worker.js index 87fe7a8fb4d3b..ed65e7b858214 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -177,10 +177,11 @@ class WorkerMessageHandler { finishWorkerTask(task); } - const [numPages, fingerprints, structureTree] = await Promise.all([ + const [numPages, fingerprints, structureTree, fonts] = await Promise.all([ pdfManager.ensureDoc("numPages"), pdfManager.ensureDoc("fingerprints"), pdfManager.ensureDoc("structureTree"), + pdfManager.ensureDoc("fonts"), ]); // Get htmlForXfa after numPages to avoid to create HTML twice. @@ -188,7 +189,7 @@ class WorkerMessageHandler { ? await pdfManager.ensureDoc("htmlForXfa") : null; - return { numPages, fingerprints, htmlForXfa, structureTree }; + return { numPages, fingerprints, htmlForXfa, structureTree, fonts }; } async function getPdfManager({ From 9dca05ebd15f02a2acfd012eaa226b0d6c4b9834 Mon Sep 17 00:00:00 2001 From: Elizaveta Selyukovich Date: Tue, 28 Apr 2026 09:19:02 +0300 Subject: [PATCH 2/2] Code improvements --- src/core/catalog.js | 142 ++++++++++++++++++++++++-------------------- 1 file changed, 77 insertions(+), 65 deletions(-) diff --git a/src/core/catalog.js b/src/core/catalog.js index 8558a0c45fac2..779407035a37b 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -1991,23 +1991,31 @@ class ExtendedCatalog extends Catalog { return shadow(this, "structureTree", structureTree); } + getSubtypeStr(fontObj) { + const subtype = fontObj.get("Subtype"); + if (subtype instanceof Name) { + return subtype.name; + } + if (typeof subtype === "string") { + return subtype; + } + return null; + } + resolveFontInfo(fontObj, fontName) { let actualFont = fontObj; - let isComposite = false; let cidFontType = null; let normalizedSubtype = null; + let baseFont = null; + let encoding = null; + let isEmbedded = false; + let isComposite = false; + let isSubset = false; try { - const subtype = fontObj.get("Subtype"); - const subtypeStr = - // eslint-disable-next-line no-nested-ternary - subtype instanceof Name - ? subtype.name - : typeof subtype === "string" - ? subtype - : null; - - if (subtypeStr === "Type0") { + const subtype = this.getSubtypeStr(fontObj); + + if (subtype === "Type0") { isComposite = true; const descendantFonts = fontObj.get("DescendantFonts"); @@ -2018,17 +2026,10 @@ class ExtendedCatalog extends Catalog { if (cidFont instanceof Dict) { actualFont = cidFont; - const cidSubtype = cidFont.get("Subtype"); - const cidSubtypeStr = - // eslint-disable-next-line no-nested-ternary - cidSubtype instanceof Name - ? cidSubtype.name - : typeof cidSubtype === "string" - ? cidSubtype - : null; + const cidSubtype = this.getSubtypeStr(cidFont); - if (cidSubtypeStr) { - cidFontType = cidSubtypeStr; + if (cidSubtype) { + cidFontType = cidSubtype; if (cidFontType === "CIDFontType0") { normalizedSubtype = "Type1 (CID)"; @@ -2042,10 +2043,10 @@ class ExtendedCatalog extends Catalog { } if (!normalizedSubtype) { - normalizedSubtype = subtypeStr; + normalizedSubtype = subtype; } - } else if (subtypeStr) { - normalizedSubtype = subtypeStr; + } else if (subtype) { + normalizedSubtype = subtype; } let descriptor = actualFont.get("FontDescriptor"); @@ -2054,20 +2055,54 @@ class ExtendedCatalog extends Catalog { descriptor = fontObj.get("FontDescriptor"); } + if (fontObj.has("BaseFont")) { + const BaseFont = fontObj.get("BaseFont"); + if (BaseFont instanceof Name) { + baseFont = BaseFont.name; + } else if (typeof BaseFont === "string") { + baseFont = BaseFont; + } + } + + if (fontObj.has("Encoding")) { + const Encoding = fontObj.get("Encoding"); + if (Encoding instanceof Name) { + encoding = Encoding.name; + } else if (typeof Encoding === "string") { + encoding = Encoding; + } + } + + if (descriptor instanceof Dict) { + const fontFile = + descriptor.get("FontFile") || + descriptor.get("FontFile2") || + descriptor.get("FontFile3"); + isEmbedded = !!fontFile; + } + + isSubset = baseFont ? /^[A-Z0-9]{1,6}\+/.test(baseFont) : false; + return { - descriptor, - isComposite, cidFontType, normalizedSubtype, + baseFont, + encoding, + isComposite, + isSubset, + isEmbedded, }; } catch (e) { console.error(`Error resolving font info for ${fontName}: ${e.message}`); return { - descriptor: fontObj.get("FontDescriptor"), - isComposite: false, cidFontType: null, normalizedSubtype: null, + baseFont: null, + encoding: null, + isComposite: false, + isSubset: false, + isEmbedded: false, }; } } @@ -2120,53 +2155,30 @@ class ExtendedCatalog extends Catalog { } // Resolve composite fonts and get the actual font info - const { descriptor, isComposite, cidFontType, normalizedSubtype } = - this.resolveFontInfo(fontObj, fontName); + const { + cidFontType, + normalizedSubtype, + baseFont, + encoding, + isComposite, + isSubset, + isEmbedded, + } = this.resolveFontInfo(fontObj, fontName); const fontInfo = { name: fontName, - pageIndex, ref: fontRef instanceof Ref ? fontRef : null, type: isComposite ? "Type0" : normalizedSubtype, subtype: normalizedSubtype, + pageIndex, cidFontType, - baseFont: null, - encoding: null, - isSubset: false, - isEmbedded: false, + baseFont, + encoding, + isSubset, + isEmbedded, isComposite, }; - if (fontObj.has("BaseFont")) { - const baseFont = fontObj.get("BaseFont"); - if (baseFont instanceof Name) { - fontInfo.baseFont = baseFont.name; - } else if (typeof baseFont === "string") { - fontInfo.baseFont = baseFont; - } - } - - if (fontObj.has("Encoding")) { - const encoding = fontObj.get("Encoding"); - if (encoding instanceof Name) { - fontInfo.encoding = encoding.name; - } else if (typeof encoding === "string") { - fontInfo.encoding = encoding; - } - } - - if (descriptor instanceof Dict) { - const fontFile = - descriptor.get("FontFile") || - descriptor.get("FontFile2") || - descriptor.get("FontFile3"); - fontInfo.isEmbedded = !!fontFile; - } - - fontInfo.isSubset = fontInfo.baseFont - ? /^[A-Z0-9]{1,6}\+/.test(fontInfo.baseFont) - : false; - fontsList.push(fontInfo); } catch { continue;