diff --git a/src/core/catalog.js b/src/core/catalog.js
index 78c036e649635..779407035a37b 100644
--- a/src/core/catalog.js
+++ b/src/core/catalog.js
@@ -1990,6 +1990,227 @@ class ExtendedCatalog extends Catalog {
     }
     return shadow(this, "structureTree", structureTree);
   }
+
+  /**
+   * Read a dictionary entry that may be stored as a `Name` or a string.
+   *
+   * @param {Dict} dict - Dictionary to read from.
+   * @param {string} key - Entry to look up.
+   * @returns {string|null} The entry's text, or `null` for other values.
+   */
+  getNameStr(dict, key) {
+    const value = dict.get(key);
+    if (value instanceof Name) {
+      return value.name;
+    }
+    return typeof value === "string" ? value : null;
+  }
+
+  /**
+   * @param {Dict} fontObj - Font (or descendant font) dictionary.
+   * @returns {string|null} The `Subtype` entry as a string, or `null`.
+   */
+  getSubtypeStr(fontObj) {
+    return this.getNameStr(fontObj, "Subtype");
+  }
+
+  /**
+   * Summarise a font dictionary. For a `Type0` font the first entry of
+   * `DescendantFonts` supplies the CID font type and the font descriptor.
+   *
+   * @param {Dict} fontObj - Font dictionary taken from a `Font` resource.
+   * @param {string} fontName - Resource name, used only in the error log.
+   * @returns {Object} `cidFontType`, `normalizedSubtype`, `baseFont`,
+   *   `encoding`, `isComposite`, `isSubset` and `isEmbedded`; every field
+   *   is `null`/`false` when resolution fails.
+   */
+  resolveFontInfo(fontObj, fontName) {
+    try {
+      const subtype = this.getSubtypeStr(fontObj);
+      const isComposite = subtype === "Type0";
+      let actualFont = fontObj;
+      let cidFontType = null;
+      let normalizedSubtype = subtype || null;
+
+      if (isComposite) {
+        const descendantFonts = fontObj.get("DescendantFonts");
+        const cidFont =
+          Array.isArray(descendantFonts) && descendantFonts.length > 0
+            ? this.xref.fetchIfRef(descendantFonts[0])
+            : null;
+
+        if (cidFont instanceof Dict) {
+          actualFont = cidFont;
+          cidFontType = this.getSubtypeStr(cidFont) || null;
+
+          if (cidFontType === "CIDFontType0") {
+            normalizedSubtype = "Type1 (CID)";
+          } else if (cidFontType === "CIDFontType2") {
+            normalizedSubtype = "TrueType (CID)";
+          } else if (cidFontType) {
+            normalizedSubtype = cidFontType;
+          }
+        }
+      }
+
+      // For a simple font `actualFont` is `fontObj` itself, so no separate
+      // fallback lookup on `fontObj` is needed.
+      const descriptor = actualFont.get("FontDescriptor");
+      const baseFont = this.getNameStr(fontObj, "BaseFont");
+      const encoding = this.getNameStr(fontObj, "Encoding");
+
+      const isEmbedded =
+        descriptor instanceof Dict &&
+        !!(
+          descriptor.get("FontFile") ||
+          descriptor.get("FontFile2") ||
+          descriptor.get("FontFile3")
+        );
+
+      // Flags names that start with 1-6 uppercase letters/digits and a "+".
+      const isSubset = baseFont ? /^[A-Z0-9]{1,6}\+/.test(baseFont) : false;
+
+      return {
+        cidFontType,
+        normalizedSubtype,
+        baseFont,
+        encoding,
+        isComposite,
+        isSubset,
+        isEmbedded,
+      };
+    } catch (e) {
+      // NOTE(review): presumably `xref` reports not-yet-loaded data with an
+      // exception named "MissingDataException" that `ensureDoc` retries on;
+      // it must not be swallowed here. Confirm, and prefer `instanceof` if
+      // the class is imported in this file.
+      if (e.name === "MissingDataException") {
+        throw e;
+      }
+      console.error(`Error resolving font info for ${fontName}: ${e.message}`);
+
+      return {
+        cidFontType: null,
+        normalizedSubtype: null,
+        baseFont: null,
+        encoding: null,
+        isComposite: false,
+        isSubset: false,
+        isEmbedded: false,
+      };
+    }
+  }
+
+  /**
+   * Walk every page, resolve its (possibly inherited) `Resources` and list
+   * each distinct font found in the `Font` sub-dictionary.
+   *
+   * @returns {Array<Object>} One record per distinct font, carrying the
+   *   index of the first page it was seen on; empty when nothing is found.
+   */
+  collectFonts() {
+    const fontsList = [];
+    // Indirect fonts are keyed by their `Ref` string, direct ones by the raw
+    // value itself, so same-named inline fonts on different pages are kept.
+    const seenFonts = new Set();
+
+    try {
+      // NOTE(review): assumes `this.pages` is an array of page references
+      // provided elsewhere on this class; confirm.
+      for (let pageIndex = 0; pageIndex < this.pages.length; pageIndex++) {
+        const pageObj = this.xref.fetch(this.pages[pageIndex]);
+        if (!(pageObj instanceof Dict)) {
+          continue;
+        }
+
+        // Climb the `Parent` chain until a `Resources` entry turns up.
+        // Visited parent refs are remembered so a cyclic chain cannot hang.
+        const visitedParents = new Set();
+        let node = pageObj;
+        let resources = null;
+        while (node instanceof Dict) {
+          resources = node.get("Resources");
+          if (resources) {
+            break;
+          }
+          const parentRef = node.getRaw("Parent");
+          if (parentRef instanceof Ref) {
+            const parentKey = parentRef.toString();
+            if (visitedParents.has(parentKey)) {
+              break;
+            }
+            visitedParents.add(parentKey);
+          }
+          node = this.xref.fetchIfRef(parentRef);
+        }
+
+        if (!(resources instanceof Dict)) {
+          continue;
+        }
+
+        const fontDict = resources.get("Font");
+        if (!(fontDict instanceof Dict)) {
+          continue;
+        }
+
+        for (const [fontName, fontVal] of fontDict) {
+          try {
+            const fontRef = fontDict.getRaw(fontName);
+            const isIndirect = fontRef instanceof Ref;
+            const fontKey = isIndirect ? fontRef.toString() : fontRef;
+
+            if (seenFonts.has(fontKey)) {
+              continue;
+            }
+            seenFonts.add(fontKey);
+
+            const fontObj = this.xref.fetchIfRef(fontVal);
+            if (!(fontObj instanceof Dict)) {
+              continue;
+            }
+
+            const info = this.resolveFontInfo(fontObj, fontName);
+
+            fontsList.push({
+              name: fontName,
+              ref: isIndirect ? fontRef : null,
+              type: info.isComposite ? "Type0" : info.normalizedSubtype,
+              subtype: info.normalizedSubtype,
+              pageIndex,
+              cidFontType: info.cidFontType,
+              baseFont: info.baseFont,
+              encoding: info.encoding,
+              isSubset: info.isSubset,
+              isEmbedded: info.isEmbedded,
+              isComposite: info.isComposite,
+            });
+          } catch (e) {
+            // Propagate missing-data signals; otherwise skip only this font.
+            if (e.name === "MissingDataException") {
+              throw e;
+            }
+            console.error(`Skipping font ${fontName}: ${e.message}`);
+          }
+        }
+      }
+    } catch (e) {
+      if (e.name === "MissingDataException") {
+        throw e;
+      }
+      console.error(`Failed to collect fonts: ${e.message}`);
+    }
+
+    return fontsList;
+  }
+
+  /**
+   * Fonts found by `collectFonts`, or `null` when the list is empty. The
+   * value is handed to `shadow` under the `fonts` key.
+   */
+  get fonts() {
+    const fonts = this.collectFonts();
+    return shadow(this, "fonts", fonts.length > 0 ? fonts : null);
+  }
 }
 
 export { ExtendedCatalog as Catalog };
diff --git a/src/core/document.js b/src/core/document.js
index 46a2626a3a814..c62507b4a102a 100644
--- a/src/core/document.js
+++ b/src/core/document.js
@@ -2131,6 +2131,10 @@ class ExtendedPDFDocument extends PDFDocument {
   get structureTree() {
     return shadow(this, "structureTree", this.catalog.structureTree);
   }
+
+  get fonts() {
+    return shadow(this, "fonts", this.catalog.fonts);
+  }
 }
 
 export { Page, ExtendedPDFDocument as PDFDocument };
diff --git a/src/core/worker.js b/src/core/worker.js
index 87fe7a8fb4d3b..ed65e7b858214 100644
--- a/src/core/worker.js
+++ b/src/core/worker.js
@@ -177,10 +177,11 @@ class WorkerMessageHandler {
         finishWorkerTask(task);
       }
 
-      const [numPages, fingerprints, structureTree] = await Promise.all([
+      const [numPages, fingerprints, structureTree, fonts] = await Promise.all([
         pdfManager.ensureDoc("numPages"),
         pdfManager.ensureDoc("fingerprints"),
         pdfManager.ensureDoc("structureTree"),
+        pdfManager.ensureDoc("fonts"),
       ]);
 
       // Get htmlForXfa after numPages to avoid to create HTML twice.
@@ -188,7 +189,7 @@ class WorkerMessageHandler {
         ? await pdfManager.ensureDoc("htmlForXfa")
         : null;
 
-      return { numPages, fingerprints, htmlForXfa, structureTree };
+      return { numPages, fingerprints, htmlForXfa, structureTree, fonts };
     }
 
     async function getPdfManager({