diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 256a506268a06..d38fab6b0fe6b 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2927,7 +2927,7 @@ class PartialEvaluator { for (let i = 0, ii = glyphs.length; i < ii; i++) { const glyph = glyphs[i]; - const { category } = glyph; + const { category, originalCharCode } = glyph; if (category.isInvisibleFormatMark) { continue; @@ -2941,6 +2941,10 @@ class PartialEvaluator { } let scaledDim = glyphWidth * scale; + if (originalCharCode === 0x20) { + charSpacing += textState.wordSpacing; + } + if (!keepWhiteSpace && category.isWhitespace) { // Don't push a " " in the textContentItem // (except when it's between two non-spaces chars), @@ -2948,13 +2952,13 @@ class PartialEvaluator { // compareWithLastPosition. // This way we can merge real spaces and spaces due to cursor moves. if (!font.vertical) { - charSpacing += scaledDim + textState.wordSpacing; + charSpacing += scaledDim; textState.translateTextMatrix( charSpacing * textState.textHScale, 0 ); } else { - charSpacing += -scaledDim + textState.wordSpacing; + charSpacing += -scaledDim; textState.translateTextMatrix(0, -charSpacing); } saveLastChar(" "); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 6b1ede9aefd21..45530519a2b23 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -746,3 +746,5 @@ !issue20232.pdf !bug1989304.pdf !comments.pdf +!issue20319_1.pdf +!issue20319_2.pdf diff --git a/test/pdfs/issue20319_1.pdf b/test/pdfs/issue20319_1.pdf new file mode 100644 index 0000000000000..0fe77c69957d9 Binary files /dev/null and b/test/pdfs/issue20319_1.pdf differ diff --git a/test/pdfs/issue20319_2.pdf b/test/pdfs/issue20319_2.pdf new file mode 100644 index 0000000000000..0878fb50b11df Binary files /dev/null and b/test/pdfs/issue20319_2.pdf differ diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 648fe309e7952..a006bb226a6eb 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -4027,6 +4027,37 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`) expect(items[1].fontName).not.toEqual(items[0].fontName); }); + it("gets text content with word spacing (issue 20319)", async function () { + const loadingTask = getDocument( + buildGetDocumentParams("issue20319_1.pdf") + ); + const pdfDoc = await loadingTask.promise; + const pdfPage = await pdfDoc.getPage(1); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); + const text = mergeText(items); + + expect(text).toEqual("A A"); + + await loadingTask.destroy(); + }); + + it("gets text content with word spacing and a fake space (issue 20319)", async function () { + const loadingTask = getDocument( + buildGetDocumentParams("issue20319_2.pdf") + ); + const pdfDoc = await loadingTask.promise; + const pdfPage = await pdfDoc.getPage(1); + const { items } = await pdfPage.getTextContent({ + disableNormalization: true, + }); + const text = mergeText(items); + expect(text).toEqual("AA A"); + + await loadingTask.destroy(); + }); + it("gets empty structure tree", async function () { const tree = await page.getStructTree();