Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/core/core_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,19 @@ function getRotationMatrix(rotation, width, height) {
}
}

/**
 * Compute how many bytes are needed to store the given non-negative integer.
 * The result is 0 when x is 0, meaning that no byte has to be spent to
 * represent it.
 * @param {number} x - a non-negative integer.
 * @returns {number} the number of bytes required to hold x.
 */
function getSizeInBytes(x) {
  // Every value up to 2^n - 1 fits in n bits, hence x needs
  // ceil(log2(1 + x)) bits; that count is then rounded up to whole bytes.
  const bitCount = Math.ceil(Math.log2(1 + x));
  return Math.ceil(bitCount / 8);
}

export {
arrayBuffersToBytes,
codePointIter,
Expand All @@ -622,6 +635,7 @@ export {
getLookupTableFactory,
getNewAnnotationsMap,
getRotationMatrix,
getSizeInBytes,
isAscii,
isWhiteSpace,
log2,
Expand Down
4 changes: 3 additions & 1 deletion src/core/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import {
getNewAnnotationsMap,
XRefParseException,
} from "./core_utils.js";
import { Dict, Ref } from "./primitives.js";
import { Dict, isDict, Ref } from "./primitives.js";
import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
import { AnnotationFactory } from "./annotation.js";
import { clearGlobalCaches } from "./cleanup_helper.js";
Expand Down Expand Up @@ -726,6 +726,8 @@ class WorkerMessageHandler {
acroFormRef,
acroForm,
xfaData,
// Use the same kind of XRef as the previous one.
useXrefStream: isDict(xref.topDict, "XRef"),
}).finally(() => {
xref.resetNewTemporaryRef();
});
Expand Down
177 changes: 118 additions & 59 deletions src/core/writer.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ import { Dict, isName, Name, Ref } from "./primitives.js";
import {
escapePDFName,
escapeString,
getSizeInBytes,
numberToString,
parseXFAPath,
} from "./core_utils.js";
import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js";
import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./crypto.js";
import { Stream } from "./stream.js";

async function writeObject(ref, obj, buffer, { encrypt = null }) {
const transform = encrypt?.createCipherTransform(ref.num, ref.gen);
Expand Down Expand Up @@ -281,6 +283,112 @@ function updateXFA({ xfaData, xfaDatasetsRef, newRefs, xref }) {
newRefs.push({ ref: xfaDatasetsRef, data });
}

/**
 * Append a classic cross-reference table, followed by its trailer, to
 * `buffer`.
 *
 * One 20-byte entry is emitted per element of `newRefs`; a subsection header
 * ("first count") is emitted each time an entry starts a new run of
 * consecutive object numbers, as computed by `getIndexes`.
 *
 * @param {Object} xrefInfo - forwarded to `computeIDs`.
 * @param {number} baseOffset - offset associated with the first entry; it is
 *   advanced by `data.length` after each entry.
 * @param {Array<Object>} newRefs - `{ ref, data }` entries.
 * @param {Dict} newXref - the trailer dictionary, written with `writeDict`.
 * @param {Array<string>} buffer - output chunks; appended to in place.
 * @returns {Promise<undefined>}
 */
async function getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer) {
  buffer.push("xref\n");

  // Flat list of [first, count, first, count, ...] subsection descriptors.
  const subsections = getIndexes(newRefs);
  let cursor = 0;
  let position = baseOffset;

  for (const { ref, data } of newRefs) {
    if (ref.num === subsections[cursor]) {
      const firstNum = subsections[cursor];
      const count = subsections[cursor + 1];
      buffer.push(`${firstNum} ${count}\n`);
      cursor += 2;
    }
    const offsetField = position.toString().padStart(10, "0");
    const genField = Math.min(ref.gen, 0xffff).toString().padStart(5, "0");
    // The EOL is \r\n to make sure that every entry is exactly 20 bytes long.
    // (see 7.5.4 - Cross-Reference Table).
    buffer.push(`${offsetField} ${genField} n\r\n`);
    position += data.length;
  }

  // `position` is now the offset reached after the data of all the entries.
  computeIDs(position, xrefInfo, newXref);
  buffer.push("trailer\n");
  await writeDict(newXref, buffer);
  buffer.push("\nstartxref\n", position.toString(), "\n%%EOF\n");
}

/**
 * Group the object numbers of `newRefs` into runs of consecutive numbers.
 *
 * The entries are visited in the order given; an entry extends the current
 * run only when its number is exactly the one following the end of that run.
 *
 * @param {Array<Object>} newRefs - entries having a `ref.num` property.
 * @returns {Array<number>} a flat array [first, count, first, count, ...].
 */
function getIndexes(newRefs) {
  const runs = [];
  for (const entry of newRefs) {
    const { num } = entry.ref;
    const countPos = runs.length - 1;
    // NaN while `runs` is still empty, hence the first entry opens a run.
    const expectedNext = runs[countPos - 1] + runs[countPos];
    if (num !== expectedNext) {
      runs.push(num, 1);
      continue;
    }
    runs[countPos] += 1;
  }
  return runs;
}

/**
 * Append a cross-reference stream object, followed by the `startxref`
 * footer, to `buffer`.
 *
 * Every element of `newRefs` becomes one (type, offset, generation) record
 * with type 1. The field widths stored in "W" are 1 byte for the type and
 * whatever `getSizeInBytes` returns for the largest offset and the largest
 * generation number.
 *
 * @param {Object} xrefInfo - forwarded to `computeIDs`; its `newRef` is the
 *   reference used to write the stream object.
 * @param {number} baseOffset - offset associated with the first entry; it is
 *   advanced by `data.length` after each entry.
 * @param {Array<Object>} newRefs - `{ ref, data }` entries.
 * @param {Dict} newXref - the dictionary attached to the stream; "Index",
 *   "W" and (via `computeIDs`) possibly "ID" are set on it.
 * @param {Array<string>} buffer - output chunks; appended to in place.
 * @returns {Promise<undefined>}
 */
async function getXRefStreamTable(
  xrefInfo,
  baseOffset,
  newRefs,
  newXref,
  buffer
) {
  const records = [];
  let largestOffset = 0;
  let largestGen = 0;
  let position = baseOffset;

  for (const { ref, data } of newRefs) {
    const generation = Math.min(ref.gen, 0xffff);
    largestOffset = Math.max(largestOffset, position);
    largestGen = Math.max(largestGen, generation);
    records.push([1, position, generation]);
    position += data.length;
  }

  // Keep this insertion order: "Index", then "W", then the optional "ID".
  newXref.set("Index", getIndexes(newRefs));
  const widths = [
    1,
    getSizeInBytes(largestOffset),
    getSizeInBytes(largestGen),
  ];
  newXref.set("W", widths);
  computeIDs(position, xrefInfo, newXref);

  let recordSize = 0;
  for (const width of widths) {
    recordSize += width;
  }
  const bytes = new Uint8Array(recordSize * records.length);
  const xrefStream = new Stream(bytes);
  xrefStream.dict = newXref;

  // The value returned by writeInt is used as the next write position.
  let cursor = 0;
  for (const record of records) {
    for (let i = 0; i < widths.length; i++) {
      cursor = writeInt(record[i], widths[i], cursor, bytes);
    }
  }

  await writeObject(xrefInfo.newRef, xrefStream, buffer, {});
  buffer.push("startxref\n", position.toString(), "\n%%EOF\n");
}

/**
 * Set the "ID" entry of `newXref` when the original file has file ids.
 *
 * Nothing is done unless `xrefInfo.fileIds` is a non-empty array; otherwise
 * "ID" is set to a pair made of the first original id and the value returned
 * by `computeMD5(baseOffset, xrefInfo)`.
 *
 * @param {number} baseOffset - forwarded to `computeMD5`.
 * @param {Object} xrefInfo - provides `fileIds`; forwarded to `computeMD5`.
 * @param {Dict} newXref - the dictionary receiving the "ID" entry.
 */
function computeIDs(baseOffset, xrefInfo, newXref) {
  const hasFileIds =
    Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0;
  if (!hasFileIds) {
    return;
  }
  const updatedId = computeMD5(baseOffset, xrefInfo);
  const originalId = xrefInfo.fileIds[0];
  newXref.set("ID", [originalId, updatedId]);
}

/**
 * Build the dictionary used either as the trailer of a cross-reference table
 * or as the dictionary of a cross-reference stream.
 *
 * When `useXrefStream` is true, an entry `{ ref: xrefInfo.newRef, data: "" }`
 * is also appended to `newRefs`, "Size" is `newRef.num + 1` and "Type" is set
 * to the name "XRef"; otherwise "Size" is `newRef.num`.
 *
 * @param {Object} xrefInfo - provides `startXRef`, `newRef`, `rootRef`,
 *   `infoRef` and `encryptRef`.
 * @param {Array<Object>} newRefs - `{ ref, data }` entries; may be appended
 *   to in place.
 * @param {boolean} useXrefStream - whether a cross-reference stream is going
 *   to be written instead of a cross-reference table.
 * @returns {Dict} the new dictionary.
 */
function getTrailerDict(xrefInfo, newRefs, useXrefStream) {
  const trailer = new Dict(null);
  trailer.set("Prev", xrefInfo.startXRef);

  const { newRef } = xrefInfo;
  if (!useXrefStream) {
    trailer.set("Size", newRef.num);
  } else {
    newRefs.push({ ref: newRef, data: "" });
    trailer.set("Size", newRef.num + 1);
    trailer.set("Type", Name.get("XRef"));
  }

  // Optional references: copied only when they aren't null.
  const optionalEntries = [
    ["Root", xrefInfo.rootRef],
    ["Info", xrefInfo.infoRef],
    ["Encrypt", xrefInfo.encryptRef],
  ];
  for (const [key, value] of optionalEntries) {
    if (value !== null) {
      trailer.set(key, value);
    }
  }
  return trailer;
}

async function incrementalUpdate({
originalData,
xrefInfo,
Expand All @@ -293,6 +401,7 @@ async function incrementalUpdate({
acroFormRef = null,
acroForm = null,
xfaData = null,
useXrefStream = false,
}) {
await updateAcroform({
xref,
Expand All @@ -314,9 +423,6 @@ async function incrementalUpdate({
});
}

const newXref = new Dict(null);
const refForXrefTable = xrefInfo.newRef;

let buffer, baseOffset;
const lastByte = originalData.at(-1);
if (lastByte === /* \n */ 0x0a || lastByte === /* \r */ 0x0d) {
Expand All @@ -328,60 +434,23 @@ async function incrementalUpdate({
baseOffset = originalData.length + 1;
}

newXref.set("Size", refForXrefTable.num + 1);
newXref.set("Prev", xrefInfo.startXRef);
newXref.set("Type", Name.get("XRef"));

if (xrefInfo.rootRef !== null) {
newXref.set("Root", xrefInfo.rootRef);
}
if (xrefInfo.infoRef !== null) {
newXref.set("Info", xrefInfo.infoRef);
}
if (xrefInfo.encryptRef !== null) {
newXref.set("Encrypt", xrefInfo.encryptRef);
}

// Add a ref for the new xref and sort them
newRefs.push({ ref: refForXrefTable, data: "" });
const newXref = getTrailerDict(xrefInfo, newRefs, useXrefStream);
newRefs = newRefs.sort(
(a, b) => /* compare the refs */ a.ref.num - b.ref.num
);

const xrefTableData = [[0, 1, 0xffff]];
const indexes = [0, 1];
let maxOffset = 0;
for (const { ref, data } of newRefs) {
maxOffset = Math.max(maxOffset, baseOffset);
xrefTableData.push([1, baseOffset, Math.min(ref.gen, 0xffff)]);
baseOffset += data.length;
indexes.push(ref.num, 1);
for (const { data } of newRefs) {
buffer.push(data);
}

newXref.set("Index", indexes);

if (Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0) {
const md5 = computeMD5(baseOffset, xrefInfo);
newXref.set("ID", [xrefInfo.fileIds[0], md5]);
}

const offsetSize = Math.ceil(Math.log2(maxOffset) / 8);
const sizes = [1, offsetSize, 2];
const structSize = sizes[0] + sizes[1] + sizes[2];
const tableLength = structSize * xrefTableData.length;
newXref.set("W", sizes);
newXref.set("Length", tableLength);

buffer.push(`${refForXrefTable.num} ${refForXrefTable.gen} obj\n`);
await writeDict(newXref, buffer, null);
buffer.push(" stream\n");
await (useXrefStream
? getXRefStreamTable(xrefInfo, baseOffset, newRefs, newXref, buffer)
: getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer));

const bufferLen = buffer.reduce((a, str) => a + str.length, 0);
const footer = `\nendstream\nendobj\nstartxref\n${baseOffset}\n%%EOF\n`;
const array = new Uint8Array(
originalData.length + bufferLen + tableLength + footer.length
const totalLength = buffer.reduce(
(a, str) => a + str.length,
originalData.length
);
const array = new Uint8Array(totalLength);

// Original data
array.set(originalData);
Expand All @@ -393,16 +462,6 @@ async function incrementalUpdate({
offset += str.length;
}

// New xref table
for (const [type, objOffset, gen] of xrefTableData) {
offset = writeInt(type, sizes[0], offset, array);
offset = writeInt(objOffset, sizes[1], offset, array);
offset = writeInt(gen, sizes[2], offset, array);
}

// Add the footer
writeString(footer, offset, array);

return array;
}

Expand Down
18 changes: 18 additions & 0 deletions test/unit/core_utils_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
escapePDFName,
escapeString,
getInheritableProperty,
getSizeInBytes,
isAscii,
isWhiteSpace,
log2,
Expand Down Expand Up @@ -468,4 +469,21 @@ describe("core_utils", function () {
);
});
});

describe("getSizeInBytes", function () {
  it("should get the size in bytes to use to represent a positive integer", function () {
    // Zero doesn't need any byte.
    expect(getSizeInBytes(0)).toEqual(0);
    for (let i = 1; i <= 0xff; i++) {
      expect(getSizeInBytes(i)).toEqual(1);
    }

    for (let i = 0x100; i <= 0xffff; i += 0x100) {
      expect(getSizeInBytes(i)).toEqual(2);
    }
    // The stepped loop above stops at 0xff00, hence the largest 2-byte value
    // is checked explicitly.
    expect(getSizeInBytes(0xffff)).toEqual(2);

    for (let i = 0x10000; i <= 0xffffff; i += 0x10000) {
      expect(getSizeInBytes(i)).toEqual(3);
    }
    // The stepped loop above stops at 0xff0000, hence the largest 3-byte
    // value is checked explicitly.
    expect(getSizeInBytes(0xffffff)).toEqual(3);
  });
});
});
Loading