From 89de2e90b307c084fc069ddde8b3e663ee90d799 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Fri, 29 Dec 2023 10:24:19 +0000 Subject: [PATCH 1/4] Speed up algorithm by not considering diagonals that take us off the edge of the graph --- src/diff/base.js | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/src/diff/base.js b/src/diff/base.js index 0dcd3a2a..99f93d7a 100644 --- a/src/diff/base.js +++ b/src/diff/base.js @@ -43,9 +43,32 @@ Diff.prototype = { return done([{value: this.join(newString), count: newString.length}]); } + // Once we hit the right edge of the edit graph on some diagonal k, we can + // definitely reach the end of the edit graph in no more than k edits, so + // there's no point in considering any moves to diagonal k+1 any more (from + // which we're guaranteed to need at least k+1 more edits). + // Similarly, once we've reached the bottom of the edit graph, there's no + // point considering moves to lower diagonals. + // We record this fact by setting minDiagonalToConsider and + // maxDiagonalToConsider to some finite value once we've hit the edge of + // the edit graph. + // This optimization is not faithful to the original algorithm presented in + // Myers's paper, which instead pointlessly extends D-paths off the end of + // the edit graph - see page 7 of Myers's paper which notes this point + // explicitly and illustrates it with a diagram. This has major performance + // implications for some common scenarios. For instance, to compute a diff + // where the new text simply appends d characters on the end of the + // original text of length n, the true Myers algorithm will take O(n+d^2) + // time while this optimization needs only O(n+d) time. + let minDiagonalToConsider = -Infinity, maxDiagonalToConsider = Infinity; + // Main worker method. checks all permutations of a given edit length for acceptance. 
function execEditLength() { - for (let diagonalPath = -1 * editLength; diagonalPath <= editLength; diagonalPath += 2) { + for ( + let diagonalPath = Math.max(minDiagonalToConsider, -editLength); + diagonalPath <= Math.min(maxDiagonalToConsider, editLength); + diagonalPath += 2 + ) { let basePath; let removePath = bestPath[diagonalPath - 1], addPath = bestPath[diagonalPath + 1]; @@ -81,12 +104,17 @@ Diff.prototype = { newPos = self.extractCommon(basePath, newString, oldString, diagonalPath); - // If we have hit the end of both strings, then we are done if (basePath.oldPos + 1 >= oldLen && newPos + 1 >= newLen) { + // If we have hit the end of both strings, then we are done return done(buildValues(self, basePath.lastComponent, newString, oldString, self.useLongestToken)); } else { - // Otherwise track this path as a potential candidate and continue. bestPath[diagonalPath] = basePath; + if (basePath.oldPos + 1 >= oldLen) { + maxDiagonalToConsider = Math.min(maxDiagonalToConsider, diagonalPath - 1); + } + if (newPos + 1 >= newLen) { + minDiagonalToConsider = Math.max(minDiagonalToConsider, diagonalPath + 1); + } } } From 25edf72c2a17bb3b9c0c87132f9dcf80d7dda1e4 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Fri, 29 Dec 2023 10:57:10 +0000 Subject: [PATCH 2/4] Note deviations from Myers diff in the README --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 60eacb18..99b2198e 100644 --- a/README.md +++ b/README.md @@ -215,3 +215,10 @@ jsdiff supports all ES3 environments with some known issues on IE8 and below. Un ## License See [LICENSE](https://github.com/kpdecker/jsdiff/blob/master/LICENSE). 
+ +## Deviations from the published Myers diff algorithm + +JsDiff deviates from the published algorithm in a couple of ways that don't affect results but do affect performance: + +* JsDiff keeps track of the diff for each diagonal using a linked list of change objects for each diagonal, rather than the historical array of furthest-reaching D-paths on each diagonal contemplated on page 8 of Myers's paper. +* JsDiff skips considering diagonals where the furthest-reaching D-path would go off the edge of the edit graph. This dramatically reduces the time cost (from quadratic to linear) in cases where the new text just appends or truncates content at the end of the old text. From cbe61ac879e4988ed5cd2acb664709721befd84f Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Fri, 29 Dec 2023 10:59:18 +0000 Subject: [PATCH 3/4] Add release notes --- release-notes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/release-notes.md b/release-notes.md index 23a8c6fa..026b0e16 100644 --- a/release-notes.md +++ b/release-notes.md @@ -5,6 +5,7 @@ [Commits](https://github.com/kpdecker/jsdiff/compare/v5.1.0...master) - [#411](https://github.com/kpdecker/jsdiff/pull/411) Big performance improvement. Previously an O(n) array-copying operation inside the innermost loop of jsdiff's base diffing code increased the overall worst-case time complexity of computing a diff from O(n²) to O(n³). This is now fixed, bringing the worst-case time complexity down to what it theoretically should be for a Myers diff implementation. +- [#448](https://github.com/kpdecker/jsdiff/pull/448) Performance improvement. Diagonals whose furthest-reaching D-path would go off the edge of the edit graph are now skipped, rather than being pointlessly considered as called for by the original Myers diff algorithm. This dramatically speeds up computing diffs where the new text just appends or truncates content at the end of the old text. 
- [#351](https://github.com/kpdecker/jsdiff/issues/351) Importing from the lib folder - e.g. `require("diff/lib/diff/word.js")` - will work again now. This had been broken for users on the latest version of Node since Node 17.5.0, which changed how Node interprets the `exports` property in jsdiff's `package.json` file. - [#344](https://github.com/kpdecker/jsdiff/issues/344) `diffLines`, `createTwoFilesPatch`, and other patch-creation methods now take an optional `stripTrailingCr: true` option which causes Windows-style `\r\n` line endings to be replaced with Unix-style `\n` line endings before calculating the diff, just like GNU `diff`'s `--strip-trailing-cr` flag. From 4f0ed930b50c96d68fb8233d735e49f9cc747c42 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Fri, 29 Dec 2023 11:05:08 +0000 Subject: [PATCH 4/4] Use capitalisation 'jsdiff', which seems most common --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 99b2198e..25b0b3fb 100644 --- a/README.md +++ b/README.md @@ -218,7 +218,7 @@ See [LICENSE](https://github.com/kpdecker/jsdiff/blob/master/LICENSE). ## Deviations from the published Myers diff algorithm -JsDiff deviates from the published algorithm in a couple of ways that don't affect results but do affect performance: +jsdiff deviates from the published algorithm in a couple of ways that don't affect results but do affect performance: -* JsDiff keeps track of the diff for each diagonal using a linked list of change objects for each diagonal, rather than the historical array of furthest-reaching D-paths on each diagonal contemplated on page 8 of Myers's paper. -* JsDiff skips considering diagonals where the furthest-reaching D-path would go off the edge of the edit graph. This dramatically reduces the time cost (from quadratic to linear) in cases where the new text just appends or truncates content at the end of the old text. 
+* jsdiff keeps track of the diff for each diagonal using a linked list of change objects for each diagonal, rather than the historical array of furthest-reaching D-paths on each diagonal contemplated on page 8 of Myers's paper. +* jsdiff skips considering diagonals where the furthest-reaching D-path would go off the edge of the edit graph. This dramatically reduces the time cost (from quadratic to linear) in cases where the new text just appends or truncates content at the end of the old text.