Skip to content

Commit

Permalink
Flip core algorithm so everything is no longer the mirror image of My…
Browse files Browse the repository at this point in the history
…ers's paper (#440)

* Fix core algorithm implementation being mirrored relative to the Myers algorithm

It makes it harder than it needs to be to reason about the algorithm when everything is flipped relative to Myers' paper. In Myers' paper, we keep track in an array of the index each diagonal has reached in the old string, and positive diagonal numbers represent diagonals where we have done more deletions than insertions. In JsDiff as it exists on master, we keep track in an array of the index each diagonal has reached in the NEW string, and positive diagonal numbers represent diagonals where we have done more insertions than deletions. Everything is mirrored, and this also causes an actual behaviour mismatch between JsDiff and an accurate Myers diff implementation - namely that when we diff e.g. 'abcd' against 'acbd' we output a diff that does an insertion and then later a deletion, rather than a deletion first and an insertion later like it should be.

This patch makes the code in base.js be a closer match to what's in Myers's paper, which should make comparing this to other implementations or adding any of the optimizations proposed in Myers's paper easier. For now, this DOESN'T change any behaviour (I've added a hack, noted with a TODO, to ensure this) although it'll now be straightforward to fix the bug mentioned above.

* Fix comment
  • Loading branch information
ExplodingCabbage authored Dec 27, 2023
1 parent a2dc5ec commit bf5ec4a
Showing 1 changed file with 29 additions and 22 deletions.
51 changes: 29 additions & 22 deletions src/diff/base.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ Diff.prototype = {
maxEditLength = Math.min(maxEditLength, options.maxEditLength);
}

let bestPath = [{ newPos: -1, lastComponent: undefined }];
let bestPath = [{ oldPos: -1, lastComponent: undefined }];

// Seed editLength = 0, i.e. the content starts with the same values
let oldPos = this.extractCommon(bestPath[0], newString, oldString, 0);
if (bestPath[0].newPos + 1 >= newLen && oldPos + 1 >= oldLen) {
let newPos = this.extractCommon(bestPath[0], newString, oldString, 0);
if (bestPath[0].oldPos + 1 >= oldLen && newPos + 1 >= newLen) {
// Identity per the equality and tokenizer
return done([{value: this.join(newString), count: newString.length}]);
}
Expand All @@ -47,35 +47,42 @@ Diff.prototype = {
function execEditLength() {
for (let diagonalPath = -1 * editLength; diagonalPath <= editLength; diagonalPath += 2) {
let basePath;
let addPath = bestPath[diagonalPath - 1],
removePath = bestPath[diagonalPath + 1],
oldPos = (removePath ? removePath.newPos : 0) - diagonalPath;
if (addPath) {
let removePath = bestPath[diagonalPath - 1],
addPath = bestPath[diagonalPath + 1];
if (removePath) {
// No one else is going to attempt to use this value, clear it
bestPath[diagonalPath - 1] = undefined;
}

let canAdd = addPath && addPath.newPos + 1 < newLen,
canRemove = removePath && 0 <= oldPos && oldPos < oldLen;
let canAdd = false;
if (addPath) {
// what newPos will be after we do an insertion:
const addPathNewPos = addPath.oldPos - diagonalPath;
canAdd = addPath && 0 <= addPathNewPos && addPathNewPos < newLen;
}

let canRemove = removePath && removePath.oldPos + 1 < oldLen;
if (!canAdd && !canRemove) {
// If this path is a terminal then prune
bestPath[diagonalPath] = undefined;
continue;
}

// Select the diagonal that we want to branch from. We select the prior
// path whose position in the new string is the farthest from the origin
// path whose position in the old string is the farthest from the origin
// and does not pass the bounds of the diff graph
if (!canAdd || (canRemove && addPath.newPos < removePath.newPos)) {
basePath = self.addToPath(removePath, undefined, true, 0);
// TODO: Remove the `+ 1` here to make behavior match Myers algorithm
// and prefer to order removals before insertions.
if (!canRemove || (canAdd && removePath.oldPos + 1 < addPath.oldPos)) {
basePath = self.addToPath(addPath, true, undefined, 0);
} else {
basePath = self.addToPath(addPath, true, undefined, 1);
basePath = self.addToPath(removePath, undefined, true, 1);
}

oldPos = self.extractCommon(basePath, newString, oldString, diagonalPath);
newPos = self.extractCommon(basePath, newString, oldString, diagonalPath);

// If we have hit the end of both strings, then we are done
if (basePath.newPos + 1 >= newLen && oldPos + 1 >= oldLen) {
if (basePath.oldPos + 1 >= oldLen && newPos + 1 >= newLen) {
return done(buildValues(self, basePath.lastComponent, newString, oldString, self.useLongestToken));
} else {
// Otherwise track this path as a potential candidate and continue.
Expand Down Expand Up @@ -112,25 +119,25 @@ Diff.prototype = {
}
},

addToPath(path, added, removed, newPosInc) {
addToPath(path, added, removed, oldPosInc) {
let last = path.lastComponent;
if (last && last.added === added && last.removed === removed) {
return {
newPos: path.newPos + newPosInc,
oldPos: path.oldPos + oldPosInc,
lastComponent: {count: last.count + 1, added: added, removed: removed, previousComponent: last.previousComponent }
};
} else {
return {
newPos: path.newPos + newPosInc,
oldPos: path.oldPos + oldPosInc,
lastComponent: {count: 1, added: added, removed: removed, previousComponent: last }
};
}
},
extractCommon(basePath, newString, oldString, diagonalPath) {
let newLen = newString.length,
oldLen = oldString.length,
newPos = basePath.newPos,
oldPos = newPos - diagonalPath,
oldPos = basePath.oldPos,
newPos = oldPos - diagonalPath,

commonCount = 0;
while (newPos + 1 < newLen && oldPos + 1 < oldLen && this.equals(newString[newPos + 1], oldString[oldPos + 1])) {
Expand All @@ -143,8 +150,8 @@ Diff.prototype = {
basePath.lastComponent = {count: commonCount, previousComponent: basePath.lastComponent};
}

basePath.newPos = newPos;
return oldPos;
basePath.oldPos = oldPos;
return newPos;
},

equals(left, right) {
Expand Down

0 comments on commit bf5ec4a

Please sign in to comment.