Skip to content
This repository has been archived by the owner on Jun 26, 2020. It is now read-only.

Optimized diff() function which now uses fastDiff() function internally for large data sets #274

Merged
merged 7 commits into from
Feb 13, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion src/diff.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
* @module utils/diff
*/

import fastDiff from '../src/fastdiff';

// The following code is based on the "O(NP) Sequence Comparison Algorithm"
// by Sun Wu, Udi Manber, Gene Myers, Webb Miller.

Expand All @@ -16,6 +18,11 @@
*
* diff( 'aba', 'acca' ); // [ 'equal', 'insert', 'insert', 'delete', 'equal' ]
*
* This function is based on the "O(NP) Sequence Comparison Algorithm" by Sun Wu, Udi Manber, Gene Myers, Webb Miller.
* Unfortunately, while it gives the most precise results, it is too complex for longer strings/arrays (above 200 items).
* Therefore, `diff()` automatically switches to {@link module:utils/fastdiff~fastDiff `fastDiff()`} when detecting
* such a scenario. The return formats of both functions are identical.
*
* @param {Array|String} a Input array or string.
* @param {Array|String} b Output array or string.
* @param {Function} [cmp] Optional function used to compare array values, by default === is used.
Expand All @@ -27,11 +34,19 @@ export default function diff( a, b, cmp ) {
return a === b;
};

const aLength = a.length;
const bLength = b.length;

// Perform `fastDiff` for longer strings/arrays (see #269).
if ( aLength > 200 || bLength > 200 || aLength + bLength > 300 ) {
return diff.fastDiff( a, b, cmp, true );
}

// Temporary action type statics.
let _insert, _delete;

// Swapped the arrays to use the shorter one as the first one.
if ( b.length < a.length ) {
if ( bLength < aLength ) {
const tmp = a;

a = b;
Expand Down Expand Up @@ -117,3 +132,7 @@ export default function diff( a, b, cmp ) {
// We remove the first item that represents the action for the injected nulls.
return es[ delta ].slice( 1 );
}

// Store the API in static property to easily overwrite it in tests.
// Too bad dependency injection does not work in Webpack + ES 6 (const) + Babel.
diff.fastDiff = fastDiff;
225 changes: 160 additions & 65 deletions src/fastdiff.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
*/

/**
* Finds position of the first and last change in the given strings and generates set of changes. Set of changes
* can be applied to the input text in order to transform it into the output text, for example:
* Finds positions of the first and last change in the given string/array and generates a set of changes:
*
* fastDiff( '12a', '12xyza' );
* // [ { index: 2, type: 'insert', values: [ 'x', 'y', 'z' ] } ]
Expand All @@ -20,17 +19,25 @@
* fastDiff( '12xyza', '12a' );
* // [ { index: 2, type: 'delete', howMany: 3 } ]
*
* fastDiff( '12aa', '12a' );
* fastDiff( [ '1', '2', 'a', 'a' ], [ '1', '2', 'a' ] );
* // [ { index: 3, type: 'delete', howMany: 1 } ]
*
* fastDiff( '12abc3', '2ab' );
* fastDiff( [ '1', '2', 'a', 'b', 'c', '3' ], [ '2', 'a', 'b' ] );
* // [ { index: 0, type: 'insert', values: [ '2', 'a', 'b' ] }, { index: 3, type: 'delete', howMany: 6 } ]
*
* Using returned results you can modify `oldText` to transform it into `newText`:
* Passed arrays can contain any type of data, however, to compare them correctly, a custom comparator function
* should be passed as the third parameter:
*
* let input = '12abc3';
* const output = '2ab';
* const changes = fastDiff( input, output );
* fastDiff( [ { value: 1 }, { value: 2 } ], [ { value: 1 }, { value: 3 } ], ( a, b ) => {
* return a.value === b.value;
* } );
* // [ { index: 1, type: 'insert', values: [ { value: 3 } ] }, { index: 2, type: 'delete', howMany: 1 } ]
*
* The resulting set of changes can be applied to the input in order to transform it into the output, for example:
*
* let input = '12abc3';
* const output = '2ab';
* const changes = fastDiff( input, output );
*
* changes.forEach( change => {
* if ( change.type == 'insert' ) {
Expand All @@ -40,101 +47,156 @@
* }
* } );
*
* input === output; // -> true
* // input equals output now
*
* or in case of arrays:
*
* let input = [ '1', '2', 'a', 'b', 'c', '3' ];
* const output = [ '2', 'a', 'b' ];
* const changes = fastDiff( input, output );
*
* changes.forEach( change => {
* if ( change.type == 'insert' ) {
* input = input.slice( 0, change.index ).concat( change.values, input.slice( change.index ) );
* } else if ( change.type == 'delete' ) {
* input = input.slice( 0, change.index ).concat( input.slice( change.index + change.howMany ) );
* }
* } );
*
* // input equals output now
*
* The output format of this function is compatible with {@link module:utils/difftochanges~diffToChanges} output format.
* By passing `true` as the fourth parameter (`atomicChanges`) the output of this function will become compatible with
* the {@link module:utils/diff~diff `diff()`} function:
*
* @param {String} oldText Input string.
* @param {String} newText Input string.
* fastDiff( '12a', '12xyza', null, true );
* // [ 'equal', 'equal', 'insert', 'insert', 'insert', 'equal' ]
*
* The default output format of this function is compatible with the output format of
* {@link module:utils/difftochanges~diffToChanges `diffToChanges()`}. The `diffToChanges()` input format is, in turn,
* compatible with the output of {@link module:utils/diff~diff `diff()`}:
*
* const a = '1234';
* const b = '12xyz34';
*
* // Both calls will return the same results (grouped changes format).
* fastDiff( a, b );
* diffToChanges( diff( a, b ) );
*
* // Again, both calls will return the same results (atomic changes format).
* fastDiff( a, b, null, true );
* diff( a, b );
*
*
* @param {Array|String} a Input array or string.
* @param {Array|String} b Input array or string.
* @param {Function} [cmp] Optional function used to compare array values, by default `===` (strict equal operator) is used.
* @param {Boolean} [atomicChanges=false] Whether an array of `insert|delete|equal` operations should
* be returned instead of changes set. This makes this function compatible with {@link module:utils/diff~diff `diff()`}.
* @returns {Array} Array of changes.
*/
export default function fastDiff( oldText, newText ) {
// Check if both texts are equal.
if ( oldText === newText ) {
return [];
export default function fastDiff( a, b, cmp, atomicChanges = false ) {
// Set the comparator function.
cmp = cmp || function( a, b ) {
return a === b;
};

// Transform text or any iterable into arrays for easier, consistent processing.
if ( !Array.isArray( a ) ) {
a = Array.from( a );
}

if ( !Array.isArray( b ) ) {
b = Array.from( b );
}

const changeIndexes = findChangeBoundaryIndexes( oldText, newText );
// Find first and last change.
const changeIndexes = findChangeBoundaryIndexes( a, b, cmp );

return changeIndexesToChanges( newText, changeIndexes );
// Transform into changes array.
return atomicChanges ? changeIndexesToAtomicChanges( changeIndexes, b.length ) : changeIndexesToChanges( b, changeIndexes );
}

// Finds position of the first and last change in the given strings. For example:
// Finds position of the first and last change in the given arrays. For example:
//
// const indexes = findChangeBoundaryIndexes( '1234', '13424' );
// const indexes = findChangeBoundaryIndexes( [ '1', '2', '3', '4' ], [ '1', '3', '4', '2', '4' ] );
// console.log( indexes ); // { firstIndex: 1, lastIndexOld: 3, lastIndexNew: 4 }
//
// The above indexes means that in `oldText` modified part is `1[23]4` and in the `newText` it is `1[342]4`.
// Based on such indexes, array with `insert`/`delete` operations which allows transforming
// old text to the new one can be generated.
//
// It is expected that `oldText` and `newText` are different.
// The above indexes mean that in the first array the modified part is `1[23]4` and in the second array it is `1[342]4`.
// Based on such indexes, array with `insert`/`delete` operations which allows transforming first value into the second one
// can be generated.
//
// @param {String} oldText
// @param {String} newText
// @param {Array} arr1
// @param {Array} arr2
// @param {Function} cmp Comparator function.
// @returns {Object}
// @returns {Number} return.firstIndex Index of the first change in both strings (always the same for both).
// @returns {Number} result.lastIndexOld Index of the last common character in `oldText` string.
// @returns {Number} result.lastIndexNew Index of the last common character in `newText` string.
function findChangeBoundaryIndexes( oldText, newText ) {
// Find the first difference between texts.
const firstIndex = findFirstDifferenceIndex( oldText, newText );

// Remove the common part of texts and reverse them to make it simpler to find the last difference between texts.
const oldTextReversed = cutAndReverse( oldText, firstIndex );
const newTextReversed = cutAndReverse( newText, firstIndex );

// Find the first difference between reversed texts.
// It should be treated as "how many characters from the end the last difference occurred".
// @returns {Number} return.firstIndex Index of the first change in both values (always the same for both).
// @returns {Number} result.lastIndexOld Index of the last common value in `arr1`.
// @returns {Number} result.lastIndexNew Index of the last common value in `arr2`.
function findChangeBoundaryIndexes( arr1, arr2, cmp ) {
// Find the first difference between passed values.
const firstIndex = findFirstDifferenceIndex( arr1, arr2, cmp );

// If arrays are equal return -1 indexes object.
if ( firstIndex === -1 ) {
return { firstIndex: -1, lastIndexOld: -1, lastIndexNew: -1 };
}

// Remove the common part of each value and reverse them to make it simpler to find the last difference between them.
const oldArrayReversed = cutAndReverse( arr1, firstIndex );
const newArrayReversed = cutAndReverse( arr2, firstIndex );

// Find the first difference between reversed values.
// It should be treated as "how many elements from the end the last difference occurred".
//
// For example:
//
// initial -> after cut -> reversed:
// oldText: '321ba' -> '21ba' -> 'ab12'
// newText: '31xba' -> '1xba' -> 'abx1'
// lastIndex: -> 2
// initial -> after cut -> reversed:
// oldValue: '321ba' -> '21ba' -> 'ab12'
// newValue: '31xba' -> '1xba' -> 'abx1'
// lastIndex: -> 2
//
// So the last change occurred two characters from the end of the texts.
const lastIndex = findFirstDifferenceIndex( oldTextReversed, newTextReversed );
// So the last change occurred two characters from the end of the arrays.
const lastIndex = findFirstDifferenceIndex( oldArrayReversed, newArrayReversed, cmp );

// Use `lastIndex` to calculate proper offset, starting from the beginning (`lastIndex` kind of starts from the end).
const lastIndexOld = oldText.length - lastIndex;
const lastIndexNew = newText.length - lastIndex;
const lastIndexOld = arr1.length - lastIndex;
const lastIndexNew = arr2.length - lastIndex;

return { firstIndex, lastIndexOld, lastIndexNew };
}

// Returns a first index on which `oldText` and `newText` differ.
// Returns a first index on which given arrays differ. If both arrays are the same, -1 is returned.
//
// @param {String} oldText
// @param {String} newText
// @param {Array} arr1
// @param {Array} arr2
// @param {Function} cmp Comparator function.
// @returns {Number}
function findFirstDifferenceIndex( oldText, newText ) {
for ( let i = 0; i < Math.max( oldText.length, newText.length ); i++ ) {
if ( oldText[ i ] !== newText[ i ] ) {
function findFirstDifferenceIndex( arr1, arr2, cmp ) {
for ( let i = 0; i < Math.max( arr1.length, arr2.length ); i++ ) {
if ( arr1[ i ] === undefined || arr2[ i ] === undefined || !cmp( arr1[ i ], arr2[ i ] ) ) {
return i;
}
}
// No "backup" return cause we assume that `oldText` and `newText` differ. This means that they either have a
// difference or they have a different lengths. This means that the `if` condition will always be met eventually.

return -1; // Return -1 if arrays are equal.
}

// Removes `howMany` characters from the given `text` string starting from the beginning, then reverses and returns it.
// Returns a copy of the given array with `howMany` elements removed starting from the beginning and in reversed order.
//
// @param {String} text Text to be processed.
// @param {Number} howMany How many characters from text beginning to cut.
// @returns {String} Shortened and reversed text.
function cutAndReverse( text, howMany ) {
return text.substring( howMany ).split( '' ).reverse().join( '' );
// @param {Array} arr Array to be processed.
// @param {Number} howMany How many elements from array beginning to remove.
// @returns {Array} Shortened and reversed array.
function cutAndReverse( arr, howMany ) {
return arr.slice( howMany ).reverse();
}

// Generates changes array based on change indexes from `findChangeBoundaryIndexes` function. This function will
// generate array with 0 (no changes), 1 (deletion or insertion) or 2 records (insertion and deletion).
//
// @param {String} newText New text for which change indexes were calculated.
// @param {Array} newArray New array for which change indexes were calculated.
// @param {Object} changeIndexes Change indexes object from `findChangeBoundaryIndexes` function.
// @returns {Array.<Object>} Array of changes compatible with {@link module:utils/difftochanges~diffToChanges} format.
function changeIndexesToChanges( newText, changeIndexes ) {
function changeIndexesToChanges( newArray, changeIndexes ) {
const result = [];
const { firstIndex, lastIndexOld, lastIndexNew } = changeIndexes;

Expand All @@ -145,7 +207,7 @@ function changeIndexesToChanges( newText, changeIndexes ) {
result.push( {
index: firstIndex,
type: 'insert',
values: newText.substring( firstIndex, lastIndexNew ).split( '' )
values: newArray.slice( firstIndex, lastIndexNew )
} );
}

Expand All @@ -159,3 +221,36 @@ function changeIndexesToChanges( newText, changeIndexes ) {

return result;
}

// Converts change indexes from the `findChangeBoundaryIndexes` function into a flat array of
// `equal|insert|delete` operations (one array item per operation).
//
// The operations are emitted in a fixed order: `equal` (items before `firstIndex`), `insert` (items of the new array
// between `firstIndex` and `lastIndexNew`), `delete` (items of the old array between `firstIndex` and `lastIndexOld`)
// and finally `equal` (items of the new array from `lastIndexNew` up to `newLength`).
//
// @param {Object} changeIndexes Change indexes object from `findChangeBoundaryIndexes` function.
// @param {Number} newLength Length of the new array on which `findChangeBoundaryIndexes` calculated change indexes.
// @returns {Array.<String>} Array of changes compatible with {@link module:utils/diff~diff} format.
function changeIndexesToAtomicChanges( changeIndexes, newLength ) {
	const { firstIndex, lastIndexOld, lastIndexNew } = changeIndexes;

	// `firstIndex` equal to -1 means that no change was found, so every item is reported as `equal`.
	if ( firstIndex === -1 ) {
		return Array( newLength ).fill( 'equal' );
	}

	// Consecutive runs of operations as `[ action, howMany ]` pairs. Runs with a non-positive count produce nothing.
	const segments = [
		[ 'equal', firstIndex ],
		[ 'insert', lastIndexNew - firstIndex ],
		[ 'delete', lastIndexOld - firstIndex ],
		[ 'equal', newLength - lastIndexNew ]
	];

	const operations = [];

	for ( const [ action, howMany ] of segments ) {
		for ( let i = 0; i < howMany; i++ ) {
			operations.push( action );
		}
	}

	return operations;
}
46 changes: 46 additions & 0 deletions tests/_utils-tests/longtext.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/**
* @license Copyright (c) 2003-2019, CKSource - Frederico Knabben. All rights reserved.
* For licensing, see LICENSE.md.
*/

import getLongText from '../../tests/_utils/longtext';

describe( 'utils', () => {
	describe( 'getLongText', () => {
		// Reverses the given string character by character.
		const reverseText = text => text.split( '' ).reverse().join( '' );

		// The returned text is expected to have exactly the requested length.
		for ( const length of [ 0, 553, 1500, 4000 ] ) {
			it( `should return text with ${ length } length`, () => {
				expect( getLongText( length ).length ).to.equal( length );
			} );
		}

		// NOTE(review): the second argument is called `fromStart` in the test titles; presumably it selects
		// which part of the source text is used - confirm against the `getLongText()` implementation.
		it( 'should return different text with fromStart=false', () => {
			const defaultText = getLongText( 100 );
			const notFromStartText = getLongText( 100, false );

			expect( defaultText ).to.not.equal( notFromStartText );
		} );

		// Passing `true` as the third argument is expected to give the same text in reversed order.
		it( 'should return reversed text', () => {
			const plainText = getLongText( 100 );
			const reversedText = getLongText( 100, true, true );

			expect( plainText ).to.not.equal( reversedText );
			expect( plainText ).to.equal( reverseText( reversedText ) );
		} );

		it( 'should return reversed text (with fromStart=false)', () => {
			const plainText = getLongText( 150, false );
			const reversedText = getLongText( 150, false, true );

			expect( plainText ).to.not.equal( reversedText );
			expect( plainText ).to.equal( reverseText( reversedText ) );
		} );
	} );
} );
Loading