-
Notifications
You must be signed in to change notification settings - Fork 236
Reduce TextDiffer allocations, including LOH #11983
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,38 +18,53 @@ internal abstract partial class TextDiffer | |
| protected abstract int OldSourceLength { get; } | ||
| protected abstract int NewSourceLength { get; } | ||
|
|
||
| private int _oldSourceOffset; | ||
| private int _newSourceOffset; | ||
|
|
||
| protected abstract bool SourceEqual(int oldSourceIndex, int newSourceIndex); | ||
|
|
||
| protected List<DiffEdit> ComputeDiff() | ||
| { | ||
| var edits = new List<DiffEdit>(capacity: 4); | ||
| var builder = new DiffEditBuilder(edits); | ||
|
|
||
| var lowA = 0; | ||
| var highA = OldSourceLength; | ||
| var lowB = 0; | ||
| var highB = NewSourceLength; | ||
|
|
||
| // Determine the extent of the changes in both texts, as this will allow us | ||
| // to limit the amount of memory needed in the vf/vr arrays. By doing this though, | ||
| // we will need to adjust SourceEqual requests to use an appropriate offset. | ||
| FindChangeExtent(ref lowA, ref highA, ref lowB, ref highB); | ||
|
|
||
| _oldSourceOffset = lowA; | ||
| _newSourceOffset = lowB; | ||
|
|
||
| var oldSourceLength = highA - lowA; | ||
| var newSourceLength = highB - lowB; | ||
|
|
||
| // Initialize the vectors to use for forward and reverse searches. | ||
| var max = NewSourceLength + OldSourceLength; | ||
| var max = newSourceLength + oldSourceLength; | ||
| using var vf = new IntArray((2 * max) + 1); | ||
| using var vr = new IntArray((2 * max) + 1); | ||
|
|
||
| ComputeDiffRecursive(builder, 0, OldSourceLength, 0, NewSourceLength, vf, vr); | ||
| ComputeDiffRecursive(builder, 0, oldSourceLength, 0, newSourceLength, vf, vr); | ||
|
|
||
| // Update the resultant edits with the appropriate offsets | ||
| for (var i = 0; i < edits.Count; i++) | ||
| { | ||
| var edit = edits[i]; | ||
|
|
||
| edits[i] = new DiffEdit(edit.Kind, _oldSourceOffset + edit.Position, _newSourceOffset + edit.NewTextPosition, edit.Length); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since there was effort to make the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| } | ||
|
|
||
| return edits; | ||
| } | ||
|
|
||
| private void ComputeDiffRecursive(DiffEditBuilder edits, int lowA, int highA, int lowB, int highB, IntArray vf, IntArray vr) | ||
| { | ||
| while (lowA < highA && lowB < highB && SourceEqual(lowA, lowB)) | ||
| { | ||
| // Skip equal text at the start. | ||
| lowA++; | ||
| lowB++; | ||
| } | ||
|
|
||
| while (lowA < highA && lowB < highB && SourceEqual(highA - 1, highB - 1)) | ||
| { | ||
| // Skip equal text at the end. | ||
| highA--; | ||
| highB--; | ||
| } | ||
| FindChangeExtent(ref lowA, ref highA, ref lowB, ref highB); | ||
|
|
||
| if (lowA == highA) | ||
| { | ||
|
|
@@ -82,6 +97,23 @@ private void ComputeDiffRecursive(DiffEditBuilder edits, int lowA, int highA, in | |
| } | ||
| } | ||
|
|
||
| private void FindChangeExtent(ref int lowA, ref int highA, ref int lowB, ref int highB) | ||
| { | ||
| while (lowA < highA && lowB < highB && SourceEqualUsingOffset(lowA, lowB)) | ||
| { | ||
| // Skip equal text at the start. | ||
| lowA++; | ||
| lowB++; | ||
| } | ||
|
|
||
| while (lowA < highA && lowB < highB && SourceEqualUsingOffset(highA - 1, highB - 1)) | ||
| { | ||
| // Skip equal text at the end. | ||
| highA--; | ||
| highB--; | ||
| } | ||
| } | ||
|
|
||
| private (int, int) FindMiddleSnake(int lowA, int highA, int lowB, int highB, IntArray vf, IntArray vr) | ||
| { | ||
| var n = highA - lowA; | ||
|
|
@@ -126,7 +158,7 @@ private void ComputeDiffRecursive(DiffEditBuilder edits, int lowA, int highA, in | |
| var y = x - k; | ||
|
|
||
| // Traverse diagonal if possible. | ||
| while (x < highA && y < highB && SourceEqual(x, y)) | ||
| while (x < highA && y < highB && SourceEqualUsingOffset(x, y)) | ||
| { | ||
| x++; | ||
| y++; | ||
|
|
@@ -169,7 +201,7 @@ private void ComputeDiffRecursive(DiffEditBuilder edits, int lowA, int highA, in | |
| var y = x - k; | ||
|
|
||
| // Traverse diagonal if possible. | ||
| while (x > lowA && y > lowB && SourceEqual(x - 1, y - 1)) | ||
| while (x > lowA && y > lowB && SourceEqualUsingOffset(x - 1, y - 1)) | ||
| { | ||
| x--; | ||
| y--; | ||
|
|
@@ -195,4 +227,7 @@ private void ComputeDiffRecursive(DiffEditBuilder edits, int lowA, int highA, in | |
|
|
||
| throw Assumes.NotReachable(); | ||
| } | ||
|
|
||
| private bool SourceEqualUsingOffset(int oldSourceIndex, int newSourceIndex) | ||
| => SourceEqual(oldSourceIndex + _oldSourceOffset, newSourceIndex + _newSourceOffset); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suspect the answer is buried in the complicated algorithm I'm refusing to look at, but why pass
0here only to then apply an offset later, instead of just computing the diff starting from the offset?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried using an offset to index into the vf/vr vectors in, but that got logically quite hairy, and I still had bugs after trying that for a couple hours. It seemed easier to just say:
algorithm, do your thing over this smaller region. But, when you need to call SourceEqual, let me intersept that call and do the translation. And when you are done, I'll translate your answer to use the appropriate coordinate space.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would have done the same. I hate these highly mathematical algorithms with tiny variable names. I didn't get into this business to do math.