Skip to content

Commit

Permalink
GitObjects: use new SideChannelStream to hash during download
Browse files Browse the repository at this point in the history
The loose object download needs to verify the objects being sent across
the wire before moving them to their "real" locations in the object
directory. The previous implementation wrote them to disk then read from
that file to hash the contents.

Instead, create a new "SideChannelStream" that injects between the
responseStream and the inflater, but sends all of the data that is read
out to the filestream. Here is a diagram of the stream sequence:

  responseStream -> sideChannelStream -> inflateStream -> hashingStream
                           \                                   |
			    \                                 \|/
			     tempFileStream                devNull

By copying all data from the hashingStream into devNull, we ensure that
the responseStream is drained and written to the tempFileStream while
simultaneously inflating and hashing the object contents. We then
amortize this computation during the window that we are downloading data
over the network, minimizing the potential performance impact.

Helped-by: Matthew Cheetham <[email protected]>
Signed-off-by: Derrick Stolee <[email protected]>
  • Loading branch information
derrickstolee committed Oct 20, 2020
1 parent b8e2283 commit ded306b
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 11 deletions.
19 changes: 11 additions & 8 deletions GVFS/GVFS.Common/Git/GitObjects.cs
Original file line number Diff line number Diff line change
Expand Up @@ -185,18 +185,13 @@ public virtual string WriteLooseObject(Stream responseStream, string sha, bool o
{
LooseObjectToWrite toWrite = this.GetLooseObjectDestination(sha);

using (Stream fileStream = this.OpenTempLooseObjectStream(toWrite.TempFile))
{
StreamUtil.CopyToWithBuffer(responseStream, fileStream, bufToCopyWith);
fileStream.Flush();
}

if (this.checkData)
{
try
{
using (Stream readFile = this.fileSystem.OpenFileStream(toWrite.TempFile, FileMode.Open, FileAccess.Read, FileShare.Read, true))
using (InflaterInputStream inflate = new InflaterInputStream(readFile))
using (Stream fileStream = this.OpenTempLooseObjectStream(toWrite.TempFile))
using (SideChannelStream sideChannel = new SideChannelStream(from: responseStream, to: fileStream))
using (InflaterInputStream inflate = new InflaterInputStream(sideChannel))
using (HashingStream hashing = new HashingStream(inflate))
using (NoOpStream devNull = new NoOpStream())
{
Expand All @@ -221,6 +216,14 @@ public virtual string WriteLooseObject(Stream responseStream, string sha, bool o
throw new RetryableException(message);
}
}
else
{
using (Stream fileStream = this.OpenTempLooseObjectStream(toWrite.TempFile))
{
StreamUtil.CopyToWithBuffer(responseStream, fileStream, bufToCopyWith);
fileStream.Flush();
}
}

this.FinalizeTempFile(sha, toWrite, overwriteExistingObject);

Expand Down
60 changes: 60 additions & 0 deletions GVFS/GVFS.Common/Git/SideChannelStream.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
using System;
using System.IO;

namespace GVFS.Common.Git
{
/// <summary>
/// As you read from a SideChannelStream, we read from the inner
/// 'from' stream and write that data to the inner 'to' stream
/// before passing the bytes out to the reader.
/// </summary>
public class SideChannelStream : Stream
{
protected readonly Stream from;
protected readonly Stream to;

public SideChannelStream(Stream from, Stream to)
{
this.from = from;
this.to = to;
}

public override bool CanRead => true;

public override bool CanSeek => false;

public override bool CanWrite => false;

public override long Length => 0;

public override long Position { get => 0; set => throw new NotImplementedException(); }

public override void Flush()
{
this.from.Flush();
this.to.Flush();
}

public override int Read(byte[] buffer, int offset, int count)
{
int n = this.from.Read(buffer, offset, count);
this.to.Write(buffer, offset, n);
return n;
}

public override long Seek(long offset, SeekOrigin origin)
{
throw new NotImplementedException();
}

public override void SetLength(long value)
{
throw new NotImplementedException();
}

public override void Write(byte[] buffer, int offset, int count)
{
throw new NotImplementedException();
}
}
}
6 changes: 3 additions & 3 deletions GVFS/GVFS.UnitTests/Git/GitObjectsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public void WriteLooseObject_DetectsDataNotCompressed()
}

foundException.ShouldBeTrue("Failed to throw RetryableException");
this.openedPaths.Count.ShouldEqual(2, "Incorrect number of opened paths (one to write temp file, one to read temp file)");
this.openedPaths.Count.ShouldEqual(1, "Incorrect number of opened paths (one to write temp file)");
this.openedPaths[0].IndexOf(EmptySha.Substring(2)).ShouldBeAtMost(-1, "Should not have written to the loose object location");
}

Expand Down Expand Up @@ -92,7 +92,7 @@ public void WriteLooseObject_DetectsIncorrectData()
}

foundException.ShouldBeTrue("Failed to throw SecurityException");
this.openedPaths.Count.ShouldEqual(2, "Incorrect number of opened paths (one to write temp file, one to read temp file)");
this.openedPaths.Count.ShouldEqual(1, "Incorrect number of opened paths (one to write temp file)");
this.openedPaths[0].IndexOf(EmptySha.Substring(2)).ShouldBeAtMost(-1, "Should not have written to the loose object location");
}

Expand Down Expand Up @@ -120,7 +120,7 @@ public void WriteLooseObject_Success()
gitObjects.WriteLooseObject(stream, RealSha, true, new byte[128]);
}

this.openedPaths.Count.ShouldEqual(3, "Incorrect number of opened paths");
this.openedPaths.Count.ShouldEqual(2, "Incorrect number of opened paths");
moved.ShouldBeTrue("File was not moved");
}

Expand Down

0 comments on commit ded306b

Please sign in to comment.