Browse Source

Implement zero-copy for single window objects

Objects that fall completely within a single window can be worked
with in a zero-copy fashion, provided that the window is backed by
a normal byte[] and not by a ByteBuffer.

This works for a surprising number of objects.  The default window
size is 8 KiB, but most deltas are quite a bit smaller than that.
Objects smaller than 1/2 of the window size have a very good chance
of falling completely within a window's array, which means we can
work with them without copying their data around.

Larger objects, or objects which are unlucky enough to span over a
window boundary, get copied through the temporary buffer.  We pay
a tiny penalty to realize we can't use the zero-copy code path,
but it's easier than trying to keep track of two adjacent windows.

With this change (as well as everything preceding it), packing
is actually a bit faster.  Some crude benchmarks based on cloning
linux-2.6.git (~324 MiB, 1,624,785 objects) over localhost using
the C git client and the JGit daemon show we get better throughput,
and slightly better times:

  Total Time    | Throughput
  (old)  (now)  | (old)          (now)
  --------------+---------------------------
  2m45s  2m37s  | 12.49 MiB/s    21.17 MiB/s
  2m42s  2m36s  | 16.29 MiB/s    22.63 MiB/s
  2m37s  2m31s  | 16.07 MiB/s    21.92 MiB/s

Change-Id: I48b2c8d37f08d7bf5e76c5a8020cde4a16ae3396
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
stable-0.9
Shawn O. Pearce 15 years ago
parent
commit
3a7aec03e0
  1. 18
      org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteArrayWindow.java
  2. 60
      org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java
  3. 9
      org.eclipse.jgit/src/org/eclipse/jgit/lib/WindowCursor.java

18
org.eclipse.jgit/src/org/eclipse/jgit/lib/ByteArrayWindow.java

@ -45,6 +45,9 @@
package org.eclipse.jgit.lib; package org.eclipse.jgit.lib;
import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException; import java.util.zip.DataFormatException;
import java.util.zip.Inflater; import java.util.zip.Inflater;
@ -80,4 +83,19 @@ final class ByteArrayWindow extends ByteWindow {
o += inf.inflate(b, o, b.length - o); o += inf.inflate(b, o, b.length - o);
return o; return o;
} }
/**
 * Feed a slice of this window's backing array into a CRC32 accumulator.
 *
 * @param out
 *            checksum accumulator to update.
 * @param pos
 *            position of the first byte; {@code start} is subtracted from
 *            it to obtain the index into {@code array}.
 * @param cnt
 *            number of bytes to feed into {@code out}.
 */
void crc32(CRC32 out, long pos, int cnt) {
    // Translate the position into an index within the backing array.
    final int ptr = (int) (pos - start);
    out.update(array, ptr, cnt);
}
/**
 * Write a slice of this window's backing array directly to a stream.
 *
 * @param out
 *            stream receiving the bytes.
 * @param pos
 *            position of the first byte; {@code start} is subtracted from
 *            it to obtain the index into {@code array}.
 * @param cnt
 *            number of bytes to write.
 * @throws IOException
 *             the stream rejected the write.
 */
void write(OutputStream out, long pos, int cnt) throws IOException {
    // Translate the position into an index within the backing array.
    final int ptr = (int) (pos - start);
    out.write(array, ptr, cnt);
}
/**
 * Run a slice of this window's backing array through an inflater,
 * discarding the inflated output.
 *
 * @param inf
 *            inflater that is handed the slice as its input.
 * @param tmp
 *            scratch buffer receiving inflated bytes; its contents are
 *            overwritten on every pass and never read back here.
 * @param pos
 *            position of the first byte; {@code start} is subtracted from
 *            it to obtain the index into {@code array}.
 * @param cnt
 *            number of bytes to hand to the inflater.
 * @throws DataFormatException
 *             the inflater rejected the data.
 */
void check(Inflater inf, byte[] tmp, long pos, int cnt)
        throws DataFormatException {
    final int ptr = (int) (pos - start);
    inf.setInput(array, ptr, cnt);

    // Keep draining until the inflater stops producing output.
    int produced;
    do {
        produced = inf.inflate(tmp, 0, tmp.length);
    } while (produced > 0);
}
} }

60
org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java

@ -338,26 +338,31 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
final long dataOffset = src.copyOffset + headerCnt; final long dataOffset = src.copyOffset + headerCnt;
final long dataLength; final long dataLength;
final long expectedCRC; final long expectedCRC;
final ByteArrayWindow quickCopy;
// Verify the object isn't corrupt before sending. If it is, // Verify the object isn't corrupt before sending. If it is,
// we report it missing instead. // we report it missing instead.
// //
try { try {
dataLength = findEndOffset(src.copyOffset) - dataOffset; dataLength = findEndOffset(src.copyOffset) - dataOffset;
quickCopy = curs.quickCopy(this, dataOffset, dataLength);
if (idx().hasCRC32Support()) { if (idx().hasCRC32Support()) {
// Index has the CRC32 code cached, validate the object. // Index has the CRC32 code cached, validate the object.
// //
expectedCRC = idx().findCRC32(src); expectedCRC = idx().findCRC32(src);
if (quickCopy != null) {
long pos = dataOffset; quickCopy.crc32(crc1, dataOffset, (int) dataLength);
long cnt = dataLength; } else {
while (cnt > 0) { long pos = dataOffset;
final int n = (int) Math.min(cnt, buf.length); long cnt = dataLength;
readFully(pos, buf, 0, n, curs); while (cnt > 0) {
crc1.update(buf, 0, n); final int n = (int) Math.min(cnt, buf.length);
pos += n; readFully(pos, buf, 0, n, curs);
cnt -= n; crc1.update(buf, 0, n);
pos += n;
cnt -= n;
}
} }
if (crc1.getValue() != expectedCRC) { if (crc1.getValue() != expectedCRC) {
setCorrupt(src.copyOffset); setCorrupt(src.copyOffset);
@ -370,21 +375,25 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
// now while inflating the raw data to get zlib to tell us // now while inflating the raw data to get zlib to tell us
// whether or not the data is safe. // whether or not the data is safe.
// //
long pos = dataOffset;
long cnt = dataLength;
Inflater inf = curs.inflater(); Inflater inf = curs.inflater();
byte[] tmp = new byte[1024]; byte[] tmp = new byte[1024];
while (cnt > 0) { if (quickCopy != null) {
final int n = (int) Math.min(cnt, buf.length); quickCopy.check(inf, tmp, dataOffset, (int) dataLength);
readFully(pos, buf, 0, n, curs); } else {
crc1.update(buf, 0, n); long pos = dataOffset;
inf.setInput(buf, 0, n); long cnt = dataLength;
while (inf.inflate(tmp, 0, tmp.length) > 0) while (cnt > 0) {
continue; final int n = (int) Math.min(cnt, buf.length);
pos += n; readFully(pos, buf, 0, n, curs);
cnt -= n; crc1.update(buf, 0, n);
inf.setInput(buf, 0, n);
while (inf.inflate(tmp, 0, tmp.length) > 0)
continue;
pos += n;
cnt -= n;
}
} }
if (!inf.finished()) { if (!inf.finished() || inf.getBytesRead() != dataLength) {
setCorrupt(src.copyOffset); setCorrupt(src.copyOffset);
throw new EOFException(MessageFormat.format( throw new EOFException(MessageFormat.format(
JGitText.get().shortCompressedStreamAt, JGitText.get().shortCompressedStreamAt,
@ -413,7 +422,14 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
throw gone; throw gone;
} }
if (dataLength <= buf.length) { if (quickCopy != null) {
// The entire object fits into a single byte array window slice,
// and we have it pinned. Write this out without copying.
//
out.writeHeader(src, inflatedLength);
quickCopy.write(out, dataOffset, (int) dataLength);
} else if (dataLength <= buf.length) {
// Tiny optimization: Lots of objects are very small deltas or // Tiny optimization: Lots of objects are very small deltas or
// deflated commits that are likely to fit in the copy buffer. // deflated commits that are likely to fit in the copy buffer.
// //

9
org.eclipse.jgit/src/org/eclipse/jgit/lib/WindowCursor.java

@ -166,6 +166,15 @@ final class WindowCursor extends ObjectReader implements ObjectReuseAsIs {
} }
} }
/**
 * Pin the window at {@code pos} and return it when the whole range
 * {@code [pos, pos + cnt)} can be served from that one array-backed window.
 *
 * @param p
 *            pack file to pin the window from.
 * @param pos
 *            position of the first byte of the range.
 * @param cnt
 *            number of bytes in the range.
 * @return the pinned window if it is a {@link ByteArrayWindow} that also
 *         contains the last byte of the range; {@code null} otherwise.
 * @throws IOException
 *             the window could not be pinned.
 */
ByteArrayWindow quickCopy(PackFile p, long pos, long cnt)
        throws IOException {
    pin(p, pos);

    // Only windows backed by a plain byte[] qualify.
    if (!(window instanceof ByteArrayWindow))
        return null;

    // The last byte of the range must be inside the same window.
    final long last = pos + (cnt - 1);
    if (!window.contains(p, last))
        return null;

    return (ByteArrayWindow) window;
}
Inflater inflater() { Inflater inflater() {
prepareInflater(); prepareInflater();
return inf; return inf;

Loading…
Cancel
Save