Browse Source

PackWriter: Avoid CRC-32 validation when feeding IndexPack

There is no need to validate the object contents during
copyObjectAsIs if the result is going to be parsed by unpack-objects
or index-pack.  Both programs will compute the SHA-1 of the object,
and also validate most of the pack structure.  For git daemon
like servers, this work is already done on the client end of the
connection, so the server doesn't need to repeat that work itself.

Disable object validation for the 3 transport cases where we know
the remote side will handle object validation for us (push, bundle
creation, and upload pack).  This improves performance on the server
side by reducing the work that must be done.

Change-Id: Iabb78eec45898e4a17f7aab3fb94c004d8d69af6
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
stable-0.12
Shawn O. Pearce 14 years ago
parent
commit
1b2062fe37
  1. 2
      org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ShowPackDelta.java
  2. 27
      org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java
  3. 7
      org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java
  4. 21
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java
  5. 28
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java
  6. 1
      org.eclipse.jgit/src/org/eclipse/jgit/transport/BasePackPushConnection.java
  7. 1
      org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java
  8. 1
      org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java

2
org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ShowPackDelta.java

@ -107,7 +107,7 @@ class ShowPackDelta extends TextBuiltin {
asis.selectObjectRepresentation(pw, NullProgressMonitor.INSTANCE, asis.selectObjectRepresentation(pw, NullProgressMonitor.INSTANCE,
Collections.singleton(target)); Collections.singleton(target));
asis.copyObjectAsIs(new PackOutputStream(NullProgressMonitor.INSTANCE, asis.copyObjectAsIs(new PackOutputStream(NullProgressMonitor.INSTANCE,
buf, pw), target); buf, pw), target, true);
// At this point the object header has no delta information, // At this point the object header has no delta information,
// because it was output as though it were a whole object. // because it was output as though it were a whole object.

27
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java

@ -313,21 +313,21 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
} }
final void copyAsIs(PackOutputStream out, LocalObjectToPack src, final void copyAsIs(PackOutputStream out, LocalObjectToPack src,
WindowCursor curs) throws IOException, boolean validate, WindowCursor curs) throws IOException,
StoredObjectRepresentationNotAvailableException { StoredObjectRepresentationNotAvailableException {
beginCopyAsIs(src); beginCopyAsIs(src);
try { try {
copyAsIs2(out, src, curs); copyAsIs2(out, src, validate, curs);
} finally { } finally {
endCopyAsIs(); endCopyAsIs();
} }
} }
private void copyAsIs2(PackOutputStream out, LocalObjectToPack src, private void copyAsIs2(PackOutputStream out, LocalObjectToPack src,
WindowCursor curs) throws IOException, boolean validate, WindowCursor curs) throws IOException,
StoredObjectRepresentationNotAvailableException { StoredObjectRepresentationNotAvailableException {
final CRC32 crc1 = new CRC32(); final CRC32 crc1 = validate ? new CRC32() : null;
final CRC32 crc2 = new CRC32(); final CRC32 crc2 = validate ? new CRC32() : null;
final byte[] buf = out.getCopyBuffer(); final byte[] buf = out.getCopyBuffer();
// Rip apart the header so we can discover the size. // Rip apart the header so we can discover the size.
@ -348,17 +348,23 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
do { do {
c = buf[headerCnt++] & 0xff; c = buf[headerCnt++] & 0xff;
} while ((c & 128) != 0); } while ((c & 128) != 0);
if (validate) {
crc1.update(buf, 0, headerCnt); crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt); crc2.update(buf, 0, headerCnt);
}
} else if (typeCode == Constants.OBJ_REF_DELTA) { } else if (typeCode == Constants.OBJ_REF_DELTA) {
if (validate) {
crc1.update(buf, 0, headerCnt); crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt); crc2.update(buf, 0, headerCnt);
}
readFully(src.offset + headerCnt, buf, 0, 20, curs); readFully(src.offset + headerCnt, buf, 0, 20, curs);
if (validate) {
crc1.update(buf, 0, 20); crc1.update(buf, 0, 20);
crc2.update(buf, 0, 20); crc2.update(buf, 0, 20);
}
headerCnt += 20; headerCnt += 20;
} else { } else if (validate) {
crc1.update(buf, 0, headerCnt); crc1.update(buf, 0, headerCnt);
crc2.update(buf, 0, headerCnt); crc2.update(buf, 0, headerCnt);
} }
@ -374,7 +380,7 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
try { try {
quickCopy = curs.quickCopy(this, dataOffset, dataLength); quickCopy = curs.quickCopy(this, dataOffset, dataLength);
if (idx().hasCRC32Support()) { if (validate && idx().hasCRC32Support()) {
// Index has the CRC32 code cached, validate the object. // Index has the CRC32 code cached, validate the object.
// //
expectedCRC = idx().findCRC32(src); expectedCRC = idx().findCRC32(src);
@ -397,7 +403,7 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
JGitText.get().objectAtHasBadZlibStream, JGitText.get().objectAtHasBadZlibStream,
src.offset, getPackFile())); src.offset, getPackFile()));
} }
} else { } else if (validate) {
// We don't have a CRC32 code in the index, so compute it // We don't have a CRC32 code in the index, so compute it
// now while inflating the raw data to get zlib to tell us // now while inflating the raw data to get zlib to tell us
// whether or not the data is safe. // whether or not the data is safe.
@ -427,6 +433,8 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
src.offset)); src.offset));
} }
expectedCRC = crc1.getValue(); expectedCRC = crc1.getValue();
} else {
expectedCRC = -1;
} }
} catch (DataFormatException dataFormat) { } catch (DataFormatException dataFormat) {
setCorrupt(src.offset); setCorrupt(src.offset);
@ -473,12 +481,13 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
while (cnt > 0) { while (cnt > 0) {
final int n = (int) Math.min(cnt, buf.length); final int n = (int) Math.min(cnt, buf.length);
readFully(pos, buf, 0, n, curs); readFully(pos, buf, 0, n, curs);
if (validate)
crc2.update(buf, 0, n); crc2.update(buf, 0, n);
out.write(buf, 0, n); out.write(buf, 0, n);
pos += n; pos += n;
cnt -= n; cnt -= n;
} }
if (crc2.getValue() != expectedCRC) { if (validate && crc2.getValue() != expectedCRC) {
throw new CorruptObjectException(MessageFormat.format(JGitText throw new CorruptObjectException(MessageFormat.format(JGitText
.get().objectAtHasBadZlibStream, src.offset, .get().objectAtHasBadZlibStream, src.offset,
getPackFile())); getPackFile()));

7
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java

@ -143,10 +143,11 @@ final class WindowCursor extends ObjectReader implements ObjectReuseAsIs {
} }
} }
public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp) public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp,
throws IOException, StoredObjectRepresentationNotAvailableException { boolean validate) throws IOException,
StoredObjectRepresentationNotAvailableException {
LocalObjectToPack src = (LocalObjectToPack) otp; LocalObjectToPack src = (LocalObjectToPack) otp;
src.pack.copyAsIs(out, src, this); src.pack.copyAsIs(out, src, validate, this);
} }
public void writeObjects(PackOutputStream out, List<ObjectToPack> list) public void writeObjects(PackOutputStream out, List<ObjectToPack> list)

21
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java

@ -130,8 +130,9 @@ public interface ObjectReuseAsIs {
* reduce data locality for the reader, slowing down data access. * reduce data locality for the reader, slowing down data access.
* *
* Invoking {@link PackOutputStream#writeObject(ObjectToPack)} will cause * Invoking {@link PackOutputStream#writeObject(ObjectToPack)} will cause
* {@link #copyObjectAsIs(PackOutputStream, ObjectToPack)} to be invoked * {@link #copyObjectAsIs(PackOutputStream, ObjectToPack, boolean)} to be
* recursively on {@code this} if the current object is scheduled for reuse. * invoked recursively on {@code this} if the current object is scheduled
* for reuse.
* *
* @param out * @param out
* the stream to write each object to. * the stream to write each object to.
@ -160,7 +161,11 @@ public interface ObjectReuseAsIs {
* *
* <pre> * <pre>
* MyToPack mtp = (MyToPack) otp; * MyToPack mtp = (MyToPack) otp;
* byte[] raw = validate(mtp); // throw SORNAE here, if at all * byte[] raw;
* if (validate)
* raw = validate(mtp); // throw SORNAE here, if at all
* else
* raw = readFast(mtp);
* out.writeHeader(mtp, mtp.inflatedSize); * out.writeHeader(mtp, mtp.inflatedSize);
* out.write(raw); * out.write(raw);
* </pre> * </pre>
@ -169,6 +174,11 @@ public interface ObjectReuseAsIs {
* stream the object should be written to. * stream the object should be written to.
* @param otp * @param otp
* the object's saved representation information. * the object's saved representation information.
* @param validate
* if true the representation must be validated and not be
* corrupt before being reused. If false, validation may be
* skipped as it will be performed elsewhere in the processing
* pipeline.
* @throws StoredObjectRepresentationNotAvailableException * @throws StoredObjectRepresentationNotAvailableException
* the previously selected representation is no longer * the previously selected representation is no longer
* available. If thrown before {@code out.writeHeader} the pack * available. If thrown before {@code out.writeHeader} the pack
@ -179,8 +189,9 @@ public interface ObjectReuseAsIs {
* the stream's write method threw an exception. Packing will * the stream's write method threw an exception. Packing will
* abort. * abort.
*/ */
public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp) public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp,
throws IOException, StoredObjectRepresentationNotAvailableException; boolean validate) throws IOException,
StoredObjectRepresentationNotAvailableException;
/** /**
* Obtain the available cached packs. * Obtain the available cached packs.

28
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java

@ -177,6 +177,8 @@ public class PackWriter {
private boolean reuseDeltaCommits; private boolean reuseDeltaCommits;
private boolean reuseValidate;
private boolean thin; private boolean thin;
private boolean useCachedPacks; private boolean useCachedPacks;
@ -245,6 +247,7 @@ public class PackWriter {
deltaBaseAsOffset = config.isDeltaBaseAsOffset(); deltaBaseAsOffset = config.isDeltaBaseAsOffset();
reuseDeltas = config.isReuseDeltas(); reuseDeltas = config.isReuseDeltas();
reuseValidate = true; // be paranoid by default
stats = new Statistics(); stats = new Statistics();
} }
@ -298,6 +301,29 @@ public class PackWriter {
reuseDeltaCommits = reuse; reuseDeltaCommits = reuse;
} }
/**
* Check if the writer validates objects before copying them.
*
* @return true if validation is enabled; false if the reader will handle
* object validation as a side-effect of it consuming the output.
*/
public boolean isReuseValidatingObjects() {
return reuseValidate;
}
/**
* Enable (or disable) object validation during packing.
*
* @param validate
* if true the pack writer will validate an object before it is
* put into the output. This additional validation work may be
* necessary to avoid propagating corruption from one local pack
* file to another local pack file.
*/
public void setReuseValidatingObjects(boolean validate) {
reuseValidate = validate;
}
/** @return true if this writer is producing a thin pack. */ /** @return true if this writer is producing a thin pack. */
public boolean isThin() { public boolean isThin() {
return thin; return thin;
@ -1006,7 +1032,7 @@ public class PackWriter {
while (otp.isReuseAsIs()) { while (otp.isReuseAsIs()) {
try { try {
reuseSupport.copyObjectAsIs(out, otp); reuseSupport.copyObjectAsIs(out, otp, reuseValidate);
out.endObject(); out.endObject();
otp.setCRC(out.getCRC32()); otp.setCRC(out.getCRC32());
stats.reusedObjects++; stats.reusedObjects++;

1
org.eclipse.jgit/src/org/eclipse/jgit/transport/BasePackPushConnection.java

@ -260,6 +260,7 @@ public abstract class BasePackPushConnection extends BasePackConnection implemen
writer.setUseCachedPacks(true); writer.setUseCachedPacks(true);
writer.setThin(thinPack); writer.setThin(thinPack);
writer.setReuseValidatingObjects(false);
writer.setDeltaBaseAsOffset(capableOfsDelta); writer.setDeltaBaseAsOffset(capableOfsDelta);
writer.preparePack(monitor, newObjects, remoteObjects); writer.preparePack(monitor, newObjects, remoteObjects);
writer.writePack(monitor, monitor, out); writer.writePack(monitor, monitor, out);

1
org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java

@ -202,6 +202,7 @@ public class BundleWriter {
exc.add(r.getId()); exc.add(r.getId());
packWriter.setDeltaBaseAsOffset(true); packWriter.setDeltaBaseAsOffset(true);
packWriter.setThin(exc.size() > 0); packWriter.setThin(exc.size() > 0);
packWriter.setReuseValidatingObjects(false);
if (exc.size() == 0) if (exc.size() == 0)
packWriter.setTagTargets(tagTargets); packWriter.setTagTargets(tagTargets);
packWriter.preparePack(monitor, inc, exc); packWriter.preparePack(monitor, inc, exc);

1
org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java

@ -695,6 +695,7 @@ public class UploadPack {
pw.setReuseDeltaCommits(true); pw.setReuseDeltaCommits(true);
pw.setDeltaBaseAsOffset(options.contains(OPTION_OFS_DELTA)); pw.setDeltaBaseAsOffset(options.contains(OPTION_OFS_DELTA));
pw.setThin(options.contains(OPTION_THIN_PACK)); pw.setThin(options.contains(OPTION_THIN_PACK));
pw.setReuseValidatingObjects(false);
if (commonBase.isEmpty()) { if (commonBase.isEmpty()) {
Set<ObjectId> tagTargets = new HashSet<ObjectId>(); Set<ObjectId> tagTargets = new HashSet<ObjectId>();

Loading…
Cancel
Save