From 1b2062fe37b43e59e40e360139e03e1221fa5b6b Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Wed, 2 Mar 2011 12:23:55 -0800 Subject: [PATCH] PackWriter: Avoid CRC-32 validation when feeding IndexPack There is no need to validate the object contents during copyObjectAsIs if the result is going to be parsed by unpack-objects or index-pack. Both programs will compute the SHA-1 of the object, and also validate most of the pack structure. For git daemon like servers, this work is already done on the client end of the connection, so the server doesn't need to repeat that work itself. Disable object validation for the 3 transport cases where we know the remote side will handle object validation for us (push, bundle creation, and upload pack). This improves performance on the server side by reducing the work that must be done. Change-Id: Iabb78eec45898e4a17f7aab3fb94c004d8d69af6 Signed-off-by: Shawn O. Pearce --- .../eclipse/jgit/pgm/debug/ShowPackDelta.java | 2 +- .../eclipse/jgit/storage/file/PackFile.java | 41 +++++++++++-------- .../jgit/storage/file/WindowCursor.java | 7 ++-- .../jgit/storage/pack/ObjectReuseAsIs.java | 21 +++++++--- .../eclipse/jgit/storage/pack/PackWriter.java | 28 ++++++++++++- .../transport/BasePackPushConnection.java | 1 + .../eclipse/jgit/transport/BundleWriter.java | 1 + .../eclipse/jgit/transport/UploadPack.java | 1 + 8 files changed, 76 insertions(+), 26 deletions(-) diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ShowPackDelta.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ShowPackDelta.java index 0e0c73831..ddeec330f 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ShowPackDelta.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/debug/ShowPackDelta.java @@ -107,7 +107,7 @@ class ShowPackDelta extends TextBuiltin { asis.selectObjectRepresentation(pw, NullProgressMonitor.INSTANCE, Collections.singleton(target)); asis.copyObjectAsIs(new PackOutputStream(NullProgressMonitor.INSTANCE, - buf, pw), target); + buf, pw), target, true); // At this point the object header has no delta information, // because it was output as though it were a whole object. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java index 3a76d4c66..80820b2ad 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java @@ -313,21 +313,21 @@ public class PackFile implements Iterable { } final void copyAsIs(PackOutputStream out, LocalObjectToPack src, - WindowCursor curs) throws IOException, + boolean validate, WindowCursor curs) throws IOException, StoredObjectRepresentationNotAvailableException { beginCopyAsIs(src); try { - copyAsIs2(out, src, curs); + copyAsIs2(out, src, validate, curs); } finally { endCopyAsIs(); } } private void copyAsIs2(PackOutputStream out, LocalObjectToPack src, - WindowCursor curs) throws IOException, + boolean validate, WindowCursor curs) throws IOException, StoredObjectRepresentationNotAvailableException { - final CRC32 crc1 = new CRC32(); - final CRC32 crc2 = new CRC32(); + final CRC32 crc1 = validate ? new CRC32() : null; + final CRC32 crc2 = validate ? new CRC32() : null; final byte[] buf = out.getCopyBuffer(); // Rip apart the header so we can discover the size. @@ -348,17 +348,23 @@ public class PackFile implements Iterable { do { c = buf[headerCnt++] & 0xff; } while ((c & 128) != 0); - crc1.update(buf, 0, headerCnt); - crc2.update(buf, 0, headerCnt); + if (validate) { + crc1.update(buf, 0, headerCnt); + crc2.update(buf, 0, headerCnt); + } } else if (typeCode == Constants.OBJ_REF_DELTA) { - crc1.update(buf, 0, headerCnt); - crc2.update(buf, 0, headerCnt); + if (validate) { + crc1.update(buf, 0, headerCnt); + crc2.update(buf, 0, headerCnt); + } readFully(src.offset + headerCnt, buf, 0, 20, curs); - crc1.update(buf, 0, 20); - crc2.update(buf, 0, 20); + if (validate) { + crc1.update(buf, 0, 20); + crc2.update(buf, 0, 20); + } headerCnt += 20; - } else { + } else if (validate) { crc1.update(buf, 0, headerCnt); crc2.update(buf, 0, headerCnt); } @@ -374,7 +380,7 @@ public class PackFile implements Iterable { try { quickCopy = curs.quickCopy(this, dataOffset, dataLength); - if (idx().hasCRC32Support()) { + if (validate && idx().hasCRC32Support()) { // Index has the CRC32 code cached, validate the object. // expectedCRC = idx().findCRC32(src); @@ -397,7 +403,7 @@ public class PackFile implements Iterable { JGitText.get().objectAtHasBadZlibStream, src.offset, getPackFile())); } - } else { + } else if (validate) { // We don't have a CRC32 code in the index, so compute it // now while inflating the raw data to get zlib to tell us // whether or not the data is safe. @@ -427,6 +433,8 @@ public class PackFile implements Iterable { src.offset)); } expectedCRC = crc1.getValue(); + } else { + expectedCRC = -1; } } catch (DataFormatException dataFormat) { setCorrupt(src.offset); @@ -473,12 +481,13 @@ public class PackFile implements Iterable { while (cnt > 0) { final int n = (int) Math.min(cnt, buf.length); readFully(pos, buf, 0, n, curs); - crc2.update(buf, 0, n); + if (validate) + crc2.update(buf, 0, n); out.write(buf, 0, n); pos += n; cnt -= n; } - if (crc2.getValue() != expectedCRC) { + if (validate && crc2.getValue() != expectedCRC) { throw new CorruptObjectException(MessageFormat.format(JGitText .get().objectAtHasBadZlibStream, src.offset, getPackFile())); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java index 5a3e01c8c..661df62cb 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java @@ -143,10 +143,11 @@ final class WindowCursor extends ObjectReader implements ObjectReuseAsIs { } } - public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp) - throws IOException, StoredObjectRepresentationNotAvailableException { + public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp, + boolean validate) throws IOException, + StoredObjectRepresentationNotAvailableException { LocalObjectToPack src = (LocalObjectToPack) otp; - src.pack.copyAsIs(out, src, this); + src.pack.copyAsIs(out, src, validate, this); } public void writeObjects(PackOutputStream out, List list) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java index 480e0519d..8ad0b2419 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java @@ -130,8 +130,9 @@ public interface ObjectReuseAsIs { * reduce data locality for the reader, slowing down data access. * * Invoking {@link PackOutputStream#writeObject(ObjectToPack)} will cause - * {@link #copyObjectAsIs(PackOutputStream, ObjectToPack)} to be invoked - * recursively on {@code this} if the current object is scheduled for reuse. + * {@link #copyObjectAsIs(PackOutputStream, ObjectToPack, boolean)} to be + * invoked recursively on {@code this} if the current object is scheduled + * for reuse. * * @param out * the stream to write each object to. @@ -160,7 +161,11 @@ public interface ObjectReuseAsIs { * *
 	 * MyToPack mtp = (MyToPack) otp;
-	 * byte[] raw = validate(mtp); // throw SORNAE here, if at all
+	 * byte[] raw;
+	 * if (validate)
+	 * 	 raw = validate(mtp); // throw SORNAE here, if at all
+	 * else
+	 * 	 raw = readFast(mtp);
 	 * out.writeHeader(mtp, mtp.inflatedSize);
 	 * out.write(raw);
 	 * 
@@ -169,6 +174,11 @@ public interface ObjectReuseAsIs { * stream the object should be written to. * @param otp * the object's saved representation information. + * @param validate + * if true the representation must be validated and not be + * corrupt before being reused. If false, validation may be + * skipped as it will be performed elsewhere in the processing + * pipeline. * @throws StoredObjectRepresentationNotAvailableException * the previously selected representation is no longer * available. If thrown before {@code out.writeHeader} the pack @@ -179,8 +189,9 @@ public interface ObjectReuseAsIs { * the stream's write method threw an exception. Packing will * abort. */ - public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp) - throws IOException, StoredObjectRepresentationNotAvailableException; + public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp, + boolean validate) throws IOException, + StoredObjectRepresentationNotAvailableException; /** * Obtain the available cached packs. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java index 57dca95a3..3d0604868 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java @@ -177,6 +177,8 @@ public class PackWriter { private boolean reuseDeltaCommits; + private boolean reuseValidate; + private boolean thin; private boolean useCachedPacks; @@ -245,6 +247,7 @@ public class PackWriter { deltaBaseAsOffset = config.isDeltaBaseAsOffset(); reuseDeltas = config.isReuseDeltas(); + reuseValidate = true; // be paranoid by default stats = new Statistics(); } @@ -298,6 +301,29 @@ public class PackWriter { reuseDeltaCommits = reuse; } + /** + * Check if the writer validates objects before copying them. + * + * @return true if validation is enabled; false if the reader will handle + * object validation as a side-effect of it consuming the output. + */ + public boolean isReuseValidatingObjects() { + return reuseValidate; + } + + /** + * Enable (or disable) object validation during packing. + * + * @param validate + * if true the pack writer will validate an object before it is + * put into the output. This additional validation work may be + * necessary to avoid propagating corruption from one local pack + * file to another local pack file. + */ + public void setReuseValidatingObjects(boolean validate) { + reuseValidate = validate; + } + /** @return true if this writer is producing a thin pack. */ public boolean isThin() { return thin; @@ -1006,7 +1032,7 @@ public class PackWriter { while (otp.isReuseAsIs()) { try { - reuseSupport.copyObjectAsIs(out, otp); + reuseSupport.copyObjectAsIs(out, otp, reuseValidate); out.endObject(); otp.setCRC(out.getCRC32()); stats.reusedObjects++; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/BasePackPushConnection.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/BasePackPushConnection.java index 2a5cfd0ad..4e0536d0c 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/BasePackPushConnection.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/BasePackPushConnection.java @@ -260,6 +260,7 @@ public abstract class BasePackPushConnection extends BasePackConnection implemen writer.setUseCachedPacks(true); writer.setThin(thinPack); + writer.setReuseValidatingObjects(false); writer.setDeltaBaseAsOffset(capableOfsDelta); writer.preparePack(monitor, newObjects, remoteObjects); writer.writePack(monitor, monitor, out); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java index 3865c8a75..9e79d43c3 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java @@ -202,6 +202,7 @@ public class BundleWriter { exc.add(r.getId()); packWriter.setDeltaBaseAsOffset(true); packWriter.setThin(exc.size() > 0); + packWriter.setReuseValidatingObjects(false); if (exc.size() == 0) packWriter.setTagTargets(tagTargets); packWriter.preparePack(monitor, inc, exc); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java index 3e324d093..798c4638f 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java @@ -695,6 +695,7 @@ public class UploadPack { pw.setReuseDeltaCommits(true); pw.setDeltaBaseAsOffset(options.contains(OPTION_OFS_DELTA)); pw.setThin(options.contains(OPTION_THIN_PACK)); + pw.setReuseValidatingObjects(false); if (commonBase.isEmpty()) { Set tagTargets = new HashSet();