Browse Source

Defer object collision check until pack stream is done

Object collision check requires read from local storage which may be
slow. We already delay this check for blobs, this change will also delay
other objects until the pack stream is closed. In this way, there is no
readCurs call until the pack stream is closed.

Change-Id: I3c8c4720dd19a5f64f8c7ddf07d815ed6877b6aa
Signed-off-by: Zhen Chen <czhen@google.com>
stable-4.9
Zhen Chen 7 years ago
parent
commit
9a3e037726
  1. 115
      org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java

115
org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java

@ -173,8 +173,8 @@ public abstract class PackParser {
private LongMap<UnresolvedDelta> baseByPos; private LongMap<UnresolvedDelta> baseByPos;
/** Blobs whose contents need to be double-checked after indexing. */ /** Objects need to be double-checked for collision after indexing. */
private BlockList<PackedObjectInfo> deferredCheckBlobs; private BlockList<PackedObjectInfo> collisionCheckObjs;
private MessageDigest packDigest; private MessageDigest packDigest;
@ -528,7 +528,7 @@ public abstract class PackParser {
entries = new PackedObjectInfo[(int) objectCount]; entries = new PackedObjectInfo[(int) objectCount];
baseById = new ObjectIdOwnerMap<>(); baseById = new ObjectIdOwnerMap<>();
baseByPos = new LongMap<>(); baseByPos = new LongMap<>();
deferredCheckBlobs = new BlockList<>(); collisionCheckObjs = new BlockList<>();
receiving.beginTask(JGitText.get().receivingObjects, receiving.beginTask(JGitText.get().receivingObjects,
(int) objectCount); (int) objectCount);
@ -545,8 +545,10 @@ public abstract class PackParser {
receiving.endTask(); receiving.endTask();
} }
if (!deferredCheckBlobs.isEmpty()) if (!collisionCheckObjs.isEmpty()) {
doDeferredCheckBlobs(); checkObjectCollision();
}
if (deltaCount > 0) { if (deltaCount > 0) {
if (resolving instanceof BatchingProgressMonitor) { if (resolving instanceof BatchingProgressMonitor) {
((BatchingProgressMonitor) resolving).setDelayStart( ((BatchingProgressMonitor) resolving).setDelayStart(
@ -675,6 +677,9 @@ public abstract class PackParser {
objectDigest.digest(tempObjectId); objectDigest.digest(tempObjectId);
verifySafeObject(tempObjectId, type, visit.data); verifySafeObject(tempObjectId, type, visit.data);
if (isCheckObjectCollisions() && readCurs.has(tempObjectId)) {
checkObjectCollision(tempObjectId, type, visit.data);
}
PackedObjectInfo oe; PackedObjectInfo oe;
oe = newInfo(tempObjectId, visit.delta, visit.parent.id); oe = newInfo(tempObjectId, visit.delta, visit.parent.id);
@ -1031,7 +1036,6 @@ public abstract class PackParser {
objectDigest.update((byte) 0); objectDigest.update((byte) 0);
final byte[] data; final byte[] data;
boolean checkContentLater = false;
if (type == Constants.OBJ_BLOB) { if (type == Constants.OBJ_BLOB) {
byte[] readBuffer = buffer(); byte[] readBuffer = buffer();
InputStream inf = inflate(Source.INPUT, sz); InputStream inf = inflate(Source.INPUT, sz);
@ -1045,10 +1049,7 @@ public abstract class PackParser {
} }
inf.close(); inf.close();
objectDigest.digest(tempObjectId); objectDigest.digest(tempObjectId);
checkContentLater = isCheckObjectCollisions()
&& readCurs.has(tempObjectId);
data = null; data = null;
} else { } else {
data = inflateAndReturn(Source.INPUT, sz); data = inflateAndReturn(Source.INPUT, sz);
objectDigest.update(data); objectDigest.update(data);
@ -1062,8 +1063,10 @@ public abstract class PackParser {
if (data != null) if (data != null)
onInflatedObjectData(obj, type, data); onInflatedObjectData(obj, type, data);
addObjectAndTrack(obj); addObjectAndTrack(obj);
if (checkContentLater)
deferredCheckBlobs.add(obj); if (isCheckObjectCollisions()) {
collisionCheckObjs.add(obj);
}
} }
private void verifySafeObject(final AnyObjectId id, final int type, private void verifySafeObject(final AnyObjectId id, final int type,
@ -1078,65 +1081,73 @@ public abstract class PackParser {
throw new CorruptObjectException(MessageFormat.format( throw new CorruptObjectException(MessageFormat.format(
JGitText.get().invalidObject, JGitText.get().invalidObject,
Constants.typeString(type), Constants.typeString(type),
readCurs.abbreviate(id, 10).name(), id.name(),
e.getMessage()), e); e.getMessage()), e);
} }
} }
}
if (isCheckObjectCollisions()) { private void checkObjectCollision() throws IOException {
try { for (PackedObjectInfo obj : collisionCheckObjs) {
final ObjectLoader ldr = readCurs.open(id, type); if (!readCurs.has(obj)) {
final byte[] existingData = ldr.getCachedBytes(data.length); continue;
if (!Arrays.equals(data, existingData)) {
throw new IOException(MessageFormat.format(
JGitText.get().collisionOn, id.name()));
}
} catch (MissingObjectException notLocal) {
// This is OK, we don't have a copy of the object locally
// but the API throws when we try to read it as usually its
// an error to read something that doesn't exist.
} }
checkObjectCollision(obj);
} }
} }
private void doDeferredCheckBlobs() throws IOException { private void checkObjectCollision(PackedObjectInfo obj)
throws IOException {
ObjectTypeAndSize info = openDatabase(obj, new ObjectTypeAndSize());
final byte[] readBuffer = buffer(); final byte[] readBuffer = buffer();
final byte[] curBuffer = new byte[readBuffer.length]; final byte[] curBuffer = new byte[readBuffer.length];
ObjectTypeAndSize info = new ObjectTypeAndSize(); long sz = info.size;
InputStream pck = null;
for (PackedObjectInfo obj : deferredCheckBlobs) { try (ObjectStream cur = readCurs.open(obj, info.type).openStream()) {
info = openDatabase(obj, info); if (cur.getSize() != sz) {
if (info.type != Constants.OBJ_BLOB)
throw new IOException(MessageFormat.format( throw new IOException(MessageFormat.format(
JGitText.get().unknownObjectType, JGitText.get().collisionOn, obj.name()));
Integer.valueOf(info.type))); }
pck = inflate(Source.DATABASE, sz);
ObjectStream cur = readCurs.open(obj, info.type).openStream(); while (0 < sz) {
try { int n = (int) Math.min(readBuffer.length, sz);
long sz = info.size; IO.readFully(cur, curBuffer, 0, n);
if (cur.getSize() != sz) IO.readFully(pck, readBuffer, 0, n);
throw new IOException(MessageFormat.format( for (int i = 0; i < n; i++) {
JGitText.get().collisionOn, obj.name())); if (curBuffer[i] != readBuffer[i]) {
InputStream pck = inflate(Source.DATABASE, sz); throw new IOException(MessageFormat.format(JGitText
while (0 < sz) { .get().collisionOn, obj.name()));
int n = (int) Math.min(readBuffer.length, sz);
IO.readFully(cur, curBuffer, 0, n);
IO.readFully(pck, readBuffer, 0, n);
for (int i = 0; i < n; i++) {
if (curBuffer[i] != readBuffer[i])
throw new IOException(MessageFormat.format(JGitText
.get().collisionOn, obj.name()));
} }
sz -= n;
} }
sz -= n;
}
} catch (MissingObjectException notLocal) {
// This is OK, we don't have a copy of the object locally
// but the API throws when we try to read it as usually its
// an error to read something that doesn't exist.
} finally {
if (pck != null) {
pck.close(); pck.close();
} finally {
cur.close();
} }
} }
} }
private void checkObjectCollision(AnyObjectId obj, int type, byte[] data)
throws IOException {
try {
final ObjectLoader ldr = readCurs.open(obj, type);
final byte[] existingData = ldr.getCachedBytes(data.length);
if (!Arrays.equals(data, existingData)) {
throw new IOException(MessageFormat.format(
JGitText.get().collisionOn, obj.name()));
}
} catch (MissingObjectException notLocal) {
// This is OK, we don't have a copy of the object locally
// but the API throws when we try to read it as usually its
// an error to read something that doesn't exist.
}
}
/** @return current position of the input stream being parsed. */ /** @return current position of the input stream being parsed. */
private long streamPosition() { private long streamPosition() {
return bBase + bOffset; return bBase + bOffset;

Loading…
Cancel
Save