Browse Source

Defer object collision check until pack stream is done

Object collision check requires read from local storage which may be
slow. We already delay this check for blobs, this change will also delay
other objects until the pack stream is closed. In this way, there is no
readCurs call until the pack stream is closed.

Change-Id: I3c8c4720dd19a5f64f8c7ddf07d815ed6877b6aa
Signed-off-by: Zhen Chen <czhen@google.com>
stable-4.9
Zhen Chen 7 years ago
parent
commit
9a3e037726
  1. 95
      org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java

95
org.eclipse.jgit/src/org/eclipse/jgit/transport/PackParser.java

@ -173,8 +173,8 @@ public abstract class PackParser {
private LongMap<UnresolvedDelta> baseByPos;
/** Blobs whose contents need to be double-checked after indexing. */
private BlockList<PackedObjectInfo> deferredCheckBlobs;
/** Objects need to be double-checked for collision after indexing. */
private BlockList<PackedObjectInfo> collisionCheckObjs;
private MessageDigest packDigest;
@ -528,7 +528,7 @@ public abstract class PackParser {
entries = new PackedObjectInfo[(int) objectCount];
baseById = new ObjectIdOwnerMap<>();
baseByPos = new LongMap<>();
deferredCheckBlobs = new BlockList<>();
collisionCheckObjs = new BlockList<>();
receiving.beginTask(JGitText.get().receivingObjects,
(int) objectCount);
@ -545,8 +545,10 @@ public abstract class PackParser {
receiving.endTask();
}
if (!deferredCheckBlobs.isEmpty())
doDeferredCheckBlobs();
if (!collisionCheckObjs.isEmpty()) {
checkObjectCollision();
}
if (deltaCount > 0) {
if (resolving instanceof BatchingProgressMonitor) {
((BatchingProgressMonitor) resolving).setDelayStart(
@ -675,6 +677,9 @@ public abstract class PackParser {
objectDigest.digest(tempObjectId);
verifySafeObject(tempObjectId, type, visit.data);
if (isCheckObjectCollisions() && readCurs.has(tempObjectId)) {
checkObjectCollision(tempObjectId, type, visit.data);
}
PackedObjectInfo oe;
oe = newInfo(tempObjectId, visit.delta, visit.parent.id);
@ -1031,7 +1036,6 @@ public abstract class PackParser {
objectDigest.update((byte) 0);
final byte[] data;
boolean checkContentLater = false;
if (type == Constants.OBJ_BLOB) {
byte[] readBuffer = buffer();
InputStream inf = inflate(Source.INPUT, sz);
@ -1045,10 +1049,7 @@ public abstract class PackParser {
}
inf.close();
objectDigest.digest(tempObjectId);
checkContentLater = isCheckObjectCollisions()
&& readCurs.has(tempObjectId);
data = null;
} else {
data = inflateAndReturn(Source.INPUT, sz);
objectDigest.update(data);
@ -1062,8 +1063,10 @@ public abstract class PackParser {
if (data != null)
onInflatedObjectData(obj, type, data);
addObjectAndTrack(obj);
if (checkContentLater)
deferredCheckBlobs.add(obj);
if (isCheckObjectCollisions()) {
collisionCheckObjs.add(obj);
}
}
private void verifySafeObject(final AnyObjectId id, final int type,
@ -1078,62 +1081,70 @@ public abstract class PackParser {
throw new CorruptObjectException(MessageFormat.format(
JGitText.get().invalidObject,
Constants.typeString(type),
readCurs.abbreviate(id, 10).name(),
id.name(),
e.getMessage()), e);
}
}
if (isCheckObjectCollisions()) {
try {
final ObjectLoader ldr = readCurs.open(id, type);
final byte[] existingData = ldr.getCachedBytes(data.length);
if (!Arrays.equals(data, existingData)) {
throw new IOException(MessageFormat.format(
JGitText.get().collisionOn, id.name()));
}
} catch (MissingObjectException notLocal) {
// This is OK, we don't have a copy of the object locally
// but the API throws when we try to read it as usually its
// an error to read something that doesn't exist.
private void checkObjectCollision() throws IOException {
for (PackedObjectInfo obj : collisionCheckObjs) {
if (!readCurs.has(obj)) {
continue;
}
checkObjectCollision(obj);
}
}
private void doDeferredCheckBlobs() throws IOException {
private void checkObjectCollision(PackedObjectInfo obj)
throws IOException {
ObjectTypeAndSize info = openDatabase(obj, new ObjectTypeAndSize());
final byte[] readBuffer = buffer();
final byte[] curBuffer = new byte[readBuffer.length];
ObjectTypeAndSize info = new ObjectTypeAndSize();
for (PackedObjectInfo obj : deferredCheckBlobs) {
info = openDatabase(obj, info);
if (info.type != Constants.OBJ_BLOB)
throw new IOException(MessageFormat.format(
JGitText.get().unknownObjectType,
Integer.valueOf(info.type)));
ObjectStream cur = readCurs.open(obj, info.type).openStream();
try {
long sz = info.size;
if (cur.getSize() != sz)
InputStream pck = null;
try (ObjectStream cur = readCurs.open(obj, info.type).openStream()) {
if (cur.getSize() != sz) {
throw new IOException(MessageFormat.format(
JGitText.get().collisionOn, obj.name()));
InputStream pck = inflate(Source.DATABASE, sz);
}
pck = inflate(Source.DATABASE, sz);
while (0 < sz) {
int n = (int) Math.min(readBuffer.length, sz);
IO.readFully(cur, curBuffer, 0, n);
IO.readFully(pck, readBuffer, 0, n);
for (int i = 0; i < n; i++) {
if (curBuffer[i] != readBuffer[i])
if (curBuffer[i] != readBuffer[i]) {
throw new IOException(MessageFormat.format(JGitText
.get().collisionOn, obj.name()));
}
}
sz -= n;
}
pck.close();
} catch (MissingObjectException notLocal) {
// This is OK, we don't have a copy of the object locally
// but the API throws when we try to read it as usually its
// an error to read something that doesn't exist.
} finally {
cur.close();
if (pck != null) {
pck.close();
}
}
}
private void checkObjectCollision(AnyObjectId obj, int type, byte[] data)
throws IOException {
try {
final ObjectLoader ldr = readCurs.open(obj, type);
final byte[] existingData = ldr.getCachedBytes(data.length);
if (!Arrays.equals(data, existingData)) {
throw new IOException(MessageFormat.format(
JGitText.get().collisionOn, obj.name()));
}
} catch (MissingObjectException notLocal) {
// This is OK, we don't have a copy of the object locally
// but the API throws when we try to read it as usually its
// an error to read something that doesn't exist.
}
}

Loading…
Cancel
Save