Browse Source

PackInserter: Ensure objects are written at the end of the pack

When interleaving reads and writes from an unflushed pack, we forgot to
reset the file pointer back to the end of the file before writing more
new objects. This had at least two unfortunate effects:
  * The pack data was potentially corrupt, since we could overwrite
    previous portions of the file willy-nilly.
  * The CountingOutputStream would report more bytes read than the size
    of the file, which stored the wrong PackedObjectInfo, which would
    cause EOFs during reading.

We already had a test in PackInserterTest which was supposed to catch
bugs like this, by interleaving reads and writes. Unfortunately, it
didn't catch the bug, since as an implementation detail we always read a
full buffer's worth of data from the file when inflating during
readback. If the size of the file was less than the offset of the object
we were reading back plus one buffer (8192 bytes), we would completely
accidentally end up back in the right place in the file.

So, add another test for this case where we read back a small object
positioned before a large object. Before the fix, this test exhibited
exactly the "Unexpected EOF" error reported at crbug.com/gerrit/7668.

Change-Id: I74f08f3d5d9046781d59e5bd7c84916ff8225c3b
stable-4.9
Dave Borowitz 7 years ago
parent
commit
43ef5dabf1
  1. 52
      org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java
  2. 101
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java

52
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java

@ -67,6 +67,7 @@ import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Random;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -440,6 +441,53 @@ public class PackInserterTest extends RepositoryTestCase {
}
}
@Test
public void readBackSmallObjectBeforeLargeObject() throws Exception {
WindowCacheConfig wcc = new WindowCacheConfig();
wcc.setStreamFileThreshold(1024);
wcc.install();
ObjectId blobId1;
ObjectId blobId2;
ObjectId largeId;
byte[] blob1 = Constants.encode("blob1");
byte[] blob2 = Constants.encode("blob2");
byte[] largeBlob = newLargeBlob();
try (PackInserter ins = newInserter()) {
assertThat(blob1.length, lessThan(ins.getBufferSize()));
assertThat(largeBlob.length, greaterThan(ins.getBufferSize()));
blobId1 = ins.insert(OBJ_BLOB, blob1);
largeId = ins.insert(OBJ_BLOB, largeBlob);
try (ObjectReader reader = ins.newReader()) {
// A previous bug did not reset the file pointer to EOF after reading
// back. We need to seek to something further back than a full buffer,
// since the read-back code eagerly reads a full buffer's worth of data
// from the file to pass to the inflater. If we seeked back just a small
// amount, this step would consume the rest of the file, so the file
// pointer would coincidentally end up back at EOF, hiding the bug.
assertBlob(reader, blobId1, blob1);
}
blobId2 = ins.insert(OBJ_BLOB, blob2);
try (ObjectReader reader = ins.newReader()) {
assertBlob(reader, blobId1, blob1);
assertBlob(reader, blobId2, blob2);
assertBlob(reader, largeId, largeBlob);
}
ins.flush();
}
try (ObjectReader reader = db.newObjectReader()) {
assertBlob(reader, blobId1, blob1);
assertBlob(reader, blobId2, blob2);
assertBlob(reader, largeId, largeBlob);
}
}
private List<PackFile> listPacks() throws Exception {
List<PackFile> fromOpenDb = listPacks(db);
List<PackFile> reopened;
@ -470,9 +518,7 @@ public class PackInserterTest extends RepositoryTestCase {
private static byte[] newLargeBlob() {
byte[] blob = new byte[10240];
for (int i = 0; i < blob.length; i++) {
blob[i] = (byte) ('0' + (i % 10));
}
new Random(0).nextBytes(blob);
return blob;
}

101
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java

@ -52,6 +52,7 @@ import java.io.BufferedInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@ -254,7 +255,6 @@ class PackInserter extends ObjectInserter {
try {
packHash = packOut.finishPack();
} finally {
packOut.close();
packOut = null;
}
@ -359,6 +359,20 @@ class PackInserter extends ObjectInserter {
return cachedInflater;
}
/**
* Stream that writes to a pack file.
* <p>
* Backed by two views of the same open file descriptor: a random-access file,
* and an output stream. Seeking in the file causes subsequent writes to the
* output stream to occur wherever the file pointer is pointing, so we need to
* take care to always seek to the end of the file before writing a new
* object.
* <p>
* Callers should always use {@link #seek(long)} to seek, rather than reaching
* into the file member. As long as this contract is followed, calls to {@link
* #write(byte[], int, int)} are guaranteed to write at the end of the file,
* even if there have been intermediate seeks.
*/
private class PackStream extends OutputStream {
final byte[] hdrBuf;
final CRC32 crc32;
@ -368,6 +382,8 @@ class PackInserter extends ObjectInserter {
private final CountingOutputStream out;
private final Deflater deflater;
private boolean atEnd;
PackStream(File pack) throws IOException {
file = new RandomAccessFile(pack, "rw"); //$NON-NLS-1$
out = new CountingOutputStream(new FileOutputStream(file.getFD()));
@ -375,12 +391,23 @@ class PackInserter extends ObjectInserter {
compress = new DeflaterOutputStream(this, deflater, 8192);
hdrBuf = new byte[32];
crc32 = new CRC32();
atEnd = true;
}
long getOffset() {
// This value is accurate as long as we only ever write to the end of the
// file, and don't seek back to overwrite any previous segments. Although
// this is subtle, storing the stream counter this way is still preferable
// to returning file.length() here, as it avoids a syscall and possible
// IOException.
return out.getCount();
}
void seek(long offset) throws IOException {
file.seek(offset);
atEnd = false;
}
void beginObject(int objectType, long length) throws IOException {
crc32.reset();
deflater.reset();
@ -409,13 +436,22 @@ class PackInserter extends ObjectInserter {
@Override
public void write(byte[] data, int off, int len) throws IOException {
crc32.update(data, off, len);
if (!atEnd) {
file.seek(file.length());
atEnd = true;
}
out.write(data, off, len);
}
byte[] finishPack() throws IOException {
// Overwrite placeholder header with actual object count, then hash.
// Overwrite placeholder header with actual object count, then hash. This
// method intentionally uses direct seek/write calls rather than the
// wrappers which keep track of atEnd. This leaves atEnd, the file
// pointer, and out's counter in an inconsistent state; that's ok, since
// this method closes the file anyway.
try {
file.seek(0);
write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size()));
out.write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size()));
byte[] buf = buffer();
SHA1 md = digest().reset();
@ -430,14 +466,20 @@ class PackInserter extends ObjectInserter {
byte[] packHash = md.digest();
out.write(packHash, 0, packHash.length);
return packHash;
} finally {
close();
}
}
@Override
public void close() throws IOException {
deflater.end();
try {
out.close();
} finally {
file.close();
}
}
byte[] inflate(long filePos, int len) throws IOException, DataFormatException {
byte[] dstbuf;
@ -467,7 +509,7 @@ class PackInserter extends ObjectInserter {
private int setInput(long filePos, Inflater inf, byte[] buf)
throws IOException {
if (file.getFilePointer() != filePos) {
file.seek(filePos);
seek(filePos);
}
int n = file.read(buf);
if (n < 0) {
@ -528,9 +570,8 @@ class PackInserter extends ObjectInserter {
}
byte[] buf = buffer();
RandomAccessFile f = packOut.file;
f.seek(obj.getOffset());
int cnt = f.read(buf, 0, 20);
packOut.seek(obj.getOffset());
int cnt = packOut.file.read(buf, 0, 20);
if (cnt <= 0) {
throw new EOFException(JGitText.get().unexpectedEofInPack);
}
@ -564,7 +605,7 @@ class PackInserter extends ObjectInserter {
return new ObjectLoader.SmallObject(type, data);
}
}
return new StreamLoader(f, type, sz, zpos);
return new StreamLoader(type, sz, zpos);
}
private byte[] inflate(PackedObjectInfo obj, long zpos, int sz)
@ -593,13 +634,11 @@ class PackInserter extends ObjectInserter {
}
private class StreamLoader extends ObjectLoader {
private final RandomAccessFile file;
private final int type;
private final long size;
private final long pos;
StreamLoader(RandomAccessFile file, int type, long size, long pos) {
this.file = file;
StreamLoader(int type, long size, long pos) {
this.type = type;
this.size = size;
this.pos = pos;
@ -609,14 +648,44 @@ class PackInserter extends ObjectInserter {
public ObjectStream openStream()
throws MissingObjectException, IOException {
int bufsz = buffer().length;
file.seek(pos);
packOut.seek(pos);
InputStream fileStream = new FilterInputStream(
Channels.newInputStream(packOut.file.getChannel())) {
// atEnd was already set to false by the previous seek, but it's
// technically possible for a caller to call insert on the
// inserter in the middle of reading from this stream. Behavior is
// undefined in this case, so it would arguably be ok to ignore,
// but it's not hard to at least make an attempt to not corrupt
// the data.
@Override
public int read() throws IOException {
packOut.atEnd = false;
return super.read();
}
@Override
public int read(byte[] b) throws IOException {
packOut.atEnd = false;
return super.read(b);
}
@Override
public int read(byte[] b, int off, int len) throws IOException {
packOut.atEnd = false;
return super.read(b,off,len);
}
@Override
public void close() {
// Never close underlying RandomAccessFile, which lasts the
// lifetime of the enclosing PackStream.
}
};
return new ObjectStream.Filter(
type, size,
new BufferedInputStream(
new InflaterInputStream(
Channels.newInputStream(packOut.file.getChannel()),
inflater(), bufsz),
bufsz));
new InflaterInputStream(fileStream, inflater(), bufsz), bufsz));
}
@Override

Loading…
Cancel
Save