From 43ef5dabf12fee8877c6d8fa07b26be766dca1ee Mon Sep 17 00:00:00 2001 From: Dave Borowitz Date: Wed, 20 Dec 2017 12:36:03 -0500 Subject: [PATCH] PackInserter: Ensure objects are written at the end of the pack When interleaving reads and writes from an unflushed pack, we forgot to reset the file pointer back to the end of the file before writing more new objects. This had at least two unfortunate effects: * The pack data was potentially corrupt, since we could overwrite previous portions of the file willy-nilly. * The CountingOutputStream would report more bytes read than the size of the file, which stored the wrong PackedObjectInfo, which would cause EOFs during reading. We already had a test in PackInserterTest which was supposed to catch bugs like this, by interleaving reads and writes. Unfortunately, it didn't catch the bug, since as an implementation detail we always read a full buffer's worth of data from the file when inflating during readback. If the size of the file was less than the offset of the object we were reading back plus one buffer (8192 bytes), we would completely accidentally end up back in the right place in the file. So, add another test for this case where we read back a small object positioned before a large object. Before the fix, this test exhibited exactly the "Unexpected EOF" error reported at crbug.com/gerrit/7668. Change-Id: I74f08f3d5d9046781d59e5bd7c84916ff8225c3b --- .../storage/file/PackInserterTest.java | 52 ++++++- .../internal/storage/file/PackInserter.java | 131 +++++++++++++----- 2 files changed, 149 insertions(+), 34 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java index 17f04c854..8596f74f8 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java @@ -67,6 +67,7 @@ import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Random; import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -440,6 +441,53 @@ public class PackInserterTest extends RepositoryTestCase { } } + @Test + public void readBackSmallObjectBeforeLargeObject() throws Exception { + WindowCacheConfig wcc = new WindowCacheConfig(); + wcc.setStreamFileThreshold(1024); + wcc.install(); + + ObjectId blobId1; + ObjectId blobId2; + ObjectId largeId; + byte[] blob1 = Constants.encode("blob1"); + byte[] blob2 = Constants.encode("blob2"); + byte[] largeBlob = newLargeBlob(); + try (PackInserter ins = newInserter()) { + assertThat(blob1.length, lessThan(ins.getBufferSize())); + assertThat(largeBlob.length, greaterThan(ins.getBufferSize())); + + blobId1 = ins.insert(OBJ_BLOB, blob1); + largeId = ins.insert(OBJ_BLOB, largeBlob); + + try (ObjectReader reader = ins.newReader()) { + // A previous bug did not reset the file pointer to EOF after reading + // back. We need to seek to something further back than a full buffer, + // since the read-back code eagerly reads a full buffer's worth of data + // from the file to pass to the inflater. If we seeked back just a small + // amount, this step would consume the rest of the file, so the file + // pointer would coincidentally end up back at EOF, hiding the bug. + assertBlob(reader, blobId1, blob1); + } + + blobId2 = ins.insert(OBJ_BLOB, blob2); + + try (ObjectReader reader = ins.newReader()) { + assertBlob(reader, blobId1, blob1); + assertBlob(reader, blobId2, blob2); + assertBlob(reader, largeId, largeBlob); + } + + ins.flush(); + } + + try (ObjectReader reader = db.newObjectReader()) { + assertBlob(reader, blobId1, blob1); + assertBlob(reader, blobId2, blob2); + assertBlob(reader, largeId, largeBlob); + } + } + private List listPacks() throws Exception { List fromOpenDb = listPacks(db); List reopened; @@ -470,9 +518,7 @@ public class PackInserterTest extends RepositoryTestCase { private static byte[] newLargeBlob() { byte[] blob = new byte[10240]; - for (int i = 0; i < blob.length; i++) { - blob[i] = (byte) ('0' + (i % 10)); - } + new Random(0).nextBytes(blob); return blob; } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java index dd83e251f..e6139fdc0 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java @@ -52,6 +52,7 @@ import java.io.BufferedInputStream; import java.io.EOFException; import java.io.File; import java.io.FileOutputStream; +import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -254,7 +255,6 @@ class PackInserter extends ObjectInserter { try { packHash = packOut.finishPack(); } finally { - packOut.close(); packOut = null; } @@ -359,6 +359,20 @@ class PackInserter extends ObjectInserter { return cachedInflater; } + /** + * Stream that writes to a pack file. + *

+ * Backed by two views of the same open file descriptor: a random-access file, + * and an output stream. Seeking in the file causes subsequent writes to the + * output stream to occur wherever the file pointer is pointing, so we need to + * take care to always seek to the end of the file before writing a new + * object. + *

+ * Callers should always use {@link #seek(long)} to seek, rather than reaching + * into the file member. As long as this contract is followed, calls to {@link + * #write(byte[], int, int)} are guaranteed to write at the end of the file, + * even if there have been intermediate seeks. + */ private class PackStream extends OutputStream { final byte[] hdrBuf; final CRC32 crc32; @@ -368,6 +382,8 @@ class PackInserter extends ObjectInserter { private final CountingOutputStream out; private final Deflater deflater; + private boolean atEnd; + PackStream(File pack) throws IOException { file = new RandomAccessFile(pack, "rw"); //$NON-NLS-1$ out = new CountingOutputStream(new FileOutputStream(file.getFD())); @@ -375,12 +391,23 @@ class PackInserter extends ObjectInserter { compress = new DeflaterOutputStream(this, deflater, 8192); hdrBuf = new byte[32]; crc32 = new CRC32(); + atEnd = true; } long getOffset() { + // This value is accurate as long as we only ever write to the end of the + // file, and don't seek back to overwrite any previous segments. Although + // this is subtle, storing the stream counter this way is still preferable + // to returning file.length() here, as it avoids a syscall and possible + // IOException. return out.getCount(); } + void seek(long offset) throws IOException { + file.seek(offset); + atEnd = false; + } + void beginObject(int objectType, long length) throws IOException { crc32.reset(); deflater.reset(); @@ -409,34 +436,49 @@ class PackInserter extends ObjectInserter { @Override public void write(byte[] data, int off, int len) throws IOException { crc32.update(data, off, len); + if (!atEnd) { + file.seek(file.length()); + atEnd = true; + } out.write(data, off, len); } byte[] finishPack() throws IOException { - // Overwrite placeholder header with actual object count, then hash. - file.seek(0); - write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size())); - - byte[] buf = buffer(); - SHA1 md = digest().reset(); - file.seek(0); - while (true) { - int r = file.read(buf); - if (r < 0) { - break; + // Overwrite placeholder header with actual object count, then hash. This + // method intentionally uses direct seek/write calls rather than the + // wrappers which keep track of atEnd. This leaves atEnd, the file + // pointer, and out's counter in an inconsistent state; that's ok, since + // this method closes the file anyway. + try { + file.seek(0); + out.write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size())); + + byte[] buf = buffer(); + SHA1 md = digest().reset(); + file.seek(0); + while (true) { + int r = file.read(buf); + if (r < 0) { + break; + } + md.update(buf, 0, r); } - md.update(buf, 0, r); + byte[] packHash = md.digest(); + out.write(packHash, 0, packHash.length); + return packHash; + } finally { + close(); } - byte[] packHash = md.digest(); - out.write(packHash, 0, packHash.length); - return packHash; } @Override public void close() throws IOException { deflater.end(); - out.close(); - file.close(); + try { + out.close(); + } finally { + file.close(); + } } byte[] inflate(long filePos, int len) throws IOException, DataFormatException { @@ -467,7 +509,7 @@ class PackInserter extends ObjectInserter { private int setInput(long filePos, Inflater inf, byte[] buf) throws IOException { if (file.getFilePointer() != filePos) { - file.seek(filePos); + seek(filePos); } int n = file.read(buf); if (n < 0) { @@ -528,9 +570,8 @@ class PackInserter extends ObjectInserter { } byte[] buf = buffer(); - RandomAccessFile f = packOut.file; - f.seek(obj.getOffset()); - int cnt = f.read(buf, 0, 20); + packOut.seek(obj.getOffset()); + int cnt = packOut.file.read(buf, 0, 20); if (cnt <= 0) { throw new EOFException(JGitText.get().unexpectedEofInPack); } @@ -564,7 +605,7 @@ class PackInserter extends ObjectInserter { return new ObjectLoader.SmallObject(type, data); } } - return new StreamLoader(f, type, sz, zpos); + return new StreamLoader(type, sz, zpos); } private byte[] inflate(PackedObjectInfo obj, long zpos, int sz) @@ -593,13 +634,11 @@ class PackInserter extends ObjectInserter { } private class StreamLoader extends ObjectLoader { - private final RandomAccessFile file; private final int type; private final long size; private final long pos; - StreamLoader(RandomAccessFile file, int type, long size, long pos) { - this.file = file; + StreamLoader(int type, long size, long pos) { this.type = type; this.size = size; this.pos = pos; @@ -609,14 +648,44 @@ class PackInserter extends ObjectInserter { public ObjectStream openStream() throws MissingObjectException, IOException { int bufsz = buffer().length; - file.seek(pos); + packOut.seek(pos); + + InputStream fileStream = new FilterInputStream( + Channels.newInputStream(packOut.file.getChannel())) { + // atEnd was already set to false by the previous seek, but it's + // technically possible for a caller to call insert on the + // inserter in the middle of reading from this stream. Behavior is + // undefined in this case, so it would arguably be ok to ignore, + // but it's not hard to at least make an attempt to not corrupt + // the data. + @Override + public int read() throws IOException { + packOut.atEnd = false; + return super.read(); + } + + @Override + public int read(byte[] b) throws IOException { + packOut.atEnd = false; + return super.read(b); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + packOut.atEnd = false; + return super.read(b,off,len); + } + + @Override + public void close() { + // Never close underlying RandomAccessFile, which lasts the + // lifetime of the enclosing PackStream. + } + }; return new ObjectStream.Filter( type, size, new BufferedInputStream( - new InflaterInputStream( - Channels.newInputStream(packOut.file.getChannel()), - inflater(), bufsz), - bufsz)); + new InflaterInputStream(fileStream, inflater(), bufsz), bufsz)); } @Override