Browse Source

Merge branch 'stable-4.9'

* stable-4.9:
  PackInserter: Ensure objects are written at the end of the pack
  ObjectInserter: Add warning about mixing read-back with writes

Change-Id: I308e7c1c6b72e8d4d9b5d0f4f51e9815fc92d7d7
stable-4.10
Dave Borowitz 7 years ago
parent
commit
c1a02f497a
  1. 52
      org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java
  2. 101
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java
  3. 7
      org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectInserter.java

52
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackInserterTest.java

@ -67,6 +67,7 @@ import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Random;
import java.util.function.Predicate; import java.util.function.Predicate;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -440,6 +441,53 @@ public class PackInserterTest extends RepositoryTestCase {
} }
} }
@Test
public void readBackSmallObjectBeforeLargeObject() throws Exception {
WindowCacheConfig wcc = new WindowCacheConfig();
wcc.setStreamFileThreshold(1024);
wcc.install();
ObjectId blobId1;
ObjectId blobId2;
ObjectId largeId;
byte[] blob1 = Constants.encode("blob1");
byte[] blob2 = Constants.encode("blob2");
byte[] largeBlob = newLargeBlob();
try (PackInserter ins = newInserter()) {
assertThat(blob1.length, lessThan(ins.getBufferSize()));
assertThat(largeBlob.length, greaterThan(ins.getBufferSize()));
blobId1 = ins.insert(OBJ_BLOB, blob1);
largeId = ins.insert(OBJ_BLOB, largeBlob);
try (ObjectReader reader = ins.newReader()) {
// A previous bug did not reset the file pointer to EOF after reading
// back. We need to seek to something further back than a full buffer,
// since the read-back code eagerly reads a full buffer's worth of data
// from the file to pass to the inflater. If we seeked back just a small
// amount, this step would consume the rest of the file, so the file
// pointer would coincidentally end up back at EOF, hiding the bug.
assertBlob(reader, blobId1, blob1);
}
blobId2 = ins.insert(OBJ_BLOB, blob2);
try (ObjectReader reader = ins.newReader()) {
assertBlob(reader, blobId1, blob1);
assertBlob(reader, blobId2, blob2);
assertBlob(reader, largeId, largeBlob);
}
ins.flush();
}
try (ObjectReader reader = db.newObjectReader()) {
assertBlob(reader, blobId1, blob1);
assertBlob(reader, blobId2, blob2);
assertBlob(reader, largeId, largeBlob);
}
}
private List<PackFile> listPacks() throws Exception { private List<PackFile> listPacks() throws Exception {
List<PackFile> fromOpenDb = listPacks(db); List<PackFile> fromOpenDb = listPacks(db);
List<PackFile> reopened; List<PackFile> reopened;
@ -470,9 +518,7 @@ public class PackInserterTest extends RepositoryTestCase {
private static byte[] newLargeBlob() { private static byte[] newLargeBlob() {
byte[] blob = new byte[10240]; byte[] blob = new byte[10240];
for (int i = 0; i < blob.length; i++) { new Random(0).nextBytes(blob);
blob[i] = (byte) ('0' + (i % 10));
}
return blob; return blob;
} }

101
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackInserter.java

@ -52,6 +52,7 @@ import java.io.BufferedInputStream;
import java.io.EOFException; import java.io.EOFException;
import java.io.File; import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.FilterInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
@ -264,7 +265,6 @@ class PackInserter extends ObjectInserter {
try { try {
packHash = packOut.finishPack(); packHash = packOut.finishPack();
} finally { } finally {
packOut.close();
packOut = null; packOut = null;
} }
@ -369,6 +369,20 @@ class PackInserter extends ObjectInserter {
return cachedInflater; return cachedInflater;
} }
/**
* Stream that writes to a pack file.
* <p>
* Backed by two views of the same open file descriptor: a random-access file,
* and an output stream. Seeking in the file causes subsequent writes to the
* output stream to occur wherever the file pointer is pointing, so we need to
* take care to always seek to the end of the file before writing a new
* object.
* <p>
* Callers should always use {@link #seek(long)} to seek, rather than reaching
* into the file member. As long as this contract is followed, calls to {@link
* #write(byte[], int, int)} are guaranteed to write at the end of the file,
* even if there have been intermediate seeks.
*/
private class PackStream extends OutputStream { private class PackStream extends OutputStream {
final byte[] hdrBuf; final byte[] hdrBuf;
final CRC32 crc32; final CRC32 crc32;
@ -378,6 +392,8 @@ class PackInserter extends ObjectInserter {
private final CountingOutputStream out; private final CountingOutputStream out;
private final Deflater deflater; private final Deflater deflater;
private boolean atEnd;
PackStream(File pack) throws IOException { PackStream(File pack) throws IOException {
file = new RandomAccessFile(pack, "rw"); //$NON-NLS-1$ file = new RandomAccessFile(pack, "rw"); //$NON-NLS-1$
out = new CountingOutputStream(new FileOutputStream(file.getFD())); out = new CountingOutputStream(new FileOutputStream(file.getFD()));
@ -385,12 +401,23 @@ class PackInserter extends ObjectInserter {
compress = new DeflaterOutputStream(this, deflater, 8192); compress = new DeflaterOutputStream(this, deflater, 8192);
hdrBuf = new byte[32]; hdrBuf = new byte[32];
crc32 = new CRC32(); crc32 = new CRC32();
atEnd = true;
} }
long getOffset() { long getOffset() {
// This value is accurate as long as we only ever write to the end of the
// file, and don't seek back to overwrite any previous segments. Although
// this is subtle, storing the stream counter this way is still preferable
// to returning file.length() here, as it avoids a syscall and possible
// IOException.
return out.getCount(); return out.getCount();
} }
void seek(long offset) throws IOException {
file.seek(offset);
atEnd = false;
}
void beginObject(int objectType, long length) throws IOException { void beginObject(int objectType, long length) throws IOException {
crc32.reset(); crc32.reset();
deflater.reset(); deflater.reset();
@ -419,13 +446,22 @@ class PackInserter extends ObjectInserter {
@Override @Override
public void write(byte[] data, int off, int len) throws IOException { public void write(byte[] data, int off, int len) throws IOException {
crc32.update(data, off, len); crc32.update(data, off, len);
if (!atEnd) {
file.seek(file.length());
atEnd = true;
}
out.write(data, off, len); out.write(data, off, len);
} }
byte[] finishPack() throws IOException { byte[] finishPack() throws IOException {
// Overwrite placeholder header with actual object count, then hash. // Overwrite placeholder header with actual object count, then hash. This
// method intentionally uses direct seek/write calls rather than the
// wrappers which keep track of atEnd. This leaves atEnd, the file
// pointer, and out's counter in an inconsistent state; that's ok, since
// this method closes the file anyway.
try {
file.seek(0); file.seek(0);
write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size())); out.write(hdrBuf, 0, writePackHeader(hdrBuf, objectList.size()));
byte[] buf = buffer(); byte[] buf = buffer();
SHA1 md = digest().reset(); SHA1 md = digest().reset();
@ -440,14 +476,20 @@ class PackInserter extends ObjectInserter {
byte[] packHash = md.digest(); byte[] packHash = md.digest();
out.write(packHash, 0, packHash.length); out.write(packHash, 0, packHash.length);
return packHash; return packHash;
} finally {
close();
}
} }
@Override @Override
public void close() throws IOException { public void close() throws IOException {
deflater.end(); deflater.end();
try {
out.close(); out.close();
} finally {
file.close(); file.close();
} }
}
byte[] inflate(long filePos, int len) throws IOException, DataFormatException { byte[] inflate(long filePos, int len) throws IOException, DataFormatException {
byte[] dstbuf; byte[] dstbuf;
@ -477,7 +519,7 @@ class PackInserter extends ObjectInserter {
private int setInput(long filePos, Inflater inf, byte[] buf) private int setInput(long filePos, Inflater inf, byte[] buf)
throws IOException { throws IOException {
if (file.getFilePointer() != filePos) { if (file.getFilePointer() != filePos) {
file.seek(filePos); seek(filePos);
} }
int n = file.read(buf); int n = file.read(buf);
if (n < 0) { if (n < 0) {
@ -538,9 +580,8 @@ class PackInserter extends ObjectInserter {
} }
byte[] buf = buffer(); byte[] buf = buffer();
RandomAccessFile f = packOut.file; packOut.seek(obj.getOffset());
f.seek(obj.getOffset()); int cnt = packOut.file.read(buf, 0, 20);
int cnt = f.read(buf, 0, 20);
if (cnt <= 0) { if (cnt <= 0) {
throw new EOFException(JGitText.get().unexpectedEofInPack); throw new EOFException(JGitText.get().unexpectedEofInPack);
} }
@ -574,7 +615,7 @@ class PackInserter extends ObjectInserter {
return new ObjectLoader.SmallObject(type, data); return new ObjectLoader.SmallObject(type, data);
} }
} }
return new StreamLoader(f, type, sz, zpos); return new StreamLoader(type, sz, zpos);
} }
private byte[] inflate(PackedObjectInfo obj, long zpos, int sz) private byte[] inflate(PackedObjectInfo obj, long zpos, int sz)
@ -602,13 +643,11 @@ class PackInserter extends ObjectInserter {
} }
private class StreamLoader extends ObjectLoader { private class StreamLoader extends ObjectLoader {
private final RandomAccessFile file;
private final int type; private final int type;
private final long size; private final long size;
private final long pos; private final long pos;
StreamLoader(RandomAccessFile file, int type, long size, long pos) { StreamLoader(int type, long size, long pos) {
this.file = file;
this.type = type; this.type = type;
this.size = size; this.size = size;
this.pos = pos; this.pos = pos;
@ -618,14 +657,44 @@ class PackInserter extends ObjectInserter {
public ObjectStream openStream() public ObjectStream openStream()
throws MissingObjectException, IOException { throws MissingObjectException, IOException {
int bufsz = buffer().length; int bufsz = buffer().length;
file.seek(pos); packOut.seek(pos);
InputStream fileStream = new FilterInputStream(
Channels.newInputStream(packOut.file.getChannel())) {
// atEnd was already set to false by the previous seek, but it's
// technically possible for a caller to call insert on the
// inserter in the middle of reading from this stream. Behavior is
// undefined in this case, so it would arguably be ok to ignore,
// but it's not hard to at least make an attempt to not corrupt
// the data.
@Override
public int read() throws IOException {
packOut.atEnd = false;
return super.read();
}
@Override
public int read(byte[] b) throws IOException {
packOut.atEnd = false;
return super.read(b);
}
@Override
public int read(byte[] b, int off, int len) throws IOException {
packOut.atEnd = false;
return super.read(b,off,len);
}
@Override
public void close() {
// Never close underlying RandomAccessFile, which lasts the
// lifetime of the enclosing PackStream.
}
};
return new ObjectStream.Filter( return new ObjectStream.Filter(
type, size, type, size,
new BufferedInputStream( new BufferedInputStream(
new InflaterInputStream( new InflaterInputStream(fileStream, inflater(), bufsz), bufsz));
Channels.newInputStream(packOut.file.getChannel()),
inflater(), bufsz),
bufsz));
} }
@Override @Override

7
org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectInserter.java

@ -428,6 +428,13 @@ public abstract class ObjectInserter implements AutoCloseable {
* <p> * <p>
* The returned reader should return this inserter instance from {@link * The returned reader should return this inserter instance from {@link
* ObjectReader#getCreatedFromInserter()}. * ObjectReader#getCreatedFromInserter()}.
* <p>
* Behavior is undefined if an insert method is called on the inserter in the
* middle of reading from an {@link ObjectStream} opened from this reader. For
* example, reading the remainder of the object may fail, or newly written
* data may even be corrupted. Interleaving whole object reads (including
* streaming reads) with inserts is fine, just not interleaving streaming
* <em>partial</em> object reads with inserts.
* *
* @since 3.5 * @since 3.5
* @return reader for any object, including an object recently inserted by * @return reader for any object, including an object recently inserted by

Loading…
Cancel
Save