From 16419dad35792bd4f9c8cc6eaff38faa7c9cd522 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Wed, 26 May 2010 19:00:06 -0700 Subject: [PATCH] Don't use interruptable pread() to access pack files The J2SE NIO APIs require that FileChannel close the underlying file descriptor if a thread is interrupted while it is inside of a read or write operation on that channel. This is insane, because it means we cannot share the file descriptor between threads. If a thread is in the middle of the FileChannel variant of IO.readFully() and it receives an interrupt, the pack will be automatically closed on us. This causes the other threads trying to use that same FileChannel to receive IOExceptions, which leads to the pack getting marked as invalid. Once the pack is marked invalid, JGit loses access to its entire contents and starts to report MissingObjectExceptions. Because PackWriter must ensure that the chosen pack file stays available until the current object's data is fully copied to the output, JGit cannot simply reopen the pack when its automatically closed due to an interrupt being sent at the wrong time. The pack may have been deleted by a concurrent `git gc` process, and that open file descriptor might be the last reference to the inode on disk. Once its closed, the PackWriter loses access to that object representation, and it cannot complete sending the object the client. Fortunately, RandomAccessFile's readFully method does not have this problem. Interrupts during readFully() are ignored. However, it requires us to first seek to the offset we need to read, then issue the read call. This requires locking around the file descriptor to prevent concurrent threads from moving the pointer before the read. This reduces the concurrency level, as now only one window can be paged in at a time from each pack. However, the WindowCache should already be holding most of the pages required to handle the working set for a process, and its own internal locking was already limiting us on the number of concurrent loads possible. Provided that most concurrent accesses are getting hits in the WindowCache, or are for different repositories on the same server, we shouldn't see a major performance hit due to the more serialized loading. I would have preferred to use a pool of RandomAccessFiles for each pack, with threads borrowing an instance dedicated to that thread whenever they needed to page in a window. This would permit much higher levels of concurrency by using multiple file descriptors (and file pointers) for each pack. However the code became too complex to develop in any reasonable period of time, so I've chosen to retrofit the existing code with more serialization instead. Bug: 308945 Change-Id: I2e6e11c6e5a105e5aef68871b66200fd725134c9 Signed-off-by: Shawn O. Pearce --- .../eclipse/jgit/http/server/FileSender.java | 5 +- .../src/org/eclipse/jgit/lib/PackFile.java | 85 +++++++++++-------- .../src/org/eclipse/jgit/util/IO.java | 32 ------- 3 files changed, 52 insertions(+), 70 deletions(-) diff --git a/org.eclipse.jgit.http.server/src/org/eclipse/jgit/http/server/FileSender.java b/org.eclipse.jgit.http.server/src/org/eclipse/jgit/http/server/FileSender.java index 6b746e319..731b4caa8 100644 --- a/org.eclipse.jgit.http.server/src/org/eclipse/jgit/http/server/FileSender.java +++ b/org.eclipse.jgit.http.server/src/org/eclipse/jgit/http/server/FileSender.java @@ -64,7 +64,6 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.eclipse.jgit.lib.ObjectId; -import org.eclipse.jgit.util.IO; /** * Dumps a file over HTTP GET (or its information via HEAD). @@ -122,7 +121,8 @@ final class FileSender { String getTailChecksum() throws IOException { final int n = 20; final byte[] buf = new byte[n]; - IO.readFully(source.getChannel(), fileLen - n, buf, 0, n); + source.seek(fileLen - n); + source.readFully(buf, 0, n); return ObjectId.fromRaw(buf).getName(); } @@ -140,6 +140,7 @@ final class FileSender { final OutputStream out = rsp.getOutputStream(); try { final byte[] buf = new byte[4096]; + source.seek(pos); while (pos < end) { final int r = (int) Math.min(buf.length, end - pos); final int n = source.read(buf, 0, r); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java index 8f4e69163..829832e6a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/PackFile.java @@ -65,7 +65,6 @@ import org.eclipse.jgit.JGitText; import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.PackInvalidException; import org.eclipse.jgit.errors.PackMismatchException; -import org.eclipse.jgit.util.IO; import org.eclipse.jgit.util.NB; import org.eclipse.jgit.util.RawParseUtils; @@ -90,6 +89,9 @@ public class PackFile implements Iterable { private RandomAccessFile fd; + /** Serializes reads performed against {@link #fd}. */ + private final Object readLock = new Object(); + long length; private int activeWindows; @@ -364,9 +366,11 @@ public class PackFile implements Iterable { try { if (invalid) throw new PackInvalidException(packFile); - fd = new RandomAccessFile(packFile, "r"); - length = fd.length(); - onOpenPack(); + synchronized (readLock) { + fd = new RandomAccessFile(packFile, "r"); + length = fd.length(); + onOpenPack(); + } } catch (IOException ioe) { openFail(); throw ioe; @@ -387,53 +391,61 @@ public class PackFile implements Iterable { } private void doClose() { - if (fd != null) { - try { - fd.close(); - } catch (IOException err) { - // Ignore a close event. We had it open only for reading. - // There should not be errors related to network buffers - // not flushed, etc. + synchronized (readLock) { + if (fd != null) { + try { + fd.close(); + } catch (IOException err) { + // Ignore a close event. We had it open only for reading. + // There should not be errors related to network buffers + // not flushed, etc. + } + fd = null; } - fd = null; } } ByteArrayWindow read(final long pos, int size) throws IOException { - if (length < pos + size) - size = (int) (length - pos); - final byte[] buf = new byte[size]; - IO.readFully(fd.getChannel(), pos, buf, 0, size); - return new ByteArrayWindow(this, pos, buf); + synchronized (readLock) { + if (length < pos + size) + size = (int) (length - pos); + final byte[] buf = new byte[size]; + fd.seek(pos); + fd.readFully(buf, 0, size); + return new ByteArrayWindow(this, pos, buf); + } } ByteWindow mmap(final long pos, int size) throws IOException { - if (length < pos + size) - size = (int) (length - pos); + synchronized (readLock) { + if (length < pos + size) + size = (int) (length - pos); - MappedByteBuffer map; - try { - map = fd.getChannel().map(MapMode.READ_ONLY, pos, size); - } catch (IOException ioe1) { - // The most likely reason this failed is the JVM has run out - // of virtual memory. We need to discard quickly, and try to - // force the GC to finalize and release any existing mappings. - // - System.gc(); - System.runFinalization(); - map = fd.getChannel().map(MapMode.READ_ONLY, pos, size); - } + MappedByteBuffer map; + try { + map = fd.getChannel().map(MapMode.READ_ONLY, pos, size); + } catch (IOException ioe1) { + // The most likely reason this failed is the JVM has run out + // of virtual memory. We need to discard quickly, and try to + // force the GC to finalize and release any existing mappings. + // + System.gc(); + System.runFinalization(); + map = fd.getChannel().map(MapMode.READ_ONLY, pos, size); + } - if (map.hasArray()) - return new ByteArrayWindow(this, pos, map.array()); - return new ByteBufferWindow(this, pos, map); + if (map.hasArray()) + return new ByteArrayWindow(this, pos, map.array()); + return new ByteBufferWindow(this, pos, map); + } } private void onOpenPack() throws IOException { final PackIndex idx = idx(); final byte[] buf = new byte[20]; - IO.readFully(fd.getChannel(), 0, buf, 0, 12); + fd.seek(0); + fd.readFully(buf, 0, 12); if (RawParseUtils.match(buf, 0, Constants.PACK_SIGNATURE) != 4) throw new IOException(JGitText.get().notAPACKFile); final long vers = NB.decodeUInt32(buf, 4); @@ -445,7 +457,8 @@ public class PackFile implements Iterable { throw new PackMismatchException(MessageFormat.format( JGitText.get().packObjectCountMismatch, packCnt, idx.getObjectCount(), getPackFile())); - IO.readFully(fd.getChannel(), length - 20, buf, 0, 20); + fd.seek(length - 20); + fd.read(buf, 0, 20); if (!Arrays.equals(buf, packChecksum)) throw new PackMismatchException(MessageFormat.format( JGitText.get().packObjectCountMismatch diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java index 177865420..1f2042d4c 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java @@ -51,8 +51,6 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; import java.text.MessageFormat; import org.eclipse.jgit.JGitText; @@ -138,36 +136,6 @@ public class IO { } } - /** - * Read the entire byte array into memory, or throw an exception. - * - * @param fd - * file to read the data from. - * @param pos - * position to read from the file at. - * @param dst - * buffer that must be fully populated, [off, off+len). - * @param off - * position within the buffer to start writing to. - * @param len - * number of bytes that must be read. - * @throws EOFException - * the stream ended before dst was fully populated. - * @throws IOException - * there was an error reading from the stream. - */ - public static void readFully(final FileChannel fd, long pos, - final byte[] dst, int off, int len) throws IOException { - while (len > 0) { - final int r = fd.read(ByteBuffer.wrap(dst, off, len), pos); - if (r <= 0) - throw new EOFException(JGitText.get().shortReadOfBlock); - pos += r; - off += r; - len -= r; - } - } - /** * Skip an entire region of an input stream. *