
Extract BlockBasedFile base class for DfsPackFile

This new base class has the minimum set of properties and methods
necessary for DfsBlockCache to manage blocks of a file in the cache.
Subclasses can use DfsBlockCache for any content.

This refactoring opens the door for additional PackExt types other
than PACK to be stored on a block-by-block basis by the DfsBlockCache.

Change-Id: I307228fc805c3ff0c596783beb24fd52bec35ba8
stable-4.9
Shawn Pearce 7 years ago
commit 84c71ac933
  1. org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/BlockBasedFile.java (207 changes)
  2. org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlock.java (2 changes)
  3. org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlockCache.java (26 changes)
  4. org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java (173 changes)

org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/BlockBasedFile.java (207 changes)

@@ -0,0 +1,207 @@
/*
* Copyright (C) 2017, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.internal.storage.dfs;

import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.text.MessageFormat;

import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.PackInvalidException;
import org.eclipse.jgit.internal.storage.pack.PackExt;

/** Block based file stored in {@link DfsBlockCache}. */
public abstract class BlockBasedFile {
	/** Cache that owns this file and its data. */
	final DfsBlockCache cache;

	/** Unique identity of this file while in-memory. */
	final DfsStreamKey key;

	/** Description of the associated pack file's storage. */
	final DfsPackDescription packDesc;
	final PackExt ext;

	/**
	 * Preferred alignment for loading blocks from the backing file.
	 * <p>
	 * It is initialized to 0 and filled in on the first read made from the
	 * file. Block sizes may be odd, e.g. 4091, caused by the underlying DFS
	 * storing 4091 user bytes and 5 bytes block metadata into a lower level
	 * 4096 byte block on disk.
	 */
	volatile int blockSize;

	/**
	 * Total number of bytes in this pack file.
	 * <p>
	 * This field initializes to -1 and gets populated when a block is loaded.
	 */
	volatile long length;

	/** True once corruption has been detected that cannot be worked around. */
	volatile boolean invalid;

	BlockBasedFile(DfsBlockCache cache, DfsStreamKey key,
			DfsPackDescription packDesc, PackExt ext) {
		this.cache = cache;
		this.key = key;
		this.packDesc = packDesc;
		this.ext = ext;
	}

	String getFileName() {
		return packDesc.getFileName(ext);
	}

	boolean invalid() {
		return invalid;
	}

	void setInvalid() {
		invalid = true;
	}

	void setBlockSize(int newSize) {
		blockSize = newSize;
	}

	long alignToBlock(long pos) {
		int size = blockSize;
		if (size == 0)
			size = cache.getBlockSize();
		return (pos / size) * size;
	}

	int blockSize(ReadableChannel rc) {
		// If the block alignment is not yet known, discover it. Prefer the
		// larger size from either the cache or the file itself.
		int size = blockSize;
		if (size == 0) {
			size = rc.blockSize();
			if (size <= 0)
				size = cache.getBlockSize();
			else if (size < cache.getBlockSize())
				size = (cache.getBlockSize() / size) * size;
			blockSize = size;
		}
		return size;
	}
	DfsBlock readOneBlock(long pos, DfsReader ctx,
			@Nullable ReadableChannel fileChannel) throws IOException {
		if (invalid)
			throw new PackInvalidException(getFileName());

		ctx.stats.readBlock++;
		long start = System.nanoTime();
		ReadableChannel rc = fileChannel != null ? fileChannel
				: ctx.db.openFile(packDesc, ext);
		try {
			int size = blockSize(rc);
			pos = (pos / size) * size;

			// If the size of the file is not yet known, try to discover it.
			// Channels may choose to return -1 to indicate they don't
			// know the length yet, in this case read up to the size unit
			// given by the caller, then recheck the length.
			long len = length;
			if (len < 0) {
				len = rc.size();
				if (0 <= len)
					length = len;
			}

			if (0 <= len && len < pos + size)
				size = (int) (len - pos);
			if (size <= 0)
				throw new EOFException(MessageFormat.format(
						DfsText.get().shortReadOfBlock, Long.valueOf(pos),
						getFileName(), Long.valueOf(0), Long.valueOf(0)));

			byte[] buf = new byte[size];
			rc.position(pos);
			int cnt = read(rc, ByteBuffer.wrap(buf, 0, size));
			ctx.stats.readBlockBytes += cnt;
			if (cnt != size) {
				if (0 <= len) {
					throw new EOFException(MessageFormat.format(
							DfsText.get().shortReadOfBlock, Long.valueOf(pos),
							getFileName(), Integer.valueOf(size),
							Integer.valueOf(cnt)));
				}

				// Assume the entire thing was read in a single shot, compact
				// the buffer to only the space required.
				byte[] n = new byte[cnt];
				System.arraycopy(buf, 0, n, 0, n.length);
				buf = n;
			} else if (len < 0) {
				// With no length at the start of the read, the channel should
				// have the length available at the end.
				length = len = rc.size();
			}

			return new DfsBlock(key, pos, buf);
		} finally {
			if (rc != fileChannel) {
				rc.close();
			}
			ctx.stats.readBlockMicros += elapsedMicros(start);
		}
	}

	static int read(ReadableChannel rc, ByteBuffer buf) throws IOException {
		int n;
		do {
			n = rc.read(buf);
		} while (0 < n && buf.hasRemaining());
		return buf.position();
	}

	static long elapsedMicros(long start) {
		return (System.nanoTime() - start) / 1000L;
	}
}
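readOneBlock above is written for channels that may not know their length up front: size() may answer -1, in which case the method reads up to one block and rechecks the length afterwards. The toy channel below makes that contract concrete. It is a hypothetical illustration: the method set mirrors JGit's ReadableChannel as used above, and setReadAheadBytes is assumed to belong to that interface even though this diff never calls it.

package org.eclipse.jgit.internal.storage.dfs;

import java.nio.ByteBuffer;

// In-memory channel whose length stays "unknown" until a read reaches EOF.
class ByteArrayReadableChannel implements ReadableChannel {
	private final byte[] data;
	private int ptr;
	private boolean open = true;
	private boolean sizeKnown; // pretend discovering the length is expensive

	ByteArrayReadableChannel(byte[] data) {
		this.data = data;
	}

	@Override
	public int read(ByteBuffer dst) {
		int n = Math.min(dst.remaining(), data.length - ptr);
		if (n == 0) {
			sizeKnown = true; // EOF reached, length is now known
			return -1;
		}
		dst.put(data, ptr, n);
		ptr += n;
		if (ptr == data.length)
			sizeKnown = true;
		return n;
	}

	@Override
	public long position() {
		return ptr;
	}

	@Override
	public void position(long newPosition) {
		ptr = (int) newPosition;
	}

	@Override
	public long size() {
		return sizeKnown ? data.length : -1; // -1: not yet known
	}

	@Override
	public int blockSize() {
		return 0; // no preferred alignment; the cache default applies
	}

	@Override
	public void setReadAheadBytes(int bufferSize) {
		// no-op for an in-memory channel
	}

	@Override
	public boolean isOpen() {
		return open;
	}

	@Override
	public void close() {
		open = false;
	}
}

Fed through readOneBlock, the first read sees len == -1, reads a whole block's worth, and either compacts the buffer (short read at EOF) or picks the length up from size() once the channel has learned it.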

org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlock.java (2 changes)

@@ -52,7 +52,7 @@ import java.util.zip.Inflater;
import org.eclipse.jgit.internal.storage.pack.PackOutputStream;

-/** A cached slice of a {@link DfsPackFile}. */
+/** A cached slice of a {@link BlockBasedFile}. */
final class DfsBlock {
	final DfsStreamKey stream;

org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBlockCache.java (26 changes)

@@ -57,12 +57,12 @@ import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.internal.JGitText;

/**
- * Caches slices of a {@link DfsPackFile} in memory for faster read access.
+ * Caches slices of a {@link BlockBasedFile} in memory for faster read access.
 * <p>
 * The DfsBlockCache serves as a Java based "buffer cache", loading segments of
- * a DfsPackFile into the JVM heap prior to use. As JGit often wants to do reads
- * of only tiny slices of a file, the DfsBlockCache tries to smooth out these
- * tiny reads into larger block-sized IO operations.
+ * a BlockBasedFile into the JVM heap prior to use. As JGit often wants to do
+ * reads of only tiny slices of a file, the DfsBlockCache tries to smooth out
+ * these tiny reads into larger block-sized IO operations.
 * <p>
 * Whenever a cache miss occurs, loading is invoked by exactly one thread for
 * the given <code>(DfsPackKey,position)</code> key tuple. This is ensured by an
@@ -303,24 +303,24 @@ public final class DfsBlockCache {
	/**
	 * Lookup a cached object, creating and loading it if it doesn't exist.
	 *
-	 * @param pack
+	 * @param file
	 *            the pack that "contains" the cached object.
	 * @param position
	 *            offset within <code>pack</code> of the object.
	 * @param ctx
	 *            current thread's reader.
-	 * @param packChannel
+	 * @param fileChannel
	 *            optional channel to read {@code pack}.
	 * @return the object reference.
	 * @throws IOException
	 *             the reference was not in the cache and could not be loaded.
	 */
-	DfsBlock getOrLoad(DfsPackFile pack, long position, DfsReader ctx,
-			@Nullable ReadableChannel packChannel) throws IOException {
+	DfsBlock getOrLoad(BlockBasedFile file, long position, DfsReader ctx,
+			@Nullable ReadableChannel fileChannel) throws IOException {
		final long requestedPosition = position;
-		position = pack.alignToBlock(position);
+		position = file.alignToBlock(position);
-		DfsStreamKey key = pack.key;
+		DfsStreamKey key = file.key;
		int slot = slot(key, position);
		HashEntry e1 = table.get(slot);
		DfsBlock v = scan(e1, key, position);
@@ -348,7 +348,7 @@ public final class DfsBlockCache {
		statMiss.incrementAndGet();
		boolean credit = true;
		try {
-			v = pack.readOneBlock(position, ctx, packChannel);
+			v = file.readOneBlock(position, ctx, fileChannel);
			credit = false;
		} finally {
			if (credit)
@@ -377,9 +377,9 @@ public final class DfsBlockCache {
		// If the block size changed from the default, it is possible the block
		// that was loaded is the wrong block for the requested position.
-		if (v.contains(pack.key, requestedPosition))
+		if (v.contains(file.key, requestedPosition))
			return v;
-		return getOrLoad(pack, requestedPosition, ctx, packChannel);
+		return getOrLoad(file, requestedPosition, ctx, fileChannel);
	}

	@SuppressWarnings("unchecked")
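The re-request at the end of getOrLoad covers a real corner case: the first lookup aligns the position using the cache's default block size, but loading that block discovers the file's true block size, and the two alignments can name different blocks. A standalone sketch, with sizes assumed for illustration (4091 echoes the javadoc example in BlockBasedFile):

public class AlignDemo {
	// Same arithmetic as BlockBasedFile.alignToBlock.
	static long alignToBlock(long pos, int blockSize) {
		return (pos / blockSize) * blockSize;
	}

	public static void main(String[] args) {
		long requested = 4094;
		// First lookup: file block size unknown, cache default of 4096 assumed.
		System.out.println(alignToBlock(requested, 4096)); // 0
		// Loading that block reveals the real block size, 4091, so the block
		// [0, 4091) does not contain offset 4094 ...
		System.out.println(alignToBlock(requested, 4091)); // 4091
		// ... and getOrLoad recurses once to fetch the block at [4091, 8182).
	}
}

This is why the check is v.contains(file.key, requestedPosition): containment is tested against the requested offset, not the aligned one.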

org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java (173 changes)

@@ -62,7 +62,6 @@ import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;

-import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
@@ -89,38 +88,10 @@ import org.eclipse.jgit.util.LongList;
 * delta packed format yielding high compression of lots of objects where some
 * objects are similar.
 */
-public final class DfsPackFile {
-	/** Cache that owns this pack file and its data. */
-	private final DfsBlockCache cache;
-	/** Description of the pack file's storage. */
-	private final DfsPackDescription packDesc;
-	/** Unique identity of this pack while in-memory. */
-	final DfsStreamKey key;
+public final class DfsPackFile extends BlockBasedFile {
	final DfsStreamKey idxKey = new DfsStreamKey();
	final DfsStreamKey reverseIdxKey = new DfsStreamKey();
-	final DfsStreamKey bitmapKey = new DfsStreamKey();
-	/**
-	 * Total number of bytes in this pack file.
-	 * <p>
-	 * This field initializes to -1 and gets populated when a block is loaded.
-	 */
-	volatile long length;
-	/**
-	 * Preferred alignment for loading blocks from the backing file.
-	 * <p>
-	 * It is initialized to 0 and filled in on the first read made from the
-	 * file. Block sizes may be odd, e.g. 4091, caused by the underlying DFS
-	 * storing 4091 user bytes and 5 bytes block metadata into a lower level
-	 * 4096 byte block on disk.
-	 */
-	private volatile int blockSize;
-	/** True once corruption has been detected that cannot be worked around. */
-	private volatile boolean invalid;
+	DfsStreamKey bitmapKey;

	/**
	 * Lock for initialization of {@link #index} and {@link #corruptObjects}.
@@ -158,10 +129,7 @@ public final class DfsPackFile {
	 *            interned key used to identify blocks in the block cache.
	 */
	DfsPackFile(DfsBlockCache cache, DfsPackDescription desc, DfsStreamKey key) {
-		this.cache = cache;
-		this.packDesc = desc;
-		this.key = key;
+		super(cache, key, desc, PACK);
		length = desc.getFileSize(PACK);
		if (length <= 0)
			length = -1;
@@ -185,14 +153,6 @@ public final class DfsPackFile {
		return key.cachedSize.get();
	}

-	String getPackName() {
-		return packDesc.getFileName(PACK);
-	}
-	void setBlockSize(int newSize) {
-		blockSize = newSize;
-	}
	void setPackIndex(PackIndex idx) {
		long objCnt = idx.getObjectCount();
		int recSize = Constants.OBJECT_ID_LENGTH + 8;
@@ -223,7 +183,7 @@ public final class DfsPackFile {
		}

		if (invalid)
-			throw new PackInvalidException(getPackName());
+			throw new PackInvalidException(getFileName());

		Repository.getGlobalListenerList()
				.dispatch(new BeforeDfsPackIndexLoadedEvent(this));
@@ -276,10 +236,6 @@ public final class DfsPackFile {
		}
	}

-	private static long elapsedMicros(long start) {
-		return (System.nanoTime() - start) / 1000L;
-	}
	final boolean isGarbage() {
		return packDesc.getPackSource() == UNREACHABLE_GARBAGE;
	}
@@ -304,7 +260,9 @@ public final class DfsPackFile {
			if (idx != null)
				return idx;
		}
+		if (bitmapKey == null) {
+			bitmapKey = new DfsStreamKey();
+		}
		long size;
		PackBitmapIndex idx;
		try {
@@ -650,7 +608,7 @@ public final class DfsPackFile {
					setCorrupt(src.offset);
					throw new CorruptObjectException(MessageFormat.format(
							JGitText.get().objectAtHasBadZlibStream,
-							Long.valueOf(src.offset), getPackName()));
+							Long.valueOf(src.offset), getFileName()));
				}
			} else if (validate) {
				assert(crc1 != null);
@@ -692,7 +650,7 @@ public final class DfsPackFile {
			CorruptObjectException corruptObject = new CorruptObjectException(
					MessageFormat.format(
							JGitText.get().objectAtHasBadZlibStream,
-							Long.valueOf(src.offset), getPackName()));
+							Long.valueOf(src.offset), getFileName()));
			corruptObject.initCause(dataFormat);

			StoredObjectRepresentationNotAvailableException gone;
@@ -754,24 +712,16 @@ public final class DfsPackFile {
				if (crc2.getValue() != expectedCRC) {
					throw new CorruptObjectException(MessageFormat.format(
							JGitText.get().objectAtHasBadZlibStream,
-							Long.valueOf(src.offset), getPackName()));
+							Long.valueOf(src.offset), getFileName()));
				}
			}
		}
	}

-	boolean invalid() {
-		return invalid;
-	}
-	void setInvalid() {
-		invalid = true;
-	}
	private IOException packfileIsTruncated() {
		invalid = true;
		return new IOException(MessageFormat.format(
-				JGitText.get().packfileIsTruncated, getPackName()));
+				JGitText.get().packfileIsTruncated, getFileName()));
	}

	private void readFully(long position, byte[] dstbuf, int dstoff, int cnt,
@@ -780,107 +730,10 @@ public final class DfsPackFile {
			throw new EOFException();
	}

-	long alignToBlock(long pos) {
-		int size = blockSize;
-		if (size == 0)
-			size = cache.getBlockSize();
-		return (pos / size) * size;
-	}
	DfsBlock getOrLoadBlock(long pos, DfsReader ctx) throws IOException {
		return cache.getOrLoad(this, pos, ctx, null);
	}
-	DfsBlock readOneBlock(long pos, DfsReader ctx,
-			@Nullable ReadableChannel packChannel) throws IOException {
-		if (invalid)
-			throw new PackInvalidException(getPackName());
-		ctx.stats.readBlock++;
-		long start = System.nanoTime();
-		ReadableChannel rc = packChannel != null
-				? packChannel
-				: ctx.db.openFile(packDesc, PACK);
-		try {
-			int size = blockSize(rc);
-			pos = (pos / size) * size;
-			// If the size of the file is not yet known, try to discover it.
-			// Channels may choose to return -1 to indicate they don't
-			// know the length yet, in this case read up to the size unit
-			// given by the caller, then recheck the length.
-			long len = length;
-			if (len < 0) {
-				len = rc.size();
-				if (0 <= len)
-					length = len;
-			}
-			if (0 <= len && len < pos + size)
-				size = (int) (len - pos);
-			if (size <= 0)
-				throw new EOFException(MessageFormat.format(
-						DfsText.get().shortReadOfBlock, Long.valueOf(pos),
-						getPackName(), Long.valueOf(0), Long.valueOf(0)));
-			byte[] buf = new byte[size];
-			rc.position(pos);
-			int cnt = read(rc, ByteBuffer.wrap(buf, 0, size));
-			ctx.stats.readBlockBytes += cnt;
-			if (cnt != size) {
-				if (0 <= len) {
-					throw new EOFException(MessageFormat.format(
-							DfsText.get().shortReadOfBlock,
-							Long.valueOf(pos),
-							getPackName(),
-							Integer.valueOf(size),
-							Integer.valueOf(cnt)));
-				}
-				// Assume the entire thing was read in a single shot, compact
-				// the buffer to only the space required.
-				byte[] n = new byte[cnt];
-				System.arraycopy(buf, 0, n, 0, n.length);
-				buf = n;
-			} else if (len < 0) {
-				// With no length at the start of the read, the channel should
-				// have the length available at the end.
-				length = len = rc.size();
-			}
-			return new DfsBlock(key, pos, buf);
-		} finally {
-			if (rc != packChannel) {
-				rc.close();
-			}
-			ctx.stats.readBlockMicros += elapsedMicros(start);
-		}
-	}
-	private int blockSize(ReadableChannel rc) {
-		// If the block alignment is not yet known, discover it. Prefer the
-		// larger size from either the cache or the file itself.
-		int size = blockSize;
-		if (size == 0) {
-			size = rc.blockSize();
-			if (size <= 0)
-				size = cache.getBlockSize();
-			else if (size < cache.getBlockSize())
-				size = (cache.getBlockSize() / size) * size;
-			blockSize = size;
-		}
-		return size;
-	}
-	private static int read(ReadableChannel rc, ByteBuffer buf)
-			throws IOException {
-		int n;
-		do {
-			n = rc.read(buf);
-		} while (0 < n && buf.hasRemaining());
-		return buf.position();
-	}
	ObjectLoader load(DfsReader ctx, long pos)
			throws IOException {
		try {
@@ -1017,7 +870,7 @@ public final class DfsPackFile {
			CorruptObjectException coe = new CorruptObjectException(
					MessageFormat.format(
							JGitText.get().objectAtHasBadZlibStream, Long.valueOf(pos),
-							getPackName()));
+							getFileName()));
			coe.initCause(dfe);
			throw coe;
		}
@@ -1165,7 +1018,7 @@ public final class DfsPackFile {
			CorruptObjectException coe = new CorruptObjectException(
					MessageFormat.format(
							JGitText.get().objectAtHasBadZlibStream, Long.valueOf(pos),
-							getPackName()));
+							getFileName()));
			coe.initCause(dfe);
			throw coe;
		}
