RefAdvertiser: Avoid object parsing

It isn't strictly necessary to validate every reference's target object is reachable in the repository before advertising it to a client. This is an expensive operation when there are thousands of references, and its very unlikely that a reference uses a missing object, because garbage collection proceeds from the references and walks down through the graph. So trying to hide a dangling reference from clients is relatively pointless. Even if we are trying to avoid giving a client a corrupt repository, this simple check isn't sufficient. It is possible for a reference to point to a valid commit, but that commit to have a missing blob in its root tree. This can be caused by staging a file into the index, waiting several weeks, then committing that file while also racing against a prune. The prune may delete the blob, since its modification time is more than 2 weeks ago, but retain the commit, since its modification time is right now. Such graph corruption is already caught during PackWriter as it enumerates the graph from the client's want list and digs back to the roots or common base. Leave the reference validation also for that same phase, where we know we have to parse the object to support the enumeration. Change-Id: Iee70ead0d3ed2d2fcc980417d09d7a69b05f5c2f Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
14 years ago · 04759f3274
5 changed files with 71 additions and 112 deletions
--- a/org.eclipse.jgit.http.server/src/org/eclipse/jgit/http/server/InfoRefsServlet.java
+++ b/org.eclipse.jgit.http.server/src/org/eclipse/jgit/http/server/InfoRefsServlet.java
@ -56,8 +56,6 @@ import javax.servlet.http.HttpServletResponse;
 import org.eclipse.jgit.lib.Constants;
 import org.eclipse.jgit.lib.Ref;
 import org.eclipse.jgit.lib.Repository;
-import org.eclipse.jgit.revwalk.RevFlag;
-import org.eclipse.jgit.revwalk.RevWalk;
 import org.eclipse.jgit.transport.RefAdvertiser;
 import org.eclipse.jgit.util.HttpSupport;

@ -73,36 +71,28 @@ class InfoRefsServlet extends HttpServlet {
 		rsp.setCharacterEncoding(Constants.CHARACTER_ENCODING);

 		final Repository db = getRepository(req);
-		final RevWalk walk = new RevWalk(db);
-		try {
-			final RevFlag ADVERTISED = walk.newFlag("ADVERTISED");
+		final OutputStreamWriter out = new OutputStreamWriter(
+				new SmartOutputStream(req, rsp), Constants.CHARSET);
+		final RefAdvertiser adv = new RefAdvertiser() {
+			@Override
+			protected void writeOne(final CharSequence line) throws IOException {
+				// Whoever decided that info/refs should use a different
+				// delimiter than the native git:// protocol shouldn't
+				// be allowed to design this sort of stuff. :-(
+				out.append(line.toString().replace(' ', '\t'));
+			}

-			final OutputStreamWriter out = new OutputStreamWriter(
-					new SmartOutputStream(req, rsp), Constants.CHARSET);
-			final RefAdvertiser adv = new RefAdvertiser() {
-				@Override
-				protected void writeOne(final CharSequence line)
-						throws IOException {
-					// Whoever decided that info/refs should use a different
-					// delimiter than the native git:// protocol shouldn't
-					// be allowed to design this sort of stuff. :-(
-					out.append(line.toString().replace(' ', '\t'));
-				}
+			@Override
+			protected void end() {
+				// No end marker required for info/refs format.
+			}
+		};
+		adv.init(db);
+		adv.setDerefTags(true);

-				@Override
-				protected void end() {
-					// No end marker required for info/refs format.
-				}
-			};
-			adv.init(walk, ADVERTISED);
-			adv.setDerefTags(true);
-
-			Map<String, Ref> refs = db.getAllRefs();
-			refs.remove(Constants.HEAD);
-			adv.send(refs);
-			out.close();
-		} finally {
-			walk.release();
-		}
+		Map<String, Ref> refs = db.getAllRefs();
+		refs.remove(Constants.HEAD);
+		adv.send(refs);
+		out.close();
 	}
 }
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/FileRepository.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/FileRepository.java
@ -343,8 +343,12 @@ public class FileRepository extends Repository {
 				Repository repo;

 				repo = ((AlternateRepository) d).repository;
-				for (Ref ref : repo.getAllRefs().values())
-					r.add(ref.getObjectId());
+				for (Ref ref : repo.getAllRefs().values()) {
+					if (ref.getObjectId() != null)
+						r.add(ref.getObjectId());
+					if (ref.getPeeledObjectId() != null)
+						r.add(ref.getPeeledObjectId());
+				}
 				r.addAll(repo.getAdditionalHaves());
 			}
 		}
--- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/ReceivePack.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/ReceivePack.java
@ -694,8 +694,7 @@ public class ReceivePack {
 			return;
 		}

-		final RevFlag advertised = walk.newFlag("ADVERTISED");
-		adv.init(walk, advertised);
+		adv.init(db);
 		adv.advertiseCapability(CAPABILITY_SIDE_BAND_64K);
 		adv.advertiseCapability(CAPABILITY_DELETE_REFS);
 		adv.advertiseCapability(CAPABILITY_REPORT_STATUS);
--- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/RefAdvertiser.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/RefAdvertiser.java
@ -44,6 +44,7 @@
 package org.eclipse.jgit.transport;

 import java.io.IOException;
+import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.Map;
 import java.util.Set;
@ -55,10 +56,6 @@ import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.Ref;
 import org.eclipse.jgit.lib.RefComparator;
 import org.eclipse.jgit.lib.Repository;
-import org.eclipse.jgit.revwalk.RevFlag;
-import org.eclipse.jgit.revwalk.RevObject;
-import org.eclipse.jgit.revwalk.RevTag;
-import org.eclipse.jgit.revwalk.RevWalk;
 import org.eclipse.jgit.util.RefMap;

 /** Support for the start of {@link UploadPack} and {@link ReceivePack}. */
@ -88,33 +85,28 @@ public abstract class RefAdvertiser {
 		}
 	}

-	private RevWalk walk;
-
-	private RevFlag ADVERTISED;
-
 	private final StringBuilder tmpLine = new StringBuilder(100);

 	private final char[] tmpId = new char[Constants.OBJECT_ID_STRING_LENGTH];

 	private final Set<String> capablities = new LinkedHashSet<String>();

+	private final Set<ObjectId> sent = new HashSet<ObjectId>();
+
+	private Repository repository;
+
 	private boolean derefTags;

 	private boolean first = true;

 	/**
-	 * Initialize a new advertisement formatter.
+	 * Initialize this advertiser with a repository for peeling tags.
 	 *
-	 * @param protoWalk
-	 *            the RevWalk used to parse objects that are advertised.
-	 * @param advertisedFlag
-	 *            flag marked on any advertised objects parsed out of the
-	 *            {@code protoWalk}'s object pool, permitting the caller to
-	 *            later quickly determine if an object was advertised (or not).
+	 * @param src
+	 *            the repository to read from.
 	 */
-	public void init(final RevWalk protoWalk, final RevFlag advertisedFlag) {
-		walk = protoWalk;
-		ADVERTISED = advertisedFlag;
+	public void init(Repository src) {
+		repository = src;
 	}

 	/**
@ -124,8 +116,6 @@ public abstract class RefAdvertiser {
 	 * This method must be invoked prior to any of the following:
 	 * <ul>
 	 * <li>{@link #send(Map)}
-	 * <li>{@link #advertiseHave(AnyObjectId)}
-	 * <li>{@link #includeAdditionalHaves(Repository)}
 	 * </ul>
 	 *
 	 * @param deref
@ -163,19 +153,31 @@ public abstract class RefAdvertiser {
 	 *            zero or more refs to format for the client. The collection is
 	 *            sorted before display if necessary, and therefore may appear
 	 *            in any order.
+	 * @return set of ObjectIds that were advertised to the client.
 	 * @throws IOException
 	 *             the underlying output stream failed to write out an
 	 *             advertisement record.
 	 */
-	public void send(final Map<String, Ref> refs) throws IOException {
-		for (final Ref r : getSortedRefs(refs)) {
-			final RevObject obj = parseAnyOrNull(r.getObjectId());
-			if (obj != null) {
-				advertiseAny(obj, r.getName());
-				if (derefTags && obj instanceof RevTag)
-					advertiseTag((RevTag) obj, r.getName() + "^{}");
+	public Set<ObjectId> send(Map<String, Ref> refs) throws IOException {
+		for (Ref ref : getSortedRefs(refs)) {
+			if (ref.getObjectId() == null)
+				continue;
+
+			advertiseAny(ref.getObjectId(), ref.getName());
+
+			if (!derefTags)
+				continue;
+
+			if (!ref.isPeeled()) {
+				if (repository == null)
+					continue;
+				ref = repository.peel(ref);
 			}
+
+			if (ref.getPeeledObjectId() != null)
+				advertiseAny(ref.getPeeledObjectId(), ref.getName() + "^{}");
 		}
+		return sent;
 	}

 	private Iterable<Ref> getSortedRefs(Map<String, Ref> all) {
@ -200,12 +202,7 @@ public abstract class RefAdvertiser {
 	 *             advertisement record.
 	 */
 	public void advertiseHave(AnyObjectId id) throws IOException {
-		RevObject obj = parseAnyOrNull(id);
-		if (obj != null) {
-			advertiseAnyOnce(obj, ".have");
-			if (obj instanceof RevTag)
-				advertiseAnyOnce(((RevTag) obj).getObject(), ".have");
-		}
+		advertiseAnyOnce(id, ".have");
 	}

 	/**
@ -227,45 +224,18 @@ public abstract class RefAdvertiser {
 		return first;
 	}

-	private RevObject parseAnyOrNull(final AnyObjectId id) {
-		if (id == null)
-			return null;
-		try {
-			return walk.parseAny(id);
-		} catch (IOException e) {
-			return null;
-		}
-	}
-
-	private void advertiseAnyOnce(final RevObject obj, final String refName)
+	private void advertiseAnyOnce(AnyObjectId obj, final String refName)
 			throws IOException {
-		if (!obj.has(ADVERTISED))
+		if (!sent.contains(obj))
 			advertiseAny(obj, refName);
 	}

-	private void advertiseAny(final RevObject obj, final String refName)
+	private void advertiseAny(AnyObjectId obj, final String refName)
 			throws IOException {
-		obj.add(ADVERTISED);
+		sent.add(obj.toObjectId());
 		advertiseId(obj, refName);
 	}

-	private void advertiseTag(final RevTag tag, final String refName)
-			throws IOException {
-		RevObject o = tag;
-		do {
-			// Fully unwrap here so later on we have these already parsed.
-			final RevObject target = ((RevTag) o).getObject();
-			try {
-				walk.parseHeaders(target);
-			} catch (IOException err) {
-				return;
-			}
-			target.add(ADVERTISED);
-			o = target;
-		} while (o instanceof RevTag);
-		advertiseAny(tag.getObject(), refName);
-	}
-
 	/**
 	 * Advertise one object under a specific name.
 	 * <p>
--- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java
@ -151,8 +151,8 @@ public class UploadPack {
 	/** null if {@link #commonBase} should be examined again. */
 	private Boolean okToGiveUp;

-	/** Marked on objects we sent in our advertisement list. */
-	private final RevFlag ADVERTISED;
+	/** Objects we sent in our advertisement list, clients can ask for these. */
+	private Set<ObjectId> advertised;

 	/** Marked on objects the client has asked us to give them. */
 	private final RevFlag WANT;
@ -181,7 +181,6 @@ public class UploadPack {
 		walk = new RevWalk(db);
 		walk.setRetainBody(false);

-		ADVERTISED = walk.newFlag("ADVERTISED");
 		WANT = walk.newFlag("WANT");
 		PEER_HAS = walk.newFlag("PEER_HAS");
 		COMMON = walk.newFlag("COMMON");
@ -189,7 +188,6 @@ public class UploadPack {
 		walk.carry(PEER_HAS);

 		SAVE = new RevFlagSet();
-		SAVE.add(ADVERTISED);
 		SAVE.add(WANT);
 		SAVE.add(PEER_HAS);
 		refFilter = RefFilter.DEFAULT;
@ -327,13 +325,11 @@ public class UploadPack {
 		if (biDirectionalPipe)
 			sendAdvertisedRefs(new PacketLineOutRefAdvertiser(pckOut));
 		else {
+			advertised = new HashSet<ObjectId>();
 			refs = refFilter.filter(db.getAllRefs());
-			for (Ref r : refs.values()) {
-				try {
-					walk.parseAny(r.getObjectId()).add(ADVERTISED);
-				} catch (IOException e) {
-					// Skip missing/corrupt objects
-				}
+			for (Ref ref : refs.values()) {
+				if (ref.getObjectId() != null)
+					advertised.add(ref.getObjectId());
 			}
 		}

@ -361,7 +357,7 @@ public class UploadPack {
 	 *             the formatter failed to write an advertisement.
 	 */
 	public void sendAdvertisedRefs(final RefAdvertiser adv) throws IOException {
-		adv.init(walk, ADVERTISED);
+		adv.init(db);
 		adv.advertiseCapability(OPTION_INCLUDE_TAG);
 		adv.advertiseCapability(OPTION_MULTI_ACK_DETAILED);
 		adv.advertiseCapability(OPTION_MULTI_ACK);
@ -372,7 +368,7 @@ public class UploadPack {
 		adv.advertiseCapability(OPTION_NO_PROGRESS);
 		adv.setDerefTags(true);
 		refs = refFilter.filter(db.getAllRefs());
-		adv.send(refs);
+		advertised = adv.send(refs);
 		adv.end();
 	}

@ -425,7 +421,7 @@ public class UploadPack {
 				if (o.has(WANT)) {
 					// Already processed, the client repeated itself.

-				} else if (o.has(ADVERTISED)) {
+				} else if (advertised.contains(o)) {
 					o.add(WANT);
 					wantAll.add(o);