blob: 5eb0046a6934b3ea934a2e012a4533f83911720b [file] [log] [blame]
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
<meta name="generator" content="AsciiDoc 8.6.10" />
<title>Partial Clone Design Notes</title>
<style type="text/css">
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
/* Default font. */
body {
font-family: Georgia,serif;
}
/* Title font. */
h1, h2, h3, h4, h5, h6,
div.title, caption.title,
thead, p.table.header,
#toctitle,
#author, #revnumber, #revdate, #revremark,
#footer {
font-family: Arial,Helvetica,sans-serif;
}
body {
margin: 1em 5% 1em 5%;
}
a {
color: blue;
text-decoration: underline;
}
a:visited {
color: fuchsia;
}
em {
font-style: italic;
color: navy;
}
strong {
font-weight: bold;
color: #083194;
}
h1, h2, h3, h4, h5, h6 {
color: #527bbd;
margin-top: 1.2em;
margin-bottom: 0.5em;
line-height: 1.3;
}
h1, h2, h3 {
border-bottom: 2px solid silver;
}
h2 {
padding-top: 0.5em;
}
h3 {
float: left;
}
h3 + * {
clear: left;
}
h5 {
font-size: 1.0em;
}
div.sectionbody {
margin-left: 0;
}
hr {
border: 1px solid silver;
}
p {
margin-top: 0.5em;
margin-bottom: 0.5em;
}
ul, ol, li > p {
margin-top: 0;
}
/* Grey list bullets with black item text: the marker takes its colour from
   the <li>, and every direct child of the item is set back to black. */
ul > li {
  color: #aaa;
}
ul > li > * {
  color: black;
}
.monospaced, code, pre {
font-family: "Courier New", Courier, monospace;
font-size: inherit;
color: navy;
padding: 0;
margin: 0;
}
pre {
white-space: pre-wrap;
}
#author {
color: #527bbd;
font-weight: bold;
font-size: 1.1em;
}
#email {
}
#revnumber, #revdate, #revremark {
}
#footer {
font-size: small;
border-top: 2px solid silver;
padding-top: 0.5em;
margin-top: 4.0em;
}
#footer-text {
float: left;
padding-bottom: 0.5em;
}
#footer-badges {
float: right;
padding-bottom: 0.5em;
}
#preamble {
margin-top: 1.5em;
margin-bottom: 1.5em;
}
div.imageblock, div.exampleblock, div.verseblock,
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
div.admonitionblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.admonitionblock {
margin-top: 2.0em;
margin-bottom: 2.0em;
margin-right: 10%;
color: #606060;
}
div.content { /* Block element content. */
padding: 0;
}
/* Block element titles. */
div.title, caption.title {
color: #527bbd;
font-weight: bold;
text-align: left;
margin-top: 1.0em;
margin-bottom: 0.5em;
}
div.title + * {
margin-top: 0;
}
td div.title:first-child {
margin-top: 0.0em;
}
div.content div.title:first-child {
margin-top: 0.0em;
}
div.content + div.title {
margin-top: 0.0em;
}
div.sidebarblock > div.content {
background: #ffffee;
border: 1px solid #dddddd;
border-left: 4px solid #f0f0f0;
padding: 0.5em;
}
div.listingblock > div.content {
border: 1px solid #dddddd;
border-left: 5px solid #f0f0f0;
background: #f8f8f8;
padding: 0.5em;
}
div.quoteblock, div.verseblock {
padding-left: 1.0em;
margin-left: 1.0em;
margin-right: 10%;
border-left: 5px solid #f0f0f0;
color: #888;
}
div.quoteblock > div.attribution {
padding-top: 0.5em;
text-align: right;
}
div.verseblock > pre.content {
font-family: inherit;
font-size: inherit;
}
div.verseblock > div.attribution {
padding-top: 0.75em;
text-align: left;
}
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
div.verseblock + div.attribution {
text-align: left;
}
div.admonitionblock .icon {
vertical-align: top;
font-size: 1.1em;
font-weight: bold;
text-decoration: underline;
color: #527bbd;
padding-right: 0.5em;
}
div.admonitionblock td.content {
padding-left: 0.5em;
border-left: 3px solid #dddddd;
}
div.exampleblock > div.content {
border-left: 3px solid #dddddd;
padding-left: 0.5em;
}
div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }
dl {
margin-top: 0.8em;
margin-bottom: 0.8em;
}
dt {
margin-top: 0.5em;
margin-bottom: 0;
font-style: normal;
color: navy;
}
dd > *:first-child {
margin-top: 0.1em;
}
ul, ol {
list-style-position: outside;
}
ol.arabic {
list-style-type: decimal;
}
ol.loweralpha {
list-style-type: lower-alpha;
}
ol.upperalpha {
list-style-type: upper-alpha;
}
ol.lowerroman {
list-style-type: lower-roman;
}
ol.upperroman {
list-style-type: upper-roman;
}
/* Tighten the vertical spacing of lists, paragraphs and nested blocks that
   sit inside a "compact" container. (The selector list previously repeated
   "div.compact p" and "div.compact div"; the duplicates were redundant.) */
div.compact ul, div.compact ol,
div.compact p, div.compact div {
  margin-top: 0.1em;
  margin-bottom: 0.1em;
}
tfoot {
font-weight: bold;
}
td > div.verse {
white-space: pre;
}
div.hdlist {
margin-top: 0.8em;
margin-bottom: 0.8em;
}
div.hdlist tr {
padding-bottom: 15px;
}
dt.hdlist1.strong, td.hdlist1.strong {
font-weight: bold;
}
td.hdlist1 {
vertical-align: top;
font-style: normal;
padding-right: 0.8em;
color: navy;
}
td.hdlist2 {
vertical-align: top;
}
div.hdlist.compact tr {
margin: 0;
padding-bottom: 0;
}
.comment {
background: yellow;
}
.footnote, .footnoteref {
font-size: 0.8em;
}
span.footnote, span.footnoteref {
vertical-align: super;
}
#footnotes {
margin: 20px 0 20px 0;
padding: 7px 0 0 0;
}
#footnotes div.footnote {
margin: 0 0 5px 0;
}
#footnotes hr {
border: none;
border-top: 1px solid silver;
height: 1px;
text-align: left;
margin-left: 0;
width: 20%;
min-width: 100px;
}
div.colist td {
padding-right: 0.5em;
padding-bottom: 0.3em;
vertical-align: top;
}
div.colist td img {
margin-top: 0.3em;
}
@media print {
#footer-badges { display: none; }
}
#toc {
margin-bottom: 2.5em;
}
#toctitle {
color: #527bbd;
font-size: 1.1em;
font-weight: bold;
margin-top: 1.0em;
margin-bottom: 0.1em;
}
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
margin-top: 0;
margin-bottom: 0;
}
div.toclevel2 {
margin-left: 2em;
font-size: 0.9em;
}
div.toclevel3 {
margin-left: 4em;
font-size: 0.9em;
}
div.toclevel4 {
margin-left: 6em;
font-size: 0.9em;
}
span.aqua { color: aqua; }
span.black { color: black; }
span.blue { color: blue; }
span.fuchsia { color: fuchsia; }
span.gray { color: gray; }
span.green { color: green; }
span.lime { color: lime; }
span.maroon { color: maroon; }
span.navy { color: navy; }
span.olive { color: olive; }
span.purple { color: purple; }
span.red { color: red; }
span.silver { color: silver; }
span.teal { color: teal; }
span.white { color: white; }
span.yellow { color: yellow; }
span.aqua-background { background: aqua; }
span.black-background { background: black; }
span.blue-background { background: blue; }
span.fuchsia-background { background: fuchsia; }
span.gray-background { background: gray; }
span.green-background { background: green; }
span.lime-background { background: lime; }
span.maroon-background { background: maroon; }
span.navy-background { background: navy; }
span.olive-background { background: olive; }
span.purple-background { background: purple; }
span.red-background { background: red; }
span.silver-background { background: silver; }
span.teal-background { background: teal; }
span.white-background { background: white; }
span.yellow-background { background: yellow; }
span.big { font-size: 2em; }
span.small { font-size: 0.6em; }
span.underline { text-decoration: underline; }
span.overline { text-decoration: overline; }
span.line-through { text-decoration: line-through; }
div.unbreakable { page-break-inside: avoid; }
/*
* xhtml11 specific
*
* */
div.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
div.tableblock > table {
border: 3px solid #527bbd;
}
thead, p.table.header {
font-weight: bold;
color: #527bbd;
}
p.table {
margin-top: 0;
}
/* Because the table frame attribute is overridden by CSS in most browsers. */
div.tableblock > table[frame="void"] {
border-style: none;
}
div.tableblock > table[frame="hsides"] {
border-left-style: none;
border-right-style: none;
}
div.tableblock > table[frame="vsides"] {
border-top-style: none;
border-bottom-style: none;
}
/*
* html5 specific
*
* */
table.tableblock {
margin-top: 1.0em;
margin-bottom: 1.5em;
}
thead, p.tableblock.header {
font-weight: bold;
color: #527bbd;
}
p.tableblock {
margin-top: 0;
}
table.tableblock {
border-width: 3px;
border-spacing: 0px;
border-style: solid;
border-color: #527bbd;
border-collapse: collapse;
}
th.tableblock, td.tableblock {
border-width: 1px;
padding: 4px;
border-style: solid;
border-color: #527bbd;
}
table.tableblock.frame-topbot {
border-left-style: hidden;
border-right-style: hidden;
}
table.tableblock.frame-sides {
border-top-style: hidden;
border-bottom-style: hidden;
}
table.tableblock.frame-none {
border-style: hidden;
}
th.tableblock.halign-left, td.tableblock.halign-left {
text-align: left;
}
th.tableblock.halign-center, td.tableblock.halign-center {
text-align: center;
}
th.tableblock.halign-right, td.tableblock.halign-right {
text-align: right;
}
th.tableblock.valign-top, td.tableblock.valign-top {
vertical-align: top;
}
th.tableblock.valign-middle, td.tableblock.valign-middle {
vertical-align: middle;
}
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
vertical-align: bottom;
}
/*
* manpage specific
*
* */
body.manpage h1 {
padding-top: 0.5em;
padding-bottom: 0.5em;
border-top: 2px solid silver;
border-bottom: 2px solid silver;
}
body.manpage h2 {
border-style: none;
}
body.manpage div.sectionbody {
margin-left: 3em;
}
@media print {
body.manpage div#toc { display: none; }
}
</style>
<script type="text/javascript">
/*<![CDATA[*/
var asciidoc = { // Namespace.
/////////////////////////////////////////////////////////////////////
// Table Of Contents generator
/////////////////////////////////////////////////////////////////////
/* Author: Mihai Bazon, September 2002
* http://students.infoiasi.ro/~mishoo
*
* Table Of Content generator
* Version: 0.4
*
* Feel free to use this script under the terms of the GNU General Public
* License, as long as you do not remove or alter this notice.
*/
/* modified by Troy D. Hanson, September 2006. License: GPL */
/* modified by Stuart Rackham, 2006, 2009. License: GPL */
// toclevels = 1..4.
toc: function (toclevels) {
function getText(el) {
var text = "";
for (var i = el.firstChild; i != null; i = i.nextSibling) {
if (i.nodeType == 3 /* Node.TEXT_NODE */) // IE doesn't speak constants.
text += i.data;
else if (i.firstChild != null)
text += getText(i);
}
return text;
}
function TocEntry(el, text, toclevel) {
this.element = el;
this.text = text;
this.toclevel = toclevel;
}
function tocEntries(el, toclevels) {
var result = new Array;
var re = new RegExp('[hH]([1-'+(toclevels+1)+'])');
// Function that scans the DOM tree for header elements (the DOM2
// nodeIterator API would be a better technique but not supported by all
// browsers).
var iterate = function (el) {
for (var i = el.firstChild; i != null; i = i.nextSibling) {
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
var mo = re.exec(i.tagName);
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
}
iterate(i);
}
}
}
iterate(el);
return result;
}
var toc = document.getElementById("toc");
if (!toc) {
return;
}
// Delete existing TOC entries in case we're reloading the TOC.
var tocEntriesToRemove = [];
var i;
for (i = 0; i < toc.childNodes.length; i++) {
var entry = toc.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div'
&& entry.getAttribute("class")
&& entry.getAttribute("class").match(/^toclevel/))
tocEntriesToRemove.push(entry);
}
for (i = 0; i < tocEntriesToRemove.length; i++) {
toc.removeChild(tocEntriesToRemove[i]);
}
// Rebuild TOC entries.
var entries = tocEntries(document.getElementById("content"), toclevels);
for (var i = 0; i < entries.length; ++i) {
var entry = entries[i];
if (entry.element.id == "")
entry.element.id = "_toc_" + i;
var a = document.createElement("a");
a.href = "#" + entry.element.id;
a.appendChild(document.createTextNode(entry.text));
var div = document.createElement("div");
div.appendChild(a);
div.className = "toclevel" + entry.toclevel;
toc.appendChild(div);
}
if (entries.length == 0)
toc.parentNode.removeChild(toc);
},
/////////////////////////////////////////////////////////////////////
// Footnotes generator
/////////////////////////////////////////////////////////////////////
/* Based on footnote generation code from:
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
*/
footnotes: function () {
// Delete existing footnote entries in case we're reloading the footnodes.
var i;
var noteholder = document.getElementById("footnotes");
if (!noteholder) {
return;
}
var entriesToRemove = [];
for (i = 0; i < noteholder.childNodes.length; i++) {
var entry = noteholder.childNodes[i];
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
entriesToRemove.push(entry);
}
for (i = 0; i < entriesToRemove.length; i++) {
noteholder.removeChild(entriesToRemove[i]);
}
// Rebuild footnote entries.
var cont = document.getElementById("content");
var spans = cont.getElementsByTagName("span");
var refs = {};
var n = 0;
for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnote") {
n++;
var note = spans[i].getAttribute("data-note");
if (!note) {
// Use [\s\S] in place of . so multi-line matches work.
// Because JavaScript has no s (dotall) regex flag.
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
spans[i].innerHTML =
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
spans[i].setAttribute("data-note", note);
}
noteholder.innerHTML +=
"<div class='footnote' id='_footnote_" + n + "'>" +
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
n + "</a>. " + note + "</div>";
var id =spans[i].getAttribute("id");
if (id != null) refs["#"+id] = n;
}
}
if (n == 0)
noteholder.parentNode.removeChild(noteholder);
else {
// Process footnoterefs.
for (i=0; i<spans.length; i++) {
if (spans[i].className == "footnoteref") {
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
href = href.match(/#.*/)[0]; // Because IE return full URL.
n = refs[href];
spans[i].innerHTML =
"[<a href='#_footnote_" + n +
"' title='View footnote' class='footnote'>" + n + "</a>]";
}
}
}
},
install: function(toclevels) {
var timerId;
function reinstall() {
asciidoc.footnotes();
if (toclevels) {
asciidoc.toc(toclevels);
}
}
function reinstallAndRemoveTimer() {
clearInterval(timerId);
reinstall();
}
timerId = setInterval(reinstall, 500);
if (document.addEventListener)
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
else
window.onload = reinstallAndRemoveTimer;
}
}
asciidoc.install();
/*]]>*/
</script>
</head>
<body class="article">
<div id="header">
<h1>Partial Clone Design Notes</h1>
</div>
<div id="content">
<div id="preamble">
<div class="sectionbody">
<div class="paragraph"><p>The "Partial Clone" feature is a performance optimization for Git that
allows Git to function without having a complete copy of the repository.
The goal of this work is to allow Git to better handle extremely large
repositories.</p></div>
<div class="paragraph"><p>During clone and fetch operations, Git downloads the complete contents
and history of the repository. This includes all commits, trees, and
blobs for the complete life of the repository. For extremely large
repositories, clones can take hours (or days) and consume 100+GiB of disk
space.</p></div>
<div class="paragraph"><p>Often in these repositories there are many blobs and trees that the user
does not need such as:</p></div>
<div class="olist arabic"><ol class="arabic">
<li>
<p>
files outside of the user&#8217;s work area in the tree. For example, in
a repository with 500K directories and 3.5M files in every commit,
we can avoid downloading many objects if the user only needs a
narrow "cone" of the source tree.
</p>
</li>
<li>
<p>
large binary assets. For example, in a repository where large build
artifacts are checked into the tree, we can avoid downloading all
previous versions of these non-mergeable binary assets and only
download versions that are actually referenced.
</p>
</li>
</ol></div>
<div class="paragraph"><p>Partial clone allows us to avoid downloading such unneeded objects <strong>in
advance</strong> during clone and fetch operations and thereby reduce download
times and disk usage. Missing objects can later be "demand fetched"
if/when needed.</p></div>
<div class="paragraph"><p>Use of partial clone requires that the user be online and the origin
remote be available for on-demand fetching of missing objects. This may
or may not be problematic for the user. For example, if the user can
stay within the pre-selected subset of the source tree, they may not
encounter any missing objects. Alternatively, the user could try to
pre-fetch various objects if they know that they are going offline.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_non_goals">Non-Goals</h2>
<div class="sectionbody">
<div class="paragraph"><p>Partial clone is a mechanism to limit the number of blobs and trees downloaded
<strong>within</strong> a given range of commits&#8201;&#8212;&#8201;and is therefore independent of and not
intended to conflict with existing DAG-level mechanisms to limit the set of
requested commits (i.e. shallow clone, single branch, or fetch <em>&lt;refspec&gt;</em>).</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_design_overview">Design Overview</h2>
<div class="sectionbody">
<div class="paragraph"><p>Partial clone logically consists of the following parts:</p></div>
<div class="ulist"><ul>
<li>
<p>
A mechanism for the client to describe unneeded or unwanted objects to
the server.
</p>
</li>
<li>
<p>
A mechanism for the server to omit such unwanted objects from packfiles
sent to the client.
</p>
</li>
<li>
<p>
A mechanism for the client to gracefully handle missing objects (that
were previously omitted by the server).
</p>
</li>
<li>
<p>
A mechanism for the client to backfill missing objects as needed.
</p>
</li>
</ul></div>
</div>
</div>
<div class="sect1">
<h2 id="_design_details">Design Details</h2>
<div class="sectionbody">
<div class="ulist"><ul>
<li>
<p>
A new pack-protocol capability "filter" is added to the fetch-pack and
upload-pack negotiation.
</p>
<div class="paragraph"><p>This uses the existing capability discovery mechanism.
See "filter" in Documentation/technical/pack-protocol.txt.</p></div>
</li>
<li>
<p>
Clients pass a "filter-spec" to clone and fetch which is passed to the
server to request filtering during packfile construction.
</p>
<div class="paragraph"><p>There are various filters available to accommodate different situations.
See "--filter=&lt;filter-spec&gt;" in Documentation/rev-list-options.txt.</p></div>
</li>
<li>
<p>
On the server pack-objects applies the requested filter-spec as it
creates "filtered" packfiles for the client.
</p>
<div class="paragraph"><p>These filtered packfiles are <strong>incomplete</strong> in the traditional sense because
they may contain objects that reference objects not contained in the
packfile and that the client doesn&#8217;t already have. For example, the
filtered packfile may contain trees or tags that reference missing blobs
or commits that reference missing trees.</p></div>
</li>
<li>
<p>
On the client these incomplete packfiles are marked as "promisor packfiles"
and treated differently by various commands.
</p>
</li>
<li>
<p>
On the client a repository extension is added to the local config to
prevent older versions of git from failing mid-operation because of
missing objects that they cannot handle.
See "extensions.partialClone" in Documentation/technical/repository-version.txt.
</p>
</li>
</ul></div>
</div>
</div>
<div class="sect1">
<h2 id="_handling_missing_objects">Handling Missing Objects</h2>
<div class="sectionbody">
<div class="ulist"><ul>
<li>
<p>
An object may be missing due to a partial clone or fetch, or missing due
to repository corruption. To differentiate these cases, the local
repository specially indicates such filtered packfiles obtained from the
promisor remote as "promisor packfiles".
</p>
<div class="paragraph"><p>These promisor packfiles consist of a "&lt;name&gt;.promisor" file with
arbitrary contents (like the "&lt;name&gt;.keep" files), in addition to
their "&lt;name&gt;.pack" and "&lt;name&gt;.idx" files.</p></div>
</li>
<li>
<p>
The local repository considers a "promisor object" to be an object that
it knows (to the best of its ability) that the promisor remote has promised
that it has, either because the local repository has that object in one of
its promisor packfiles, or because another promisor object refers to it.
</p>
<div class="paragraph"><p>When Git encounters a missing object, Git can see if it is a promisor object
and handle it appropriately. If not, Git can report a corruption.</p></div>
<div class="paragraph"><p>This means that there is no need for the client to explicitly maintain an
expensive-to-modify list of missing objects.[a]</p></div>
</li>
<li>
<p>
Since almost all Git code currently expects any referenced object to be
present locally and because we do not want to force every command to do
a dry-run first, a fallback mechanism is added to allow Git to attempt
to dynamically fetch missing objects from the promisor remote.
</p>
<div class="paragraph"><p>When the normal object lookup fails to find an object, Git invokes
fetch-object to try to get the object from the server and then retry
the object lookup. This allows objects to be "faulted in" without
complicated prediction algorithms.</p></div>
<div class="paragraph"><p>For efficiency reasons, no check as to whether the missing object is
actually a promisor object is performed.</p></div>
<div class="paragraph"><p>Dynamic object fetching tends to be slow as objects are fetched one at
a time.</p></div>
</li>
<li>
<p>
<code>checkout</code> (and any other command using <code>unpack-trees</code>) has been taught
to bulk pre-fetch all required missing blobs in a single batch.
</p>
</li>
<li>
<p>
<code>rev-list</code> has been taught to print missing objects.
</p>
<div class="paragraph"><p>This can be used by other commands to bulk prefetch objects.
For example, a "git log -p A..B" may internally want to first do
something like "git rev-list --objects --quiet --missing=print A..B"
and prefetch those objects in bulk.</p></div>
</li>
<li>
<p>
<code>fsck</code> has been updated to be fully aware of promisor objects.
</p>
</li>
<li>
<p>
<code>repack</code> in GC has been updated to not touch promisor packfiles at all,
and to only repack other objects.
</p>
</li>
<li>
<p>
The global variable "fetch_if_missing" is used to control whether an
object lookup will attempt to dynamically fetch a missing object or
report an error.
</p>
<div class="paragraph"><p>We are not happy with this global variable and would like to remove it,
but that requires significant refactoring of the object code to pass an
additional flag. We hope that concurrent efforts to add an ODB API can
encompass this.</p></div>
</li>
</ul></div>
</div>
</div>
<div class="sect1">
<h2 id="_fetching_missing_objects">Fetching Missing Objects</h2>
<div class="sectionbody">
<div class="ulist"><ul>
<li>
<p>
Fetching of objects is done using the existing transport mechanism using
transport_fetch_refs(), setting a new transport option
TRANS_OPT_NO_DEPENDENTS to indicate that only the objects themselves are
desired, not any object that they refer to.
</p>
<div class="paragraph"><p>Because some transports invoke fetch_pack() in the same process, fetch_pack()
has been updated to not use any object flags when the corresponding argument
(no_dependents) is set.</p></div>
</li>
<li>
<p>
The local repository sends a request with the hashes of all requested
objects as "want" lines, and does not perform any packfile negotiation.
It then receives a packfile.
</p>
</li>
<li>
<p>
Because we are reusing the existing fetch-pack mechanism, fetching
currently fetches all objects referred to by the requested objects, even
though they are not necessary.
</p>
</li>
</ul></div>
</div>
</div>
<div class="sect1">
<h2 id="_current_limitations">Current Limitations</h2>
<div class="sectionbody">
<div class="ulist"><ul>
<li>
<p>
The remote used for a partial clone (or the first partial fetch
following a regular clone) is marked as the "promisor remote".
</p>
<div class="paragraph"><p>We are currently limited to a single promisor remote and only that
remote may be used for subsequent partial fetches.</p></div>
<div class="paragraph"><p>We accept this limitation because we believe initial users of this
feature will be using it on repositories with a strong single central
server.</p></div>
</li>
<li>
<p>
Dynamic object fetching will only ask the promisor remote for missing
objects. We assume that the promisor remote has a complete view of the
repository and can satisfy all such requests.
</p>
</li>
<li>
<p>
Repack essentially treats promisor and non-promisor packfiles as 2
distinct partitions and does not mix them. Repack currently only works
on non-promisor packfiles and loose objects.
</p>
</li>
<li>
<p>
Dynamic object fetching invokes fetch-pack once <strong>for each item</strong>
because most algorithms stumble upon a missing object and need to have
it resolved before continuing their work. This may incur significant
overhead&#8201;&#8212;&#8201;and multiple authentication requests&#8201;&#8212;&#8201;if many objects are
needed.
</p>
</li>
<li>
<p>
Dynamic object fetching currently uses the existing pack protocol V0
which means that each object is requested via fetch-pack. The server
will send a full set of info/refs when the connection is established.
If there are a large number of refs, this may incur significant overhead.
</p>
</li>
</ul></div>
</div>
</div>
<div class="sect1">
<h2 id="_future_work">Future Work</h2>
<div class="sectionbody">
<div class="ulist"><ul>
<li>
<p>
Allow more than one promisor remote and define a strategy for fetching
missing objects from specific promisor remotes or for iterating over the
set of promisor remotes until a missing object is found.
</p>
<div class="paragraph"><p>A user might want to have multiple geographically-close cache servers
for fetching missing blobs while continuing to do filtered <code>git-fetch</code>
commands from the central server, for example.</p></div>
<div class="paragraph"><p>Or the user might want to work in a triangular work flow with multiple
promisor remotes that each have an incomplete view of the repository.</p></div>
</li>
<li>
<p>
Allow repack to work on promisor packfiles (while keeping them distinct
from non-promisor packfiles).
</p>
</li>
<li>
<p>
Allow non-pathname-based filters to make use of packfile bitmaps (when
present). This was just an omission during the initial implementation.
</p>
</li>
<li>
<p>
Investigate use of a long-running process to dynamically fetch a series
of objects, such as proposed in [5,6] to reduce process startup and
overhead costs.
</p>
<div class="paragraph"><p>It would be nice if pack protocol V2 could allow that long-running
process to make a series of requests over a single long-running
connection.</p></div>
</li>
<li>
<p>
Investigate pack protocol V2 to avoid the info/refs broadcast on
each connection with the server to dynamically fetch missing objects.
</p>
</li>
<li>
<p>
Investigate the need to handle loose promisor objects.
</p>
<div class="paragraph"><p>Objects in promisor packfiles are allowed to reference missing objects
that can be dynamically fetched from the server. An assumption was
made that loose objects are only created locally and therefore should
not reference a missing object. We may need to revisit that assumption
if, for example, we dynamically fetch a missing tree and store it as a
loose object rather than a single object packfile.</p></div>
<div class="paragraph"><p>This does not necessarily mean we need to mark loose objects as promisor;
it may be sufficient to relax the object lookup or is-promisor functions.</p></div>
</li>
</ul></div>
</div>
</div>
<div class="sect1">
<h2 id="_non_tasks">Non-Tasks</h2>
<div class="sectionbody">
<div class="ulist"><ul>
<li>
<p>
Every time the subject of "demand loading blobs" comes up it seems
that someone suggests that the server be allowed to "guess" and send
additional objects that may be related to the requested objects.
</p>
<div class="paragraph"><p>No work has gone into actually doing that; we&#8217;re just documenting that
it is a common suggestion. We&#8217;re not sure how it would work and have
no plans to work on it.</p></div>
<div class="paragraph"><p>It is valid for the server to send more objects than requested (even
for a dynamic object fetch), but we are not building on that.</p></div>
</li>
</ul></div>
</div>
</div>
<div class="sect1">
<h2 id="_footnotes">Footnotes</h2>
<div class="sectionbody">
<div class="paragraph"><p>[a] expensive-to-modify list of missing objects: Earlier in the design of
partial clone we discussed the need for a single list of missing objects.
This would essentially be a sorted linear list of OIDs that were
omitted by the server during a clone or subsequent fetches.</p></div>
<div class="paragraph"><p>This file would need to be loaded into memory on every object lookup.
It would need to be read, updated, and re-written (like the .git/index)
on every explicit "git fetch" command <strong>and</strong> on any dynamic object fetch.</p></div>
<div class="paragraph"><p>The cost to read, update, and write this file could add significant
overhead to every command if there are many missing objects. For example,
if there are 100M missing blobs, this file would be at least 2GiB on disk.</p></div>
<div class="paragraph"><p>With the "promisor" concept, we <strong>infer</strong> a missing object based upon the
type of packfile that references it.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_related_links">Related Links</h2>
<div class="sectionbody">
<div class="paragraph"><p>[0] <a href="https://crbug.com/git/2">https://crbug.com/git/2</a><br />
Bug#2: Partial Clone</p></div>
<div class="paragraph"><p>[1] <a href="https://public-inbox.org/git/20170113155253.1644-1-benpeart@microsoft.com/">https://public-inbox.org/git/20170113155253.1644-1-benpeart@microsoft.com/</a><br />
Subject: [RFC] Add support for downloading blobs on demand<br />
Date: Fri, 13 Jan 2017 10:52:53 -0500</p></div>
<div class="paragraph"><p>[2] <a href="https://public-inbox.org/git/cover.1506714999.git.jonathantanmy@google.com/">https://public-inbox.org/git/cover.1506714999.git.jonathantanmy@google.com/</a><br />
Subject: [PATCH 00/18] Partial clone (from clone to lazy fetch in 18 patches)<br />
Date: Fri, 29 Sep 2017 13:11:36 -0700</p></div>
<div class="paragraph"><p>[3] <a href="https://public-inbox.org/git/20170426221346.25337-1-jonathantanmy@google.com/">https://public-inbox.org/git/20170426221346.25337-1-jonathantanmy@google.com/</a><br />
Subject: Proposal for missing blob support in Git repos<br />
Date: Wed, 26 Apr 2017 15:13:46 -0700</p></div>
<div class="paragraph"><p>[4] <a href="https://public-inbox.org/git/1488999039-37631-1-git-send-email-git@jeffhostetler.com/">https://public-inbox.org/git/1488999039-37631-1-git-send-email-git@jeffhostetler.com/</a><br />
Subject: [PATCH 00/10] RFC Partial Clone and Fetch<br />
Date: Wed, 8 Mar 2017 18:50:29 +0000</p></div>
<div class="paragraph"><p>[5] <a href="https://public-inbox.org/git/20170505152802.6724-1-benpeart@microsoft.com/">https://public-inbox.org/git/20170505152802.6724-1-benpeart@microsoft.com/</a><br />
Subject: [PATCH v7 00/10] refactor the filter process code into a reusable module<br />
Date: Fri, 5 May 2017 11:27:52 -0400</p></div>
<div class="paragraph"><p>[6] <a href="https://public-inbox.org/git/20170714132651.170708-1-benpeart@microsoft.com/">https://public-inbox.org/git/20170714132651.170708-1-benpeart@microsoft.com/</a><br />
Subject: [RFC/PATCH v2 0/1] Add support for downloading blobs on demand<br />
Date: Fri, 14 Jul 2017 09:26:50 -0400</p></div>
</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Last updated
2018-08-21 05:12:02 JST
</div>
</div>
</body>
</html>