blob: 42c28fba0e34b260d5e059b56acf11be6a56d63f [file]
#include "git-compat-util.h"
#include "abspath.h"
#include "chdir-notify.h"
#include "dir.h"
#include "git-zlib.h"
#include "mergesort.h"
#include "midx.h"
#include "odb/source-packed.h"
#include "odb/streaming.h"
#include "packfile.h"
/*
 * Look up `oid` in the packed source and fill in `e` on success.
 *
 * The source is prepared first. The multi-pack index, if one is loaded, is
 * asked before any individual pack. After that every pack that is not
 * flagged as belonging to a multi-pack index is probed in list order. A pack
 * that yields the object is handed to packfile_list_prepend() unless
 * `store->skip_mru_updates` is set.
 *
 * Returns 1 when the object was found, 0 otherwise.
 */
static int find_pack_entry(struct odb_source_packed *store,
			   const struct object_id *oid,
			   struct pack_entry *e)
{
	struct packfile_list_entry *entry;

	odb_source_packed_prepare(store);

	if (store->midx && fill_midx_entry(store->midx, oid, e))
		return 1;

	for (entry = store->packs.head; entry; entry = entry->next) {
		struct packed_git *pack = entry->pack;

		if (pack->multi_pack_index)
			continue;
		if (!packfile_fill_entry(pack, oid, e))
			continue;

		/* The list is modified only right before we stop walking it. */
		if (!store->skip_mru_updates)
			packfile_list_prepend(&store->packs, pack);
		return 1;
	}

	return 0;
}
static int odb_source_packed_read_object_info(struct odb_source *source,
const struct object_id *oid,
struct object_info *oi,
enum object_info_flags flags)
{
struct odb_source_packed *packed = odb_source_packed_downcast(source);
struct pack_entry e;
int ret;
/*
* In case the first read didn't surface the object, we have to reload
* packfiles. This may cause us to discover new packfiles that have
* been added since the last time we have prepared the packfile store.
*/
if (flags & OBJECT_INFO_SECOND_READ)
odb_source_reprepare(source);
if (!find_pack_entry(packed, oid, &e))
return 1;
/*
* We know that the caller doesn't actually need the
* information below, so return early.
*/
if (!oi)
return 0;
ret = packed_object_info(e.p, e.offset, oi);
if (ret < 0) {
mark_bad_packed_object(e.p, oid);
return -1;
}
return 0;
}
static int odb_source_packed_read_object_stream(struct odb_read_stream **out,
struct odb_source *source,
const struct object_id *oid)
{
struct odb_source_packed *packed = odb_source_packed_downcast(source);
struct pack_entry e;
if (!find_pack_entry(packed, oid, &e))
return -1;
return packfile_read_object_stream(out, oid, e.p, e.offset);
}
/*
 * Context shared by the object iteration helpers of the packed source. It
 * bundles the caller's callback with the information request so that both
 * can travel through a single `void *` callback argument.
 */
struct odb_source_packed_for_each_object_wrapper_data {
/* Packed source the iteration was started on. */
struct odb_source_packed *store;
/*
 * Template that is copied for every object before its information is
 * read. May be NULL, in which case the callback receives a NULL
 * object info.
 */
const struct object_info *request;
/* Caller-provided callback invoked once per object. */
odb_for_each_object_cb cb;
/* Opaque pointer passed through to `cb` unchanged. */
void *cb_data;
};
static int odb_source_packed_for_each_object_wrapper(const struct object_id *oid,
struct packed_git *pack,
uint32_t index_pos,
void *cb_data)
{
struct odb_source_packed_for_each_object_wrapper_data *data = cb_data;
if (data->request) {
off_t offset = nth_packed_object_offset(pack, index_pos);
struct object_info oi = *data->request;
if (packed_object_info_with_index_pos(pack, offset,
&index_pos, &oi) < 0) {
mark_bad_packed_object(pack, oid);
return -1;
}
return data->cb(oid, &oi, data->cb_data);
} else {
return data->cb(oid, NULL, data->cb_data);
}
}
/*
 * Check whether the binary hashes `a` and `b` agree in their first `len`
 * hexadecimal digits.
 *
 * Every byte carries two hex digits. Whole bytes are compared for each full
 * pair of digits; when `len` is odd only the high nibble of the last byte is
 * compared. Both buffers must be readable for (len + 1) / 2 bytes. A `len`
 * of zero compares nothing and therefore always matches.
 *
 * Returns 1 when the prefixes match and 0 otherwise.
 */
static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b)
{
	/*
	 * Test the remaining length before consuming a byte. Subtracting 2
	 * from an unsigned length below 2 would wrap around and make the loop
	 * walk far past the end of both buffers.
	 */
	for (; len > 1; len -= 2, a++, b++) {
		if (*a != *b)
			return 0;
	}

	/* An odd digit count leaves one high nibble to compare. */
	if (len && ((*a ^ *b) & 0xf0))
		return 0;

	return 1;
}
static int for_each_prefixed_object_in_midx(
struct odb_source_packed *store,
struct multi_pack_index *m,
const struct odb_for_each_object_options *opts,
struct odb_source_packed_for_each_object_wrapper_data *data)
{
int ret;
for (; m; m = m->base_midx) {
uint32_t num, i, first = 0;
int len = opts->prefix_hex_len > m->source->base.odb->repo->hash_algo->hexsz ?
m->source->base.odb->repo->hash_algo->hexsz : opts->prefix_hex_len;
if (!m->num_objects)
continue;
num = m->num_objects + m->num_objects_in_base;
bsearch_one_midx(opts->prefix, m, &first);
/*
* At this point, "first" is the location of the lowest
* object with an object name that could match "opts->prefix".
* See if we have 0, 1 or more objects that actually match(es).
*/
for (i = first; i < num; i++) {
const struct object_id *current = NULL;
struct object_id oid;
current = nth_midxed_object_oid(&oid, m, i);
if (!match_hash(len, opts->prefix->hash, current->hash))
break;
if (data->request) {
struct object_info oi = *data->request;
ret = odb_source_read_object_info(&store->base, current,
&oi, 0);
if (ret)
goto out;
ret = data->cb(&oid, &oi, data->cb_data);
if (ret)
goto out;
} else {
ret = data->cb(&oid, NULL, data->cb_data);
if (ret)
goto out;
}
}
}
ret = 0;
out:
return ret;
}
static int for_each_prefixed_object_in_pack(
struct odb_source_packed *store,
struct packed_git *p,
const struct odb_for_each_object_options *opts,
struct odb_source_packed_for_each_object_wrapper_data *data)
{
uint32_t num, i, first = 0;
int len = opts->prefix_hex_len > p->repo->hash_algo->hexsz ?
p->repo->hash_algo->hexsz : opts->prefix_hex_len;
int ret;
num = p->num_objects;
bsearch_pack(opts->prefix, p, &first);
/*
* At this point, "first" is the location of the lowest object
* with an object name that could match "bin_pfx". See if we have
* 0, 1 or more objects that actually match(es).
*/
for (i = first; i < num; i++) {
struct object_id oid;
nth_packed_object_id(&oid, p, i);
if (!match_hash(len, opts->prefix->hash, oid.hash))
break;
if (data->request) {
struct object_info oi = *data->request;
ret = odb_source_read_object_info(&store->base, &oid, &oi, 0);
if (ret)
goto out;
ret = data->cb(&oid, &oi, data->cb_data);
if (ret)
goto out;
} else {
ret = data->cb(&oid, NULL, data->cb_data);
if (ret)
goto out;
}
}
ret = 0;
out:
return ret;
}
/*
 * Iterate over all objects of the packed source whose object ID starts with
 * `opts->prefix`.
 *
 * Iteration flags are not supported in this mode; passing any is a BUG().
 * The multi-pack index is visited first, followed by every pack that is not
 * flagged as belonging to a multi-pack index. MRU updates of the pack list
 * are suppressed for the duration of the iteration.
 *
 * Returns the first non-zero value produced by the iteration helpers. If
 * the iteration itself succeeded but at least one pack index could not be
 * opened, -1 is returned. Otherwise returns 0.
 */
static int odb_source_packed_for_each_prefixed_object(
	struct odb_source_packed *store,
	const struct odb_for_each_object_options *opts,
	struct odb_source_packed_for_each_object_wrapper_data *data)
{
	struct packfile_list_entry *entry;
	struct multi_pack_index *midx;
	bool pack_errors = false;
	int ret = 0;

	if (opts->flags)
		BUG("flags unsupported");

	store->skip_mru_updates = true;

	midx = get_multi_pack_index(store);
	if (midx) {
		ret = for_each_prefixed_object_in_midx(store, midx, opts, data);
		if (ret)
			goto done;
	}

	for (entry = packfile_store_get_packs(store); entry; entry = entry->next) {
		if (entry->pack->multi_pack_index)
			continue;

		/* Remember the failure, but keep going with the other packs. */
		if (open_pack_index(entry->pack)) {
			pack_errors = true;
			continue;
		}

		if (!entry->pack->num_objects)
			continue;

		ret = for_each_prefixed_object_in_pack(store, entry->pack, opts, data);
		if (ret)
			goto done;
	}

done:
	store->skip_mru_updates = false;
	if (ret)
		return ret;
	return pack_errors ? -1 : 0;
}
/*
 * Iterate over the objects of the packed source and invoke `cb` for each.
 *
 * When `opts->prefix` is set the work is delegated to the prefix-based
 * iteration. Otherwise every pack is visited, except for those filtered out
 * by the ODB_FOR_EACH_OBJECT_* bits in `opts->flags`:
 *
 *   - LOCAL_ONLY skips packs that are not local,
 *   - PROMISOR_ONLY skips packs that are not promisor packs,
 *   - SKIP_IN_CORE_KEPT_PACKS skips packs kept in core,
 *   - SKIP_ON_DISK_KEPT_PACKS skips packs with the keep flag set.
 *
 * MRU updates of the pack list are suppressed while iterating.
 *
 * `request`, if non-NULL, is used as a template for the object information
 * passed to `cb`; `cb_data` is handed through to `cb` unchanged.
 *
 * Returns the first non-zero value produced by the per-pack iteration. If
 * the iteration itself succeeded but at least one pack index could not be
 * opened, -1 is returned. Otherwise returns 0.
 */
static int odb_source_packed_for_each_object(struct odb_source *source,
					     const struct object_info *request,
					     odb_for_each_object_cb cb,
					     void *cb_data,
					     const struct odb_for_each_object_options *opts)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);
	struct odb_source_packed_for_each_object_wrapper_data data = {
		.store = packed,
		.request = request,
		.cb = cb,
		.cb_data = cb_data,
	};
	struct packfile_list_entry *entry;
	int pack_errors = 0, ret = 0;

	if (opts->prefix)
		return odb_source_packed_for_each_prefixed_object(packed, opts, &data);

	packed->skip_mru_updates = true;

	for (entry = packfile_store_get_packs(packed); entry; entry = entry->next) {
		struct packed_git *pack = entry->pack;

		if (!pack->pack_local &&
		    (opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY))
			continue;
		if (!pack->pack_promisor &&
		    (opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY))
			continue;
		if (pack->pack_keep_in_core &&
		    (opts->flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS))
			continue;
		if (pack->pack_keep &&
		    (opts->flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
			continue;

		/* Remember the failure, but keep going with the other packs. */
		if (open_pack_index(pack)) {
			pack_errors = 1;
			continue;
		}

		ret = for_each_object_in_pack(pack,
					      odb_source_packed_for_each_object_wrapper,
					      &data, opts->flags);
		if (ret)
			break;
	}

	packed->skip_mru_updates = false;

	if (!ret && pack_errors)
		ret = -1;
	return ret;
}
/*
 * Count the objects stored in the packed source.
 *
 * The count is the number of objects recorded in the multi-pack index
 * (including its base layers) plus the object counts of all packs that are
 * not flagged as belonging to a multi-pack index. `flags` is ignored.
 *
 * On success the count is stored in `out` and 0 is returned. If a pack
 * index cannot be opened, -1 is returned and `out` is left untouched.
 */
static int odb_source_packed_count_objects(struct odb_source *source,
					   enum odb_count_objects_flags flags UNUSED,
					   unsigned long *out)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);
	struct multi_pack_index *midx = get_multi_pack_index(packed);
	struct packfile_list_entry *entry;
	unsigned long total = 0;

	if (midx)
		total += midx->num_objects + midx->num_objects_in_base;

	for (entry = packfile_store_get_packs(packed); entry; entry = entry->next) {
		if (entry->pack->multi_pack_index)
			continue;
		if (open_pack_index(entry->pack))
			return -1;
		total += entry->pack->num_objects;
	}

	*out = total;
	return 0;
}
/*
 * Grow the abbreviation length in `out` so that it distinguishes `a` from
 * `b`.
 *
 * Nothing happens when the common hex prefix of both object IDs spans the
 * full hex size of the hash algorithm of `a`, or when `*out` already
 * exceeds the length of the common prefix. Otherwise `*out` is set to one
 * more than the common prefix length.
 *
 * Always returns 0.
 */
static int extend_abbrev_len(const struct object_id *a,
			     const struct object_id *b,
			     unsigned *out)
{
	unsigned common = oid_common_prefix_hexlen(a, b);

	if (common == hash_algos[a->algo].hexsz)
		return 0;

	if (common >= *out)
		*out = common + 1;

	return 0;
}
static void find_abbrev_len_for_midx(struct multi_pack_index *m,
const struct object_id *oid,
unsigned min_len,
unsigned *out)
{
unsigned len = min_len;
for (; m; m = m->base_midx) {
int match = 0;
uint32_t num, first = 0;
struct object_id found_oid;
if (!m->num_objects)
continue;
num = m->num_objects + m->num_objects_in_base;
match = bsearch_one_midx(oid, m, &first);
/*
* first is now the position in the packfile where we
* would insert the object ID if it does not exist (or the
* position of the object ID if it does exist). Hence, we
* consider a maximum of two objects nearby for the
* abbreviation length.
*/
if (!match) {
if (nth_midxed_object_oid(&found_oid, m, first))
extend_abbrev_len(&found_oid, oid, &len);
} else if (first < num - 1) {
if (nth_midxed_object_oid(&found_oid, m, first + 1))
extend_abbrev_len(&found_oid, oid, &len);
}
if (first > 0) {
if (nth_midxed_object_oid(&found_oid, m, first - 1))
extend_abbrev_len(&found_oid, oid, &len);
}
}
*out = len;
}
static void find_abbrev_len_for_pack(struct packed_git *p,
const struct object_id *oid,
unsigned min_len,
unsigned *out)
{
int match;
uint32_t num, first = 0;
struct object_id found_oid;
unsigned len = min_len;
num = p->num_objects;
match = bsearch_pack(oid, p, &first);
/*
* first is now the position in the packfile where we would insert
* the object ID if it does not exist (or the position of mad->hash if
* it does exist). Hence, we consider a maximum of two objects
* nearby for the abbreviation length.
*/
if (!match) {
if (!nth_packed_object_id(&found_oid, p, first))
extend_abbrev_len(&found_oid, oid, &len);
} else if (first < num - 1) {
if (!nth_packed_object_id(&found_oid, p, first + 1))
extend_abbrev_len(&found_oid, oid, &len);
}
if (first > 0) {
if (!nth_packed_object_id(&found_oid, p, first - 1))
extend_abbrev_len(&found_oid, oid, &len);
}
*out = len;
}
/*
 * Determine the abbreviation length required to keep `oid` distinct from
 * the objects in the packed source.
 *
 * Starting at `min_len`, the length is extended first by the multi-pack
 * index and then by every pack that is not flagged as belonging to a
 * multi-pack index. Packs whose index cannot be opened and packs without
 * any objects are skipped silently.
 *
 * The resulting length is stored in `out`. Always returns 0.
 */
static int odb_source_packed_find_abbrev_len(struct odb_source *source,
					     const struct object_id *oid,
					     unsigned min_len,
					     unsigned *out)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);
	struct multi_pack_index *midx = get_multi_pack_index(packed);
	struct packfile_list_entry *entry;
	unsigned len = min_len;

	if (midx)
		find_abbrev_len_for_midx(midx, oid, len, &len);

	for (entry = packfile_store_get_packs(packed); entry; entry = entry->next) {
		if (entry->pack->multi_pack_index)
			continue;
		if (open_pack_index(entry->pack))
			continue;
		if (!entry->pack->num_objects)
			continue;

		find_abbrev_len_for_pack(entry->pack, oid, len, &len);
	}

	*out = len;
	return 0;
}
static int odb_source_packed_freshen_object(struct odb_source *source,
const struct object_id *oid)
{
struct odb_source_packed *packed = odb_source_packed_downcast(source);
struct pack_entry e;
if (!find_pack_entry(packed, oid, &e))
return 0;
if (e.p->is_cruft)
return 0;
if (e.p->freshened)
return 1;
if (utime(e.p->pack_name, NULL))
return 0;
e.p->freshened = 1;
return 1;
}
/*
 * Writing objects is not supported by the packed source. All arguments are
 * ignored; the function reports an error and returns the result of error().
 */
static int odb_source_packed_write_object(
	struct odb_source *source UNUSED,
	const void *buf UNUSED,
	unsigned long len UNUSED,
	enum object_type type UNUSED,
	struct object_id *oid UNUSED,
	struct object_id *compat_oid UNUSED,
	unsigned flags UNUSED)
{
	return error("packed backend cannot write objects");
}
/*
 * Writing object streams is not supported by the packed source. All
 * arguments are ignored; the function reports an error and returns the
 * result of error().
 */
static int odb_source_packed_write_object_stream(
	struct odb_source *source UNUSED,
	struct odb_write_stream *stream UNUSED,
	size_t len UNUSED,
	struct object_id *oid UNUSED)
{
	return error("packed backend cannot write object streams");
}
/*
 * Transactions are not supported by the packed source. All arguments are
 * ignored; the function reports an error and returns the result of error().
 */
static int odb_source_packed_begin_transaction(
	struct odb_source *source UNUSED,
	struct odb_transaction **out UNUSED)
{
	return error("packed backend cannot begin transactions");
}
/*
 * The packed source does not provide any alternates: `out` is left
 * untouched and 0 is returned.
 */
static int odb_source_packed_read_alternates(
	struct odb_source *source UNUSED,
	struct strvec *out UNUSED)
{
	return 0;
}
/*
 * Writing alternates is not supported by the packed source. All arguments
 * are ignored; the function reports an error and returns the result of
 * error().
 */
static int odb_source_packed_write_alternate(
	struct odb_source *source UNUSED,
	const char *alternate UNUSED)
{
	return error("packed backend cannot write alternates");
}
/*
 * Optional hook used to report unexpected files found in the pack
 * directory. It is not assigned anywhere in the code visible here, and the
 * callers in this file skip garbage reporting when it is NULL. When set, it
 * receives a bit mask describing what was seen (PACKDIR_FILE_* values)
 * together with the path of the offending file.
 */
void (*report_garbage)(unsigned seen_bits, const char *path);
/*
 * Report the entries `first` (inclusive) to `last` (exclusive) of `list` as
 * garbage, passing `seen_bits` along for each of them.
 *
 * Nothing is reported when `seen_bits` shows that both the pack and its
 * index were seen. The caller must make sure `report_garbage` is set.
 */
static void report_helper(const struct string_list *list,
			  int seen_bits, int first, int last)
{
	int idx;

	if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
		return;

	for (idx = first; idx < last; idx++)
		report_garbage(seen_bits, list->items[idx].string);
}
/*
 * Report pack-related files in `list` that do not belong to a complete
 * pack, i.e. a group of files for which a ".pack" and an ".idx" file were
 * not both seen.
 *
 * The list is sorted so that files sharing the same base name (everything
 * up to and including the last '.') end up next to each other. Each such
 * group is handed to report_helper() together with the bits collected for
 * it. Paths without any '.' are reported as plain garbage right away.
 *
 * Does nothing when no `report_garbage` hook is installed.
 */
static void report_pack_garbage(struct string_list *list)
{
	int baselen = -1, first = 0, seen_bits = 0;
	size_t i;

	if (!report_garbage)
		return;

	string_list_sort(list);

	for (i = 0; i < list->nr; i++) {
		const char *path = list->items[i].string;
		const char *ext;

		/* A path with a different base name closes the current group. */
		if (baselen != -1 &&
		    strncmp(path, list->items[first].string, baselen)) {
			report_helper(list, seen_bits, first, i);
			baselen = -1;
			seen_bits = 0;
		}

		/* No group is open: start a new one at this path. */
		if (baselen == -1) {
			const char *dot = strrchr(path, '.');

			if (!dot) {
				report_garbage(PACKDIR_FILE_GARBAGE, path);
				continue;
			}
			baselen = dot - path + 1;
			first = i;
		}

		/*
		 * NOTE(review): the literals below presumably correspond to
		 * PACKDIR_FILE_PACK and PACKDIR_FILE_IDX as tested by
		 * report_helper() -- confirm against their definitions.
		 */
		ext = path + baselen;
		if (!strcmp(ext, "pack"))
			seen_bits |= 1;
		else if (!strcmp(ext, "idx"))
			seen_bits |= 2;
	}

	report_helper(list, seen_bits, first, list->nr);
}
/*
 * Context passed to prepare_pack() while walking the files of a pack
 * directory.
 */
struct prepare_pack_data {
/* Packed source whose multi-pack index is consulted and into which packs are loaded. */
struct odb_source_packed *source;
/* Collects the paths of pack-related files for the later garbage report. */
struct string_list *garbage;
};
/*
 * Callback invoked for every file found in the pack directory.
 *
 * A file ending in ".idx" whose pack is not already covered by the source's
 * multi-pack index is loaded into the source.
 *
 * In addition, when a `report_garbage` hook is installed, the file is
 * classified for garbage reporting: multi-pack index files are ignored,
 * files carrying one of the known pack-related suffixes are collected in
 * the garbage list for later grouping, and everything else is reported as
 * garbage immediately.
 *
 * `full_name` is the path of the file, `full_name_len` its length, and
 * `file_name` the name of the file inside the pack directory. `_data`
 * points to a `struct prepare_pack_data`.
 */
static void prepare_pack(const char *full_name, size_t full_name_len,
			 const char *file_name, void *_data)
{
	static const char *const pack_suffixes[] = {
		".idx",
		".rev",
		".pack",
		".bitmap",
		".keep",
		".promisor",
		".mtimes",
		NULL,
	};
	struct prepare_pack_data *data = _data;
	size_t base_len = full_name_len;
	size_t i;

	if (strip_suffix_mem(full_name, &base_len, ".idx")) {
		bool in_midx = data->source->midx &&
			       midx_contains_pack(data->source->midx, file_name);

		if (!in_midx) {
			/* Copy of the first full_name_len bytes of full_name. */
			char *idx_path = xstrndup(full_name, full_name_len);

			packfile_store_load_pack(data->source,
						 idx_path, data->source->base.local);
			free(idx_path);
		}
	}

	if (!report_garbage)
		return;

	/* The multi-pack index and its auxiliary files are never garbage. */
	if (!strcmp(file_name, "multi-pack-index") ||
	    !strcmp(file_name, "multi-pack-index.d"))
		return;
	if (starts_with(file_name, "multi-pack-index") &&
	    (ends_with(file_name, ".bitmap") || ends_with(file_name, ".rev")))
		return;

	for (i = 0; pack_suffixes[i]; i++) {
		if (ends_with(file_name, pack_suffixes[i])) {
			string_list_append(data->garbage, full_name);
			return;
		}
	}

	report_garbage(PACKDIR_FILE_GARBAGE, full_name);
}
/*
 * Scan the pack directory below the path of `source`: prepare_pack() is run
 * for every file found there, and afterwards the pack-related files that
 * were collected are passed to the garbage report.
 */
static void prepare_packed_git_one(struct odb_source_packed *source)
{
	struct string_list garbage = STRING_LIST_INIT_DUP;
	struct prepare_pack_data data = {
		.source = source,
		.garbage = &garbage,
	};

	for_each_file_in_pack_dir(source->base.path, prepare_pack, &data);

	report_pack_garbage(&garbage);
	string_list_clear(&garbage, 0);
}
/*
 * Instantiate a file-local sort_packs() function that sorts a list of
 * `struct packfile_list_entry` elements chained through their `next`
 * member. It is called with a pointer to the list head and a comparison
 * function. NOTE(review): the macro presumably comes from mergesort.h --
 * confirm.
 */
DEFINE_LIST_SORT(static, sort_packs, struct packfile_list_entry, next);
/*
 * Comparison function that defines the order of the pack list.
 *
 * Local packs sort before non-local ones. Among packs with the same
 * locality, packs with a larger mtime sort first. Returns a negative value
 * when `a` goes before `b`, a positive value when it goes after `b`, and 0
 * when both rank equally.
 */
static int sort_pack(const struct packfile_list_entry *a,
		     const struct packfile_list_entry *b)
{
	/*
	 * Local packs tend to contain objects that are more specific to our
	 * variant of the project than remote ones. In addition, remote ones
	 * could be on a network mounted filesystem. Favor local ones for
	 * these reasons.
	 */
	int local_diff = a->pack->pack_local - b->pack->pack_local;

	if (local_diff)
		return -local_diff;

	/*
	 * Younger packs tend to contain more recent objects, and more recent
	 * objects tend to get accessed more often.
	 */
	if (a->pack->mtime == b->pack->mtime)
		return 0;
	return a->pack->mtime < b->pack->mtime ? 1 : -1;
}
/*
 * Prepare the packed source for use unless that has happened already.
 *
 * Preparation loads the multi-pack index, scans the pack directory for
 * packs and sorts the resulting pack list. The source is marked as
 * initialized afterwards so that repeated calls return immediately.
 */
void odb_source_packed_prepare(struct odb_source_packed *source)
{
	struct packfile_list_entry *last;

	if (source->initialized)
		return;

	prepare_multi_pack_index_one(source);
	prepare_packed_git_one(source);

	sort_packs(&source->packs.head, sort_pack);

	/*
	 * Sorting only hands back the new head, so walk to the end of the
	 * list to refresh the tail pointer. An empty list leaves the tail
	 * untouched.
	 */
	last = source->packs.head;
	if (last) {
		while (last->next)
			last = last->next;
		source->packs.tail = last;
	}

	source->initialized = true;
}
/*
 * Force the packed source to be prepared again by clearing its
 * `initialized` flag before running the regular preparation.
 */
static void odb_source_packed_reprepare(struct odb_source *source)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);

	packed->initialized = false;
	odb_source_packed_prepare(packed);
}
/*
 * chdir-notify callback: replace the path of the packed source passed via
 * `cb_data` with the result of reparenting it from `old_cwd` to `new_cwd`.
 * The previous path string is freed.
 */
static void odb_source_packed_reparent(const char *name UNUSED,
				       const char *old_cwd,
				       const char *new_cwd,
				       void *cb_data)
{
	struct odb_source_packed *packed = cb_data;
	char *old_path = packed->base.path;

	packed->base.path = reparent_relative_path(old_cwd, new_cwd, old_path);
	free(old_path);
}
/*
 * Close all packs of the packed source as well as its multi-pack index.
 *
 * It is a BUG() to call this while any pack is marked 'do-not-close'. The
 * pack list itself stays in place; only the multi-pack index pointer is
 * reset to NULL.
 */
static void odb_source_packed_close(struct odb_source *source)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);
	struct packfile_list_entry *entry = packed->packs.head;

	while (entry) {
		if (entry->pack->do_not_close)
			BUG("want to close pack marked 'do-not-close'");
		close_pack(entry->pack);
		entry = entry->next;
	}

	if (packed->midx)
		close_midx(packed->midx);
	packed->midx = NULL;
}
/*
 * Release the packed source and everything it owns.
 *
 * The chdir-notify callback is unregistered, all pack structures and the
 * pack list are freed, the path lookup map is cleared, the base source is
 * released and finally the source structure itself is freed.
 */
static void odb_source_packed_free(struct odb_source *source)
{
	struct odb_source_packed *packed = odb_source_packed_downcast(source);
	struct packfile_list_entry *entry;

	chdir_notify_unregister(NULL, odb_source_packed_reparent, packed);

	/* Free the packs first; clearing the list does not do that for us. */
	entry = packed->packs.head;
	while (entry) {
		free(entry->pack);
		entry = entry->next;
	}
	packfile_list_clear(&packed->packs);

	strmap_clear(&packed->packs_by_path, 0);
	odb_source_release(&packed->base);
	free(packed);
}
struct odb_source_packed *odb_source_packed_new(struct object_database *odb,
const char *path,
bool local)
{
struct odb_source_packed *packed;
CALLOC_ARRAY(packed, 1);
odb_source_init(&packed->base, odb, ODB_SOURCE_PACKED, path, local);
strmap_init(&packed->packs_by_path);
packed->base.free = odb_source_packed_free;
packed->base.close = odb_source_packed_close;
packed->base.reprepare = odb_source_packed_reprepare;
packed->base.read_object_info = odb_source_packed_read_object_info;
packed->base.read_object_stream = odb_source_packed_read_object_stream;
packed->base.for_each_object = odb_source_packed_for_each_object;
packed->base.count_objects = odb_source_packed_count_objects;
packed->base.find_abbrev_len = odb_source_packed_find_abbrev_len;
packed->base.freshen_object = odb_source_packed_freshen_object;
packed->base.write_object = odb_source_packed_write_object;
packed->base.write_object_stream = odb_source_packed_write_object_stream;
packed->base.begin_transaction = odb_source_packed_begin_transaction;
packed->base.read_alternates = odb_source_packed_read_alternates;
packed->base.write_alternate = odb_source_packed_write_alternate;
if (!is_absolute_path(path))
chdir_notify_register(NULL, odb_source_packed_reparent, packed);
return packed;
}