| #include "git-compat-util.h" |
| #include "abspath.h" |
| #include "chdir-notify.h" |
| #include "dir.h" |
| #include "git-zlib.h" |
| #include "mergesort.h" |
| #include "midx.h" |
| #include "odb/source-packed.h" |
| #include "odb/streaming.h" |
| #include "packfile.h" |
| |
| static int find_pack_entry(struct odb_source_packed *store, |
| const struct object_id *oid, |
| struct pack_entry *e) |
| { |
| struct packfile_list_entry *l; |
| |
| odb_source_packed_prepare(store); |
| if (store->midx && fill_midx_entry(store->midx, oid, e)) |
| return 1; |
| |
| for (l = store->packs.head; l; l = l->next) { |
| struct packed_git *p = l->pack; |
| |
| if (!p->multi_pack_index && packfile_fill_entry(p, oid, e)) { |
| if (!store->skip_mru_updates) |
| packfile_list_prepend(&store->packs, p); |
| return 1; |
| } |
| } |
| |
| return 0; |
| } |
| |
| static int odb_source_packed_read_object_info(struct odb_source *source, |
| const struct object_id *oid, |
| struct object_info *oi, |
| enum object_info_flags flags) |
| { |
| struct odb_source_packed *packed = odb_source_packed_downcast(source); |
| struct pack_entry e; |
| int ret; |
| |
| /* |
| * In case the first read didn't surface the object, we have to reload |
| * packfiles. This may cause us to discover new packfiles that have |
| * been added since the last time we have prepared the packfile store. |
| */ |
| if (flags & OBJECT_INFO_SECOND_READ) |
| odb_source_reprepare(source); |
| |
| if (!find_pack_entry(packed, oid, &e)) |
| return 1; |
| |
| /* |
| * We know that the caller doesn't actually need the |
| * information below, so return early. |
| */ |
| if (!oi) |
| return 0; |
| |
| ret = packed_object_info(e.p, e.offset, oi); |
| if (ret < 0) { |
| mark_bad_packed_object(e.p, oid); |
| return -1; |
| } |
| |
| return 0; |
| } |
| |
| static int odb_source_packed_read_object_stream(struct odb_read_stream **out, |
| struct odb_source *source, |
| const struct object_id *oid) |
| { |
| struct odb_source_packed *packed = odb_source_packed_downcast(source); |
| struct pack_entry e; |
| |
| if (!find_pack_entry(packed, oid, &e)) |
| return -1; |
| |
| return packfile_read_object_stream(out, oid, e.p, e.offset); |
| } |
| |
| struct odb_source_packed_for_each_object_wrapper_data { |
| struct odb_source_packed *store; |
| const struct object_info *request; |
| odb_for_each_object_cb cb; |
| void *cb_data; |
| }; |
| |
| static int odb_source_packed_for_each_object_wrapper(const struct object_id *oid, |
| struct packed_git *pack, |
| uint32_t index_pos, |
| void *cb_data) |
| { |
| struct odb_source_packed_for_each_object_wrapper_data *data = cb_data; |
| |
| if (data->request) { |
| off_t offset = nth_packed_object_offset(pack, index_pos); |
| struct object_info oi = *data->request; |
| |
| if (packed_object_info_with_index_pos(pack, offset, |
| &index_pos, &oi) < 0) { |
| mark_bad_packed_object(pack, oid); |
| return -1; |
| } |
| |
| return data->cb(oid, &oi, data->cb_data); |
| } else { |
| return data->cb(oid, NULL, data->cb_data); |
| } |
| } |
| |
/*
 * Check whether the first `len` hex digits of the binary hashes `a` and
 * `b` are equal.
 *
 * Each byte holds two hex digits. An odd `len` leaves one trailing digit,
 * which is stored in the high nibble of the following byte.
 *
 * A length of zero trivially matches. The loop tests `len` before
 * touching any byte, so short lengths neither compare more digits than
 * requested nor let the unsigned counter wrap around.
 *
 * Returns 1 on match, 0 otherwise.
 */
static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b)
{
	/* Compare whole bytes, i.e. two hex digits at a time. */
	for (; len > 1; len -= 2, a++, b++)
		if (*a != *b)
			return 0;

	/* One digit left over: only the high nibble is significant. */
	if (len && ((*a ^ *b) & 0xf0))
		return 0;

	return 1;
}
| |
| static int for_each_prefixed_object_in_midx( |
| struct odb_source_packed *store, |
| struct multi_pack_index *m, |
| const struct odb_for_each_object_options *opts, |
| struct odb_source_packed_for_each_object_wrapper_data *data) |
| { |
| int ret; |
| |
| for (; m; m = m->base_midx) { |
| uint32_t num, i, first = 0; |
| int len = opts->prefix_hex_len > m->source->base.odb->repo->hash_algo->hexsz ? |
| m->source->base.odb->repo->hash_algo->hexsz : opts->prefix_hex_len; |
| |
| if (!m->num_objects) |
| continue; |
| |
| num = m->num_objects + m->num_objects_in_base; |
| |
| bsearch_one_midx(opts->prefix, m, &first); |
| |
| /* |
| * At this point, "first" is the location of the lowest |
| * object with an object name that could match "opts->prefix". |
| * See if we have 0, 1 or more objects that actually match(es). |
| */ |
| for (i = first; i < num; i++) { |
| const struct object_id *current = NULL; |
| struct object_id oid; |
| |
| current = nth_midxed_object_oid(&oid, m, i); |
| |
| if (!match_hash(len, opts->prefix->hash, current->hash)) |
| break; |
| |
| if (data->request) { |
| struct object_info oi = *data->request; |
| |
| ret = odb_source_read_object_info(&store->base, current, |
| &oi, 0); |
| if (ret) |
| goto out; |
| |
| ret = data->cb(&oid, &oi, data->cb_data); |
| if (ret) |
| goto out; |
| } else { |
| ret = data->cb(&oid, NULL, data->cb_data); |
| if (ret) |
| goto out; |
| } |
| } |
| } |
| |
| ret = 0; |
| |
| out: |
| return ret; |
| } |
| |
| static int for_each_prefixed_object_in_pack( |
| struct odb_source_packed *store, |
| struct packed_git *p, |
| const struct odb_for_each_object_options *opts, |
| struct odb_source_packed_for_each_object_wrapper_data *data) |
| { |
| uint32_t num, i, first = 0; |
| int len = opts->prefix_hex_len > p->repo->hash_algo->hexsz ? |
| p->repo->hash_algo->hexsz : opts->prefix_hex_len; |
| int ret; |
| |
| num = p->num_objects; |
| bsearch_pack(opts->prefix, p, &first); |
| |
| /* |
| * At this point, "first" is the location of the lowest object |
| * with an object name that could match "bin_pfx". See if we have |
| * 0, 1 or more objects that actually match(es). |
| */ |
| for (i = first; i < num; i++) { |
| struct object_id oid; |
| |
| nth_packed_object_id(&oid, p, i); |
| if (!match_hash(len, opts->prefix->hash, oid.hash)) |
| break; |
| |
| if (data->request) { |
| struct object_info oi = *data->request; |
| |
| ret = odb_source_read_object_info(&store->base, &oid, &oi, 0); |
| if (ret) |
| goto out; |
| |
| ret = data->cb(&oid, &oi, data->cb_data); |
| if (ret) |
| goto out; |
| } else { |
| ret = data->cb(&oid, NULL, data->cb_data); |
| if (ret) |
| goto out; |
| } |
| } |
| |
| ret = 0; |
| |
| out: |
| return ret; |
| } |
| |
| static int odb_source_packed_for_each_prefixed_object( |
| struct odb_source_packed *store, |
| const struct odb_for_each_object_options *opts, |
| struct odb_source_packed_for_each_object_wrapper_data *data) |
| { |
| struct packfile_list_entry *e; |
| struct multi_pack_index *m; |
| bool pack_errors = false; |
| int ret; |
| |
| if (opts->flags) |
| BUG("flags unsupported"); |
| |
| store->skip_mru_updates = true; |
| |
| m = get_multi_pack_index(store); |
| if (m) { |
| ret = for_each_prefixed_object_in_midx(store, m, opts, data); |
| if (ret) |
| goto out; |
| } |
| |
| for (e = packfile_store_get_packs(store); e; e = e->next) { |
| if (e->pack->multi_pack_index) |
| continue; |
| |
| if (open_pack_index(e->pack)) { |
| pack_errors = true; |
| continue; |
| } |
| |
| if (!e->pack->num_objects) |
| continue; |
| |
| ret = for_each_prefixed_object_in_pack(store, e->pack, opts, data); |
| if (ret) |
| goto out; |
| } |
| |
| ret = 0; |
| |
| out: |
| store->skip_mru_updates = false; |
| if (!ret && pack_errors) |
| ret = -1; |
| return ret; |
| } |
| |
| static int odb_source_packed_for_each_object(struct odb_source *source, |
| const struct object_info *request, |
| odb_for_each_object_cb cb, |
| void *cb_data, |
| const struct odb_for_each_object_options *opts) |
| { |
| struct odb_source_packed *packed = odb_source_packed_downcast(source); |
| struct odb_source_packed_for_each_object_wrapper_data data = { |
| .store = packed, |
| .request = request, |
| .cb = cb, |
| .cb_data = cb_data, |
| }; |
| struct packfile_list_entry *e; |
| int pack_errors = 0, ret; |
| |
| if (opts->prefix) |
| return odb_source_packed_for_each_prefixed_object(packed, opts, &data); |
| |
| packed->skip_mru_updates = true; |
| |
| for (e = packfile_store_get_packs(packed); e; e = e->next) { |
| struct packed_git *p = e->pack; |
| |
| if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) |
| continue; |
| if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) && |
| !p->pack_promisor) |
| continue; |
| if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) && |
| p->pack_keep_in_core) |
| continue; |
| if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) && |
| p->pack_keep) |
| continue; |
| if (open_pack_index(p)) { |
| pack_errors = 1; |
| continue; |
| } |
| |
| ret = for_each_object_in_pack(p, odb_source_packed_for_each_object_wrapper, |
| &data, opts->flags); |
| if (ret) |
| goto out; |
| } |
| |
| ret = 0; |
| |
| out: |
| packed->skip_mru_updates = false; |
| |
| if (!ret && pack_errors) |
| ret = -1; |
| return ret; |
| } |
| |
| static int odb_source_packed_count_objects(struct odb_source *source, |
| enum odb_count_objects_flags flags UNUSED, |
| unsigned long *out) |
| { |
| struct odb_source_packed *packed = odb_source_packed_downcast(source); |
| struct packfile_list_entry *e; |
| struct multi_pack_index *m; |
| unsigned long count = 0; |
| int ret; |
| |
| m = get_multi_pack_index(packed); |
| if (m) |
| count += m->num_objects + m->num_objects_in_base; |
| |
| for (e = packfile_store_get_packs(packed); e; e = e->next) { |
| if (e->pack->multi_pack_index) |
| continue; |
| if (open_pack_index(e->pack)) { |
| ret = -1; |
| goto out; |
| } |
| |
| count += e->pack->num_objects; |
| } |
| |
| *out = count; |
| ret = 0; |
| |
| out: |
| return ret; |
| } |
| |
| static int extend_abbrev_len(const struct object_id *a, |
| const struct object_id *b, |
| unsigned *out) |
| { |
| unsigned len = oid_common_prefix_hexlen(a, b); |
| if (len != hash_algos[a->algo].hexsz && len >= *out) |
| *out = len + 1; |
| return 0; |
| } |
| |
| static void find_abbrev_len_for_midx(struct multi_pack_index *m, |
| const struct object_id *oid, |
| unsigned min_len, |
| unsigned *out) |
| { |
| unsigned len = min_len; |
| |
| for (; m; m = m->base_midx) { |
| int match = 0; |
| uint32_t num, first = 0; |
| struct object_id found_oid; |
| |
| if (!m->num_objects) |
| continue; |
| |
| num = m->num_objects + m->num_objects_in_base; |
| match = bsearch_one_midx(oid, m, &first); |
| |
| /* |
| * first is now the position in the packfile where we |
| * would insert the object ID if it does not exist (or the |
| * position of the object ID if it does exist). Hence, we |
| * consider a maximum of two objects nearby for the |
| * abbreviation length. |
| */ |
| |
| if (!match) { |
| if (nth_midxed_object_oid(&found_oid, m, first)) |
| extend_abbrev_len(&found_oid, oid, &len); |
| } else if (first < num - 1) { |
| if (nth_midxed_object_oid(&found_oid, m, first + 1)) |
| extend_abbrev_len(&found_oid, oid, &len); |
| } |
| if (first > 0) { |
| if (nth_midxed_object_oid(&found_oid, m, first - 1)) |
| extend_abbrev_len(&found_oid, oid, &len); |
| } |
| } |
| |
| *out = len; |
| } |
| |
| static void find_abbrev_len_for_pack(struct packed_git *p, |
| const struct object_id *oid, |
| unsigned min_len, |
| unsigned *out) |
| { |
| int match; |
| uint32_t num, first = 0; |
| struct object_id found_oid; |
| unsigned len = min_len; |
| |
| num = p->num_objects; |
| match = bsearch_pack(oid, p, &first); |
| |
| /* |
| * first is now the position in the packfile where we would insert |
| * the object ID if it does not exist (or the position of mad->hash if |
| * it does exist). Hence, we consider a maximum of two objects |
| * nearby for the abbreviation length. |
| */ |
| if (!match) { |
| if (!nth_packed_object_id(&found_oid, p, first)) |
| extend_abbrev_len(&found_oid, oid, &len); |
| } else if (first < num - 1) { |
| if (!nth_packed_object_id(&found_oid, p, first + 1)) |
| extend_abbrev_len(&found_oid, oid, &len); |
| } |
| if (first > 0) { |
| if (!nth_packed_object_id(&found_oid, p, first - 1)) |
| extend_abbrev_len(&found_oid, oid, &len); |
| } |
| |
| *out = len; |
| } |
| |
| static int odb_source_packed_find_abbrev_len(struct odb_source *source, |
| const struct object_id *oid, |
| unsigned min_len, |
| unsigned *out) |
| { |
| struct odb_source_packed *packed = odb_source_packed_downcast(source); |
| struct packfile_list_entry *e; |
| struct multi_pack_index *m; |
| |
| m = get_multi_pack_index(packed); |
| if (m) |
| find_abbrev_len_for_midx(m, oid, min_len, &min_len); |
| |
| for (e = packfile_store_get_packs(packed); e; e = e->next) { |
| if (e->pack->multi_pack_index) |
| continue; |
| if (open_pack_index(e->pack) || !e->pack->num_objects) |
| continue; |
| |
| find_abbrev_len_for_pack(e->pack, oid, min_len, &min_len); |
| } |
| |
| *out = min_len; |
| return 0; |
| } |
| |
| static int odb_source_packed_freshen_object(struct odb_source *source, |
| const struct object_id *oid) |
| { |
| struct odb_source_packed *packed = odb_source_packed_downcast(source); |
| struct pack_entry e; |
| |
| if (!find_pack_entry(packed, oid, &e)) |
| return 0; |
| if (e.p->is_cruft) |
| return 0; |
| if (e.p->freshened) |
| return 1; |
| if (utime(e.p->pack_name, NULL)) |
| return 0; |
| e.p->freshened = 1; |
| |
| return 1; |
| } |
| |
| static int odb_source_packed_write_object(struct odb_source *source UNUSED, |
| const void *buf UNUSED, |
| unsigned long len UNUSED, |
| enum object_type type UNUSED, |
| struct object_id *oid UNUSED, |
| struct object_id *compat_oid UNUSED, |
| unsigned flags UNUSED) |
| { |
| return error("packed backend cannot write objects"); |
| } |
| |
| static int odb_source_packed_write_object_stream(struct odb_source *source UNUSED, |
| struct odb_write_stream *stream UNUSED, |
| size_t len UNUSED, |
| struct object_id *oid UNUSED) |
| { |
| return error("packed backend cannot write object streams"); |
| } |
| |
| static int odb_source_packed_begin_transaction(struct odb_source *source UNUSED, |
| struct odb_transaction **out UNUSED) |
| { |
| return error("packed backend cannot begin transactions"); |
| } |
| |
| static int odb_source_packed_read_alternates(struct odb_source *source UNUSED, |
| struct strvec *out UNUSED) |
| { |
| return 0; |
| } |
| |
| static int odb_source_packed_write_alternate(struct odb_source *source UNUSED, |
| const char *alternate UNUSED) |
| { |
| return error("packed backend cannot write alternates"); |
| } |
| |
/*
 * Optional hook that is called with a PACKDIR_FILE_* bit set and a path
 * for files found while scanning a pack directory. Garbage reporting is
 * skipped entirely while this is NULL.
 */
void (*report_garbage)(unsigned seen_bits, const char *path);
| |
| static void report_helper(const struct string_list *list, |
| int seen_bits, int first, int last) |
| { |
| if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX)) |
| return; |
| |
| for (; first < last; first++) |
| report_garbage(seen_bits, list->items[first].string); |
| } |
| |
| static void report_pack_garbage(struct string_list *list) |
| { |
| int baselen = -1, first = 0, seen_bits = 0; |
| |
| if (!report_garbage) |
| return; |
| |
| string_list_sort(list); |
| |
| for (size_t i = 0; i < list->nr; i++) { |
| const char *path = list->items[i].string; |
| if (baselen != -1 && |
| strncmp(path, list->items[first].string, baselen)) { |
| report_helper(list, seen_bits, first, i); |
| baselen = -1; |
| seen_bits = 0; |
| } |
| if (baselen == -1) { |
| const char *dot = strrchr(path, '.'); |
| if (!dot) { |
| report_garbage(PACKDIR_FILE_GARBAGE, path); |
| continue; |
| } |
| baselen = dot - path + 1; |
| first = i; |
| } |
| if (!strcmp(path + baselen, "pack")) |
| seen_bits |= 1; |
| else if (!strcmp(path + baselen, "idx")) |
| seen_bits |= 2; |
| } |
| report_helper(list, seen_bits, first, list->nr); |
| } |
| |
/* Payload passed to prepare_pack() while scanning a pack directory. */
struct prepare_pack_data {
	/* The packed source that discovered packs are loaded into. */
	struct odb_source_packed *source;
	/* Collects pack-related paths for later garbage reporting. */
	struct string_list *garbage;
};
| |
| static void prepare_pack(const char *full_name, size_t full_name_len, |
| const char *file_name, void *_data) |
| { |
| struct prepare_pack_data *data = (struct prepare_pack_data *)_data; |
| size_t base_len = full_name_len; |
| |
| if (strip_suffix_mem(full_name, &base_len, ".idx") && |
| !(data->source->midx && |
| midx_contains_pack(data->source->midx, file_name))) { |
| char *trimmed_path = xstrndup(full_name, full_name_len); |
| packfile_store_load_pack(data->source, |
| trimmed_path, data->source->base.local); |
| free(trimmed_path); |
| } |
| |
| if (!report_garbage) |
| return; |
| |
| if (!strcmp(file_name, "multi-pack-index") || |
| !strcmp(file_name, "multi-pack-index.d")) |
| return; |
| if (starts_with(file_name, "multi-pack-index") && |
| (ends_with(file_name, ".bitmap") || ends_with(file_name, ".rev"))) |
| return; |
| if (ends_with(file_name, ".idx") || |
| ends_with(file_name, ".rev") || |
| ends_with(file_name, ".pack") || |
| ends_with(file_name, ".bitmap") || |
| ends_with(file_name, ".keep") || |
| ends_with(file_name, ".promisor") || |
| ends_with(file_name, ".mtimes")) |
| string_list_append(data->garbage, full_name); |
| else |
| report_garbage(PACKDIR_FILE_GARBAGE, full_name); |
| } |
| |
| static void prepare_packed_git_one(struct odb_source_packed *source) |
| { |
| struct string_list garbage = STRING_LIST_INIT_DUP; |
| struct prepare_pack_data data = { |
| .source = source, |
| .garbage = &garbage, |
| }; |
| |
| for_each_file_in_pack_dir(source->base.path, prepare_pack, &data); |
| |
| report_pack_garbage(data.garbage); |
| string_list_clear(data.garbage, 0); |
| } |
| |
| DEFINE_LIST_SORT(static, sort_packs, struct packfile_list_entry, next); |
| |
| static int sort_pack(const struct packfile_list_entry *a, |
| const struct packfile_list_entry *b) |
| { |
| int st; |
| |
| /* |
| * Local packs tend to contain objects specific to our |
| * variant of the project than remote ones. In addition, |
| * remote ones could be on a network mounted filesystem. |
| * Favor local ones for these reasons. |
| */ |
| st = a->pack->pack_local - b->pack->pack_local; |
| if (st) |
| return -st; |
| |
| /* |
| * Younger packs tend to contain more recent objects, |
| * and more recent objects tend to get accessed more |
| * often. |
| */ |
| if (a->pack->mtime < b->pack->mtime) |
| return 1; |
| else if (a->pack->mtime == b->pack->mtime) |
| return 0; |
| return -1; |
| } |
| |
| void odb_source_packed_prepare(struct odb_source_packed *source) |
| { |
| if (source->initialized) |
| return; |
| |
| prepare_multi_pack_index_one(source); |
| prepare_packed_git_one(source); |
| |
| sort_packs(&source->packs.head, sort_pack); |
| for (struct packfile_list_entry *e = source->packs.head; e; e = e->next) |
| if (!e->next) |
| source->packs.tail = e; |
| |
| source->initialized = true; |
| } |
| |
| static void odb_source_packed_reprepare(struct odb_source *source) |
| { |
| struct odb_source_packed *packed = odb_source_packed_downcast(source); |
| packed->initialized = false; |
| odb_source_packed_prepare(packed); |
| } |
| |
| static void odb_source_packed_reparent(const char *name UNUSED, |
| const char *old_cwd, |
| const char *new_cwd, |
| void *cb_data) |
| { |
| struct odb_source_packed *packed = cb_data; |
| char *path = reparent_relative_path(old_cwd, new_cwd, |
| packed->base.path); |
| free(packed->base.path); |
| packed->base.path = path; |
| } |
| |
| static void odb_source_packed_close(struct odb_source *source) |
| { |
| struct odb_source_packed *packed = odb_source_packed_downcast(source); |
| |
| for (struct packfile_list_entry *e = packed->packs.head; e; e = e->next) { |
| if (e->pack->do_not_close) |
| BUG("want to close pack marked 'do-not-close'"); |
| close_pack(e->pack); |
| } |
| if (packed->midx) |
| close_midx(packed->midx); |
| packed->midx = NULL; |
| } |
| |
| static void odb_source_packed_free(struct odb_source *source) |
| { |
| struct odb_source_packed *packed = odb_source_packed_downcast(source); |
| |
| chdir_notify_unregister(NULL, odb_source_packed_reparent, packed); |
| |
| for (struct packfile_list_entry *e = packed->packs.head; e; e = e->next) |
| free(e->pack); |
| packfile_list_clear(&packed->packs); |
| |
| strmap_clear(&packed->packs_by_path, 0); |
| odb_source_release(&packed->base); |
| free(packed); |
| } |
| |
| struct odb_source_packed *odb_source_packed_new(struct object_database *odb, |
| const char *path, |
| bool local) |
| { |
| struct odb_source_packed *packed; |
| |
| CALLOC_ARRAY(packed, 1); |
| odb_source_init(&packed->base, odb, ODB_SOURCE_PACKED, path, local); |
| strmap_init(&packed->packs_by_path); |
| |
| packed->base.free = odb_source_packed_free; |
| packed->base.close = odb_source_packed_close; |
| packed->base.reprepare = odb_source_packed_reprepare; |
| packed->base.read_object_info = odb_source_packed_read_object_info; |
| packed->base.read_object_stream = odb_source_packed_read_object_stream; |
| packed->base.for_each_object = odb_source_packed_for_each_object; |
| packed->base.count_objects = odb_source_packed_count_objects; |
| packed->base.find_abbrev_len = odb_source_packed_find_abbrev_len; |
| packed->base.freshen_object = odb_source_packed_freshen_object; |
| packed->base.write_object = odb_source_packed_write_object; |
| packed->base.write_object_stream = odb_source_packed_write_object_stream; |
| packed->base.begin_transaction = odb_source_packed_begin_transaction; |
| packed->base.read_alternates = odb_source_packed_read_alternates; |
| packed->base.write_alternate = odb_source_packed_write_alternate; |
| |
| if (!is_absolute_path(path)) |
| chdir_notify_register(NULL, odb_source_packed_reparent, packed); |
| |
| return packed; |
| } |