From 2560f7e3bf54153b67cdedd50cc2f165ef82763d Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 27 Feb 2019 15:10:01 -0500 Subject: [PATCH 1/2] midx: verify: group objects by packfile to speed up object verification Sort the set of objects by packfile so that only one packfile needs to be open at a time. This is a performance improvement. Previously, objects were verified in OID order. This essentially required all packfiles to be open at the same time. If the number of packfiles exceeded the open file limit, packfiles would be closed and re-opened many times. Signed-off-by: Jeff Hostetler --- midx.c | 49 ++++++++++++++++++++++++++++++++++++++++++++----- packfile.c | 2 +- packfile.h | 2 ++ 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/midx.c b/midx.c index dd72cac7839a42..4b009d2cef86be 100644 --- a/midx.c +++ b/midx.c @@ -1001,9 +1001,29 @@ static void midx_report(const char *fmt, ...) va_end(ap); } +struct pair_pos_vs_id +{ + uint32_t pos; + uint32_t pack_int_id; +}; + +static int compare_pair_pos_vs_id(const void *_a, const void *_b) +{ + struct pair_pos_vs_id *a = (struct pair_pos_vs_id *)_a; + struct pair_pos_vs_id *b = (struct pair_pos_vs_id *)_b; + + if (a->pack_int_id < b->pack_int_id) + return -1; + if (a->pack_int_id > b->pack_int_id) + return 1; + + return 0; +} + int verify_midx_file(const char *object_dir) { - uint32_t i; + struct pair_pos_vs_id *pairs = NULL; + uint32_t i, k; struct progress *progress; struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); verify_midx_error = 0; @@ -1040,15 +1060,32 @@ int verify_midx_file(const char *object_dir) } progress = start_progress(_("Verifying object offsets"), m->num_objects); + + /* + * Create an array mapping each object to its packfile id. Sort it + * to group the objects by packfile. Using this permutation to visit + * each of the objects only requires 1 packfile to be open at a time. 
+ */ + ALLOC_ARRAY(pairs, m->num_objects); for (i = 0; i < m->num_objects; i++) { + pairs[i].pos = i; + pairs[i].pack_int_id = nth_midxed_pack_int_id(m, i); + } + QSORT(pairs, m->num_objects, compare_pair_pos_vs_id); + + for (k = 0; k < m->num_objects; k++) { struct object_id oid; struct pack_entry e; off_t m_offset, p_offset; - nth_midxed_object_oid(&oid, m, i); + if (k > 0 && pairs[k-1].pack_int_id != pairs[k].pack_int_id && + m->packs[pairs[k-1].pack_int_id]) + close_pack_fd(m->packs[pairs[k-1].pack_int_id]); + + nth_midxed_object_oid(&oid, m, pairs[k].pos); if (!fill_midx_entry(&oid, &e, m)) { midx_report(_("failed to load pack entry for oid[%d] = %s"), - i, oid_to_hex(&oid)); + pairs[k].pos, oid_to_hex(&oid)); continue; } @@ -1063,12 +1100,14 @@ int verify_midx_file(const char *object_dir) if (m_offset != p_offset) midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64), - i, oid_to_hex(&oid), m_offset, p_offset); + pairs[k].pos, oid_to_hex(&oid), m_offset, p_offset); - display_progress(progress, i + 1); + display_progress(progress, k + 1); } stop_progress(&progress); + free(pairs); + return verify_midx_error; } diff --git a/packfile.c b/packfile.c index bacecb4d0debf0..b41b8319d5aa89 100644 --- a/packfile.c +++ b/packfile.c @@ -309,7 +309,7 @@ void close_pack_windows(struct packed_git *p) } } -static int close_pack_fd(struct packed_git *p) +int close_pack_fd(struct packed_git *p) { if (p->pack_fd < 0) return 0; diff --git a/packfile.h b/packfile.h index 5b7bcdb1dd1212..efa35d99c13790 100644 --- a/packfile.h +++ b/packfile.h @@ -76,6 +76,8 @@ extern int open_pack_index(struct packed_git *); */ extern void close_pack_index(struct packed_git *); +int close_pack_fd(struct packed_git *p); + extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value); extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); From 4a4f2f41e767d179ded25f3fa322660a2e626884 Mon Sep 17 00:00:00 2001 From: Jeff 
Hostetler Date: Thu, 28 Feb 2019 09:49:56 -0500 Subject: [PATCH 2/2] trace2:data: add trace2 data to midx Log multi-pack-index sub-command (cmd_mode). Log number of objects and number of packfiles. Signed-off-by: Jeff Hostetler --- builtin/multi-pack-index.c | 3 +++ midx.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index d87a2235e39715..a342e32cf30848 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -3,6 +3,7 @@ #include "config.h" #include "parse-options.h" #include "midx.h" +#include "trace2.h" static char const * const builtin_multi_pack_index_usage[] = { N_("git multi-pack-index [--object-dir=] (write|verify|expire|repack --batch-size=)"), @@ -43,6 +44,8 @@ int cmd_multi_pack_index(int argc, const char **argv, return 1; } + trace2_cmd_mode(argv[0]); + if (!strcmp(argv[0], "repack")) return midx_repack(opts.object_dir, (size_t)opts.batch_size); if (opts.batch_size) diff --git a/midx.c b/midx.c index 4b009d2cef86be..fc370f0c17b596 100644 --- a/midx.c +++ b/midx.c @@ -9,6 +9,7 @@ #include "midx.h" #include "progress.h" #include "run-command.h" +#include "trace2.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 @@ -165,6 +166,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local m->pack_names[i]); } + trace2_data_intmax("midx", the_repository, "load/num_packs", m->num_packs); + trace2_data_intmax("midx", the_repository, "load/num_objects", m->num_objects); + return m; cleanup_fail: