From 9b0eedc4042098d535e4b1532ce365bee8519d85 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 28 May 2019 09:18:58 -0400 Subject: [PATCH 01/21] fixup! trace2:data: add trace2 data to midx This reverts commit a1040bf6dc8e57272232a32512b63b8d1adbdc96. Signed-off-by: Derrick Stolee --- midx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/midx.c b/midx.c index b268b703392af9..10d9a9421a08a8 100644 --- a/midx.c +++ b/midx.c @@ -10,7 +10,6 @@ #include "progress.h" #include "trace2.h" #include "run-command.h" -#include "trace2.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 From 977b8d255525000963f77a83546b1478e1ff93e9 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 28 May 2019 09:19:08 -0400 Subject: [PATCH 02/21] fixup! midx: use more structured data for expire This reverts commit 22997ef167ea566c80ab2bd05f8df280c4582f47. Signed-off-by: Derrick Stolee --- midx.c | 250 ++++++++++++++++++------------------ t/t5319-multi-pack-index.sh | 28 ---- 2 files changed, 124 insertions(+), 154 deletions(-) diff --git a/midx.c b/midx.c index 10d9a9421a08a8..c0cd082ca8bbaf 100644 --- a/midx.c +++ b/midx.c @@ -428,26 +428,14 @@ static size_t write_midx_header(struct hashfile *f, return MIDX_HEADER_SIZE; } -struct midx_info { - uint32_t orig_pack_int_id; - uint32_t new_pack_int_id; - char *pack_name; - struct packed_git *p; - unsigned expired : 1; -}; - -static int midx_info_compare(const void *_a, const void *_b) -{ - struct midx_info *a = (struct midx_info *)_a; - struct midx_info *b = (struct midx_info *)_b; - return strcmp(a->pack_name, b->pack_name); -} - struct pack_list { - struct midx_info *info; + struct packed_git **list; + char **names; uint32_t *perm; uint32_t nr; - uint32_t alloc; + uint32_t alloc_list; + uint32_t alloc_names; + uint32_t alloc_perm; size_t pack_name_concat_len; struct multi_pack_index *m; }; @@ -461,33 +449,69 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len, if (packs->m && 
midx_contains_pack(packs->m, file_name)) return; - ALLOC_GROW(packs->info, packs->nr + 1, packs->alloc); + ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list); + ALLOC_GROW(packs->names, packs->nr + 1, packs->alloc_names); + ALLOC_GROW(packs->perm, packs->nr + 1, packs->alloc_perm); - packs->info[packs->nr].p = add_packed_git(full_path, - full_path_len, - 0); - packs->info[packs->nr].expired = 0; + packs->list[packs->nr] = add_packed_git(full_path, + full_path_len, + 0); - if (!packs->info[packs->nr].p) { + if (!packs->list[packs->nr]) { warning(_("failed to add packfile '%s'"), full_path); return; } - if (open_pack_index(packs->info[packs->nr].p)) { + if (open_pack_index(packs->list[packs->nr])) { warning(_("failed to open pack-index '%s'"), full_path); - close_pack(packs->info[packs->nr].p); - FREE_AND_NULL(packs->info[packs->nr].p); + close_pack(packs->list[packs->nr]); + FREE_AND_NULL(packs->list[packs->nr]); return; } - packs->info[packs->nr].pack_name = xstrdup(file_name); - packs->info[packs->nr].orig_pack_int_id = packs->nr; + packs->perm[packs->nr] = packs->nr; + packs->names[packs->nr] = xstrdup(file_name); + packs->pack_name_concat_len += strlen(file_name) + 1; packs->nr++; } } +struct pack_pair { + uint32_t pack_int_id; + char *pack_name; +}; + +static int pack_pair_compare(const void *_a, const void *_b) +{ + struct pack_pair *a = (struct pack_pair *)_a; + struct pack_pair *b = (struct pack_pair *)_b; + return strcmp(a->pack_name, b->pack_name); +} + +static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *perm) +{ + uint32_t i; + struct pack_pair *pairs; + + ALLOC_ARRAY(pairs, nr_packs); + + for (i = 0; i < nr_packs; i++) { + pairs[i].pack_int_id = perm[i]; + pairs[i].pack_name = pack_names[i]; + } + + QSORT(pairs, nr_packs, pack_pair_compare); + + for (i = 0; i < nr_packs; i++) { + pack_names[i] = pairs[i].pack_name; + perm[pairs[i].pack_int_id] = i; + } + + free(pairs); +} + struct pack_midx_entry { struct object_id oid; 
uint32_t pack_int_id; @@ -513,6 +537,7 @@ static int midx_oid_compare(const void *_a, const void *_b) } static int nth_midxed_pack_midx_entry(struct multi_pack_index *m, + uint32_t *pack_perm, struct pack_midx_entry *e, uint32_t pos) { @@ -520,7 +545,7 @@ static int nth_midxed_pack_midx_entry(struct multi_pack_index *m, return 1; nth_midxed_object_oid(&e->oid, m, pos); - e->pack_int_id = nth_midxed_pack_int_id(m, pos); + e->pack_int_id = pack_perm[nth_midxed_pack_int_id(m, pos)]; e->offset = nth_midxed_offset(m, pos); /* consider objects in midx to be from "old" packs */ @@ -554,7 +579,8 @@ static void fill_pack_entry(uint32_t pack_int_id, * of a packfile containing the object). */ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, - struct midx_info *info, + struct packed_git **p, + uint32_t *perm, uint32_t nr_packs, uint32_t *nr_objects) { @@ -565,7 +591,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, uint32_t start_pack = m ? 
m->num_packs : 0; for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) - total_objects += info[cur_pack].p->num_objects; + total_objects += p[cur_pack]->num_objects; /* * As we de-duplicate by fanout value, we expect the fanout @@ -590,7 +616,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, for (cur_object = start; cur_object < end; cur_object++) { ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); - nth_midxed_pack_midx_entry(m, + nth_midxed_pack_midx_entry(m, perm, &entries_by_fanout[nr_fanout], cur_object); nr_fanout++; @@ -601,12 +627,12 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, uint32_t start = 0, end; if (cur_fanout) - start = get_pack_fanout(info[cur_pack].p, cur_fanout - 1); - end = get_pack_fanout(info[cur_pack].p, cur_fanout); + start = get_pack_fanout(p[cur_pack], cur_fanout - 1); + end = get_pack_fanout(p[cur_pack], cur_fanout); for (cur_object = start; cur_object < end; cur_object++) { ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); - fill_pack_entry(cur_pack, info[cur_pack].p, cur_object, &entries_by_fanout[nr_fanout]); + fill_pack_entry(perm[cur_pack], p[cur_pack], cur_object, &entries_by_fanout[nr_fanout]); nr_fanout++; } } @@ -635,7 +661,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, } static size_t write_midx_pack_names(struct hashfile *f, - struct midx_info *info, + char **pack_names, uint32_t num_packs) { uint32_t i; @@ -643,19 +669,14 @@ static size_t write_midx_pack_names(struct hashfile *f, size_t written = 0; for (i = 0; i < num_packs; i++) { - size_t writelen; - - if (info[i].expired) - continue; + size_t writelen = strlen(pack_names[i]) + 1; - writelen = strlen(info[i].pack_name) + 1; - - if (i && strcmp(info[i].pack_name, info[i - 1].pack_name) <= 0) + if (i && strcmp(pack_names[i], pack_names[i - 1]) <= 0) BUG("incorrect pack-file order: %s before %s", - info[i - 1].pack_name, - info[i].pack_name); + 
pack_names[i - 1], + pack_names[i]); - hashwrite(f, info[i].pack_name, writelen); + hashwrite(f, pack_names[i], writelen); written += writelen; } @@ -726,7 +747,6 @@ static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len, } static size_t write_midx_object_offsets(struct hashfile *f, int large_offset_needed, - uint32_t *perm, struct pack_midx_entry *objects, uint32_t nr_objects) { struct pack_midx_entry *list = objects; @@ -735,13 +755,8 @@ static size_t write_midx_object_offsets(struct hashfile *f, int large_offset_nee for (i = 0; i < nr_objects; i++) { struct pack_midx_entry *obj = list++; - int pack_int_id = perm[obj->pack_int_id]; - - if (pack_int_id == UINT_MAX) - BUG("tried to write an object %s with expired pack-int-id", - oid_to_hex(&obj->oid)); - hashwrite_be32(f, pack_int_id); + hashwrite_be32(f, obj->pack_int_id); if (large_offset_needed && obj->offset >> 31) hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); @@ -792,7 +807,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * { unsigned char cur_chunk, num_chunks = 0; char *midx_name; - uint32_t i, drop_count; + uint32_t i; struct hashfile *f = NULL; struct lock_file lk; struct pack_list packs; @@ -817,30 +832,66 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * packs.m = load_multi_pack_index(object_dir, 1); packs.nr = 0; - packs.alloc = packs.m ? packs.m->num_packs : 16; - packs.info = NULL; + packs.alloc_list = packs.m ? 
packs.m->num_packs : 16; + packs.alloc_perm = packs.alloc_names = packs.alloc_list; + packs.list = NULL; + packs.names = NULL; packs.perm = NULL; packs.pack_name_concat_len = 0; - ALLOC_ARRAY(packs.info, packs.alloc); + ALLOC_ARRAY(packs.list, packs.alloc_list); + ALLOC_ARRAY(packs.names, packs.alloc_names); + ALLOC_ARRAY(packs.perm, packs.alloc_perm); if (packs.m) { + int drop_index = 0, missing_drops = 0; for (i = 0; i < packs.m->num_packs; i++) { - ALLOC_GROW(packs.info, packs.nr + 1, packs.alloc); + if (packs_to_drop && drop_index < packs_to_drop->nr) { + int cmp = strcmp(packs.m->pack_names[i], + packs_to_drop->items[drop_index].string); + + if (!cmp) { + drop_index++; + continue; + } else if (cmp > 0) { + error(_("did not see pack-file %s to drop"), + packs_to_drop->items[drop_index].string); + drop_index++; + i--; + missing_drops++; + continue; + } + } - packs.info[packs.nr].pack_name = xstrdup(packs.m->pack_names[i]); - packs.info[packs.nr].orig_pack_int_id = i; - packs.info[packs.nr].p = NULL; - packs.info[packs.nr].expired = 0; + ALLOC_GROW(packs.list, packs.nr + 1, packs.alloc_list); + ALLOC_GROW(packs.names, packs.nr + 1, packs.alloc_names); + ALLOC_GROW(packs.perm, packs.nr + 1, packs.alloc_perm); + + packs.perm[packs.nr] = i; + packs.list[packs.nr] = NULL; + packs.names[packs.nr] = xstrdup(packs.m->pack_names[i]); + packs.pack_name_concat_len += strlen(packs.names[packs.nr]) + 1; packs.nr++; } + + if (packs_to_drop && (drop_index < packs_to_drop->nr || missing_drops)) { + error(_("did not see all pack-files to drop")); + result = 1; + goto cleanup; + } } for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &packs); - if (packs.m && packs.nr == packs.m->num_packs && !packs_to_drop) + if (packs.m && packs.nr == packs.m->num_packs) goto cleanup; - entries = get_sorted_entries(packs.m, packs.info, packs.nr, &nr_entries); + if (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT) + packs.pack_name_concat_len += MIDX_CHUNK_ALIGNMENT - + 
(packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT); + + sort_packs_by_name(packs.names, packs.nr, packs.perm); + + entries = get_sorted_entries(packs.m, packs.list, packs.perm, packs.nr, &nr_entries); for (i = 0; i < nr_entries; i++) { if (entries[i].offset > 0x7fffffff) @@ -849,60 +900,6 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * large_offsets_needed = 1; } - QSORT(packs.info, packs.nr, midx_info_compare); - - if (packs_to_drop && packs_to_drop->nr) { - int drop_index = 0; - int missing_drops = 0; - - for (i = 0; i < packs.nr && drop_index < packs_to_drop->nr; i++) { - int cmp = strcmp(packs.info[i].pack_name, - packs_to_drop->items[drop_index].string); - - if (!cmp) { - drop_index++; - packs.info[i].expired = 1; - } else if (cmp > 0) { - error(_("did not see pack-file %s to drop"), - packs_to_drop->items[drop_index].string); - drop_index++; - missing_drops++; - i--; - } - } - - if (missing_drops) { - result = 1; - goto cleanup; - } - } - - drop_count = 0; - for (i = 0; i < packs.nr; i++) { - if (packs.info[i].expired) - drop_count++; - else - packs.info[i].new_pack_int_id = i - drop_count; - } - - packs.perm = xcalloc(packs.nr, sizeof(uint32_t)); - for (i = 0; i < packs.nr; i++) { - if (packs.info[i].expired) - packs.perm[packs.info[i].orig_pack_int_id] = UINT_MAX; - else - packs.perm[packs.info[i].orig_pack_int_id] = - packs.info[i].new_pack_int_id; - } - - for (i = 0; i < packs.nr; i++) { - if (!packs.info[i].expired) - packs.pack_name_concat_len += strlen(packs.info[i].pack_name) + 1; - } - - if (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT) - packs.pack_name_concat_len += MIDX_CHUNK_ALIGNMENT - - (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT); - hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); FREE_AND_NULL(midx_name); @@ -913,7 +910,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * cur_chunk = 0; 
num_chunks = large_offsets_needed ? 5 : 4; - written = write_midx_header(f, num_chunks, packs.nr - drop_count); + written = write_midx_header(f, num_chunks, packs.nr); chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES; chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH; @@ -968,7 +965,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * switch (chunk_ids[i]) { case MIDX_CHUNKID_PACKNAMES: - written += write_midx_pack_names(f, packs.info, packs.nr); + written += write_midx_pack_names(f, packs.names, packs.nr); break; case MIDX_CHUNKID_OIDFANOUT: @@ -980,7 +977,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * break; case MIDX_CHUNKID_OBJECTOFFSETS: - written += write_midx_object_offsets(f, large_offsets_needed, packs.perm, entries, nr_entries); + written += write_midx_object_offsets(f, large_offsets_needed, entries, nr_entries); break; case MIDX_CHUNKID_LARGEOFFSETS: @@ -1003,14 +1000,15 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * cleanup: for (i = 0; i < packs.nr; i++) { - if (packs.info[i].p) { - close_pack(packs.info[i].p); - free(packs.info[i].p); + if (packs.list[i]) { + close_pack(packs.list[i]); + free(packs.list[i]); } - free(packs.info[i].pack_name); + free(packs.names[i]); } - free(packs.info); + free(packs.list); + free(packs.names); free(packs.perm); free(entries); free(midx_name); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 9d0bad8798d4a2..e03d9a32a061c3 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -461,32 +461,4 @@ test_expect_success 'expire removes repacked packs' ' ) ' -test_expect_success 'expire works when adding new packs' ' - ( - cd dup && - git pack-objects --revs .git/objects/pack/pack-combined <<-EOF && - refs/heads/A - ^refs/heads/B - EOF - git pack-objects --revs .git/objects/pack/pack-combined <<-EOF && - refs/heads/B - ^refs/heads/C - EOF - 
git pack-objects --revs .git/objects/pack/pack-combined <<-EOF && - refs/heads/C - ^refs/heads/D - EOF - git multi-pack-index write && - git pack-objects --revs .git/objects/pack/pack-a <<-EOF && - refs/heads/D - ^refs/heads/E - EOF - git multi-pack-index write && - git pack-objects --revs .git/objects/pack/pack-z <<-EOF && - refs/heads/E - EOF - git multi-pack-index expire && - git multi-pack-index verify - ) -' test_done From 41f66ab46d1fc62133ef7b21b14ea0ecaaf17eb6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 28 May 2019 09:19:15 -0400 Subject: [PATCH 03/21] fixup! midx: implement midx_repack() This reverts commit e9cd330feea22f0d0171a13fb276cb648f404ff7. Signed-off-by: Derrick Stolee --- midx.c | 109 +----------------------------------- t/t5319-multi-pack-index.sh | 25 --------- 2 files changed, 1 insertion(+), 133 deletions(-) diff --git a/midx.c b/midx.c index c0cd082ca8bbaf..0c2e7252df7b33 100644 --- a/midx.c +++ b/midx.c @@ -9,7 +9,6 @@ #include "midx.h" #include "progress.h" #include "trace2.h" -#include "run-command.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 @@ -1226,113 +1225,7 @@ int expire_midx_packs(struct repository *r, const char *object_dir) return result; } -struct time_and_id { - timestamp_t mtime; - uint32_t pack_int_id; -}; - -static int compare_by_mtime(const void *a_, const void *b_) -{ - const struct time_and_id *a, *b; - - a = (const struct time_and_id *)a_; - b = (const struct time_and_id *)b_; - - if (a->mtime < b->mtime) - return -1; - if (a->mtime > b->mtime) - return 1; - return 0; -} - int midx_repack(struct repository *r, const char *object_dir, size_t batch_size) { - int result = 0; - uint32_t i, packs_to_repack; - size_t total_size; - struct time_and_id *pack_ti; - unsigned char *include_pack; - struct child_process cmd = CHILD_PROCESS_INIT; - struct strbuf base_name = STRBUF_INIT; - struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); - - if (!m) - return 0; - - include_pack 
= xcalloc(m->num_packs, sizeof(unsigned char)); - pack_ti = xcalloc(m->num_packs, sizeof(struct time_and_id)); - - for (i = 0; i < m->num_packs; i++) { - pack_ti[i].pack_int_id = i; - - if (prepare_midx_pack(r, m, i)) - continue; - - pack_ti[i].mtime = m->packs[i]->mtime; - } - QSORT(pack_ti, m->num_packs, compare_by_mtime); - - total_size = 0; - packs_to_repack = 0; - for (i = 0; total_size < batch_size && i < m->num_packs; i++) { - int pack_int_id = pack_ti[i].pack_int_id; - struct packed_git *p = m->packs[pack_int_id]; - - if (!p) - continue; - if (p->pack_size >= batch_size) - continue; - - packs_to_repack++; - total_size += p->pack_size; - include_pack[pack_int_id] = 1; - } - - if (total_size < batch_size || packs_to_repack < 2) - goto cleanup; - - argv_array_push(&cmd.args, "pack-objects"); - - strbuf_addstr(&base_name, object_dir); - strbuf_addstr(&base_name, "/pack/pack"); - argv_array_push(&cmd.args, base_name.buf); - strbuf_release(&base_name); - - cmd.git_cmd = 1; - cmd.in = cmd.out = -1; - - if (start_command(&cmd)) { - error(_("could not start pack-objects")); - result = 1; - goto cleanup; - } - - for (i = 0; i < m->num_objects; i++) { - struct object_id oid; - uint32_t pack_int_id = nth_midxed_pack_int_id(m, i); - - if (!include_pack[pack_int_id]) - continue; - - nth_midxed_object_oid(&oid, m, i); - xwrite(cmd.in, oid_to_hex(&oid), the_hash_algo->hexsz); - xwrite(cmd.in, "\n", 1); - } - close(cmd.in); - - if (finish_command(&cmd)) { - error(_("could not finish pack-objects")); - result = 1; - goto cleanup; - } - - result = write_midx_internal(object_dir, m, NULL); - m = NULL; - -cleanup: - if (m) - close_midx(m); - free(include_pack); - free(pack_ti); - return result; + return 0; } diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index e03d9a32a061c3..c458908ac0531d 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -436,29 +436,4 @@ test_expect_success 'repack with minimum size does not alter existing 
packs' ' ) ' -test_expect_success 'repack creates a new pack' ' - ( - cd dup && - SECOND_SMALLEST_SIZE=$(ls -l .git/objects/pack/*pack | awk "{print \$5;}" | sort -n | head -n 2 | tail -n 1) && - BATCH_SIZE=$(($SECOND_SMALLEST_SIZE + 1)) && - git multi-pack-index repack --batch-size=$BATCH_SIZE && - ls .git/objects/pack/*idx >idx-list && - test_line_count = 5 idx-list && - test-tool read-midx .git/objects | grep idx >midx-list && - test_line_count = 5 midx-list - ) -' - -test_expect_success 'expire removes repacked packs' ' - ( - cd dup && - ls -S .git/objects/pack/*pack | head -n 3 >expect && - git multi-pack-index expire && - ls -S .git/objects/pack/*pack >actual && - test_cmp expect actual && - test-tool read-midx .git/objects | grep idx >midx-list && - test_line_count = 3 midx-list - ) -' - test_done From c7b7cc1ef2265494c3f6175e8583715e5d177f4b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 28 May 2019 09:19:20 -0400 Subject: [PATCH 04/21] fixup! multi-pack-index: prepare 'repack' subcommand This reverts commit 9c5eb86768d8f3672f6b76f9eee3f3c929010b73. Signed-off-by: Derrick Stolee --- Documentation/git-multi-pack-index.txt | 11 ----------- builtin/multi-pack-index.c | 10 +--------- midx.c | 5 ----- midx.h | 1 - t/t5319-multi-pack-index.sh | 11 ----------- 5 files changed, 1 insertion(+), 37 deletions(-) diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt index cc63531cc02b77..6186c4c9369a9c 100644 --- a/Documentation/git-multi-pack-index.txt +++ b/Documentation/git-multi-pack-index.txt @@ -36,17 +36,6 @@ expire:: have no objects referenced by the MIDX. Rewrite the MIDX file afterward to remove all references to these pack-files. -repack:: - Collect a batch of pack-files whose size are all at most the - size given by --batch-size, but whose sizes sum to larger - than --batch-size. The batch is selected by greedily adding - small pack-files starting with the oldest pack-files that fit - the size. 
Create a new pack-file containing the objects the - multi-pack-index indexes into those pack-files, and rewrite - the multi-pack-index to contain that pack-file. A later run - of 'git multi-pack-index expire' will delete the pack-files - that were part of this batch. - EXAMPLES -------- diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 3d47f6dc1733b5..ad10d4051214a7 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -6,13 +6,12 @@ #include "trace2.h" static char const * const builtin_multi_pack_index_usage[] = { - N_("git multi-pack-index [--object-dir=] (write|verify|expire|repack --batch-size=)"), + N_("git multi-pack-index [--object-dir=] (write|verify|expire)"), NULL }; static struct opts_multi_pack_index { const char *object_dir; - unsigned long batch_size; } opts; int cmd_multi_pack_index(int argc, const char **argv, @@ -21,8 +20,6 @@ int cmd_multi_pack_index(int argc, const char **argv, static struct option builtin_multi_pack_index_options[] = { OPT_FILENAME(0, "object-dir", &opts.object_dir, N_("object directory containing set of packfile and pack-index pairs")), - OPT_MAGNITUDE(0, "batch-size", &opts.batch_size, - N_("during repack, collect pack-files of smaller size into a batch that is larger than this size")), OPT_END(), }; @@ -46,11 +43,6 @@ int cmd_multi_pack_index(int argc, const char **argv, trace2_cmd_mode(argv[0]); - if (!strcmp(argv[0], "repack")) - return midx_repack(the_repository, opts.object_dir, (size_t)opts.batch_size); - if (opts.batch_size) - die(_("--batch-size option is only for 'repack' verb")); - if (!strcmp(argv[0], "write")) return write_midx_file(opts.object_dir); if (!strcmp(argv[0], "verify")) diff --git a/midx.c b/midx.c index 0c2e7252df7b33..09e255d0a64dcb 100644 --- a/midx.c +++ b/midx.c @@ -1224,8 +1224,3 @@ int expire_midx_packs(struct repository *r, const char *object_dir) string_list_clear(&packs_to_drop, 0); return result; } - -int midx_repack(struct repository *r, const 
char *object_dir, size_t batch_size) -{ - return 0; -} diff --git a/midx.h b/midx.h index f0ae656b5d7676..505f1431b7e309 100644 --- a/midx.h +++ b/midx.h @@ -51,7 +51,6 @@ int write_midx_file(const char *object_dir); void clear_midx_file(struct repository *r); int verify_midx_file(struct repository *r, const char *object_dir); int expire_midx_packs(struct repository *r, const char *object_dir); -int midx_repack(struct repository *r, const char *object_dir, size_t batch_size); void close_midx(struct multi_pack_index *m); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index c458908ac0531d..718a87e0718198 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -425,15 +425,4 @@ test_expect_success 'expire removes unreferenced packs' ' ) ' -test_expect_success 'repack with minimum size does not alter existing packs' ' - ( - cd dup && - ls .git/objects/pack >expect && - MINSIZE=$(ls -l .git/objects/pack/*pack | awk "{print \$5;}" | sort -n | head -n 1) && - git multi-pack-index repack --batch-size=$MINSIZE && - ls .git/objects/pack >actual && - test_cmp expect actual - ) -' - test_done From 8c71b669c10c3a59f86f090093dc42f5eb3db7f2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 28 May 2019 09:19:26 -0400 Subject: [PATCH 05/21] fixup! multi-pack-index: implement 'expire' verb This reverts commit 931ccfd6124d746c7fae839345c42eb05f9cdcf6. 
Signed-off-by: Derrick Stolee --- midx.c | 81 ++----------------------------------- t/t5319-multi-pack-index.sh | 15 ------- 2 files changed, 4 insertions(+), 92 deletions(-) diff --git a/midx.c b/midx.c index 09e255d0a64dcb..783af9ac9d1a19 100644 --- a/midx.c +++ b/midx.c @@ -801,8 +801,7 @@ static size_t write_midx_large_offsets(struct hashfile *f, uint32_t nr_large_off return written; } -static int write_midx_internal(const char *object_dir, struct multi_pack_index *m, - struct string_list *packs_to_drop) +int write_midx_file(const char *object_dir) { unsigned char cur_chunk, num_chunks = 0; char *midx_name; @@ -816,7 +815,6 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * uint32_t nr_entries, num_large_offsets = 0; struct pack_midx_entry *entries = NULL; int large_offsets_needed = 0; - int result = 0; midx_name = get_midx_filename(object_dir); if (safe_create_leading_directories(midx_name)) { @@ -825,10 +823,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * midx_name); } - if (m) - packs.m = m; - else - packs.m = load_multi_pack_index(object_dir, 1); + packs.m = load_multi_pack_index(object_dir, 1); packs.nr = 0; packs.alloc_list = packs.m ? 
packs.m->num_packs : 16; @@ -842,25 +837,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * ALLOC_ARRAY(packs.perm, packs.alloc_perm); if (packs.m) { - int drop_index = 0, missing_drops = 0; for (i = 0; i < packs.m->num_packs; i++) { - if (packs_to_drop && drop_index < packs_to_drop->nr) { - int cmp = strcmp(packs.m->pack_names[i], - packs_to_drop->items[drop_index].string); - - if (!cmp) { - drop_index++; - continue; - } else if (cmp > 0) { - error(_("did not see pack-file %s to drop"), - packs_to_drop->items[drop_index].string); - drop_index++; - i--; - missing_drops++; - continue; - } - } - ALLOC_GROW(packs.list, packs.nr + 1, packs.alloc_list); ALLOC_GROW(packs.names, packs.nr + 1, packs.alloc_names); ALLOC_GROW(packs.perm, packs.nr + 1, packs.alloc_perm); @@ -871,12 +848,6 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * packs.pack_name_concat_len += strlen(packs.names[packs.nr]) + 1; packs.nr++; } - - if (packs_to_drop && (drop_index < packs_to_drop->nr || missing_drops)) { - error(_("did not see all pack-files to drop")); - result = 1; - goto cleanup; - } } for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &packs); @@ -1011,12 +982,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * free(packs.perm); free(entries); free(midx_name); - return result; -} - -int write_midx_file(const char *object_dir) -{ - return write_midx_internal(object_dir, NULL, NULL); + return 0; } void clear_midx_file(struct repository *r) @@ -1183,44 +1149,5 @@ int verify_midx_file(struct repository *r, const char *object_dir) int expire_midx_packs(struct repository *r, const char *object_dir) { - uint32_t i, *count, result = 0; - struct string_list packs_to_drop = STRING_LIST_INIT_DUP; - struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); - - if (!m) - return 0; - - count = xcalloc(m->num_packs, sizeof(uint32_t)); - for (i = 0; i < m->num_objects; i++) { - int 
pack_int_id = nth_midxed_pack_int_id(m, i); - count[pack_int_id]++; - } - - for (i = 0; i < m->num_packs; i++) { - char *pack_name; - - if (count[i]) - continue; - - if (prepare_midx_pack(r, m, i)) - continue; - - if (m->packs[i]->pack_keep) - continue; - - pack_name = xstrdup(m->packs[i]->pack_name); - close_pack(m->packs[i]); - - string_list_insert(&packs_to_drop, m->pack_names[i]); - unlink_pack_path(pack_name, 0); - free(pack_name); - } - - free(count); - - if (packs_to_drop.nr) - result = write_midx_internal(object_dir, m, &packs_to_drop); - - string_list_clear(&packs_to_drop, 0); - return result; + return 0; } diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 718a87e0718198..a69304ab0dcfcd 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -410,19 +410,4 @@ test_expect_success 'expire does not remove any packs' ' ) ' -test_expect_success 'expire removes unreferenced packs' ' - ( - cd dup && - git pack-objects --revs .git/objects/pack/pack-combined <<-EOF && - refs/heads/A - ^refs/heads/C - EOF - git multi-pack-index write && - ls .git/objects/pack | grep -v -e pack-[AB] >expect && - git multi-pack-index expire && - ls .git/objects/pack >actual && - test_cmp expect actual - ) -' - test_done From e030d95a90685e10c4b9471d64885ca42864311f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 28 May 2019 09:19:57 -0400 Subject: [PATCH 06/21] fixup! midx: refactor permutation logic This reverts commit 5f5d4b68071302133fedf2e6c21d5ace554df64a. 
Signed-off-by: Derrick Stolee --- midx.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/midx.c b/midx.c index 783af9ac9d1a19..3b7da1a3603e1f 100644 --- a/midx.c +++ b/midx.c @@ -430,11 +430,9 @@ static size_t write_midx_header(struct hashfile *f, struct pack_list { struct packed_git **list; char **names; - uint32_t *perm; uint32_t nr; uint32_t alloc_list; uint32_t alloc_names; - uint32_t alloc_perm; size_t pack_name_concat_len; struct multi_pack_index *m; }; @@ -450,7 +448,6 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len, ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list); ALLOC_GROW(packs->names, packs->nr + 1, packs->alloc_names); - ALLOC_GROW(packs->perm, packs->nr + 1, packs->alloc_perm); packs->list[packs->nr] = add_packed_git(full_path, full_path_len, @@ -470,7 +467,6 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len, return; } - packs->perm[packs->nr] = packs->nr; packs->names[packs->nr] = xstrdup(file_name); packs->pack_name_concat_len += strlen(file_name) + 1; packs->nr++; @@ -497,7 +493,7 @@ static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *p ALLOC_ARRAY(pairs, nr_packs); for (i = 0; i < nr_packs; i++) { - pairs[i].pack_int_id = perm[i]; + pairs[i].pack_int_id = i; pairs[i].pack_name = pack_names[i]; } @@ -809,6 +805,7 @@ int write_midx_file(const char *object_dir) struct hashfile *f = NULL; struct lock_file lk; struct pack_list packs; + uint32_t *pack_perm = NULL; uint64_t written = 0; uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1]; uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1]; @@ -827,22 +824,18 @@ int write_midx_file(const char *object_dir) packs.nr = 0; packs.alloc_list = packs.m ? 
packs.m->num_packs : 16; - packs.alloc_perm = packs.alloc_names = packs.alloc_list; + packs.alloc_names = packs.alloc_list; packs.list = NULL; packs.names = NULL; - packs.perm = NULL; packs.pack_name_concat_len = 0; ALLOC_ARRAY(packs.list, packs.alloc_list); ALLOC_ARRAY(packs.names, packs.alloc_names); - ALLOC_ARRAY(packs.perm, packs.alloc_perm); if (packs.m) { for (i = 0; i < packs.m->num_packs; i++) { ALLOC_GROW(packs.list, packs.nr + 1, packs.alloc_list); ALLOC_GROW(packs.names, packs.nr + 1, packs.alloc_names); - ALLOC_GROW(packs.perm, packs.nr + 1, packs.alloc_perm); - packs.perm[packs.nr] = i; packs.list[packs.nr] = NULL; packs.names[packs.nr] = xstrdup(packs.m->pack_names[i]); packs.pack_name_concat_len += strlen(packs.names[packs.nr]) + 1; @@ -859,9 +852,10 @@ int write_midx_file(const char *object_dir) packs.pack_name_concat_len += MIDX_CHUNK_ALIGNMENT - (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT); - sort_packs_by_name(packs.names, packs.nr, packs.perm); + ALLOC_ARRAY(pack_perm, packs.nr); + sort_packs_by_name(packs.names, packs.nr, pack_perm); - entries = get_sorted_entries(packs.m, packs.list, packs.perm, packs.nr, &nr_entries); + entries = get_sorted_entries(packs.m, packs.list, pack_perm, packs.nr, &nr_entries); for (i = 0; i < nr_entries; i++) { if (entries[i].offset > 0x7fffffff) @@ -979,8 +973,8 @@ int write_midx_file(const char *object_dir) free(packs.list); free(packs.names); - free(packs.perm); free(entries); + free(pack_perm); free(midx_name); return 0; } From cfbfebb42290fc0e8665e00e1bf2fff848d730d4 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 28 May 2019 09:20:03 -0400 Subject: [PATCH 07/21] fixup! multi-pack-index: prepare for 'expire' subcommand This reverts commit b31708ebb99c428c7e610ab65e218e4210e6cfa4. 
Signed-off-by: Derrick Stolee --- Documentation/git-multi-pack-index.txt | 5 --- builtin/multi-pack-index.c | 4 +-- midx.c | 5 --- midx.h | 1 - t/t5319-multi-pack-index.sh | 47 -------------------------- 5 files changed, 1 insertion(+), 61 deletions(-) diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt index 6186c4c9369a9c..1af406aca21922 100644 --- a/Documentation/git-multi-pack-index.txt +++ b/Documentation/git-multi-pack-index.txt @@ -31,11 +31,6 @@ write:: verify:: Verify the contents of the MIDX file. -expire:: - Delete the pack-files that are tracked by the MIDX file, but - have no objects referenced by the MIDX. Rewrite the MIDX file - afterward to remove all references to these pack-files. - EXAMPLES -------- diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index ad10d4051214a7..72dfd3dadc7bf8 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -6,7 +6,7 @@ #include "trace2.h" static char const * const builtin_multi_pack_index_usage[] = { - N_("git multi-pack-index [--object-dir=] (write|verify|expire)"), + N_("git multi-pack-index [--object-dir=] (write|verify)"), NULL }; @@ -47,8 +47,6 @@ int cmd_multi_pack_index(int argc, const char **argv, return write_midx_file(opts.object_dir); if (!strcmp(argv[0], "verify")) return verify_midx_file(the_repository, opts.object_dir); - if (!strcmp(argv[0], "expire")) - return expire_midx_packs(the_repository, opts.object_dir); die(_("unrecognized verb: %s"), argv[0]); } diff --git a/midx.c b/midx.c index 3b7da1a3603e1f..e7e1fe4d65ac3b 100644 --- a/midx.c +++ b/midx.c @@ -1140,8 +1140,3 @@ int verify_midx_file(struct repository *r, const char *object_dir) return verify_midx_error; } - -int expire_midx_packs(struct repository *r, const char *object_dir) -{ - return 0; -} diff --git a/midx.h b/midx.h index 505f1431b7e309..3eb29731f2b1e8 100644 --- a/midx.h +++ b/midx.h @@ -50,7 +50,6 @@ int prepare_multi_pack_index_one(struct repository 
*r, const char *object_dir, i int write_midx_file(const char *object_dir); void clear_midx_file(struct repository *r); int verify_midx_file(struct repository *r, const char *object_dir); -int expire_midx_packs(struct repository *r, const char *object_dir); void close_midx(struct multi_pack_index *m); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index a69304ab0dcfcd..1ebf19ec3cd559 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -363,51 +363,4 @@ test_expect_success 'verify incorrect 64-bit offset' ' "incorrect object offset" ' -test_expect_success 'setup expire tests' ' - mkdir dup && - ( - cd dup && - git init && - for i in $(test_seq 1 20) - do - test_commit $i - done && - git branch A HEAD && - git branch B HEAD~8 && - git branch C HEAD~13 && - git branch D HEAD~16 && - git branch E HEAD~18 && - git pack-objects --revs .git/objects/pack/pack-E <<-EOF && - refs/heads/E - EOF - git pack-objects --revs .git/objects/pack/pack-D <<-EOF && - refs/heads/D - ^refs/heads/E - EOF - git pack-objects --revs .git/objects/pack/pack-C <<-EOF && - refs/heads/C - ^refs/heads/D - EOF - git pack-objects --revs .git/objects/pack/pack-B <<-EOF && - refs/heads/B - ^refs/heads/C - EOF - git pack-objects --revs .git/objects/pack/pack-A <<-EOF && - refs/heads/A - ^refs/heads/B - EOF - git multi-pack-index write - ) -' - -test_expect_success 'expire does not remove any packs' ' - ( - cd dup && - ls .git/objects/pack >expect && - git multi-pack-index expire && - ls .git/objects/pack >actual && - test_cmp expect actual - ) -' - test_done From 60d768cde7f6328d4e97625e70c4de02349eafa7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 28 May 2019 09:20:08 -0400 Subject: [PATCH 08/21] fixup! Docs: rearrange subcommands for multi-pack-index This reverts commit 8588e153be6e430d6fcfc8b566a6073eb859555f. 
Signed-off-by: Derrick Stolee --- Documentation/git-multi-pack-index.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt index 1af406aca21922..f7778a2c85c1aa 100644 --- a/Documentation/git-multi-pack-index.txt +++ b/Documentation/git-multi-pack-index.txt @@ -9,7 +9,7 @@ git-multi-pack-index - Write and verify multi-pack-indexes SYNOPSIS -------- [verse] -'git multi-pack-index' [--object-dir=] +'git multi-pack-index' [--object-dir=] DESCRIPTION ----------- @@ -23,13 +23,13 @@ OPTIONS `/packs/multi-pack-index` for the current MIDX file, and `/packs` for the pack-files to index. -The following subcommands are available: - write:: - Write a new MIDX file. + When given as the verb, write a new MIDX file to + `/packs/multi-pack-index`. verify:: - Verify the contents of the MIDX file. + When given as the verb, verify the contents of the MIDX file + at `/packs/multi-pack-index`. EXAMPLES From 46d0d431319f95b65963e07e3d4b0f3f59cb2071 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 28 May 2019 09:20:14 -0400 Subject: [PATCH 09/21] fixup! repack: refactor pack deletion for future use This reverts commit 8920799d58ced4d3bacb2f5d0ccfacd7bdc0564c. 
Signed-off-by: Derrick Stolee --- builtin/repack.c | 14 ++++++++++++-- packfile.c | 28 ---------------------------- packfile.h | 7 ------- 3 files changed, 12 insertions(+), 37 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index d6abf74608fe1a..f834b5551b1ffe 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -129,9 +129,19 @@ static void get_non_kept_pack_filenames(struct string_list *fname_list, static void remove_redundant_pack(const char *dir_name, const char *base_name) { + const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"}; + int i; struct strbuf buf = STRBUF_INIT; - strbuf_addf(&buf, "%s/%s.pack", dir_name, base_name); - unlink_pack_path(buf.buf, 1); + size_t plen; + + strbuf_addf(&buf, "%s/%s", dir_name, base_name); + plen = buf.len; + + for (i = 0; i < ARRAY_SIZE(exts); i++) { + strbuf_setlen(&buf, plen); + strbuf_addstr(&buf, exts[i]); + unlink(buf.buf); + } strbuf_release(&buf); } diff --git a/packfile.c b/packfile.c index b49cf1b5415775..e65752174dda7f 100644 --- a/packfile.c +++ b/packfile.c @@ -355,34 +355,6 @@ void close_object_store(struct raw_object_store *o) close_commit_graph(o); } -void unlink_pack_path(const char *pack_name, int force_delete) -{ - static const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"}; - int i; - struct strbuf buf = STRBUF_INIT; - size_t plen; - - strbuf_addstr(&buf, pack_name); - strip_suffix_mem(buf.buf, &buf.len, ".pack"); - plen = buf.len; - - if (!force_delete) { - strbuf_addstr(&buf, ".keep"); - if (!access(buf.buf, F_OK)) { - strbuf_release(&buf); - return; - } - } - - for (i = 0; i < ARRAY_SIZE(exts); i++) { - strbuf_setlen(&buf, plen); - strbuf_addstr(&buf, exts[i]); - unlink(buf.buf); - } - - strbuf_release(&buf); -} - /* * The LRU pack is the one with the oldest MRU window, preferring packs * with no used windows, or the oldest mtime if it has no windows allocated. 
diff --git a/packfile.h b/packfile.h index 3e98910bdd191f..81e868d55a9b1f 100644 --- a/packfile.h +++ b/packfile.h @@ -95,13 +95,6 @@ void unuse_pack(struct pack_window **); void clear_delta_base_cache(void); struct packed_git *add_packed_git(const char *path, size_t path_len, int local); -/* - * Unlink the .pack and associated extension files. - * Does not unlink if 'force_delete' is false and the pack-file is - * marked as ".keep". - */ -extern void unlink_pack_path(const char *pack_name, int force_delete); - /* * Make sure that a pointer access into an mmap'd index file is within bounds, * and can provide at least 8 bytes of data. From 9a3b6c219b2b58a190e06299986f8e652a21252d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Dec 2018 16:42:53 -0500 Subject: [PATCH 10/21] repack: refactor pack deletion for future use The repack builtin deletes redundant pack-files and their associated .idx, .promisor, .bitmap, and .keep files. We will want to re-use this logic in the future for other types of repack, so pull the logic into 'unlink_pack_path()' in packfile.c. The 'force_delete' parameter is enabled for the use in repack, but will be important for a future caller.
Signed-off-by: Derrick Stolee --- builtin/repack.c | 14 ++------------ packfile.c | 28 ++++++++++++++++++++++++++++ packfile.h | 7 +++++++ 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index f834b5551b1ffe..d6abf74608fe1a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -129,19 +129,9 @@ static void get_non_kept_pack_filenames(struct string_list *fname_list, static void remove_redundant_pack(const char *dir_name, const char *base_name) { - const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"}; - int i; struct strbuf buf = STRBUF_INIT; - size_t plen; - - strbuf_addf(&buf, "%s/%s", dir_name, base_name); - plen = buf.len; - - for (i = 0; i < ARRAY_SIZE(exts); i++) { - strbuf_setlen(&buf, plen); - strbuf_addstr(&buf, exts[i]); - unlink(buf.buf); - } + strbuf_addf(&buf, "%s/%s.pack", dir_name, base_name); + unlink_pack_path(buf.buf, 1); strbuf_release(&buf); } diff --git a/packfile.c b/packfile.c index e65752174dda7f..b49cf1b5415775 100644 --- a/packfile.c +++ b/packfile.c @@ -355,6 +355,34 @@ void close_object_store(struct raw_object_store *o) close_commit_graph(o); } +void unlink_pack_path(const char *pack_name, int force_delete) +{ + static const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"}; + int i; + struct strbuf buf = STRBUF_INIT; + size_t plen; + + strbuf_addstr(&buf, pack_name); + strip_suffix_mem(buf.buf, &buf.len, ".pack"); + plen = buf.len; + + if (!force_delete) { + strbuf_addstr(&buf, ".keep"); + if (!access(buf.buf, F_OK)) { + strbuf_release(&buf); + return; + } + } + + for (i = 0; i < ARRAY_SIZE(exts); i++) { + strbuf_setlen(&buf, plen); + strbuf_addstr(&buf, exts[i]); + unlink(buf.buf); + } + + strbuf_release(&buf); +} + /* * The LRU pack is the one with the oldest MRU window, preferring packs * with no used windows, or the oldest mtime if it has no windows allocated. 
diff --git a/packfile.h b/packfile.h index 81e868d55a9b1f..3e98910bdd191f 100644 --- a/packfile.h +++ b/packfile.h @@ -95,6 +95,13 @@ void unuse_pack(struct pack_window **); void clear_delta_base_cache(void); struct packed_git *add_packed_git(const char *path, size_t path_len, int local); +/* + * Unlink the .pack and associated extension files. + * Does not unlink if 'force_delete' is false and the pack-file is + * marked as ".keep". + */ +extern void unlink_pack_path(const char *pack_name, int force_delete); + /* * Make sure that a pointer access into an mmap'd index file is within bounds, * and can provide at least 8 bytes of data. From 8f4ee71e1901d9e0866e3dcea8637e53c7fe6fc0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 21 Dec 2018 09:27:26 -0500 Subject: [PATCH 11/21] Docs: rearrange subcommands for multi-pack-index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will add new subcommands to the multi-pack-index, and that will make the documentation a bit messier. Clean up the 'verb' descriptions by renaming the concept to 'subcommand' and removing the reference to the object directory. Helped-by: Stefan Beller Helped-by: Szeder Gábor Signed-off-by: Derrick Stolee --- Documentation/git-multi-pack-index.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt index f7778a2c85c1aa..1af406aca21922 100644 --- a/Documentation/git-multi-pack-index.txt +++ b/Documentation/git-multi-pack-index.txt @@ -9,7 +9,7 @@ git-multi-pack-index - Write and verify multi-pack-indexes SYNOPSIS -------- [verse] -'git multi-pack-index' [--object-dir=] +'git multi-pack-index' [--object-dir=] DESCRIPTION ----------- @@ -23,13 +23,13 @@ OPTIONS `/packs/multi-pack-index` for the current MIDX file, and `/packs` for the pack-files to index. 
+The following subcommands are available: + write:: - When given as the verb, write a new MIDX file to - `/packs/multi-pack-index`. + Write a new MIDX file. verify:: - When given as the verb, verify the contents of the MIDX file - at `/packs/multi-pack-index`. + Verify the contents of the MIDX file. EXAMPLES From 52be9790f6904a3f76df165b3d4a403561b786b7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 5 Dec 2018 08:11:39 -0500 Subject: [PATCH 12/21] multi-pack-index: prepare for 'expire' subcommand The multi-pack-index tracks objects in a collection of pack-files. Only one copy of each object is indexed, using the modified time of the pack-files to determine tie-breakers. It is possible to have a pack-file with no referenced objects because all objects have a duplicate in a newer pack-file. Introduce a new 'expire' subcommand to the multi-pack-index builtin. This subcommand will delete these unused pack-files and rewrite the multi-pack-index to no longer refer to those files. More details about the specifics will follow as the method is implemented. Add a test that verifies the 'expire' subcommand is correctly wired, but will still be valid when the verb is implemented. Specifically, create a set of packs that should all have referenced objects and should not be removed during an 'expire' operation. The packs are created carefully to ensure they have a specific order when sorted by size. This will be important in a later test. 
Signed-off-by: Derrick Stolee --- Documentation/git-multi-pack-index.txt | 5 +++ builtin/multi-pack-index.c | 4 ++- midx.c | 5 +++ midx.h | 1 + t/t5319-multi-pack-index.sh | 49 ++++++++++++++++++++++++++ 5 files changed, 63 insertions(+), 1 deletion(-) diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt index 1af406aca21922..6186c4c9369a9c 100644 --- a/Documentation/git-multi-pack-index.txt +++ b/Documentation/git-multi-pack-index.txt @@ -31,6 +31,11 @@ write:: verify:: Verify the contents of the MIDX file. +expire:: + Delete the pack-files that are tracked by the MIDX file, but + have no objects referenced by the MIDX. Rewrite the MIDX file + afterward to remove all references to these pack-files. + EXAMPLES -------- diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 72dfd3dadc7bf8..ad10d4051214a7 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -6,7 +6,7 @@ #include "trace2.h" static char const * const builtin_multi_pack_index_usage[] = { - N_("git multi-pack-index [--object-dir=] (write|verify)"), + N_("git multi-pack-index [--object-dir=] (write|verify|expire)"), NULL }; @@ -47,6 +47,8 @@ int cmd_multi_pack_index(int argc, const char **argv, return write_midx_file(opts.object_dir); if (!strcmp(argv[0], "verify")) return verify_midx_file(the_repository, opts.object_dir); + if (!strcmp(argv[0], "expire")) + return expire_midx_packs(the_repository, opts.object_dir); die(_("unrecognized verb: %s"), argv[0]); } diff --git a/midx.c b/midx.c index e7e1fe4d65ac3b..3b7da1a3603e1f 100644 --- a/midx.c +++ b/midx.c @@ -1140,3 +1140,8 @@ int verify_midx_file(struct repository *r, const char *object_dir) return verify_midx_error; } + +int expire_midx_packs(struct repository *r, const char *object_dir) +{ + return 0; +} diff --git a/midx.h b/midx.h index 3eb29731f2b1e8..505f1431b7e309 100644 --- a/midx.h +++ b/midx.h @@ -50,6 +50,7 @@ int prepare_multi_pack_index_one(struct repository 
*r, const char *object_dir, i int write_midx_file(const char *object_dir); void clear_midx_file(struct repository *r); int verify_midx_file(struct repository *r, const char *object_dir); +int expire_midx_packs(struct repository *r, const char *object_dir); void close_midx(struct multi_pack_index *m); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 1ebf19ec3cd559..1b2d32f4756ffe 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -363,4 +363,53 @@ test_expect_success 'verify incorrect 64-bit offset' ' "incorrect object offset" ' +test_expect_success 'setup expire tests' ' + mkdir dup && + ( + cd dup && + git init && + test-tool genrandom "data" 4096 >large_file.txt && + git update-index --add large_file.txt && + for i in $(test_seq 1 20) + do + test_commit $i + done && + git branch A HEAD && + git branch B HEAD~8 && + git branch C HEAD~13 && + git branch D HEAD~16 && + git branch E HEAD~18 && + git pack-objects --revs .git/objects/pack/pack-A <<-EOF && + refs/heads/A + ^refs/heads/B + EOF + git pack-objects --revs .git/objects/pack/pack-B <<-EOF && + refs/heads/B + ^refs/heads/C + EOF + git pack-objects --revs .git/objects/pack/pack-C <<-EOF && + refs/heads/C + ^refs/heads/D + EOF + git pack-objects --revs .git/objects/pack/pack-D <<-EOF && + refs/heads/D + ^refs/heads/E + EOF + git pack-objects --revs .git/objects/pack/pack-E <<-EOF && + refs/heads/E + EOF + git multi-pack-index write + ) +' + +test_expect_success 'expire does not remove any packs' ' + ( + cd dup && + ls .git/objects/pack >expect && + git multi-pack-index expire && + ls .git/objects/pack >actual && + test_cmp expect actual + ) +' + test_done From 63c41198bf8baab3cc88d67ab214ce42426857cf Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 7 Jan 2019 15:42:45 -0500 Subject: [PATCH 13/21] midx: simplify computation of pack name lengths Before writing the multi-pack-index, we compute the length of the pack-index names concatenated together. 
This forms the data in the pack name chunk, and we precompute it to compute chunk offsets. The value is also modified to fit alignment needs. Previously, this computation was coupled with adding packs from the existing multi-pack-index and the remaining packs in the object dir not already covered by the multi-pack-index. In anticipation of this becoming more complicated with the 'expire' subcommand, simplify the computation by centralizing it to a single loop before writing the file. Signed-off-by: Derrick Stolee --- midx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/midx.c b/midx.c index 3b7da1a3603e1f..62404620ad84ed 100644 --- a/midx.c +++ b/midx.c @@ -433,7 +433,6 @@ struct pack_list { uint32_t nr; uint32_t alloc_list; uint32_t alloc_names; - size_t pack_name_concat_len; struct multi_pack_index *m; }; @@ -468,7 +467,6 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len, } packs->names[packs->nr] = xstrdup(file_name); - packs->pack_name_concat_len += strlen(file_name) + 1; packs->nr++; } } @@ -812,6 +810,7 @@ int write_midx_file(const char *object_dir) uint32_t nr_entries, num_large_offsets = 0; struct pack_midx_entry *entries = NULL; int large_offsets_needed = 0; + int pack_name_concat_len = 0; midx_name = get_midx_filename(object_dir); if (safe_create_leading_directories(midx_name)) { @@ -827,7 +826,6 @@ int write_midx_file(const char *object_dir) packs.alloc_names = packs.alloc_list; packs.list = NULL; packs.names = NULL; - packs.pack_name_concat_len = 0; ALLOC_ARRAY(packs.list, packs.alloc_list); ALLOC_ARRAY(packs.names, packs.alloc_names); @@ -838,7 +836,6 @@ int write_midx_file(const char *object_dir) packs.list[packs.nr] = NULL; packs.names[packs.nr] = xstrdup(packs.m->pack_names[i]); - packs.pack_name_concat_len += strlen(packs.names[packs.nr]) + 1; packs.nr++; } } @@ -848,10 +845,6 @@ int write_midx_file(const char *object_dir) if (packs.m && packs.nr == packs.m->num_packs) goto cleanup; - 
if (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT) - packs.pack_name_concat_len += MIDX_CHUNK_ALIGNMENT - - (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT); - ALLOC_ARRAY(pack_perm, packs.nr); sort_packs_by_name(packs.names, packs.nr, pack_perm); @@ -864,6 +857,13 @@ int write_midx_file(const char *object_dir) large_offsets_needed = 1; } + for (i = 0; i < packs.nr; i++) + pack_name_concat_len += strlen(packs.names[i]) + 1; + + if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT) + pack_name_concat_len += MIDX_CHUNK_ALIGNMENT - + (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT); + hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); FREE_AND_NULL(midx_name); @@ -881,7 +881,7 @@ int write_midx_file(const char *object_dir) cur_chunk++; chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT; - chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len; + chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + pack_name_concat_len; cur_chunk++; chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP; From 1740e2918b45ccc9a83c742212a8af90e0488905 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 7 Jan 2019 16:21:39 -0500 Subject: [PATCH 14/21] midx: refactor permutation logic and pack sorting In anticipation of the expire subcommand, refactor the way we sort the packfiles by name. This will greatly simplify our approach to dropping expired packs from the list. First, create 'struct pack_info' to replace 'struct pack_pair'. This struct contains the necessary information about a pack, including its name, a pointer to its packfile struct (if not already in the multi-pack-index), and the original pack-int-id. Second, track the pack information using an array of pack_info structs in the pack_list struct. This simplifies the logic around the multiple arrays we were tracking in that struct. 
Finally, update get_sorted_entries() to not permute the pack-int-id and instead supply the permutation to write_midx_object_offsets(). This requires sorting the packs after get_sorted_entries(). Signed-off-by: Derrick Stolee --- midx.c | 156 +++++++++++++++++++++++++-------------------------------- 1 file changed, 69 insertions(+), 87 deletions(-) diff --git a/midx.c b/midx.c index 62404620ad84ed..6d4b84e2438071 100644 --- a/midx.c +++ b/midx.c @@ -427,12 +427,23 @@ static size_t write_midx_header(struct hashfile *f, return MIDX_HEADER_SIZE; } +struct pack_info { + uint32_t orig_pack_int_id; + char *pack_name; + struct packed_git *p; +}; + +static int pack_info_compare(const void *_a, const void *_b) +{ + struct pack_info *a = (struct pack_info *)_a; + struct pack_info *b = (struct pack_info *)_b; + return strcmp(a->pack_name, b->pack_name); +} + struct pack_list { - struct packed_git **list; - char **names; + struct pack_info *info; uint32_t nr; - uint32_t alloc_list; - uint32_t alloc_names; + uint32_t alloc; struct multi_pack_index *m; }; @@ -445,66 +456,32 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len, if (packs->m && midx_contains_pack(packs->m, file_name)) return; - ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list); - ALLOC_GROW(packs->names, packs->nr + 1, packs->alloc_names); + ALLOC_GROW(packs->info, packs->nr + 1, packs->alloc); - packs->list[packs->nr] = add_packed_git(full_path, - full_path_len, - 0); + packs->info[packs->nr].p = add_packed_git(full_path, + full_path_len, + 0); - if (!packs->list[packs->nr]) { + if (!packs->info[packs->nr].p) { warning(_("failed to add packfile '%s'"), full_path); return; } - if (open_pack_index(packs->list[packs->nr])) { + if (open_pack_index(packs->info[packs->nr].p)) { warning(_("failed to open pack-index '%s'"), full_path); - close_pack(packs->list[packs->nr]); - FREE_AND_NULL(packs->list[packs->nr]); + close_pack(packs->info[packs->nr].p); + 
FREE_AND_NULL(packs->info[packs->nr].p); return; } - packs->names[packs->nr] = xstrdup(file_name); + packs->info[packs->nr].pack_name = xstrdup(file_name); + packs->info[packs->nr].orig_pack_int_id = packs->nr; packs->nr++; } } -struct pack_pair { - uint32_t pack_int_id; - char *pack_name; -}; - -static int pack_pair_compare(const void *_a, const void *_b) -{ - struct pack_pair *a = (struct pack_pair *)_a; - struct pack_pair *b = (struct pack_pair *)_b; - return strcmp(a->pack_name, b->pack_name); -} - -static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *perm) -{ - uint32_t i; - struct pack_pair *pairs; - - ALLOC_ARRAY(pairs, nr_packs); - - for (i = 0; i < nr_packs; i++) { - pairs[i].pack_int_id = i; - pairs[i].pack_name = pack_names[i]; - } - - QSORT(pairs, nr_packs, pack_pair_compare); - - for (i = 0; i < nr_packs; i++) { - pack_names[i] = pairs[i].pack_name; - perm[pairs[i].pack_int_id] = i; - } - - free(pairs); -} - struct pack_midx_entry { struct object_id oid; uint32_t pack_int_id; @@ -530,7 +507,6 @@ static int midx_oid_compare(const void *_a, const void *_b) } static int nth_midxed_pack_midx_entry(struct multi_pack_index *m, - uint32_t *pack_perm, struct pack_midx_entry *e, uint32_t pos) { @@ -538,7 +514,7 @@ static int nth_midxed_pack_midx_entry(struct multi_pack_index *m, return 1; nth_midxed_object_oid(&e->oid, m, pos); - e->pack_int_id = pack_perm[nth_midxed_pack_int_id(m, pos)]; + e->pack_int_id = nth_midxed_pack_int_id(m, pos); e->offset = nth_midxed_offset(m, pos); /* consider objects in midx to be from "old" packs */ @@ -572,8 +548,7 @@ static void fill_pack_entry(uint32_t pack_int_id, * of a packfile containing the object). 
*/ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, - struct packed_git **p, - uint32_t *perm, + struct pack_info *info, uint32_t nr_packs, uint32_t *nr_objects) { @@ -584,7 +559,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, uint32_t start_pack = m ? m->num_packs : 0; for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) - total_objects += p[cur_pack]->num_objects; + total_objects += info[cur_pack].p->num_objects; /* * As we de-duplicate by fanout value, we expect the fanout @@ -609,7 +584,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, for (cur_object = start; cur_object < end; cur_object++) { ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); - nth_midxed_pack_midx_entry(m, perm, + nth_midxed_pack_midx_entry(m, &entries_by_fanout[nr_fanout], cur_object); nr_fanout++; @@ -620,12 +595,12 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, uint32_t start = 0, end; if (cur_fanout) - start = get_pack_fanout(p[cur_pack], cur_fanout - 1); - end = get_pack_fanout(p[cur_pack], cur_fanout); + start = get_pack_fanout(info[cur_pack].p, cur_fanout - 1); + end = get_pack_fanout(info[cur_pack].p, cur_fanout); for (cur_object = start; cur_object < end; cur_object++) { ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); - fill_pack_entry(perm[cur_pack], p[cur_pack], cur_object, &entries_by_fanout[nr_fanout]); + fill_pack_entry(cur_pack, info[cur_pack].p, cur_object, &entries_by_fanout[nr_fanout]); nr_fanout++; } } @@ -654,7 +629,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, } static size_t write_midx_pack_names(struct hashfile *f, - char **pack_names, + struct pack_info *info, uint32_t num_packs) { uint32_t i; @@ -662,14 +637,14 @@ static size_t write_midx_pack_names(struct hashfile *f, size_t written = 0; for (i = 0; i < num_packs; i++) { - size_t writelen = strlen(pack_names[i]) + 1; + 
size_t writelen = strlen(info[i].pack_name) + 1; - if (i && strcmp(pack_names[i], pack_names[i - 1]) <= 0) + if (i && strcmp(info[i].pack_name, info[i - 1].pack_name) <= 0) BUG("incorrect pack-file order: %s before %s", - pack_names[i - 1], - pack_names[i]); + info[i - 1].pack_name, + info[i].pack_name); - hashwrite(f, pack_names[i], writelen); + hashwrite(f, info[i].pack_name, writelen); written += writelen; } @@ -740,6 +715,7 @@ static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len, } static size_t write_midx_object_offsets(struct hashfile *f, int large_offset_needed, + uint32_t *perm, struct pack_midx_entry *objects, uint32_t nr_objects) { struct pack_midx_entry *list = objects; @@ -749,7 +725,7 @@ static size_t write_midx_object_offsets(struct hashfile *f, int large_offset_nee for (i = 0; i < nr_objects; i++) { struct pack_midx_entry *obj = list++; - hashwrite_be32(f, obj->pack_int_id); + hashwrite_be32(f, perm[obj->pack_int_id]); if (large_offset_needed && obj->offset >> 31) hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); @@ -822,20 +798,17 @@ int write_midx_file(const char *object_dir) packs.m = load_multi_pack_index(object_dir, 1); packs.nr = 0; - packs.alloc_list = packs.m ? packs.m->num_packs : 16; - packs.alloc_names = packs.alloc_list; - packs.list = NULL; - packs.names = NULL; - ALLOC_ARRAY(packs.list, packs.alloc_list); - ALLOC_ARRAY(packs.names, packs.alloc_names); + packs.alloc = packs.m ? 
packs.m->num_packs : 16; + packs.info = NULL; + ALLOC_ARRAY(packs.info, packs.alloc); if (packs.m) { for (i = 0; i < packs.m->num_packs; i++) { - ALLOC_GROW(packs.list, packs.nr + 1, packs.alloc_list); - ALLOC_GROW(packs.names, packs.nr + 1, packs.alloc_names); + ALLOC_GROW(packs.info, packs.nr + 1, packs.alloc); - packs.list[packs.nr] = NULL; - packs.names[packs.nr] = xstrdup(packs.m->pack_names[i]); + packs.info[packs.nr].orig_pack_int_id = i; + packs.info[packs.nr].pack_name = xstrdup(packs.m->pack_names[i]); + packs.info[packs.nr].p = NULL; packs.nr++; } } @@ -845,10 +818,7 @@ int write_midx_file(const char *object_dir) if (packs.m && packs.nr == packs.m->num_packs) goto cleanup; - ALLOC_ARRAY(pack_perm, packs.nr); - sort_packs_by_name(packs.names, packs.nr, pack_perm); - - entries = get_sorted_entries(packs.m, packs.list, pack_perm, packs.nr, &nr_entries); + entries = get_sorted_entries(packs.m, packs.info, packs.nr, &nr_entries); for (i = 0; i < nr_entries; i++) { if (entries[i].offset > 0x7fffffff) @@ -857,8 +827,21 @@ int write_midx_file(const char *object_dir) large_offsets_needed = 1; } + QSORT(packs.info, packs.nr, pack_info_compare); + + /* + * pack_perm stores a permutation between pack-int-ids from the + * previous multi-pack-index to the new one we are writing: + * + * pack_perm[old_id] = new_id + */ + ALLOC_ARRAY(pack_perm, packs.nr); + for (i = 0; i < packs.nr; i++) { + pack_perm[packs.info[i].orig_pack_int_id] = i; + } + for (i = 0; i < packs.nr; i++) - pack_name_concat_len += strlen(packs.names[i]) + 1; + pack_name_concat_len += strlen(packs.info[i].pack_name) + 1; if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT) pack_name_concat_len += MIDX_CHUNK_ALIGNMENT - @@ -929,7 +912,7 @@ int write_midx_file(const char *object_dir) switch (chunk_ids[i]) { case MIDX_CHUNKID_PACKNAMES: - written += write_midx_pack_names(f, packs.names, packs.nr); + written += write_midx_pack_names(f, packs.info, packs.nr); break; case MIDX_CHUNKID_OIDFANOUT: @@ -941,7 +924,7 
@@ int write_midx_file(const char *object_dir) break; case MIDX_CHUNKID_OBJECTOFFSETS: - written += write_midx_object_offsets(f, large_offsets_needed, entries, nr_entries); + written += write_midx_object_offsets(f, large_offsets_needed, pack_perm, entries, nr_entries); break; case MIDX_CHUNKID_LARGEOFFSETS: @@ -964,15 +947,14 @@ int write_midx_file(const char *object_dir) cleanup: for (i = 0; i < packs.nr; i++) { - if (packs.list[i]) { - close_pack(packs.list[i]); - free(packs.list[i]); + if (packs.info[i].p) { + close_pack(packs.info[i].p); + free(packs.info[i].p); } - free(packs.names[i]); + free(packs.info[i].pack_name); } - free(packs.list); - free(packs.names); + free(packs.info); free(entries); free(pack_perm); free(midx_name); From 95591fd27dbb78230be128ffe68da8d9e9ec6a75 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 5 Dec 2018 08:39:29 -0500 Subject: [PATCH 15/21] multi-pack-index: implement 'expire' subcommand The 'git multi-pack-index expire' subcommand looks at the existing multi-pack-index, counts the number of objects referenced in each pack-file, deletes the pack-files with no referenced objects, and rewrites the multi-pack-index to no longer reference those packs. Refactor the write_midx_file() method to call write_midx_internal() which now takes an existing 'struct multi_pack_index' and a list of pack-files to drop (as specified by the names of their pack-indexes). As we write the new multi-pack-index, we drop those file names from the list of known pack-files. The expire_midx_packs() method removes the unreferenced pack-files after carefully closing the packs to avoid open handles. Test that a new pack-file that covers the contents of two other pack-files leads to those pack-files being deleted during the expire subcommand. Be sure to read the multi-pack-index to ensure it no longer references those packs.
Signed-off-by: Derrick Stolee --- midx.c | 119 +++++++++++++++++++++++++++++++++--- t/t5319-multi-pack-index.sh | 20 ++++++ 2 files changed, 129 insertions(+), 10 deletions(-) diff --git a/midx.c b/midx.c index 6d4b84e2438071..9b0b4c15203100 100644 --- a/midx.c +++ b/midx.c @@ -34,6 +34,8 @@ #define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t)) #define MIDX_LARGE_OFFSET_NEEDED 0x80000000 +#define PACK_EXPIRED UINT_MAX + static char *get_midx_filename(const char *object_dir) { return xstrfmt("%s/pack/multi-pack-index", object_dir); @@ -431,6 +433,7 @@ struct pack_info { uint32_t orig_pack_int_id; char *pack_name; struct packed_git *p; + unsigned expired : 1; }; static int pack_info_compare(const void *_a, const void *_b) @@ -478,6 +481,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len, packs->info[packs->nr].pack_name = xstrdup(file_name); packs->info[packs->nr].orig_pack_int_id = packs->nr; + packs->info[packs->nr].expired = 0; packs->nr++; } } @@ -637,13 +641,17 @@ static size_t write_midx_pack_names(struct hashfile *f, size_t written = 0; for (i = 0; i < num_packs; i++) { - size_t writelen = strlen(info[i].pack_name) + 1; + size_t writelen; + + if (info[i].expired) + continue; if (i && strcmp(info[i].pack_name, info[i - 1].pack_name) <= 0) BUG("incorrect pack-file order: %s before %s", info[i - 1].pack_name, info[i].pack_name); + writelen = strlen(info[i].pack_name) + 1; hashwrite(f, info[i].pack_name, writelen); written += writelen; } @@ -725,6 +733,11 @@ static size_t write_midx_object_offsets(struct hashfile *f, int large_offset_nee for (i = 0; i < nr_objects; i++) { struct pack_midx_entry *obj = list++; + if (perm[obj->pack_int_id] == PACK_EXPIRED) + BUG("object %s is in an expired pack with int-id %d", + oid_to_hex(&obj->oid), + obj->pack_int_id); + hashwrite_be32(f, perm[obj->pack_int_id]); if (large_offset_needed && obj->offset >> 31) @@ -771,7 +784,8 @@ static size_t write_midx_large_offsets(struct hashfile *f, uint32_t 
nr_large_off return written; } -int write_midx_file(const char *object_dir) +static int write_midx_internal(const char *object_dir, struct multi_pack_index *m, + struct string_list *packs_to_drop) { unsigned char cur_chunk, num_chunks = 0; char *midx_name; @@ -787,6 +801,8 @@ int write_midx_file(const char *object_dir) struct pack_midx_entry *entries = NULL; int large_offsets_needed = 0; int pack_name_concat_len = 0; + int dropped_packs = 0; + int result = 0; midx_name = get_midx_filename(object_dir); if (safe_create_leading_directories(midx_name)) { @@ -795,7 +811,10 @@ int write_midx_file(const char *object_dir) midx_name); } - packs.m = load_multi_pack_index(object_dir, 1); + if (m) + packs.m = m; + else + packs.m = load_multi_pack_index(object_dir, 1); packs.nr = 0; packs.alloc = packs.m ? packs.m->num_packs : 16; @@ -809,13 +828,14 @@ int write_midx_file(const char *object_dir) packs.info[packs.nr].orig_pack_int_id = i; packs.info[packs.nr].pack_name = xstrdup(packs.m->pack_names[i]); packs.info[packs.nr].p = NULL; + packs.info[packs.nr].expired = 0; packs.nr++; } } for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &packs); - if (packs.m && packs.nr == packs.m->num_packs) + if (packs.m && packs.nr == packs.m->num_packs && !packs_to_drop) goto cleanup; entries = get_sorted_entries(packs.m, packs.info, packs.nr, &nr_entries); @@ -829,6 +849,34 @@ int write_midx_file(const char *object_dir) QSORT(packs.info, packs.nr, pack_info_compare); + if (packs_to_drop && packs_to_drop->nr) { + int drop_index = 0; + int missing_drops = 0; + + for (i = 0; i < packs.nr && drop_index < packs_to_drop->nr; i++) { + int cmp = strcmp(packs.info[i].pack_name, + packs_to_drop->items[drop_index].string); + + if (!cmp) { + drop_index++; + packs.info[i].expired = 1; + } else if (cmp > 0) { + error(_("did not see pack-file %s to drop"), + packs_to_drop->items[drop_index].string); + drop_index++; + missing_drops++; + i--; + } else { + packs.info[i].expired = 0; + } + } + + if 
(missing_drops) { + result = 1; + goto cleanup; + } + } + /* * pack_perm stores a permutation between pack-int-ids from the * previous multi-pack-index to the new one we are writing: @@ -837,11 +885,18 @@ int write_midx_file(const char *object_dir) */ ALLOC_ARRAY(pack_perm, packs.nr); for (i = 0; i < packs.nr; i++) { - pack_perm[packs.info[i].orig_pack_int_id] = i; + if (packs.info[i].expired) { + dropped_packs++; + pack_perm[packs.info[i].orig_pack_int_id] = PACK_EXPIRED; + } else { + pack_perm[packs.info[i].orig_pack_int_id] = i - dropped_packs; + } } - for (i = 0; i < packs.nr; i++) - pack_name_concat_len += strlen(packs.info[i].pack_name) + 1; + for (i = 0; i < packs.nr; i++) { + if (!packs.info[i].expired) + pack_name_concat_len += strlen(packs.info[i].pack_name) + 1; + } if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT) pack_name_concat_len += MIDX_CHUNK_ALIGNMENT - @@ -857,7 +912,7 @@ int write_midx_file(const char *object_dir) cur_chunk = 0; num_chunks = large_offsets_needed ? 5 : 4; - written = write_midx_header(f, num_chunks, packs.nr); + written = write_midx_header(f, num_chunks, packs.nr - dropped_packs); chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES; chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH; @@ -958,7 +1013,12 @@ int write_midx_file(const char *object_dir) free(entries); free(pack_perm); free(midx_name); - return 0; + return result; +} + +int write_midx_file(const char *object_dir) +{ + return write_midx_internal(object_dir, NULL, NULL); } void clear_midx_file(struct repository *r) @@ -1125,5 +1185,44 @@ int verify_midx_file(struct repository *r, const char *object_dir) int expire_midx_packs(struct repository *r, const char *object_dir) { - return 0; + uint32_t i, *count, result = 0; + struct string_list packs_to_drop = STRING_LIST_INIT_DUP; + struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); + + if (!m) + return 0; + + count = xcalloc(m->num_packs, sizeof(uint32_t)); + for (i = 0; i < 
m->num_objects; i++) { + int pack_int_id = nth_midxed_pack_int_id(m, i); + count[pack_int_id]++; + } + + for (i = 0; i < m->num_packs; i++) { + char *pack_name; + + if (count[i]) + continue; + + if (prepare_midx_pack(r, m, i)) + continue; + + if (m->packs[i]->pack_keep) + continue; + + pack_name = xstrdup(m->packs[i]->pack_name); + close_pack(m->packs[i]); + + string_list_insert(&packs_to_drop, m->pack_names[i]); + unlink_pack_path(pack_name, 0); + free(pack_name); + } + + free(count); + + if (packs_to_drop.nr) + result = write_midx_internal(object_dir, m, &packs_to_drop); + + string_list_clear(&packs_to_drop, 0); + return result; } diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 1b2d32f4756ffe..12570fe7ace9ee 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -412,4 +412,24 @@ test_expect_success 'expire does not remove any packs' ' ) ' +test_expect_success 'expire removes unreferenced packs' ' + ( + cd dup && + git pack-objects --revs .git/objects/pack/pack-combined <<-EOF && + refs/heads/A + ^refs/heads/C + EOF + git multi-pack-index write && + ls .git/objects/pack | grep -v -e pack-[AB] >expect && + git multi-pack-index expire && + ls .git/objects/pack >actual && + test_cmp expect actual && + ls .git/objects/pack/ | grep idx >expect-idx && + test-tool read-midx .git/objects | grep idx >actual-midx && + test_cmp expect-idx actual-midx && + git multi-pack-index verify && + git fsck + ) +' + test_done From 2227d0b1ec824d7480e1d0eff429b541891a0f56 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 5 Dec 2018 09:37:13 -0500 Subject: [PATCH 16/21] multi-pack-index: prepare 'repack' subcommand In an environment where the multi-pack-index is useful, it is due to many pack-files and an inability to repack the object store into a single pack-file. However, it is likely that many of these pack-files are rather small, and could be repacked into a slightly larger pack-file without too much effort. 
It may also be important to ensure the object store is highly available and the repack operation does not interrupt concurrent git commands. Introduce a 'repack' subcommand to 'git multi-pack-index' that takes a '--batch-size' option. The subcommand will inspect the multi-pack-index for referenced pack-files whose size is smaller than the batch size, until collecting a list of pack-files whose sizes sum to larger than the batch size. Then, a new pack-file will be created containing the objects from those pack-files that are referenced by the multi-pack-index. The resulting pack is likely to actually be smaller than the batch size due to compression and the fact that there may be objects in the pack- files that have duplicate copies in other pack-files. The current change introduces the command-line arguments, and we add a test that ensures we parse these options properly. Since we specify a small batch size, we will guarantee that future implementations do not change the list of pack-files. In addition, we hard-code the modified times of the packs in the pack directory to ensure the list of packs sorted by modified time matches the order if sorted by size (ascending). This will be important in a future test. Signed-off-by: Derrick Stolee --- Documentation/git-multi-pack-index.txt | 17 +++++++++++++++++ builtin/multi-pack-index.c | 12 ++++++++++-- midx.c | 5 +++++ midx.h | 1 + t/t5319-multi-pack-index.sh | 20 +++++++++++++++++++- 5 files changed, 52 insertions(+), 3 deletions(-) diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt index 6186c4c9369a9c..233b2b786271cc 100644 --- a/Documentation/git-multi-pack-index.txt +++ b/Documentation/git-multi-pack-index.txt @@ -36,6 +36,23 @@ expire:: have no objects referenced by the MIDX. Rewrite the MIDX file afterward to remove all references to these pack-files. +repack:: + Create a new pack-file containing objects in small pack-files + referenced by the multi-pack-index. 
If the size given by the + `--batch-size=` argument is zero, then create a pack + containing all objects referenced by the multi-pack-index. For + a non-zero batch size, Select the pack-files by examining packs + from oldest-to-newest, computing the "expected size" by counting + the number of objects in the pack referenced by the + multi-pack-index, then divide by the total number of objects in + the pack and multiply by the pack size. We select packs with + expected size below the batch size until the set of packs have + total expected size at least the batch size. If the total size + does not reach the batch size, then do nothing. If a new pack- + file is created, rewrite the multi-pack-index to reference the + new pack-file. A later run of 'git multi-pack-index expire' will + delete the pack-files that were part of this batch. + EXAMPLES -------- diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index ad10d4051214a7..b1ea1a6aa17724 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -6,12 +6,13 @@ #include "trace2.h" static char const * const builtin_multi_pack_index_usage[] = { - N_("git multi-pack-index [--object-dir=] (write|verify|expire)"), + N_("git multi-pack-index [--object-dir=] (write|verify|expire|repack --batch-size=)"), NULL }; static struct opts_multi_pack_index { const char *object_dir; + unsigned long batch_size; } opts; int cmd_multi_pack_index(int argc, const char **argv, @@ -20,6 +21,8 @@ int cmd_multi_pack_index(int argc, const char **argv, static struct option builtin_multi_pack_index_options[] = { OPT_FILENAME(0, "object-dir", &opts.object_dir, N_("object directory containing set of packfile and pack-index pairs")), + OPT_MAGNITUDE(0, "batch-size", &opts.batch_size, + N_("during repack, collect pack-files of smaller size into a batch that is larger than this size")), OPT_END(), }; @@ -43,6 +46,11 @@ int cmd_multi_pack_index(int argc, const char **argv, trace2_cmd_mode(argv[0]); + if 
(!strcmp(argv[0], "repack")) + return midx_repack(the_repository, opts.object_dir, (size_t)opts.batch_size); + if (opts.batch_size) + die(_("--batch-size option is only for 'repack' subcommand")); + if (!strcmp(argv[0], "write")) return write_midx_file(opts.object_dir); if (!strcmp(argv[0], "verify")) @@ -50,5 +58,5 @@ int cmd_multi_pack_index(int argc, const char **argv, if (!strcmp(argv[0], "expire")) return expire_midx_packs(the_repository, opts.object_dir); - die(_("unrecognized verb: %s"), argv[0]); + die(_("unrecognized subcommand: %s"), argv[0]); } diff --git a/midx.c b/midx.c index 9b0b4c15203100..fbed8a8adb38f6 100644 --- a/midx.c +++ b/midx.c @@ -1226,3 +1226,8 @@ int expire_midx_packs(struct repository *r, const char *object_dir) string_list_clear(&packs_to_drop, 0); return result; } + +int midx_repack(struct repository *r, const char *object_dir, size_t batch_size) +{ + return 0; +} diff --git a/midx.h b/midx.h index 505f1431b7e309..f0ae656b5d7676 100644 --- a/midx.h +++ b/midx.h @@ -51,6 +51,7 @@ int write_midx_file(const char *object_dir); void clear_midx_file(struct repository *r); int verify_midx_file(struct repository *r, const char *object_dir); int expire_midx_packs(struct repository *r, const char *object_dir); +int midx_repack(struct repository *r, const char *object_dir, size_t batch_size); void close_midx(struct multi_pack_index *m); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 12570fe7ace9ee..133d5b7068e8e8 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -398,7 +398,8 @@ test_expect_success 'setup expire tests' ' git pack-objects --revs .git/objects/pack/pack-E <<-EOF && refs/heads/E EOF - git multi-pack-index write + git multi-pack-index write && + cp -r .git/objects/pack .git/objects/pack-backup ) ' @@ -432,4 +433,21 @@ test_expect_success 'expire removes unreferenced packs' ' ) ' +test_expect_success 'repack with minimum size does not alter existing packs' ' + ( + cd dup && + 
rm -rf .git/objects/pack && + mv .git/objects/pack-backup .git/objects/pack && + touch -m -t 201901010000 .git/objects/pack/pack-D* && + touch -m -t 201901010001 .git/objects/pack/pack-C* && + touch -m -t 201901010002 .git/objects/pack/pack-B* && + touch -m -t 201901010003 .git/objects/pack/pack-A* && + ls .git/objects/pack >expect && + MINSIZE=$(ls -l .git/objects/pack/*pack | awk "{print \$5;}" | sort -n | head -n 1) && + git multi-pack-index repack --batch-size=$MINSIZE && + ls .git/objects/pack >actual && + test_cmp expect actual + ) +' + test_done From 9c813bfbcae604bfb2a45c4a7d230cb7cdf1be37 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 5 Dec 2018 10:04:17 -0500 Subject: [PATCH 17/21] midx: implement midx_repack() To repack with a non-zero batch-size, first sort all pack-files by their modified time. Second, walk those pack-files from oldest to newest, compute their expected size, and add the packs to a list if they are smaller than the given batch-size. Stop when the total expected size is at least the batch size. If the batch size is zero, select all packs in the multi-pack-index. Finally, collect the objects from the multi-pack-index that are in the selected packs and send them to 'git pack-objects'. Write a new multi-pack-index that includes the new pack. Using a batch size of zero is very similar to a standard 'git repack' command, except that we do not delete the old packs and instead rely on the new multi-pack-index to prevent new processes from reading the old packs. This does not disrupt other Git processes that are currently reading the old packs based on the old multi-pack-index. While first designing a 'git multi-pack-index repack' operation, I started by collecting the batches based on the actual size of the objects instead of the size of the pack-files. This allows repacking a large pack-file that has very few referenced objects.
However, this came at a significant cost of parsing pack-files instead of simply reading the multi-pack-index and getting the file information for the pack-files. The "expected size" version provides similar behavior, but could skip a pack-file if the average object size is much larger than the actual size of the referenced objects, or can create a large pack if the actual size of the referenced objects is larger than the expected size. Signed-off-by: Derrick Stolee --- midx.c | 151 +++++++++++++++++++++++++++++++++++- t/t5319-multi-pack-index.sh | 28 +++++++ 2 files changed, 178 insertions(+), 1 deletion(-) diff --git a/midx.c b/midx.c index fbed8a8adb38f6..d6496444206aad 100644 --- a/midx.c +++ b/midx.c @@ -9,6 +9,7 @@ #include "midx.h" #include "progress.h" #include "trace2.h" +#include "run-command.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 @@ -1227,7 +1228,155 @@ int expire_midx_packs(struct repository *r, const char *object_dir) return result; } -int midx_repack(struct repository *r, const char *object_dir, size_t batch_size) +struct repack_info { + timestamp_t mtime; + uint32_t referenced_objects; + uint32_t pack_int_id; +}; + +static int compare_by_mtime(const void *a_, const void *b_) { + const struct repack_info *a, *b; + + a = (const struct repack_info *)a_; + b = (const struct repack_info *)b_; + + if (a->mtime < b->mtime) + return -1; + if (a->mtime > b->mtime) + return 1; + return 0; +} + +static int fill_included_packs_all(struct multi_pack_index *m, + unsigned char *include_pack) +{ + uint32_t i; + + for (i = 0; i < m->num_packs; i++) + include_pack[i] = 1; + + return m->num_packs < 2; +} + +static int fill_included_packs_batch(struct repository *r, + struct multi_pack_index *m, + unsigned char *include_pack, + size_t batch_size) +{ + uint32_t i, packs_to_repack; + size_t total_size; + struct repack_info *pack_info = xcalloc(m->num_packs, sizeof(struct repack_info)); + + for (i = 0; i < m->num_packs; i++) { + 
pack_info[i].pack_int_id = i; + + if (prepare_midx_pack(r, m, i)) + continue; + + pack_info[i].mtime = m->packs[i]->mtime; + } + + for (i = 0; batch_size && i < m->num_objects; i++) { + uint32_t pack_int_id = nth_midxed_pack_int_id(m, i); + pack_info[pack_int_id].referenced_objects++; + } + + QSORT(pack_info, m->num_packs, compare_by_mtime); + + total_size = 0; + packs_to_repack = 0; + for (i = 0; total_size < batch_size && i < m->num_packs; i++) { + int pack_int_id = pack_info[i].pack_int_id; + struct packed_git *p = m->packs[pack_int_id]; + size_t expected_size; + + if (!p) + continue; + if (open_pack_index(p) || !p->num_objects) + continue; + + expected_size = (size_t)(p->pack_size + * pack_info[i].referenced_objects); + expected_size /= p->num_objects; + + if (expected_size >= batch_size) + continue; + + packs_to_repack++; + total_size += expected_size; + include_pack[pack_int_id] = 1; + } + + free(pack_info); + + if (total_size < batch_size || packs_to_repack < 2) + return 1; + return 0; } + +int midx_repack(struct repository *r, const char *object_dir, size_t batch_size) +{ + int result = 0; + uint32_t i; + unsigned char *include_pack; + struct child_process cmd = CHILD_PROCESS_INIT; + struct strbuf base_name = STRBUF_INIT; + struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); + + if (!m) + return 0; + + include_pack = xcalloc(m->num_packs, sizeof(unsigned char)); + + if (batch_size) { + if (fill_included_packs_batch(r, m, include_pack, batch_size)) + goto cleanup; + } else if (fill_included_packs_all(m, include_pack)) + goto cleanup; + + argv_array_push(&cmd.args, "pack-objects"); + + strbuf_addstr(&base_name, object_dir); + strbuf_addstr(&base_name, "/pack/pack"); + argv_array_push(&cmd.args, base_name.buf); + strbuf_release(&base_name); + + cmd.git_cmd = 1; + cmd.in = cmd.out = -1; + + if (start_command(&cmd)) { + error(_("could not start pack-objects")); + result = 1; + goto cleanup; + } + + for (i = 0; i < m->num_objects; i++) { + struct 
object_id oid; + uint32_t pack_int_id = nth_midxed_pack_int_id(m, i); + + if (!include_pack[pack_int_id]) + continue; + + nth_midxed_object_oid(&oid, m, i); + xwrite(cmd.in, oid_to_hex(&oid), the_hash_algo->hexsz); + xwrite(cmd.in, "\n", 1); + } + close(cmd.in); + + if (finish_command(&cmd)) { + error(_("could not finish pack-objects")); + result = 1; + goto cleanup; + } + + result = write_midx_internal(object_dir, m, NULL); + m = NULL; + +cleanup: + if (m) + close_midx(m); + free(include_pack); + return result; +} diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 133d5b7068e8e8..6e47e5d0b2a7d5 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -450,4 +450,32 @@ test_expect_success 'repack with minimum size does not alter existing packs' ' ) ' +test_expect_success 'repack creates a new pack' ' + ( + cd dup && + ls .git/objects/pack/*idx >idx-list && + test_line_count = 5 idx-list && + THIRD_SMALLEST_SIZE=$(ls -l .git/objects/pack/*pack | awk "{print \$5;}" | sort -n | head -n 3 | tail -n 1) && + BATCH_SIZE=$(($THIRD_SMALLEST_SIZE + 1)) && + git multi-pack-index repack --batch-size=$BATCH_SIZE && + ls .git/objects/pack/*idx >idx-list && + test_line_count = 6 idx-list && + test-tool read-midx .git/objects | grep idx >midx-list && + test_line_count = 6 midx-list + ) +' + +test_expect_success 'expire removes repacked packs' ' + ( + cd dup && + ls -al .git/objects/pack/*pack && + ls -S .git/objects/pack/*pack | head -n 4 >expect && + git multi-pack-index expire && + ls -S .git/objects/pack/*pack >actual && + test_cmp expect actual && + test-tool read-midx .git/objects | grep idx >midx-list && + test_line_count = 4 midx-list + ) +' + test_done From e70962a15371fbceee361054e64d759bb588b566 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 8 Jan 2019 11:22:27 -0500 Subject: [PATCH 18/21] multi-pack-index: test expire while adding packs During development of the multi-pack-index expire subcommand, a version went 
out that improperly computed the pack order if a new pack was introduced while other packs were being removed. Part of the subtlety of the bug involved the new pack being placed before other packs that already existed in the multi-pack-index. Add a test to t5319-multi-pack-index.sh that catches this issue. The test adds new packs that cause another pack to be expired, and creates new packs that are lexicographically sorted before and after the existing packs. Signed-off-by: Derrick Stolee --- t/t5319-multi-pack-index.sh | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 6e47e5d0b2a7d5..8e04ce2821dbd6 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -478,4 +478,36 @@ test_expect_success 'expire removes repacked packs' ' ) ' +test_expect_success 'expire works when adding new packs' ' + ( + cd dup && + git pack-objects --revs .git/objects/pack/pack-combined <<-EOF && + refs/heads/A + ^refs/heads/B + EOF + git pack-objects --revs .git/objects/pack/pack-combined <<-EOF && + refs/heads/B + ^refs/heads/C + EOF + git pack-objects --revs .git/objects/pack/pack-combined <<-EOF && + refs/heads/C + ^refs/heads/D + EOF + git multi-pack-index write && + git pack-objects --revs .git/objects/pack/a-pack <<-EOF && + refs/heads/D + ^refs/heads/E + EOF + git multi-pack-index write && + git pack-objects --revs .git/objects/pack/z-pack <<-EOF && + refs/heads/E + EOF + git multi-pack-index expire && + ls .git/objects/pack/ | grep idx >expect && + test-tool read-midx .git/objects | grep idx >actual && + test_cmp expect actual && + git multi-pack-index verify + ) +' + test_done From 58feee0f192ee323755b2be8d3f08dfd2ef6d2de Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 24 Jan 2019 15:21:29 -0500 Subject: [PATCH 19/21] midx: add test that 'expire' respects .keep files The 'git multi-pack-index expire' subcommand may delete packs that are not needed from 
the perspective of the multi-pack-index. If a pack has a .keep file, then we should not delete that pack. Add a test that ensures we preserve a pack that would otherwise be expired. First, create a new pack that contains every object in the repo, then add it to the multi-pack-index. Then create a .keep file for a pack starting with "a-pack" that was added in the previous test. Finally, expire and verify that the pack remains and the other packs were expired. Signed-off-by: Derrick Stolee --- t/t5319-multi-pack-index.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 8e04ce2821dbd6..c2889014010bc6 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -510,4 +510,22 @@ test_expect_success 'expire works when adding new packs' ' ) ' +test_expect_success 'expire respects .keep files' ' + ( + cd dup && + git pack-objects --revs .git/objects/pack/pack-all <<-EOF && + refs/heads/A + EOF + git multi-pack-index write && + PACKA=$(ls .git/objects/pack/a-pack*\.pack | sed s/\.pack\$//) && + touch $PACKA.keep && + git multi-pack-index expire && + ls -S .git/objects/pack/a-pack* | grep $PACKA >a-pack-files && + test_line_count = 3 a-pack-files && + test-tool read-midx .git/objects | grep idx >midx-list && + test_line_count = 2 midx-list + ) +' + + test_done From cd63d19d41c9681bce822a4a9e76223ba7987c1f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 24 Apr 2019 09:04:22 -0400 Subject: [PATCH 20/21] t5319-multi-pack-index.sh: test batch size zero The 'git multi-pack-index repack' command can take a batch size of zero, which creates a new pack-file containing all objects in the multi-pack-index. The first 'repack' command will create one new pack-file, and an 'expire' command after that will delete the old pack-files, as they no longer contain any referenced objects in the multi-pack-index. 
We must remove the .keep file that was added in the previous test in order to expire that pack-file. Also test that a 'repack' will do nothing if there is only one pack-file. Signed-off-by: Derrick Stolee --- t/t5319-multi-pack-index.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index c2889014010bc6..79bfaeafa9b172 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -527,5 +527,24 @@ test_expect_success 'expire respects .keep files' ' ) ' +test_expect_success 'repack --batch-size=0 repacks everything' ' + ( + cd dup && + rm .git/objects/pack/*.keep && + ls .git/objects/pack/*idx >idx-list && + test_line_count = 2 idx-list && + git multi-pack-index repack --batch-size=0 && + ls .git/objects/pack/*idx >idx-list && + test_line_count = 3 idx-list && + test-tool read-midx .git/objects | grep idx >midx-list && + test_line_count = 3 midx-list && + git multi-pack-index expire && + ls -al .git/objects/pack/*idx >idx-list && + test_line_count = 1 idx-list && + git multi-pack-index repack --batch-size=0 && + ls -al .git/objects/pack/*idx >new-idx-list && + test_cmp idx-list new-idx-list + ) +' test_done From 9ab635d08b9495c85109de302cc3b2fc619ee549 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 21 Mar 2019 12:36:13 -0700 Subject: [PATCH 21/21] trace2:data: add trace2 data to midx Log multi-pack-index command mode. Log number of objects and packfiles in the midx. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- midx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/midx.c b/midx.c index d6496444206aad..3b28564e9eb9d2 100644 --- a/midx.c +++ b/midx.c @@ -10,6 +10,7 @@ #include "progress.h" #include "trace2.h" #include "run-command.h" +#include "trace2.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1