diff --git a/.gitignore b/.gitignore index d9773e1120585f..65b96241bc2196 100644 --- a/.gitignore +++ b/.gitignore @@ -100,11 +100,11 @@ /git-merge-subtree /git-mergetool /git-mergetool--lib -/git-midx /git-mktag /git-mktree -/git-name-rev +/git-multi-pack-index /git-mv +/git-name-rev /git-notes /git-p4 /git-pack-redundant diff --git a/Documentation/config.txt b/Documentation/config.txt index fa8e7e393f2205..9a995aeaa250e0 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -966,6 +966,11 @@ core.useReplaceRefs:: option was given on the command line. See linkgit:git[1] and linkgit:git-replace[1] for more information. +core.multiPackIndex:: + Use the multi-pack-index file to track multiple packfiles using a + single index. See link:technical/multi-pack-index.html[the + multi-pack-index design document]. + core.gvfs:: Enable the features needed for GVFS. This value can be set to true to indicate all features should be turned on or the bit values listed @@ -1014,9 +1019,6 @@ core.gvfs:: and switch to the new ref. -- -core.midx:: - Enable "multi-pack-index" feature. Set to true to read and write MIDX files. - core.sparseCheckout:: Enable "sparse checkout" feature. See section "Sparse checkout" in linkgit:git-read-tree[1] for more information. diff --git a/Documentation/git-midx.txt b/Documentation/git-midx.txt deleted file mode 100644 index 6b788cb0c79721..00000000000000 --- a/Documentation/git-midx.txt +++ /dev/null @@ -1,101 +0,0 @@ -git-midx(1) -============ - -NAME ----- -git-midx - Write and verify multi-pack-indexes (MIDX files). - - -SYNOPSIS --------- -[verse] -'git midx' [--pack-dir ] - -DESCRIPTION ------------ -Write or verify a MIDX file. - -OPTIONS -------- - ---pack-dir :: - Use given directory for the location of packfiles, pack-indexes, - and MIDX files. - ---clear:: - If specified, delete the midx file specified by midx-head, and - midx-head. (Cannot be combined with `--write`, `--read`, or - `--verify`.) - ---verify:: - If specified, check the midx file specified by midx-head for - corruption or invalid data. (Cannot be combined with `--write`, - `--read`, or `--clear`.) - ---read:: - If specified, read a midx file specified by the midx-head file - and output basic details about the midx file. (Cannot be combined - with `--write`, `--clear`, or `--verify`.) - ---midx-id :: - If specified with `--read`, use the given oid to read midx-[oid].midx - instead of using midx-head. - ---write:: - If specified, write a new midx file to the pack directory using - the packfiles present. Outputs the hash of the result midx file. - (Cannot be combined with `--read`, `--clear`, or `--verify`.) - ---update-head:: - If specified with `--write`, update the midx-head file to point to - the written midx file. - ---delete-expired:: - If specified with `--write` and `--update-head`, delete the midx file - previously pointed to by midx-head (if changed). - -EXAMPLES --------- - -* Read the midx-head file and output the OID of the head MIDX file. -+ ------------------------------------------------- -$ git midx ------------------------------------------------- - -* Write a MIDX file for the packfiles in your local .git folder. -+ ------------------------------------------------- -$ git midx --write ------------------------------------------------- - -* Write a MIDX file for the packfiles in your local .git folder and -* update the midx-head file. 
-+ ------------------------------------------------- -$ git midx --write --update-head ------------------------------------------------- - -* Write a MIDX file for the packfiles in a different folder -+ ---------------------------------------------------------- -$ git midx --write --pack-dir ../../.gitObjectCache/pack/ ---------------------------------------------------------- - -* Read a MIDX file in the local .git folder. -+ --------------------------------------------------------------------- -$ git midx --read --midx-id 3e50d982a2257168c7fd0ff12ffe5cf6af38c74e --------------------------------------------------------------------- - -CONFIGURATION -------------- - -core.midx:: - The midx command will fail if core.midx is false. - Also, the written MIDX files will be ignored by other commands - unless core.midx is true. - -GIT ---- -Part of the linkgit:git[1] suite diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt new file mode 100644 index 00000000000000..f7778a2c85c1aa --- /dev/null +++ b/Documentation/git-multi-pack-index.txt @@ -0,0 +1,66 @@ +git-multi-pack-index(1) +======================= + +NAME +---- +git-multi-pack-index - Write and verify multi-pack-indexes + + +SYNOPSIS +-------- +[verse] +'git multi-pack-index' [--object-dir=] + +DESCRIPTION +----------- +Write or verify a multi-pack-index (MIDX) file. + +OPTIONS +------- + +--object-dir=:: + Use given directory for the location of Git objects. We check + `/packs/multi-pack-index` for the current MIDX file, and + `/packs` for the pack-files to index. + +write:: + When given as the verb, write a new MIDX file to + `/packs/multi-pack-index`. + +verify:: + When given as the verb, verify the contents of the MIDX file + at `/packs/multi-pack-index`. + + +EXAMPLES +-------- + +* Write a MIDX file for the packfiles in the current .git folder. ++ +----------------------------------------------- +$ git multi-pack-index write +----------------------------------------------- + +* Write a MIDX file for the packfiles in an alternate object store. ++ +----------------------------------------------- +$ git multi-pack-index --object-dir write +----------------------------------------------- + +* Verify the MIDX file for the packfiles in the current .git folder. ++ +----------------------------------------------- +$ git multi-pack-index verify +----------------------------------------------- + + +SEE ALSO +-------- +See link:technical/multi-pack-index.html[The Multi-Pack-Index Design +Document] and link:technical/pack-format.html[The Multi-Pack-Index +Format] for more information on the multi-pack-index feature. + + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/technical/multi-pack-index.txt b/Documentation/technical/multi-pack-index.txt new file mode 100644 index 00000000000000..d7e57639f70d70 --- /dev/null +++ b/Documentation/technical/multi-pack-index.txt @@ -0,0 +1,109 @@ +Multi-Pack-Index (MIDX) Design Notes +==================================== + +The Git object directory contains a 'pack' directory containing +packfiles (with suffix ".pack") and pack-indexes (with suffix +".idx"). The pack-indexes provide a way to lookup objects and +navigate to their offset within the pack, but these must come +in pairs with the packfiles. This pairing depends on the file +names, as the pack-index differs only in suffix with its pack- +file. 
While the pack-indexes provide fast lookup per packfile, +this performance degrades as the number of packfiles increases, +because abbreviations need to inspect every packfile and we are +more likely to have a miss on our most-recently-used packfile. +For some large repositories, repacking into a single packfile +is not feasible due to storage space or excessive repack times. + +The multi-pack-index (MIDX for short) stores a list of objects +and their offsets into multiple packfiles. It contains: + +- A list of packfile names. +- A sorted list of object IDs. +- A list of metadata for the ith object ID including: + - A value j referring to the jth packfile. + - An offset within the jth packfile for the object. +- If large offsets are required, we use another list of large + offsets similar to version 2 pack-indexes. + +Thus, we can provide O(log N) lookup time for any number +of packfiles. + +Design Details +-------------- + +- The MIDX is stored in a file named 'multi-pack-index' in the + .git/objects/pack directory. This could be stored in the pack + directory of an alternate. It refers only to packfiles in that + same directory. + +- The pack.multiIndex config setting must be on to consume MIDX files. + +- The file format includes parameters for the object ID hash + function, so a future change of hash algorithm does not require + a change in format. + +- The MIDX keeps only one record per object ID. If an object appears + in multiple packfiles, then the MIDX selects the copy in the most- + recently modified packfile. + +- If there exist packfiles in the pack directory not registered in + the MIDX, then those packfiles are loaded into the `packed_git` + list and `packed_git_mru` cache. + +- The pack-indexes (.idx files) remain in the pack directory so we + can delete the MIDX file, set core.midx to false, or downgrade + without any loss of information. + +- The MIDX file format uses a chunk-based approach (similar to the + commit-graph file) that allows optional data to be added. + +Future Work +----------- + +- Add a 'verify' subcommand to the 'git midx' builtin to verify the + contents of the multi-pack-index file match the offsets listed in + the corresponding pack-indexes. + +- The multi-pack-index allows many packfiles, especially in a context + where repacking is expensive (such as a very large repo), or + unexpected maintenance time is unacceptable (such as a high-demand + build machine). However, the multi-pack-index needs to be rewritten + in full every time. We can extend the format to be incremental, so + writes are fast. By storing a small "tip" multi-pack-index that + points to large "base" MIDX files, we can keep writes fast while + still reducing the number of binary searches required for object + lookups. + +- The reachability bitmap is currently paired directly with a single + packfile, using the pack-order as the object order to hopefully + compress the bitmaps well using run-length encoding. This could be + extended to pair a reachability bitmap with a multi-pack-index. If + the multi-pack-index is extended to store a "stable object order" + (a function Order(hash) = integer that is constant for a given hash, + even as the multi-pack-index is updated) then a reachability bitmap + could point to a multi-pack-index and be updated independently. + +- Packfiles can be marked as "special" using empty files that share + the initial name but replace ".pack" with ".keep" or ".promisor". 
+ We can add an optional chunk of data to the multi-pack-index that + records flags of information about the packfiles. This allows new + states, such as 'repacked' or 'redeltified', that can help with + pack maintenance in a multi-pack environment. It may also be + helpful to organize packfiles by object type (commit, tree, blob, + etc.) and use this metadata to help that maintenance. + +- The partial clone feature records special "promisor" packs that + may point to objects that are not stored locally, but available + on request to a server. The multi-pack-index does not currently + track these promisor packs. + +Related Links +------------- +[0] https://bugs.chromium.org/p/git/issues/detail?id=6 + Chromium work item for: Multi-Pack Index (MIDX) + +[1] https://public-inbox.org/git/20180107181459.222909-1-dstolee@microsoft.com/ + An earlier RFC for the multi-pack-index feature + +[2] https://public-inbox.org/git/alpine.DEB.2.20.1803091557510.23109@alexmv-linux/ + Git Merge 2018 Contributor's summit notes (includes discussion of MIDX) diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt index 1b453da41e0da8..cab5bdd2ff0f88 100644 --- a/Documentation/technical/pack-format.txt +++ b/Documentation/technical/pack-format.txt @@ -253,9 +253,9 @@ Pack file entry: <+ 20-byte SHA-1-checksum of all of the above. -== midx-*.midx files have the following format: +== multi-pack-index (MIDX) files have the following format: -The meta-index files refer to multiple pack-files and loose objects. +The multi-pack-index files refer to multiple pack-files and loose objects. In order to allow extensions that add extra data to the MIDX, we organize the body into "chunks" and provide a lookup table at the beginning of the @@ -269,20 +269,18 @@ HEADER: 4-byte signature: The signature is: {'M', 'I', 'D', 'X'} - 4-byte version number: - We reserve version number 1 for the format accepted by core git, - and instead use version 0x80000001 for gvfs/master. + 1-byte version number: + Git only writes or recognizes version 1. - 1-byte Object Id Version (1 = SHA-1) + 1-byte Object Id Version + Git only writes or recognizes version 1 (SHA1). - 1-byte Object Id Length (H) + 1-byte number of "chunks" - 1-byte number (I) of base multi-pack-index files: + 1-byte number of base multi-pack-index files: This value is currently always zero. - 1-byte number (C) of "chunks" - - 4-byte number (P) of pack files + 4-byte number of pack files CHUNK LOOKUP: @@ -298,16 +296,22 @@ CHUNK LOOKUP: CHUNK DATA: - OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) + Packfile Names (ID: {'P', 'N', 'A', 'M'}) + Stores the packfile names as concatenated, null-terminated strings. + Packfiles must be listed in lexicographic order for fast lookups by + name. This is the only chunk not guaranteed to be a multiple of four + bytes in length, so should be the last chunk for alignment reasons. + + OID Fanout (ID: {'O', 'I', 'D', 'F'}) The ith entry, F[i], stores the number of OIDs with first byte at most i. Thus F[255] stores the total - number of objects (N). + number of objects. - OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) + OID Lookup (ID: {'O', 'I', 'D', 'L'}) The OIDs for all objects in the MIDX are stored in lexicographic order in this chunk. - Object Offsets (ID: {'O', 'O', 'F', 'F'}) (N * 8 bytes) + Object Offsets (ID: {'O', 'O', 'F', 'F'}) Stores two 4-byte values for every object. 1: The pack-int-id for the pack storing this object. 2: The offset within the pack. 
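
The fanout, OID lookup and object offset chunks described above are all a reader needs to map an object id to a pack and an offset. A minimal, self-contained C sketch of that lookup — not part of the patch — might look as follows, assuming the chunk pointers have already been located through the chunk lookup table; the `struct midx_view` type and the local `get_be32()`/`get_be64()` helpers are illustrative stand-ins for git's own code, and the large-offset branch uses the optional LOFF chunk described just below.

---------------------------------------------------------------
/* Illustrative sketch only -- not part of this patch. */
#include <stdint.h>
#include <string.h>

static uint32_t get_be32(const unsigned char *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

static uint64_t get_be64(const unsigned char *p)
{
	return ((uint64_t)get_be32(p) << 32) | get_be32(p + 4);
}

struct midx_view {
	const unsigned char *oid_fanout;     /* OIDF: 256 * 4 bytes            */
	const unsigned char *oid_lookup;     /* OIDL: num_objects * 20 bytes   */
	const unsigned char *object_offsets; /* OOFF: num_objects * 8 bytes    */
	const unsigned char *large_offsets;  /* LOFF: optional, 8-byte entries */
};

/* Find a 20-byte SHA-1 and report which pack holds it and where. */
static int midx_lookup(const struct midx_view *m, const unsigned char *oid,
		       uint32_t *pack_int_id, uint64_t *offset)
{
	/* Fanout narrows the search to objects sharing the first byte. */
	uint32_t lo = oid[0] ? get_be32(m->oid_fanout + 4 * (oid[0] - 1)) : 0;
	uint32_t hi = get_be32(m->oid_fanout + 4 * oid[0]);

	while (lo < hi) {
		uint32_t mid = lo + (hi - lo) / 2;
		int cmp = memcmp(oid, m->oid_lookup + 20 * mid, 20);

		if (!cmp) {
			const unsigned char *rec = m->object_offsets + 8 * mid;
			uint32_t off32 = get_be32(rec + 4);

			*pack_int_id = get_be32(rec);
			if (off32 & 0x80000000)	/* large offset? consult LOFF */
				*offset = get_be64(m->large_offsets +
						   8 * (off32 & 0x7fffffff));
			else
				*offset = off32;
			return 1;
		}
		if (cmp > 0)
			lo = mid + 1;
		else
			hi = mid;
	}
	return 0;	/* not in this multi-pack-index */
}
---------------------------------------------------------------

Because the fanout restricts the binary search to objects sharing the first byte, the lookup stays O(log N) in the total object count regardless of how many packfiles the MIDX covers, which is the property the design document relies on.
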
@@ -322,17 +326,6 @@ CHUNK DATA: [Optional] Object Large Offsets (ID: {'L', 'O', 'F', 'F'}) 8-byte offsets into large packfiles. - Packfile Name Lookup (ID: {'P', 'L', 'O', 'O'}) (P * 4 bytes) - P * 4 bytes storing the offset in the packfile name chunk for - the null-terminated string containing the filename for the - ith packfile. - - Packfile Names (ID: {'P', 'N', 'A', 'M'}) - Stores the packfile names as concatenated, null-terminated strings. - Packfiles must be listed in lexicographic order for fast lookups by - name. This is the only chunk not guaranteed to be a multiple of four - bytes in length, so should be the last chunk for alignment reasons. - TRAILER: - H-byte HASH-checksum of all of the above. + 20-byte SHA1-checksum of the above contents. diff --git a/Makefile b/Makefile index 74319a98b88c05..b8b0942d96ab8c 100644 --- a/Makefile +++ b/Makefile @@ -726,6 +726,7 @@ TEST_BUILTINS_OBJS += test-online-cpus.o TEST_BUILTINS_OBJS += test-path-utils.o TEST_BUILTINS_OBJS += test-prio-queue.o TEST_BUILTINS_OBJS += test-read-cache.o +TEST_BUILTINS_OBJS += test-read-midx.o TEST_BUILTINS_OBJS += test-ref-store.o TEST_BUILTINS_OBJS += test-regex.o TEST_BUILTINS_OBJS += test-repository.o @@ -1077,9 +1078,9 @@ BUILTIN_OBJS += builtin/merge-index.o BUILTIN_OBJS += builtin/merge-ours.o BUILTIN_OBJS += builtin/merge-recursive.o BUILTIN_OBJS += builtin/merge-tree.o -BUILTIN_OBJS += builtin/midx.o BUILTIN_OBJS += builtin/mktag.o BUILTIN_OBJS += builtin/mktree.o +BUILTIN_OBJS += builtin/multi-pack-index.o BUILTIN_OBJS += builtin/mv.o BUILTIN_OBJS += builtin/name-rev.o BUILTIN_OBJS += builtin/notes.o diff --git a/builtin.h b/builtin.h index 1eb16d1ebbf558..b78ab6e30b26f6 100644 --- a/builtin.h +++ b/builtin.h @@ -189,9 +189,9 @@ extern int cmd_merge_ours(int argc, const char **argv, const char *prefix); extern int cmd_merge_file(int argc, const char **argv, const char *prefix); extern int cmd_merge_recursive(int argc, const char **argv, const char *prefix); extern int cmd_merge_tree(int argc, const char **argv, const char *prefix); -extern int cmd_midx(int argc, const char **argv, const char *prefix); extern int cmd_mktag(int argc, const char **argv, const char *prefix); extern int cmd_mktree(int argc, const char **argv, const char *prefix); +extern int cmd_multi_pack_index(int argc, const char **argv, const char *prefix); extern int cmd_mv(int argc, const char **argv, const char *prefix); extern int cmd_name_rev(int argc, const char **argv, const char *prefix); extern int cmd_notes(int argc, const char **argv, const char *prefix); diff --git a/builtin/count-objects.c b/builtin/count-objects.c index d51e2ce1ec016a..a7cad052c61580 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -123,7 +123,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) struct strbuf pack_buf = STRBUF_INIT; struct strbuf garbage_buf = STRBUF_INIT; - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (!p->pack_local) continue; if (open_pack_index(p)) diff --git a/builtin/fsck.c b/builtin/fsck.c index 250f5af1182ddc..f9a90593d11f4a 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -49,6 +49,7 @@ static int name_objects; #define ERROR_PACK 04 #define ERROR_REFS 010 #define ERROR_COMMIT_GRAPH 020 +#define ERROR_MULTI_PACK_INDEX 040 static const char *describe_object(struct object *obj) { @@ -740,7 +741,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) struct progress *progress = NULL; if 
(show_progress) { - for (p = get_packed_git(the_repository); p; + for (p = get_all_packs(the_repository); p; p = p->next) { if (open_pack_index(p)) continue; @@ -749,7 +750,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) progress = start_progress(_("Checking objects"), total); } - for (p = get_packed_git(the_repository); p; + for (p = get_all_packs(the_repository); p; p = p->next) { /* verify gives error messages itself */ if (verify_pack(p, fsck_obj_buffer, @@ -848,5 +849,23 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) } } + if (!git_config_get_bool("core.multipackindex", &i) && i) { + struct child_process midx_verify = CHILD_PROCESS_INIT; + const char *midx_argv[] = { "multi-pack-index", "verify", NULL, NULL, NULL }; + + midx_verify.argv = midx_argv; + midx_verify.git_cmd = 1; + if (run_command(&midx_verify)) + errors_found |= ERROR_MULTI_PACK_INDEX; + + prepare_alt_odb(the_repository); + for (alt = the_repository->objects->alt_odb_list; alt; alt = alt->next) { + midx_argv[2] = "--object-dir"; + midx_argv[3] = alt->path; + if (run_command(&midx_verify)) + errors_found |= ERROR_MULTI_PACK_INDEX; + } + } + return errors_found; } diff --git a/builtin/gc.c b/builtin/gc.c index 8a99ed2af2ba34..bc4e0397462ebd 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -183,7 +183,7 @@ static struct packed_git *find_base_packs(struct string_list *packs, { struct packed_git *p, *base = NULL; - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (!p->pack_local) continue; if (limit) { @@ -208,7 +208,7 @@ static int too_many_packs(void) if (gc_auto_pack_limit <= 0) return 0; - for (cnt = 0, p = get_packed_git(the_repository); p; p = p->next) { + for (cnt = 0, p = get_all_packs(the_repository); p; p = p->next) { if (!p->pack_local) continue; if (p->pack_keep) diff --git a/builtin/midx.c b/builtin/midx.c deleted file mode 100644 index 7ce6b56a3e456d..00000000000000 --- a/builtin/midx.c +++ /dev/null @@ -1,461 +0,0 @@ -#include "builtin.h" -#include "cache.h" -#include "config.h" -#include "dir.h" -#include "git-compat-util.h" -#include "lockfile.h" -#include "packfile.h" -#include "parse-options.h" -#include "midx.h" -#include "object-store.h" - -static char const * const builtin_midx_usage[] ={ - N_("git midx [--pack-dir ]"), - N_("git midx --write [--pack-dir ] [--update-head] [--delete-expired]"), - N_("git midx --read [--midx-id=]"), - N_("git midx --clear [--pack-dir ]"), - N_("git midx --verify [--pack-dir ]"), - NULL -}; - -static struct opts_midx { - const char *pack_dir; - int write; - int update_head; - int delete_expired; - int read; - const char *midx_id; - int clear; - int verify; - int has_existing; - struct object_id old_midx_oid; -} opts; - -static int midx_oid_compare(const void *_a, const void *_b) -{ - struct pack_midx_entry *a = (struct pack_midx_entry *)_a; - struct pack_midx_entry *b = (struct pack_midx_entry *)_b; - int cmp = oidcmp(&a->oid, &b->oid); - - if (cmp) - return cmp; - - if (a->pack_mtime > b->pack_mtime) - return -1; - else if (a->pack_mtime < b->pack_mtime) - return 1; - - return a->pack_int_id - b->pack_int_id; -} - -static uint32_t get_pack_fanout(struct packed_git *p, uint32_t value) -{ - const uint32_t *level1_ofs = p->index_data; - - if (!level1_ofs) { - if (open_pack_index(p)) - return 0; - level1_ofs = p->index_data; - } - - if (p->index_version > 1) { - level1_ofs += 2; - } - - return ntohl(level1_ofs[value]); -} - -/* - * It is possible to artificially get 
into a state where there are many - * duplicate copies of objects. That can create high memory pressure if - * we are to create a list of all objects before de-duplication. To reduce - * this memory pressure without a significant performance drop, automatically - * group objects by the first byte of their object id. Use the IDX fanout - * tables to group the data, copy to a local array, then sort. - * - * Copy only the de-duplicated entries (selected by most-recent modified time - * of a packfile containing the object). - */ -static void dedupe_and_sort_entries( - struct packed_git **packs, uint32_t nr_packs, - struct midxed_git *midx, - struct pack_midx_entry **objects, uint32_t *nr_objects) -{ - uint32_t first_byte, i; - struct pack_midx_entry *objects_batch = NULL; - uint32_t nr_objects_batch = 0; - uint32_t alloc_objects_batch = 0; - uint32_t alloc_objects; - uint32_t pack_offset = 0; - struct pack_midx_entry *local_objects = NULL; - int nr_local_objects = 0; - - if (midx) { - nr_objects_batch = midx->num_objects; - pack_offset = midx->num_packs; - } - - for (i = pack_offset; i < nr_packs; i++) - nr_objects_batch += packs[i]->num_objects; - - /* - * Predict the size of the batches to be roughly ~1/256 the total - * count, but give some slack as they will not be equally sized. - */ - alloc_objects_batch = nr_objects_batch / 200; - ALLOC_ARRAY(objects_batch, alloc_objects_batch); - - *nr_objects = 0; - alloc_objects = alloc_objects_batch; - ALLOC_ARRAY(local_objects, alloc_objects); - - for (first_byte = 0; first_byte < 256; first_byte++) { - nr_objects_batch = 0; - - if (midx) { - uint32_t start, end; - if (first_byte) - start = get_be32(midx->chunk_oid_fanout + 4 * (first_byte - 1)); - else - start = 0; - - end = get_be32(midx->chunk_oid_fanout + 4 * first_byte); - - while (start < end) { - ALLOC_GROW(objects_batch, nr_objects_batch + 1, alloc_objects_batch); - nth_midxed_object_entry(midx, start, &objects_batch[nr_objects_batch]); - nr_objects_batch++; - start++; - } - } - - for (i = pack_offset; i < nr_packs; i++) { - uint32_t start, end; - - if (first_byte) - start = get_pack_fanout(packs[i], first_byte - 1); - else - start = 0; - end = get_pack_fanout(packs[i], first_byte); - - while (start < end) { - struct pack_midx_entry *entry; - ALLOC_GROW(objects_batch, nr_objects_batch + 1, alloc_objects_batch); - entry = &objects_batch[nr_objects_batch++]; - - if (!nth_packed_object_oid(&entry->oid, packs[i], start)) - die("unable to get sha1 of object %u in %s", - start, packs[i]->pack_name); - - entry->pack_int_id = i; - entry->offset = nth_packed_object_offset(packs[i], start); - entry->pack_mtime = packs[i]->mtime; - start++; - } - } - - QSORT(objects_batch, nr_objects_batch, midx_oid_compare); - - /* de-dupe as we copy from the batch in-order */ - for (i = 0; i < nr_objects_batch; i++) { - if (i > 0 && !oidcmp(&objects_batch[i - 1].oid, &objects_batch[i].oid)) - continue; - - ALLOC_GROW(local_objects, nr_local_objects + 1, alloc_objects); - memcpy(&local_objects[nr_local_objects], &objects_batch[i], sizeof(struct pack_midx_entry)); - nr_local_objects++; - } - } - - *nr_objects = nr_local_objects; - *objects = local_objects; -} - -static int build_midx_from_packs( - const char *pack_dir, - const char **pack_names, uint32_t nr_packs, - const char **midx_id, struct midxed_git *midx) -{ - struct packed_git **packs; - const char **installed_pack_names; - uint32_t i, nr_installed_packs = 0; - uint32_t nr_objects = 0; - struct pack_midx_entry *objects = NULL; - uint32_t nr_total_packs = 
nr_packs; - struct strbuf pack_path = STRBUF_INIT; - int baselen; - - if (midx) - nr_total_packs += midx->num_packs; - - ALLOC_ARRAY(packs, nr_total_packs); - ALLOC_ARRAY(installed_pack_names, nr_total_packs); - - if (midx) { - for (i = 0; i < midx->num_packs; i++) - installed_pack_names[nr_installed_packs++] = midx->pack_names[i]; - } - - strbuf_addstr(&pack_path, pack_dir); - strbuf_addch(&pack_path, '/'); - baselen = pack_path.len; - for (i = 0; i < nr_packs; i++) { - strbuf_setlen(&pack_path, baselen); - strbuf_addstr(&pack_path, pack_names[i]); - - if (midx && contains_pack(midx, pack_names[i])) - continue; - - strbuf_strip_suffix(&pack_path, ".pack"); - strbuf_addstr(&pack_path, ".idx"); - - packs[nr_installed_packs] = add_packed_git(pack_path.buf, pack_path.len, 0); - - if (packs[nr_installed_packs] != NULL) { - if (open_pack_index(packs[nr_installed_packs])) - continue; - - nr_objects += packs[nr_installed_packs]->num_objects; - installed_pack_names[nr_installed_packs] = pack_names[i]; - nr_installed_packs++; - } - } - strbuf_release(&pack_path); - - if (!nr_objects || !nr_installed_packs) { - if (opts.has_existing) - *midx_id = oid_to_hex(&opts.old_midx_oid); - else - *midx_id = 0; - free(packs); - free(installed_pack_names); - return 0; - } - - dedupe_and_sort_entries(packs, nr_installed_packs, - midx, &objects, &nr_objects); - - *midx_id = write_midx_file(pack_dir, NULL, - installed_pack_names, nr_installed_packs, - objects, nr_objects); - - FREE_AND_NULL(installed_pack_names); - FREE_AND_NULL(objects); - - return 0; -} - -static void update_head_file(const char *pack_dir, const char *midx_id) -{ - struct strbuf head_path = STRBUF_INIT; - int fd; - struct lock_file lk = LOCK_INIT; - - strbuf_addstr(&head_path, pack_dir); - strbuf_addstr(&head_path, "/"); - strbuf_addstr(&head_path, "midx-head"); - - fd = hold_lock_file_for_update(&lk, head_path.buf, LOCK_DIE_ON_ERROR); - strbuf_release(&head_path); - - if (fd < 0) - die_errno("unable to open midx-head"); - - write_in_full(fd, midx_id, GIT_MAX_HEXSZ); - commit_lock_file(&lk); -} - -static int midx_write(void) -{ - const char **pack_names = NULL; - uint32_t i, nr_packs = 0; - const char *midx_id = 0; - DIR *dir; - struct dirent *de; - struct midxed_git *midx = NULL; - - if (opts.has_existing) - midx = get_midxed_git(opts.pack_dir, &opts.old_midx_oid); - - dir = opendir(opts.pack_dir); - if (!dir) { - error_errno("unable to open object pack directory: %s", - opts.pack_dir); - return 1; - } - - nr_packs = 256; - ALLOC_ARRAY(pack_names, nr_packs); - - i = 0; - while ((de = readdir(dir)) != NULL) { - if (is_dot_or_dotdot(de->d_name)) - continue; - - if (ends_with(de->d_name, ".pack")) { - ALLOC_GROW(pack_names, i + 1, nr_packs); - pack_names[i++] = xstrdup(de->d_name); - } - } - - nr_packs = i; - closedir(dir); - - if (!nr_packs) - goto cleanup; - - if (build_midx_from_packs(opts.pack_dir, pack_names, nr_packs, &midx_id, midx)) - die("failed to build MIDX"); - - if (!midx_id) - goto cleanup; - - printf("%s\n", midx_id); - - if (opts.update_head) - update_head_file(opts.pack_dir, midx_id); - - if (opts.delete_expired && opts.update_head && opts.has_existing && - strcmp(midx_id, oid_to_hex(&opts.old_midx_oid))) { - char *old_path = get_midx_head_filename_oid(opts.pack_dir, &opts.old_midx_oid); - close_midx(midx); - if (remove_path(old_path)) - die("failed to remove path %s", old_path); - - free(old_path); - } - -cleanup: - if (pack_names) - FREE_AND_NULL(pack_names); - return 0; -} - -static int midx_read(void) -{ - struct object_id 
midx_oid; - struct midxed_git *midx; - uint32_t i; - - if (opts.midx_id && strlen(opts.midx_id) == GIT_MAX_HEXSZ) - get_oid_hex(opts.midx_id, &midx_oid); - else if (!get_midx_head_oid(opts.pack_dir, &midx_oid)) - die("No midx-head exists."); - - midx = get_midxed_git(opts.pack_dir, &midx_oid); - - printf("header: %08x %08x %02x %02x %02x %02x %08x\n", - ntohl(midx->hdr->midx_signature), - ntohl(midx->hdr->midx_version), - midx->hdr->hash_version, - midx->hdr->hash_len, - midx->hdr->num_base_midx, - midx->hdr->num_chunks, - ntohl(midx->hdr->num_packs)); - printf("num_objects: %d\n", midx->num_objects); - printf("chunks:"); - - if (midx->chunk_pack_lookup) - printf(" pack_lookup"); - if (midx->chunk_pack_names) - printf(" pack_names"); - if (midx->chunk_oid_fanout) - printf(" oid_fanout"); - if (midx->chunk_oid_lookup) - printf(" oid_lookup"); - if (midx->chunk_object_offsets) - printf(" object_offsets"); - if (midx->chunk_large_offsets) - printf(" large_offsets"); - printf("\n"); - - printf("pack_names:\n"); - for (i = 0; i < midx->num_packs; i++) - printf("%s\n", midx->pack_names[i]); - - printf("pack_dir: %s\n", midx->pack_dir); - return 0; -} - -static int midx_clear(void) -{ - struct strbuf head_path = STRBUF_INIT; - char *old_path; - - if (!opts.has_existing) - return 0; - - strbuf_addstr(&head_path, opts.pack_dir); - strbuf_addstr(&head_path, "/"); - strbuf_addstr(&head_path, "midx-head"); - if (remove_path(head_path.buf)) - die("failed to remove path %s", head_path.buf); - strbuf_release(&head_path); - - old_path = get_midx_head_filename_oid(opts.pack_dir, &opts.old_midx_oid); - if (remove_path(old_path)) - die("failed to remove path %s", old_path); - free(old_path); - - return 0; -} - -int cmd_midx(int argc, const char **argv, const char *prefix) -{ - static struct option builtin_midx_options[] = { - { OPTION_STRING, 'p', "pack-dir", &opts.pack_dir, - N_("dir"), - N_("The pack directory containing set of packfile and pack-index pairs.") }, - OPT_BOOL('w', "write", &opts.write, - N_("write midx file")), - OPT_BOOL('u', "update-head", &opts.update_head, - N_("update midx-head to written midx file")), - OPT_BOOL('d', "delete-expired", &opts.delete_expired, - N_("delete expired head midx file")), - OPT_BOOL('r', "read", &opts.read, - N_("read midx file")), - OPT_BOOL('c', "clear", &opts.clear, - N_("clear midx file and midx-head")), - OPT_BOOL(0, "verify", &opts.verify, - N_("verify the contents of a midx file")), - { OPTION_STRING, 'M', "midx-id", &opts.midx_id, - N_("oid"), - N_("An OID for a specific midx file in the pack-dir."), - PARSE_OPT_OPTARG, NULL, (intptr_t) "" }, - OPT_END(), - }; - - if (argc == 2 && !strcmp(argv[1], "-h")) - usage_with_options(builtin_midx_usage, builtin_midx_options); - - git_config(git_default_config, NULL); - if (!core_midx) - die("git-midx requires core.midx=true."); - - argc = parse_options(argc, argv, prefix, - builtin_midx_options, - builtin_midx_usage, 0); - - if (opts.write + opts.read + opts.clear + opts.verify > 1) - usage_with_options(builtin_midx_usage, builtin_midx_options); - - if (!opts.pack_dir) { - struct strbuf path = STRBUF_INIT; - strbuf_addstr(&path, get_object_directory()); - strbuf_addstr(&path, "/pack"); - opts.pack_dir = strbuf_detach(&path, NULL); - } - - opts.has_existing = !!get_midx_head_oid(opts.pack_dir, &opts.old_midx_oid); - - if (opts.write) - return midx_write(); - if (opts.read) - return midx_read(); - if (opts.clear) - return midx_clear(); - if (opts.verify) - return midx_verify(opts.pack_dir, opts.midx_id); - - return 
0; -} diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c new file mode 100644 index 00000000000000..fca70f8e4fcca8 --- /dev/null +++ b/builtin/multi-pack-index.c @@ -0,0 +1,49 @@ +#include "builtin.h" +#include "cache.h" +#include "config.h" +#include "parse-options.h" +#include "midx.h" + +static char const * const builtin_multi_pack_index_usage[] = { + N_("git multi-pack-index [--object-dir=] (write|verify)"), + NULL +}; + +static struct opts_multi_pack_index { + const char *object_dir; +} opts; + +int cmd_multi_pack_index(int argc, const char **argv, + const char *prefix) +{ + static struct option builtin_multi_pack_index_options[] = { + OPT_FILENAME(0, "object-dir", &opts.object_dir, + N_("object directory containing set of packfile and pack-index pairs")), + OPT_END(), + }; + + git_config(git_default_config, NULL); + + argc = parse_options(argc, argv, prefix, + builtin_multi_pack_index_options, + builtin_multi_pack_index_usage, 0); + + if (!opts.object_dir) + opts.object_dir = get_object_directory(); + + if (argc == 0) + usage_with_options(builtin_multi_pack_index_usage, + builtin_multi_pack_index_options); + + if (argc > 1) { + die(_("too many arguments")); + return 1; + } + + if (!strcmp(argv[0], "write")) + return write_midx_file(opts.object_dir); + if (!strcmp(argv[0], "verify")) + return verify_midx_file(opts.object_dir); + + die(_("unrecognized verb: %s"), argv[0]); +} diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 108bfc0e67ad7f..2a385b1767d4a9 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -31,6 +31,7 @@ #include "packfile.h" #include "object-store.h" #include "dir.h" +#include "midx.h" #include "trace2.h" #define IN_PACK(obj) oe_in_pack(&to_pack, obj) @@ -1041,6 +1042,7 @@ static int want_object_in_pack(const struct object_id *oid, { int want; struct list_head *pos; + struct multi_pack_index *m; if (!exclude && local && has_loose_object_nonlocal(oid)) return 0; @@ -1055,6 +1057,32 @@ static int want_object_in_pack(const struct object_id *oid, if (want != -1) return want; } + + for (m = get_multi_pack_index(the_repository); m; m = m->next) { + struct pack_entry e; + if (fill_midx_entry(oid, &e, m)) { + struct packed_git *p = e.p; + off_t offset; + + if (p == *found_pack) + offset = *found_offset; + else + offset = find_pack_entry_one(oid->hash, p); + + if (offset) { + if (!*found_pack) { + if (!is_pack_valid(p)) + continue; + *found_offset = offset; + *found_pack = p; + } + want = want_found_object(exclude, p); + if (want != -1) + return want; + } + } + } + list_for_each(pos, get_packed_git_mru(the_repository)) { struct packed_git *p = list_entry(pos, struct packed_git, mru); off_t offset; @@ -2807,7 +2835,7 @@ static void add_objects_in_unpacked_packs(struct rev_info *revs) memset(&in_pack, 0, sizeof(in_pack)); - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { struct object_id oid; struct object *o; @@ -2871,7 +2899,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) struct packed_git *p; p = (last_found != (void *)1) ? 
last_found : - get_packed_git(the_repository); + get_all_packs(the_repository); while (p) { if ((!p->pack_local || p->pack_keep || @@ -2881,7 +2909,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) return 1; } if (p == last_found) - p = get_packed_git(the_repository); + p = get_all_packs(the_repository); else p = p->next; if (p == last_found) @@ -2917,7 +2945,7 @@ static void loosen_unused_packed_objects(struct rev_info *revs) uint32_t i; struct object_id oid; - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) continue; @@ -3064,7 +3092,7 @@ static void add_extra_kept_packs(const struct string_list *names) if (!names->nr) return; - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { const char *name = basename(p->pack_name); int i; @@ -3337,7 +3365,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) add_extra_kept_packs(&keep_pack_list); if (ignore_packed_keep_on_disk) { struct packed_git *p; - for (p = get_packed_git(the_repository); p; p = p->next) + for (p = get_all_packs(the_repository); p; p = p->next) if (p->pack_local && p->pack_keep) break; if (!p) /* no keep-able packs found */ @@ -3350,7 +3378,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) * it also covers non-local objects */ struct packed_git *p; - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (!p->pack_local) { have_non_local_packs = 1; break; diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index 0494dceff73d67..cf9a9aabd4eb2e 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -577,7 +577,7 @@ static struct pack_list * add_pack(struct packed_git *p) static struct pack_list * add_pack_file(const char *filename) { - struct packed_git *p = get_packed_git(the_repository); + struct packed_git *p = get_all_packs(the_repository); if (strlen(filename) < 40) die("Bad pack filename: %s", filename); @@ -592,7 +592,7 @@ static struct pack_list * add_pack_file(const char *filename) static void load_all(void) { - struct packed_git *p = get_packed_git(the_repository); + struct packed_git *p = get_all_packs(the_repository); while (p) { add_pack(p); diff --git a/builtin/repack.c b/builtin/repack.c index 5fdd19c9c4f95a..ea2e6f43935580 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -8,6 +8,7 @@ #include "strbuf.h" #include "string-list.h" #include "argv-array.h" +#include "midx.h" #include "packfile.h" #include "object-store.h" @@ -280,6 +281,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) int keep_unreachable = 0; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; int no_update_server_info = 0; + int midx_cleared = 0; struct pack_objects_args po_args = {NULL}; struct option builtin_repack_options[] = { @@ -420,6 +422,17 @@ int cmd_repack(int argc, const char **argv, const char *prefix) for_each_string_list_item(item, &names) { for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { char *fname, *fname_old; + + if (!midx_cleared) { + /* if we move a packfile, it will invalidated the midx */ + if (the_repository->objects) { + close_midx(the_repository->objects->multi_pack_index); + the_repository->objects->multi_pack_index = NULL; + } + clear_midx_file(get_object_directory()); + midx_cleared = 1; + } + fname = mkpathdup("%s/pack-%s%s", 
packdir, item->string, exts[ext].name); if (!file_exists(fname)) { @@ -538,6 +551,10 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (!no_update_server_info) update_server_info(0); remove_temporary_files(); + + if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) + write_midx_file(get_object_directory()); + string_list_clear(&names, 0); string_list_clear(&rollback, 0); string_list_clear(&existing_packs, 0); diff --git a/cache.h b/cache.h index 1912a8ea8d1c5f..dc9c155b0d2671 100644 --- a/cache.h +++ b/cache.h @@ -880,7 +880,6 @@ extern int core_preload_index; extern int core_apply_sparse_checkout; extern const char *core_virtualfilesystem; extern int core_gvfs; -extern int core_midx; extern int precomposed_unicode; extern int protect_hfs; extern int protect_ntfs; diff --git a/command-list.txt b/command-list.txt index 4625a8c4f8d531..c36ea3c18226cb 100644 --- a/command-list.txt +++ b/command-list.txt @@ -123,7 +123,7 @@ git-merge-index plumbingmanipulators git-merge-one-file purehelpers git-mergetool ancillarymanipulators complete git-merge-tree ancillaryinterrogators -git-midx plumbingmanipulators +git-multi-pack-index plumbingmanipulators git-mktag plumbingmanipulators git-mktree plumbingmanipulators git-mv mainporcelain worktree diff --git a/config.c b/config.c index 4c0b1e2c5c4ee4..8385d51910dde2 100644 --- a/config.c +++ b/config.c @@ -1330,11 +1330,6 @@ static int git_default_core_config(const char *var, const char *value, void *cb) return 0; } - if (!strcmp(var, "core.midx")) { - core_midx = git_config_bool(var, value); - return 0; - } - if (!strcmp(var, "core.sparsecheckout")) { /* virtual file system relies on the sparse checkout logic so force it on */ if (core_virtualfilesystem) diff --git a/environment.c b/environment.c index 2181e89f867ac1..4bdc3c31aa2e64 100644 --- a/environment.c +++ b/environment.c @@ -69,7 +69,6 @@ char *notes_ref_name; int grafts_replace_parents = 1; int core_apply_sparse_checkout; int core_gvfs; -int core_midx; const char *core_virtualfilesystem; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ diff --git a/fast-import.c b/fast-import.c index 89bb0c9db3de9b..f8c3acd3b5577c 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1068,7 +1068,7 @@ static int store_object( duplicate_count_by_type[type]++; return 1; } else if (find_sha1_pack(oid.hash, - get_packed_git(the_repository))) { + get_all_packs(the_repository))) { e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1266,7 +1266,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) truncate_pack(&checkpoint); } else if (find_sha1_pack(oid.hash, - get_packed_git(the_repository))) { + get_all_packs(the_repository))) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! 
*/ diff --git a/git.c b/git.c index 2924710db52e79..a85c48508e6c48 100644 --- a/git.c +++ b/git.c @@ -585,9 +585,9 @@ static struct cmd_struct commands[] = { { "merge-recursive-theirs", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT }, { "merge-subtree", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT }, { "merge-tree", cmd_merge_tree, RUN_SETUP | NO_PARSEOPT }, - { "midx", cmd_midx, RUN_SETUP | NO_PARSEOPT }, { "mktag", cmd_mktag, RUN_SETUP | NO_PARSEOPT }, { "mktree", cmd_mktree, RUN_SETUP }, + { "multi-pack-index", cmd_multi_pack_index, RUN_SETUP_GENTLY }, { "mv", cmd_mv, RUN_SETUP | NEED_WORK_TREE }, { "name-rev", cmd_name_rev, RUN_SETUP }, { "notes", cmd_notes, RUN_SETUP }, diff --git a/http-backend.c b/http-backend.c index be87e52435535a..2a97b843de6e7a 100644 --- a/http-backend.c +++ b/http-backend.c @@ -595,13 +595,13 @@ static void get_info_packs(struct strbuf *hdr, char *arg) size_t cnt = 0; select_getanyfile(hdr); - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (p->pack_local) cnt++; } strbuf_grow(&buf, cnt * 53 + 2); - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (p->pack_local) strbuf_addf(&buf, "P %s\n", p->pack_name + objdirlen + 6); } diff --git a/midx.c b/midx.c index 038a60a48f5b01..3f0b4ca11ff779 100644 --- a/midx.c +++ b/midx.c @@ -1,362 +1,325 @@ #include "cache.h" -#include "git-compat-util.h" -#include "pack.h" +#include "config.h" +#include "csum-file.h" +#include "dir.h" +#include "lockfile.h" #include "packfile.h" -#include "midx.h" #include "object-store.h" +#include "sha1-lookup.h" +#include "midx.h" +#include "progress.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ -#define MIDX_CHUNKID_PACKLOOKUP 0x504c4f4f /* "PLOO" */ +#define MIDX_VERSION 1 +#define MIDX_BYTE_FILE_VERSION 4 +#define MIDX_BYTE_HASH_VERSION 5 +#define MIDX_BYTE_NUM_CHUNKS 6 +#define MIDX_BYTE_NUM_PACKS 8 +#define MIDX_HASH_VERSION 1 +#define MIDX_HEADER_SIZE 12 +#define MIDX_HASH_LEN 20 +#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN) + +#define MIDX_MAX_CHUNKS 5 +#define MIDX_CHUNK_ALIGNMENT 4 #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ #define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ #define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ #define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */ #define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */ - -#define MIDX_VERSION_GVFS 0x80000001 -#define MIDX_VERSION MIDX_VERSION_GVFS - -#define MIDX_OID_VERSION_SHA1 1 -#define MIDX_OID_LEN_SHA1 20 -#define MIDX_OID_VERSION MIDX_OID_VERSION_SHA1 -#define MIDX_OID_LEN MIDX_OID_LEN_SHA1 - -#define MIDX_LARGE_OFFSET_NEEDED 0x80000000 #define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t)) #define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256) #define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t)) #define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t)) +#define MIDX_LARGE_OFFSET_NEEDED 0x80000000 -/* MIDX-git global storage */ -struct midxed_git *midxed_git = 0; - -struct object_id *get_midx_head_oid(const char *pack_dir, struct object_id *oid) +static char *get_midx_filename(const char *object_dir) { - struct strbuf head_filename = STRBUF_INIT; - char oid_hex[GIT_MAX_HEXSZ + 1]; - FILE *f; - - strbuf_addstr(&head_filename, pack_dir); - strbuf_addstr(&head_filename, "/midx-head"); - - f = fopen(head_filename.buf, "r"); - strbuf_release(&head_filename); - - if (!f) - return 0; 
- - if (!fgets(oid_hex, sizeof(oid_hex), f)) - die("Failed to read midx-head"); - - fclose(f); - - if (get_oid_hex(oid_hex, oid)) - return 0; - return oid; + return xstrfmt("%s/pack/multi-pack-index", object_dir); } -char* get_midx_head_filename_oid(const char *pack_dir, - struct object_id *oid) +struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local) { - struct strbuf head_path = STRBUF_INIT; - strbuf_addstr(&head_path, pack_dir); - strbuf_addstr(&head_path, "/midx-"); - strbuf_addstr(&head_path, oid_to_hex(oid)); - strbuf_addstr(&head_path, ".midx"); + struct multi_pack_index *m = NULL; + int fd; + struct stat st; + size_t midx_size; + void *midx_map = NULL; + uint32_t hash_version; + char *midx_name = get_midx_filename(object_dir); + uint32_t i; + const char *cur_pack_name; - return strbuf_detach(&head_path, NULL); -} + fd = git_open(midx_name); -static char* get_midx_head_filename_dir(const char *pack_dir) -{ - struct object_id oid; - if (!get_midx_head_oid(pack_dir, &oid)) - return 0; + if (fd < 0) + goto cleanup_fail; + if (fstat(fd, &st)) { + error_errno(_("failed to read %s"), midx_name); + goto cleanup_fail; + } - return get_midx_head_filename_oid(pack_dir, &oid); -} + midx_size = xsize_t(st.st_size); -struct pack_midx_details_internal { - uint32_t pack_int_id; - uint32_t internal_offset; -}; + if (midx_size < MIDX_MIN_SIZE) { + error(_("multi-pack-index file %s is too small"), midx_name); + goto cleanup_fail; + } -static struct midxed_git *alloc_midxed_git(const char *pack_dir) -{ - struct midxed_git *m = NULL; + FREE_AND_NULL(midx_name); - FLEX_ALLOC_MEM(m, pack_dir, pack_dir, strlen(pack_dir)); + midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0); - return m; -} + FLEX_ALLOC_MEM(m, object_dir, object_dir, strlen(object_dir)); + m->fd = fd; + m->data = midx_map; + m->data_len = midx_size; + m->local = local; -static struct midxed_git *load_empty_midxed_git(void) -{ - struct midxed_git *midx = alloc_midxed_git(""); + m->signature = get_be32(m->data); + if (m->signature != MIDX_SIGNATURE) + die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"), + m->signature, MIDX_SIGNATURE); - midx->midx_fd = -1; - midx->data = NULL; - midx->num_objects = 0; - midx->packs = NULL; + m->version = m->data[MIDX_BYTE_FILE_VERSION]; + if (m->version != MIDX_VERSION) + die(_("multi-pack-index version %d not recognized"), + m->version); - midx->hdr = (void *)midx; - midx->hdr->num_base_midx = 0; - midx->hdr->num_packs = 0; - midx->hdr->num_chunks = 0; + hash_version = m->data[MIDX_BYTE_HASH_VERSION]; + if (hash_version != MIDX_HASH_VERSION) + die(_("hash version %u does not match"), hash_version); + m->hash_len = MIDX_HASH_LEN; - return 0; -} + m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS]; -static struct midxed_git *load_midxed_git_one(const char *midx_file, const char *pack_dir) -{ - void *midx_map; - const unsigned char *data; - struct pack_midx_header *hdr; - size_t midx_size, packs_len; - struct stat st; - uint32_t i; - struct midxed_git *midx; - int fd = git_open(midx_file); + m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS); - if (fd < 0) - return 0; - if (fstat(fd, &st)) { - close(fd); - return 0; - } - midx_size = xsize_t(st.st_size); - - if (midx_size < 16 + 8 * 5 + 4 * 256 + GIT_MAX_RAWSZ) { - close(fd); - die("midx file %s is too small", midx_file); - } - midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0); - data = (const unsigned char *)midx_map; + for (i = 0; i < m->num_chunks; i++) { + uint32_t chunk_id = 
get_be32(m->data + MIDX_HEADER_SIZE + + MIDX_CHUNKLOOKUP_WIDTH * i); + uint64_t chunk_offset = get_be64(m->data + MIDX_HEADER_SIZE + 4 + + MIDX_CHUNKLOOKUP_WIDTH * i); - hdr = midx_map; - if (ntohl(hdr->midx_signature) != MIDX_SIGNATURE) { - uint32_t signature = ntohl(hdr->midx_signature); - munmap(midx_map, midx_size); - close(fd); - die("midx signature %X does not match signature %X", - signature, MIDX_SIGNATURE); - } - - if (ntohl(hdr->midx_version) != MIDX_VERSION) { - uint32_t version = ntohl(hdr->midx_version); - munmap(midx_map, midx_size); - close(fd); - die("midx version %X does not match version %X", - version, MIDX_VERSION); - } - - /* Time to fill a midx struct */ - midx = alloc_midxed_git(pack_dir); - - midx->hdr = hdr; - midx->midx_fd = fd; - midx->data = midx_map; - midx->data_len = midx_size; - - /* read chunk ids to find pointers */ - for (i = 0; i <= hdr->num_chunks; i++) { - uint32_t chunk_id = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i)); - uint64_t chunk_offset1 = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i + 4)); - uint32_t chunk_offset2 = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i + 8)); - uint64_t chunk_offset = (chunk_offset1 << 32) | chunk_offset2; - - if (sizeof(data) == 4 && chunk_offset >> 32) { - munmap(midx_map, midx_size); - close(fd); - die(_("unable to memory-map in 32-bit address space")); - } + if (chunk_offset >= m->data_len) + die(_("invalid chunk offset (too large)")); switch (chunk_id) { - case MIDX_CHUNKID_PACKLOOKUP: - midx->chunk_pack_lookup = data + chunk_offset; - break; - case MIDX_CHUNKID_PACKNAMES: - midx->chunk_pack_names = data + chunk_offset; + m->chunk_pack_names = m->data + chunk_offset; break; case MIDX_CHUNKID_OIDFANOUT: - midx->chunk_oid_fanout = data + chunk_offset; + m->chunk_oid_fanout = (uint32_t *)(m->data + chunk_offset); break; case MIDX_CHUNKID_OIDLOOKUP: - midx->chunk_oid_lookup = data + chunk_offset; + m->chunk_oid_lookup = m->data + chunk_offset; break; case MIDX_CHUNKID_OBJECTOFFSETS: - midx->chunk_object_offsets = data + chunk_offset; + m->chunk_object_offsets = m->data + chunk_offset; break; case MIDX_CHUNKID_LARGEOFFSETS: - midx->chunk_large_offsets = data + chunk_offset; + m->chunk_large_offsets = m->data + chunk_offset; + break; + + case 0: + die(_("terminating multi-pack-index chunk id appears earlier than expected")); break; default: - /* We allow optional MIDX chunks, so ignore unrecognized chunk ids */ + /* + * Do nothing on unrecognized chunks, allowing future + * extensions to add optional chunks. 
+ */ break; } } - if (!midx->chunk_oid_fanout) - die("midx missing OID Fanout chunk"); - if (!midx->chunk_pack_lookup) - die("midx missing Packfile Name Lookup chunk"); - if (!midx->chunk_pack_names) - die("midx missing Packfile Name chunk"); - - midx->num_objects = ntohl(*((uint32_t*)(midx->chunk_oid_fanout + 255 * 4))); - midx->num_packs = ntohl(midx->hdr->num_packs); + if (!m->chunk_pack_names) + die(_("multi-pack-index missing required pack-name chunk")); + if (!m->chunk_oid_fanout) + die(_("multi-pack-index missing required OID fanout chunk")); + if (!m->chunk_oid_lookup) + die(_("multi-pack-index missing required OID lookup chunk")); + if (!m->chunk_object_offsets) + die(_("multi-pack-index missing required object offsets chunk")); - packs_len = st_mult(sizeof(struct packed_git*), midx->num_packs); + m->num_objects = ntohl(m->chunk_oid_fanout[255]); - if (packs_len) { - ALLOC_ARRAY(midx->packs, midx->num_packs); - ALLOC_ARRAY(midx->pack_names, midx->num_packs); - memset(midx->packs, 0, packs_len); + m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names)); + m->packs = xcalloc(m->num_packs, sizeof(*m->packs)); - for (i = 0; i < midx->num_packs; i++) { - uint32_t name_offset = ntohl(*(uint32_t*)(midx->chunk_pack_lookup + 4 * i)); + cur_pack_name = (const char *)m->chunk_pack_names; + for (i = 0; i < m->num_packs; i++) { + m->pack_names[i] = cur_pack_name; - if (midx->chunk_pack_names + name_offset >= midx->data + midx->data_len) - die("invalid packfile name lookup"); + cur_pack_name += strlen(cur_pack_name) + 1; - midx->pack_names[i] = (const char*)(midx->chunk_pack_names + name_offset); - } + if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) + die(_("multi-pack-index pack names out of order: '%s' before '%s'"), + m->pack_names[i - 1], + m->pack_names[i]); } - return midx; + return m; + +cleanup_fail: + free(m); + free(midx_name); + if (midx_map) + munmap(midx_map, midx_size); + if (0 <= fd) + close(fd); + return NULL; } -struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *oid) +void close_midx(struct multi_pack_index *m) { - struct midxed_git *m; - char *fname = get_midx_head_filename_oid(pack_dir, oid); - m = load_midxed_git_one(fname, pack_dir); - free(fname); - return m; + uint32_t i; + + if (!m) + return; + + munmap((unsigned char *)m->data, m->data_len); + close(m->fd); + m->fd = -1; + + for (i = 0; i < m->num_packs; i++) { + if (m->packs[i]) { + close_pack(m->packs[i]); + free(m->packs[i]); + } + } + FREE_AND_NULL(m->packs); + FREE_AND_NULL(m->pack_names); } -static int prepare_midxed_git_head(char *pack_dir, int local) +int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) { - struct midxed_git *m = midxed_git; - struct midxed_git *m_search; - char *midx_head_path; + struct strbuf pack_name = STRBUF_INIT; - if (!core_midx) - return 1; + if (pack_int_id >= m->num_packs) + die(_("bad pack-int-id: %u (%u total packs"), + pack_int_id, m->num_packs); - for (m_search = midxed_git; m_search; m_search = m_search->next) { - if (!strcmp(pack_dir, m_search->pack_dir)) - return 1; - } + if (m->packs[pack_int_id]) + return 0; - midx_head_path = get_midx_head_filename_dir(pack_dir); - if (midx_head_path) { - midxed_git = load_midxed_git_one(midx_head_path, pack_dir); - midxed_git->next = m; - free(midx_head_path); - } else if (!m) { - midxed_git = load_empty_midxed_git(); - } + strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir, + m->pack_names[pack_int_id]); - return !midxed_git; + m->packs[pack_int_id] = 
add_packed_git(pack_name.buf, pack_name.len, m->local); + strbuf_release(&pack_name); + return !m->packs[pack_int_id]; } -int prepare_midxed_git_objdir(char *obj_dir, int local) +int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result) { - int ret; - struct strbuf pack_dir = STRBUF_INIT; - strbuf_addstr(&pack_dir, obj_dir); - strbuf_add(&pack_dir, "/pack", 5); - - ret = prepare_midxed_git_head(pack_dir.buf, local); - strbuf_release(&pack_dir); - return ret; + return bsearch_hash(oid->hash, m->chunk_oid_fanout, m->chunk_oid_lookup, + MIDX_HASH_LEN, result); } -struct pack_midx_details *nth_midxed_object_details(struct midxed_git *m, - uint32_t n, - struct pack_midx_details *d) +struct object_id *nth_midxed_object_oid(struct object_id *oid, + struct multi_pack_index *m, + uint32_t n) { - struct pack_midx_details_internal *d_internal; - const unsigned char *details = m->chunk_object_offsets; - - if (n >= m->num_objects) { + if (n >= m->num_objects) return NULL; - } - d_internal = (struct pack_midx_details_internal*)(details + 8 * n); - d->pack_int_id = ntohl(d_internal->pack_int_id); - d->offset = ntohl(d_internal->internal_offset); + hashcpy(oid->hash, m->chunk_oid_lookup + m->hash_len * n); + return oid; +} + +static off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) +{ + const unsigned char *offset_data; + uint32_t offset32; + + offset_data = m->chunk_object_offsets + pos * MIDX_CHUNK_OFFSET_WIDTH; + offset32 = get_be32(offset_data + sizeof(uint32_t)); - if (m->chunk_large_offsets && d->offset & MIDX_LARGE_OFFSET_NEEDED) { - uint32_t large_offset = d->offset ^ MIDX_LARGE_OFFSET_NEEDED; - const unsigned char *large_offsets = m->chunk_large_offsets + 8 * large_offset; + if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) { + if (sizeof(off_t) < sizeof(uint64_t)) + die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); - d->offset = (((uint64_t)ntohl(*((uint32_t *)(large_offsets + 0)))) << 32) | - ntohl(*((uint32_t *)(large_offsets + 4))); + offset32 ^= MIDX_LARGE_OFFSET_NEEDED; + return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32); } - return d; + return offset32; } -struct pack_midx_entry *nth_midxed_object_entry(struct midxed_git *m, - uint32_t n, - struct pack_midx_entry *e) +static uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos) { - struct pack_midx_details details; - const unsigned char *index = m->chunk_oid_lookup; + return get_be32(m->chunk_object_offsets + pos * MIDX_CHUNK_OFFSET_WIDTH); +} - if (!nth_midxed_object_details(m, n, &details)) - return NULL; +static int nth_midxed_pack_entry(struct multi_pack_index *m, struct pack_entry *e, uint32_t pos) +{ + uint32_t pack_int_id; + struct packed_git *p; - memcpy(e->oid.hash, index + m->hdr->hash_len * n, m->hdr->hash_len); - e->pack_int_id = details.pack_int_id; - e->offset = details.offset; + if (pos >= m->num_objects) + return 0; - /* Use zero for mtime so this entry is "older" than new duplicates */ - e->pack_mtime = 0; + pack_int_id = nth_midxed_pack_int_id(m, pos); + + if (prepare_midx_pack(m, pack_int_id)) + die(_("error preparing packfile from multi-pack-index")); + p = m->packs[pack_int_id]; + + /* + * We are about to tell the caller where they can locate the + * requested object. We better make sure the packfile is + * still here and can be accessed before supplying that + * answer, as it may have been deleted since the MIDX was + * loaded! 
+ */ + if (!is_pack_valid(p)) + return 0; - return e; + if (p->num_bad_objects) { + uint32_t i; + struct object_id oid; + nth_midxed_object_oid(&oid, m, pos); + for (i = 0; i < p->num_bad_objects; i++) + if (!hashcmp(oid.hash, + p->bad_object_sha1 + the_hash_algo->rawsz * i)) + return 0; + } + + e->offset = nth_midxed_offset(m, pos); + e->p = p; + + return 1; } -const struct object_id *nth_midxed_object_oid(struct object_id *oid, - struct midxed_git *m, - uint32_t n) +int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m) { - struct pack_midx_entry e; + uint32_t pos; - if (!nth_midxed_object_entry(m, n, &e)) + if (!bsearch_midx(oid, m, &pos)) return 0; - hashcpy(oid->hash, e.oid.hash); - return oid; + return nth_midxed_pack_entry(m, e, pos); } -int bsearch_midx(struct midxed_git *m, const unsigned char *sha1, uint32_t *pos) +int midx_contains_pack(struct multi_pack_index *m, const char *idx_name) { - uint32_t last, first = 0; - - if (sha1[0]) - first = ntohl(*(uint32_t*)(m->chunk_oid_fanout + 4 * (sha1[0] - 1))); - last = ntohl(*(uint32_t*)(m->chunk_oid_fanout + 4 * sha1[0])); + uint32_t first = 0, last = m->num_packs; while (first < last) { uint32_t mid = first + (last - first) / 2; - const unsigned char *current; + const char *current; int cmp; - current = m->chunk_oid_lookup + m->hdr->hash_len * mid; - cmp = hashcmp(sha1, current); - if (!cmp) { - *pos = mid; + current = m->pack_names[mid]; + cmp = strcmp(idx_name, current); + if (!cmp) return 1; - } if (cmp > 0) { first = mid + 1; continue; @@ -364,157 +327,322 @@ int bsearch_midx(struct midxed_git *m, const unsigned char *sha1, uint32_t *pos) last = mid; } - *pos = first; return 0; } -static int prepare_midx_pack(struct midxed_git *m, uint32_t pack_int_id) +int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local) { - struct strbuf pack_name = STRBUF_INIT; + struct multi_pack_index *m; + struct multi_pack_index *m_search; + int config_value; + static int env_value = -1; - if (pack_int_id >= m->hdr->num_packs) - return 1; + if (env_value < 0) + env_value = git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0); - if (m->packs[pack_int_id]) + if (!env_value && + (repo_config_get_bool(r, "core.multipackindex", &config_value) || + !config_value)) return 0; - strbuf_addstr(&pack_name, m->pack_dir); - strbuf_addstr(&pack_name, "/"); - strbuf_addstr(&pack_name, m->pack_names[pack_int_id]); - strbuf_strip_suffix(&pack_name, ".pack"); - strbuf_addstr(&pack_name, ".idx"); + for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next) + if (!strcmp(object_dir, m_search->object_dir)) + return 1; - m->packs[pack_int_id] = add_packed_git(pack_name.buf, pack_name.len, 1); - strbuf_release(&pack_name); - return !m->packs[pack_int_id]; + m = load_multi_pack_index(object_dir, local); + + if (m) { + m->next = r->objects->multi_pack_index; + r->objects->multi_pack_index = m; + return 1; + } + + return 0; } -static int find_pack_entry_midx(const struct object_id *oid, - struct midxed_git *m, - struct packed_git **p, - off_t *offset) +static size_t write_midx_header(struct hashfile *f, + unsigned char num_chunks, + uint32_t num_packs) { - uint32_t pos; - struct pack_midx_details d; + unsigned char byte_values[4]; - if (!bsearch_midx(m, oid->hash, &pos) || - !nth_midxed_object_details(m, pos, &d)) - return 0; + hashwrite_be32(f, MIDX_SIGNATURE); + byte_values[0] = MIDX_VERSION; + byte_values[1] = MIDX_HASH_VERSION; + byte_values[2] = num_chunks; + byte_values[3] = 0; /* 
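For context on how these pieces are meant to be consumed (illustration only; the wrapper name is invented, everything it calls is introduced by this patch): fill_midx_entry() is the read-side entry point, and the packfile.c changes later in this patch walk it over every loaded multi-pack-index before falling back to the ordinary pack search.

	/* Sketch: resolve an object through the repository's MIDX files,
	 * the same shape as the new loop in find_pack_entry(). */
	static int locate_via_midx(struct repository *r,
				   const struct object_id *oid,
				   struct pack_entry *e)
	{
		struct multi_pack_index *m;

		for (m = get_multi_pack_index(r); m; m = m->next)
			if (fill_midx_entry(oid, e, m))
				return 1;	/* e->p and e->offset are set */
		return 0;
	}
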
unused */ + hashwrite(f, byte_values, sizeof(byte_values)); + hashwrite_be32(f, num_packs); - if (d.pack_int_id >= m->num_packs) - die(_("Bad pack-int-id")); + return MIDX_HEADER_SIZE; +} - /* load packfile, if necessary */ - if (prepare_midx_pack(m, d.pack_int_id)) - return 0; +struct pack_list { + struct packed_git **list; + char **names; + uint32_t nr; + uint32_t alloc_list; + uint32_t alloc_names; + size_t pack_name_concat_len; + struct multi_pack_index *m; +}; - *p = m->packs[d.pack_int_id]; - *offset = d.offset; +static void add_pack_to_midx(const char *full_path, size_t full_path_len, + const char *file_name, void *data) +{ + struct pack_list *packs = (struct pack_list *)data; - return 1; -} + if (ends_with(file_name, ".idx")) { + if (packs->m && midx_contains_pack(packs->m, file_name)) + return; -int fill_pack_entry_midx(const struct object_id *oid, - struct pack_entry *e) -{ - struct packed_git *p; - struct midxed_git *m; + ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list); + ALLOC_GROW(packs->names, packs->nr + 1, packs->alloc_names); - if (!core_midx) - return 0; + packs->list[packs->nr] = add_packed_git(full_path, + full_path_len, + 0); - m = midxed_git; - while (m) - { - off_t offset; - if (!find_pack_entry_midx(oid, m, &p, &offset)) { - m = m->next; - continue; + if (!packs->list[packs->nr]) { + warning(_("failed to add packfile '%s'"), + full_path); + return; } - /* - * We are about to tell the caller where they can locate the - * requested object. We better make sure the packfile is - * still here and can be accessed before supplying that - * answer, as it may have been deleted since the MIDX was - * loaded! - */ - if (!is_pack_valid(p)) - return 0; - - e->offset = offset; - e->p = p; + if (open_pack_index(packs->list[packs->nr])) { + warning(_("failed to open pack-index '%s'"), + full_path); + close_pack(packs->list[packs->nr]); + FREE_AND_NULL(packs->list[packs->nr]); + return; + } - return 1; + packs->names[packs->nr] = xstrdup(file_name); + packs->pack_name_concat_len += strlen(file_name) + 1; + packs->nr++; } +} - return 0; +struct pack_pair { + uint32_t pack_int_id; + char *pack_name; +}; + +static int pack_pair_compare(const void *_a, const void *_b) +{ + struct pack_pair *a = (struct pack_pair *)_a; + struct pack_pair *b = (struct pack_pair *)_b; + return strcmp(a->pack_name, b->pack_name); } -int contains_pack(struct midxed_git *m, const char *pack_name) +static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *perm) { - uint32_t first = 0, last = m->num_packs; + uint32_t i; + struct pack_pair *pairs; - while (first < last) { - uint32_t mid = first + (last - first) / 2; - const char *current; - int cmp; + ALLOC_ARRAY(pairs, nr_packs); - current = m->pack_names[mid]; - cmp = strcmp(pack_name, current); - if (!cmp) - return 1; - if (cmp > 0) { - first = mid + 1; - continue; - } - last = mid; + for (i = 0; i < nr_packs; i++) { + pairs[i].pack_int_id = i; + pairs[i].pack_name = pack_names[i]; + } + + QSORT(pairs, nr_packs, pack_pair_compare); + + for (i = 0; i < nr_packs; i++) { + pack_names[i] = pairs[i].pack_name; + perm[pairs[i].pack_int_id] = i; } + free(pairs); +} + +struct pack_midx_entry { + struct object_id oid; + uint32_t pack_int_id; + time_t pack_mtime; + uint64_t offset; +}; + +static int midx_oid_compare(const void *_a, const void *_b) +{ + const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a; + const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b; + int cmp = oidcmp(&a->oid, &b->oid); + + if 
(cmp) + return cmp; + + if (a->pack_mtime > b->pack_mtime) + return -1; + else if (a->pack_mtime < b->pack_mtime) + return 1; + + return a->pack_int_id - b->pack_int_id; +} + +static int nth_midxed_pack_midx_entry(struct multi_pack_index *m, + uint32_t *pack_perm, + struct pack_midx_entry *e, + uint32_t pos) +{ + if (pos >= m->num_objects) + return 1; + + nth_midxed_object_oid(&e->oid, m, pos); + e->pack_int_id = pack_perm[nth_midxed_pack_int_id(m, pos)]; + e->offset = nth_midxed_offset(m, pos); + + /* consider objects in midx to be from "old" packs */ + e->pack_mtime = 0; return 0; } -static size_t write_midx_chunk_packlookup( - struct hashfile *f, - const char **pack_names, uint32_t nr_packs) +static void fill_pack_entry(uint32_t pack_int_id, + struct packed_git *p, + uint32_t cur_object, + struct pack_midx_entry *entry) { - uint32_t i, cur_len = 0; + if (!nth_packed_object_oid(&entry->oid, p, cur_object)) + die(_("failed to locate object %d in packfile"), cur_object); - for (i = 0; i < nr_packs; i++) { - hashwrite_be32(f, cur_len); - cur_len += strlen(pack_names[i]) + 1; + entry->pack_int_id = pack_int_id; + entry->pack_mtime = p->mtime; + + entry->offset = nth_packed_object_offset(p, cur_object); +} + +/* + * It is possible to artificially get into a state where there are many + * duplicate copies of objects. That can create high memory pressure if + * we are to create a list of all objects before de-duplication. To reduce + * this memory pressure without a significant performance drop, automatically + * group objects by the first byte of their object id. Use the IDX fanout + * tables to group the data, copy to a local array, then sort. + * + * Copy only the de-duplicated entries (selected by most-recent modified time + * of a packfile containing the object). + */ +static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, + struct packed_git **p, + uint32_t *perm, + uint32_t nr_packs, + uint32_t *nr_objects) +{ + uint32_t cur_fanout, cur_pack, cur_object; + uint32_t alloc_fanout, alloc_objects, total_objects = 0; + struct pack_midx_entry *entries_by_fanout = NULL; + struct pack_midx_entry *deduplicated_entries = NULL; + uint32_t start_pack = m ? m->num_packs : 0; + + for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) + total_objects += p[cur_pack]->num_objects; + + /* + * As we de-duplicate by fanout value, we expect the fanout + * slices to be evenly distributed, with some noise. Hence, + * allocate slightly more than one 256th. + */ + alloc_objects = alloc_fanout = total_objects > 3200 ? 
total_objects / 200 : 16; + + ALLOC_ARRAY(entries_by_fanout, alloc_fanout); + ALLOC_ARRAY(deduplicated_entries, alloc_objects); + *nr_objects = 0; + + for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { + uint32_t nr_fanout = 0; + + if (m) { + uint32_t start = 0, end; + + if (cur_fanout) + start = ntohl(m->chunk_oid_fanout[cur_fanout - 1]); + end = ntohl(m->chunk_oid_fanout[cur_fanout]); + + for (cur_object = start; cur_object < end; cur_object++) { + ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); + nth_midxed_pack_midx_entry(m, perm, + &entries_by_fanout[nr_fanout], + cur_object); + nr_fanout++; + } + } + + for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) { + uint32_t start = 0, end; + + if (cur_fanout) + start = get_pack_fanout(p[cur_pack], cur_fanout - 1); + end = get_pack_fanout(p[cur_pack], cur_fanout); + + for (cur_object = start; cur_object < end; cur_object++) { + ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); + fill_pack_entry(perm[cur_pack], p[cur_pack], cur_object, &entries_by_fanout[nr_fanout]); + nr_fanout++; + } + } + + QSORT(entries_by_fanout, nr_fanout, midx_oid_compare); + + /* + * The batch is now sorted by OID and then mtime (descending). + * Take only the first duplicate. + */ + for (cur_object = 0; cur_object < nr_fanout; cur_object++) { + if (cur_object && !oidcmp(&entries_by_fanout[cur_object - 1].oid, + &entries_by_fanout[cur_object].oid)) + continue; + + ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects); + memcpy(&deduplicated_entries[*nr_objects], + &entries_by_fanout[cur_object], + sizeof(struct pack_midx_entry)); + (*nr_objects)++; + } } - return sizeof(uint32_t) * (size_t)nr_packs; + free(entries_by_fanout); + return deduplicated_entries; } -static size_t write_midx_chunk_packnames( - struct hashfile *f, - const char **pack_names, uint32_t nr_packs) +static size_t write_midx_pack_names(struct hashfile *f, + char **pack_names, + uint32_t num_packs) { uint32_t i; + unsigned char padding[MIDX_CHUNK_ALIGNMENT]; size_t written = 0; - for (i = 0; i < nr_packs; i++) { + + for (i = 0; i < num_packs; i++) { size_t writelen = strlen(pack_names[i]) + 1; - if (i > 0 && strcmp(pack_names[i], pack_names[i-1]) <= 0) - BUG("incorrect pack order: %s before %s", - pack_names[i-1], + + if (i && strcmp(pack_names[i], pack_names[i - 1]) <= 0) + BUG("incorrect pack-file order: %s before %s", + pack_names[i - 1], pack_names[i]); hashwrite(f, pack_names[i], writelen); written += writelen; } + /* add padding to be aligned */ + i = MIDX_CHUNK_ALIGNMENT - (written % MIDX_CHUNK_ALIGNMENT); + if (i < MIDX_CHUNK_ALIGNMENT) { + memset(padding, 0, sizeof(padding)); + hashwrite(f, padding, i); + written += i; + } + return written; } -static size_t write_midx_chunk_oidfanout( - struct hashfile *f, - struct pack_midx_entry *objects, uint32_t nr_objects) +static size_t write_midx_oid_fanout(struct hashfile *f, + struct pack_midx_entry *objects, + uint32_t nr_objects) { struct pack_midx_entry *list = objects; struct pack_midx_entry *last = objects + nr_objects; - uint32_t count_distinct = 0; + uint32_t count = 0; uint32_t i; /* @@ -524,31 +652,24 @@ static size_t write_midx_chunk_oidfanout( */ for (i = 0; i < 256; i++) { struct pack_midx_entry *next = list; - struct pack_midx_entry *prev = NULL; - - while (next < last) { - if (next->oid.hash[0] != i) - break; - - if (!prev || oidcmp(&(prev->oid), &(next->oid))) - count_distinct++; - prev = next++; + while (next < last && next->oid.hash[0] == i) { + count++; + next++; } - hashwrite_be32(f, 
count_distinct); + hashwrite_be32(f, count); list = next; } return MIDX_CHUNK_FANOUT_SIZE; } -static size_t write_midx_chunk_oidlookup( - struct hashfile *f, unsigned char hash_len, - struct pack_midx_entry *objects, uint32_t nr_objects) +static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len, + struct pack_midx_entry *objects, + uint32_t nr_objects) { struct pack_midx_entry *list = objects; - struct object_id *last_oid = NULL; uint32_t i; size_t written = 0; @@ -556,19 +677,13 @@ static size_t write_midx_chunk_oidlookup( struct pack_midx_entry *obj = list++; if (i < nr_objects - 1) { - /* Check out-of-order */ struct pack_midx_entry *next = list; if (oidcmp(&obj->oid, &next->oid) >= 0) BUG("OIDs not in order: %s >= %s", - oid_to_hex(&obj->oid), - oid_to_hex(&next->oid)); + oid_to_hex(&obj->oid), + oid_to_hex(&next->oid)); } - /* Skip duplicate objects */ - if (last_oid && !oidcmp(last_oid, &obj->oid)) - continue; - - last_oid = &obj->oid; hashwrite(f, obj->oid.hash, (int)hash_len); written += hash_len; } @@ -576,24 +691,17 @@ static size_t write_midx_chunk_oidlookup( return written; } -static size_t write_midx_chunk_objectoffsets( - struct hashfile *f, int large_offset_needed, - struct pack_midx_entry *objects, uint32_t nr_objects, uint32_t *pack_perm) +static size_t write_midx_object_offsets(struct hashfile *f, int large_offset_needed, + struct pack_midx_entry *objects, uint32_t nr_objects) { struct pack_midx_entry *list = objects; - struct object_id *last_oid = 0; uint32_t i, nr_large_offset = 0; size_t written = 0; for (i = 0; i < nr_objects; i++) { struct pack_midx_entry *obj = list++; - if (last_oid && !oidcmp(last_oid, &obj->oid)) - continue; - - last_oid = &obj->oid; - - hashwrite_be32(f, pack_perm[obj->pack_int_id]); + hashwrite_be32(f, obj->pack_int_id); if (large_offset_needed && obj->offset >> 31) hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); @@ -604,34 +712,27 @@ static size_t write_midx_chunk_objectoffsets( else hashwrite_be32(f, (uint32_t)obj->offset); - written += 2 * sizeof(uint32_t); + written += MIDX_CHUNK_OFFSET_WIDTH; } return written; } -static size_t write_midx_chunk_largeoffsets( - struct hashfile *f, uint32_t nr_large_offset, - struct pack_midx_entry *objects, uint32_t nr_objects) +static size_t write_midx_large_offsets(struct hashfile *f, uint32_t nr_large_offset, + struct pack_midx_entry *objects, uint32_t nr_objects) { struct pack_midx_entry *list = objects; - struct object_id *last_oid = 0; size_t written = 0; while (nr_large_offset) { struct pack_midx_entry *obj = list++; uint64_t offset = obj->offset; - if (last_oid && !oidcmp(last_oid, &obj->oid)) - continue; - - last_oid = &obj->oid; - if (!(offset >> 31)) continue; hashwrite_be32(f, offset >> 32); - hashwrite_be32(f, offset & 0xffffffff); + hashwrite_be32(f, offset & 0xffffffffUL); written += 2 * sizeof(uint32_t); nr_large_offset--; @@ -640,411 +741,276 @@ static size_t write_midx_chunk_largeoffsets( return written; } -struct pack_pair { - uint32_t pack_int_id; - const char *pack_name; -}; - -static int pack_pair_compare(const void *_a, const void *_b) -{ - struct pack_pair *a = (struct pack_pair *)_a; - struct pack_pair *b = (struct pack_pair *)_b; - return strcmp(a->pack_name, b->pack_name); -} - -static void sort_packs_by_name(const char **pack_names, uint32_t nr_packs, uint32_t *perm) +int write_midx_file(const char *object_dir) { + unsigned char cur_chunk, num_chunks = 0; + char *midx_name; uint32_t i; - struct pack_pair *pairs; - - ALLOC_ARRAY(pairs, 
nr_packs); - - for (i = 0; i < nr_packs; i++) { - pairs[i].pack_int_id = i; - pairs[i].pack_name = pack_names[i]; + struct hashfile *f = NULL; + struct lock_file lk; + struct pack_list packs; + uint32_t *pack_perm = NULL; + uint64_t written = 0; + uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1]; + uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1]; + uint32_t nr_entries, num_large_offsets = 0; + struct pack_midx_entry *entries = NULL; + int large_offsets_needed = 0; + + midx_name = get_midx_filename(object_dir); + if (safe_create_leading_directories(midx_name)) { + UNLEAK(midx_name); + die_errno(_("unable to create leading directories of %s"), + midx_name); + } + + packs.m = load_multi_pack_index(object_dir, 1); + + packs.nr = 0; + packs.alloc_list = packs.m ? packs.m->num_packs : 16; + packs.alloc_names = packs.alloc_list; + packs.list = NULL; + packs.names = NULL; + packs.pack_name_concat_len = 0; + ALLOC_ARRAY(packs.list, packs.alloc_list); + ALLOC_ARRAY(packs.names, packs.alloc_names); + + if (packs.m) { + for (i = 0; i < packs.m->num_packs; i++) { + ALLOC_GROW(packs.list, packs.nr + 1, packs.alloc_list); + ALLOC_GROW(packs.names, packs.nr + 1, packs.alloc_names); + + packs.list[packs.nr] = NULL; + packs.names[packs.nr] = xstrdup(packs.m->pack_names[i]); + packs.pack_name_concat_len += strlen(packs.names[packs.nr]) + 1; + packs.nr++; + } } - QSORT(pairs, nr_packs, pack_pair_compare); + for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &packs); - for (i = 0; i < nr_packs; i++) { - pack_names[i] = pairs[i].pack_name; - perm[pairs[i].pack_int_id] = i; - } -} + if (packs.m && packs.nr == packs.m->num_packs) + goto cleanup; -const char *write_midx_file(const char *pack_dir, - const char *midx_name, - const char **pack_names, - uint32_t nr_packs, - struct pack_midx_entry *objects, - uint32_t nr_objects) -{ - struct hashfile *f; - int i, chunk, fd; - struct pack_midx_header hdr; - uint32_t chunk_ids[7]; - uint64_t chunk_offsets[7]; - unsigned char large_offset_needed = 0; - unsigned int nr_large_offset = 0; - unsigned char final_hash[GIT_MAX_RAWSZ]; - const char *final_hex; - int rename_needed = 0; - int total_name_len = 0; - uint32_t *pack_perm; - size_t written = 0; + if (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT) + packs.pack_name_concat_len += MIDX_CHUNK_ALIGNMENT - + (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT); - if (!core_midx) - return 0; + ALLOC_ARRAY(pack_perm, packs.nr); + sort_packs_by_name(packs.names, packs.nr, pack_perm); - /* determine if large offsets are required */ - for (i = 0; i < nr_objects; i++) { - if (objects[i].offset > 0x7fffffff) - nr_large_offset++; - if (objects[i].offset > 0xffffffff) - large_offset_needed = 1; - } + entries = get_sorted_entries(packs.m, packs.list, pack_perm, packs.nr, &nr_entries); - /* Sort packs */ - if (nr_packs) { - ALLOC_ARRAY(pack_perm, nr_packs); - sort_packs_by_name(pack_names, nr_packs, pack_perm); - } else { - pack_perm = 0; + for (i = 0; i < nr_entries; i++) { + if (entries[i].offset > 0x7fffffff) + num_large_offsets++; + if (entries[i].offset > 0xffffffff) + large_offsets_needed = 1; } - if (nr_packs) { - for (i = 0; i < nr_packs; i++) { - total_name_len += strlen(pack_names[i]) + 1; - } - } + hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR); + f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); + FREE_AND_NULL(midx_name); - /* open temp file, or direct file if given */ - if (!midx_name) { - struct strbuf tmp_file = STRBUF_INIT; - strbuf_addstr(&tmp_file, pack_dir); - strbuf_addstr(&tmp_file, 
"/tmp_midx_XXXXXX"); + if (packs.m) + close_midx(packs.m); - fd = git_mkstemp_mode(tmp_file.buf, 0444); - if (fd < 0) - die_errno("unable to create '%s'", tmp_file.buf); + cur_chunk = 0; + num_chunks = large_offsets_needed ? 5 : 4; - midx_name = strbuf_detach(&tmp_file, NULL); - rename_needed = 1; - } else { - unlink(midx_name); - fd = open(midx_name, O_CREAT|O_EXCL|O_WRONLY, 0600); + written = write_midx_header(f, num_chunks, packs.nr); - if (fd < 0) - die_errno("unable to create '%s'", midx_name); - } - f = hashfd(fd, midx_name); + chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES; + chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH; - /* fill header info */ - hdr.midx_signature = htonl(MIDX_SIGNATURE); - hdr.midx_version = htonl(MIDX_VERSION); + cur_chunk++; + chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT; + chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len; - hdr.hash_version = MIDX_OID_VERSION; - hdr.hash_len = MIDX_OID_LEN; - hdr.num_base_midx = 0; - hdr.num_packs = htonl(nr_packs); + cur_chunk++; + chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP; + chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + MIDX_CHUNK_FANOUT_SIZE; - /* - * We expect the following chunks, which are required: - * - * Packfile Name Lookup - * Packfile Names - * OID Fanout - * OID Lookup - * Object Offsets - */ - hdr.num_chunks = large_offset_needed ? 6 : 5; + cur_chunk++; + chunk_ids[cur_chunk] = MIDX_CHUNKID_OBJECTOFFSETS; + chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_HASH_LEN; - /* write header to file */ - assert(sizeof(hdr) == 16); - hashwrite(f, &hdr, sizeof(hdr)); - written += sizeof(hdr); + cur_chunk++; + chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_CHUNK_OFFSET_WIDTH; + if (large_offsets_needed) { + chunk_ids[cur_chunk] = MIDX_CHUNKID_LARGEOFFSETS; - /* - * Fill initial chunk values using offsets - * relative to first chunk. 
- */ - chunk_offsets[0] = sizeof(hdr) + MIDX_CHUNKLOOKUP_WIDTH * (hdr.num_chunks + 1); - chunk_ids[0] = MIDX_CHUNKID_PACKLOOKUP; - chunk_offsets[1] = chunk_offsets[0] + nr_packs * 4; - chunk_ids[1] = MIDX_CHUNKID_OIDFANOUT; - chunk_offsets[2] = chunk_offsets[1] + MIDX_CHUNK_FANOUT_SIZE; - chunk_ids[2] = MIDX_CHUNKID_OIDLOOKUP; - chunk_offsets[3] = chunk_offsets[2] + (uint64_t)nr_objects - * (uint64_t)hdr.hash_len; - chunk_ids[3] = MIDX_CHUNKID_OBJECTOFFSETS; - chunk_offsets[4] = chunk_offsets[3] + MIDX_CHUNK_OFFSET_WIDTH * (uint64_t)nr_objects; - - if (large_offset_needed) { - chunk_ids[4] = MIDX_CHUNKID_LARGEOFFSETS; - chunk_offsets[5] = chunk_offsets[4] + MIDX_CHUNK_LARGE_OFFSET_WIDTH * (uint64_t)nr_large_offset; - chunk_ids[5] = MIDX_CHUNKID_PACKNAMES; - chunk_offsets[6] = chunk_offsets[5] + total_name_len; - chunk_ids[6] = 0; - } else { - chunk_ids[4] = MIDX_CHUNKID_PACKNAMES; - chunk_offsets[5] = chunk_offsets[4] + total_name_len; - chunk_ids[5] = 0; + cur_chunk++; + chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + + num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH; } - for (i = 0; i <= hdr.num_chunks; i++) { + chunk_ids[cur_chunk] = 0; + + for (i = 0; i <= num_chunks; i++) { + if (i && chunk_offsets[i] < chunk_offsets[i - 1]) + BUG("incorrect chunk offsets: %"PRIu64" before %"PRIu64, + chunk_offsets[i - 1], + chunk_offsets[i]); + + if (chunk_offsets[i] % MIDX_CHUNK_ALIGNMENT) + BUG("chunk offset %"PRIu64" is not properly aligned", + chunk_offsets[i]); + hashwrite_be32(f, chunk_ids[i]); hashwrite_be32(f, chunk_offsets[i] >> 32); - hashwrite_be32(f, chunk_offsets[i] & 0xffffffff); + hashwrite_be32(f, chunk_offsets[i]); + written += MIDX_CHUNKLOOKUP_WIDTH; } - for (chunk = 0; chunk <= hdr.num_chunks; chunk++) { - if (chunk_offsets[chunk] != written) - BUG("chunk %d has intended chunk offset %"PRIx64" does not match expected %"PRIx64"", - chunk, - (uint64_t)chunk_offsets[chunk], - (uint64_t)written); + for (i = 0; i < num_chunks; i++) { + if (written != chunk_offsets[i]) + BUG("incorrect chunk offset (%"PRIu64" != %"PRIu64") for chunk id %"PRIx32, + chunk_offsets[i], + written, + chunk_ids[i]); - switch (chunk_ids[chunk]) { - case MIDX_CHUNKID_PACKLOOKUP: - written += write_midx_chunk_packlookup(f, pack_names, nr_packs); - break; - - case MIDX_CHUNKID_PACKNAMES: - written += write_midx_chunk_packnames(f, pack_names, nr_packs); - break; - - case MIDX_CHUNKID_OIDFANOUT: - written += write_midx_chunk_oidfanout(f, objects, nr_objects); - break; + switch (chunk_ids[i]) { + case MIDX_CHUNKID_PACKNAMES: + written += write_midx_pack_names(f, packs.names, packs.nr); + break; - case MIDX_CHUNKID_OIDLOOKUP: - written += write_midx_chunk_oidlookup(f, hdr.hash_len, objects, - nr_objects); - break; + case MIDX_CHUNKID_OIDFANOUT: + written += write_midx_oid_fanout(f, entries, nr_entries); + break; - case MIDX_CHUNKID_OBJECTOFFSETS: - written += write_midx_chunk_objectoffsets(f, large_offset_needed, - objects, nr_objects, - pack_perm); - break; + case MIDX_CHUNKID_OIDLOOKUP: + written += write_midx_oid_lookup(f, MIDX_HASH_LEN, entries, nr_entries); + break; - case MIDX_CHUNKID_LARGEOFFSETS: - written += write_midx_chunk_largeoffsets(f, nr_large_offset, - objects, nr_objects); - break; + case MIDX_CHUNKID_OBJECTOFFSETS: + written += write_midx_object_offsets(f, large_offsets_needed, entries, nr_entries); + break; - case 0: - break; + case MIDX_CHUNKID_LARGEOFFSETS: + written += write_midx_large_offsets(f, num_large_offsets, entries, nr_entries); + break; - default: - BUG("midx tried to write an 
invalid chunk ID %08X", chunk_ids[chunk]); - break; + default: + BUG("trying to write unknown chunk id %"PRIx32, + chunk_ids[i]); } } - finalize_hashfile(f, final_hash, CSUM_CLOSE | CSUM_FSYNC | CSUM_HASH_IN_STREAM); - - if (rename_needed) - { - struct object_id oid; - char *fname; - - memcpy(oid.hash, final_hash, GIT_MAX_RAWSZ); - fname = get_midx_head_filename_oid(pack_dir, &oid); - final_hex = sha1_to_hex(final_hash); + if (written != chunk_offsets[num_chunks]) + BUG("incorrect final offset %"PRIu64" != %"PRIu64, + written, + chunk_offsets[num_chunks]); - if (rename(midx_name, fname)) - die("failed to rename %s to %s", midx_name, fname); + finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM); + commit_lock_file(&lk); - free(fname); - } else { - final_hex = midx_name; - } - - return final_hex; -} - -int close_midx(struct midxed_git *m) -{ - int i; - if (m->midx_fd < 0) - return 0; - - for (i = 0; i < m->num_packs; i++) { - if (m->packs[i]) { - close_pack(m->packs[i]); - free(m->packs[i]); - m->packs[i] = NULL; +cleanup: + for (i = 0; i < packs.nr; i++) { + if (packs.list[i]) { + close_pack(packs.list[i]); + free(packs.list[i]); } + free(packs.names[i]); } - munmap((void *)m->data, m->data_len); - m->data = 0; - - close(m->midx_fd); - m->midx_fd = -1; - - free(m->packs); - free(m->pack_names); - - return 1; + free(packs.list); + free(packs.names); + free(entries); + free(pack_perm); + free(midx_name); + return 0; } -void close_all_midx(void) +void clear_midx_file(const char *object_dir) { - struct midxed_git *m = midxed_git; - struct midxed_git *next; - - while (m) { - next = m->next; - close_midx(m); - free(m); - m = next; + char *midx = get_midx_filename(object_dir); + + if (remove_path(midx)) { + UNLEAK(midx); + die(_("failed to clear multi-pack-index at %s"), midx); } - midxed_git = 0; + free(midx); } -static int verify_midx_error = 0; +int verify_midx_error; static void midx_report(const char *fmt, ...) 
{ va_list ap; - struct strbuf sb = STRBUF_INIT; verify_midx_error = 1; - va_start(ap, fmt); - strbuf_vaddf(&sb, fmt, ap); - - fprintf(stderr, "%s\n", sb.buf); - strbuf_release(&sb); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); va_end(ap); } -int midx_verify(const char *pack_dir, const char *midx_id) +int verify_midx_file(const char *object_dir) { - uint32_t i, cur_fanout_pos = 0; - struct midxed_git *m; - const char *midx_head_path; - struct object_id cur_oid, prev_oid, checksum; - struct hashfile *f; - int devnull, checksum_fail = 0; - - if (midx_id) { - size_t sz; - struct strbuf sb = STRBUF_INIT; - strbuf_addf(&sb, "%s/midx-%s.midx", pack_dir, midx_id); - midx_head_path = strbuf_detach(&sb, &sz); - } else { - midx_head_path = get_midx_head_filename_dir(pack_dir); - } + uint32_t i; + struct progress *progress = NULL; + struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); + verify_midx_error = 0; - m = load_midxed_git_one(midx_head_path, pack_dir); + if (!m) + return 0; - if (!m) { - midx_report("failed to find specified midx file"); - goto cleanup; + for (i = 0; i < m->num_packs; i++) { + if (prepare_midx_pack(m, i)) + midx_report("failed to load pack in position %d", i); } + for (i = 0; i < 255; i++) { + uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]); + uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i + 1]); - devnull = open("/dev/null", O_WRONLY); - f = hashfd(devnull, NULL); - hashwrite(f, m->data, m->data_len - m->hdr->hash_len); - finalize_hashfile(f, checksum.hash, CSUM_CLOSE); - if (hashcmp(checksum.hash, m->data + m->data_len - m->hdr->hash_len)) { - midx_report(_("the midx file has incorrect checksum and is likely corrupt")); - verify_midx_error = 0; - checksum_fail = 1; + if (oid_fanout1 > oid_fanout2) + midx_report(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"), + i, oid_fanout1, oid_fanout2, i + 1); } - if (m->hdr->hash_version != MIDX_OID_VERSION) - midx_report("invalid hash version"); - if (m->hdr->hash_len != MIDX_OID_LEN) - midx_report("invalid hash length"); + for (i = 0; i < m->num_objects - 1; i++) { + struct object_id oid1, oid2; - if (verify_midx_error) - goto cleanup; + nth_midxed_object_oid(&oid1, m, i); + nth_midxed_object_oid(&oid2, m, i + 1); - if (!m->chunk_oid_lookup) - midx_report("missing OID Lookup chunk"); - if (!m->chunk_object_offsets) - midx_report("missing Object Offset chunk"); - - if (verify_midx_error) - goto cleanup; - - for (i = 0; i < m->num_packs; i++) { - if (prepare_midx_pack(m, i)) { - midx_report("failed to prepare pack %s", - m->pack_names[i]); - continue; - } - - if (!m->packs[i]->index_data && - open_pack_index(m->packs[i])) - midx_report("failed to open index for pack %s", - m->pack_names[i]); + if (oidcmp(&oid1, &oid2) >= 0) + midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"), + i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1); } - if (verify_midx_error) - goto cleanup; - + progress = start_progress(_("Verifying object offsets"), m->num_objects); for (i = 0; i < m->num_objects; i++) { - struct pack_midx_details details; - uint32_t index_pos, pack_id; - struct packed_git *p; - off_t pack_offset; - - hashcpy(cur_oid.hash, m->chunk_oid_lookup + m->hdr->hash_len * i); - - while (cur_oid.hash[0] > cur_fanout_pos) { - uint32_t fanout_value = get_be32(m->chunk_oid_fanout + cur_fanout_pos * sizeof(uint32_t)); - if (i != fanout_value) - midx_report("midx has incorrect fanout value: fanout[%d] = %u != %u", - cur_fanout_pos, fanout_value, i); - - cur_fanout_pos++; - 
} - - if (i && oidcmp(&prev_oid, &cur_oid) >= 0) - midx_report("midx has incorrect OID order: %s then %s", - oid_to_hex(&prev_oid), - oid_to_hex(&cur_oid)); - - oidcpy(&prev_oid, &cur_oid); + struct object_id oid; + struct pack_entry e; + off_t m_offset, p_offset; - if (!nth_midxed_object_details(m, i, &details)) { - midx_report("nth_midxed_object_details failed with n=%d", i); + nth_midxed_object_oid(&oid, m, i); + if (!fill_midx_entry(&oid, &e, m)) { + midx_report(_("failed to load pack entry for oid[%d] = %s"), + i, oid_to_hex(&oid)); continue; } - pack_id = details.pack_int_id; - if (pack_id >= m->num_packs) { - midx_report("pack-int-id for object n=%d is invalid: %u", - pack_id); - continue; + if (open_pack_index(e.p)) { + midx_report(_("failed to load pack-index for packfile %s"), + e.p->pack_name); + break; } - p = m->packs[pack_id]; + m_offset = e.offset; + p_offset = find_pack_entry_one(oid.hash, e.p); - if (!find_pack_entry_pos(cur_oid.hash, p, &index_pos)) { - midx_report("midx contains object not present in packfile: %s", - oid_to_hex(&cur_oid)); - continue; - } + if (m_offset != p_offset) + midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64), + i, oid_to_hex(&oid), m_offset, p_offset); - pack_offset = nth_packed_object_offset(p, index_pos); - if (details.offset != pack_offset) - midx_report("midx has incorrect offset for %s : %"PRIx64" != %"PRIx64, - oid_to_hex(&cur_oid), - details.offset, - pack_offset); + display_progress(progress, i + 1); } + stop_progress(&progress); -cleanup: - if (m) - close_midx(m); - free(m); - return verify_midx_error | checksum_fail; + return verify_midx_error; } diff --git a/midx.h b/midx.h index 7bc4a3a548a744..ef7326afdbf2f1 100644 --- a/midx.h +++ b/midx.h @@ -1,137 +1,52 @@ -#ifndef MIDX_H -#define MIDX_H +#ifndef __MIDX_H__ +#define __MIDX_H__ -#include "git-compat-util.h" -#include "object.h" -#include "csum-file.h" +#include "repository.h" -extern struct object_id *get_midx_head_oid(const char *pack_dir, - struct object_id *oid); -extern char *get_midx_head_filename_oid(const char *pack_dir, - struct object_id *oid); -extern int fill_pack_entry_midx(const struct object_id *oid, - struct pack_entry *e); +#define GIT_TEST_MULTI_PACK_INDEX "GIT_TEST_MULTI_PACK_INDEX" -struct pack_midx_entry { - struct object_id oid; - uint32_t pack_int_id; - off_t offset; - timestamp_t pack_mtime; -}; - -struct pack_midx_header { - uint32_t midx_signature; - uint32_t midx_version; - unsigned char hash_version; - unsigned char hash_len; - unsigned char num_base_midx; - unsigned char num_chunks; - uint32_t num_packs; -}; - -extern struct midxed_git { - struct midxed_git *next; +struct multi_pack_index { + struct multi_pack_index *next; - int midx_fd; + int fd; - /* the mmap'd data for the midx file */ const unsigned char *data; size_t data_len; - /* points into the mmap'd data */ - struct pack_midx_header *hdr; - - /* can construct filename from obj_dir + "/packs/midx-" + oid + ".midx" */ - struct object_id oid; - - /* derived from the fanout chunk */ + uint32_t signature; + unsigned char version; + unsigned char hash_len; + unsigned char num_chunks; + uint32_t num_packs; uint32_t num_objects; - /* converted number of packs */ - uint32_t num_packs; + int local; - /* hdr->num_packs * 4 bytes */ - const unsigned char *chunk_pack_lookup; const unsigned char *chunk_pack_names; - - /* 256 * 4 bytes */ - const unsigned char *chunk_oid_fanout; - - /* num_objects * hdr->hash_len bytes */ + const uint32_t *chunk_oid_fanout; const unsigned char 
*chunk_oid_lookup; - - /* num_objects * 8 bytes */ const unsigned char *chunk_object_offsets; - - /* - * 8 bytes per large offset. - * (Optional: may be null.) - */ const unsigned char *chunk_large_offsets; - /* - * Points into mmap'd data storing the pack filenames. - */ const char **pack_names; - - /* - * Store an array of pack-pointers. If NULL, then the - * pack has not been loaded yet. The array indices - * correspond to the pack_int_ids from the midx storage. - */ struct packed_git **packs; - - /* something like ".git/objects/pack" */ - char pack_dir[FLEX_ARRAY]; /* more */ -} *midxed_git; - -extern struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *midx_oid); -extern int prepare_midxed_git_objdir(char *obj_dir, int local); - -struct pack_midx_details { - uint32_t pack_int_id; - off_t offset; + char object_dir[FLEX_ARRAY]; }; -extern struct pack_midx_details *nth_midxed_object_details(struct midxed_git *m, - uint32_t n, - struct pack_midx_details *d); -extern struct pack_midx_entry *nth_midxed_object_entry(struct midxed_git *m, - uint32_t n, - struct pack_midx_entry *e); -extern const struct object_id *nth_midxed_object_oid(struct object_id *oid, - struct midxed_git *m, - uint32_t n); - -/* - * Perform a binary search on the object list in a MIDX file for the given sha1. - * - * If the object exists, then return 1 and set *pos to the position of the sha1. - * Otherwise, return 0 and set *pos to the position of the lex-first object greater - * than the given sha1. - */ -extern int bsearch_midx(struct midxed_git *m, const unsigned char *sha1, uint32_t *pos); - -extern int contains_pack(struct midxed_git *m, const char *pack_name); - -/* - * Write a single MIDX file storing the given entries for the - * given list of packfiles. If midx_name is null, then a temp - * file will be created and swapped using the result hash value. - * Otherwise, write directly to midx_name. - * - * Returns the final name of the MIDX file within pack_dir. - */ -extern const char *write_midx_file(const char *pack_dir, - const char *midx_name, - const char **pack_names, - uint32_t nr_packs, - struct pack_midx_entry *objects, - uint32_t nr_objects); - -extern int close_midx(struct midxed_git *m); -extern void close_all_midx(void); - -int midx_verify(const char *pack_dir, const char *midx_id); +struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local); +int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id); +int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result); +struct object_id *nth_midxed_object_oid(struct object_id *oid, + struct multi_pack_index *m, + uint32_t n); +int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m); +int midx_contains_pack(struct multi_pack_index *m, const char *idx_name); +int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local); + +int write_midx_file(const char *object_dir); +void clear_midx_file(const char *object_dir); +int verify_midx_file(const char *object_dir); + +void close_midx(struct multi_pack_index *m); #endif diff --git a/object-store.h b/object-store.h index 67e66227d9c41e..63b7605a3e0b30 100644 --- a/object-store.h +++ b/object-store.h @@ -88,6 +88,8 @@ struct packed_git { char pack_name[FLEX_ARRAY]; /* more */ }; +struct multi_pack_index; + struct raw_object_store { /* * Path to the repository's object store. 
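The midx.h interface above is intentionally small: load, query, close. A minimal consumer could look like the sketch below (illustrative only; report_midx is an invented name, the includes follow the test helper added later in this patch, and error handling is elided):

	#include "cache.h"
	#include "midx.h"

	/* Sketch: print how many packs and objects one MIDX covers. */
	static void report_midx(const char *object_dir)
	{
		struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);

		if (!m)
			return;	/* no multi-pack-index in this object dir */

		printf("%s: %"PRIu32" packs, %"PRIu32" objects\n",
		       m->object_dir, m->num_packs, m->num_objects);

		close_midx(m);
	}
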
@@ -110,6 +112,13 @@ struct raw_object_store { struct commit_graph *commit_graph; unsigned commit_graph_attempted : 1; /* if loading has been attempted */ + /* + * private data + * + * should only be accessed directly by packfile.c and midx.c + */ + struct multi_pack_index *multi_pack_index; + /* * private data * @@ -120,6 +129,12 @@ struct raw_object_store { /* A most-recently-used ordered version of the packed_git list. */ struct list_head packed_git_mru; + /* + * A linked list containing all packfiles, starting with those + * contained in the multi_pack_index. + */ + struct packed_git *all_packs; + /* * A fast, rough count of the number of objects in the repository. * These two fields are not meant for direct access. Use diff --git a/pack-bitmap.c b/pack-bitmap.c index f0a1937a1cc5fb..4e50ab391fa0df 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -335,7 +335,7 @@ static int open_pack_bitmap(struct bitmap_index *bitmap_git) assert(!bitmap_git->map && !bitmap_git->loaded); - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (open_pack_bitmap_1(bitmap_git, p) == 0) ret = 0; } diff --git a/pack-objects.c b/pack-objects.c index 6ef87e5683aacd..d04cfa8e9f173b 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -99,7 +99,7 @@ static void prepare_in_pack_by_idx(struct packing_data *pdata) * (i.e. in_pack_idx also zero) should return NULL. */ mapping[cnt++] = NULL; - for (p = get_packed_git(the_repository); p; p = p->next, cnt++) { + for (p = get_all_packs(the_repository); p; p = p->next, cnt++) { if (cnt == nr) { free(mapping); return; diff --git a/packfile.c b/packfile.c index faec8591d5a7a2..cbef7033c3b8ea 100644 --- a/packfile.c +++ b/packfile.c @@ -197,6 +197,23 @@ int open_pack_index(struct packed_git *p) return ret; } +uint32_t get_pack_fanout(struct packed_git *p, uint32_t value) +{ + const uint32_t *level1_ofs = p->index_data; + + if (!level1_ofs) { + if (open_pack_index(p)) + return 0; + level1_ofs = p->index_data; + } + + if (p->index_version > 1) { + level1_ofs += 2; + } + + return ntohl(level1_ofs[value]); +} + static struct packed_git *alloc_packed_git(int extra) { struct packed_git *p = xmalloc(st_add(sizeof(*p), extra)); @@ -316,18 +333,6 @@ void close_pack(struct packed_git *p) void close_all_packs(struct raw_object_store *o) { struct packed_git *p; - struct midxed_git *m; - - for (m = midxed_git; m; m = m->next) { - int i; - for (i = 0; i < m->num_packs; i++) { - p = m->packs[i]; - if (p && p->do_not_close) - die("BUG: want to close pack marked 'do-not-close'"); - else if (p) - close_pack(p); - } - } for (p = o->packed_git; p; p = p->next) if (p->do_not_close) @@ -464,8 +469,19 @@ static int open_packed_git_1(struct packed_git *p) ssize_t read_result; const unsigned hashsz = the_hash_algo->rawsz; - if (!p->index_data && open_pack_index(p)) - return error("packfile %s index unavailable", p->pack_name); + if (!p->index_data) { + struct multi_pack_index *m; + const char *pack_name = strrchr(p->pack_name, '/'); + + for (m = the_repository->objects->multi_pack_index; + m; m = m->next) { + if (midx_contains_pack(m, pack_name)) + break; + } + + if (!m && open_pack_index(p)) + return error("packfile %s index unavailable", p->pack_name); + } if (!pack_max_fds) { unsigned int max_fds = get_max_fd_limit(); @@ -516,6 +532,10 @@ static int open_packed_git_1(struct packed_git *p) " supported (try upgrading GIT to a newer version)", p->pack_name, ntohl(hdr.hdr_version)); + /* Skip index checking if in multi-pack-index 
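get_pack_fanout() added above exposes the .idx fanout table: entry b holds the number of objects whose object id starts with a byte less than or equal to b, so adjacent entries delimit the objects sharing a first byte. That is how get_sorted_entries() in midx.c walks each pack one fanout slice at a time. A tiny illustration (not from the patch; the function name is invented):

	/* Sketch: count the objects in a pack whose id starts with byte b. */
	static uint32_t count_objects_with_first_byte(struct packed_git *p,
						      unsigned char b)
	{
		uint32_t start = b ? get_pack_fanout(p, b - 1) : 0;
		uint32_t end = get_pack_fanout(p, b);

		return end - start;
	}
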
*/ + if (!p->index_data) + return 0; + /* Verify the pack matches its index. */ if (p->num_objects != ntohl(hdr.hdr_entries)) return error("packfile %s claims to have %"PRIu32" objects" @@ -751,13 +771,14 @@ static void report_pack_garbage(struct string_list *list) report_helper(list, seen_bits, first, list->nr); } -static void prepare_packed_git_one(struct repository *r, char *objdir, int local) +void for_each_file_in_pack_dir(const char *objdir, + each_file_in_pack_dir_fn fn, + void *data) { struct strbuf path = STRBUF_INIT; size_t dirnamelen; DIR *dir; struct dirent *de; - struct string_list garbage = STRING_LIST_INIT_DUP; strbuf_addstr(&path, objdir); strbuf_addstr(&path, "/pack"); @@ -772,63 +793,87 @@ static void prepare_packed_git_one(struct repository *r, char *objdir, int local strbuf_addch(&path, '/'); dirnamelen = path.len; while ((de = readdir(dir)) != NULL) { - struct packed_git *p; - struct midxed_git *m; - size_t base_len; - if (is_dot_or_dotdot(de->d_name)) continue; strbuf_setlen(&path, dirnamelen); strbuf_addstr(&path, de->d_name); - base_len = path.len; - if (strip_suffix_mem(path.buf, &base_len, ".idx")) { - struct strbuf pack_name = STRBUF_INIT; - strbuf_addstr(&pack_name, de->d_name); - strbuf_setlen(&pack_name, pack_name.len - 3); - strbuf_add(&pack_name, "pack", 4); - - /* Don't reopen a pack we already have. */ - for (m = midxed_git; m; m = m->next) - if (contains_pack(m, pack_name.buf)) - break; - for (p = r->objects->packed_git; p; - p = p->next) { - size_t len; - if (strip_suffix(p->pack_name, ".pack", &len) && - len == base_len && - !memcmp(p->pack_name, path.buf, len)) - break; - } - if (m == NULL && p == NULL && - /* - * See if it really is a valid .idx file with - * corresponding .pack file that we can map. - */ - (p = add_packed_git(path.buf, path.len, local)) != NULL) - install_packed_git(r, p); - } - - if (!report_garbage) - continue; - - if (ends_with(de->d_name, ".idx") || - ends_with(de->d_name, ".pack") || - ends_with(de->d_name, ".bitmap") || - ends_with(de->d_name, ".keep") || - ends_with(de->d_name, ".promisor") || - ends_with(de->d_name, ".midx")) - string_list_append(&garbage, path.buf); - else - report_garbage(PACKDIR_FILE_GARBAGE, path.buf); + fn(path.buf, path.len, de->d_name, data); } + closedir(dir); - report_pack_garbage(&garbage); - string_list_clear(&garbage, 0); strbuf_release(&path); } +struct prepare_pack_data { + struct repository *r; + struct string_list *garbage; + int local; + struct multi_pack_index *m; +}; + +static void prepare_pack(const char *full_name, size_t full_name_len, + const char *file_name, void *_data) +{ + struct prepare_pack_data *data = (struct prepare_pack_data *)_data; + struct packed_git *p; + size_t base_len = full_name_len; + + if (strip_suffix_mem(full_name, &base_len, ".idx") && + !(data->m && midx_contains_pack(data->m, file_name))) { + /* Don't reopen a pack we already have. 
*/ + for (p = data->r->objects->packed_git; p; p = p->next) { + size_t len; + if (strip_suffix(p->pack_name, ".pack", &len) && + len == base_len && + !memcmp(p->pack_name, full_name, len)) + break; + } + + if (!p) { + p = add_packed_git(full_name, full_name_len, data->local); + if (p) + install_packed_git(data->r, p); + } + } + + if (!report_garbage) + return; + + if (!strcmp(file_name, "multi-pack-index")) + return; + if (ends_with(file_name, ".idx") || + ends_with(file_name, ".pack") || + ends_with(file_name, ".bitmap") || + ends_with(file_name, ".keep") || + ends_with(file_name, ".promisor")) + string_list_append(data->garbage, full_name); + else + report_garbage(PACKDIR_FILE_GARBAGE, full_name); +} + +static void prepare_packed_git_one(struct repository *r, char *objdir, int local) +{ + struct prepare_pack_data data; + struct string_list garbage = STRING_LIST_INIT_DUP; + + data.m = r->objects->multi_pack_index; + + /* look for the multi-pack-index for this object directory */ + while (data.m && strcmp(data.m->object_dir, objdir)) + data.m = data.m->next; + + data.r = r; + data.garbage = &garbage; + data.local = local; + + for_each_file_in_pack_dir(objdir, prepare_pack, &data); + + report_pack_garbage(data.garbage); + string_list_clear(data.garbage, 0); +} + static void prepare_packed_git(struct repository *r); /* * Give a fast, rough count of the number of objects in the repository. This @@ -841,12 +886,12 @@ unsigned long approximate_object_count(void) { if (!the_repository->objects->approximate_object_count_valid) { unsigned long count; + struct multi_pack_index *m; struct packed_git *p; - struct midxed_git *m; - prepare_packed_git_internal(the_repository, USE_MIDX); + prepare_packed_git(the_repository); count = 0; - for (m = midxed_git; m; m = m->next) + for (m = get_multi_pack_index(the_repository); m; m = m->next) count += m->num_objects; for (p = the_repository->objects->packed_git; p; p = p->next) { if (open_pack_index(p)) @@ -913,115 +958,74 @@ static void prepare_packed_git_mru(struct repository *r) list_add_tail(&p->mru, &r->objects->packed_git_mru); } -/** - * We have a few states that we can be in. - * - * N: No MIDX or packfiles loaded - * P: No MIDX loaded, all packfiles loaded into packed_git - * M: MIDX loaded, packfiles not in MIDX loaded into packed_git - * - * In state M, we load the MIDX first and only load packfiles - * that are not in the MIDX. - * - * We begin in state N. - * - * We can change states with a call to - * prepare_packed_git_internal(use_midx), depending on the value - * of use_midx. - * - * Here are the transition cases: - * - * - State N, use_midx = 0 -> P - * (only load packfiles, skip MIDX) - * - State N, use_midx = 1 -> M - * (load both packfiles and MIDX) - * - State M, use_midx = 0 -> P - * (unload MIDX and add packfiles to packed_git) - * - State M, use_midx = 1 -> M - * (no-op, unless refresh = 1) - * - State P, use_midx = 0 -> P - * (no-op, unless refresh = 1) - * - State P, use_midx = 1 -> P - * (no-op, unless refresh = 1) - * - * We prevent the P -> M transition by setting - * prepare_packed_git_midx_state to 0 when transitioning to P. - * - * Calling reprepare_packed_git_internal(use_midx) signals that we - * want to check the ODB for more packfiles or MIDX files, but - * should not unload the existing files. However, we do trigger - * some transitions. For instance, use_midx = 0 will trigger the - * M -> P transition (if we are in state M). 
- */ -static int prepare_packed_git_midx_state = 1; -static void prepare_packed_git_with_refresh(struct repository *r, int use_midx, int refresh) +static void prepare_packed_git(struct repository *r) { struct alternate_object_database *alt; - char *obj_dir; - if (!use_midx && prepare_packed_git_midx_state) { - /* - * If this is the first time called with - * use_midx = 0, then close any MIDX that - * may exist and reprepare the packs. - */ - close_all_midx(); - prepare_packed_git_midx_state = 0; - refresh = 1; - } - - if (r->objects->packed_git_initialized && !refresh) + if (r->objects->packed_git_initialized) return; - - r->objects->approximate_object_count_valid = 0; - obj_dir = r->objects->objectdir; - if (prepare_packed_git_midx_state) { - prepare_midxed_git_objdir(obj_dir, 1); - prepare_alt_odb(r); - for (alt = r->objects->alt_odb_list; alt; alt = alt->next) - prepare_midxed_git_objdir(alt->path, 0); - } - - prepare_packed_git_one(r, obj_dir, 1); + prepare_multi_pack_index_one(r, r->objects->objectdir, 1); + prepare_packed_git_one(r, r->objects->objectdir, 1); prepare_alt_odb(r); - for (alt = r->objects->alt_odb_list; alt; alt = alt->next) + for (alt = r->objects->alt_odb_list; alt; alt = alt->next) { + prepare_multi_pack_index_one(r, alt->path, 0); prepare_packed_git_one(r, alt->path, 0); + } rearrange_packed_git(r); + + r->objects->all_packs = NULL; + prepare_packed_git_mru(r); r->objects->packed_git_initialized = 1; } -void prepare_packed_git_internal(struct repository *r, int use_midx) +void reprepare_packed_git(struct repository *r) { - prepare_packed_git_with_refresh(r, use_midx, 0); + r->objects->approximate_object_count_valid = 0; + r->objects->packed_git_initialized = 0; + prepare_packed_git(r); } -static void prepare_packed_git(struct repository *r) +struct packed_git *get_packed_git(struct repository *r) { - prepare_packed_git_internal(r, 0); + prepare_packed_git(r); + return r->objects->packed_git; } -void reprepare_packed_git_internal(struct repository *r, int use_midx) +struct multi_pack_index *get_multi_pack_index(struct repository *r) { - prepare_packed_git_with_refresh(r, use_midx, 1); + prepare_packed_git(r); + return r->objects->multi_pack_index; } -void reprepare_packed_git(struct repository *r) +struct packed_git *get_all_packs(struct repository *r) { - prepare_packed_git_with_refresh(r, 0, 1); -} + prepare_packed_git(r); -struct packed_git *get_packed_git(struct repository *r) -{ - prepare_packed_git_with_refresh(r, 0, 0); - return r->objects->packed_git; + if (!r->objects->all_packs) { + struct packed_git *p = r->objects->packed_git; + struct multi_pack_index *m; + + for (m = r->objects->multi_pack_index; m; m = m->next) { + uint32_t i; + for (i = 0; i < m->num_packs; i++) { + if (!prepare_midx_pack(m, i)) { + m->packs[i]->next = p; + p = m->packs[i]; + } + } + } + + r->objects->all_packs = p; + } + + return r->objects->all_packs; } struct list_head *get_packed_git_mru(struct repository *r) { prepare_packed_git(r); return &r->objects->packed_git_mru; - prepare_packed_git_with_refresh(r, 0, 1); } unsigned long unpack_object_header_buffer(const unsigned char *buf, @@ -1872,12 +1876,12 @@ off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n) } } -int find_pack_entry_pos(const unsigned char *sha1, - struct packed_git *p, - uint32_t *result) +off_t find_pack_entry_one(const unsigned char *sha1, + struct packed_git *p) { const unsigned char *index = p->index_data; struct object_id oid; + uint32_t result; if (!index) { if (open_pack_index(p)) @@ 
-1885,14 +1889,7 @@ int find_pack_entry_pos(const unsigned char *sha1, } hashcpy(oid.hash, sha1); - return bsearch_pack(&oid, p, result); -} - -off_t find_pack_entry_one(const unsigned char *sha1, - struct packed_git *p) -{ - uint32_t result; - if (find_pack_entry_pos(sha1, p, &result)) + if (bsearch_pack(&oid, p, &result)) return nth_packed_object_offset(p, result); return 0; } @@ -1966,17 +1963,17 @@ static int fill_pack_entry(const struct object_id *oid, int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) { struct list_head *pos; + struct multi_pack_index *m; - if (core_midx) { - prepare_packed_git_internal(r, USE_MIDX); - if (fill_pack_entry_midx(oid, e)) - return 1; - } else - prepare_packed_git(r); - - if (!r->objects->packed_git) + prepare_packed_git(r); + if (!r->objects->packed_git && !r->objects->multi_pack_index) return 0; + for (m = r->objects->multi_pack_index; m; m = m->next) { + if (fill_midx_entry(oid, e, m)) + return 1; + } + list_for_each(pos, &r->objects->packed_git_mru) { struct packed_git *p = list_entry(pos, struct packed_git, mru); if (fill_pack_entry(oid, e, p)) { @@ -2039,7 +2036,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, int pack_errors = 0; prepare_packed_git(the_repository); - for (p = the_repository->objects->packed_git; p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && diff --git a/packfile.h b/packfile.h index 15e007861bcec7..442625723dea4b 100644 --- a/packfile.h +++ b/packfile.h @@ -33,26 +33,25 @@ extern char *sha1_pack_index_name(const unsigned char *sha1); extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path); +typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len, + const char *file_pach, void *data); +void for_each_file_in_pack_dir(const char *objdir, + each_file_in_pack_dir_fn fn, + void *data); + /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 #define PACKDIR_FILE_IDX 2 #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -/* - * While the MIDX feature is evolving, not all callers are ready to be - * MIDX-aware. In the meantime, call these *_internal(use_midx) methods - * when ready for MIDX (or to specify you are not ready for MIDX). - */ -#define USE_MIDX 1 -extern void prepare_packed_git_internal(struct repository *r, int use_midx); -extern void reprepare_packed_git_internal(struct repository *r, int use_midx); - extern void reprepare_packed_git(struct repository *r); extern void install_packed_git(struct repository *r, struct packed_git *pack); struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); +struct multi_pack_index *get_multi_pack_index(struct repository *r); +struct packed_git *get_all_packs(struct repository *r); /* * Give a rough count of objects in the repository. 
This sacrifices accuracy @@ -77,6 +76,8 @@ extern int open_pack_index(struct packed_git *); */ extern void close_pack_index(struct packed_git *); +extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value); + extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); extern void close_pack_windows(struct packed_git *); extern void close_pack(struct packed_git *); @@ -123,10 +124,6 @@ extern const struct object_id *nth_packed_object_oid(struct object_id *, struct */ extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t n); -int find_pack_entry_pos(const unsigned char *sha1, - struct packed_git *p, - uint32_t *result); - /* * If the object named sha1 is present in the specified packfile, * return its offset within the packfile; otherwise, return 0. diff --git a/server-info.c b/server-info.c index 41050c2449b1ad..e2b2d6a27a40b1 100644 --- a/server-info.c +++ b/server-info.c @@ -199,7 +199,7 @@ static void init_pack_info(const char *infofile, int force) objdir = get_object_directory(); objdirlen = strlen(objdir); - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { /* we ignore things on alternate path since they are * not available to the pullers in general. */ @@ -209,7 +209,7 @@ static void init_pack_info(const char *infofile, int force) } num_pack = i; info = xcalloc(num_pack, sizeof(struct pack_info *)); - for (i = 0, p = get_packed_git(the_repository); p; p = p->next) { + for (i = 0, p = get_all_packs(the_repository); p; p = p->next) { if (!p->pack_local) continue; info[i] = xcalloc(1, sizeof(struct pack_info)); diff --git a/sha1-file.c b/sha1-file.c index 593cc4aa1df3b2..2f6395fe806d7a 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -1439,7 +1439,7 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid, /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { - reprepare_packed_git_internal(r, core_midx ? 
USE_MIDX : 0); + reprepare_packed_git(r); if (find_pack_entry(r, real, &e)) break; if (core_virtualize_objects && !tried_hook) { diff --git a/sha1-name.c b/sha1-name.c index 1b4f27dba7203c..18546bbc7a6cca 100644 --- a/sha1-name.c +++ b/sha1-name.c @@ -150,17 +150,17 @@ static int match_sha(unsigned len, const unsigned char *a, const unsigned char * return 1; } -static void unique_in_pack(struct packed_git *p, +static void unique_in_midx(struct multi_pack_index *m, struct disambiguate_state *ds) { uint32_t num, i, first = 0; const struct object_id *current = NULL; + num = m->num_objects; - if (open_pack_index(p) || !p->num_objects) + if (!num) return; - num = p->num_objects; - bsearch_pack(&ds->bin_pfx, p, &first); + bsearch_midx(&ds->bin_pfx, m, &first); /* * At this point, "first" is the location of the lowest object @@ -169,24 +169,24 @@ static void unique_in_pack(struct packed_git *p, */ for (i = first; i < num && !ds->ambiguous; i++) { struct object_id oid; - current = nth_packed_object_oid(&oid, p, i); + current = nth_midxed_object_oid(&oid, m, i); if (!match_sha(ds->len, ds->bin_pfx.hash, current->hash)) break; update_candidates(ds, current); } } -static void unique_in_midx(struct midxed_git *m, +static void unique_in_pack(struct packed_git *p, struct disambiguate_state *ds) { uint32_t num, i, first = 0; const struct object_id *current = NULL; - if (!m->num_objects) + if (open_pack_index(p) || !p->num_objects) return; - num = m->num_objects; - bsearch_midx(m, ds->bin_pfx.hash, &first); + num = p->num_objects; + bsearch_pack(&ds->bin_pfx, p, &first); /* * At this point, "first" is the location of the lowest object @@ -195,7 +195,7 @@ static void unique_in_midx(struct midxed_git *m, */ for (i = first; i < num && !ds->ambiguous; i++) { struct object_id oid; - current = nth_midxed_object_oid(&oid, m, i); + current = nth_packed_object_oid(&oid, p, i); if (!match_sha(ds->len, ds->bin_pfx.hash, current->hash)) break; update_candidates(ds, current); @@ -204,11 +204,11 @@ static void unique_in_midx(struct midxed_git *m, static void find_short_packed_object(struct disambiguate_state *ds) { + struct multi_pack_index *m; struct packed_git *p; - struct midxed_git *m; - prepare_packed_git_internal(the_repository, USE_MIDX); - for (m = midxed_git; m && !ds->ambiguous; m = m->next) + for (m = get_multi_pack_index(the_repository); m && !ds->ambiguous; + m = m->next) unique_in_midx(m, ds); for (p = get_packed_git(the_repository); p && !ds->ambiguous; p = p->next) @@ -560,33 +560,39 @@ static int extend_abbrev_len(const struct object_id *oid, void *cb_data) return 0; } -static void find_abbrev_len_for_midx(struct midxed_git *m, +static void find_abbrev_len_for_midx(struct multi_pack_index *m, struct min_abbrev_data *mad) { int match = 0; - uint32_t first = 0; + uint32_t num, first = 0; struct object_id oid; + const struct object_id *mad_oid; if (!m->num_objects) return; - match = bsearch_midx(m, mad->oid->hash, &first); + num = m->num_objects; + mad_oid = mad->oid; + match = bsearch_midx(mad_oid, m, &first); /* * first is now the position in the packfile where we would insert - * mad->oid->hash if it does not exist (or the position of - * mad->oid->hash if it does exist). Hence, we consider a maximum of - * three objects nearby for the abbreviation length. + * mad->hash if it does not exist (or the position of mad->hash if + * it does exist). Hence, we consider a maximum of two objects + * nearby for the abbreviation length. 
*/ mad->init_len = 0; - if (!match && nth_midxed_object_oid(&oid, m, first)) - extend_abbrev_len(&oid, mad); - else if (first < m->num_objects - 1 && - nth_midxed_object_oid(&oid, m, first + 1)) - extend_abbrev_len(&oid, mad); - if (first > 0 && nth_midxed_object_oid(&oid, m, first - 1)) - extend_abbrev_len(&oid, mad); - + if (!match) { + if (nth_midxed_object_oid(&oid, m, first)) + extend_abbrev_len(&oid, mad); + } else if (first < num - 1) { + if (nth_midxed_object_oid(&oid, m, first + 1)) + extend_abbrev_len(&oid, mad); + } + if (first > 0) { + if (nth_midxed_object_oid(&oid, m, first - 1)) + extend_abbrev_len(&oid, mad); + } mad->init_len = mad->cur_len; } @@ -607,9 +613,9 @@ static void find_abbrev_len_for_pack(struct packed_git *p, /* * first is now the position in the packfile where we would insert - * mad->oid->hash if it does not exist (or the position of - * mad->oid->hash if it does exist). Hence, we consider a maximum of two - * objects nearby for the abbreviation length. + * mad->hash if it does not exist (or the position of mad->hash if + * it does exist). Hence, we consider a maximum of two objects + * nearby for the abbreviation length. */ mad->init_len = 0; if (!match) { @@ -628,11 +634,10 @@ static void find_abbrev_len_for_pack(struct packed_git *p, static void find_abbrev_len_packed(struct min_abbrev_data *mad) { + struct multi_pack_index *m; struct packed_git *p; - struct midxed_git *m; - prepare_packed_git_internal(the_repository, USE_MIDX); - for (m = midxed_git; m; m = m->next) + for (m = get_multi_pack_index(the_repository); m; m = m->next) find_abbrev_len_for_midx(m, mad); for (p = get_packed_git(the_repository); p; p = p->next) find_abbrev_len_for_pack(p, mad); diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c new file mode 100644 index 00000000000000..831b586d022215 --- /dev/null +++ b/t/helper/test-read-midx.c @@ -0,0 +1,51 @@ +#include "test-tool.h" +#include "cache.h" +#include "midx.h" +#include "repository.h" +#include "object-store.h" + +static int read_midx_file(const char *object_dir) +{ + uint32_t i; + struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); + + if (!m) + return 1; + + printf("header: %08x %d %d %d\n", + m->signature, + m->version, + m->num_chunks, + m->num_packs); + + printf("chunks:"); + + if (m->chunk_pack_names) + printf(" pack-names"); + if (m->chunk_oid_fanout) + printf(" oid-fanout"); + if (m->chunk_oid_lookup) + printf(" oid-lookup"); + if (m->chunk_object_offsets) + printf(" object-offsets"); + if (m->chunk_large_offsets) + printf(" large-offsets"); + + printf("\nnum_objects: %d\n", m->num_objects); + + printf("packs:\n"); + for (i = 0; i < m->num_packs; i++) + printf("%s\n", m->pack_names[i]); + + printf("object-dir: %s\n", m->object_dir); + + return 0; +} + +int cmd__read_midx(int argc, const char **argv) +{ + if (argc != 2) + usage("read-midx "); + + return read_midx_file(argv[1]); +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index ae43a27f6420c0..1ac1e29d8a2851 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -31,6 +31,7 @@ static struct test_cmd cmds[] = { { "path-utils", cmd__path_utils }, { "prio-queue", cmd__prio_queue }, { "read-cache", cmd__read_cache }, + { "read-midx", cmd__read_midx }, { "ref-store", cmd__ref_store }, { "regex", cmd__regex }, { "repository", cmd__repository }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index ea4bab00b2df74..55a90fbd12a1c8 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -26,6 +26,7 @@ int 
cmd__online_cpus(int argc, const char **argv); int cmd__path_utils(int argc, const char **argv); int cmd__prio_queue(int argc, const char **argv); int cmd__read_cache(int argc, const char **argv); +int cmd__read_midx(int argc, const char **argv); int cmd__ref_store(int argc, const char **argv); int cmd__regex(int argc, const char **argv); int cmd__repository(int argc, const char **argv); diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index 7bff7923f2a911..736d5c3225113c 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -191,6 +191,7 @@ test_expect_success 'pack-objects respects --honor-pack-keep (local bitmapped pa test_expect_success 'pack-objects respects --local (non-local bitmapped pack)' ' mv .git/objects/pack/$packbitmap.* alt.git/objects/pack/ && + rm -f .git/objects/pack/multi-pack-index && test_when_finished "mv alt.git/objects/pack/$packbitmap.* .git/objects/pack/" && echo HEAD | git pack-objects --local --stdout --revs >3b.pack && git index-pack 3b.pack && diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh deleted file mode 100755 index d5a0011f2b5ae8..00000000000000 --- a/t/t5319-midx.sh +++ /dev/null @@ -1,430 +0,0 @@ -#!/bin/sh - -test_description='meta-pack indexes' -. ./test-lib.sh - -test_expect_success 'setup' ' - rm -rf .git && - git init && - git config core.midx true && - git config pack.threads 1 -' - -test_expect_success 'write-midx with no packs' ' - git midx --write --update-head --delete-expired --pack-dir . -' - -test_expect_success 'create packs' ' - i=1 && - while test $i -le 5 - do - iii=$(printf '%03i' $i) - test-tool genrandom "bar" 200 > wide_delta_$iii && - test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && - test-tool genrandom "foo"$i 100 > deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && - echo $iii >file_$iii && - test-tool genrandom "$iii" 8192 >>file_$iii && - git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && - i=$(expr $i + 1) || return 1 - done && - { echo 101 && test-tool genrandom 100 8192; } >file_101 && - git update-index --add file_101 && - tree=$(git write-tree) && - commit=$(git commit-tree $tree obj-list && - git update-ref HEAD $commit -' - -test_expect_success 'Verify normal git operations succeed' ' - git rev-list --all --objects >rev-list-out-1 && - test_line_count = 18 rev-list-out-1 -' - -test_expect_success 'write-midx from index version 1' ' - pack1=$(git pack-objects --index-version=1 test-1 midx-read-out-1 && - echo "header: 4d494458 80000001 01 14 00 05 00000001" >midx-read-expect-1 && - echo "num_objects: 17" >>midx-read-expect-1 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-1 && - echo "pack_names:" >>midx-read-expect-1 && - echo "test-1-$pack1.pack" >>midx-read-expect-1 && - echo "pack_dir: ." >>midx-read-expect-1 && - test_cmp midx-read-out-1 midx-read-expect-1 -' - -test_expect_success 'Verify normal git operations succeed' ' - git rev-list --all --objects >rev-list-out-2 && - test_line_count = 18 rev-list-out-2 -' - -test_expect_success 'write-midx from index version 2' ' - rm "test-1-$pack1.pack" && - pack2=$(git pack-objects --index-version=2 test-2 midx-head-expect && - test_cmp midx-head midx-head-expect && - git midx --read --pack-dir . 
--midx-id=$midx2 >midx-read-out-2 && - echo "header: 4d494458 80000001 01 14 00 05 00000001" >midx-read-expect-2 && - echo "num_objects: 17" >>midx-read-expect-2 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-2 && - echo "pack_names:" >>midx-read-expect-2 && - echo "test-2-$pack2.pack" >>midx-read-expect-2 && - echo "pack_dir: ." >>midx-read-expect-2 && - test_cmp midx-read-out-2 midx-read-expect-2 -' - -test_expect_success 'Verify normal git operations succeed' ' - git rev-list --all --objects >rev-list-out-3 && - test_line_count = 18 rev-list-out-3 -' - -test_expect_success 'Add more objects' ' - i=6 && - while test $i -le 10 - do - iii=$(printf '%03i' $i) - test-tool genrandom "bar" 200 > wide_delta_$iii && - test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && - test-tool genrandom "foo"$i 100 > deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && - echo $iii >file_$iii && - test-tool genrandom "$iii" 8192 >>file_$iii && - git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && - i=$(expr $i + 1) || return 1 - done && - { echo 101 && test-tool genrandom 100 8192; } >file_101 && - git update-index --add file_101 && - tree=$(git write-tree) && - commit=$(git commit-tree $tree -p HEADobj-list && - git update-ref HEAD $commit && - pack3=$(git pack-objects --index-version=2 test-pack rev-list-out-4 && - test_line_count = 35 rev-list-out-4 -' - -test_expect_success 'write-midx with two packs' ' - midx3=$(git midx --write --update-head --delete-expired --pack-dir .) && - test -f midx-$midx3.midx && - ! test -f midx-$midx2.midx && - printf $midx3 > midx-head-expect && - test_cmp midx-head midx-head-expect && - git midx --read --pack-dir . --midx-id=$midx3 >midx-read-out-3 && - echo "header: 4d494458 80000001 01 14 00 05 00000002" >midx-read-expect-3 && - echo "num_objects: 33" >>midx-read-expect-3 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-3 && - echo "pack_names:" >>midx-read-expect-3 && - echo "test-2-$pack2.pack" >>midx-read-expect-3 && - echo "test-pack-$pack3.pack" >>midx-read-expect-3 && - echo "pack_dir: ." >>midx-read-expect-3 && - test_cmp midx-read-out-3 midx-read-expect-3 && - git midx --read --pack-dir . 
>midx-read-out-3-head && - test_cmp midx-read-out-3-head midx-read-expect-3 -' - -test_expect_success 'Verify normal git operations succeed' ' - git rev-list --all --objects >rev-list-out-5 && - test_line_count = 35 rev-list-out-5 -' - -test_expect_success 'Add more packs' ' - j=0 && - while test $j -le 10 - do - iii=$(printf '%03i' $i) - test-tool genrandom "bar" 200 > wide_delta_$iii && - test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && - test-tool genrandom "foo"$i 100 > deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && - echo $iii >file_$iii && - test-tool genrandom "$iii" 8192 >>file_$iii && - git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && - { echo 101 && test-tool genrandom 100 8192; } >file_101 && - git update-index --add file_101 && - tree=$(git write-tree) && - commit=$(git commit-tree $tree -p HEADobj-list && - git update-ref HEAD $commit && - git pack-objects --index-version=2 test-pack rev-list-out-6 && - test_line_count = 90 rev-list-out-6 -' - -test_expect_success 'write-midx with twelve packs' ' - midx4=$(git midx --write --update-head --delete-expired --pack-dir .) && - test -f midx-$midx4.midx && - ! test -f midx-$midx3.midx && - printf $midx4 > midx-head-expect && - test_cmp midx-head midx-head-expect && - git midx --read --pack-dir . --midx-id=$midx4 >midx-read-out-4 && - echo "header: 4d494458 80000001 01 14 00 05 0000000d" >midx-read-expect-4 && - echo "num_objects: 77" >>midx-read-expect-4 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-4 && - echo "pack_names:" >>midx-read-expect-4 && - ls test-*.pack | sort >>midx-read-expect-4 && - echo "pack_dir: ." >>midx-read-expect-4 && - test_cmp midx-read-out-4 midx-read-expect-4 && - git midx --read --pack-dir . >midx-read-out-4-head && - test_cmp midx-read-out-4-head midx-read-expect-4 -' - -test_expect_success 'Verify normal git operations succeed' ' - git rev-list --all --objects >rev-list-out-7 && - test_line_count = 90 rev-list-out-7 -' - -test_expect_success 'write-midx with nothing new' ' - midx5=$(git midx --write --update-head --delete-expired --pack-dir .) && - printf $midx5 > midx-head-5 && - test_cmp midx-head-5 midx-head-expect -' - -test_expect_success 'midx --clear' ' - git midx --clear --pack-dir . && - ! test -f "midx-$midx4.midx" && - ! test -f "midx-head" -' - -test_expect_success 'midx --verify fails on missing midx' ' - test_must_fail git midx --verify --pack-dir . -' - -test_expect_success 'Verify normal git operations succeed' ' - git rev-list --all --objects >rev-list-out-8 && - test_line_count = 90 rev-list-out-8 -' - -# The 'verify' commands below expect a midx-head file pointing -# to an existing MIDX file. -test_expect_success 'recompute valid midx' ' - git midx --write --update-head --pack-dir . 
-' - -HASH_LEN=20 -MIDX_BYTE_VERSION=4 -MIDX_BYTE_OID_VERSION=8 -MIDX_BYTE_OID_LEN=9 -MIDX_BYTE_CHUNK_COUNT=11 -MIDX_OFFSET_CHUNK_LOOKUP=16 -MIDX_WIDTH_CHUNK_LOOKUP=12 -MIDX_NUM_CHUNKS=6 -MIDX_NUM_PACKS=13 -MIDX_NUM_OBJECTS=77 -MIDX_BYTE_CHUNK_PACKLOOKUP_ID=$MIDX_OFFSET_CHUNK_LOOKUP -MIDX_BYTE_CHUNK_FANOUT_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - 1 \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_BYTE_CHUNK_LOOKUP_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - 2 \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_BYTE_CHUNK_OFFSET_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - 3 \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_BYTE_CHUNK_PACKNAME_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - 4 \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_OFFSET_PACKLOOKUP=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - $MIDX_NUM_CHUNKS \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_BYTE_PACKFILE_LOOKUP=`expr $MIDX_OFFSET_PACKLOOKUP + 4` -MIDX_OFFSET_OID_FANOUT=`expr $MIDX_OFFSET_PACKLOOKUP + \ - 4 \* $MIDX_NUM_PACKS` -MIDX_BYTE_OID_FANOUT=`expr $MIDX_OFFSET_OID_FANOUT + 4 \* 129` -MIDX_OFFSET_OID_LOOKUP=`expr $MIDX_OFFSET_OID_FANOUT + 4 \* 256` -MIDX_BYTE_OID_ORDER=`expr $MIDX_OFFSET_OID_LOOKUP + $HASH_LEN \* 50` -MIDX_BYTE_OID_MISSING=`expr $MIDX_OFFSET_OID_LOOKUP + $HASH_LEN \* 50 + 5` -MIDX_OFFSET_OBJECT_OFFSETS=`expr $MIDX_OFFSET_OID_LOOKUP + \ - $HASH_LEN \* $MIDX_NUM_OBJECTS` -MIDX_WIDTH_OBJECT_OFFSETS=8 -MIDX_BYTE_OBJECT_PACKID=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ - $MIDX_WIDTH_OBJECT_OFFSETS \* 50 + 1` -MIDX_BYTE_OBJECT_OFFSET=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ - $MIDX_WIDTH_OBJECT_OFFSETS \* 50 + 4` -MIDX_OFFSET_PACKFILE_NAMES=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ - $MIDX_WIDTH_OBJECT_OFFSETS \* $MIDX_NUM_OBJECTS` -MIDX_BYTE_PACKFILE_NAMES=`expr $MIDX_OFFSET_PACKFILE_NAMES + 10` -MIDX_PACKNAME_SIZE=`expr $(ls *.pack | wc -c) + $MIDX_NUM_PACKS` -MIDX_BYTE_CHECKSUM=`expr $MIDX_OFFSET_PACKFILE_NAMES + $MIDX_PACKNAME_SIZE` - -test_expect_success 'midx --verify succeeds' ' - git midx --verify --pack-dir . 
-' - -# usage: corrupt_midx_and_verify [] -corrupt_midx_and_verify() { - pos=$1 - data="${2:-\0}" - grepstr=$3 - packdir=$4 - midxid=$(cat ./$packdir/midx-head) && - file=./$packdir/midx-$midxid.midx && - chmod a+w "$file" && - test_when_finished mv midx-backup "$file" && - cp "$file" midx-backup && - printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc && - test_must_fail git midx --verify --pack-dir "./$packdir" 2>test_err && - grep -v "^+" test_err >err && - grep "$grepstr" err -} - -test_expect_success 'verify bad signature' ' - corrupt_midx_and_verify 0 "\00" \ - "midx signature" -' - -test_expect_success 'verify bad version' ' - corrupt_midx_and_verify $MIDX_BYTE_VERSION "\02" \ - "midx version" -' - -test_expect_success 'verify bad object id version' ' - corrupt_midx_and_verify $MIDX_BYTE_OID_VERSION "\02" \ - "hash version" -' - -test_expect_success 'verify bad object id length' ' - corrupt_midx_and_verify $MIDX_BYTE_OID_LEN "\010" \ - "hash length" -' - -test_expect_success 'verify bad chunk count' ' - corrupt_midx_and_verify $MIDX_BYTE_CHUNK_COUNT "\01" \ - "missing Packfile Name chunk" -' - -test_expect_success 'verify bad packfile lookup chunk id' ' - corrupt_midx_and_verify $MIDX_BYTE_CHUNK_PACKLOOKUP_ID "\00" \ - "missing Packfile Name Lookup chunk" -' - -test_expect_success 'verify bad OID fanout chunk id' ' - corrupt_midx_and_verify $MIDX_BYTE_CHUNK_FANOUT_ID "\00" \ - "missing OID Fanout chunk" -' - -test_expect_success 'verify bad OID lookup chunk id' ' - corrupt_midx_and_verify $MIDX_BYTE_CHUNK_LOOKUP_ID "\00" \ - "missing OID Lookup chunk" -' - -test_expect_success 'verify bad offset chunk id' ' - corrupt_midx_and_verify $MIDX_BYTE_CHUNK_OFFSET_ID "\00" \ - "missing Object Offset chunk" -' - -test_expect_success 'verify bad packfile name chunk id' ' - corrupt_midx_and_verify $MIDX_BYTE_CHUNK_PACKNAME_ID "\00" \ - "missing Packfile Name chunk" -' - -test_expect_success 'verify bad OID fanout value' ' - corrupt_midx_and_verify $MIDX_BYTE_OID_FANOUT "\01" \ - "incorrect fanout value" -' - -test_expect_success 'verify bad OID lookup order' ' - corrupt_midx_and_verify $MIDX_BYTE_OID_ORDER "\00" \ - "incorrect OID order" -' - -test_expect_success 'verify bad OID lookup (object missing)' ' - corrupt_midx_and_verify $MIDX_BYTE_OID_MISSING "\00" \ - "object not present in pack" -' - -test_expect_success 'verify bad pack-int-id' ' - corrupt_midx_and_verify $MIDX_BYTE_OBJECT_PACKID "\01" \ - "pack-int-id for object" -' - -test_expect_success 'verify bad 32-bit offset' ' - corrupt_midx_and_verify $MIDX_BYTE_OBJECT_OFFSET "\01" \ - "incorrect offset" -' - -test_expect_success 'verify packfile name' ' - corrupt_midx_and_verify $MIDX_BYTE_PACKFILE_NAMES "\00" \ - "failed to prepare pack" -' - -test_expect_success 'verify packfile lookup' ' - corrupt_midx_and_verify $MIDX_BYTE_PACKFILE_LOOKUP "\01" \ - "invalid packfile name lookup" -' - -test_expect_success 'verify checksum hash' ' - corrupt_midx_and_verify $MIDX_BYTE_CHECKSUM "\00" \ - "incorrect checksum" -' - -# usage: corrupt_data [] -corrupt_data() { - file=$1 - pos=$2 - data="${3:-\0}" - printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc -} - -# Force 64-bit offsets by manipulating the idx file. -# This makes the IDX file _incorrect_ so be careful to clean up after! 
-test_expect_success 'force some 64-bit offsets with pack-objects' ' - pack64=$(git pack-objects --index-version=2,0x40 test-64 midx-read-out-64 && - echo "header: 4d494458 80000001 01 14 00 06 00000001" >midx-read-expect-64 && - echo "num_objects: 65" >>midx-read-expect-64 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets large_offsets" >>midx-read-expect-64 && - echo "pack_names:" >>midx-read-expect-64 && - echo test-64-$pack64.pack >>midx-read-expect-64 && - echo "pack_dir: packs-64" >>midx-read-expect-64 && - test_cmp midx-read-out-64 midx-read-expect-64 -' - -HASH_LEN=20 -MIDX_OFFSET_CHUNK_LOOKUP=16 -MIDX_WIDTH_CHUNK_LOOKUP=12 -MIDX_NUM_CHUNKS=7 -MIDX_NUM_PACKS=1 -MIDX_NUM_OBJECTS=65 -MIDX_OFFSET_PACKLOOKUP=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - $MIDX_NUM_CHUNKS \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_OFFSET_OID_FANOUT=`expr $MIDX_OFFSET_PACKLOOKUP + \ - 4 \* $MIDX_NUM_PACKS` -MIDX_OFFSET_OID_LOOKUP=`expr $MIDX_OFFSET_OID_FANOUT + 4 \* 256` -MIDX_OFFSET_OBJECT_OFFSETS=`expr $MIDX_OFFSET_OID_LOOKUP + \ - $HASH_LEN \* $MIDX_NUM_OBJECTS` -MIDX_WIDTH_OBJECT_OFFSETS=8 -MIDX_OFFSET_LARGE_OFFSETS=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ - $MIDX_WIDTH_OBJECT_OFFSETS \* $MIDX_NUM_OBJECTS` -MIDX_BYTE_LARGE_OFFSETS=`expr $MIDX_OFFSET_LARGE_OFFSETS + 3` - -test_expect_success 'verify bad 64-bit offset' ' - corrupt_midx_and_verify $MIDX_BYTE_LARGE_OFFSETS "\01" \ - "incorrect offset" packs-64 -' - -test_done diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh new file mode 100755 index 00000000000000..70926b5bc04643 --- /dev/null +++ b/t/t5319-multi-pack-index.sh @@ -0,0 +1,351 @@ +#!/bin/sh + +test_description='multi-pack-indexes' +. ./test-lib.sh + +objdir=.git/objects + +midx_read_expect () { + NUM_PACKS=$1 + NUM_OBJECTS=$2 + NUM_CHUNKS=$3 + OBJECT_DIR=$4 + EXTRA_CHUNKS="$5" + { + cat <<-EOF && + header: 4d494458 1 $NUM_CHUNKS $NUM_PACKS + chunks: pack-names oid-fanout oid-lookup object-offsets$EXTRA_CHUNKS + num_objects: $NUM_OBJECTS + packs: + EOF + if test $NUM_PACKS -ge 1 + then + ls $OBJECT_DIR/pack/ | grep idx | sort + fi && + printf "object-dir: $OBJECT_DIR\n" + } >expect && + test-tool read-midx $OBJECT_DIR >actual && + test_cmp expect actual +} + +test_expect_success 'write midx with no packs' ' + test_when_finished rm -f pack/multi-pack-index && + git multi-pack-index --object-dir=. write && + midx_read_expect 0 0 4 . 
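+	# A multi-pack-index written with no packs still carries the four
+	# required chunks (pack-names, oid-fanout, oid-lookup, object-offsets),
+	# hence the expected chunk count of 4 above.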
+' + +generate_objects () { + i=$1 + iii=$(printf '%03i' $i) + { + test-tool genrandom "bar" 200 && + test-tool genrandom "baz $iii" 50 + } >wide_delta_$iii && + { + test-tool genrandom "foo"$i 100 && + test-tool genrandom "foo"$(( $i + 1 )) 100 && + test-tool genrandom "foo"$(( $i + 2 )) 100 + } >deep_delta_$iii && + { + echo $iii && + test-tool genrandom "$iii" 8192 + } >file_$iii && + git update-index --add file_$iii deep_delta_$iii wide_delta_$iii +} + +commit_and_list_objects () { + { + echo 101 && + test-tool genrandom 100 8192; + } >file_101 && + git update-index --add file_101 && + tree=$(git write-tree) && + commit=$(git commit-tree $tree -p HEADobj-list && + git reset --hard $commit +} + +test_expect_success 'create objects' ' + test_commit initial && + for i in $(test_seq 1 5) + do + generate_objects $i + done && + commit_and_list_objects +' + +test_expect_success 'write midx with one v1 pack' ' + pack=$(git pack-objects --index-version=1 $objdir/pack/test expect && + git -c core.multiPackIndex=true $1 | sort >actual + else + git -c core.multiPackIndex=false $1 >expect && + git -c core.multiPackIndex=true $1 >actual + fi && + test_cmp expect actual +} + +compare_results_with_midx () { + MSG=$1 + test_expect_success "check normal git operations: $MSG" ' + midx_git_two_modes "rev-list --objects --all" && + midx_git_two_modes "log --raw" && + midx_git_two_modes "count-objects --verbose" && + midx_git_two_modes "cat-file --batch-all-objects --buffer --batch-check" && + midx_git_two_modes "cat-file --batch-all-objects --buffer --batch-check --unsorted" sorted + ' +} + +test_expect_success 'write midx with one v2 pack' ' + git pack-objects --index-version=2,0x40 $objdir/pack/test +corrupt_midx_and_verify() { + POS=$1 && + DATA="${2:-\0}" && + OBJDIR=$3 && + GREPSTR="$4" && + COMMAND="$5" && + if test -z "$COMMAND" + then + COMMAND="git multi-pack-index verify --object-dir=$OBJDIR" + fi && + FILE=$OBJDIR/pack/multi-pack-index && + chmod a+w $FILE && + test_when_finished mv midx-backup $FILE && + cp $FILE midx-backup && + printf "$DATA" | dd of="$FILE" bs=1 seek="$POS" conv=notrunc && + test_must_fail $COMMAND 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "$GREPSTR" err +} + +test_expect_success 'verify bad signature' ' + corrupt_midx_and_verify 0 "\00" $objdir \ + "multi-pack-index signature" +' + +HASH_LEN=20 +NUM_OBJECTS=74 +MIDX_BYTE_VERSION=4 +MIDX_BYTE_OID_VERSION=5 +MIDX_BYTE_CHUNK_COUNT=6 +MIDX_HEADER_SIZE=12 +MIDX_BYTE_CHUNK_ID=$MIDX_HEADER_SIZE +MIDX_BYTE_CHUNK_OFFSET=$(($MIDX_HEADER_SIZE + 4)) +MIDX_NUM_CHUNKS=5 +MIDX_CHUNK_LOOKUP_WIDTH=12 +MIDX_OFFSET_PACKNAMES=$(($MIDX_HEADER_SIZE + \ + $MIDX_NUM_CHUNKS * $MIDX_CHUNK_LOOKUP_WIDTH)) +MIDX_BYTE_PACKNAME_ORDER=$(($MIDX_OFFSET_PACKNAMES + 2)) +MIDX_OFFSET_OID_FANOUT=$(($MIDX_OFFSET_PACKNAMES + 652)) +MIDX_OID_FANOUT_WIDTH=4 +MIDX_BYTE_OID_FANOUT_ORDER=$((MIDX_OFFSET_OID_FANOUT + 250 * $MIDX_OID_FANOUT_WIDTH + 1)) +MIDX_OFFSET_OID_LOOKUP=$(($MIDX_OFFSET_OID_FANOUT + 256 * $MIDX_OID_FANOUT_WIDTH)) +MIDX_BYTE_OID_LOOKUP=$(($MIDX_OFFSET_OID_LOOKUP + 16 * $HASH_LEN)) +MIDX_OFFSET_OBJECT_OFFSETS=$(($MIDX_OFFSET_OID_LOOKUP + $NUM_OBJECTS * $HASH_LEN)) +MIDX_OFFSET_WIDTH=8 +MIDX_BYTE_PACK_INT_ID=$(($MIDX_OFFSET_OBJECT_OFFSETS + 16 * $MIDX_OFFSET_WIDTH + 2)) +MIDX_BYTE_OFFSET=$(($MIDX_OFFSET_OBJECT_OFFSETS + 16 * $MIDX_OFFSET_WIDTH + 6)) + +test_expect_success 'verify bad version' ' + corrupt_midx_and_verify $MIDX_BYTE_VERSION "\00" $objdir \ + "multi-pack-index version" +' + +test_expect_success 'verify bad OID version' 
' + corrupt_midx_and_verify $MIDX_BYTE_OID_VERSION "\02" $objdir \ + "hash version" +' + +test_expect_success 'verify truncated chunk count' ' + corrupt_midx_and_verify $MIDX_BYTE_CHUNK_COUNT "\01" $objdir \ + "missing required" +' + +test_expect_success 'verify extended chunk count' ' + corrupt_midx_and_verify $MIDX_BYTE_CHUNK_COUNT "\07" $objdir \ + "terminating multi-pack-index chunk id appears earlier than expected" +' + +test_expect_success 'verify missing required chunk' ' + corrupt_midx_and_verify $MIDX_BYTE_CHUNK_ID "\01" $objdir \ + "missing required" +' + +test_expect_success 'verify invalid chunk offset' ' + corrupt_midx_and_verify $MIDX_BYTE_CHUNK_OFFSET "\01" $objdir \ + "invalid chunk offset (too large)" +' + +test_expect_success 'verify packnames out of order' ' + corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "z" $objdir \ + "pack names out of order" +' + +test_expect_success 'verify packnames out of order' ' + corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "a" $objdir \ + "failed to load pack" +' + +test_expect_success 'verify oid fanout out of order' ' + corrupt_midx_and_verify $MIDX_BYTE_OID_FANOUT_ORDER "\01" $objdir \ + "oid fanout out of order" +' + +test_expect_success 'verify oid lookup out of order' ' + corrupt_midx_and_verify $MIDX_BYTE_OID_LOOKUP "\00" $objdir \ + "oid lookup out of order" +' + +test_expect_success 'verify incorrect pack-int-id' ' + corrupt_midx_and_verify $MIDX_BYTE_PACK_INT_ID "\07" $objdir \ + "bad pack-int-id" +' + +test_expect_success 'verify incorrect offset' ' + corrupt_midx_and_verify $MIDX_BYTE_OFFSET "\07" $objdir \ + "incorrect object offset" +' + +test_expect_success 'git-fsck incorrect offset' ' + corrupt_midx_and_verify $MIDX_BYTE_OFFSET "\07" $objdir \ + "incorrect object offset" \ + "git -c core.multipackindex=true fsck" +' + +test_expect_success 'repack removes multi-pack-index' ' + test_path_is_file $objdir/pack/multi-pack-index && + GIT_TEST_MULTI_PACK_INDEX=0 git repack -adf && + test_path_is_missing $objdir/pack/multi-pack-index +' + +compare_results_with_midx "after repack" + +test_expect_success 'multi-pack-index and pack-bitmap' ' + git -c repack.writeBitmaps=true repack -ad && + git multi-pack-index write && + git rev-list --test-bitmap HEAD +' + +test_expect_success 'multi-pack-index and alternates' ' + git init --bare alt.git && + echo $(pwd)/alt.git/objects >.git/objects/info/alternates && + echo content1 >file1 && + altblob=$(GIT_DIR=alt.git git hash-object -w file1) && + git cat-file blob $altblob && + git rev-list --all +' + +compare_results_with_midx "with alternate (local midx)" + +test_expect_success 'multi-pack-index in an alternate' ' + mv .git/objects/pack/* alt.git/objects/pack && + test_commit add_local_objects && + git repack --local && + git multi-pack-index write && + midx_read_expect 1 3 4 $objdir && + git reset --hard HEAD~1 && + rm -f .git/objects/pack/* +' + +compare_results_with_midx "with alternate (remote midx)" + +# usage: corrupt_data [] +corrupt_data () { + file=$1 + pos=$2 + data="${3:-\0}" + printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc +} + +# Force 64-bit offsets by manipulating the idx file. +# This makes the IDX file _incorrect_ so be careful to clean up after! 
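+# With --index-version=2,0x40, pack-objects forces 64-bit index entries
+# for objects located above offset 0x40 in the pack, so even a small pack
+# exercises the large-offset code paths and the MIDX large-offsets chunk.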
+test_expect_success 'force some 64-bit offsets with pack-objects' ' + mkdir objects64 && + mkdir objects64/pack && + for i in $(test_seq 1 11) + do + generate_objects 11 + done && + commit_and_list_objects && + pack64=$(git pack-objects --index-version=2,0x40 objects64/pack/test-64 actual && test_cmp expect actual