From ceab693d1f19b9800958001d074b731a752d6bdd Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:18 -0400 Subject: [PATCH 01/92] multi-pack-index: add design document Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/multi-pack-index.txt | 109 +++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 Documentation/technical/multi-pack-index.txt diff --git a/Documentation/technical/multi-pack-index.txt b/Documentation/technical/multi-pack-index.txt new file mode 100644 index 00000000000000..d7e57639f70d70 --- /dev/null +++ b/Documentation/technical/multi-pack-index.txt @@ -0,0 +1,109 @@ +Multi-Pack-Index (MIDX) Design Notes +==================================== + +The Git object directory contains a 'pack' directory containing +packfiles (with suffix ".pack") and pack-indexes (with suffix +".idx"). The pack-indexes provide a way to lookup objects and +navigate to their offset within the pack, but these must come +in pairs with the packfiles. This pairing depends on the file +names, as the pack-index differs only in suffix with its pack- +file. While the pack-indexes provide fast lookup per packfile, +this performance degrades as the number of packfiles increases, +because abbreviations need to inspect every packfile and we are +more likely to have a miss on our most-recently-used packfile. +For some large repositories, repacking into a single packfile +is not feasible due to storage space or excessive repack times. + +The multi-pack-index (MIDX for short) stores a list of objects +and their offsets into multiple packfiles. It contains: + +- A list of packfile names. +- A sorted list of object IDs. +- A list of metadata for the ith object ID including: + - A value j referring to the jth packfile. + - An offset within the jth packfile for the object. +- If large offsets are required, we use another list of large + offsets similar to version 2 pack-indexes. 
+ +Thus, we can provide O(log N) lookup time for any number +of packfiles. + +Design Details +-------------- + +- The MIDX is stored in a file named 'multi-pack-index' in the + .git/objects/pack directory. This could be stored in the pack + directory of an alternate. It refers only to packfiles in that + same directory. + +- The core.multiPackIndex config setting must be on to consume MIDX files. + +- The file format includes parameters for the object ID hash + function, so a future change of hash algorithm does not require + a change in format. + +- The MIDX keeps only one record per object ID. If an object appears + in multiple packfiles, then the MIDX selects the copy in the most- + recently modified packfile. + +- If there exist packfiles in the pack directory not registered in + the MIDX, then those packfiles are loaded into the `packed_git` + list and `packed_git_mru` cache. + +- The pack-indexes (.idx files) remain in the pack directory so we + can delete the MIDX file, set core.multiPackIndex to false, or downgrade + without any loss of information. + +- The MIDX file format uses a chunk-based approach (similar to the + commit-graph file) that allows optional data to be added. + +Future Work +----------- + +- Add a 'verify' subcommand to the 'git multi-pack-index' builtin to verify the + contents of the multi-pack-index file match the offsets listed in + the corresponding pack-indexes. + +- The multi-pack-index allows many packfiles, especially in a context + where repacking is expensive (such as a very large repo), or + unexpected maintenance time is unacceptable (such as a high-demand + build machine). However, the multi-pack-index needs to be rewritten + in full every time. We can extend the format to be incremental, so + writes are fast. By storing a small "tip" multi-pack-index that + points to large "base" MIDX files, we can keep writes fast while + still reducing the number of binary searches required for object + lookups. 
+ +- The reachability bitmap is currently paired directly with a single + packfile, using the pack-order as the object order to hopefully + compress the bitmaps well using run-length encoding. This could be + extended to pair a reachability bitmap with a multi-pack-index. If + the multi-pack-index is extended to store a "stable object order" + (a function Order(hash) = integer that is constant for a given hash, + even as the multi-pack-index is updated) then a reachability bitmap + could point to a multi-pack-index and be updated independently. + +- Packfiles can be marked as "special" using empty files that share + the initial name but replace ".pack" with ".keep" or ".promisor". + We can add an optional chunk of data to the multi-pack-index that + records flags of information about the packfiles. This allows new + states, such as 'repacked' or 'redeltified', that can help with + pack maintenance in a multi-pack environment. It may also be + helpful to organize packfiles by object type (commit, tree, blob, + etc.) and use this metadata to help that maintenance. + +- The partial clone feature records special "promisor" packs that + may point to objects that are not stored locally, but available + on request to a server. The multi-pack-index does not currently + track these promisor packs. 
+ +Related Links +------------- +[0] https://bugs.chromium.org/p/git/issues/detail?id=6 + Chromium work item for: Multi-Pack Index (MIDX) + +[1] https://public-inbox.org/git/20180107181459.222909-1-dstolee@microsoft.com/ + An earlier RFC for the multi-pack-index feature + +[2] https://public-inbox.org/git/alpine.DEB.2.20.1803091557510.23109@alexmv-linux/ + Git Merge 2018 Contributor's summit notes (includes discussion of MIDX) From e0d1bcf82590aea8ee007ab087772f1c82f6890b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:19 -0400 Subject: [PATCH 02/92] multi-pack-index: add format details The multi-pack-index feature generalizes the existing pack-index feature by indexing objects across multiple pack-files. Describe the basic file format, using a 12-byte header followed by a lookup table for a list of "chunks" which will be described later. The file ends with a footer containing a checksum using the hash algorithm. The header allows later versions to create breaking changes by advancing the version number. We can also change the hash algorithm using a different version value. We will add the individual chunk format information as we introduce the code that writes that information. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/pack-format.txt | 49 +++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt index 70a99fd1423894..e060e693f41a76 100644 --- a/Documentation/technical/pack-format.txt +++ b/Documentation/technical/pack-format.txt @@ -252,3 +252,52 @@ Pack file entry: <+ corresponding packfile. 20-byte SHA-1-checksum of all of the above. + +== multi-pack-index (MIDX) files have the following format: + +The multi-pack-index files refer to multiple pack-files and loose objects. 
+ +In order to allow extensions that add extra data to the MIDX, we organize +the body into "chunks" and provide a lookup table at the beginning of the +body. The header includes certain length values, such as the number of packs, +the number of base MIDX files, hash lengths and types. + +All 4-byte numbers are in network order. + +HEADER: + + 4-byte signature: + The signature is: {'M', 'I', 'D', 'X'} + + 1-byte version number: + Git only writes or recognizes version 1. + + 1-byte Object Id Version + Git only writes or recognizes version 1 (SHA1). + + 1-byte number of "chunks" + + 1-byte number of base multi-pack-index files: + This value is currently always zero. + + 4-byte number of pack files + +CHUNK LOOKUP: + + (C + 1) * 12 bytes providing the chunk offsets: + First 4 bytes describe chunk id. Value 0 is a terminating label. + Other 8 bytes provide offset in current file for chunk to start. + (Chunks are provided in file-order, so you can infer the length + using the next chunk position if necessary.) + + The remaining data in the body is described one chunk at a time, and + these chunks may be given in any order. Chunks are required unless + otherwise specified. + +CHUNK DATA: + + (This section intentionally left incomplete.) + +TRAILER: + + 20-byte SHA1-checksum of the above contents. From 6a257f03ba9b86c744064d08df98db1847cf1722 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:20 -0400 Subject: [PATCH 03/92] multi-pack-index: add builtin This new 'git multi-pack-index' builtin will be the plumbing access for writing, reading, and checking multi-pack-index files. The initial implementation is a no-op. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- .gitignore | 3 ++- Documentation/git-multi-pack-index.txt | 36 ++++++++++++++++++++++++++ Makefile | 1 + builtin.h | 1 + builtin/multi-pack-index.c | 34 ++++++++++++++++++++++++ command-list.txt | 1 + git.c | 1 + 7 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 Documentation/git-multi-pack-index.txt create mode 100644 builtin/multi-pack-index.c diff --git a/.gitignore b/.gitignore index 388cc4beee54fa..25633bc51551a1 100644 --- a/.gitignore +++ b/.gitignore @@ -99,8 +99,9 @@ /git-mergetool--lib /git-mktag /git-mktree -/git-name-rev +/git-multi-pack-index /git-mv +/git-name-rev /git-notes /git-p4 /git-pack-redundant diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt new file mode 100644 index 00000000000000..74f6f2a95768be --- /dev/null +++ b/Documentation/git-multi-pack-index.txt @@ -0,0 +1,36 @@ +git-multi-pack-index(1) +======================= + +NAME +---- +git-multi-pack-index - Write and verify multi-pack-indexes + + +SYNOPSIS +-------- +[verse] +'git multi-pack-index' [--object-dir=<dir>] + +DESCRIPTION +----------- +Write or verify a multi-pack-index (MIDX) file. + +OPTIONS +------- + +--object-dir=<dir>:: + Use given directory for the location of Git objects. We check + `<dir>/pack/multi-pack-index` for the current MIDX file, and + `<dir>/pack` for the pack-files to index. + + +SEE ALSO +-------- +See link:technical/multi-pack-index.html[The Multi-Pack-Index Design +Document] and link:technical/pack-format.html[The Multi-Pack-Index +Format] for more information on the multi-pack-index feature. 
+ + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Makefile b/Makefile index e4b503d259906d..54610875ece02a 100644 --- a/Makefile +++ b/Makefile @@ -1047,6 +1047,7 @@ BUILTIN_OBJS += builtin/merge-recursive.o BUILTIN_OBJS += builtin/merge-tree.o BUILTIN_OBJS += builtin/mktag.o BUILTIN_OBJS += builtin/mktree.o +BUILTIN_OBJS += builtin/multi-pack-index.o BUILTIN_OBJS += builtin/mv.o BUILTIN_OBJS += builtin/name-rev.o BUILTIN_OBJS += builtin/notes.o diff --git a/builtin.h b/builtin.h index 4e0f64723ed8dd..70997d7ace5ba6 100644 --- a/builtin.h +++ b/builtin.h @@ -191,6 +191,7 @@ extern int cmd_merge_recursive(int argc, const char **argv, const char *prefix); extern int cmd_merge_tree(int argc, const char **argv, const char *prefix); extern int cmd_mktag(int argc, const char **argv, const char *prefix); extern int cmd_mktree(int argc, const char **argv, const char *prefix); +extern int cmd_multi_pack_index(int argc, const char **argv, const char *prefix); extern int cmd_mv(int argc, const char **argv, const char *prefix); extern int cmd_name_rev(int argc, const char **argv, const char *prefix); extern int cmd_notes(int argc, const char **argv, const char *prefix); diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c new file mode 100644 index 00000000000000..3161ddae86d689 --- /dev/null +++ b/builtin/multi-pack-index.c @@ -0,0 +1,34 @@ +#include "builtin.h" +#include "cache.h" +#include "config.h" +#include "parse-options.h" + +static char const * const builtin_multi_pack_index_usage[] = { + N_("git multi-pack-index [--object-dir=]"), + NULL +}; + +static struct opts_multi_pack_index { + const char *object_dir; +} opts; + +int cmd_multi_pack_index(int argc, const char **argv, + const char *prefix) +{ + static struct option builtin_multi_pack_index_options[] = { + OPT_FILENAME(0, "object-dir", &opts.object_dir, + N_("object directory containing set of packfile and pack-index pairs")), + OPT_END(), + }; + + git_config(git_default_config, NULL); + 
+ argc = parse_options(argc, argv, prefix, + builtin_multi_pack_index_options, + builtin_multi_pack_index_usage, 0); + + if (!opts.object_dir) + opts.object_dir = get_object_directory(); + + return 0; +} diff --git a/command-list.txt b/command-list.txt index e1c26c1bb7e618..61071f8fa2b94d 100644 --- a/command-list.txt +++ b/command-list.txt @@ -123,6 +123,7 @@ git-merge-index plumbingmanipulators git-merge-one-file purehelpers git-mergetool ancillarymanipulators complete git-merge-tree ancillaryinterrogators +git-multi-pack-index plumbingmanipulators git-mktag plumbingmanipulators git-mktree plumbingmanipulators git-mv mainporcelain worktree diff --git a/git.c b/git.c index c2f48d53dd4aab..a7509fa5f7198d 100644 --- a/git.c +++ b/git.c @@ -505,6 +505,7 @@ static struct cmd_struct commands[] = { { "merge-tree", cmd_merge_tree, RUN_SETUP | NO_PARSEOPT }, { "mktag", cmd_mktag, RUN_SETUP | NO_PARSEOPT }, { "mktree", cmd_mktree, RUN_SETUP }, + { "multi-pack-index", cmd_multi_pack_index, RUN_SETUP_GENTLY }, { "mv", cmd_mv, RUN_SETUP | NEED_WORK_TREE }, { "name-rev", cmd_name_rev, RUN_SETUP }, { "notes", cmd_notes, RUN_SETUP }, From a3407730261b11b45dda131464b73ec29922392a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:21 -0400 Subject: [PATCH 04/92] multi-pack-index: add 'write' verb In anticipation of writing multi-pack-indexes, add a skeleton 'git multi-pack-index write' subcommand and send the options to a write_midx_file() method. Also create a skeleton test script that tests the 'write' subcommand. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.txt | 22 +++++++++++++++++++++- Makefile | 1 + builtin/multi-pack-index.c | 17 +++++++++++++++-- midx.c | 7 +++++++ midx.h | 6 ++++++ t/t5319-multi-pack-index.sh | 10 ++++++++++ 6 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 midx.c create mode 100644 midx.h create mode 100755 t/t5319-multi-pack-index.sh diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt index 74f6f2a95768be..1f97e79912f45d 100644 --- a/Documentation/git-multi-pack-index.txt +++ b/Documentation/git-multi-pack-index.txt @@ -9,7 +9,7 @@ git-multi-pack-index - Write and verify multi-pack-indexes SYNOPSIS -------- [verse] -'git multi-pack-index' [--object-dir=<dir>] +'git multi-pack-index' [--object-dir=<dir>] <verb> DESCRIPTION ----------- @@ -23,6 +23,26 @@ OPTIONS `<dir>/pack/multi-pack-index` for the current MIDX file, and `<dir>/pack` for the pack-files to index. +write:: + When given as the verb, write a new MIDX file to + `<dir>/pack/multi-pack-index`. + + +EXAMPLES +-------- + +* Write a MIDX file for the packfiles in the current .git folder. ++ +----------------------------------------------- +$ git multi-pack-index write +----------------------------------------------- + +* Write a MIDX file for the packfiles in an alternate object store. 
++ +----------------------------------------------- +$ git multi-pack-index --object-dir write +----------------------------------------------- + SEE ALSO -------- diff --git a/Makefile b/Makefile index 54610875ece02a..f5636c711dbb96 100644 --- a/Makefile +++ b/Makefile @@ -890,6 +890,7 @@ LIB_OBJS += merge.o LIB_OBJS += merge-blobs.o LIB_OBJS += merge-recursive.o LIB_OBJS += mergesort.o +LIB_OBJS += midx.o LIB_OBJS += name-hash.o LIB_OBJS += notes.o LIB_OBJS += notes-cache.o diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 3161ddae86d689..6a7aa00cf2e7b1 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -2,9 +2,10 @@ #include "cache.h" #include "config.h" #include "parse-options.h" +#include "midx.h" static char const * const builtin_multi_pack_index_usage[] = { - N_("git multi-pack-index [--object-dir=]"), + N_("git multi-pack-index [--object-dir=] write"), NULL }; @@ -30,5 +31,17 @@ int cmd_multi_pack_index(int argc, const char **argv, if (!opts.object_dir) opts.object_dir = get_object_directory(); - return 0; + if (argc == 0) + goto usage; + + if (!strcmp(argv[0], "write")) { + if (argc > 1) + goto usage; + + return write_midx_file(opts.object_dir); + } + +usage: + usage_with_options(builtin_multi_pack_index_usage, + builtin_multi_pack_index_options); } diff --git a/midx.c b/midx.c new file mode 100644 index 00000000000000..32468db1a28752 --- /dev/null +++ b/midx.c @@ -0,0 +1,7 @@ +#include "cache.h" +#include "midx.h" + +int write_midx_file(const char *object_dir) +{ + return 0; +} diff --git a/midx.h b/midx.h new file mode 100644 index 00000000000000..dbdbe9f8732239 --- /dev/null +++ b/midx.h @@ -0,0 +1,6 @@ +#ifndef __MIDX_H__ +#define __MIDX_H__ + +int write_midx_file(const char *object_dir); + +#endif diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh new file mode 100755 index 00000000000000..ec3ddbe79cc13b --- /dev/null +++ b/t/t5319-multi-pack-index.sh @@ -0,0 +1,10 @@ +#!/bin/sh + 
+test_description='multi-pack-indexes' +. ./test-lib.sh + +test_expect_success 'write midx with no packs' ' + git multi-pack-index --object-dir=. write +' + +test_done From fc59e74844613feac74f305943656f21f92c705e Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:22 -0400 Subject: [PATCH 05/92] midx: write header information to lockfile As we begin writing the multi-pack-index format to disk, start with the basics: the 12-byte header and the 20-byte checksum footer. Start with these basics so we can add the rest of the format in small increments. As we implement the format, we will use a technique to check that our computed offsets within the multi-pack-index file match what we are actually writing. Each method that writes to the hashfile will return the number of bytes written, and we will track that those values match our expectations. Currently, write_midx_header() returns 12, but is not checked. We will check the return value in a later commit. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 50 +++++++++++++++++++++++++++++++++++++ t/t5319-multi-pack-index.sh | 4 ++- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/midx.c b/midx.c index 32468db1a28752..f85f2d334d4762 100644 --- a/midx.c +++ b/midx.c @@ -1,7 +1,57 @@ #include "cache.h" +#include "csum-file.h" +#include "lockfile.h" #include "midx.h" +#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ +#define MIDX_VERSION 1 +#define MIDX_HASH_VERSION 1 +#define MIDX_HEADER_SIZE 12 + +static char *get_midx_filename(const char *object_dir) +{ + return xstrfmt("%s/pack/multi-pack-index", object_dir); +} + +static size_t write_midx_header(struct hashfile *f, + unsigned char num_chunks, + uint32_t num_packs) +{ + unsigned char byte_values[4]; + + hashwrite_be32(f, MIDX_SIGNATURE); + byte_values[0] = MIDX_VERSION; + byte_values[1] = MIDX_HASH_VERSION; + byte_values[2] = num_chunks; + byte_values[3] = 0; /* unused */ + hashwrite(f, byte_values, 
sizeof(byte_values)); + hashwrite_be32(f, num_packs); + + return MIDX_HEADER_SIZE; +} + int write_midx_file(const char *object_dir) { + unsigned char num_chunks = 0; + char *midx_name; + struct hashfile *f = NULL; + struct lock_file lk; + + midx_name = get_midx_filename(object_dir); + if (safe_create_leading_directories(midx_name)) { + UNLEAK(midx_name); + die_errno(_("unable to create leading directories of %s"), + midx_name); + } + + hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR); + f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); + FREE_AND_NULL(midx_name); + + write_midx_header(f, num_chunks, 0); + + finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM); + commit_lock_file(&lk); + return 0; } diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index ec3ddbe79cc13b..50e80f8f2cde8c 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -4,7 +4,9 @@ test_description='multi-pack-indexes' . ./test-lib.sh test_expect_success 'write midx with no packs' ' - git multi-pack-index --object-dir=. write + test_when_finished rm -f pack/multi-pack-index && + git multi-pack-index --object-dir=. write && + test_path_is_file pack/multi-pack-index ' test_done From 4d80560c546179654c32499132a6bdaf3c45b16f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:23 -0400 Subject: [PATCH 06/92] multi-pack-index: load into memory Create a new multi_pack_index struct for loading multi-pack-indexes into memory. Create a test-tool builtin for reading basic information about that multi-pack-index to verify the correct data is written. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Makefile | 1 + midx.c | 79 +++++++++++++++++++++++++++++++++++++ midx.h | 18 +++++++++ object-store.h | 2 + t/helper/test-read-midx.c | 31 +++++++++++++++ t/helper/test-tool.c | 1 + t/helper/test-tool.h | 1 + t/t5319-multi-pack-index.sh | 11 +++++- 8 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 t/helper/test-read-midx.c diff --git a/Makefile b/Makefile index f5636c711dbb96..0b801d1b16b96f 100644 --- a/Makefile +++ b/Makefile @@ -717,6 +717,7 @@ TEST_BUILTINS_OBJS += test-online-cpus.o TEST_BUILTINS_OBJS += test-path-utils.o TEST_BUILTINS_OBJS += test-prio-queue.o TEST_BUILTINS_OBJS += test-read-cache.o +TEST_BUILTINS_OBJS += test-read-midx.o TEST_BUILTINS_OBJS += test-ref-store.o TEST_BUILTINS_OBJS += test-regex.o TEST_BUILTINS_OBJS += test-revision-walking.o diff --git a/midx.c b/midx.c index f85f2d334d4762..c1ff5acf853911 100644 --- a/midx.c +++ b/midx.c @@ -1,18 +1,97 @@ #include "cache.h" #include "csum-file.h" #include "lockfile.h" +#include "object-store.h" #include "midx.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 +#define MIDX_BYTE_FILE_VERSION 4 +#define MIDX_BYTE_HASH_VERSION 5 +#define MIDX_BYTE_NUM_CHUNKS 6 +#define MIDX_BYTE_NUM_PACKS 8 #define MIDX_HASH_VERSION 1 #define MIDX_HEADER_SIZE 12 +#define MIDX_HASH_LEN 20 +#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN) static char *get_midx_filename(const char *object_dir) { return xstrfmt("%s/pack/multi-pack-index", object_dir); } +struct multi_pack_index *load_multi_pack_index(const char *object_dir) +{ + struct multi_pack_index *m = NULL; + int fd; + struct stat st; + size_t midx_size; + void *midx_map = NULL; + uint32_t hash_version; + char *midx_name = get_midx_filename(object_dir); + + fd = git_open(midx_name); + + if (fd < 0) + goto cleanup_fail; + if (fstat(fd, &st)) { + error_errno(_("failed to read %s"), midx_name); + goto cleanup_fail; + } + + midx_size = 
xsize_t(st.st_size); + + if (midx_size < MIDX_MIN_SIZE) { + error(_("multi-pack-index file %s is too small"), midx_name); + goto cleanup_fail; + } + + FREE_AND_NULL(midx_name); + + midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0); + + FLEX_ALLOC_MEM(m, object_dir, object_dir, strlen(object_dir)); + m->fd = fd; + m->data = midx_map; + m->data_len = midx_size; + + m->signature = get_be32(m->data); + if (m->signature != MIDX_SIGNATURE) { + error(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"), + m->signature, MIDX_SIGNATURE); + goto cleanup_fail; + } + + m->version = m->data[MIDX_BYTE_FILE_VERSION]; + if (m->version != MIDX_VERSION) { + error(_("multi-pack-index version %d not recognized"), + m->version); + goto cleanup_fail; + } + + hash_version = m->data[MIDX_BYTE_HASH_VERSION]; + if (hash_version != MIDX_HASH_VERSION) { + error(_("hash version %u does not match"), hash_version); + goto cleanup_fail; + } + m->hash_len = MIDX_HASH_LEN; + + m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS]; + + m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS); + + return m; + +cleanup_fail: + free(m); + free(midx_name); + if (midx_map) + munmap(midx_map, midx_size); + if (0 <= fd) + close(fd); + return NULL; +} + static size_t write_midx_header(struct hashfile *f, unsigned char num_chunks, uint32_t num_packs) diff --git a/midx.h b/midx.h index dbdbe9f8732239..0e05051bca8f26 100644 --- a/midx.h +++ b/midx.h @@ -1,6 +1,24 @@ #ifndef __MIDX_H__ #define __MIDX_H__ +struct multi_pack_index { + int fd; + + const unsigned char *data; + size_t data_len; + + uint32_t signature; + unsigned char version; + unsigned char hash_len; + unsigned char num_chunks; + uint32_t num_packs; + uint32_t num_objects; + + char object_dir[FLEX_ARRAY]; +}; + +struct multi_pack_index *load_multi_pack_index(const char *object_dir); + int write_midx_file(const char *object_dir); #endif diff --git a/object-store.h b/object-store.h index d683112fd7bbab..13a766aea8ece9 100644 
--- a/object-store.h +++ b/object-store.h @@ -84,6 +84,8 @@ struct packed_git { char pack_name[FLEX_ARRAY]; /* more */ }; +struct multi_pack_index; + struct raw_object_store { /* * Path to the repository's object store. diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c new file mode 100644 index 00000000000000..988a48716986c6 --- /dev/null +++ b/t/helper/test-read-midx.c @@ -0,0 +1,31 @@ +#include "test-tool.h" +#include "cache.h" +#include "midx.h" +#include "repository.h" +#include "object-store.h" + +static int read_midx_file(const char *object_dir) +{ + struct multi_pack_index *m = load_multi_pack_index(object_dir); + + if (!m) + return 1; + + printf("header: %08x %d %d %d\n", + m->signature, + m->version, + m->num_chunks, + m->num_packs); + + printf("object-dir: %s\n", m->object_dir); + + return 0; +} + +int cmd__read_midx(int argc, const char **argv) +{ + if (argc != 2) + usage("read-midx "); + + return read_midx_file(argv[1]); +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index 805a45de9c877d..1c3ab36e6c3ef0 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -27,6 +27,7 @@ static struct test_cmd cmds[] = { { "path-utils", cmd__path_utils }, { "prio-queue", cmd__prio_queue }, { "read-cache", cmd__read_cache }, + { "read-midx", cmd__read_midx }, { "ref-store", cmd__ref_store }, { "regex", cmd__regex }, { "revision-walking", cmd__revision_walking }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index 7116ddfb94398d..6af8c08a6655ad 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -21,6 +21,7 @@ int cmd__online_cpus(int argc, const char **argv); int cmd__path_utils(int argc, const char **argv); int cmd__prio_queue(int argc, const char **argv); int cmd__read_cache(int argc, const char **argv); +int cmd__read_midx(int argc, const char **argv); int cmd__ref_store(int argc, const char **argv); int cmd__regex(int argc, const char **argv); int cmd__revision_walking(int argc, const char **argv); diff 
--git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 50e80f8f2cde8c..506bd8abb8383d 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -3,10 +3,19 @@ test_description='multi-pack-indexes' . ./test-lib.sh +midx_read_expect () { + cat >expect <<-EOF + header: 4d494458 1 0 0 + object-dir: . + EOF + test-tool read-midx . >actual && + test_cmp expect actual +} + test_expect_success 'write midx with no packs' ' test_when_finished rm -f pack/multi-pack-index && git multi-pack-index --object-dir=. write && - test_path_is_file pack/multi-pack-index + midx_read_expect ' test_done From 2c3813354b2c02221f8496d8f8671f5b33d878ad Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:24 -0400 Subject: [PATCH 07/92] t5319: expand test data As we build the multi-pack-index file format, we want to test the format on real repositories. Add tests that create repository data including multiple packfiles with both version 1 and version 2 formats. The current 'git multi-pack-index write' command will always write the same file with no "real" data. This will be expanded in future commits, along with the test expectations. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5319-multi-pack-index.sh | 84 +++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 506bd8abb8383d..1240127ec1a0c8 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -18,4 +18,88 @@ test_expect_success 'write midx with no packs' ' midx_read_expect ' +generate_objects () { + i=$1 + iii=$(printf '%03i' $i) + { + test-tool genrandom "bar" 200 && + test-tool genrandom "baz $iii" 50 + } >wide_delta_$iii && + { + test-tool genrandom "foo"$i 100 && + test-tool genrandom "foo"$(( $i + 1 )) 100 && + test-tool genrandom "foo"$(( $i + 2 )) 100 + } >deep_delta_$iii && + { + echo $iii && + test-tool genrandom "$iii" 8192 + } >file_$iii && + git update-index --add file_$iii deep_delta_$iii wide_delta_$iii +} + +commit_and_list_objects () { + { + echo 101 && + test-tool genrandom 100 8192; + } >file_101 && + git update-index --add file_101 && + tree=$(git write-tree) && + commit=$(git commit-tree $tree -p HEADobj-list && + git reset --hard $commit +} + +test_expect_success 'create objects' ' + test_commit initial && + for i in $(test_seq 1 5) + do + generate_objects $i + done && + commit_and_list_objects +' + +test_expect_success 'write midx with one v1 pack' ' + pack=$(git pack-objects --index-version=1 pack/test Date: Thu, 12 Jul 2018 15:39:25 -0400 Subject: [PATCH 08/92] packfile: generalize pack directory list In anticipation of sharing the pack directory listing with the multi-pack-index, generalize prepare_packed_git_one() into for_each_file_in_pack_dir(). 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- packfile.c | 101 +++++++++++++++++++++++++++++++++-------------------- packfile.h | 6 ++++ 2 files changed, 69 insertions(+), 38 deletions(-) diff --git a/packfile.c b/packfile.c index 7cd45aa4b2a0e0..ee1ab9b804d54c 100644 --- a/packfile.c +++ b/packfile.c @@ -738,13 +738,14 @@ static void report_pack_garbage(struct string_list *list) report_helper(list, seen_bits, first, list->nr); } -static void prepare_packed_git_one(struct repository *r, char *objdir, int local) +void for_each_file_in_pack_dir(const char *objdir, + each_file_in_pack_dir_fn fn, + void *data) { struct strbuf path = STRBUF_INIT; size_t dirnamelen; DIR *dir; struct dirent *de; - struct string_list garbage = STRING_LIST_INIT_DUP; strbuf_addstr(&path, objdir); strbuf_addstr(&path, "/pack"); @@ -759,53 +760,77 @@ static void prepare_packed_git_one(struct repository *r, char *objdir, int local strbuf_addch(&path, '/'); dirnamelen = path.len; while ((de = readdir(dir)) != NULL) { - struct packed_git *p; - size_t base_len; - if (is_dot_or_dotdot(de->d_name)) continue; strbuf_setlen(&path, dirnamelen); strbuf_addstr(&path, de->d_name); - base_len = path.len; - if (strip_suffix_mem(path.buf, &base_len, ".idx")) { - /* Don't reopen a pack we already have. */ - for (p = r->objects->packed_git; p; - p = p->next) { - size_t len; - if (strip_suffix(p->pack_name, ".pack", &len) && - len == base_len && - !memcmp(p->pack_name, path.buf, len)) - break; - } - if (p == NULL && - /* - * See if it really is a valid .idx file with - * corresponding .pack file that we can map. 
- */ - (p = add_packed_git(path.buf, path.len, local)) != NULL) - install_packed_git(r, p); - } - - if (!report_garbage) - continue; - - if (ends_with(de->d_name, ".idx") || - ends_with(de->d_name, ".pack") || - ends_with(de->d_name, ".bitmap") || - ends_with(de->d_name, ".keep") || - ends_with(de->d_name, ".promisor")) - string_list_append(&garbage, path.buf); - else - report_garbage(PACKDIR_FILE_GARBAGE, path.buf); + fn(path.buf, path.len, de->d_name, data); } + closedir(dir); - report_pack_garbage(&garbage); - string_list_clear(&garbage, 0); strbuf_release(&path); } +struct prepare_pack_data { + struct repository *r; + struct string_list *garbage; + int local; +}; + +static void prepare_pack(const char *full_name, size_t full_name_len, + const char *file_name, void *_data) +{ + struct prepare_pack_data *data = (struct prepare_pack_data *)_data; + struct packed_git *p; + size_t base_len = full_name_len; + + if (strip_suffix_mem(full_name, &base_len, ".idx")) { + /* Don't reopen a pack we already have. 
*/ + for (p = data->r->objects->packed_git; p; p = p->next) { + size_t len; + if (strip_suffix(p->pack_name, ".pack", &len) && + len == base_len && + !memcmp(p->pack_name, full_name, len)) + break; + } + + if (!p) { + p = add_packed_git(full_name, full_name_len, data->local); + if (p) + install_packed_git(data->r, p); + } + } + + if (!report_garbage) + return; + + if (ends_with(file_name, ".idx") || + ends_with(file_name, ".pack") || + ends_with(file_name, ".bitmap") || + ends_with(file_name, ".keep") || + ends_with(file_name, ".promisor")) + string_list_append(data->garbage, full_name); + else + report_garbage(PACKDIR_FILE_GARBAGE, full_name); +} + +static void prepare_packed_git_one(struct repository *r, char *objdir, int local) +{ + struct prepare_pack_data data; + struct string_list garbage = STRING_LIST_INIT_DUP; + + data.r = r; + data.garbage = &garbage; + data.local = local; + + for_each_file_in_pack_dir(objdir, prepare_pack, &data); + + report_pack_garbage(data.garbage); + string_list_clear(data.garbage, 0); +} + static void prepare_packed_git(struct repository *r); /* * Give a fast, rough count of the number of objects in the repository. 
This diff --git a/packfile.h b/packfile.h index e0a38aba9321de..d2ad30300a2740 100644 --- a/packfile.h +++ b/packfile.h @@ -28,6 +28,12 @@ extern char *sha1_pack_index_name(const unsigned char *sha1); extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path); +typedef void each_file_in_pack_dir_fn(const char *full_path, size_t full_path_len, + const char *file_pach, void *data); +void for_each_file_in_pack_dir(const char *objdir, + each_file_in_pack_dir_fn fn, + void *data); + /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 #define PACKDIR_FILE_IDX 2 From 396f257018a6031c4eb0803d4693441ad8a9fd10 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:26 -0400 Subject: [PATCH 09/92] multi-pack-index: read packfile list When constructing a multi-pack-index file for a given object directory, read the files within the enclosed pack directory and find matches that end with ".idx" and find the correct paired packfile using add_packed_git(). 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 48 ++++++++++++++++++++++++++++++++++++- t/t5319-multi-pack-index.sh | 15 ++++++------ 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/midx.c b/midx.c index c1ff5acf853911..f742d7ccd70783 100644 --- a/midx.c +++ b/midx.c @@ -1,6 +1,8 @@ #include "cache.h" #include "csum-file.h" +#include "dir.h" #include "lockfile.h" +#include "packfile.h" #include "object-store.h" #include "midx.h" @@ -109,12 +111,41 @@ static size_t write_midx_header(struct hashfile *f, return MIDX_HEADER_SIZE; } +struct pack_list { + struct packed_git **list; + uint32_t nr; + uint32_t alloc_list; +}; + +static void add_pack_to_midx(const char *full_path, size_t full_path_len, + const char *file_name, void *data) +{ + struct pack_list *packs = (struct pack_list *)data; + + if (ends_with(file_name, ".idx")) { + ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list); + + packs->list[packs->nr] = add_packed_git(full_path, + full_path_len, + 0); + if (!packs->list[packs->nr]) { + warning(_("failed to add packfile '%s'"), + full_path); + return; + } + + packs->nr++; + } +} + int write_midx_file(const char *object_dir) { unsigned char num_chunks = 0; char *midx_name; + uint32_t i; struct hashfile *f = NULL; struct lock_file lk; + struct pack_list packs; midx_name = get_midx_filename(object_dir); if (safe_create_leading_directories(midx_name)) { @@ -123,14 +154,29 @@ int write_midx_file(const char *object_dir) midx_name); } + packs.nr = 0; + packs.alloc_list = 16; + packs.list = NULL; + ALLOC_ARRAY(packs.list, packs.alloc_list); + + for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &packs); + hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); FREE_AND_NULL(midx_name); - write_midx_header(f, num_chunks, 0); + write_midx_header(f, num_chunks, packs.nr); finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM); commit_lock_file(&lk); + for 
(i = 0; i < packs.nr; i++) { + if (packs.list[i]) { + close_pack(packs.list[i]); + free(packs.list[i]); + } + } + + free(packs.list); return 0; } diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 1240127ec1a0c8..54117a7f49563f 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -4,8 +4,9 @@ test_description='multi-pack-indexes' . ./test-lib.sh midx_read_expect () { + NUM_PACKS=$1 cat >expect <<-EOF - header: 4d494458 1 0 0 + header: 4d494458 1 0 $NUM_PACKS object-dir: . EOF test-tool read-midx . >actual && @@ -15,7 +16,7 @@ midx_read_expect () { test_expect_success 'write midx with no packs' ' test_when_finished rm -f pack/multi-pack-index && git multi-pack-index --object-dir=. write && - midx_read_expect + midx_read_expect 0 ' generate_objects () { @@ -65,13 +66,13 @@ test_expect_success 'write midx with one v1 pack' ' pack=$(git pack-objects --index-version=1 pack/test Date: Thu, 12 Jul 2018 15:39:27 -0400 Subject: [PATCH 10/92] multi-pack-index: write pack names in chunk The multi-pack-index needs to track which packfiles it indexes. Store these in our first required chunk. Since filenames are not well structured, add padding to keep good alignment in later chunks. Modify the 'test-tool read-midx' helper to output the existence of the pack-file name chunk. Modify t5319-multi-pack-index.sh to reflect this new output and the new expected number of chunks. Defense in depth: A pattern we are using in the multi-pack-index feature is to verify the data as we write it. We want to ensure we never write invalid data to the multi-pack-index. There are many checks that verify that the values we are writing fit the format definitions. This mainly helps developers while working on the feature, but it can also identify issues that only appear when dealing with very large data sets. These large sets are hard to encode into test cases. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/pack-format.txt | 6 + midx.c | 174 +++++++++++++++++++++++- midx.h | 2 + t/helper/test-read-midx.c | 7 + t/t5319-multi-pack-index.sh | 3 +- 5 files changed, 189 insertions(+), 3 deletions(-) diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt index e060e693f41a76..6c5a77475f4a65 100644 --- a/Documentation/technical/pack-format.txt +++ b/Documentation/technical/pack-format.txt @@ -296,6 +296,12 @@ CHUNK LOOKUP: CHUNK DATA: + Packfile Names (ID: {'P', 'N', 'A', 'M'}) + Stores the packfile names as concatenated, null-terminated strings. + Packfiles must be listed in lexicographic order for fast lookups by + name. This is the only chunk not guaranteed to be a multiple of four + bytes in length, so should be the last chunk for alignment reasons. + (This section intentionally left incomplete.) TRAILER: diff --git a/midx.c b/midx.c index f742d7ccd70783..ca7a32bf956596 100644 --- a/midx.c +++ b/midx.c @@ -17,6 +17,11 @@ #define MIDX_HASH_LEN 20 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN) +#define MIDX_MAX_CHUNKS 1 +#define MIDX_CHUNK_ALIGNMENT 4 +#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ +#define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t)) + static char *get_midx_filename(const char *object_dir) { return xstrfmt("%s/pack/multi-pack-index", object_dir); @@ -31,6 +36,7 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) void *midx_map = NULL; uint32_t hash_version; char *midx_name = get_midx_filename(object_dir); + uint32_t i; fd = git_open(midx_name); @@ -82,6 +88,33 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS); + for (i = 0; i < m->num_chunks; i++) { + uint32_t chunk_id = get_be32(m->data + MIDX_HEADER_SIZE + + MIDX_CHUNKLOOKUP_WIDTH * i); + uint64_t chunk_offset = get_be64(m->data + 
MIDX_HEADER_SIZE + 4 + + MIDX_CHUNKLOOKUP_WIDTH * i); + + switch (chunk_id) { + case MIDX_CHUNKID_PACKNAMES: + m->chunk_pack_names = m->data + chunk_offset; + break; + + case 0: + die(_("terminating multi-pack-index chunk id appears earlier than expected")); + break; + + default: + /* + * Do nothing on unrecognized chunks, allowing future + * extensions to add optional chunks. + */ + break; + } + } + + if (!m->chunk_pack_names) + die(_("multi-pack-index missing required pack-name chunk")); + return m; cleanup_fail: @@ -113,8 +146,11 @@ static size_t write_midx_header(struct hashfile *f, struct pack_list { struct packed_git **list; + char **names; uint32_t nr; uint32_t alloc_list; + uint32_t alloc_names; + size_t pack_name_concat_len; }; static void add_pack_to_midx(const char *full_path, size_t full_path_len, @@ -124,6 +160,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len, if (ends_with(file_name, ".idx")) { ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list); + ALLOC_GROW(packs->names, packs->nr + 1, packs->alloc_names); packs->list[packs->nr] = add_packed_git(full_path, full_path_len, @@ -134,18 +171,89 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len, return; } + packs->names[packs->nr] = xstrdup(file_name); + packs->pack_name_concat_len += strlen(file_name) + 1; packs->nr++; } } +struct pack_pair { + uint32_t pack_int_id; + char *pack_name; +}; + +static int pack_pair_compare(const void *_a, const void *_b) +{ + struct pack_pair *a = (struct pack_pair *)_a; + struct pack_pair *b = (struct pack_pair *)_b; + return strcmp(a->pack_name, b->pack_name); +} + +static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *perm) +{ + uint32_t i; + struct pack_pair *pairs; + + ALLOC_ARRAY(pairs, nr_packs); + + for (i = 0; i < nr_packs; i++) { + pairs[i].pack_int_id = i; + pairs[i].pack_name = pack_names[i]; + } + + QSORT(pairs, nr_packs, pack_pair_compare); + + for (i = 0; i < nr_packs; i++) 
{ + pack_names[i] = pairs[i].pack_name; + perm[pairs[i].pack_int_id] = i; + } + + free(pairs); +} + +static size_t write_midx_pack_names(struct hashfile *f, + char **pack_names, + uint32_t num_packs) +{ + uint32_t i; + unsigned char padding[MIDX_CHUNK_ALIGNMENT]; + size_t written = 0; + + for (i = 0; i < num_packs; i++) { + size_t writelen = strlen(pack_names[i]) + 1; + + if (i && strcmp(pack_names[i], pack_names[i - 1]) <= 0) + BUG("incorrect pack-file order: %s before %s", + pack_names[i - 1], + pack_names[i]); + + hashwrite(f, pack_names[i], writelen); + written += writelen; + } + + /* add padding to be aligned */ + i = MIDX_CHUNK_ALIGNMENT - (written % MIDX_CHUNK_ALIGNMENT); + if (i < MIDX_CHUNK_ALIGNMENT) { + memset(padding, 0, sizeof(padding)); + hashwrite(f, padding, i); + written += i; + } + + return written; +} + int write_midx_file(const char *object_dir) { - unsigned char num_chunks = 0; + unsigned char cur_chunk, num_chunks = 0; char *midx_name; uint32_t i; struct hashfile *f = NULL; struct lock_file lk; struct pack_list packs; + uint32_t *pack_perm = NULL; + uint64_t written = 0; + uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1]; + uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1]; midx_name = get_midx_filename(object_dir); if (safe_create_leading_directories(midx_name)) { @@ -156,16 +264,76 @@ int write_midx_file(const char *object_dir) packs.nr = 0; packs.alloc_list = 16; + packs.alloc_names = 16; packs.list = NULL; + packs.pack_name_concat_len = 0; ALLOC_ARRAY(packs.list, packs.alloc_list); + ALLOC_ARRAY(packs.names, packs.alloc_names); for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &packs); + if (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT) + packs.pack_name_concat_len += MIDX_CHUNK_ALIGNMENT - + (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT); + + ALLOC_ARRAY(pack_perm, packs.nr); + sort_packs_by_name(packs.names, packs.nr, pack_perm); + hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, 
lk.tempfile->filename.buf); FREE_AND_NULL(midx_name); - write_midx_header(f, num_chunks, packs.nr); + cur_chunk = 0; + num_chunks = 1; + + written = write_midx_header(f, num_chunks, packs.nr); + + chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES; + chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH; + + cur_chunk++; + chunk_ids[cur_chunk] = 0; + chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len; + + for (i = 0; i <= num_chunks; i++) { + if (i && chunk_offsets[i] < chunk_offsets[i - 1]) + BUG("incorrect chunk offsets: %"PRIu64" before %"PRIu64, + chunk_offsets[i - 1], + chunk_offsets[i]); + + if (chunk_offsets[i] % MIDX_CHUNK_ALIGNMENT) + BUG("chunk offset %"PRIu64" is not properly aligned", + chunk_offsets[i]); + + hashwrite_be32(f, chunk_ids[i]); + hashwrite_be32(f, chunk_offsets[i] >> 32); + hashwrite_be32(f, chunk_offsets[i]); + + written += MIDX_CHUNKLOOKUP_WIDTH; + } + + for (i = 0; i < num_chunks; i++) { + if (written != chunk_offsets[i]) + BUG("incorrect chunk offset (%"PRIu64" != %"PRIu64") for chunk id %"PRIx32, + chunk_offsets[i], + written, + chunk_ids[i]); + + switch (chunk_ids[i]) { + case MIDX_CHUNKID_PACKNAMES: + written += write_midx_pack_names(f, packs.names, packs.nr); + break; + + default: + BUG("trying to write unknown chunk id %"PRIx32, + chunk_ids[i]); + } + } + + if (written != chunk_offsets[num_chunks]) + BUG("incorrect final offset %"PRIu64" != %"PRIu64, + written, + chunk_offsets[num_chunks]); finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM); commit_lock_file(&lk); @@ -175,8 +343,10 @@ int write_midx_file(const char *object_dir) close_pack(packs.list[i]); free(packs.list[i]); } + free(packs.names[i]); } free(packs.list); + free(packs.names); return 0; } diff --git a/midx.h b/midx.h index 0e05051bca8f26..38af01fa3beac9 100644 --- a/midx.h +++ b/midx.h @@ -14,6 +14,8 @@ struct multi_pack_index { uint32_t num_packs; uint32_t num_objects; + const unsigned char 
*chunk_pack_names; + char object_dir[FLEX_ARRAY]; }; diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 988a48716986c6..3f2d2cfa7807cd 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -17,6 +17,13 @@ static int read_midx_file(const char *object_dir) m->num_chunks, m->num_packs); + printf("chunks:"); + + if (m->chunk_pack_names) + printf(" pack-names"); + + printf("\n"); + printf("object-dir: %s\n", m->object_dir); return 0; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 54117a7f49563f..7512d55c927ed9 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -6,7 +6,8 @@ test_description='multi-pack-indexes' midx_read_expect () { NUM_PACKS=$1 cat >expect <<-EOF - header: 4d494458 1 0 $NUM_PACKS + header: 4d494458 1 1 $NUM_PACKS + chunks: pack-names object-dir: . EOF test-tool read-midx . >actual && From 3227565cfdeabcb06eede914d38c8729e6ff1434 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:28 -0400 Subject: [PATCH 11/92] midx: read pack names into array Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 17 +++++++++++++++++ midx.h | 1 + t/helper/test-read-midx.c | 5 +++++ t/t5319-multi-pack-index.sh | 17 ++++++++++++----- 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/midx.c b/midx.c index ca7a32bf956596..fcdf6553cea312 100644 --- a/midx.c +++ b/midx.c @@ -37,6 +37,7 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) uint32_t hash_version; char *midx_name = get_midx_filename(object_dir); uint32_t i; + const char *cur_pack_name; fd = git_open(midx_name); @@ -115,6 +116,22 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) if (!m->chunk_pack_names) die(_("multi-pack-index missing required pack-name chunk")); + m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names)); + + cur_pack_name = (const char *)m->chunk_pack_names; + for (i = 0; i < m->num_packs; i++) 
{ + m->pack_names[i] = cur_pack_name; + + cur_pack_name += strlen(cur_pack_name) + 1; + + if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) { + error(_("multi-pack-index pack names out of order: '%s' before '%s'"), + m->pack_names[i - 1], + m->pack_names[i]); + goto cleanup_fail; + } + } + return m; cleanup_fail: diff --git a/midx.h b/midx.h index 38af01fa3beac9..17b56172e3cc4c 100644 --- a/midx.h +++ b/midx.h @@ -16,6 +16,7 @@ struct multi_pack_index { const unsigned char *chunk_pack_names; + const char **pack_names; char object_dir[FLEX_ARRAY]; }; diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 3f2d2cfa7807cd..76a60d78828307 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -6,6 +6,7 @@ static int read_midx_file(const char *object_dir) { + uint32_t i; struct multi_pack_index *m = load_multi_pack_index(object_dir); if (!m) @@ -24,6 +25,10 @@ static int read_midx_file(const char *object_dir) printf("\n"); + printf("packs:\n"); + for (i = 0; i < m->num_packs; i++) + printf("%s\n", m->pack_names[i]); + printf("object-dir: %s\n", m->object_dir); return 0; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 7512d55c927ed9..e8da082c64fa49 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -5,11 +5,18 @@ test_description='multi-pack-indexes' midx_read_expect () { NUM_PACKS=$1 - cat >expect <<-EOF - header: 4d494458 1 1 $NUM_PACKS - chunks: pack-names - object-dir: . - EOF + { + cat <<-EOF && + header: 4d494458 1 1 $NUM_PACKS + chunks: pack-names + packs: + EOF + if test $NUM_PACKS -ge 1 + then + ls pack/ | grep idx | sort + fi && + printf "object-dir: .\n" + } >expect && test-tool read-midx . 
>actual && test_cmp expect actual } From fe1ed56f5e482507b54a4fb491273f122c5fd9ea Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:29 -0400 Subject: [PATCH 12/92] midx: sort and deduplicate objects from packfiles Before writing a list of objects and their offsets to a multi-pack-index, we need to collect the list of objects contained in the packfiles. There may be multiple copies of some objects, so this list must be deduplicated. It is possible to artificially get into a state where there are many duplicate copies of objects. That can create high memory pressure if we are to create a list of all objects before de-duplication. To reduce this memory pressure without a significant performance drop, automatically group objects by the first byte of their object id. Use the IDX fanout tables to group the data, copy to a local array, then sort. Copy only the de-duplicated entries. Select the duplicate based on the most-recent modified time of a packfile containing the object. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++++++ packfile.c | 17 +++++++ packfile.h | 2 + 3 files changed, 147 insertions(+) diff --git a/midx.c b/midx.c index fcdf6553cea312..29f8de5ee6461e 100644 --- a/midx.c +++ b/midx.c @@ -4,6 +4,7 @@ #include "lockfile.h" #include "packfile.h" #include "object-store.h" +#include "packfile.h" #include "midx.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ @@ -182,12 +183,21 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len, packs->list[packs->nr] = add_packed_git(full_path, full_path_len, 0); + if (!packs->list[packs->nr]) { warning(_("failed to add packfile '%s'"), full_path); return; } + if (open_pack_index(packs->list[packs->nr])) { + warning(_("failed to open pack-index '%s'"), + full_path); + close_pack(packs->list[packs->nr]); + FREE_AND_NULL(packs->list[packs->nr]); + return; + } + packs->names[packs->nr] = xstrdup(file_name); packs->pack_name_concat_len += strlen(file_name) + 1; packs->nr++; @@ -228,6 +238,119 @@ static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *p free(pairs); } +struct pack_midx_entry { + struct object_id oid; + uint32_t pack_int_id; + time_t pack_mtime; + uint64_t offset; +}; + +static int midx_oid_compare(const void *_a, const void *_b) +{ + const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a; + const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b; + int cmp = oidcmp(&a->oid, &b->oid); + + if (cmp) + return cmp; + + if (a->pack_mtime > b->pack_mtime) + return -1; + else if (a->pack_mtime < b->pack_mtime) + return 1; + + return a->pack_int_id - b->pack_int_id; +} + +static void fill_pack_entry(uint32_t pack_int_id, + struct packed_git *p, + uint32_t cur_object, + struct pack_midx_entry *entry) +{ + if (!nth_packed_object_oid(&entry->oid, p, cur_object)) + die(_("failed to locate object %d in packfile"), cur_object); + + 
entry->pack_int_id = pack_int_id; + entry->pack_mtime = p->mtime; + + entry->offset = nth_packed_object_offset(p, cur_object); +} + +/* + * It is possible to artificially get into a state where there are many + * duplicate copies of objects. That can create high memory pressure if + * we are to create a list of all objects before de-duplication. To reduce + * this memory pressure without a significant performance drop, automatically + * group objects by the first byte of their object id. Use the IDX fanout + * tables to group the data, copy to a local array, then sort. + * + * Copy only the de-duplicated entries (selected by most-recent modified time + * of a packfile containing the object). + */ +static struct pack_midx_entry *get_sorted_entries(struct packed_git **p, + uint32_t *perm, + uint32_t nr_packs, + uint32_t *nr_objects) +{ + uint32_t cur_fanout, cur_pack, cur_object; + uint32_t alloc_fanout, alloc_objects, total_objects = 0; + struct pack_midx_entry *entries_by_fanout = NULL; + struct pack_midx_entry *deduplicated_entries = NULL; + + for (cur_pack = 0; cur_pack < nr_packs; cur_pack++) + total_objects += p[cur_pack]->num_objects; + + /* + * As we de-duplicate by fanout value, we expect the fanout + * slices to be evenly distributed, with some noise. Hence, + * allocate slightly more than one 256th. + */ + alloc_objects = alloc_fanout = total_objects > 3200 ? 
total_objects / 200 : 16; + + ALLOC_ARRAY(entries_by_fanout, alloc_fanout); + ALLOC_ARRAY(deduplicated_entries, alloc_objects); + *nr_objects = 0; + + for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { + uint32_t nr_fanout = 0; + + for (cur_pack = 0; cur_pack < nr_packs; cur_pack++) { + uint32_t start = 0, end; + + if (cur_fanout) + start = get_pack_fanout(p[cur_pack], cur_fanout - 1); + end = get_pack_fanout(p[cur_pack], cur_fanout); + + for (cur_object = start; cur_object < end; cur_object++) { + ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); + fill_pack_entry(perm[cur_pack], p[cur_pack], cur_object, &entries_by_fanout[nr_fanout]); + nr_fanout++; + } + } + + QSORT(entries_by_fanout, nr_fanout, midx_oid_compare); + + /* + * The batch is now sorted by OID and then mtime (descending). + * Take only the first duplicate. + */ + for (cur_object = 0; cur_object < nr_fanout; cur_object++) { + if (cur_object && !oidcmp(&entries_by_fanout[cur_object - 1].oid, + &entries_by_fanout[cur_object].oid)) + continue; + + ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects); + memcpy(&deduplicated_entries[*nr_objects], + &entries_by_fanout[cur_object], + sizeof(struct pack_midx_entry)); + (*nr_objects)++; + } + } + + free(entries_by_fanout); + return deduplicated_entries; +} + static size_t write_midx_pack_names(struct hashfile *f, char **pack_names, uint32_t num_packs) @@ -271,6 +394,8 @@ int write_midx_file(const char *object_dir) uint64_t written = 0; uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1]; uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1]; + uint32_t nr_entries; + struct pack_midx_entry *entries = NULL; midx_name = get_midx_filename(object_dir); if (safe_create_leading_directories(midx_name)) { @@ -296,6 +421,8 @@ int write_midx_file(const char *object_dir) ALLOC_ARRAY(pack_perm, packs.nr); sort_packs_by_name(packs.names, packs.nr, pack_perm); + entries = get_sorted_entries(packs.list, pack_perm, packs.nr, &nr_entries); + hold_lock_file_for_update(&lk, 
midx_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); FREE_AND_NULL(midx_name); @@ -365,5 +492,6 @@ int write_midx_file(const char *object_dir) free(packs.list); free(packs.names); + free(entries); return 0; } diff --git a/packfile.c b/packfile.c index ee1ab9b804d54c..3d652212c60633 100644 --- a/packfile.c +++ b/packfile.c @@ -196,6 +196,23 @@ int open_pack_index(struct packed_git *p) return ret; } +uint32_t get_pack_fanout(struct packed_git *p, uint32_t value) +{ + const uint32_t *level1_ofs = p->index_data; + + if (!level1_ofs) { + if (open_pack_index(p)) + return 0; + level1_ofs = p->index_data; + } + + if (p->index_version > 1) { + level1_ofs += 2; + } + + return ntohl(level1_ofs[value]); +} + static struct packed_git *alloc_packed_git(int extra) { struct packed_git *p = xmalloc(st_add(sizeof(*p), extra)); diff --git a/packfile.h b/packfile.h index d2ad30300a2740..b0eed44c0b1e35 100644 --- a/packfile.h +++ b/packfile.h @@ -69,6 +69,8 @@ extern int open_pack_index(struct packed_git *); */ extern void close_pack_index(struct packed_git *); +extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value); + extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); extern void close_pack_windows(struct packed_git *); extern void close_pack(struct packed_git *); From 0d5b3a5ef72383f3b6fe93793be3bbd107a88eaa Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:30 -0400 Subject: [PATCH 13/92] midx: write object ids in a chunk Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/pack-format.txt | 4 +++ midx.c | 47 +++++++++++++++++++++++-- midx.h | 1 + t/helper/test-read-midx.c | 2 ++ t/t5319-multi-pack-index.sh | 4 +-- 5 files changed, 53 insertions(+), 5 deletions(-) diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt index 6c5a77475f4a65..78ee0489c60cf3 100644 --- 
a/Documentation/technical/pack-format.txt +++ b/Documentation/technical/pack-format.txt @@ -302,6 +302,10 @@ CHUNK DATA: name. This is the only chunk not guaranteed to be a multiple of four bytes in length, so should be the last chunk for alignment reasons. + OID Lookup (ID: {'O', 'I', 'D', 'L'}) + The OIDs for all objects in the MIDX are stored in lexicographic + order in this chunk. + (This section intentionally left incomplete.) TRAILER: diff --git a/midx.c b/midx.c index 29f8de5ee6461e..3f113e1bebbf74 100644 --- a/midx.c +++ b/midx.c @@ -18,9 +18,10 @@ #define MIDX_HASH_LEN 20 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN) -#define MIDX_MAX_CHUNKS 1 +#define MIDX_MAX_CHUNKS 2 #define MIDX_CHUNK_ALIGNMENT 4 #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ +#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ #define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t)) static char *get_midx_filename(const char *object_dir) @@ -101,6 +102,10 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) m->chunk_pack_names = m->data + chunk_offset; break; + case MIDX_CHUNKID_OIDLOOKUP: + m->chunk_oid_lookup = m->data + chunk_offset; + break; + case 0: die(_("terminating multi-pack-index chunk id appears earlier than expected")); break; @@ -116,6 +121,8 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) if (!m->chunk_pack_names) die(_("multi-pack-index missing required pack-name chunk")); + if (!m->chunk_oid_lookup) + die(_("multi-pack-index missing required OID lookup chunk")); m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names)); @@ -382,6 +389,32 @@ static size_t write_midx_pack_names(struct hashfile *f, return written; } +static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len, + struct pack_midx_entry *objects, + uint32_t nr_objects) +{ + struct pack_midx_entry *list = objects; + uint32_t i; + size_t written = 0; + + for (i = 0; i < nr_objects; i++) { + struct 
pack_midx_entry *obj = list++; + + if (i < nr_objects - 1) { + struct pack_midx_entry *next = list; + if (oidcmp(&obj->oid, &next->oid) >= 0) + BUG("OIDs not in order: %s >= %s", + oid_to_hex(&obj->oid), + oid_to_hex(&next->oid)); + } + + hashwrite(f, obj->oid.hash, (int)hash_len); + written += hash_len; + } + + return written; +} + int write_midx_file(const char *object_dir) { unsigned char cur_chunk, num_chunks = 0; @@ -428,7 +461,7 @@ int write_midx_file(const char *object_dir) FREE_AND_NULL(midx_name); cur_chunk = 0; - num_chunks = 1; + num_chunks = 2; written = write_midx_header(f, num_chunks, packs.nr); @@ -436,9 +469,13 @@ int write_midx_file(const char *object_dir) chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH; cur_chunk++; - chunk_ids[cur_chunk] = 0; + chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP; chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len; + cur_chunk++; + chunk_ids[cur_chunk] = 0; + chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_HASH_LEN; + for (i = 0; i <= num_chunks; i++) { if (i && chunk_offsets[i] < chunk_offsets[i - 1]) BUG("incorrect chunk offsets: %"PRIu64" before %"PRIu64, @@ -468,6 +505,10 @@ int write_midx_file(const char *object_dir) written += write_midx_pack_names(f, packs.names, packs.nr); break; + case MIDX_CHUNKID_OIDLOOKUP: + written += write_midx_oid_lookup(f, MIDX_HASH_LEN, entries, nr_entries); + break; + default: BUG("trying to write unknown chunk id %"PRIx32, chunk_ids[i]); diff --git a/midx.h b/midx.h index 17b56172e3cc4c..4d3bceafc58058 100644 --- a/midx.h +++ b/midx.h @@ -15,6 +15,7 @@ struct multi_pack_index { uint32_t num_objects; const unsigned char *chunk_pack_names; + const unsigned char *chunk_oid_lookup; const char **pack_names; char object_dir[FLEX_ARRAY]; diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 76a60d78828307..de6d452a7ced7d 100644 --- a/t/helper/test-read-midx.c +++ 
b/t/helper/test-read-midx.c @@ -22,6 +22,8 @@ static int read_midx_file(const char *object_dir) if (m->chunk_pack_names) printf(" pack-names"); + if (m->chunk_oid_lookup) + printf(" oid-lookup"); printf("\n"); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index e8da082c64fa49..4813610115783f 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -7,8 +7,8 @@ midx_read_expect () { NUM_PACKS=$1 { cat <<-EOF && - header: 4d494458 1 1 $NUM_PACKS - chunks: pack-names + header: 4d494458 1 2 $NUM_PACKS + chunks: pack-names oid-lookup packs: EOF if test $NUM_PACKS -ge 1 From d7cacf29ccfcb2a33bcd8468f83daf822430f19a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:31 -0400 Subject: [PATCH 14/92] midx: write object id fanout chunk Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/pack-format.txt | 5 +++ midx.c | 53 +++++++++++++++++++++++-- midx.h | 1 + t/helper/test-read-midx.c | 4 +- t/t5319-multi-pack-index.sh | 16 ++++---- 5 files changed, 68 insertions(+), 11 deletions(-) diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt index 78ee0489c60cf3..3215f7bfcdbda9 100644 --- a/Documentation/technical/pack-format.txt +++ b/Documentation/technical/pack-format.txt @@ -302,6 +302,11 @@ CHUNK DATA: name. This is the only chunk not guaranteed to be a multiple of four bytes in length, so should be the last chunk for alignment reasons. + OID Fanout (ID: {'O', 'I', 'D', 'F'}) + The ith entry, F[i], stores the number of OIDs with first + byte at most i. Thus F[255] stores the total + number of objects. + OID Lookup (ID: {'O', 'I', 'D', 'L'}) The OIDs for all objects in the MIDX are stored in lexicographic order in this chunk. 
diff --git a/midx.c b/midx.c index 3f113e1bebbf74..7a954eb0cd317a 100644 --- a/midx.c +++ b/midx.c @@ -18,11 +18,13 @@ #define MIDX_HASH_LEN 20 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN) -#define MIDX_MAX_CHUNKS 2 +#define MIDX_MAX_CHUNKS 3 #define MIDX_CHUNK_ALIGNMENT 4 #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ +#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ #define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ #define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t)) +#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256) static char *get_midx_filename(const char *object_dir) { @@ -102,6 +104,10 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) m->chunk_pack_names = m->data + chunk_offset; break; + case MIDX_CHUNKID_OIDFANOUT: + m->chunk_oid_fanout = (uint32_t *)(m->data + chunk_offset); + break; + case MIDX_CHUNKID_OIDLOOKUP: m->chunk_oid_lookup = m->data + chunk_offset; break; @@ -121,9 +127,13 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) if (!m->chunk_pack_names) die(_("multi-pack-index missing required pack-name chunk")); + if (!m->chunk_oid_fanout) + die(_("multi-pack-index missing required OID fanout chunk")); if (!m->chunk_oid_lookup) die(_("multi-pack-index missing required OID lookup chunk")); + m->num_objects = ntohl(m->chunk_oid_fanout[255]); + m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names)); cur_pack_name = (const char *)m->chunk_pack_names; @@ -389,6 +399,35 @@ static size_t write_midx_pack_names(struct hashfile *f, return written; } +static size_t write_midx_oid_fanout(struct hashfile *f, + struct pack_midx_entry *objects, + uint32_t nr_objects) +{ + struct pack_midx_entry *list = objects; + struct pack_midx_entry *last = objects + nr_objects; + uint32_t count = 0; + uint32_t i; + + /* + * Write the first-level table (the list is sorted, + * but we use a 256-entry lookup to be able to avoid + * having to do eight extra binary search 
iterations). + */ + for (i = 0; i < 256; i++) { + struct pack_midx_entry *next = list; + + while (next < last && next->oid.hash[0] == i) { + count++; + next++; + } + + hashwrite_be32(f, count); + list = next; + } + + return MIDX_CHUNK_FANOUT_SIZE; +} + static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len, struct pack_midx_entry *objects, uint32_t nr_objects) @@ -461,7 +500,7 @@ int write_midx_file(const char *object_dir) FREE_AND_NULL(midx_name); cur_chunk = 0; - num_chunks = 2; + num_chunks = 3; written = write_midx_header(f, num_chunks, packs.nr); @@ -469,9 +508,13 @@ int write_midx_file(const char *object_dir) chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH; cur_chunk++; - chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP; + chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT; chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len; + cur_chunk++; + chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP; + chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + MIDX_CHUNK_FANOUT_SIZE; + cur_chunk++; chunk_ids[cur_chunk] = 0; chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_HASH_LEN; @@ -505,6 +548,10 @@ int write_midx_file(const char *object_dir) written += write_midx_pack_names(f, packs.names, packs.nr); break; + case MIDX_CHUNKID_OIDFANOUT: + written += write_midx_oid_fanout(f, entries, nr_entries); + break; + case MIDX_CHUNKID_OIDLOOKUP: written += write_midx_oid_lookup(f, MIDX_HASH_LEN, entries, nr_entries); break; diff --git a/midx.h b/midx.h index 4d3bceafc58058..8572cf0f4b3540 100644 --- a/midx.h +++ b/midx.h @@ -15,6 +15,7 @@ struct multi_pack_index { uint32_t num_objects; const unsigned char *chunk_pack_names; + const uint32_t *chunk_oid_fanout; const unsigned char *chunk_oid_lookup; const char **pack_names; diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index de6d452a7ced7d..f7c17b0940d342 100644 --- a/t/helper/test-read-midx.c +++ 
b/t/helper/test-read-midx.c @@ -22,10 +22,12 @@ static int read_midx_file(const char *object_dir) if (m->chunk_pack_names) printf(" pack-names"); + if (m->chunk_oid_fanout) + printf(" oid-fanout"); if (m->chunk_oid_lookup) printf(" oid-lookup"); - printf("\n"); + printf("\nnum_objects: %d\n", m->num_objects); printf("packs:\n"); for (i = 0; i < m->num_packs; i++) diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 4813610115783f..95e731ae52f125 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -5,10 +5,12 @@ test_description='multi-pack-indexes' midx_read_expect () { NUM_PACKS=$1 + NUM_OBJECTS=$2 { cat <<-EOF && - header: 4d494458 1 2 $NUM_PACKS - chunks: pack-names oid-lookup + header: 4d494458 1 3 $NUM_PACKS + chunks: pack-names oid-fanout oid-lookup + num_objects: $NUM_OBJECTS packs: EOF if test $NUM_PACKS -ge 1 @@ -24,7 +26,7 @@ midx_read_expect () { test_expect_success 'write midx with no packs' ' test_when_finished rm -f pack/multi-pack-index && git multi-pack-index --object-dir=. write && - midx_read_expect 0 + midx_read_expect 0 0 ' generate_objects () { @@ -74,13 +76,13 @@ test_expect_success 'write midx with one v1 pack' ' pack=$(git pack-objects --index-version=1 pack/test Date: Thu, 12 Jul 2018 15:39:32 -0400 Subject: [PATCH 15/92] midx: write object offsets The final pair of chunks for the multi-pack-index file stores the object offsets. We default to using 32-bit offsets as in the pack-index version 1 format, but if there exists an offset larger than 32-bits, we use a trick similar to the pack-index version 2 format by storing all offsets at least 2^31 in a 64-bit table; we use the 32-bit table to point into that 64-bit table as necessary. We only store these 64-bit offsets if necessary, so create a test that manipulates a version 2 pack-index to fake a large offset. This allows us to test that the large offset table is created, but the data does not match the actual packfile offsets. 
The multi-pack-index offset does match the (corrupted) pack-index offset, so a future feature will compare these offsets during a 'verify' step. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/pack-format.txt | 15 +++- midx.c | 100 +++++++++++++++++++++++- midx.h | 2 + t/helper/test-read-midx.c | 4 + t/t5319-multi-pack-index.sh | 49 +++++++++--- 5 files changed, 155 insertions(+), 15 deletions(-) diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt index 3215f7bfcdbda9..cab5bdd2ff0f88 100644 --- a/Documentation/technical/pack-format.txt +++ b/Documentation/technical/pack-format.txt @@ -311,7 +311,20 @@ CHUNK DATA: The OIDs for all objects in the MIDX are stored in lexicographic order in this chunk. - (This section intentionally left incomplete.) + Object Offsets (ID: {'O', 'O', 'F', 'F'}) + Stores two 4-byte values for every object. + 1: The pack-int-id for the pack storing this object. + 2: The offset within the pack. + If all offsets are less than 2^31, then the large offset chunk + will not exist and offsets are stored as in IDX v1. + If there is at least one offset value larger than 2^32-1, then + the large offset chunk must exist. If the large offset chunk + exists and the 31st bit is on, then removing that bit reveals + the row in the large offsets containing the 8-byte offset of + this object. + + [Optional] Object Large Offsets (ID: {'L', 'O', 'F', 'F'}) + 8-byte offsets into large packfiles. 
TRAILER: diff --git a/midx.c b/midx.c index 7a954eb0cd317a..e83110ae92b476 100644 --- a/midx.c +++ b/midx.c @@ -18,13 +18,18 @@ #define MIDX_HASH_LEN 20 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN) -#define MIDX_MAX_CHUNKS 3 +#define MIDX_MAX_CHUNKS 5 #define MIDX_CHUNK_ALIGNMENT 4 #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ #define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ #define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ +#define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */ +#define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */ #define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t)) #define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256) +#define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t)) +#define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t)) +#define MIDX_LARGE_OFFSET_NEEDED 0x80000000 static char *get_midx_filename(const char *object_dir) { @@ -112,6 +117,14 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) m->chunk_oid_lookup = m->data + chunk_offset; break; + case MIDX_CHUNKID_OBJECTOFFSETS: + m->chunk_object_offsets = m->data + chunk_offset; + break; + + case MIDX_CHUNKID_LARGEOFFSETS: + m->chunk_large_offsets = m->data + chunk_offset; + break; + case 0: die(_("terminating multi-pack-index chunk id appears earlier than expected")); break; @@ -131,6 +144,8 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) die(_("multi-pack-index missing required OID fanout chunk")); if (!m->chunk_oid_lookup) die(_("multi-pack-index missing required OID lookup chunk")); + if (!m->chunk_object_offsets) + die(_("multi-pack-index missing required object offsets chunk")); m->num_objects = ntohl(m->chunk_oid_fanout[255]); @@ -454,6 +469,56 @@ static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len, return written; } +static size_t write_midx_object_offsets(struct hashfile *f, int large_offset_needed, + struct pack_midx_entry *objects, uint32_t 
nr_objects) +{ + struct pack_midx_entry *list = objects; + uint32_t i, nr_large_offset = 0; + size_t written = 0; + + for (i = 0; i < nr_objects; i++) { + struct pack_midx_entry *obj = list++; + + hashwrite_be32(f, obj->pack_int_id); + + if (large_offset_needed && obj->offset >> 31) + hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); + else if (!large_offset_needed && obj->offset >> 32) + BUG("object %s requires a large offset (%"PRIx64") but the MIDX is not writing large offsets!", + oid_to_hex(&obj->oid), + obj->offset); + else + hashwrite_be32(f, (uint32_t)obj->offset); + + written += MIDX_CHUNK_OFFSET_WIDTH; + } + + return written; +} + +static size_t write_midx_large_offsets(struct hashfile *f, uint32_t nr_large_offset, + struct pack_midx_entry *objects, uint32_t nr_objects) +{ + struct pack_midx_entry *list = objects; + size_t written = 0; + + while (nr_large_offset) { + struct pack_midx_entry *obj = list++; + uint64_t offset = obj->offset; + + if (!(offset >> 31)) + continue; + + hashwrite_be32(f, offset >> 32); + hashwrite_be32(f, offset & 0xffffffffUL); + written += 2 * sizeof(uint32_t); + + nr_large_offset--; + } + + return written; +} + int write_midx_file(const char *object_dir) { unsigned char cur_chunk, num_chunks = 0; @@ -466,8 +531,9 @@ int write_midx_file(const char *object_dir) uint64_t written = 0; uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1]; uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1]; - uint32_t nr_entries; + uint32_t nr_entries, num_large_offsets = 0; struct pack_midx_entry *entries = NULL; + int large_offsets_needed = 0; midx_name = get_midx_filename(object_dir); if (safe_create_leading_directories(midx_name)) { @@ -494,13 +560,19 @@ int write_midx_file(const char *object_dir) sort_packs_by_name(packs.names, packs.nr, pack_perm); entries = get_sorted_entries(packs.list, pack_perm, packs.nr, &nr_entries); + for (i = 0; i < nr_entries; i++) { + if (entries[i].offset > 0x7fffffff) + num_large_offsets++; + if (entries[i].offset > 
0xffffffff) + large_offsets_needed = 1; + } hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); FREE_AND_NULL(midx_name); cur_chunk = 0; - num_chunks = 3; + num_chunks = large_offsets_needed ? 5 : 4; written = write_midx_header(f, num_chunks, packs.nr); @@ -516,9 +588,21 @@ int write_midx_file(const char *object_dir) chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + MIDX_CHUNK_FANOUT_SIZE; cur_chunk++; - chunk_ids[cur_chunk] = 0; + chunk_ids[cur_chunk] = MIDX_CHUNKID_OBJECTOFFSETS; chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_HASH_LEN; + cur_chunk++; + chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + nr_entries * MIDX_CHUNK_OFFSET_WIDTH; + if (large_offsets_needed) { + chunk_ids[cur_chunk] = MIDX_CHUNKID_LARGEOFFSETS; + + cur_chunk++; + chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + + num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH; + } + + chunk_ids[cur_chunk] = 0; + for (i = 0; i <= num_chunks; i++) { if (i && chunk_offsets[i] < chunk_offsets[i - 1]) BUG("incorrect chunk offsets: %"PRIu64" before %"PRIu64, @@ -556,6 +640,14 @@ int write_midx_file(const char *object_dir) written += write_midx_oid_lookup(f, MIDX_HASH_LEN, entries, nr_entries); break; + case MIDX_CHUNKID_OBJECTOFFSETS: + written += write_midx_object_offsets(f, large_offsets_needed, entries, nr_entries); + break; + + case MIDX_CHUNKID_LARGEOFFSETS: + written += write_midx_large_offsets(f, num_large_offsets, entries, nr_entries); + break; + default: BUG("trying to write unknown chunk id %"PRIx32, chunk_ids[i]); diff --git a/midx.h b/midx.h index 8572cf0f4b3540..e15966272ff4a5 100644 --- a/midx.h +++ b/midx.h @@ -17,6 +17,8 @@ struct multi_pack_index { const unsigned char *chunk_pack_names; const uint32_t *chunk_oid_fanout; const unsigned char *chunk_oid_lookup; + const unsigned char *chunk_object_offsets; + const unsigned char *chunk_large_offsets; const char **pack_names; 
char object_dir[FLEX_ARRAY]; diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index f7c17b0940d342..8e19972e8939d1 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -26,6 +26,10 @@ static int read_midx_file(const char *object_dir) printf(" oid-fanout"); if (m->chunk_oid_lookup) printf(" oid-lookup"); + if (m->chunk_object_offsets) + printf(" object-offsets"); + if (m->chunk_large_offsets) + printf(" large-offsets"); printf("\nnum_objects: %d\n", m->num_objects); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 95e731ae52f125..4a4fa26f7a5225 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -6,27 +6,30 @@ test_description='multi-pack-indexes' midx_read_expect () { NUM_PACKS=$1 NUM_OBJECTS=$2 + NUM_CHUNKS=$3 + OBJECT_DIR=$4 + EXTRA_CHUNKS="$5" { cat <<-EOF && - header: 4d494458 1 3 $NUM_PACKS - chunks: pack-names oid-fanout oid-lookup + header: 4d494458 1 $NUM_CHUNKS $NUM_PACKS + chunks: pack-names oid-fanout oid-lookup object-offsets$EXTRA_CHUNKS num_objects: $NUM_OBJECTS packs: EOF if test $NUM_PACKS -ge 1 then - ls pack/ | grep idx | sort + ls $OBJECT_DIR/pack/ | grep idx | sort fi && - printf "object-dir: .\n" + printf "object-dir: $OBJECT_DIR\n" } >expect && - test-tool read-midx . >actual && + test-tool read-midx $OBJECT_DIR >actual && test_cmp expect actual } test_expect_success 'write midx with no packs' ' test_when_finished rm -f pack/multi-pack-index && git multi-pack-index --object-dir=. write && - midx_read_expect 0 0 + midx_read_expect 0 0 4 . ' generate_objects () { @@ -76,13 +79,13 @@ test_expect_success 'write midx with one v1 pack' ' pack=$(git pack-objects --index-version=1 pack/test [] +corrupt_data () { + file=$1 + pos=$2 + data="${3:-\0}" + printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc +} + +# Force 64-bit offsets by manipulating the idx file. +# This makes the IDX file _incorrect_ so be careful to clean up after! 
+test_expect_success 'force some 64-bit offsets with pack-objects' ' + mkdir objects64 && + mkdir objects64/pack && + for i in $(test_seq 1 11) + do + generate_objects 11 + done && + commit_and_list_objects && + pack64=$(git pack-objects --index-version=2,0x40 objects64/pack/test-64 Date: Thu, 12 Jul 2018 15:39:33 -0400 Subject: [PATCH 16/92] config: create core.multiPackIndex setting The core.multiPackIndex config setting controls the multi-pack- index (MIDX) feature. If false, the setting will disable all reads from the multi-pack-index file. Read this config setting in the new prepare_multi_pack_index_one() which is called during prepare_packed_git(). This check is run once per repository. Add comparison commands in t5319-multi-pack-index.sh to check typical Git behavior remains the same as the config setting is turned on and off. This currently includes 'git rev-list' and 'git log' commands to trigger several object database reads. Currently, these would only catch an error in the prepare_multi_pack_index_one(), but with later commits will catch errors in object lookups, abbreviations, and approximate object counts. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/config.txt | 5 ++++ midx.c | 25 ++++++++++++++++++ midx.h | 5 ++++ object-store.h | 7 +++++ packfile.c | 6 ++++- t/t5319-multi-pack-index.sh | 51 +++++++++++++++++++++++++++---------- 6 files changed, 85 insertions(+), 14 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index ab641bf5a9984b..25f817ca427c45 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -908,6 +908,11 @@ core.commitGraph:: Enable git commit graph feature. Allows reading from the commit-graph file. +core.multiPackIndex:: + Use the multi-pack-index file to track multiple packfiles using a + single index. See link:technical/multi-pack-index.html[the + multi-pack-index design document]. + core.sparseCheckout:: Enable "sparse checkout" feature. 
See section "Sparse checkout" in linkgit:git-read-tree[1] for more information. diff --git a/midx.c b/midx.c index e83110ae92b476..4090cf4ca42fe5 100644 --- a/midx.c +++ b/midx.c @@ -1,4 +1,5 @@ #include "cache.h" +#include "config.h" #include "csum-file.h" #include "dir.h" #include "lockfile.h" @@ -177,6 +178,30 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) return NULL; } +int prepare_multi_pack_index_one(struct repository *r, const char *object_dir) +{ + struct multi_pack_index *m = r->objects->multi_pack_index; + struct multi_pack_index *m_search; + int config_value; + + if (repo_config_get_bool(r, "core.multipackindex", &config_value) || + !config_value) + return 0; + + for (m_search = m; m_search; m_search = m_search->next) + if (!strcmp(object_dir, m_search->object_dir)) + return 1; + + r->objects->multi_pack_index = load_multi_pack_index(object_dir); + + if (r->objects->multi_pack_index) { + r->objects->multi_pack_index->next = m; + return 1; + } + + return 0; +} + static size_t write_midx_header(struct hashfile *f, unsigned char num_chunks, uint32_t num_packs) diff --git a/midx.h b/midx.h index e15966272ff4a5..9bcfc82d2ec3b2 100644 --- a/midx.h +++ b/midx.h @@ -1,7 +1,11 @@ #ifndef __MIDX_H__ #define __MIDX_H__ +#include "repository.h" + struct multi_pack_index { + struct multi_pack_index *next; + int fd; const unsigned char *data; @@ -25,6 +29,7 @@ struct multi_pack_index { }; struct multi_pack_index *load_multi_pack_index(const char *object_dir); +int prepare_multi_pack_index_one(struct repository *r, const char *object_dir); int write_midx_file(const char *object_dir); diff --git a/object-store.h b/object-store.h index 13a766aea8ece9..c2b162489a161f 100644 --- a/object-store.h +++ b/object-store.h @@ -105,6 +105,13 @@ struct raw_object_store { */ struct oidmap *replace_map; + /* + * private data + * + * should only be accessed directly by packfile.c and midx.c + */ + struct multi_pack_index *multi_pack_index; + /* * private 
data * diff --git a/packfile.c b/packfile.c index 3d652212c60633..5d4493dbf4f9a4 100644 --- a/packfile.c +++ b/packfile.c @@ -15,6 +15,7 @@ #include "tree-walk.h" #include "tree.h" #include "object-store.h" +#include "midx.h" char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, @@ -935,10 +936,13 @@ static void prepare_packed_git(struct repository *r) if (r->objects->packed_git_initialized) return; + prepare_multi_pack_index_one(r, r->objects->objectdir); prepare_packed_git_one(r, r->objects->objectdir, 1); prepare_alt_odb(r); - for (alt = r->objects->alt_odb_list; alt; alt = alt->next) + for (alt = r->objects->alt_odb_list; alt; alt = alt->next) { + prepare_multi_pack_index_one(r, alt->path); prepare_packed_git_one(r, alt->path, 0); + } rearrange_packed_git(r); prepare_packed_git_mru(r); r->objects->packed_git_initialized = 1; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 4a4fa26f7a5225..b9661c7c660e20 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -3,6 +3,8 @@ test_description='multi-pack-indexes' . 
./test-lib.sh +objdir=.git/objects + midx_read_expect () { NUM_PACKS=$1 NUM_OBJECTS=$2 @@ -76,18 +78,35 @@ test_expect_success 'create objects' ' ' test_expect_success 'write midx with one v1 pack' ' - pack=$(git pack-objects --index-version=1 pack/test expect && + git -c core.multiPackIndex=true $1 >actual && + test_cmp expect actual +} + +compare_results_with_midx () { + MSG=$1 + test_expect_success "check normal git operations: $MSG" ' + midx_git_two_modes "rev-list --objects --all" && + midx_git_two_modes "log --raw" + ' +} + test_expect_success 'write midx with one v2 pack' ' - git pack-objects --index-version=2,0x40 pack/test [] corrupt_data () { file=$1 From 3715a6335c37367b4240b6bfa842dc64dedee34d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:34 -0400 Subject: [PATCH 17/92] midx: read objects from multi-pack-index Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++- midx.h | 3 ++ packfile.c | 8 ++++- 3 files changed, 100 insertions(+), 2 deletions(-) diff --git a/midx.c b/midx.c index 4090cf4ca42fe5..182535933c33ed 100644 --- a/midx.c +++ b/midx.c @@ -5,7 +5,7 @@ #include "lockfile.h" #include "packfile.h" #include "object-store.h" -#include "packfile.h" +#include "sha1-lookup.h" #include "midx.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ @@ -151,6 +151,7 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) m->num_objects = ntohl(m->chunk_oid_fanout[255]); m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names)); + m->packs = xcalloc(m->num_packs, sizeof(*m->packs)); cur_pack_name = (const char *)m->chunk_pack_names; for (i = 0; i < m->num_packs; i++) { @@ -178,6 +179,94 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) return NULL; } +static int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) +{ + struct strbuf pack_name = STRBUF_INIT; + + if (pack_int_id >= m->num_packs) + 
BUG("bad pack-int-id"); + + if (m->packs[pack_int_id]) + return 0; + + strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir, + m->pack_names[pack_int_id]); + + m->packs[pack_int_id] = add_packed_git(pack_name.buf, pack_name.len, 1); + strbuf_release(&pack_name); + return !m->packs[pack_int_id]; +} + +int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result) +{ + return bsearch_hash(oid->hash, m->chunk_oid_fanout, m->chunk_oid_lookup, + MIDX_HASH_LEN, result); +} + +static off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) +{ + const unsigned char *offset_data; + uint32_t offset32; + + offset_data = m->chunk_object_offsets + pos * MIDX_CHUNK_OFFSET_WIDTH; + offset32 = get_be32(offset_data + sizeof(uint32_t)); + + if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) { + if (sizeof(off_t) < sizeof(uint64_t)) + die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); + + offset32 ^= MIDX_LARGE_OFFSET_NEEDED; + return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32); + } + + return offset32; +} + +static uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos) +{ + return get_be32(m->chunk_object_offsets + pos * MIDX_CHUNK_OFFSET_WIDTH); +} + +static int nth_midxed_pack_entry(struct multi_pack_index *m, struct pack_entry *e, uint32_t pos) +{ + uint32_t pack_int_id; + struct packed_git *p; + + if (pos >= m->num_objects) + return 0; + + pack_int_id = nth_midxed_pack_int_id(m, pos); + + if (prepare_midx_pack(m, pack_int_id)) + die(_("error preparing packfile from multi-pack-index")); + p = m->packs[pack_int_id]; + + /* + * We are about to tell the caller where they can locate the + * requested object. We better make sure the packfile is + * still here and can be accessed before supplying that + * answer, as it may have been deleted since the MIDX was + * loaded!
+ */ + if (!is_pack_valid(p)) + return 0; + + e->offset = nth_midxed_offset(m, pos); + e->p = p; + + return 1; +} + +int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m) +{ + uint32_t pos; + + if (!bsearch_midx(oid, m, &pos)) + return 0; + + return nth_midxed_pack_entry(m, e, pos); +} + int prepare_multi_pack_index_one(struct repository *r, const char *object_dir) { struct multi_pack_index *m = r->objects->multi_pack_index; diff --git a/midx.h b/midx.h index 9bcfc82d2ec3b2..377838c9ca2724 100644 --- a/midx.h +++ b/midx.h @@ -25,10 +25,13 @@ struct multi_pack_index { const unsigned char *chunk_large_offsets; const char **pack_names; + struct packed_git **packs; char object_dir[FLEX_ARRAY]; }; struct multi_pack_index *load_multi_pack_index(const char *object_dir); +int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result); +int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m); int prepare_multi_pack_index_one(struct repository *r, const char *object_dir); int write_midx_file(const char *object_dir); diff --git a/packfile.c b/packfile.c index 5d4493dbf4f9a4..bc763d91b9b64c 100644 --- a/packfile.c +++ b/packfile.c @@ -1902,11 +1902,17 @@ static int fill_pack_entry(const struct object_id *oid, int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) { struct list_head *pos; + struct multi_pack_index *m; prepare_packed_git(r); - if (!r->objects->packed_git) + if (!r->objects->packed_git && !r->objects->multi_pack_index) return 0; + for (m = r->objects->multi_pack_index; m; m = m->next) { + if (fill_midx_entry(oid, e, m)) + return 1; + } + list_for_each(pos, &r->objects->packed_git_mru) { struct packed_git *p = list_entry(pos, struct packed_git, mru); if (fill_pack_entry(oid, e, p)) { From 8aac67a174061a0744557a3984a433f926bf5cb3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:35 -0400 
Subject: [PATCH 18/92] midx: use midx in abbreviation calculations Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 11 +++++++++ midx.h | 3 +++ packfile.c | 6 +++++ packfile.h | 1 + sha1-name.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 91 insertions(+) diff --git a/midx.c b/midx.c index 182535933c33ed..4e014ff6e3b189 100644 --- a/midx.c +++ b/midx.c @@ -203,6 +203,17 @@ int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32 MIDX_HASH_LEN, result); } +struct object_id *nth_midxed_object_oid(struct object_id *oid, + struct multi_pack_index *m, + uint32_t n) +{ + if (n >= m->num_objects) + return NULL; + + hashcpy(oid->hash, m->chunk_oid_lookup + m->hash_len * n); + return oid; +} + static off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) { const unsigned char *offset_data; diff --git a/midx.h b/midx.h index 377838c9ca2724..1b976df8735da6 100644 --- a/midx.h +++ b/midx.h @@ -31,6 +31,9 @@ struct multi_pack_index { struct multi_pack_index *load_multi_pack_index(const char *object_dir); int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result); +struct object_id *nth_midxed_object_oid(struct object_id *oid, + struct multi_pack_index *m, + uint32_t n); int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m); int prepare_multi_pack_index_one(struct repository *r, const char *object_dir); diff --git a/packfile.c b/packfile.c index bc763d91b9b64c..c0eb5ac885f5ff 100644 --- a/packfile.c +++ b/packfile.c @@ -961,6 +961,12 @@ struct packed_git *get_packed_git(struct repository *r) return r->objects->packed_git; } +struct multi_pack_index *get_multi_pack_index(struct repository *r) +{ + prepare_packed_git(r); + return r->objects->multi_pack_index; +} + struct list_head *get_packed_git_mru(struct repository *r) { prepare_packed_git(r); diff --git a/packfile.h b/packfile.h index 
b0eed44c0b1e35..046280caf372ff 100644 --- a/packfile.h +++ b/packfile.h @@ -45,6 +45,7 @@ extern void install_packed_git(struct repository *r, struct packed_git *pack); struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); +struct multi_pack_index *get_multi_pack_index(struct repository *r); /* * Give a rough count of objects in the repository. This sacrifices accuracy diff --git a/sha1-name.c b/sha1-name.c index 60d9ef3c7e7108..7dc71201e68be9 100644 --- a/sha1-name.c +++ b/sha1-name.c @@ -12,6 +12,7 @@ #include "packfile.h" #include "object-store.h" #include "repository.h" +#include "midx.h" static int get_oid_oneline(const char *, struct object_id *, struct commit_list *); @@ -149,6 +150,32 @@ static int match_sha(unsigned len, const unsigned char *a, const unsigned char * return 1; } +static void unique_in_midx(struct multi_pack_index *m, + struct disambiguate_state *ds) +{ + uint32_t num, i, first = 0; + const struct object_id *current = NULL; + num = m->num_objects; + + if (!num) + return; + + bsearch_midx(&ds->bin_pfx, m, &first); + + /* + * At this point, "first" is the location of the lowest object + * with an object name that could match "bin_pfx". See if we have + * 0, 1 or more objects that actually match(es). 
+ */ + for (i = first; i < num && !ds->ambiguous; i++) { + struct object_id oid; + current = nth_midxed_object_oid(&oid, m, i); + if (!match_sha(ds->len, ds->bin_pfx.hash, current->hash)) + break; + update_candidates(ds, current); + } +} + static void unique_in_pack(struct packed_git *p, struct disambiguate_state *ds) { @@ -177,8 +204,12 @@ static void unique_in_pack(struct packed_git *p, static void find_short_packed_object(struct disambiguate_state *ds) { + struct multi_pack_index *m; struct packed_git *p; + for (m = get_multi_pack_index(the_repository); m && !ds->ambiguous; + m = m->next) + unique_in_midx(m, ds); for (p = get_packed_git(the_repository); p && !ds->ambiguous; p = p->next) unique_in_pack(p, ds); @@ -527,6 +558,42 @@ static int extend_abbrev_len(const struct object_id *oid, void *cb_data) return 0; } +static void find_abbrev_len_for_midx(struct multi_pack_index *m, + struct min_abbrev_data *mad) +{ + int match = 0; + uint32_t num, first = 0; + struct object_id oid; + const struct object_id *mad_oid; + + if (!m->num_objects) + return; + + num = m->num_objects; + mad_oid = mad->oid; + match = bsearch_midx(mad_oid, m, &first); + + /* + * first is now the position in the packfile where we would insert + * mad->hash if it does not exist (or the position of mad->hash if + * it does exist). Hence, we consider a maximum of two objects + * nearby for the abbreviation length. 
+ */ + mad->init_len = 0; + if (!match) { + if (nth_midxed_object_oid(&oid, m, first)) + extend_abbrev_len(&oid, mad); + } else if (first < num - 1) { + if (nth_midxed_object_oid(&oid, m, first + 1)) + extend_abbrev_len(&oid, mad); + } + if (first > 0) { + if (nth_midxed_object_oid(&oid, m, first - 1)) + extend_abbrev_len(&oid, mad); + } + mad->init_len = mad->cur_len; +} + static void find_abbrev_len_for_pack(struct packed_git *p, struct min_abbrev_data *mad) { @@ -565,8 +632,11 @@ static void find_abbrev_len_for_pack(struct packed_git *p, static void find_abbrev_len_packed(struct min_abbrev_data *mad) { + struct multi_pack_index *m; struct packed_git *p; + for (m = get_multi_pack_index(the_repository); m; m = m->next) + find_abbrev_len_for_midx(m, mad); for (p = get_packed_git(the_repository); p; p = p->next) find_abbrev_len_for_pack(p, mad); } From a40498a12654259335995d785cc1da9f90f249c7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:36 -0400 Subject: [PATCH 19/92] midx: use existing midx when writing new one Due to how Windows handles replacing a lockfile when there is an open handle, create the close_midx() method to close the existing midx before writing the new one. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- midx.h | 1 + 2 files changed, 111 insertions(+), 6 deletions(-) diff --git a/midx.c b/midx.c index 4e014ff6e3b189..bf2334acc6f2a2 100644 --- a/midx.c +++ b/midx.c @@ -179,6 +179,23 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) return NULL; } +static void close_midx(struct multi_pack_index *m) +{ + uint32_t i; + munmap((unsigned char *)m->data, m->data_len); + close(m->fd); + m->fd = -1; + + for (i = 0; i < m->num_packs; i++) { + if (m->packs[i]) { + close_pack(m->packs[i]); + free(m->packs[i]); + } + } + FREE_AND_NULL(m->packs); + FREE_AND_NULL(m->pack_names); +} + static int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) { struct strbuf pack_name = STRBUF_INIT; @@ -278,6 +295,29 @@ int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct mu return nth_midxed_pack_entry(m, e, pos); } +int midx_contains_pack(struct multi_pack_index *m, const char *idx_name) +{ + uint32_t first = 0, last = m->num_packs; + + while (first < last) { + uint32_t mid = first + (last - first) / 2; + const char *current; + int cmp; + + current = m->pack_names[mid]; + cmp = strcmp(idx_name, current); + if (!cmp) + return 1; + if (cmp > 0) { + first = mid + 1; + continue; + } + last = mid; + } + + return 0; +} + int prepare_multi_pack_index_one(struct repository *r, const char *object_dir) { struct multi_pack_index *m = r->objects->multi_pack_index; @@ -326,6 +366,7 @@ struct pack_list { uint32_t alloc_list; uint32_t alloc_names; size_t pack_name_concat_len; + struct multi_pack_index *m; }; static void add_pack_to_midx(const char *full_path, size_t full_path_len, @@ -334,6 +375,9 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len, struct pack_list *packs = (struct pack_list *)data; if (ends_with(file_name, ".idx")) { + if (packs->m && midx_contains_pack(packs->m,
file_name)) + return; + ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list); ALLOC_GROW(packs->names, packs->nr + 1, packs->alloc_names); @@ -419,6 +463,23 @@ static int midx_oid_compare(const void *_a, const void *_b) return a->pack_int_id - b->pack_int_id; } +static int nth_midxed_pack_midx_entry(struct multi_pack_index *m, + uint32_t *pack_perm, + struct pack_midx_entry *e, + uint32_t pos) +{ + if (pos >= m->num_objects) + return 1; + + nth_midxed_object_oid(&e->oid, m, pos); + e->pack_int_id = pack_perm[nth_midxed_pack_int_id(m, pos)]; + e->offset = nth_midxed_offset(m, pos); + + /* consider objects in midx to be from "old" packs */ + e->pack_mtime = 0; + return 0; +} + static void fill_pack_entry(uint32_t pack_int_id, struct packed_git *p, uint32_t cur_object, @@ -444,7 +505,8 @@ static void fill_pack_entry(uint32_t pack_int_id, * Copy only the de-duplicated entries (selected by most-recent modified time * of a packfile containing the object). */ -static struct pack_midx_entry *get_sorted_entries(struct packed_git **p, +static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, + struct packed_git **p, uint32_t *perm, uint32_t nr_packs, uint32_t *nr_objects) @@ -453,8 +515,9 @@ static struct pack_midx_entry *get_sorted_entries(struct packed_git **p, uint32_t alloc_fanout, alloc_objects, total_objects = 0; struct pack_midx_entry *entries_by_fanout = NULL; struct pack_midx_entry *deduplicated_entries = NULL; + uint32_t start_pack = m ? 
m->num_packs : 0; - for (cur_pack = 0; cur_pack < nr_packs; cur_pack++) + for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) total_objects += p[cur_pack]->num_objects; /* @@ -471,7 +534,23 @@ static struct pack_midx_entry *get_sorted_entries(struct packed_git **p, for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { uint32_t nr_fanout = 0; - for (cur_pack = 0; cur_pack < nr_packs; cur_pack++) { + if (m) { + uint32_t start = 0, end; + + if (cur_fanout) + start = ntohl(m->chunk_oid_fanout[cur_fanout - 1]); + end = ntohl(m->chunk_oid_fanout[cur_fanout]); + + for (cur_object = start; cur_object < end; cur_object++) { + ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); + nth_midxed_pack_midx_entry(m, perm, + &entries_by_fanout[nr_fanout], + cur_object); + nr_fanout++; + } + } + + for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) { uint32_t start = 0, end; if (cur_fanout) @@ -667,16 +746,34 @@ int write_midx_file(const char *object_dir) midx_name); } + packs.m = load_multi_pack_index(object_dir); + packs.nr = 0; - packs.alloc_list = 16; - packs.alloc_names = 16; + packs.alloc_list = packs.m ? 
packs.m->num_packs : 16; + packs.alloc_names = packs.alloc_list; packs.list = NULL; + packs.names = NULL; packs.pack_name_concat_len = 0; ALLOC_ARRAY(packs.list, packs.alloc_list); ALLOC_ARRAY(packs.names, packs.alloc_names); + if (packs.m) { + for (i = 0; i < packs.m->num_packs; i++) { + ALLOC_GROW(packs.list, packs.nr + 1, packs.alloc_list); + ALLOC_GROW(packs.names, packs.nr + 1, packs.alloc_names); + + packs.list[packs.nr] = NULL; + packs.names[packs.nr] = xstrdup(packs.m->pack_names[i]); + packs.pack_name_concat_len += strlen(packs.names[packs.nr]) + 1; + packs.nr++; + } + } + for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &packs); + if (packs.m && packs.nr == packs.m->num_packs) + goto cleanup; + if (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT) packs.pack_name_concat_len += MIDX_CHUNK_ALIGNMENT - (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT); @@ -684,7 +781,8 @@ int write_midx_file(const char *object_dir) ALLOC_ARRAY(pack_perm, packs.nr); sort_packs_by_name(packs.names, packs.nr, pack_perm); - entries = get_sorted_entries(packs.list, pack_perm, packs.nr, &nr_entries); + entries = get_sorted_entries(packs.m, packs.list, pack_perm, packs.nr, &nr_entries); + for (i = 0; i < nr_entries; i++) { if (entries[i].offset > 0x7fffffff) num_large_offsets++; @@ -696,6 +794,9 @@ int write_midx_file(const char *object_dir) f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); FREE_AND_NULL(midx_name); + if (packs.m) + close_midx(packs.m); + cur_chunk = 0; num_chunks = large_offsets_needed ? 
5 : 4; @@ -787,6 +888,7 @@ int write_midx_file(const char *object_dir) finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM); commit_lock_file(&lk); +cleanup: for (i = 0; i < packs.nr; i++) { if (packs.list[i]) { close_pack(packs.list[i]); @@ -798,5 +900,7 @@ int write_midx_file(const char *object_dir) free(packs.list); free(packs.names); free(entries); + free(pack_perm); + free(midx_name); return 0; } diff --git a/midx.h b/midx.h index 1b976df8735da6..d4cde99473aa1d 100644 --- a/midx.h +++ b/midx.h @@ -35,6 +35,7 @@ struct object_id *nth_midxed_object_oid(struct object_id *oid, struct multi_pack_index *m, uint32_t n); int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m); +int midx_contains_pack(struct multi_pack_index *m, const char *idx_name); int prepare_multi_pack_index_one(struct repository *r, const char *object_dir); int write_midx_file(const char *object_dir); From b8990fbfedf7cd9fc92a5208b0fbbd7dad79be6d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:37 -0400 Subject: [PATCH 20/92] midx: use midx in approximate_object_count Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- packfile.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packfile.c b/packfile.c index c0eb5ac885f5ff..97e7812b6be638 100644 --- a/packfile.c +++ b/packfile.c @@ -861,10 +861,13 @@ unsigned long approximate_object_count(void) { if (!the_repository->objects->approximate_object_count_valid) { unsigned long count; + struct multi_pack_index *m; struct packed_git *p; prepare_packed_git(the_repository); count = 0; + for (m = get_multi_pack_index(the_repository); m; m = m->next) + count += m->num_objects; for (p = the_repository->objects->packed_git; p; p = p->next) { if (open_pack_index(p)) continue; From f3a002bd84790e89399c3a18f1e7101b850ed6f8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:38 -0400 Subject: [PATCH 21/92] midx: prevent duplicate packfile loads The 
multi-pack-index, when present, tracks the existence of objects and their offsets within a list of packfiles. This allows us to use the multi-pack-index for object lookups, abbreviations, and object counts. When the multi-pack-index tracks a packfile, then we do not need to add that packfile to the packed_git linked list or the MRU list. We still need to load the packfiles that are not tracked by the multi-pack-index. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- packfile.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packfile.c b/packfile.c index 97e7812b6be638..2c819a0ad8deb3 100644 --- a/packfile.c +++ b/packfile.c @@ -795,6 +795,7 @@ struct prepare_pack_data { struct repository *r; struct string_list *garbage; int local; + struct multi_pack_index *m; }; static void prepare_pack(const char *full_name, size_t full_name_len, @@ -805,6 +806,8 @@ static void prepare_pack(const char *full_name, size_t full_name_len, size_t base_len = full_name_len; if (strip_suffix_mem(full_name, &base_len, ".idx")) { + if (data->m && midx_contains_pack(data->m, file_name)) + return; /* Don't reopen a pack we already have. 
*/ for (p = data->r->objects->packed_git; p; p = p->next) { size_t len; @@ -839,6 +842,12 @@ static void prepare_packed_git_one(struct repository *r, char *objdir, int local struct prepare_pack_data data; struct string_list garbage = STRING_LIST_INIT_DUP; + data.m = r->objects->multi_pack_index; + + /* look for the multi-pack-index for this object directory */ + while (data.m && strcmp(data.m->object_dir, objdir)) + data.m = data.m->next; + data.r = r; data.garbage = &garbage; data.local = local; From 17c35c89698c1b9e130ae9a3dc9c016b353308d8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:39 -0400 Subject: [PATCH 22/92] packfile: skip loading index if in multi-pack-index Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- packfile.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/packfile.c b/packfile.c index 2c819a0ad8deb3..e6ecf12ab581bf 100644 --- a/packfile.c +++ b/packfile.c @@ -469,8 +469,19 @@ static int open_packed_git_1(struct packed_git *p) ssize_t read_result; const unsigned hashsz = the_hash_algo->rawsz; - if (!p->index_data && open_pack_index(p)) - return error("packfile %s index unavailable", p->pack_name); + if (!p->index_data) { + struct multi_pack_index *m; + const char *pack_name = strrchr(p->pack_name, '/'); + + for (m = the_repository->objects->multi_pack_index; + m; m = m->next) { + if (midx_contains_pack(m, pack_name)) + break; + } + + if (!m && open_pack_index(p)) + return error("packfile %s index unavailable", p->pack_name); + } if (!pack_max_fds) { unsigned int max_fds = get_max_fd_limit(); @@ -521,6 +532,10 @@ static int open_packed_git_1(struct packed_git *p) " supported (try upgrading GIT to a newer version)", p->pack_name, ntohl(hdr.hdr_version)); + /* Skip index checking if in multi-pack-index */ + if (!p->index_data) + return 0; + /* Verify the pack matches its index. 
*/ if (p->num_objects != ntohl(hdr.hdr_entries)) return error("packfile %s claims to have %"PRIu32" objects" From 525e18c04bb38450e6677bb2aa5c65b78254b5c2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 15:39:40 -0400 Subject: [PATCH 23/92] midx: clear midx on repack If a 'git repack' command replaces existing packfiles, then we must clear the existing multi-pack-index before moving the packfiles it references. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/repack.c | 9 +++++++++ midx.c | 12 ++++++++++++ midx.h | 1 + t/t5319-multi-pack-index.sh | 9 +++++++++ 4 files changed, 31 insertions(+) diff --git a/builtin/repack.c b/builtin/repack.c index 6c636e159eaf2d..7f7cdc8b17dcd2 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -8,6 +8,7 @@ #include "strbuf.h" #include "string-list.h" #include "argv-array.h" +#include "midx.h" static int delta_base_offset = 1; static int pack_kept_objects = -1; @@ -174,6 +175,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) int no_update_server_info = 0; int quiet = 0; int local = 0; + int midx_cleared = 0; struct option builtin_repack_options[] = { OPT_BIT('a', NULL, &pack_everything, @@ -333,6 +335,13 @@ int cmd_repack(int argc, const char **argv, const char *prefix) for_each_string_list_item(item, &names) { for (ext = 0; ext < ARRAY_SIZE(exts); ext++) { char *fname, *fname_old; + + if (!midx_cleared) { + /* if we move a packfile, it will invalidate the midx */ + clear_midx_file(get_object_directory()); + midx_cleared = 1; + } + fname = mkpathdup("%s/pack-%s%s", packdir, item->string, exts[ext].name); if (!file_exists(fname)) { diff --git a/midx.c b/midx.c index bf2334acc6f2a2..19b7df338ee76b 100644 --- a/midx.c +++ b/midx.c @@ -904,3 +904,15 @@ int write_midx_file(const char *object_dir) free(midx_name); return 0; } + +void clear_midx_file(const char *object_dir) +{ + char *midx = get_midx_filename(object_dir); + + if (remove_path(midx)) { + UNLEAK(midx); + 
die(_("failed to clear multi-pack-index at %s"), midx); + } + + free(midx); +} diff --git a/midx.h b/midx.h index d4cde99473aa1d..e3b07f15862a75 100644 --- a/midx.h +++ b/midx.h @@ -39,5 +39,6 @@ int midx_contains_pack(struct multi_pack_index *m, const char *idx_name); int prepare_multi_pack_index_one(struct repository *r, const char *object_dir); int write_midx_file(const char *object_dir); +void clear_midx_file(const char *object_dir); #endif diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index b9661c7c660e20..ae1d5d4592e2af 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -141,6 +141,15 @@ test_expect_success 'write midx with twelve packs' ' compare_results_with_midx "twelve packs" +test_expect_success 'repack removes multi-pack-index' ' + test_path_is_file $objdir/pack/multi-pack-index && + git repack -adf && + test_path_is_missing $objdir/pack/multi-pack-index +' + +compare_results_with_midx "after repack" + + # usage: corrupt_data [] corrupt_data () { file=$1 From 6d68e6a46174746d95373a47ab4ef4f57aa56d22 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 20 Aug 2018 16:51:53 +0000 Subject: [PATCH 24/92] multi-pack-index: provide more helpful usage info The multi-pack-index builtin has a very simple command-line interface. Instead of simply reporting usage, give the user a hint to why the arguments failed. 
Reported-by: Eric Sunshine Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/multi-pack-index.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 6a7aa00cf2e7b1..2633efd95d01cd 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -32,16 +32,16 @@ int cmd_multi_pack_index(int argc, const char **argv, opts.object_dir = get_object_directory(); if (argc == 0) - goto usage; + usage_with_options(builtin_multi_pack_index_usage, + builtin_multi_pack_index_options); - if (!strcmp(argv[0], "write")) { - if (argc > 1) - goto usage; + if (argc > 1) { + die(_("too many arguments")); + return 1; + } + if (!strcmp(argv[0], "write")) return write_midx_file(opts.object_dir); - } -usage: - usage_with_options(builtin_multi_pack_index_usage, - builtin_multi_pack_index_options); + die(_("unrecognized verb: %s"), argv[0]); } From 2cf489a3bf75d7569c228147c3d9c559f02fd62c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 20 Aug 2018 16:51:55 +0000 Subject: [PATCH 25/92] multi-pack-index: store local property A pack-file is 'local' if it is stored within the usual object directory. If it is stored in an alternate, it is non-local. Pack-files are stored using a 'pack_local' member in the packed_git struct. Add a similar 'local' member to the multi_pack_index struct and 'local' parameters to the methods that load and prepare multi- pack-indexes. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 11 ++++++----- midx.h | 6 ++++-- packfile.c | 4 ++-- t/helper/test-read-midx.c | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/midx.c b/midx.c index 19b7df338ee76b..6824acf5f89379 100644 --- a/midx.c +++ b/midx.c @@ -37,7 +37,7 @@ static char *get_midx_filename(const char *object_dir) return xstrfmt("%s/pack/multi-pack-index", object_dir); } -struct multi_pack_index *load_multi_pack_index(const char *object_dir) +struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local) { struct multi_pack_index *m = NULL; int fd; @@ -73,6 +73,7 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir) m->fd = fd; m->data = midx_map; m->data_len = midx_size; + m->local = local; m->signature = get_be32(m->data); if (m->signature != MIDX_SIGNATURE) { @@ -209,7 +210,7 @@ static int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) strbuf_addf(&pack_name, "%s/pack/%s", m->object_dir, m->pack_names[pack_int_id]); - m->packs[pack_int_id] = add_packed_git(pack_name.buf, pack_name.len, 1); + m->packs[pack_int_id] = add_packed_git(pack_name.buf, pack_name.len, m->local); strbuf_release(&pack_name); return !m->packs[pack_int_id]; } @@ -318,7 +319,7 @@ int midx_contains_pack(struct multi_pack_index *m, const char *idx_name) return 0; } -int prepare_multi_pack_index_one(struct repository *r, const char *object_dir) +int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local) { struct multi_pack_index *m = r->objects->multi_pack_index; struct multi_pack_index *m_search; @@ -332,7 +333,7 @@ int prepare_multi_pack_index_one(struct repository *r, const char *object_dir) if (!strcmp(object_dir, m_search->object_dir)) return 1; - r->objects->multi_pack_index = load_multi_pack_index(object_dir); + r->objects->multi_pack_index = load_multi_pack_index(object_dir, local); if (r->objects->multi_pack_index) { 
r->objects->multi_pack_index->next = m; @@ -746,7 +747,7 @@ int write_midx_file(const char *object_dir) midx_name); } - packs.m = load_multi_pack_index(object_dir); + packs.m = load_multi_pack_index(object_dir, 1); packs.nr = 0; packs.alloc_list = packs.m ? packs.m->num_packs : 16; diff --git a/midx.h b/midx.h index e3b07f15862a75..8aa79f4b628fea 100644 --- a/midx.h +++ b/midx.h @@ -18,6 +18,8 @@ struct multi_pack_index { uint32_t num_packs; uint32_t num_objects; + int local; + const unsigned char *chunk_pack_names; const uint32_t *chunk_oid_fanout; const unsigned char *chunk_oid_lookup; @@ -29,14 +31,14 @@ struct multi_pack_index { char object_dir[FLEX_ARRAY]; }; -struct multi_pack_index *load_multi_pack_index(const char *object_dir); +struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local); int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result); struct object_id *nth_midxed_object_oid(struct object_id *oid, struct multi_pack_index *m, uint32_t n); int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m); int midx_contains_pack(struct multi_pack_index *m, const char *idx_name); -int prepare_multi_pack_index_one(struct repository *r, const char *object_dir); +int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local); int write_midx_file(const char *object_dir); void clear_midx_file(const char *object_dir); diff --git a/packfile.c b/packfile.c index 12db1a9d7d6251..896da460ac7779 100644 --- a/packfile.c +++ b/packfile.c @@ -963,11 +963,11 @@ static void prepare_packed_git(struct repository *r) if (r->objects->packed_git_initialized) return; - prepare_multi_pack_index_one(r, r->objects->objectdir); + prepare_multi_pack_index_one(r, r->objects->objectdir, 1); prepare_packed_git_one(r, r->objects->objectdir, 1); prepare_alt_odb(r); for (alt = r->objects->alt_odb_list; alt; alt = alt->next) { - prepare_multi_pack_index_one(r, 
alt->path); + prepare_multi_pack_index_one(r, alt->path, 0); prepare_packed_git_one(r, alt->path, 0); } rearrange_packed_git(r); diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 8e19972e8939d1..831b586d022215 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -7,7 +7,7 @@ static int read_midx_file(const char *object_dir) { uint32_t i; - struct multi_pack_index *m = load_multi_pack_index(object_dir); + struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); if (!m) return 1; From c39b02ae0ae90b9fda353f87502ace9ba36db839 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 20 Aug 2018 16:51:57 +0000 Subject: [PATCH 26/92] midx: mark bad packed objects When an object fails to decompress from a pack-file, we mark the object as 'bad' so we can retry with a different copy of the object (if such a copy exists). Before now, the multi-pack-index did not update the bad objects list for the pack-files it contains, and we did not check the bad objects list when reading an object. Now, do both. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/midx.c b/midx.c index 6824acf5f89379..7fa75a37a3cd45 100644 --- a/midx.c +++ b/midx.c @@ -280,6 +280,16 @@ static int nth_midxed_pack_entry(struct multi_pack_index *m, struct pack_entry * if (!is_pack_valid(p)) return 0; + if (p->num_bad_objects) { + uint32_t i; + struct object_id oid; + nth_midxed_object_oid(&oid, m, pos); + for (i = 0; i < p->num_bad_objects; i++) + if (!hashcmp(oid.hash, + p->bad_object_sha1 + the_hash_algo->rawsz * i)) + return 0; + } + e->offset = nth_midxed_offset(m, pos); e->p = p; From fe86c3beb5893edd4e5648dab8cca66d6cc2e77d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 20 Aug 2018 16:51:59 +0000 Subject: [PATCH 27/92] midx: stop reporting garbage When prepare_packed_git is called with the report_garbage method initialized, we report unexpected files in the objects directory as garbage. Stop reporting the multi-pack-index and the pack-files it covers as garbage. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- packfile.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packfile.c b/packfile.c index 896da460ac7779..fe713a0242212d 100644 --- a/packfile.c +++ b/packfile.c @@ -820,9 +820,8 @@ static void prepare_pack(const char *full_name, size_t full_name_len, struct packed_git *p; size_t base_len = full_name_len; - if (strip_suffix_mem(full_name, &base_len, ".idx")) { - if (data->m && midx_contains_pack(data->m, file_name)) - return; + if (strip_suffix_mem(full_name, &base_len, ".idx") && + !(data->m && midx_contains_pack(data->m, file_name))) { /* Don't reopen a pack we already have. 
*/ for (p = data->r->objects->packed_git; p; p = p->next) { size_t len; @@ -842,6 +841,8 @@ static void prepare_pack(const char *full_name, size_t full_name_len, if (!report_garbage) return; + if (!strcmp(file_name, "multi-pack-index")) + return; if (ends_with(file_name, ".idx") || ends_with(file_name, ".pack") || ends_with(file_name, ".bitmap") || From 29e2016b8f952c900b2f4ce148be5279c53fd9e3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 20 Aug 2018 16:52:00 +0000 Subject: [PATCH 28/92] midx: fix bug that skips midx with alternates The logic for constructing the linked list of multi-pack-indexes in the object store is incorrect. If the local object store has a multi-pack-index, but an alternate does not, then the list is dropped. Add tests that would have revealed this bug. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 11 ++++++----- t/t5319-multi-pack-index.sh | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/midx.c b/midx.c index 7fa75a37a3cd45..0710c4c175c72d 100644 --- a/midx.c +++ b/midx.c @@ -331,7 +331,7 @@ int midx_contains_pack(struct multi_pack_index *m, const char *idx_name) int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local) { - struct multi_pack_index *m = r->objects->multi_pack_index; + struct multi_pack_index *m; struct multi_pack_index *m_search; int config_value; @@ -339,14 +339,15 @@ int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, i !config_value) return 0; - for (m_search = m; m_search; m_search = m_search->next) + for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next) if (!strcmp(object_dir, m_search->object_dir)) return 1; - r->objects->multi_pack_index = load_multi_pack_index(object_dir, local); + m = load_multi_pack_index(object_dir, local); - if (r->objects->multi_pack_index) { - r->objects->multi_pack_index->next = m; + if (m) { + m->next = r->objects->multi_pack_index; + 
r->objects->multi_pack_index = m; return 1; } diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index ae1d5d4592e2af..4b6e2825a6dc37 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -149,6 +149,23 @@ test_expect_success 'repack removes multi-pack-index' ' compare_results_with_midx "after repack" +test_expect_success 'multi-pack-index and alternates' ' + git init --bare alt.git && + echo $(pwd)/alt.git/objects >.git/objects/info/alternates && + echo content1 >file1 && + altblob=$(GIT_DIR=alt.git git hash-object -w file1) && + git cat-file blob $altblob && + git rev-list --all +' + +compare_results_with_midx "with alternate (local midx)" + +test_expect_success 'multi-pack-index in an alternate' ' + mv .git/objects/pack/* alt.git/objects/pack +' + +compare_results_with_midx "with alternate (remote midx)" + # usage: corrupt_data [] corrupt_data () { From 0bff5269d3ed7124259bb3a5b33ddf2c4080b7e7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 20 Aug 2018 16:52:02 +0000 Subject: [PATCH 29/92] packfile: add all_packs list If a repo contains a multi-pack-index, then the packed_git list does not contain the packfiles that are covered by the multi-pack-index. This is important for doing object lookups, abbreviations, and approximating object count. However, there are many operations that really want to iterate over all packfiles. Create a new 'all_packs' linked list that contains this list, starting with the packfiles in the multi-pack-index and then continuing along the packed_git linked list. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 2 +- midx.h | 1 + object-store.h | 6 ++++++ packfile.c | 27 +++++++++++++++++++++++++++ packfile.h | 1 + 5 files changed, 36 insertions(+), 1 deletion(-) diff --git a/midx.c b/midx.c index 0710c4c175c72d..f3e8dbc1082030 100644 --- a/midx.c +++ b/midx.c @@ -197,7 +197,7 @@ static void close_midx(struct multi_pack_index *m) FREE_AND_NULL(m->pack_names); } -static int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) +int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) { struct strbuf pack_name = STRBUF_INIT; diff --git a/midx.h b/midx.h index 8aa79f4b628fea..a210f1af2af6bd 100644 --- a/midx.h +++ b/midx.h @@ -32,6 +32,7 @@ struct multi_pack_index { }; struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local); +int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id); int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result); struct object_id *nth_midxed_object_oid(struct object_id *oid, struct multi_pack_index *m, diff --git a/object-store.h b/object-store.h index 97f1c160e59b96..63b7605a3e0b30 100644 --- a/object-store.h +++ b/object-store.h @@ -129,6 +129,12 @@ struct raw_object_store { /* A most-recently-used ordered version of the packed_git list. */ struct list_head packed_git_mru; + /* + * A linked list containing all packfiles, starting with those + * contained in the multi_pack_index. + */ + struct packed_git *all_packs; + /* * A fast, rough count of the number of objects in the repository. * These two fields are not meant for direct access. 
Use diff --git a/packfile.c b/packfile.c index fe713a0242212d..adcf2e12a0749b 100644 --- a/packfile.c +++ b/packfile.c @@ -972,6 +972,9 @@ static void prepare_packed_git(struct repository *r) prepare_packed_git_one(r, alt->path, 0); } rearrange_packed_git(r); + + r->objects->all_packs = NULL; + prepare_packed_git_mru(r); r->objects->packed_git_initialized = 1; } @@ -995,6 +998,30 @@ struct multi_pack_index *get_multi_pack_index(struct repository *r) return r->objects->multi_pack_index; } +struct packed_git *get_all_packs(struct repository *r) +{ + prepare_packed_git(r); + + if (!r->objects->all_packs) { + struct packed_git *p = r->objects->packed_git; + struct multi_pack_index *m; + + for (m = r->objects->multi_pack_index; m; m = m->next) { + uint32_t i; + for (i = 0; i < m->num_packs; i++) { + if (!prepare_midx_pack(m, i)) { + m->packs[i]->next = p; + p = m->packs[i]; + } + } + } + + r->objects->all_packs = p; + } + + return r->objects->all_packs; +} + struct list_head *get_packed_git_mru(struct repository *r) { prepare_packed_git(r); diff --git a/packfile.h b/packfile.h index 5abfaf2ab5c347..442625723dea4b 100644 --- a/packfile.h +++ b/packfile.h @@ -51,6 +51,7 @@ extern void install_packed_git(struct repository *r, struct packed_git *pack); struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); struct multi_pack_index *get_multi_pack_index(struct repository *r); +struct packed_git *get_all_packs(struct repository *r); /* * Give a rough count of objects in the repository. This sacrifices accuracy From 454ea2e4d7036862e8b2f69ef2dea640f8787510 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 20 Aug 2018 16:52:04 +0000 Subject: [PATCH 30/92] treewide: use get_all_packs There are many places in the codebase that want to iterate over all packfiles known to Git. The purposes are wide-ranging, and those that can take advantage of the multi-pack-index already do. 
So, use get_all_packs() instead of get_packed_git() to be sure we are iterating over all packfiles. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/count-objects.c | 2 +- builtin/fsck.c | 4 ++-- builtin/gc.c | 4 ++-- builtin/pack-objects.c | 14 +++++++------- builtin/pack-redundant.c | 4 ++-- fast-import.c | 4 ++-- http-backend.c | 4 ++-- pack-bitmap.c | 2 +- pack-objects.c | 2 +- packfile.c | 2 +- server-info.c | 4 ++-- 11 files changed, 23 insertions(+), 23 deletions(-) diff --git a/builtin/count-objects.c b/builtin/count-objects.c index d51e2ce1ec016a..a7cad052c61580 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -123,7 +123,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) struct strbuf pack_buf = STRBUF_INIT; struct strbuf garbage_buf = STRBUF_INIT; - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (!p->pack_local) continue; if (open_pack_index(p)) diff --git a/builtin/fsck.c b/builtin/fsck.c index 250f5af1182ddc..63c8578cc16cd2 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -740,7 +740,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) struct progress *progress = NULL; if (show_progress) { - for (p = get_packed_git(the_repository); p; + for (p = get_all_packs(the_repository); p; p = p->next) { if (open_pack_index(p)) continue; @@ -749,7 +749,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) progress = start_progress(_("Checking objects"), total); } - for (p = get_packed_git(the_repository); p; + for (p = get_all_packs(the_repository); p; p = p->next) { /* verify gives error messages itself */ if (verify_pack(p, fsck_obj_buffer, diff --git a/builtin/gc.c b/builtin/gc.c index 57069442b0dc12..2b592260e9ad0d 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -183,7 +183,7 @@ static struct packed_git *find_base_packs(struct string_list *packs, { struct packed_git *p, *base = NULL; - 
for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (!p->pack_local) continue; if (limit) { @@ -208,7 +208,7 @@ static int too_many_packs(void) if (gc_auto_pack_limit <= 0) return 0; - for (cnt = 0, p = get_packed_git(the_repository); p; p = p->next) { + for (cnt = 0, p = get_all_packs(the_repository); p; p = p->next) { if (!p->pack_local) continue; if (p->pack_keep) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 0d80dee2ba1ad8..cf5cf45ac94056 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -2809,7 +2809,7 @@ static void add_objects_in_unpacked_packs(struct rev_info *revs) memset(&in_pack, 0, sizeof(in_pack)); - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { struct object_id oid; struct object *o; @@ -2873,7 +2873,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) struct packed_git *p; p = (last_found != (void *)1) ? 
last_found : - get_packed_git(the_repository); + get_all_packs(the_repository); while (p) { if ((!p->pack_local || p->pack_keep || @@ -2883,7 +2883,7 @@ static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) return 1; } if (p == last_found) - p = get_packed_git(the_repository); + p = get_all_packs(the_repository); else p = p->next; if (p == last_found) @@ -2919,7 +2919,7 @@ static void loosen_unused_packed_objects(struct rev_info *revs) uint32_t i; struct object_id oid; - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) continue; @@ -3066,7 +3066,7 @@ static void add_extra_kept_packs(const struct string_list *names) if (!names->nr) return; - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { const char *name = basename(p->pack_name); int i; @@ -3339,7 +3339,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) add_extra_kept_packs(&keep_pack_list); if (ignore_packed_keep_on_disk) { struct packed_git *p; - for (p = get_packed_git(the_repository); p; p = p->next) + for (p = get_all_packs(the_repository); p; p = p->next) if (p->pack_local && p->pack_keep) break; if (!p) /* no keep-able packs found */ @@ -3352,7 +3352,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) * it also covers non-local objects */ struct packed_git *p; - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (!p->pack_local) { have_non_local_packs = 1; break; diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index 0494dceff73d67..cf9a9aabd4eb2e 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -577,7 +577,7 @@ static struct pack_list * add_pack(struct packed_git *p) static struct pack_list * add_pack_file(const char *filename) { - struct 
packed_git *p = get_packed_git(the_repository); + struct packed_git *p = get_all_packs(the_repository); if (strlen(filename) < 40) die("Bad pack filename: %s", filename); @@ -592,7 +592,7 @@ static struct pack_list * add_pack_file(const char *filename) static void load_all(void) { - struct packed_git *p = get_packed_git(the_repository); + struct packed_git *p = get_all_packs(the_repository); while (p) { add_pack(p); diff --git a/fast-import.c b/fast-import.c index 89bb0c9db3de9b..f8c3acd3b5577c 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1068,7 +1068,7 @@ static int store_object( duplicate_count_by_type[type]++; return 1; } else if (find_sha1_pack(oid.hash, - get_packed_git(the_repository))) { + get_all_packs(the_repository))) { e->type = type; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! */ @@ -1266,7 +1266,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) truncate_pack(&checkpoint); } else if (find_sha1_pack(oid.hash, - get_packed_git(the_repository))) { + get_all_packs(the_repository))) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! 
*/ diff --git a/http-backend.c b/http-backend.c index 88c38c834ba479..809ba7d2c49eac 100644 --- a/http-backend.c +++ b/http-backend.c @@ -595,13 +595,13 @@ static void get_info_packs(struct strbuf *hdr, char *arg) size_t cnt = 0; select_getanyfile(hdr); - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (p->pack_local) cnt++; } strbuf_grow(&buf, cnt * 53 + 2); - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (p->pack_local) strbuf_addf(&buf, "P %s\n", p->pack_name + objdirlen + 6); } diff --git a/pack-bitmap.c b/pack-bitmap.c index f0a1937a1cc5fb..4e50ab391fa0df 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -335,7 +335,7 @@ static int open_pack_bitmap(struct bitmap_index *bitmap_git) assert(!bitmap_git->map && !bitmap_git->loaded); - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if (open_pack_bitmap_1(bitmap_git, p) == 0) ret = 0; } diff --git a/pack-objects.c b/pack-objects.c index 92708522e76b45..832dcf74627975 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -99,7 +99,7 @@ static void prepare_in_pack_by_idx(struct packing_data *pdata) * (i.e. in_pack_idx also zero) should return NULL. 
*/ mapping[cnt++] = NULL; - for (p = get_packed_git(the_repository); p; p = p->next, cnt++) { + for (p = get_all_packs(the_repository); p; p = p->next, cnt++) { if (cnt == nr) { free(mapping); return; diff --git a/packfile.c b/packfile.c index adcf2e12a0749b..cbef7033c3b8ea 100644 --- a/packfile.c +++ b/packfile.c @@ -2036,7 +2036,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, int pack_errors = 0; prepare_packed_git(the_repository); - for (p = the_repository->objects->packed_git; p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && diff --git a/server-info.c b/server-info.c index 41050c2449b1ad..e2b2d6a27a40b1 100644 --- a/server-info.c +++ b/server-info.c @@ -199,7 +199,7 @@ static void init_pack_info(const char *infofile, int force) objdir = get_object_directory(); objdirlen = strlen(objdir); - for (p = get_packed_git(the_repository); p; p = p->next) { + for (p = get_all_packs(the_repository); p; p = p->next) { /* we ignore things on alternate path since they are * not available to the pullers in general. */ @@ -209,7 +209,7 @@ static void init_pack_info(const char *infofile, int force) } num_pack = i; info = xcalloc(num_pack, sizeof(struct pack_info *)); - for (i = 0, p = get_packed_git(the_repository); p; p = p->next) { + for (i = 0, p = get_all_packs(the_repository); p; p = p->next) { if (!p->pack_local) continue; info[i] = xcalloc(1, sizeof(struct pack_info)); From e9ab2ed7de33a399b44295628e587db6a57bf897 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 20 Aug 2018 16:52:06 +0000 Subject: [PATCH 31/92] midx: test a few commands that use get_all_packs The new get_all_packs() method exposed the packfiles covered by a multi-pack-index. Before, the 'git cat-file --batch' and 'git count-objects' commands would skip objects in an environment with a multi-pack-index. 
Further, a reachability bitmap would be ignored if its pack-file was covered by a multi-pack-index. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5319-multi-pack-index.sh | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 4b6e2825a6dc37..424d0c640f5ee4 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -86,8 +86,14 @@ test_expect_success 'write midx with one v1 pack' ' ' midx_git_two_modes () { - git -c core.multiPackIndex=false $1 >expect && - git -c core.multiPackIndex=true $1 >actual && + if [ "$2" = "sorted" ] + then + git -c core.multiPackIndex=false $1 | sort >expect && + git -c core.multiPackIndex=true $1 | sort >actual + else + git -c core.multiPackIndex=false $1 >expect && + git -c core.multiPackIndex=true $1 >actual + fi && test_cmp expect actual } @@ -95,7 +101,10 @@ compare_results_with_midx () { MSG=$1 test_expect_success "check normal git operations: $MSG" ' midx_git_two_modes "rev-list --objects --all" && - midx_git_two_modes "log --raw" + midx_git_two_modes "log --raw" && + midx_git_two_modes "count-objects --verbose" && + midx_git_two_modes "cat-file --batch-all-objects --buffer --batch-check" && + midx_git_two_modes "cat-file --batch-all-objects --buffer --batch-check --unsorted" sorted ' } @@ -149,6 +158,12 @@ test_expect_success 'repack removes multi-pack-index' ' compare_results_with_midx "after repack" +test_expect_success 'multi-pack-index and pack-bitmap' ' + git -c repack.writeBitmaps=true repack -ad && + git multi-pack-index write && + git rev-list --test-bitmap HEAD +' + test_expect_success 'multi-pack-index and alternates' ' git init --bare alt.git && echo $(pwd)/alt.git/objects >.git/objects/info/alternates && From 6a22d521260f86dff8fe6f23ab329cebb62ba4f0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 20 Aug 2018 16:52:08 +0000 Subject: [PATCH 32/92] pack-objects: consider packs 
in multi-pack-index When running 'git pack-objects --local', we want to avoid packing objects that are in an alternate. Currently, we check for these objects using the packed_git_mru list, which excludes the pack-files covered by a multi-pack-index. Add a new iteration over the multi-pack-indexes to find these copies and mark them as unwanted. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 28 ++++++++++++++++++++++++++++ t/t5319-multi-pack-index.sh | 8 +++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index cf5cf45ac94056..807f0343653ada 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -31,6 +31,7 @@ #include "packfile.h" #include "object-store.h" #include "dir.h" +#include "midx.h" #define IN_PACK(obj) oe_in_pack(&to_pack, obj) #define SIZE(obj) oe_size(&to_pack, obj) @@ -1040,6 +1041,7 @@ static int want_object_in_pack(const struct object_id *oid, { int want; struct list_head *pos; + struct multi_pack_index *m; if (!exclude && local && has_loose_object_nonlocal(oid)) return 0; @@ -1054,6 +1056,32 @@ static int want_object_in_pack(const struct object_id *oid, if (want != -1) return want; } + + for (m = get_multi_pack_index(the_repository); m; m = m->next) { + struct pack_entry e; + if (fill_midx_entry(oid, &e, m)) { + struct packed_git *p = e.p; + off_t offset; + + if (p == *found_pack) + offset = *found_offset; + else + offset = find_pack_entry_one(oid->hash, p); + + if (offset) { + if (!*found_pack) { + if (!is_pack_valid(p)) + continue; + *found_offset = offset; + *found_pack = p; + } + want = want_found_object(exclude, p); + if (want != -1) + return want; + } + } + } + list_for_each(pos, get_packed_git_mru(the_repository)) { struct packed_git *p = list_entry(pos, struct packed_git, mru); off_t offset; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 424d0c640f5ee4..6f56b38674fd62 100755 --- 
a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -176,7 +176,13 @@ test_expect_success 'multi-pack-index and alternates' ' compare_results_with_midx "with alternate (local midx)" test_expect_success 'multi-pack-index in an alternate' ' - mv .git/objects/pack/* alt.git/objects/pack + mv .git/objects/pack/* alt.git/objects/pack && + test_commit add_local_objects && + git repack --local && + git multi-pack-index write && + midx_read_expect 1 3 4 $objdir && + git reset --hard HEAD~1 && + rm -f .git/objects/pack/* ' compare_results_with_midx "with alternate (remote midx)" From 64cbf3df212f1cefd068fabdffc961ae866be0d2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 13 Sep 2018 11:02:13 -0700 Subject: [PATCH 33/92] multi-pack-index: add 'verify' verb The multi-pack-index builtin writes multi-pack-index files, and uses a 'write' verb to do so. Add a 'verify' verb that checks this file matches the contents of the pack-indexes it replaces. The current implementation is a no-op, but will be extended in small increments in later commits. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.txt | 10 ++++++++++ builtin/multi-pack-index.c | 4 +++- midx.c | 13 +++++++++++++ midx.h | 1 + t/t5319-multi-pack-index.sh | 8 ++++++++ 5 files changed, 35 insertions(+), 1 deletion(-) diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt index 1f97e79912f45d..f7778a2c85c1aa 100644 --- a/Documentation/git-multi-pack-index.txt +++ b/Documentation/git-multi-pack-index.txt @@ -27,6 +27,10 @@ write:: When given as the verb, write a new MIDX file to `<dir>/packs/multi-pack-index`. +verify:: + When given as the verb, verify the contents of the MIDX file + at `<dir>/packs/multi-pack-index`. 
+ EXAMPLES -------- @@ -43,6 +47,12 @@ $ git multi-pack-index write $ git multi-pack-index --object-dir write ----------------------------------------------- +* Verify the MIDX file for the packfiles in the current .git folder. ++ +----------------------------------------------- +$ git multi-pack-index verify +----------------------------------------------- + SEE ALSO -------- diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 2633efd95d01cd..fca70f8e4fcca8 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -5,7 +5,7 @@ #include "midx.h" static char const * const builtin_multi_pack_index_usage[] = { - N_("git multi-pack-index [--object-dir=] write"), + N_("git multi-pack-index [--object-dir=] (write|verify)"), NULL }; @@ -42,6 +42,8 @@ int cmd_multi_pack_index(int argc, const char **argv, if (!strcmp(argv[0], "write")) return write_midx_file(opts.object_dir); + if (!strcmp(argv[0], "verify")) + return verify_midx_file(opts.object_dir); die(_("unrecognized verb: %s"), argv[0]); } diff --git a/midx.c b/midx.c index f3e8dbc1082030..b253bed51700d9 100644 --- a/midx.c +++ b/midx.c @@ -928,3 +928,16 @@ void clear_midx_file(const char *object_dir) free(midx); } + +int verify_midx_error; + +int verify_midx_file(const char *object_dir) +{ + struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); + verify_midx_error = 0; + + if (!m) + return 0; + + return verify_midx_error; +} diff --git a/midx.h b/midx.h index a210f1af2af6bd..ce80b91c68ac5d 100644 --- a/midx.h +++ b/midx.h @@ -43,5 +43,6 @@ int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, i int write_midx_file(const char *object_dir); void clear_midx_file(const char *object_dir); +int verify_midx_file(const char *object_dir); #endif diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 6f56b38674fd62..1c4e0e6d31b92a 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -150,6 +150,10 @@ 
test_expect_success 'write midx with twelve packs' ' compare_results_with_midx "twelve packs" +test_expect_success 'verify multi-pack-index success' ' + git multi-pack-index verify --object-dir=$objdir +' + test_expect_success 'repack removes multi-pack-index' ' test_path_is_file $objdir/pack/multi-pack-index && git repack -adf && @@ -214,4 +218,8 @@ test_expect_success 'force some 64-bit offsets with pack-objects' ' midx_read_expect 1 63 5 objects64 " large-offsets" ' +test_expect_success 'verify multi-pack-index with 64-bit offsets' ' + git multi-pack-index verify --object-dir=objects64 +' + test_done From 04ade3a798c134a9a71e986b6658c434810e9af2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 13 Sep 2018 11:02:15 -0700 Subject: [PATCH 34/92] multi-pack-index: verify bad header When verifying if a multi-pack-index file is valid, we want the command to fail to signal an invalid file. Previously, we wrote an error to stderr and continued as if we had no multi-pack-index. Now, die() instead of error(). 
Add tests that check corrupted headers in a few ways: * Bad signature * Bad file version * Bad hash version * Truncated hash count * Extended hash count Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 18 +++++---------- t/t5319-multi-pack-index.sh | 46 ++++++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/midx.c b/midx.c index b253bed51700d9..ec78254bb6dc6d 100644 --- a/midx.c +++ b/midx.c @@ -76,24 +76,18 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local m->local = local; m->signature = get_be32(m->data); - if (m->signature != MIDX_SIGNATURE) { - error(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"), + if (m->signature != MIDX_SIGNATURE) + die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"), m->signature, MIDX_SIGNATURE); - goto cleanup_fail; - } m->version = m->data[MIDX_BYTE_FILE_VERSION]; - if (m->version != MIDX_VERSION) { - error(_("multi-pack-index version %d not recognized"), + if (m->version != MIDX_VERSION) + die(_("multi-pack-index version %d not recognized"), m->version); - goto cleanup_fail; - } hash_version = m->data[MIDX_BYTE_HASH_VERSION]; - if (hash_version != MIDX_HASH_VERSION) { - error(_("hash version %u does not match"), hash_version); - goto cleanup_fail; - } + if (hash_version != MIDX_HASH_VERSION) + die(_("hash version %u does not match"), hash_version); m->hash_len = MIDX_HASH_LEN; m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS]; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 1c4e0e6d31b92a..e04b5f43a26f9e 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -154,6 +154,51 @@ test_expect_success 'verify multi-pack-index success' ' git multi-pack-index verify --object-dir=$objdir ' +# usage: corrupt_midx_and_verify +corrupt_midx_and_verify() { + POS=$1 && + DATA="${2:-\0}" && + OBJDIR=$3 && + GREPSTR="$4" && + 
FILE=$OBJDIR/pack/multi-pack-index && + chmod a+w $FILE && + test_when_finished mv midx-backup $FILE && + cp $FILE midx-backup && + printf "$DATA" | dd of="$FILE" bs=1 seek="$POS" conv=notrunc && + test_must_fail git multi-pack-index verify --object-dir=$OBJDIR 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "$GREPSTR" err +} + +test_expect_success 'verify bad signature' ' + corrupt_midx_and_verify 0 "\00" $objdir \ + "multi-pack-index signature" +' + +MIDX_BYTE_VERSION=4 +MIDX_BYTE_OID_VERSION=5 +MIDX_BYTE_CHUNK_COUNT=6 + +test_expect_success 'verify bad version' ' + corrupt_midx_and_verify $MIDX_BYTE_VERSION "\00" $objdir \ + "multi-pack-index version" +' + +test_expect_success 'verify bad OID version' ' + corrupt_midx_and_verify $MIDX_BYTE_OID_VERSION "\02" $objdir \ + "hash version" +' + +test_expect_success 'verify truncated chunk count' ' + corrupt_midx_and_verify $MIDX_BYTE_CHUNK_COUNT "\01" $objdir \ + "missing required" +' + +test_expect_success 'verify extended chunk count' ' + corrupt_midx_and_verify $MIDX_BYTE_CHUNK_COUNT "\07" $objdir \ + "terminating multi-pack-index chunk id appears earlier than expected" +' + test_expect_success 'repack removes multi-pack-index' ' test_path_is_file $objdir/pack/multi-pack-index && git repack -adf && @@ -191,7 +236,6 @@ test_expect_success 'multi-pack-index in an alternate' ' compare_results_with_midx "with alternate (remote midx)" - # usage: corrupt_data [] corrupt_data () { file=$1 From c05b2ff1e8503dbc64e6eb5b52fa8ca9e4ed93ea Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 13 Sep 2018 11:02:16 -0700 Subject: [PATCH 35/92] multi-pack-index: verify corrupt chunk lookup table Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 3 +++ t/t5319-multi-pack-index.sh | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/midx.c b/midx.c index ec78254bb6dc6d..8b054b39ab4ecd 100644 --- a/midx.c +++ b/midx.c @@ -100,6 +100,9 @@ struct multi_pack_index 
*load_multi_pack_index(const char *object_dir, int local uint64_t chunk_offset = get_be64(m->data + MIDX_HEADER_SIZE + 4 + MIDX_CHUNKLOOKUP_WIDTH * i); + if (chunk_offset >= m->data_len) + die(_("invalid chunk offset (too large)")); + switch (chunk_id) { case MIDX_CHUNKID_PACKNAMES: m->chunk_pack_names = m->data + chunk_offset; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index e04b5f43a26f9e..c54b6e71882a8d 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -178,6 +178,9 @@ test_expect_success 'verify bad signature' ' MIDX_BYTE_VERSION=4 MIDX_BYTE_OID_VERSION=5 MIDX_BYTE_CHUNK_COUNT=6 +MIDX_HEADER_SIZE=12 +MIDX_BYTE_CHUNK_ID=$MIDX_HEADER_SIZE +MIDX_BYTE_CHUNK_OFFSET=$(($MIDX_HEADER_SIZE + 4)) test_expect_success 'verify bad version' ' corrupt_midx_and_verify $MIDX_BYTE_VERSION "\00" $objdir \ @@ -199,6 +202,16 @@ test_expect_success 'verify extended chunk count' ' "terminating multi-pack-index chunk id appears earlier than expected" ' +test_expect_success 'verify missing required chunk' ' + corrupt_midx_and_verify $MIDX_BYTE_CHUNK_ID "\01" $objdir \ + "missing required" +' + +test_expect_success 'verify invalid chunk offset' ' + corrupt_midx_and_verify $MIDX_BYTE_CHUNK_OFFSET "\01" $objdir \ + "invalid chunk offset (too large)" +' + test_expect_success 'repack removes multi-pack-index' ' test_path_is_file $objdir/pack/multi-pack-index && git repack -adf && From 68e83e93507b9887d17a12bddf5961ad2e9c4453 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 13 Sep 2018 11:02:18 -0700 Subject: [PATCH 36/92] multi-pack-index: verify packname order The final check we make while loading a multi-pack-index is that the packfile names are in lexicographical order. Make this error be a die() instead. In order to test this condition, we need multiple packfiles. Earlier in t5319-multi-pack-index.sh, we tested the interaction with 'git repack' but this limits us to one packfile in our object dir. 
Move these repack tests until after the 'verify' tests. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 6 ++---- t/t5319-multi-pack-index.sh | 10 ++++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/midx.c b/midx.c index 8b054b39ab4ecd..e655a15aeded8b 100644 --- a/midx.c +++ b/midx.c @@ -157,12 +157,10 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local cur_pack_name += strlen(cur_pack_name) + 1; - if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) { - error(_("multi-pack-index pack names out of order: '%s' before '%s'"), + if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) + die(_("multi-pack-index pack names out of order: '%s' before '%s'"), m->pack_names[i - 1], m->pack_names[i]); - goto cleanup_fail; - } } return m; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index c54b6e71882a8d..01a3cd6b004d7c 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -181,6 +181,11 @@ MIDX_BYTE_CHUNK_COUNT=6 MIDX_HEADER_SIZE=12 MIDX_BYTE_CHUNK_ID=$MIDX_HEADER_SIZE MIDX_BYTE_CHUNK_OFFSET=$(($MIDX_HEADER_SIZE + 4)) +MIDX_NUM_CHUNKS=5 +MIDX_CHUNK_LOOKUP_WIDTH=12 +MIDX_OFFSET_PACKNAMES=$(($MIDX_HEADER_SIZE + \ + $MIDX_NUM_CHUNKS * $MIDX_CHUNK_LOOKUP_WIDTH)) +MIDX_BYTE_PACKNAME_ORDER=$(($MIDX_OFFSET_PACKNAMES + 2)) test_expect_success 'verify bad version' ' corrupt_midx_and_verify $MIDX_BYTE_VERSION "\00" $objdir \ @@ -212,6 +217,11 @@ test_expect_success 'verify invalid chunk offset' ' "invalid chunk offset (too large)" ' +test_expect_success 'verify packnames out of order' ' + corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "z" $objdir \ + "pack names out of order" +' + test_expect_success 'repack removes multi-pack-index' ' test_path_is_file $objdir/pack/multi-pack-index && git repack -adf && From cd1f9e7eca7b341fc6b4ba7f06b01ccb26a9859f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 13 Sep 2018 11:02:19 -0700 Subject: 
[PATCH 37/92] multi-pack-index: verify missing pack Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 16 ++++++++++++++++ t/t5319-multi-pack-index.sh | 5 +++++ 2 files changed, 21 insertions(+) diff --git a/midx.c b/midx.c index e655a15aeded8b..a02b19efc1d5ac 100644 --- a/midx.c +++ b/midx.c @@ -926,13 +926,29 @@ void clear_midx_file(const char *object_dir) int verify_midx_error; +static void midx_report(const char *fmt, ...) +{ + va_list ap; + verify_midx_error = 1; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); +} + int verify_midx_file(const char *object_dir) { + uint32_t i; struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); verify_midx_error = 0; if (!m) return 0; + for (i = 0; i < m->num_packs; i++) { + if (prepare_midx_pack(m, i)) + midx_report("failed to load pack in position %d", i); + } + return verify_midx_error; } diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 01a3cd6b004d7c..0a566afb055cc9 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -222,6 +222,11 @@ test_expect_success 'verify packnames out of order' ' "pack names out of order" ' +test_expect_success 'verify packnames out of order' ' + corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "a" $objdir \ + "failed to load pack" +' + test_expect_success 'repack removes multi-pack-index' ' test_path_is_file $objdir/pack/multi-pack-index && git repack -adf && From 48d194f299ecdb774db0c72596dbf22dc8368c43 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 13 Sep 2018 11:02:20 -0700 Subject: [PATCH 38/92] multi-pack-index: verify oid fanout order Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 9 +++++++++ t/t5319-multi-pack-index.sh | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/midx.c b/midx.c index a02b19efc1d5ac..dfd26b4d748aea 100644 --- a/midx.c +++ b/midx.c @@ -950,5 +950,14 @@ int verify_midx_file(const char *object_dir) 
midx_report("failed to load pack in position %d", i); } + for (i = 0; i < 255; i++) { + uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]); + uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i + 1]); + + if (oid_fanout1 > oid_fanout2) + midx_report(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"), + i, oid_fanout1, oid_fanout2, i + 1); + } + return verify_midx_error; } diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 0a566afb055cc9..47a54e138d53c5 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -186,6 +186,9 @@ MIDX_CHUNK_LOOKUP_WIDTH=12 MIDX_OFFSET_PACKNAMES=$(($MIDX_HEADER_SIZE + \ $MIDX_NUM_CHUNKS * $MIDX_CHUNK_LOOKUP_WIDTH)) MIDX_BYTE_PACKNAME_ORDER=$(($MIDX_OFFSET_PACKNAMES + 2)) +MIDX_OFFSET_OID_FANOUT=$(($MIDX_OFFSET_PACKNAMES + 652)) +MIDX_OID_FANOUT_WIDTH=4 +MIDX_BYTE_OID_FANOUT_ORDER=$((MIDX_OFFSET_OID_FANOUT + 250 * $MIDX_OID_FANOUT_WIDTH + 1)) test_expect_success 'verify bad version' ' corrupt_midx_and_verify $MIDX_BYTE_VERSION "\00" $objdir \ @@ -227,6 +230,11 @@ test_expect_success 'verify packnames out of order' ' "failed to load pack" ' +test_expect_success 'verify oid fanout out of order' ' + corrupt_midx_and_verify $MIDX_BYTE_OID_FANOUT_ORDER "\01" $objdir \ + "oid fanout out of order" +' + test_expect_success 'repack removes multi-pack-index' ' test_path_is_file $objdir/pack/multi-pack-index && git repack -adf && From 914581326581bda3c2bcb41623a4bee2a93ba20c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 13 Sep 2018 11:02:22 -0700 Subject: [PATCH 39/92] multi-pack-index: verify oid lookup order Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 11 +++++++++++ t/t5319-multi-pack-index.sh | 8 ++++++++ 2 files changed, 19 insertions(+) diff --git a/midx.c b/midx.c index dfd26b4d748aea..06d5cfc826b8e2 100644 --- a/midx.c +++ b/midx.c @@ -959,5 +959,16 @@ int verify_midx_file(const char *object_dir) i, oid_fanout1, oid_fanout2, i + 
1); } + for (i = 0; i < m->num_objects - 1; i++) { + struct object_id oid1, oid2; + + nth_midxed_object_oid(&oid1, m, i); + nth_midxed_object_oid(&oid2, m, i + 1); + + if (oidcmp(&oid1, &oid2) >= 0) + midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"), + i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1); + } + return verify_midx_error; } diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 47a54e138d53c5..a968b9a959684b 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -175,6 +175,7 @@ test_expect_success 'verify bad signature' ' "multi-pack-index signature" ' +HASH_LEN=20 MIDX_BYTE_VERSION=4 MIDX_BYTE_OID_VERSION=5 MIDX_BYTE_CHUNK_COUNT=6 @@ -189,6 +190,8 @@ MIDX_BYTE_PACKNAME_ORDER=$(($MIDX_OFFSET_PACKNAMES + 2)) MIDX_OFFSET_OID_FANOUT=$(($MIDX_OFFSET_PACKNAMES + 652)) MIDX_OID_FANOUT_WIDTH=4 MIDX_BYTE_OID_FANOUT_ORDER=$((MIDX_OFFSET_OID_FANOUT + 250 * $MIDX_OID_FANOUT_WIDTH + 1)) +MIDX_OFFSET_OID_LOOKUP=$(($MIDX_OFFSET_OID_FANOUT + 256 * $MIDX_OID_FANOUT_WIDTH)) +MIDX_BYTE_OID_LOOKUP=$(($MIDX_OFFSET_OID_LOOKUP + 16 * $HASH_LEN)) test_expect_success 'verify bad version' ' corrupt_midx_and_verify $MIDX_BYTE_VERSION "\00" $objdir \ @@ -235,6 +238,11 @@ test_expect_success 'verify oid fanout out of order' ' "oid fanout out of order" ' +test_expect_success 'verify oid lookup out of order' ' + corrupt_midx_and_verify $MIDX_BYTE_OID_LOOKUP "\00" $objdir \ + "oid lookup out of order" +' + test_expect_success 'repack removes multi-pack-index' ' test_path_is_file $objdir/pack/multi-pack-index && git repack -adf && From 51c12a12d5a74655bc0fea61df7ba3ee6b0e6686 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 13 Sep 2018 11:02:23 -0700 Subject: [PATCH 40/92] multi-pack-index: fix 32-bit vs 64-bit size check When loading a 64-bit offset, we intend to check that off_t can store the resulting offset. 
However, the condition accidentally checks the 32-bit offset to see if it is smaller than a 64-bit value. Fix it, and this will be covered by a test in the 'git multi-pack-index verify' command in a later commit. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/midx.c b/midx.c index 06d5cfc826b8e2..80094c02a7dcef 100644 --- a/midx.c +++ b/midx.c @@ -236,7 +236,7 @@ static off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) offset32 = get_be32(offset_data + sizeof(uint32_t)); if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) { - if (sizeof(offset32) < sizeof(uint64_t)) + if (sizeof(off_t) < sizeof(uint64_t)) die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); offset32 ^= MIDX_LARGE_OFFSET_NEEDED; From df4cbcb93b3b2f844e041bccc471792e4ed4fa8c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 13 Sep 2018 11:02:25 -0700 Subject: [PATCH 41/92] multi-pack-index: verify object offsets The 'git multi-pack-index verify' command must verify the object offsets stored in the multi-pack-index are correct. There are two ways the offset chunk can be incorrect: the pack-int-id and the object offset. Replace the BUG() statement with a die() statement, now that we may hit a bad pack-int-id during a 'verify' command on a corrupt multi-pack-index, and it is covered by a test. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 29 ++++++++++++++++++++++++++++- t/t5319-multi-pack-index.sh | 27 +++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/midx.c b/midx.c index 80094c02a7dcef..47e7e6113a990f 100644 --- a/midx.c +++ b/midx.c @@ -197,7 +197,8 @@ int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id) struct strbuf pack_name = STRBUF_INIT; if (pack_int_id >= m->num_packs) - BUG("bad pack-int-id"); + die(_("bad pack-int-id: %u (%u total packs"), + pack_int_id, m->num_packs); if (m->packs[pack_int_id]) return 0; @@ -970,5 +971,31 @@ int verify_midx_file(const char *object_dir) i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1); } + for (i = 0; i < m->num_objects; i++) { + struct object_id oid; + struct pack_entry e; + off_t m_offset, p_offset; + + nth_midxed_object_oid(&oid, m, i); + if (!fill_midx_entry(&oid, &e, m)) { + midx_report(_("failed to load pack entry for oid[%d] = %s"), + i, oid_to_hex(&oid)); + continue; + } + + if (open_pack_index(e.p)) { + midx_report(_("failed to load pack-index for packfile %s"), + e.p->pack_name); + break; + } + + m_offset = e.offset; + p_offset = find_pack_entry_one(oid.hash, e.p); + + if (m_offset != p_offset) + midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64), + i, oid_to_hex(&oid), m_offset, p_offset); + } + return verify_midx_error; } diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index a968b9a959684b..828c240389f2a0 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -176,6 +176,7 @@ test_expect_success 'verify bad signature' ' ' HASH_LEN=20 +NUM_OBJECTS=74 MIDX_BYTE_VERSION=4 MIDX_BYTE_OID_VERSION=5 MIDX_BYTE_CHUNK_COUNT=6 @@ -192,6 +193,10 @@ MIDX_OID_FANOUT_WIDTH=4 MIDX_BYTE_OID_FANOUT_ORDER=$((MIDX_OFFSET_OID_FANOUT + 250 * $MIDX_OID_FANOUT_WIDTH + 1)) MIDX_OFFSET_OID_LOOKUP=$(($MIDX_OFFSET_OID_FANOUT + 256 * $MIDX_OID_FANOUT_WIDTH)) 
MIDX_BYTE_OID_LOOKUP=$(($MIDX_OFFSET_OID_LOOKUP + 16 * $HASH_LEN)) +MIDX_OFFSET_OBJECT_OFFSETS=$(($MIDX_OFFSET_OID_LOOKUP + $NUM_OBJECTS * $HASH_LEN)) +MIDX_OFFSET_WIDTH=8 +MIDX_BYTE_PACK_INT_ID=$(($MIDX_OFFSET_OBJECT_OFFSETS + 16 * $MIDX_OFFSET_WIDTH + 2)) +MIDX_BYTE_OFFSET=$(($MIDX_OFFSET_OBJECT_OFFSETS + 16 * $MIDX_OFFSET_WIDTH + 6)) test_expect_success 'verify bad version' ' corrupt_midx_and_verify $MIDX_BYTE_VERSION "\00" $objdir \ @@ -243,6 +248,16 @@ test_expect_success 'verify oid lookup out of order' ' "oid lookup out of order" ' +test_expect_success 'verify incorrect pack-int-id' ' + corrupt_midx_and_verify $MIDX_BYTE_PACK_INT_ID "\07" $objdir \ + "bad pack-int-id" +' + +test_expect_success 'verify incorrect offset' ' + corrupt_midx_and_verify $MIDX_BYTE_OFFSET "\07" $objdir \ + "incorrect object offset" +' + test_expect_success 'repack removes multi-pack-index' ' test_path_is_file $objdir/pack/multi-pack-index && git repack -adf && @@ -310,4 +325,16 @@ test_expect_success 'verify multi-pack-index with 64-bit offsets' ' git multi-pack-index verify --object-dir=objects64 ' +NUM_OBJECTS=63 +MIDX_OFFSET_OID_FANOUT=$((MIDX_OFFSET_PACKNAMES + 54)) +MIDX_OFFSET_OID_LOOKUP=$((MIDX_OFFSET_OID_FANOUT + 256 * $MIDX_OID_FANOUT_WIDTH)) +MIDX_OFFSET_OBJECT_OFFSETS=$(($MIDX_OFFSET_OID_LOOKUP + $NUM_OBJECTS * $HASH_LEN)) +MIDX_OFFSET_LARGE_OFFSETS=$(($MIDX_OFFSET_OBJECT_OFFSETS + $NUM_OBJECTS * $MIDX_OFFSET_WIDTH)) +MIDX_BYTE_LARGE_OFFSET=$(($MIDX_OFFSET_LARGE_OFFSETS + 3)) + +test_expect_success 'verify incorrect 64-bit offset' ' + corrupt_midx_and_verify $MIDX_BYTE_LARGE_OFFSET "\07" objects64 \ + "incorrect object offset" +' + test_done From 661441321862dcf8440efbfaa54e6b9a49afe74e Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 13 Sep 2018 11:02:26 -0700 Subject: [PATCH 42/92] multi-pack-index: report progress during 'verify' When verifying a multi-pack-index, the only action that takes significant time is checking the object offsets. 
For example, verifying a multi-pack-index containing 6.2 million objects in the Linux kernel repository takes 1.3 seconds on my machine. 99% of that time is spent looking up object offsets in each of the packfiles and comparing them to the multi-pack-index offset. Add a progress indicator for that section of the 'verify' verb. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- midx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/midx.c b/midx.c index 47e7e6113a990f..4d4c9305223338 100644 --- a/midx.c +++ b/midx.c @@ -7,6 +7,7 @@ #include "object-store.h" #include "sha1-lookup.h" #include "midx.h" +#include "progress.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 @@ -940,6 +941,7 @@ static void midx_report(const char *fmt, ...) int verify_midx_file(const char *object_dir) { uint32_t i; + struct progress *progress = NULL; struct multi_pack_index *m = load_multi_pack_index(object_dir, 1); verify_midx_error = 0; @@ -971,6 +973,7 @@ int verify_midx_file(const char *object_dir) i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1); } + progress = start_progress(_("Verifying object offsets"), m->num_objects); for (i = 0; i < m->num_objects; i++) { struct object_id oid; struct pack_entry e; @@ -995,7 +998,10 @@ int verify_midx_file(const char *object_dir) if (m_offset != p_offset) midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64), i, oid_to_hex(&oid), m_offset, p_offset); + + display_progress(progress, i + 1); } + stop_progress(&progress); return verify_midx_error; } From ea5ae6c3aabae9fecec10456f2e57afb1e27e218 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 13 Sep 2018 11:02:27 -0700 Subject: [PATCH 43/92] fsck: verify multi-pack-index When core.multiPackIndex is true, we may have a multi-pack-index in our object directory. Add calls to 'git multi-pack-index verify' at the end of 'git fsck' if so. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/fsck.c | 18 ++++++++++++++++++ t/t5319-multi-pack-index.sh | 13 ++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 63c8578cc16cd2..06eb42172099a3 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -848,5 +848,23 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) } } + if (!git_config_get_bool("core.multipackindex", &i) && i) { + struct child_process midx_verify = CHILD_PROCESS_INIT; + const char *midx_argv[] = { "multi-pack-index", "verify", NULL, NULL, NULL }; + + midx_verify.argv = midx_argv; + midx_verify.git_cmd = 1; + if (run_command(&midx_verify)) + errors_found |= ERROR_COMMIT_GRAPH; + + prepare_alt_odb(the_repository); + for (alt = the_repository->objects->alt_odb_list; alt; alt = alt->next) { + midx_argv[2] = "--object-dir"; + midx_argv[3] = alt->path; + if (run_command(&midx_verify)) + errors_found |= ERROR_COMMIT_GRAPH; + } + } + return errors_found; } diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 828c240389f2a0..bd8e841b816bcb 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -160,12 +160,17 @@ corrupt_midx_and_verify() { DATA="${2:-\0}" && OBJDIR=$3 && GREPSTR="$4" && + COMMAND="$5" && + if test -z "$COMMAND" + then + COMMAND="git multi-pack-index verify --object-dir=$OBJDIR" + fi && FILE=$OBJDIR/pack/multi-pack-index && chmod a+w $FILE && test_when_finished mv midx-backup $FILE && cp $FILE midx-backup && printf "$DATA" | dd of="$FILE" bs=1 seek="$POS" conv=notrunc && - test_must_fail git multi-pack-index verify --object-dir=$OBJDIR 2>test_err && + test_must_fail $COMMAND 2>test_err && grep -v "^+" test_err >err && test_i18ngrep "$GREPSTR" err } @@ -258,6 +263,12 @@ test_expect_success 'verify incorrect offset' ' "incorrect object offset" ' +test_expect_success 'git-fsck incorrect offset' ' + corrupt_midx_and_verify $MIDX_BYTE_OFFSET "\07" $objdir 
\ + "incorrect object offset" \ + "git -c core.multipackindex=true fsck" +' + test_expect_success 'repack removes multi-pack-index' ' test_path_is_file $objdir/pack/multi-pack-index && git repack -adf && From 9d690f7da6dd266d063c3efe6af1d7f4fd29205f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 24 Sep 2018 19:07:28 +0000 Subject: [PATCH 44/92] fsck: use ERROR_MULTI_PACK_INDEX The multi-pack-index was added to the data verified by git-fsck in ea5ae6c3 "fsck: verify multi-pack-index". This implementation was based on the implementation for verifying the commit-graph, and a copy-paste error kept the ERROR_COMMIT_GRAPH flag as the bit set when an error appears in the multi-pack-index. Add a new flag, ERROR_MULTI_PACK_INDEX, and use that instead. Signed-off-by: Derrick Stolee --- builtin/fsck.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 06eb42172099a3..f9a90593d11f4a 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -49,6 +49,7 @@ static int name_objects; #define ERROR_PACK 04 #define ERROR_REFS 010 #define ERROR_COMMIT_GRAPH 020 +#define ERROR_MULTI_PACK_INDEX 040 static const char *describe_object(struct object *obj) { @@ -855,14 +856,14 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) midx_verify.argv = midx_argv; midx_verify.git_cmd = 1; if (run_command(&midx_verify)) - errors_found |= ERROR_COMMIT_GRAPH; + errors_found |= ERROR_MULTI_PACK_INDEX; prepare_alt_odb(the_repository); for (alt = the_repository->objects->alt_odb_list; alt; alt = alt->next) { midx_argv[2] = "--object-dir"; midx_argv[3] = alt->path; if (run_command(&midx_verify)) - errors_found |= ERROR_COMMIT_GRAPH; + errors_found |= ERROR_MULTI_PACK_INDEX; } } From bb38f98b94f5b306e9833f7b2fc15f1914fd6b07 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 8 Oct 2018 14:31:55 +0000 Subject: [PATCH 45/92] midx: fix broken free() in close_midx() When closing a multi-pack-index, we intend to close each 
pack-file and free the struct packed_git that represents it. However, this line was previously freeing the array of pointers, not the pointer itself. This leads to a double-free issue. Signed-off-by: Derrick Stolee --- midx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/midx.c b/midx.c index 4d4c9305223338..f3802ec13c9b40 100644 --- a/midx.c +++ b/midx.c @@ -186,7 +186,7 @@ static void close_midx(struct multi_pack_index *m) for (i = 0; i < m->num_packs; i++) { if (m->packs[i]) { close_pack(m->packs[i]); - free(m->packs); + free(m->packs[i]); } } FREE_AND_NULL(m->packs); From 9983ab20687e9b9a91302e322272f4c8a1d5a83d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 8 Oct 2018 14:27:44 +0000 Subject: [PATCH 46/92] midx: close multi-pack-index on repack When repacking, we may remove pack-files. This invalidates the multi-pack-index (if it exists). Previously, we removed the multi-pack-index file before removing any pack-file. In some cases, the repack command may load the multi-pack-index into memory. This may lead to later in-memory references to the non-existent pack-files. 
Signed-off-by: Derrick Stolee --- builtin/repack.c | 4 ++++ midx.c | 6 +++++- midx.h | 2 ++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/builtin/repack.c b/builtin/repack.c index 42be88e86ce6fd..aa18835b199131 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -423,6 +423,10 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (!midx_cleared) { /* if we move a packfile, it will invalidated the midx */ + if (the_repository->objects) { + close_midx(the_repository->objects->multi_pack_index); + the_repository->objects->multi_pack_index = NULL; + } clear_midx_file(get_object_directory()); midx_cleared = 1; } diff --git a/midx.c b/midx.c index f3802ec13c9b40..5fc83ff2a87c65 100644 --- a/midx.c +++ b/midx.c @@ -176,9 +176,13 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local return NULL; } -static void close_midx(struct multi_pack_index *m) +void close_midx(struct multi_pack_index *m) { uint32_t i; + + if (!m) + return; + munmap((unsigned char *)m->data, m->data_len); close(m->fd); m->fd = -1; diff --git a/midx.h b/midx.h index ce80b91c68ac5d..9cca6f9a3b43c9 100644 --- a/midx.h +++ b/midx.h @@ -45,4 +45,6 @@ int write_midx_file(const char *object_dir); void clear_midx_file(const char *object_dir); int verify_midx_file(const char *object_dir); +void close_midx(struct multi_pack_index *m); + #endif From e057fd8f8b68e96ab1c03178b20457828356f384 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 29 Aug 2018 14:50:50 +0000 Subject: [PATCH 47/92] multi-pack-index: define GIT_TEST_MULTI_PACK_INDEX The multi-pack-index feature is tested in isolation by t5319-multi-pack-index.sh, but there are many more interesting scenarios in the test suite surrounding pack-file data shapes and interactions. Since the multi-pack-index is an optional data structure, it does not make sense to include it by default in those tests. 
Instead, add a new GIT_TEST_MULTI_PACK_INDEX environment variable that enables core.multiPackIndex and writes a multi-pack-index after each 'git repack' command. This adds extra test coverage when needed. There are a few spots in the test suite that need to react to this change: * t5319-multi-pack-index.sh: there is a test that checks that 'git repack' deletes the multi-pack-index. Disable the environment variable to ensure this still happens. * t5310-pack-bitmaps.sh: One test moves a pack-file from the object directory to an alternate. This breaks the multi-pack-index, so delete the multi-pack-index at this point, if it exists. * t9300-fast-import.sh: One test verifies the number of files in the .git/objects/pack directory is exactly 8. Exclude the multi-pack-index from this count so it is still 8 in all cases. Signed-off-by: Derrick Stolee --- builtin/repack.c | 4 ++++ midx.c | 9 +++++++-- midx.h | 2 ++ t/t5310-pack-bitmaps.sh | 1 + t/t5319-multi-pack-index.sh | 2 +- t/t9300-fast-import.sh | 2 +- 6 files changed, 16 insertions(+), 4 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index aa18835b199131..75519fdef79cb9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -549,6 +549,10 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (!no_update_server_info) update_server_info(0); remove_temporary_files(); + + if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) + write_midx_file(get_object_directory()); + string_list_clear(&names, 0); string_list_clear(&rollback, 0); string_list_clear(&existing_packs, 0); diff --git a/midx.c b/midx.c index 5fc83ff2a87c65..3f0b4ca11ff779 100644 --- a/midx.c +++ b/midx.c @@ -335,9 +335,14 @@ int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, i struct multi_pack_index *m; struct multi_pack_index *m_search; int config_value; + static int env_value = -1; - if (repo_config_get_bool(r, "core.multipackindex", &config_value) || - !config_value) + if (env_value < 0) + env_value = 
git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0); + + if (!env_value && + (repo_config_get_bool(r, "core.multipackindex", &config_value) || + !config_value)) return 0; for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next) diff --git a/midx.h b/midx.h index 9cca6f9a3b43c9..ef7326afdbf2f1 100644 --- a/midx.h +++ b/midx.h @@ -3,6 +3,8 @@ #include "repository.h" +#define GIT_TEST_MULTI_PACK_INDEX "GIT_TEST_MULTI_PACK_INDEX" + struct multi_pack_index { struct multi_pack_index *next; diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index 557bd0d0c09e37..d430c24dc41fc9 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -191,6 +191,7 @@ test_expect_success 'pack-objects respects --honor-pack-keep (local bitmapped pa test_expect_success 'pack-objects respects --local (non-local bitmapped pack)' ' mv .git/objects/pack/$packbitmap.* alt.git/objects/pack/ && + rm -f .git/objects/pack/multi-pack-index && test_when_finished "mv alt.git/objects/pack/$packbitmap.* .git/objects/pack/" && echo HEAD | git pack-objects --local --stdout --revs >3b.pack && git index-pack 3b.pack && diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index bd8e841b816bcb..70926b5bc04643 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -271,7 +271,7 @@ test_expect_success 'git-fsck incorrect offset' ' test_expect_success 'repack removes multi-pack-index' ' test_path_is_file $objdir/pack/multi-pack-index && - git repack -adf && + GIT_TEST_MULTI_PACK_INDEX=0 git repack -adf && test_path_is_missing $objdir/pack/multi-pack-index ' diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 40fe7e49767ac4..59a13b6a779b43 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -1558,7 +1558,7 @@ test_expect_success 'O: blank lines not necessary after other commands' ' INPUT_END git fast-import actual && test_cmp expect actual From 645f47091c5715db74651704c787556de2cfdb4a Mon Sep 17 00:00:00 
2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 16:51:09 -0400 Subject: [PATCH 48/92] fixup! midx: predict packfile name size using `wc` This reverts commit 03b0500a008d9cddfd9af8c26ec28b08f5cde9d9. --- t/t5319-midx.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index d5a0011f2b5ae8..b8e7324740e0ef 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -262,8 +262,8 @@ MIDX_BYTE_OBJECT_OFFSET=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ MIDX_OFFSET_PACKFILE_NAMES=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ $MIDX_WIDTH_OBJECT_OFFSETS \* $MIDX_NUM_OBJECTS` MIDX_BYTE_PACKFILE_NAMES=`expr $MIDX_OFFSET_PACKFILE_NAMES + 10` -MIDX_PACKNAME_SIZE=`expr $(ls *.pack | wc -c) + $MIDX_NUM_PACKS` -MIDX_BYTE_CHECKSUM=`expr $MIDX_OFFSET_PACKFILE_NAMES + $MIDX_PACKNAME_SIZE` +MIDX_LEN=$(stat --printf="%s" midx-*.midx) +MIDX_BYTE_CHECKSUM=`expr $MIDX_LEN - $HASH_LEN` test_expect_success 'midx --verify succeeds' ' git midx --verify --pack-dir . From a14a7ee76a3c56210f5c969ef0fc141cd58b7251 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 16:51:40 -0400 Subject: [PATCH 49/92] fixup! midx: responding to PR feedback:wq This reverts commit 3b40c7e4f16147dac6ba2fc191f65367a89b9ebd. 
--- midx.c | 13 ++----------- t/t5319-midx.sh | 27 +++++++++++++++------------ 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/midx.c b/midx.c index 038a60a48f5b01..41832ddff21b70 100644 --- a/midx.c +++ b/midx.c @@ -135,7 +135,7 @@ static struct midxed_git *load_midxed_git_one(const char *midx_file, const char hdr = midx_map; if (ntohl(hdr->midx_signature) != MIDX_SIGNATURE) { - uint32_t signature = ntohl(hdr->midx_signature); + uint32_t signature = hdr->midx_signature; munmap(midx_map, midx_size); close(fd); die("midx signature %X does not match signature %X", @@ -195,10 +195,6 @@ static struct midxed_git *load_midxed_git_one(const char *midx_file, const char case MIDX_CHUNKID_LARGEOFFSETS: midx->chunk_large_offsets = data + chunk_offset; break; - - default: - /* We allow optional MIDX chunks, so ignore unrecognized chunk ids */ - break; } } @@ -834,12 +830,7 @@ const char *write_midx_file(const char *pack_dir, objects, nr_objects); break; - case 0: - break; - - default: - BUG("midx tried to write an invalid chunk ID %08X", chunk_ids[chunk]); - break; + /* We allow optional MIDX chunks, so ignore unrecognized chunk ids */ } } diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index b8e7324740e0ef..921cd2a00c6533 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -218,7 +218,7 @@ test_expect_success 'Verify normal git operations succeed' ' test_line_count = 90 rev-list-out-8 ' -# The 'verify' commands below expect a midx-head file pointing +# The 'verify' commands below expect a midx-head file pointint # to an existing MIDX file. test_expect_success 'recompute valid midx' ' git midx --write --update-head --pack-dir . @@ -269,19 +269,19 @@ test_expect_success 'midx --verify succeeds' ' git midx --verify --pack-dir . 
' -# usage: corrupt_midx_and_verify [] +# usage: corrupt_midx_and_verify corrupt_midx_and_verify() { pos=$1 data="${2:-\0}" grepstr=$3 - packdir=$4 - midxid=$(cat ./$packdir/midx-head) && - file=./$packdir/midx-$midxid.midx && - chmod a+w "$file" && - test_when_finished mv midx-backup "$file" && - cp "$file" midx-backup && + packdir=$5 + midxid=$(cat midx-head) && + file=midx-$midxid.midx && + chmod a+w $file && + test_when_finished mv midx-backup $file && + cp $file midx-backup && printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc && - test_must_fail git midx --verify --pack-dir "./$packdir" 2>test_err && + test_must_fail git midx --verify --pack-dir ./$packdir 2>test_err && grep -v "^+" test_err >err && grep "$grepstr" err } @@ -401,7 +401,8 @@ test_expect_success 'force some 64-bit offsets with pack-objects' ' echo "pack_names:" >>midx-read-expect-64 && echo test-64-$pack64.pack >>midx-read-expect-64 && echo "pack_dir: packs-64" >>midx-read-expect-64 && - test_cmp midx-read-out-64 midx-read-expect-64 + test_cmp midx-read-out-64 midx-read-expect-64 && + rm -rf packs-64 ' HASH_LEN=20 @@ -409,7 +410,7 @@ MIDX_OFFSET_CHUNK_LOOKUP=16 MIDX_WIDTH_CHUNK_LOOKUP=12 MIDX_NUM_CHUNKS=7 MIDX_NUM_PACKS=1 -MIDX_NUM_OBJECTS=65 +MIDX_NUM_OBJECTS=55 MIDX_OFFSET_PACKLOOKUP=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ $MIDX_NUM_CHUNKS \* $MIDX_WIDTH_CHUNK_LOOKUP` MIDX_OFFSET_OID_FANOUT=`expr $MIDX_OFFSET_PACKLOOKUP + \ @@ -420,7 +421,9 @@ MIDX_OFFSET_OBJECT_OFFSETS=`expr $MIDX_OFFSET_OID_LOOKUP + \ MIDX_WIDTH_OBJECT_OFFSETS=8 MIDX_OFFSET_LARGE_OFFSETS=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ $MIDX_WIDTH_OBJECT_OFFSETS \* $MIDX_NUM_OBJECTS` -MIDX_BYTE_LARGE_OFFSETS=`expr $MIDX_OFFSET_LARGE_OFFSETS + 3` +MIDX_WIDTH_LARGE_OFFSETS=8 +MIDX_BYTE_LARGE_OFFSETS=`expr $MIDX_OFFSET_LARGE_OFFSETS + \ + $MIDX_WIDTH_LARGE_OFFSETS \* 5 + 3` test_expect_success 'verify bad 64-bit offset' ' corrupt_midx_and_verify $MIDX_BYTE_LARGE_OFFSETS "\01" \ From 3d0b234f4d59a7bfdf8fbd5c43c48bea30da2d65 Mon Sep 
17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 16:51:59 -0400 Subject: [PATCH 50/92] fixup! midx: verify checksum footer This reverts commit 336f98f377980aee3f3455c7d9a018910df20dac. --- midx.c | 19 ++++--------------- t/t5319-midx.sh | 7 ------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/midx.c b/midx.c index 41832ddff21b70..24f073bbb434cd 100644 --- a/midx.c +++ b/midx.c @@ -918,9 +918,7 @@ int midx_verify(const char *pack_dir, const char *midx_id) uint32_t i, cur_fanout_pos = 0; struct midxed_git *m; const char *midx_head_path; - struct object_id cur_oid, prev_oid, checksum; - struct hashfile *f; - int devnull, checksum_fail = 0; + struct object_id cur_oid, prev_oid; if (midx_id) { size_t sz; @@ -938,17 +936,6 @@ int midx_verify(const char *pack_dir, const char *midx_id) goto cleanup; } - - devnull = open("/dev/null", O_WRONLY); - f = hashfd(devnull, NULL); - hashwrite(f, m->data, m->data_len - m->hdr->hash_len); - finalize_hashfile(f, checksum.hash, CSUM_CLOSE); - if (hashcmp(checksum.hash, m->data + m->data_len - m->hdr->hash_len)) { - midx_report(_("the midx file has incorrect checksum and is likely corrupt")); - verify_midx_error = 0; - checksum_fail = 1; - } - if (m->hdr->hash_version != MIDX_OID_VERSION) midx_report("invalid hash version"); if (m->hdr->hash_len != MIDX_OID_LEN) @@ -966,6 +953,8 @@ int midx_verify(const char *pack_dir, const char *midx_id) goto cleanup; for (i = 0; i < m->num_packs; i++) { + fprintf(stderr, "preparing %s\n", m->pack_names[i]); + fflush(stderr); if (prepare_midx_pack(m, i)) { midx_report("failed to prepare pack %s", m->pack_names[i]); @@ -1037,5 +1026,5 @@ int midx_verify(const char *pack_dir, const char *midx_id) if (m) close_midx(m); free(m); - return verify_midx_error | checksum_fail; + return verify_midx_error; } diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 921cd2a00c6533..9425a5fe77da94 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -262,8 +262,6 @@ 
MIDX_BYTE_OBJECT_OFFSET=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ MIDX_OFFSET_PACKFILE_NAMES=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ $MIDX_WIDTH_OBJECT_OFFSETS \* $MIDX_NUM_OBJECTS` MIDX_BYTE_PACKFILE_NAMES=`expr $MIDX_OFFSET_PACKFILE_NAMES + 10` -MIDX_LEN=$(stat --printf="%s" midx-*.midx) -MIDX_BYTE_CHECKSUM=`expr $MIDX_LEN - $HASH_LEN` test_expect_success 'midx --verify succeeds' ' git midx --verify --pack-dir . @@ -371,11 +369,6 @@ test_expect_success 'verify packfile lookup' ' "invalid packfile name lookup" ' -test_expect_success 'verify checksum hash' ' - corrupt_midx_and_verify $MIDX_BYTE_CHECKSUM "\00" \ - "incorrect checksum" -' - # usage: corrupt_data [] corrupt_data() { file=$1 From ceee514e86f8f937159d121bb5e2e588707f96a3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 16:52:18 -0400 Subject: [PATCH 51/92] fixup! midx: verify 64-bit offsets and packfile lookups This reverts commit a489f2ab3fecb5e93b523890a43fc1124710d211. --- midx.c | 4 --- t/t5319-midx.sh | 92 ++++++++++++++++++------------------------------- 2 files changed, 33 insertions(+), 63 deletions(-) diff --git a/midx.c b/midx.c index 24f073bbb434cd..d825918c83ecd0 100644 --- a/midx.c +++ b/midx.c @@ -217,10 +217,6 @@ static struct midxed_git *load_midxed_git_one(const char *midx_file, const char for (i = 0; i < midx->num_packs; i++) { uint32_t name_offset = ntohl(*(uint32_t*)(midx->chunk_pack_lookup + 4 * i)); - - if (midx->chunk_pack_names + name_offset >= midx->data + midx->data_len) - die("invalid packfile name lookup"); - midx->pack_names[i] = (const char*)(midx->chunk_pack_names + name_offset); } } diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 9425a5fe77da94..c2ab8fa4389b7c 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -218,6 +218,35 @@ test_expect_success 'Verify normal git operations succeed' ' test_line_count = 90 rev-list-out-8 ' +# usage: corrupt_data [] +corrupt_data() { + file=$1 + pos=$2 + data="${3:-\0}" + printf "$data" | dd of="$file" bs=1 
seek="$pos" conv=notrunc +} + +# Force 64-bit offsets by manipulating the idx file. +# This makes the IDX file _incorrect_ so be careful to clean up after! +test_expect_success 'force some 64-bit offsets with pack-objects' ' + pack64=$(git pack-objects --index-version=2,0x40 test-64 midx-read-out-64 && + echo "header: 4d494458 80000001 01 14 00 06 00000001" >midx-read-expect-64 && + echo "num_objects: 65" >>midx-read-expect-64 && + echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets large_offsets" >>midx-read-expect-64 && + echo "pack_names:" >>midx-read-expect-64 && + echo test-64-$pack64.pack >>midx-read-expect-64 && + echo "pack_dir: packs-64" >>midx-read-expect-64 && + test_cmp midx-read-out-64 midx-read-expect-64 && + rm -rf packs-64 +' + # The 'verify' commands below expect a midx-head file pointint # to an existing MIDX file. test_expect_success 'recompute valid midx' ' @@ -245,7 +274,6 @@ MIDX_BYTE_CHUNK_PACKNAME_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ 4 \* $MIDX_WIDTH_CHUNK_LOOKUP` MIDX_OFFSET_PACKLOOKUP=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ $MIDX_NUM_CHUNKS \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_BYTE_PACKFILE_LOOKUP=`expr $MIDX_OFFSET_PACKLOOKUP + 4` MIDX_OFFSET_OID_FANOUT=`expr $MIDX_OFFSET_PACKLOOKUP + \ 4 \* $MIDX_NUM_PACKS` MIDX_BYTE_OID_FANOUT=`expr $MIDX_OFFSET_OID_FANOUT + 4 \* 129` @@ -272,14 +300,13 @@ corrupt_midx_and_verify() { pos=$1 data="${2:-\0}" grepstr=$3 - packdir=$5 midxid=$(cat midx-head) && file=midx-$midxid.midx && chmod a+w $file && test_when_finished mv midx-backup $file && cp $file midx-backup && printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc && - test_must_fail git midx --verify --pack-dir ./$packdir 2>test_err && + test_must_fail git midx --verify --pack-dir . 
2>test_err && grep -v "^+" test_err >err && grep "$grepstr" err } @@ -360,67 +387,14 @@ test_expect_success 'verify bad 32-bit offset' ' ' test_expect_success 'verify packfile name' ' + echo $MIDX_BYTE_PACKFILE_NAMES && corrupt_midx_and_verify $MIDX_BYTE_PACKFILE_NAMES "\00" \ "failed to prepare pack" ' -test_expect_success 'verify packfile lookup' ' - corrupt_midx_and_verify $MIDX_BYTE_PACKFILE_LOOKUP "\01" \ - "invalid packfile name lookup" -' - -# usage: corrupt_data [] -corrupt_data() { - file=$1 - pos=$2 - data="${3:-\0}" - printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc -} - -# Force 64-bit offsets by manipulating the idx file. -# This makes the IDX file _incorrect_ so be careful to clean up after! -test_expect_success 'force some 64-bit offsets with pack-objects' ' - pack64=$(git pack-objects --index-version=2,0x40 test-64 midx-read-out-64 && - echo "header: 4d494458 80000001 01 14 00 06 00000001" >midx-read-expect-64 && - echo "num_objects: 65" >>midx-read-expect-64 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets large_offsets" >>midx-read-expect-64 && - echo "pack_names:" >>midx-read-expect-64 && - echo test-64-$pack64.pack >>midx-read-expect-64 && - echo "pack_dir: packs-64" >>midx-read-expect-64 && - test_cmp midx-read-out-64 midx-read-expect-64 && - rm -rf packs-64 -' - -HASH_LEN=20 -MIDX_OFFSET_CHUNK_LOOKUP=16 -MIDX_WIDTH_CHUNK_LOOKUP=12 -MIDX_NUM_CHUNKS=7 -MIDX_NUM_PACKS=1 -MIDX_NUM_OBJECTS=55 -MIDX_OFFSET_PACKLOOKUP=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - $MIDX_NUM_CHUNKS \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_OFFSET_OID_FANOUT=`expr $MIDX_OFFSET_PACKLOOKUP + \ - 4 \* $MIDX_NUM_PACKS` -MIDX_OFFSET_OID_LOOKUP=`expr $MIDX_OFFSET_OID_FANOUT + 4 \* 256` -MIDX_OFFSET_OBJECT_OFFSETS=`expr $MIDX_OFFSET_OID_LOOKUP + \ - $HASH_LEN \* $MIDX_NUM_OBJECTS` -MIDX_WIDTH_OBJECT_OFFSETS=8 -MIDX_OFFSET_LARGE_OFFSETS=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ - $MIDX_WIDTH_OBJECT_OFFSETS \* $MIDX_NUM_OBJECTS` -MIDX_WIDTH_LARGE_OFFSETS=8 
-MIDX_BYTE_LARGE_OFFSETS=`expr $MIDX_OFFSET_LARGE_OFFSETS + \ - $MIDX_WIDTH_LARGE_OFFSETS \* 5 + 3` - test_expect_success 'verify bad 64-bit offset' ' - corrupt_midx_and_verify $MIDX_BYTE_LARGE_OFFSETS "\01" \ - "incorrect offset" packs-64 + corrupt_midx_and_verify $MIDX_BYTE_OBJECT_OFFSET "\01" \ + "incorrect offset" ' test_done From 983af147bc49e096d9ea066d3bed3d5083d5756c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 16:53:33 -0400 Subject: [PATCH 52/92] fixup! midx: verify corrupted packfile names This reverts commit 261a27b203edd3bd20f857e88d6df736acfa3bce. --- midx.c | 28 +++++++++------------------- t/t5319-midx.sh | 16 +--------------- 2 files changed, 10 insertions(+), 34 deletions(-) diff --git a/midx.c b/midx.c index d825918c83ecd0..4de99b36b1be62 100644 --- a/midx.c +++ b/midx.c @@ -3,7 +3,6 @@ #include "pack.h" #include "packfile.h" #include "midx.h" -#include "object-store.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_CHUNKID_PACKLOOKUP 0x504c4f4f /* "PLOO" */ @@ -945,24 +944,6 @@ int midx_verify(const char *pack_dir, const char *midx_id) if (!m->chunk_object_offsets) midx_report("missing Object Offset chunk"); - if (verify_midx_error) - goto cleanup; - - for (i = 0; i < m->num_packs; i++) { - fprintf(stderr, "preparing %s\n", m->pack_names[i]); - fflush(stderr); - if (prepare_midx_pack(m, i)) { - midx_report("failed to prepare pack %s", - m->pack_names[i]); - continue; - } - - if (!m->packs[i]->index_data && - open_pack_index(m->packs[i])) - midx_report("failed to open index for pack %s", - m->pack_names[i]); - } - if (verify_midx_error) goto cleanup; @@ -1002,7 +983,16 @@ int midx_verify(const char *pack_dir, const char *midx_id) continue; } + if (prepare_midx_pack(m, pack_id)) { + midx_report("failed to prepare pack %s", + m->pack_names[pack_id]); + continue; + } + p = m->packs[pack_id]; + if (!p->index_data && open_pack_index(p)) + midx_report("failed to open index for pack %s", + m->pack_names[pack_id]); if 
(!find_pack_entry_pos(cur_oid.hash, p, &index_pos)) { midx_report("midx contains object not present in packfile: %s", diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index c2ab8fa4389b7c..6813410d71c9b2 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -287,9 +287,6 @@ MIDX_BYTE_OBJECT_PACKID=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ $MIDX_WIDTH_OBJECT_OFFSETS \* 50 + 1` MIDX_BYTE_OBJECT_OFFSET=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ $MIDX_WIDTH_OBJECT_OFFSETS \* 50 + 4` -MIDX_OFFSET_PACKFILE_NAMES=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ - $MIDX_WIDTH_OBJECT_OFFSETS \* $MIDX_NUM_OBJECTS` -MIDX_BYTE_PACKFILE_NAMES=`expr $MIDX_OFFSET_PACKFILE_NAMES + 10` test_expect_success 'midx --verify succeeds' ' git midx --verify --pack-dir . @@ -381,18 +378,7 @@ test_expect_success 'verify bad pack-int-id' ' "pack-int-id for object" ' -test_expect_success 'verify bad 32-bit offset' ' - corrupt_midx_and_verify $MIDX_BYTE_OBJECT_OFFSET "\01" \ - "incorrect offset" -' - -test_expect_success 'verify packfile name' ' - echo $MIDX_BYTE_PACKFILE_NAMES && - corrupt_midx_and_verify $MIDX_BYTE_PACKFILE_NAMES "\00" \ - "failed to prepare pack" -' - -test_expect_success 'verify bad 64-bit offset' ' +test_expect_success 'verify bad offset' ' corrupt_midx_and_verify $MIDX_BYTE_OBJECT_OFFSET "\01" \ "incorrect offset" ' From 39662286c1a73918ab21aab929232d12e4cfb493 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 16:53:53 -0400 Subject: [PATCH 53/92] fixup! midx: verify bad pack-int-ids and offsets This reverts commit e02ee48360f9e5aad9195c8cf61c4324762dfc29. 
--- t/t5319-midx.sh | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 6813410d71c9b2..8f211d09b636a1 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -262,7 +262,6 @@ MIDX_OFFSET_CHUNK_LOOKUP=16 MIDX_WIDTH_CHUNK_LOOKUP=12 MIDX_NUM_CHUNKS=6 MIDX_NUM_PACKS=13 -MIDX_NUM_OBJECTS=77 MIDX_BYTE_CHUNK_PACKLOOKUP_ID=$MIDX_OFFSET_CHUNK_LOOKUP MIDX_BYTE_CHUNK_FANOUT_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ 1 \* $MIDX_WIDTH_CHUNK_LOOKUP` @@ -280,13 +279,6 @@ MIDX_BYTE_OID_FANOUT=`expr $MIDX_OFFSET_OID_FANOUT + 4 \* 129` MIDX_OFFSET_OID_LOOKUP=`expr $MIDX_OFFSET_OID_FANOUT + 4 \* 256` MIDX_BYTE_OID_ORDER=`expr $MIDX_OFFSET_OID_LOOKUP + $HASH_LEN \* 50` MIDX_BYTE_OID_MISSING=`expr $MIDX_OFFSET_OID_LOOKUP + $HASH_LEN \* 50 + 5` -MIDX_OFFSET_OBJECT_OFFSETS=`expr $MIDX_OFFSET_OID_LOOKUP + \ - $HASH_LEN \* $MIDX_NUM_OBJECTS` -MIDX_WIDTH_OBJECT_OFFSETS=8 -MIDX_BYTE_OBJECT_PACKID=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ - $MIDX_WIDTH_OBJECT_OFFSETS \* 50 + 1` -MIDX_BYTE_OBJECT_OFFSET=`expr $MIDX_OFFSET_OBJECT_OFFSETS + \ - $MIDX_WIDTH_OBJECT_OFFSETS \* 50 + 4` test_expect_success 'midx --verify succeeds' ' git midx --verify --pack-dir . @@ -373,14 +365,4 @@ test_expect_success 'verify bad OID lookup (object missing)' ' "object not present in pack" ' -test_expect_success 'verify bad pack-int-id' ' - corrupt_midx_and_verify $MIDX_BYTE_OBJECT_PACKID "\01" \ - "pack-int-id for object" -' - -test_expect_success 'verify bad offset' ' - corrupt_midx_and_verify $MIDX_BYTE_OBJECT_OFFSET "\01" \ - "incorrect offset" -' - test_done From 0e199cf0f506b6416f7c9590dffa40b7c86e1cef Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 16:57:01 -0400 Subject: [PATCH 54/92] fixup! midx: verify objects exist in packfiles This reverts commit c3f75a9d0ba3801a3ce0df0327f3d8febff3e87a. 
--- midx.c | 41 ----------------------------------------- packfile.c | 15 ++++----------- packfile.h | 4 ---- t/t5319-midx.sh | 6 ------ 4 files changed, 4 insertions(+), 62 deletions(-) diff --git a/midx.c b/midx.c index 4de99b36b1be62..3b07a7cd274bd9 100644 --- a/midx.c +++ b/midx.c @@ -948,11 +948,6 @@ int midx_verify(const char *pack_dir, const char *midx_id) goto cleanup; for (i = 0; i < m->num_objects; i++) { - struct pack_midx_details details; - uint32_t index_pos, pack_id; - struct packed_git *p; - off_t pack_offset; - hashcpy(cur_oid.hash, m->chunk_oid_lookup + m->hdr->hash_len * i); while (cur_oid.hash[0] > cur_fanout_pos) { @@ -970,42 +965,6 @@ int midx_verify(const char *pack_dir, const char *midx_id) oid_to_hex(&cur_oid)); oidcpy(&prev_oid, &cur_oid); - - if (!nth_midxed_object_details(m, i, &details)) { - midx_report("nth_midxed_object_details failed with n=%d", i); - continue; - } - - pack_id = details.pack_int_id; - if (pack_id >= m->num_packs) { - midx_report("pack-int-id for object n=%d is invalid: %u", - pack_id); - continue; - } - - if (prepare_midx_pack(m, pack_id)) { - midx_report("failed to prepare pack %s", - m->pack_names[pack_id]); - continue; - } - - p = m->packs[pack_id]; - if (!p->index_data && open_pack_index(p)) - midx_report("failed to open index for pack %s", - m->pack_names[pack_id]); - - if (!find_pack_entry_pos(cur_oid.hash, p, &index_pos)) { - midx_report("midx contains object not present in packfile: %s", - oid_to_hex(&cur_oid)); - continue; - } - - pack_offset = nth_packed_object_offset(p, index_pos); - if (details.offset != pack_offset) - midx_report("midx has incorrect offset for %s : %"PRIx64" != %"PRIx64, - oid_to_hex(&cur_oid), - details.offset, - pack_offset); } cleanup: diff --git a/packfile.c b/packfile.c index faec8591d5a7a2..ec31461700f9fc 100644 --- a/packfile.c +++ b/packfile.c @@ -1872,12 +1872,12 @@ off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n) } } -int find_pack_entry_pos(const unsigned 
char *sha1, - struct packed_git *p, - uint32_t *result) +off_t find_pack_entry_one(const unsigned char *sha1, + struct packed_git *p) { const unsigned char *index = p->index_data; struct object_id oid; + uint32_t result; if (!index) { if (open_pack_index(p)) @@ -1885,14 +1885,7 @@ int find_pack_entry_pos(const unsigned char *sha1, } hashcpy(oid.hash, sha1); - return bsearch_pack(&oid, p, result); -} - -off_t find_pack_entry_one(const unsigned char *sha1, - struct packed_git *p) -{ - uint32_t result; - if (find_pack_entry_pos(sha1, p, &result)) + if (bsearch_pack(&oid, p, &result)) return nth_packed_object_offset(p, result); return 0; } diff --git a/packfile.h b/packfile.h index 15e007861bcec7..6fec75d274662d 100644 --- a/packfile.h +++ b/packfile.h @@ -123,10 +123,6 @@ extern const struct object_id *nth_packed_object_oid(struct object_id *, struct */ extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t n); -int find_pack_entry_pos(const unsigned char *sha1, - struct packed_git *p, - uint32_t *result); - /* * If the object named sha1 is present in the specified packfile, * return its offset within the packfile; otherwise, return 0. diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 8f211d09b636a1..c304a10d0ff368 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -278,7 +278,6 @@ MIDX_OFFSET_OID_FANOUT=`expr $MIDX_OFFSET_PACKLOOKUP + \ MIDX_BYTE_OID_FANOUT=`expr $MIDX_OFFSET_OID_FANOUT + 4 \* 129` MIDX_OFFSET_OID_LOOKUP=`expr $MIDX_OFFSET_OID_FANOUT + 4 \* 256` MIDX_BYTE_OID_ORDER=`expr $MIDX_OFFSET_OID_LOOKUP + $HASH_LEN \* 50` -MIDX_BYTE_OID_MISSING=`expr $MIDX_OFFSET_OID_LOOKUP + $HASH_LEN \* 50 + 5` test_expect_success 'midx --verify succeeds' ' git midx --verify --pack-dir . 
@@ -360,9 +359,4 @@ test_expect_success 'verify bad OID lookup order' ' "incorrect OID order" ' -test_expect_success 'verify bad OID lookup (object missing)' ' - corrupt_midx_and_verify $MIDX_BYTE_OID_MISSING "\00" \ - "object not present in pack" -' - test_done From 60ee9657cba432502c60c2f88b302d98adb39e03 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 16:57:18 -0400 Subject: [PATCH 55/92] fixup! midx: verify OID lookup order This reverts commit e0c0b311f0a2aafca074046ec5d6dd117a12e3fe. --- midx.c | 9 +-------- t/t5319-midx.sh | 13 +------------ 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/midx.c b/midx.c index 3b07a7cd274bd9..7651f6a737db9c 100644 --- a/midx.c +++ b/midx.c @@ -913,7 +913,7 @@ int midx_verify(const char *pack_dir, const char *midx_id) uint32_t i, cur_fanout_pos = 0; struct midxed_git *m; const char *midx_head_path; - struct object_id cur_oid, prev_oid; + struct object_id cur_oid; if (midx_id) { size_t sz; @@ -958,13 +958,6 @@ int midx_verify(const char *pack_dir, const char *midx_id) cur_fanout_pos++; } - - if (i && oidcmp(&prev_oid, &cur_oid) >= 0) - midx_report("midx has incorrect OID order: %s then %s", - oid_to_hex(&prev_oid), - oid_to_hex(&cur_oid)); - - oidcpy(&prev_oid, &cur_oid); } cleanup: diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index c304a10d0ff368..2a788cd8fb7c22 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -253,7 +253,6 @@ test_expect_success 'recompute valid midx' ' git midx --write --update-head --pack-dir . 
' -HASH_LEN=20 MIDX_BYTE_VERSION=4 MIDX_BYTE_OID_VERSION=8 MIDX_BYTE_OID_LEN=9 @@ -261,7 +260,6 @@ MIDX_BYTE_CHUNK_COUNT=11 MIDX_OFFSET_CHUNK_LOOKUP=16 MIDX_WIDTH_CHUNK_LOOKUP=12 MIDX_NUM_CHUNKS=6 -MIDX_NUM_PACKS=13 MIDX_BYTE_CHUNK_PACKLOOKUP_ID=$MIDX_OFFSET_CHUNK_LOOKUP MIDX_BYTE_CHUNK_FANOUT_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ 1 \* $MIDX_WIDTH_CHUNK_LOOKUP` @@ -271,13 +269,9 @@ MIDX_BYTE_CHUNK_OFFSET_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ 3 \* $MIDX_WIDTH_CHUNK_LOOKUP` MIDX_BYTE_CHUNK_PACKNAME_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ 4 \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_OFFSET_PACKLOOKUP=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ +MIDX_OFFSET_OID_FANOUT=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ $MIDX_NUM_CHUNKS \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_OFFSET_OID_FANOUT=`expr $MIDX_OFFSET_PACKLOOKUP + \ - 4 \* $MIDX_NUM_PACKS` MIDX_BYTE_OID_FANOUT=`expr $MIDX_OFFSET_OID_FANOUT + 4 \* 129` -MIDX_OFFSET_OID_LOOKUP=`expr $MIDX_OFFSET_OID_FANOUT + 4 \* 256` -MIDX_BYTE_OID_ORDER=`expr $MIDX_OFFSET_OID_LOOKUP + $HASH_LEN \* 50` test_expect_success 'midx --verify succeeds' ' git midx --verify --pack-dir . @@ -354,9 +348,4 @@ test_expect_success 'verify bad OID fanout value' ' "incorrect fanout value" ' -test_expect_success 'verify bad OID lookup order' ' - corrupt_midx_and_verify $MIDX_BYTE_OID_ORDER "\00" \ - "incorrect OID order" -' - test_done From 3cd3723a5db1476797888c51aa083b3cb27acdd7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 16:57:36 -0400 Subject: [PATCH 56/92] fixup! midx: verify oid fanout table This reverts commit 018662b411be744e7278109c6aaaf4844a20ced0. 
--- midx.c | 36 ++++++++++++++---------------------- t/t5319-midx.sh | 28 +++++++++------------------- 2 files changed, 23 insertions(+), 41 deletions(-) diff --git a/midx.c b/midx.c index 7651f6a737db9c..4ed16d19b0b1c8 100644 --- a/midx.c +++ b/midx.c @@ -194,16 +194,17 @@ static struct midxed_git *load_midxed_git_one(const char *midx_file, const char case MIDX_CHUNKID_LARGEOFFSETS: midx->chunk_large_offsets = data + chunk_offset; break; + + case 0: + break; + + default: + munmap(midx_map, midx_size); + close(fd); + die("Unrecognized MIDX chunk id: %08x", chunk_id); } } - if (!midx->chunk_oid_fanout) - die("midx missing OID Fanout chunk"); - if (!midx->chunk_pack_lookup) - die("midx missing Packfile Name Lookup chunk"); - if (!midx->chunk_pack_names) - die("midx missing Packfile Name chunk"); - midx->num_objects = ntohl(*((uint32_t*)(midx->chunk_oid_fanout + 255 * 4))); midx->num_packs = ntohl(midx->hdr->num_packs); @@ -910,10 +911,8 @@ static void midx_report(const char *fmt, ...) int midx_verify(const char *pack_dir, const char *midx_id) { - uint32_t i, cur_fanout_pos = 0; struct midxed_git *m; const char *midx_head_path; - struct object_id cur_oid; if (midx_id) { size_t sz; @@ -939,27 +938,20 @@ int midx_verify(const char *pack_dir, const char *midx_id) if (verify_midx_error) goto cleanup; + if (!m->chunk_oid_fanout) + midx_report("missing OID Fanout chunk"); if (!m->chunk_oid_lookup) midx_report("missing OID Lookup chunk"); if (!m->chunk_object_offsets) midx_report("missing Object Offset chunk"); + if (!m->chunk_pack_lookup) + midx_report("missing Packfile Name Lookup chunk"); + if (!m->chunk_pack_names) + midx_report("missing Packfile Name chunk"); if (verify_midx_error) goto cleanup; - for (i = 0; i < m->num_objects; i++) { - hashcpy(cur_oid.hash, m->chunk_oid_lookup + m->hdr->hash_len * i); - - while (cur_oid.hash[0] > cur_fanout_pos) { - uint32_t fanout_value = get_be32(m->chunk_oid_fanout + cur_fanout_pos * sizeof(uint32_t)); - if (i != fanout_value) - 
midx_report("midx has incorrect fanout value: fanout[%d] = %u != %u", - cur_fanout_pos, fanout_value, i); - - cur_fanout_pos++; - } - } - cleanup: if (m) close_midx(m); diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 2a788cd8fb7c22..b9975743265176 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -259,19 +259,15 @@ MIDX_BYTE_OID_LEN=9 MIDX_BYTE_CHUNK_COUNT=11 MIDX_OFFSET_CHUNK_LOOKUP=16 MIDX_WIDTH_CHUNK_LOOKUP=12 -MIDX_NUM_CHUNKS=6 -MIDX_BYTE_CHUNK_PACKLOOKUP_ID=$MIDX_OFFSET_CHUNK_LOOKUP -MIDX_BYTE_CHUNK_FANOUT_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - 1 \* $MIDX_WIDTH_CHUNK_LOOKUP` +MIDX_BYTE_CHUNK_FANOUT_ID=$MIDX_OFFSET_CHUNK_LOOKUP MIDX_BYTE_CHUNK_LOOKUP_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - 2 \* $MIDX_WIDTH_CHUNK_LOOKUP` + 1 \* $MIDX_WIDTH_CHUNK_LOOKUP` MIDX_BYTE_CHUNK_OFFSET_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ + 2 \* $MIDX_WIDTH_CHUNK_LOOKUP` +MIDX_BYTE_CHUNK_PACKLOOKUP_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ 3 \* $MIDX_WIDTH_CHUNK_LOOKUP` MIDX_BYTE_CHUNK_PACKNAME_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ 4 \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_OFFSET_OID_FANOUT=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - $MIDX_NUM_CHUNKS \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_BYTE_OID_FANOUT=`expr $MIDX_OFFSET_OID_FANOUT + 4 \* 129` test_expect_success 'midx --verify succeeds' ' git midx --verify --pack-dir . 
@@ -281,7 +277,6 @@ test_expect_success 'midx --verify succeeds' ' corrupt_midx_and_verify() { pos=$1 data="${2:-\0}" - grepstr=$3 midxid=$(cat midx-head) && file=midx-$midxid.midx && chmod a+w $file && @@ -318,11 +313,6 @@ test_expect_success 'verify bad chunk count' ' "missing Packfile Name chunk" ' -test_expect_success 'verify bad packfile lookup chunk id' ' - corrupt_midx_and_verify $MIDX_BYTE_CHUNK_PACKLOOKUP_ID "\00" \ - "missing Packfile Name Lookup chunk" -' - test_expect_success 'verify bad OID fanout chunk id' ' corrupt_midx_and_verify $MIDX_BYTE_CHUNK_FANOUT_ID "\00" \ "missing OID Fanout chunk" @@ -338,14 +328,14 @@ test_expect_success 'verify bad offset chunk id' ' "missing Object Offset chunk" ' +test_expect_success 'verify bad packfile lookup chunk id' ' + corrupt_midx_and_verify $MIDX_BYTE_CHUNK_PACKLOOKUP_ID "\00" \ + "missing Packfile Name Lookup chunk" +' + test_expect_success 'verify bad packfile name chunk id' ' corrupt_midx_and_verify $MIDX_BYTE_CHUNK_PACKNAME_ID "\00" \ "missing Packfile Name chunk" ' -test_expect_success 'verify bad OID fanout value' ' - corrupt_midx_and_verify $MIDX_BYTE_OID_FANOUT "\01" \ - "incorrect fanout value" -' - test_done From 3c3c988be7f5bed69633d053f341fa98d374eedc Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 16:59:21 -0400 Subject: [PATCH 57/92] fixup! midx: verify invalid chunk lookup This reverts commit 8e634514ea60704bf2aab28c93b8d197064b65f2. 
--- midx.c | 20 +++++--------------- t/t5319-midx.sh | 42 ------------------------------------------ 2 files changed, 5 insertions(+), 57 deletions(-) diff --git a/midx.c b/midx.c index 4ed16d19b0b1c8..ba0835cc35ba9b 100644 --- a/midx.c +++ b/midx.c @@ -826,7 +826,11 @@ const char *write_midx_file(const char *pack_dir, objects, nr_objects); break; - /* We allow optional MIDX chunks, so ignore unrecognized chunk ids */ + case 0: + break; + + default: + die("unrecognized MIDX chunk id: %08x", chunk_ids[chunk]); } } @@ -938,20 +942,6 @@ int midx_verify(const char *pack_dir, const char *midx_id) if (verify_midx_error) goto cleanup; - if (!m->chunk_oid_fanout) - midx_report("missing OID Fanout chunk"); - if (!m->chunk_oid_lookup) - midx_report("missing OID Lookup chunk"); - if (!m->chunk_object_offsets) - midx_report("missing Object Offset chunk"); - if (!m->chunk_pack_lookup) - midx_report("missing Packfile Name Lookup chunk"); - if (!m->chunk_pack_names) - midx_report("missing Packfile Name chunk"); - - if (verify_midx_error) - goto cleanup; - cleanup: if (m) close_midx(m); diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index b9975743265176..f88466f9cce23c 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -256,18 +256,6 @@ test_expect_success 'recompute valid midx' ' MIDX_BYTE_VERSION=4 MIDX_BYTE_OID_VERSION=8 MIDX_BYTE_OID_LEN=9 -MIDX_BYTE_CHUNK_COUNT=11 -MIDX_OFFSET_CHUNK_LOOKUP=16 -MIDX_WIDTH_CHUNK_LOOKUP=12 -MIDX_BYTE_CHUNK_FANOUT_ID=$MIDX_OFFSET_CHUNK_LOOKUP -MIDX_BYTE_CHUNK_LOOKUP_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - 1 \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_BYTE_CHUNK_OFFSET_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - 2 \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_BYTE_CHUNK_PACKLOOKUP_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - 3 \* $MIDX_WIDTH_CHUNK_LOOKUP` -MIDX_BYTE_CHUNK_PACKNAME_ID=`expr $MIDX_OFFSET_CHUNK_LOOKUP + \ - 4 \* $MIDX_WIDTH_CHUNK_LOOKUP` test_expect_success 'midx --verify succeeds' ' git midx --verify --pack-dir . 
@@ -308,34 +296,4 @@ test_expect_success 'verify bad object id length' ' "hash length" ' -test_expect_success 'verify bad chunk count' ' - corrupt_midx_and_verify $MIDX_BYTE_CHUNK_COUNT "\01" \ - "missing Packfile Name chunk" -' - -test_expect_success 'verify bad OID fanout chunk id' ' - corrupt_midx_and_verify $MIDX_BYTE_CHUNK_FANOUT_ID "\00" \ - "missing OID Fanout chunk" -' - -test_expect_success 'verify bad OID lookup chunk id' ' - corrupt_midx_and_verify $MIDX_BYTE_CHUNK_LOOKUP_ID "\00" \ - "missing OID Lookup chunk" -' - -test_expect_success 'verify bad offset chunk id' ' - corrupt_midx_and_verify $MIDX_BYTE_CHUNK_OFFSET_ID "\00" \ - "missing Object Offset chunk" -' - -test_expect_success 'verify bad packfile lookup chunk id' ' - corrupt_midx_and_verify $MIDX_BYTE_CHUNK_PACKLOOKUP_ID "\00" \ - "missing Packfile Name Lookup chunk" -' - -test_expect_success 'verify bad packfile name chunk id' ' - corrupt_midx_and_verify $MIDX_BYTE_CHUNK_PACKNAME_ID "\00" \ - "missing Packfile Name chunk" -' - test_done From c24e94c2f88e25b0bd71a2832f14fb87cf0d612f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 16:59:38 -0400 Subject: [PATCH 58/92] fixup! midx: verify OID version and length This reverts commit 0fba6c1226ce2eb28af7be07bda51cf1d2b771bb. 
--- midx.c | 16 +--------------- t/t5319-midx.sh | 12 ------------ 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/midx.c b/midx.c index ba0835cc35ba9b..47fc43fddd5654 100644 --- a/midx.c +++ b/midx.c @@ -929,22 +929,8 @@ int midx_verify(const char *pack_dir, const char *midx_id) m = load_midxed_git_one(midx_head_path, pack_dir); - if (!m) { + if (!m) midx_report("failed to find specified midx file"); - goto cleanup; - } - - if (m->hdr->hash_version != MIDX_OID_VERSION) - midx_report("invalid hash version"); - if (m->hdr->hash_len != MIDX_OID_LEN) - midx_report("invalid hash length"); - if (verify_midx_error) - goto cleanup; - -cleanup: - if (m) - close_midx(m); - free(m); return verify_midx_error; } diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index f88466f9cce23c..8ab53d4f14fe0e 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -254,8 +254,6 @@ test_expect_success 'recompute valid midx' ' ' MIDX_BYTE_VERSION=4 -MIDX_BYTE_OID_VERSION=8 -MIDX_BYTE_OID_LEN=9 test_expect_success 'midx --verify succeeds' ' git midx --verify --pack-dir . @@ -286,14 +284,4 @@ test_expect_success 'verify bad version' ' "midx version" ' -test_expect_success 'verify bad object id version' ' - corrupt_midx_and_verify $MIDX_BYTE_OID_VERSION "\02" \ - "hash version" -' - -test_expect_success 'verify bad object id length' ' - corrupt_midx_and_verify $MIDX_BYTE_OID_LEN "\010" \ - "hash length" -' - test_done From a053fc7c80e890cc3c96e0f44db7d0b301486c8a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 16:59:56 -0400 Subject: [PATCH 59/92] fixup! midx: verify incorrect midx version This reverts commit ee2ac1eb0fb16dafdaf5e7c2a0638975769706ca. 
--- midx.c | 3 +-- t/t5319-midx.sh | 9 +-------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/midx.c b/midx.c index 47fc43fddd5654..cc7cf602d99c81 100644 --- a/midx.c +++ b/midx.c @@ -142,11 +142,10 @@ static struct midxed_git *load_midxed_git_one(const char *midx_file, const char } if (ntohl(hdr->midx_version) != MIDX_VERSION) { - uint32_t version = ntohl(hdr->midx_version); munmap(midx_map, midx_size); close(fd); die("midx version %X does not match version %X", - version, MIDX_VERSION); + ntohl(hdr->midx_version), MIDX_VERSION); } /* Time to fill a midx struct */ diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 8ab53d4f14fe0e..e8c05f696a45cf 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -249,12 +249,10 @@ test_expect_success 'force some 64-bit offsets with pack-objects' ' # The 'verify' commands below expect a midx-head file pointint # to an existing MIDX file. -test_expect_success 'recompute valid midx' ' +test_expect_success 'recomput valid midx' ' git midx --write --update-head --pack-dir . ' -MIDX_BYTE_VERSION=4 - test_expect_success 'midx --verify succeeds' ' git midx --verify --pack-dir . ' @@ -279,9 +277,4 @@ test_expect_success 'verify bad signature' ' "midx signature" ' -test_expect_success 'verify bad version' ' - corrupt_midx_and_verify $MIDX_BYTE_VERSION "\02" \ - "midx version" -' - test_done From 3d5e35515d4c2f068feeda8b5469f1aff9f0a219 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:00:10 -0400 Subject: [PATCH 60/92] fixup! midx: verify midx signature This reverts commit 26ace48d95f8bfdd3bb31d42c6103119c240914f. 
--- midx.c | 5 ++--- t/t5319-midx.sh | 20 -------------------- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/midx.c b/midx.c index cc7cf602d99c81..b9cbb1d6502fbc 100644 --- a/midx.c +++ b/midx.c @@ -134,11 +134,10 @@ static struct midxed_git *load_midxed_git_one(const char *midx_file, const char hdr = midx_map; if (ntohl(hdr->midx_signature) != MIDX_SIGNATURE) { - uint32_t signature = hdr->midx_signature; munmap(midx_map, midx_size); close(fd); die("midx signature %X does not match signature %X", - signature, MIDX_SIGNATURE); + ntohl(hdr->midx_signature), MIDX_SIGNATURE); } if (ntohl(hdr->midx_version) != MIDX_VERSION) { @@ -932,4 +931,4 @@ int midx_verify(const char *pack_dir, const char *midx_id) midx_report("failed to find specified midx file"); return verify_midx_error; -} +} \ No newline at end of file diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index e8c05f696a45cf..446f50c2b951e9 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -257,24 +257,4 @@ test_expect_success 'midx --verify succeeds' ' git midx --verify --pack-dir . ' -# usage: corrupt_midx_and_verify -corrupt_midx_and_verify() { - pos=$1 - data="${2:-\0}" - midxid=$(cat midx-head) && - file=midx-$midxid.midx && - chmod a+w $file && - test_when_finished mv midx-backup $file && - cp $file midx-backup && - printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc && - test_must_fail git midx --verify --pack-dir . 2>test_err && - grep -v "^+" test_err >err && - grep "$grepstr" err -} - -test_expect_success 'verify bad signature' ' - corrupt_midx_and_verify 0 "\00" \ - "midx signature" -' - test_done From dbb1f1575f52bc472e95a91981f79ebbcbbfa87b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:00:25 -0400 Subject: [PATCH 61/92] fixup! midx: create '--verify' mode This reverts commit a9f7d76549d35eb1f2b5a860dbdb1835b6a82ceb. 
--- Documentation/git-midx.txt | 18 ++++++------------ builtin/midx.c | 8 +------- midx.c | 38 -------------------------------------- midx.h | 2 -- t/t5319-midx.sh | 14 -------------- 5 files changed, 7 insertions(+), 73 deletions(-) diff --git a/Documentation/git-midx.txt b/Documentation/git-midx.txt index 6b788cb0c79721..c2b4653228bc34 100644 --- a/Documentation/git-midx.txt +++ b/Documentation/git-midx.txt @@ -24,34 +24,28 @@ OPTIONS --clear:: If specified, delete the midx file specified by midx-head, and - midx-head. (Cannot be combined with `--write`, `--read`, or - `--verify`.) - ---verify:: - If specified, check the midx file specified by midx-head for - corruption or invalid data. (Cannot be combined with `--write`, - `--read`, or `--clear`.) + midx-head. (Cannot be combined with --write or --read.) --read:: If specified, read a midx file specified by the midx-head file and output basic details about the midx file. (Cannot be combined - with `--write`, `--clear`, or `--verify`.) + with --write or --clear.) --midx-id :: - If specified with `--read`, use the given oid to read midx-[oid].midx + If specified with --read, use the given oid to read midx-[oid].midx instead of using midx-head. --write:: If specified, write a new midx file to the pack directory using the packfiles present. Outputs the hash of the result midx file. - (Cannot be combined with `--read`, `--clear`, or `--verify`.) + (Cannot be combined with --read or --clear.) --update-head:: - If specified with `--write`, update the midx-head file to point to + If specified with --write, update the midx-head file to point to the written midx file. --delete-expired:: - If specified with `--write` and `--update-head`, delete the midx file + If specified with --write and --update-head, delete the midx file previously pointed to by midx-head (if changed). 
EXAMPLES diff --git a/builtin/midx.c b/builtin/midx.c index 7ce6b56a3e456d..c88708d48fd1a2 100644 --- a/builtin/midx.c +++ b/builtin/midx.c @@ -14,7 +14,6 @@ static char const * const builtin_midx_usage[] ={ N_("git midx --write [--pack-dir ] [--update-head] [--delete-expired]"), N_("git midx --read [--midx-id=]"), N_("git midx --clear [--pack-dir ]"), - N_("git midx --verify [--pack-dir ]"), NULL }; @@ -26,7 +25,6 @@ static struct opts_midx { int read; const char *midx_id; int clear; - int verify; int has_existing; struct object_id old_midx_oid; } opts; @@ -416,8 +414,6 @@ int cmd_midx(int argc, const char **argv, const char *prefix) N_("read midx file")), OPT_BOOL('c', "clear", &opts.clear, N_("clear midx file and midx-head")), - OPT_BOOL(0, "verify", &opts.verify, - N_("verify the contents of a midx file")), { OPTION_STRING, 'M', "midx-id", &opts.midx_id, N_("oid"), N_("An OID for a specific midx file in the pack-dir."), @@ -436,7 +432,7 @@ int cmd_midx(int argc, const char **argv, const char *prefix) builtin_midx_options, builtin_midx_usage, 0); - if (opts.write + opts.read + opts.clear + opts.verify > 1) + if (opts.write + opts.read + opts.clear > 1) usage_with_options(builtin_midx_usage, builtin_midx_options); if (!opts.pack_dir) { @@ -454,8 +450,6 @@ int cmd_midx(int argc, const char **argv, const char *prefix) return midx_read(); if (opts.clear) return midx_clear(); - if (opts.verify) - return midx_verify(opts.pack_dir, opts.midx_id); return 0; } diff --git a/midx.c b/midx.c index b9cbb1d6502fbc..f6a756bcb89c3e 100644 --- a/midx.c +++ b/midx.c @@ -894,41 +894,3 @@ void close_all_midx(void) midxed_git = 0; } - -static int verify_midx_error = 0; - -static void midx_report(const char *fmt, ...) 
-{ - va_list ap; - struct strbuf sb = STRBUF_INIT; - verify_midx_error = 1; - - va_start(ap, fmt); - strbuf_vaddf(&sb, fmt, ap); - - fprintf(stderr, "%s\n", sb.buf); - strbuf_release(&sb); - va_end(ap); -} - -int midx_verify(const char *pack_dir, const char *midx_id) -{ - struct midxed_git *m; - const char *midx_head_path; - - if (midx_id) { - size_t sz; - struct strbuf sb = STRBUF_INIT; - strbuf_addf(&sb, "%s/midx-%s.midx", pack_dir, midx_id); - midx_head_path = strbuf_detach(&sb, &sz); - } else { - midx_head_path = get_midx_head_filename_dir(pack_dir); - } - - m = load_midxed_git_one(midx_head_path, pack_dir); - - if (!m) - midx_report("failed to find specified midx file"); - - return verify_midx_error; -} \ No newline at end of file diff --git a/midx.h b/midx.h index 7bc4a3a548a744..7acac014f7b137 100644 --- a/midx.h +++ b/midx.h @@ -132,6 +132,4 @@ extern const char *write_midx_file(const char *pack_dir, extern int close_midx(struct midxed_git *m); extern void close_all_midx(void); -int midx_verify(const char *pack_dir, const char *midx_id); - #endif diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 446f50c2b951e9..5a2535cafdbe1f 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -209,10 +209,6 @@ test_expect_success 'midx --clear' ' ! test -f "midx-head" ' -test_expect_success 'midx --verify fails on missing midx' ' - test_must_fail git midx --verify --pack-dir . -' - test_expect_success 'Verify normal git operations succeed' ' git rev-list --all --objects >rev-list-out-8 && test_line_count = 90 rev-list-out-8 @@ -247,14 +243,4 @@ test_expect_success 'force some 64-bit offsets with pack-objects' ' rm -rf packs-64 ' -# The 'verify' commands below expect a midx-head file pointint -# to an existing MIDX file. -test_expect_success 'recomput valid midx' ' - git midx --write --update-head --pack-dir . -' - -test_expect_success 'midx --verify succeeds' ' - git midx --verify --pack-dir . 
-' - test_done From 91c210e25719e4d52e2b402e2d001d70593ea403 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:00:40 -0400 Subject: [PATCH 62/92] fixup! midx: fix flaky 64-bit offset test This reverts commit 00a077f04ce4e6dde411d8ff37e9d49ed7d40c31. --- t/t5319-midx.sh | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 5a2535cafdbe1f..56396d332fdf8a 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -228,19 +228,17 @@ test_expect_success 'force some 64-bit offsets with pack-objects' ' pack64=$(git pack-objects --index-version=2,0x40 test-64 midx-read-out-64 && - echo "header: 4d494458 80000001 01 14 00 06 00000001" >midx-read-expect-64 && - echo "num_objects: 65" >>midx-read-expect-64 && + corrupt_data $idx64 2863 "\02" && + midx64=$(git midx --write --pack-dir .) && + git midx --read --pack-dir . --midx-id=$midx64 >midx-read-out-64 && + echo "header: 4d494458 80000001 01 14 00 06 0000000e" >midx-read-expect-64 && + echo "num_objects: 77" >>midx-read-expect-64 && echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets large_offsets" >>midx-read-expect-64 && echo "pack_names:" >>midx-read-expect-64 && - echo test-64-$pack64.pack >>midx-read-expect-64 && - echo "pack_dir: packs-64" >>midx-read-expect-64 && + ls test-*.pack | sort >>midx-read-expect-64 && + echo "pack_dir: ." >>midx-read-expect-64 && test_cmp midx-read-out-64 midx-read-expect-64 && - rm -rf packs-64 + rm midx-$midx64.midx test-64* ' test_done From 619098ef63de7ca777453794bce596a4e32ab219 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:02:17 -0400 Subject: [PATCH 63/92] fixup! midx: replace constants with macros and sizeof() This reverts commit 1e0baca6eeb8ea1499253ce331442b9ef7ecb47c. 
--- midx.c | 50 ++++++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/midx.c b/midx.c index f6a756bcb89c3e..2ff4d2c7413ed7 100644 --- a/midx.c +++ b/midx.c @@ -21,10 +21,6 @@ #define MIDX_OID_LEN MIDX_OID_LEN_SHA1 #define MIDX_LARGE_OFFSET_NEEDED 0x80000000 -#define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t)) -#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256) -#define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t)) -#define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t)) /* MIDX-git global storage */ struct midxed_git *midxed_git = 0; @@ -475,7 +471,7 @@ static size_t write_midx_chunk_packlookup( cur_len += strlen(pack_names[i]) + 1; } - return sizeof(uint32_t) * (size_t)nr_packs; + return 4 * (size_t)nr_packs; } static size_t write_midx_chunk_packnames( @@ -530,7 +526,7 @@ static size_t write_midx_chunk_oidfanout( list = next; } - return MIDX_CHUNK_FANOUT_SIZE; + return 4 * 256; } static size_t write_midx_chunk_oidlookup( @@ -576,25 +572,28 @@ static size_t write_midx_chunk_objectoffsets( size_t written = 0; for (i = 0; i < nr_objects; i++) { - struct pack_midx_entry *obj = list++; + struct pack_midx_details_internal details; + struct pack_midx_entry *obj = *list++; if (last_oid && !oidcmp(last_oid, &obj->oid)) continue; last_oid = &obj->oid; - hashwrite_be32(f, pack_perm[obj->pack_int_id]); + details.pack_int_id = htonl(pack_perm[obj->pack_int_id]); if (large_offset_needed && obj->offset >> 31) - hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); + details.internal_offset = (MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); else if (!large_offset_needed && obj->offset >> 32) BUG("object %s requires a large offset (%"PRIx64") but the MIDX is not writing large offsets!", oid_to_hex(&obj->oid), obj->offset); else - hashwrite_be32(f, (uint32_t)obj->offset); + details.internal_offset = (uint32_t)obj->offset; - written += 2 * sizeof(uint32_t); + details.internal_offset = 
htonl(details.internal_offset); + hashwrite(f, &details, 8); + written += 8; } return written; @@ -611,6 +610,7 @@ static size_t write_midx_chunk_largeoffsets( while (nr_large_offset) { struct pack_midx_entry *obj = list++; uint64_t offset = obj->offset; + uint32_t split[2]; if (last_oid && !oidcmp(last_oid, &obj->oid)) continue; @@ -620,9 +620,11 @@ static size_t write_midx_chunk_largeoffsets( if (!(offset >> 31)) continue; - hashwrite_be32(f, offset >> 32); - hashwrite_be32(f, offset & 0xffffffff); - written += 2 * sizeof(uint32_t); + split[0] = htonl(offset >> 32); + split[1] = htonl(offset & 0xffffffff); + + hashwrite(f, split, 8); + written += 8; nr_large_offset--; } @@ -758,20 +760,20 @@ const char *write_midx_file(const char *pack_dir, * Fill initial chunk values using offsets * relative to first chunk. */ - chunk_offsets[0] = sizeof(hdr) + MIDX_CHUNKLOOKUP_WIDTH * (hdr.num_chunks + 1); + chunk_offsets[0] = sizeof(hdr) + 12 * (hdr.num_chunks + 1); chunk_ids[0] = MIDX_CHUNKID_PACKLOOKUP; chunk_offsets[1] = chunk_offsets[0] + nr_packs * 4; chunk_ids[1] = MIDX_CHUNKID_OIDFANOUT; - chunk_offsets[2] = chunk_offsets[1] + MIDX_CHUNK_FANOUT_SIZE; + chunk_offsets[2] = chunk_offsets[1] + 256 * 4; chunk_ids[2] = MIDX_CHUNKID_OIDLOOKUP; chunk_offsets[3] = chunk_offsets[2] + (uint64_t)nr_objects * (uint64_t)hdr.hash_len; chunk_ids[3] = MIDX_CHUNKID_OBJECTOFFSETS; - chunk_offsets[4] = chunk_offsets[3] + MIDX_CHUNK_OFFSET_WIDTH * (uint64_t)nr_objects; + chunk_offsets[4] = chunk_offsets[3] + 8 * (uint64_t)count_distinct; if (large_offset_needed) { chunk_ids[4] = MIDX_CHUNKID_LARGEOFFSETS; - chunk_offsets[5] = chunk_offsets[4] + MIDX_CHUNK_LARGE_OFFSET_WIDTH * (uint64_t)nr_large_offset; + chunk_offsets[5] = chunk_offsets[4] + 8 * (uint64_t)nr_large_offset; chunk_ids[5] = MIDX_CHUNKID_PACKNAMES; chunk_offsets[6] = chunk_offsets[5] + total_name_len; chunk_ids[6] = 0; @@ -782,10 +784,14 @@ const char *write_midx_file(const char *pack_dir, } for (i = 0; i <= hdr.num_chunks; i++) 
{ - hashwrite_be32(f, chunk_ids[i]); - hashwrite_be32(f, chunk_offsets[i] >> 32); - hashwrite_be32(f, chunk_offsets[i] & 0xffffffff); - written += MIDX_CHUNKLOOKUP_WIDTH; + uint32_t chunk_write[3]; + + chunk_write[0] = htonl(chunk_ids[i]); + chunk_write[1] = htonl(chunk_offsets[i] >> 32); + chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff); + + hashwrite(f, chunk_write, 12); + written += 12; } for (chunk = 0; chunk <= hdr.num_chunks; chunk++) { From 475d1d7bd226529634b96122c2c2299fd2f7e00c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:02:42 -0400 Subject: [PATCH 64/92] fixup! midx: test 64-bit offsets This reverts commit 040a01f22fc1450b37e4efa64a4e2b3cef8edbfb. --- t/t5319-midx.sh | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 56396d332fdf8a..8fd44601e66186 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -194,7 +194,7 @@ test_expect_success 'write-midx with twelve packs' ' test_expect_success 'Verify normal git operations succeed' ' git rev-list --all --objects >rev-list-out-7 && - test_line_count = 90 rev-list-out-7 + test_line_count = 90 rev-list-out-7 rev-list-expect-7 ' test_expect_success 'write-midx with nothing new' ' @@ -214,31 +214,4 @@ test_expect_success 'Verify normal git operations succeed' ' test_line_count = 90 rev-list-out-8 ' -# usage: corrupt_data [] -corrupt_data() { - file=$1 - pos=$2 - data="${3:-\0}" - printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc -} - -# Force 64-bit offsets by manipulating the idx file. -# This makes the IDX file _incorrect_ so be careful to clean up after! 
-test_expect_success 'force some 64-bit offsets with pack-objects' ' - pack64=$(git pack-objects --index-version=2,0x40 test-64 midx-read-out-64 && - echo "header: 4d494458 80000001 01 14 00 06 0000000e" >midx-read-expect-64 && - echo "num_objects: 77" >>midx-read-expect-64 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets large_offsets" >>midx-read-expect-64 && - echo "pack_names:" >>midx-read-expect-64 && - ls test-*.pack | sort >>midx-read-expect-64 && - echo "pack_dir: ." >>midx-read-expect-64 && - test_cmp midx-read-out-64 midx-read-expect-64 && - rm midx-$midx64.midx test-64* -' - test_done From 16f04abd7c0c016e3d4f24f3cc67ba132b184ddf Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:05:19 -0400 Subject: [PATCH 65/92] fixup! midx: harden against incorrect chunk offsets This reverts commit 358a9972d4f6a3092d561236f3fc5aae44ad2cb5. --- midx.c | 54 +++++++++++------------------------------------------- 1 file changed, 11 insertions(+), 43 deletions(-) diff --git a/midx.c b/midx.c index 2ff4d2c7413ed7..3f6d501e3b25aa 100644 --- a/midx.c +++ b/midx.c @@ -460,7 +460,7 @@ int contains_pack(struct midxed_git *m, const char *pack_name) return 0; } -static size_t write_midx_chunk_packlookup( +static void write_midx_chunk_packlookup( struct hashfile *f, const char **pack_names, uint32_t nr_packs) { @@ -470,31 +470,24 @@ static size_t write_midx_chunk_packlookup( hashwrite_be32(f, cur_len); cur_len += strlen(pack_names[i]) + 1; } - - return 4 * (size_t)nr_packs; } -static size_t write_midx_chunk_packnames( +static void write_midx_chunk_packnames( struct hashfile *f, const char **pack_names, uint32_t nr_packs) { uint32_t i; - size_t written = 0; for (i = 0; i < nr_packs; i++) { - size_t writelen = strlen(pack_names[i]) + 1; if (i > 0 && strcmp(pack_names[i], pack_names[i-1]) <= 0) BUG("incorrect pack order: %s before %s", pack_names[i-1], pack_names[i]); - hashwrite(f, pack_names[i], writelen); - written += writelen; + 
hashwrite(f, pack_names[i], strlen(pack_names[i]) + 1); } - - return written; } -static size_t write_midx_chunk_oidfanout( +static void write_midx_chunk_oidfanout( struct hashfile *f, struct pack_midx_entry *objects, uint32_t nr_objects) { @@ -525,18 +518,15 @@ static size_t write_midx_chunk_oidfanout( hashwrite_be32(f, count_distinct); list = next; } - - return 4 * 256; } -static size_t write_midx_chunk_oidlookup( +static void write_midx_chunk_oidlookup( struct hashfile *f, unsigned char hash_len, struct pack_midx_entry *objects, uint32_t nr_objects) { struct pack_midx_entry *list = objects; struct object_id *last_oid = NULL; uint32_t i; - size_t written = 0; for (i = 0; i < nr_objects; i++) { struct pack_midx_entry *obj = list++; @@ -556,20 +546,16 @@ static size_t write_midx_chunk_oidlookup( last_oid = &obj->oid; hashwrite(f, obj->oid.hash, (int)hash_len); - written += hash_len; } - - return written; } -static size_t write_midx_chunk_objectoffsets( +static void write_midx_chunk_objectoffsets( struct hashfile *f, int large_offset_needed, struct pack_midx_entry *objects, uint32_t nr_objects, uint32_t *pack_perm) { struct pack_midx_entry *list = objects; struct object_id *last_oid = 0; uint32_t i, nr_large_offset = 0; - size_t written = 0; for (i = 0; i < nr_objects; i++) { struct pack_midx_details_internal details; @@ -593,19 +579,15 @@ static size_t write_midx_chunk_objectoffsets( details.internal_offset = htonl(details.internal_offset); hashwrite(f, &details, 8); - written += 8; } - - return written; } -static size_t write_midx_chunk_largeoffsets( +static void write_midx_chunk_largeoffsets( struct hashfile *f, uint32_t nr_large_offset, struct pack_midx_entry *objects, uint32_t nr_objects) { struct pack_midx_entry *list = objects; struct object_id *last_oid = 0; - size_t written = 0; while (nr_large_offset) { struct pack_midx_entry *obj = list++; @@ -624,12 +606,8 @@ static size_t write_midx_chunk_largeoffsets( split[1] = htonl(offset & 0xffffffff); hashwrite(f, 
split, 8); - written += 8; - nr_large_offset--; } - - return written; } struct pack_pair { @@ -683,7 +661,6 @@ const char *write_midx_file(const char *pack_dir, int rename_needed = 0; int total_name_len = 0; uint32_t *pack_perm; - size_t written = 0; if (!core_midx) return 0; @@ -754,7 +731,6 @@ const char *write_midx_file(const char *pack_dir, /* write header to file */ assert(sizeof(hdr) == 16); hashwrite(f, &hdr, sizeof(hdr)); - written += sizeof(hdr); /* * Fill initial chunk values using offsets @@ -773,7 +749,7 @@ const char *write_midx_file(const char *pack_dir, if (large_offset_needed) { chunk_ids[4] = MIDX_CHUNKID_LARGEOFFSETS; - chunk_offsets[5] = chunk_offsets[4] + 8 * (uint64_t)nr_large_offset; + chunk_offsets[5] = chunk_offsets[4] + 8 * (uint64_t)nr_large_offset; chunk_ids[5] = MIDX_CHUNKID_PACKNAMES; chunk_offsets[6] = chunk_offsets[5] + total_name_len; chunk_ids[6] = 0; @@ -789,25 +765,17 @@ const char *write_midx_file(const char *pack_dir, chunk_write[0] = htonl(chunk_ids[i]); chunk_write[1] = htonl(chunk_offsets[i] >> 32); chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff); - hashwrite(f, chunk_write, 12); - written += 12; } - for (chunk = 0; chunk <= hdr.num_chunks; chunk++) { - if (chunk_offsets[chunk] != written) - BUG("chunk %d has intended chunk offset %"PRIx64" does not match expected %"PRIx64"", - chunk, - (uint64_t)chunk_offsets[chunk], - (uint64_t)written); - + for (chunk = 0; chunk < hdr.num_chunks; chunk++) { switch (chunk_ids[chunk]) { case MIDX_CHUNKID_PACKLOOKUP: - written += write_midx_chunk_packlookup(f, pack_names, nr_packs); + write_midx_chunk_packlookup(f, pack_names, nr_packs); break; case MIDX_CHUNKID_PACKNAMES: - written += write_midx_chunk_packnames(f, pack_names, nr_packs); + write_midx_chunk_packnames(f, pack_names, nr_packs); break; case MIDX_CHUNKID_OIDFANOUT: From 29a5f1889391d93e4b6b671a36c911655b2f2079 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:05:43 -0400 Subject: [PATCH 66/92] fixup! 
midx: harden against large offset problems This reverts commit be63a2ac120c790bec28d6c9511f9eeeea12fe61. --- midx.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/midx.c b/midx.c index 3f6d501e3b25aa..f18e3e04afe8ec 100644 --- a/midx.c +++ b/midx.c @@ -570,10 +570,6 @@ static void write_midx_chunk_objectoffsets( if (large_offset_needed && obj->offset >> 31) details.internal_offset = (MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); - else if (!large_offset_needed && obj->offset >> 32) - BUG("object %s requires a large offset (%"PRIx64") but the MIDX is not writing large offsets!", - oid_to_hex(&obj->oid), - obj->offset); else details.internal_offset = (uint32_t)obj->offset; From a48b46da6af1f741e3c6b0b95f0499a2ff7dd2e0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:06:01 -0400 Subject: [PATCH 67/92] fixup! midx: harden writes against incorrect pack orders This reverts commit 6c8f58b1d92f95ead696c6f908b8749c9fb6a433. --- midx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/midx.c b/midx.c index f18e3e04afe8ec..5aa5aee9ba0278 100644 --- a/midx.c +++ b/midx.c @@ -478,11 +478,6 @@ static void write_midx_chunk_packnames( { uint32_t i; for (i = 0; i < nr_packs; i++) { - if (i > 0 && strcmp(pack_names[i], pack_names[i-1]) <= 0) - BUG("incorrect pack order: %s before %s", - pack_names[i-1], - pack_names[i]); - hashwrite(f, pack_names[i], strlen(pack_names[i]) + 1); } } From 3665c712108b1dec4d1b2b080d62d71feea49430 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:07:37 -0400 Subject: [PATCH 68/92] fixup! midx: use hashwrite_be32() instead of htonl() This reverts commit c558d7fa5e1dc27e6b733e5a20f579a1a7c4d6b1. 
--- midx.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/midx.c b/midx.c index 5aa5aee9ba0278..ee8a5a61d475b4 100644 --- a/midx.c +++ b/midx.c @@ -467,7 +467,8 @@ static void write_midx_chunk_packlookup( uint32_t i, cur_len = 0; for (i = 0; i < nr_packs; i++) { - hashwrite_be32(f, cur_len); + uint32_t swap_len = htonl(cur_len); + hashwrite(f, &swap_len, 4); cur_len += strlen(pack_names[i]) + 1; } } @@ -499,18 +500,22 @@ static void write_midx_chunk_oidfanout( for (i = 0; i < 256; i++) { struct pack_midx_entry *next = list; struct pack_midx_entry *prev = NULL; + uint32_t swap_distinct; while (next < last) { if (next->oid.hash[0] != i) break; - if (!prev || oidcmp(&(prev->oid), &(next->oid))) + if (!prev || oidcmp(&(prev->oid), &(obj->oid))) + { count_distinct++; + } prev = next++; } - hashwrite_be32(f, count_distinct); + swap_distinct = htonl(count_distinct); + hashwrite(f, &swap_distinct, 4); list = next; } } From 232acbd35bd8524f3ce5f3398cdde0c80b3da212 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:09:12 -0400 Subject: [PATCH 69/92] fixup! midx: safe-guard against writing OIDs out of order This reverts commit 508a3cbcf540998b3131e7508de440c27c065308. 
--- midx.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/midx.c b/midx.c index ee8a5a61d475b4..ba239f8652a5cc 100644 --- a/midx.c +++ b/midx.c @@ -524,23 +524,13 @@ static void write_midx_chunk_oidlookup( struct hashfile *f, unsigned char hash_len, struct pack_midx_entry *objects, uint32_t nr_objects) { - struct pack_midx_entry *list = objects; - struct object_id *last_oid = NULL; + struct pack_midx_entry **list = objects; + struct object_id *last_oid = 0; uint32_t i; for (i = 0; i < nr_objects; i++) { struct pack_midx_entry *obj = list++; - if (i < nr_objects - 1) { - /* Check out-of-order */ - struct pack_midx_entry *next = list; - if (oidcmp(&obj->oid, &next->oid) >= 0) - BUG("OIDs not in order: %s >= %s", - oid_to_hex(&obj->oid), - oid_to_hex(&next->oid)); - } - - /* Skip duplicate objects */ if (last_oid && !oidcmp(last_oid, &obj->oid)) continue; From 1d812e82cea55b4a4168fa6755376b3db90022be Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:09:27 -0400 Subject: [PATCH 70/92] fixup! t5319-midx.sh: use modern test patterns This reverts commit 321c416cb951737db27ecceaf4c58c7ed5c6116a. --- t/t5319-midx.sh | 428 ++++++++++++++++++++++++------------------------ 1 file changed, 218 insertions(+), 210 deletions(-) diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 8fd44601e66186..a925f6e4681167 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -3,215 +3,223 @@ test_description='meta-pack indexes' . ./test-lib.sh -test_expect_success 'setup' ' - rm -rf .git && - git init && - git config core.midx true && - git config pack.threads 1 -' - -test_expect_success 'write-midx with no packs' ' - git midx --write --update-head --delete-expired --pack-dir . 
-' - -test_expect_success 'create packs' ' - i=1 && - while test $i -le 5 - do - iii=$(printf '%03i' $i) - test-tool genrandom "bar" 200 > wide_delta_$iii && - test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && - test-tool genrandom "foo"$i 100 > deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && - echo $iii >file_$iii && - test-tool genrandom "$iii" 8192 >>file_$iii && - git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && - i=$(expr $i + 1) || return 1 - done && - { echo 101 && test-tool genrandom 100 8192; } >file_101 && - git update-index --add file_101 && - tree=$(git write-tree) && - commit=$(git commit-tree $tree obj-list && - git update-ref HEAD $commit -' - -test_expect_success 'Verify normal git operations succeed' ' - git rev-list --all --objects >rev-list-out-1 && - test_line_count = 18 rev-list-out-1 -' - -test_expect_success 'write-midx from index version 1' ' - pack1=$(git pack-objects --index-version=1 test-1 midx-read-out-1 && - echo "header: 4d494458 80000001 01 14 00 05 00000001" >midx-read-expect-1 && - echo "num_objects: 17" >>midx-read-expect-1 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-1 && - echo "pack_names:" >>midx-read-expect-1 && - echo "test-1-$pack1.pack" >>midx-read-expect-1 && - echo "pack_dir: ." >>midx-read-expect-1 && - test_cmp midx-read-out-1 midx-read-expect-1 -' - -test_expect_success 'Verify normal git operations succeed' ' - git rev-list --all --objects >rev-list-out-2 && - test_line_count = 18 rev-list-out-2 -' - -test_expect_success 'write-midx from index version 2' ' - rm "test-1-$pack1.pack" && - pack2=$(git pack-objects --index-version=2 test-2 midx-head-expect && - test_cmp midx-head midx-head-expect && - git midx --read --pack-dir . 
--midx-id=$midx2 >midx-read-out-2 && - echo "header: 4d494458 80000001 01 14 00 05 00000001" >midx-read-expect-2 && - echo "num_objects: 17" >>midx-read-expect-2 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-2 && - echo "pack_names:" >>midx-read-expect-2 && - echo "test-2-$pack2.pack" >>midx-read-expect-2 && - echo "pack_dir: ." >>midx-read-expect-2 && - test_cmp midx-read-out-2 midx-read-expect-2 -' - -test_expect_success 'Verify normal git operations succeed' ' - git rev-list --all --objects >rev-list-out-3 && - test_line_count = 18 rev-list-out-3 -' - -test_expect_success 'Add more objects' ' - i=6 && - while test $i -le 10 - do - iii=$(printf '%03i' $i) - test-tool genrandom "bar" 200 > wide_delta_$iii && - test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && - test-tool genrandom "foo"$i 100 > deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && - echo $iii >file_$iii && - test-tool genrandom "$iii" 8192 >>file_$iii && - git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && - i=$(expr $i + 1) || return 1 - done && - { echo 101 && test-tool genrandom 100 8192; } >file_101 && - git update-index --add file_101 && - tree=$(git write-tree) && - commit=$(git commit-tree $tree -p HEADobj-list && - git update-ref HEAD $commit && - pack3=$(git pack-objects --index-version=2 test-pack rev-list-out-4 && - test_line_count = 35 rev-list-out-4 -' - -test_expect_success 'write-midx with two packs' ' - midx3=$(git midx --write --update-head --delete-expired --pack-dir .) && - test -f midx-$midx3.midx && - ! test -f midx-$midx2.midx && - printf $midx3 > midx-head-expect && - test_cmp midx-head midx-head-expect && - git midx --read --pack-dir . 
--midx-id=$midx3 >midx-read-out-3 && - echo "header: 4d494458 80000001 01 14 00 05 00000002" >midx-read-expect-3 && - echo "num_objects: 33" >>midx-read-expect-3 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-3 && - echo "pack_names:" >>midx-read-expect-3 && - echo "test-2-$pack2.pack" >>midx-read-expect-3 && - echo "test-pack-$pack3.pack" >>midx-read-expect-3 && - echo "pack_dir: ." >>midx-read-expect-3 && - test_cmp midx-read-out-3 midx-read-expect-3 && - git midx --read --pack-dir . >midx-read-out-3-head && - test_cmp midx-read-out-3-head midx-read-expect-3 -' - -test_expect_success 'Verify normal git operations succeed' ' - git rev-list --all --objects >rev-list-out-5 && - test_line_count = 35 rev-list-out-5 -' - -test_expect_success 'Add more packs' ' - j=0 && - while test $j -le 10 - do - iii=$(printf '%03i' $i) - test-tool genrandom "bar" 200 > wide_delta_$iii && - test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && - test-tool genrandom "foo"$i 100 > deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && - echo $iii >file_$iii && - test-tool genrandom "$iii" 8192 >>file_$iii && - git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && - { echo 101 && test-tool genrandom 100 8192; } >file_101 && - git update-index --add file_101 && - tree=$(git write-tree) && - commit=$(git commit-tree $tree -p HEADobj-list && - git update-ref HEAD $commit && - git pack-objects --index-version=2 test-pack rev-list-out-6 && - test_line_count = 90 rev-list-out-6 -' - -test_expect_success 'write-midx with twelve packs' ' - midx4=$(git midx --write --update-head --delete-expired --pack-dir .) && - test -f midx-$midx4.midx && - ! test -f midx-$midx3.midx && - printf $midx4 > midx-head-expect && - test_cmp midx-head midx-head-expect && - git midx --read --pack-dir . 
--midx-id=$midx4 >midx-read-out-4 && - echo "header: 4d494458 80000001 01 14 00 05 0000000d" >midx-read-expect-4 && - echo "num_objects: 77" >>midx-read-expect-4 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-4 && - echo "pack_names:" >>midx-read-expect-4 && - ls test-*.pack | sort >>midx-read-expect-4 && - echo "pack_dir: ." >>midx-read-expect-4 && - test_cmp midx-read-out-4 midx-read-expect-4 && - git midx --read --pack-dir . >midx-read-out-4-head && - test_cmp midx-read-out-4-head midx-read-expect-4 -' - -test_expect_success 'Verify normal git operations succeed' ' - git rev-list --all --objects >rev-list-out-7 && - test_line_count = 90 rev-list-out-7 rev-list-expect-7 -' - -test_expect_success 'write-midx with nothing new' ' - midx5=$(git midx --write --update-head --delete-expired --pack-dir .) && - printf $midx5 > midx-head-5 && - test_cmp midx-head-5 midx-head-expect -' - -test_expect_success 'midx --clear' ' - git midx --clear --pack-dir . && - ! test -f "midx-$midx4.midx" && - ! test -f "midx-head" -' - -test_expect_success 'Verify normal git operations succeed' ' - git rev-list --all --objects >rev-list-out-8 && - test_line_count = 90 rev-list-out-8 -' +test_expect_success \ + 'setup' \ + 'rm -rf .git && + git init && + git config core.midx true && + git config pack.threads 1' + +test_expect_success \ + 'write-midx with no packs' \ + 'git midx --write --update-head --delete-expired --pack-dir .' 
+ +test_expect_success \ + 'create packs' \ + 'i=1 && + while test $i -le 5 + do + iii=$(printf '%03i' $i) + test-tool genrandom "bar" 200 > wide_delta_$iii && + test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && + test-tool genrandom "foo"$i 100 > deep_delta_$iii && + test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && + test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && + echo $iii >file_$iii && + test-tool genrandom "$iii" 8192 >>file_$iii && + git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && + i=$(expr $i + 1) || return 1 + done && + { echo 101 && test-tool genrandom 100 8192; } >file_101 && + git update-index --add file_101 && + tree=$(git write-tree) && + commit=$(git commit-tree $tree obj-list && + git update-ref HEAD $commit' + +test_expect_success \ + 'Verify normal git operations succeed' \ + 'git rev-list --all --objects | wc -l >rev-list-out-1 && + echo 18 >rev-list-expect-1 && + cmp -n 2 rev-list-out-1 rev-list-expect-1' + +test_expect_success \ + 'write-midx from index version 1' \ + 'pack1=$(git pack-objects --index-version=1 test-1 midx-read-out-1 && + echo "header: 4d494458 80000001 01 14 00 05 00000001" >midx-read-expect-1 && + echo "num_objects: 17" >>midx-read-expect-1 && + echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-1 && + echo "pack_names:" >>midx-read-expect-1 && + echo "test-1-${pack1}.pack" >>midx-read-expect-1 && + echo "pack_dir: ." >>midx-read-expect-1 && + cmp midx-read-out-1 midx-read-expect-1' + +test_expect_success \ + 'Verify normal git operations succeed' \ + 'git rev-list --all --objects | wc -l >rev-list-out-2 && + echo 18 >rev-list-expect-2 && + cmp -n 2 rev-list-out-2 rev-list-expect-2' + +test_expect_success \ + 'write-midx from index version 2' \ + 'rm "test-1-${pack1}.pack" && + pack2=$(git pack-objects --index-version=2 test-2 midx-head-expect && + cmp -n 40 midx-head midx-head-expect && + git midx --read --pack-dir . 
--midx-id=${midx2} >midx-read-out-2 && + echo "header: 4d494458 80000001 01 14 00 05 00000001" >midx-read-expect-2 && + echo "num_objects: 17" >>midx-read-expect-2 && + echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-2 && + echo "pack_names:" >>midx-read-expect-2 && + echo "test-2-${pack2}.pack" >>midx-read-expect-2 && + echo "pack_dir: ." >>midx-read-expect-2 && + cmp midx-read-out-2 midx-read-expect-2' + +test_expect_success \ + 'Verify normal git operations succeed' \ + 'git rev-list --all --objects | wc -l >rev-list-out-3 && + echo 18 >rev-list-expect-3 && + cmp -n 2 rev-list-out-3 rev-list-expect-3' + +test_expect_success \ + 'Add more objects' \ + 'i=6 && + while test $i -le 10 + do + iii=$(printf '%03i' $i) + test-tool genrandom "bar" 200 > wide_delta_$iii && + test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && + test-tool genrandom "foo"$i 100 > deep_delta_$iii && + test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && + test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && + echo $iii >file_$iii && + test-tool genrandom "$iii" 8192 >>file_$iii && + git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && + i=$(expr $i + 1) || return 1 + done && + { echo 101 && test-tool genrandom 100 8192; } >file_101 && + git update-index --add file_101 && + tree=$(git write-tree) && + commit=$(git commit-tree $tree -p HEADobj-list && + git update-ref HEAD $commit && + pack3=$(git pack-objects --index-version=2 test-pack rev-list-out-4 && + echo 35 >rev-list-expect-4 && + cmp -n 2 rev-list-out-4 rev-list-expect-4' + +test_expect_success \ + 'write-midx with two packs' \ + 'midx3=$(git midx --write --update-head --delete-expired --pack-dir .) && + test -f midx-${midx3}.midx && + ! test -f midx-${midx2}.midx && + echo ${midx3} > midx-head-expect && + cmp -n 40 midx-head midx-head-expect && + git midx --read --pack-dir . 
--midx-id=${midx3} >midx-read-out-3 && + echo "header: 4d494458 80000001 01 14 00 05 00000002" >midx-read-expect-3 && + echo "num_objects: 33" >>midx-read-expect-3 && + echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-3 && + echo "pack_names:" >>midx-read-expect-3 && + echo "test-2-${pack2}.pack" >>midx-read-expect-3 && + echo "test-pack-${pack3}.pack" >>midx-read-expect-3 && + echo "pack_dir: ." >>midx-read-expect-3 && + cmp midx-read-out-3 midx-read-expect-3 && + git midx --read --pack-dir . >midx-read-out-3-head && + cmp midx-read-out-3-head midx-read-expect-3' + +test_expect_success \ + 'Verify normal git operations succeed' \ + 'git rev-list --all --objects | wc -l >rev-list-out-5 && + echo 35 >rev-list-expect-5 && + cmp -n 2 rev-list-out-5 rev-list-expect-5' + +test_expect_success \ + 'Add more packs' \ + 'j=0 && + while test $j -le 10 + do + iii=$(printf '%03i' $i) + test-tool genrandom "bar" 200 > wide_delta_$iii && + test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && + test-tool genrandom "foo"$i 100 > deep_delta_$iii && + test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && + test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && + echo $iii >file_$iii && + test-tool genrandom "$iii" 8192 >>file_$iii && + git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && + { echo 101 && test-tool genrandom 100 8192; } >file_101 && + git update-index --add file_101 && + tree=$(git write-tree) && + commit=$(git commit-tree $tree -p HEADobj-list && + git update-ref HEAD $commit && + git pack-objects --index-version=2 test-pack rev-list-out-6 && + echo 90 >rev-list-expect-6 && + cmp -n 2 rev-list-out-6 rev-list-expect-6' + +test_expect_success \ + 'write-midx with twelve packs' \ + 'midx4=$(git midx --write --update-head --delete-expired --pack-dir .) && + test -f midx-${midx4}.midx && + ! 
test -f midx-${midx3}.midx && + echo ${midx4} > midx-head-expect && + cmp -n 40 midx-head midx-head-expect && + git midx --read --pack-dir . --midx-id=${midx4} >midx-read-out-4 && + echo "header: 4d494458 80000001 01 14 00 05 0000000d" >midx-read-expect-4 && + echo "num_objects: 77" >>midx-read-expect-4 && + echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-4 && + echo "pack_names:" >>midx-read-expect-4 && + ls test-*.pack | sort >>midx-read-expect-4 && + echo "pack_dir: ." >>midx-read-expect-4 && + cmp midx-read-out-4 midx-read-expect-4 && + git midx --read --pack-dir . >midx-read-out-4-head && + cmp midx-read-out-4-head midx-read-expect-4' + +test_expect_success \ + 'Verify normal git operations succeed' \ + 'git rev-list --all --objects | wc -l >rev-list-out-7 && + echo 90 >rev-list-expect-7 && + cmp -n 2 rev-list-out-7 rev-list-expect-7' + +test_expect_success \ + 'write-midx with nothing new' \ + 'midx5=$(git midx --write --update-head --delete-expired --pack-dir .) && + echo ${midx5} > midx-head-5 && + cmp -n 40 midx-head-5 midx-head-expect' + +test_expect_success \ + 'midx --clear' \ + 'git midx --clear --pack-dir . && + ! test -f "midx-${midx4}.midx" && + ! test -f "midx-head"' + +test_expect_success \ + 'Verify normal git operations succeed' \ + 'git rev-list --all --objects | wc -l >rev-list-out-8 && + echo 90 >rev-list-expect-8 && + cmp -n 2 rev-list-out-8 rev-list-expect-8' test_done From 093e1c140d9d5ceadb1c67259f48df2e9aafe9d3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:11:38 -0400 Subject: [PATCH 71/92] fixup! midx: choose most-recent pack containing duplicate objects This reverts commit 376b3fe19213e6db1c384a320dfd0fc12c1e24a4. 
--- builtin/midx.c | 33 +++++++++++++++++++++++++++++++-- midx.c | 3 --- midx.h | 1 - 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/builtin/midx.c b/builtin/midx.c index c88708d48fd1a2..ff7300466ca417 100644 --- a/builtin/midx.c +++ b/builtin/midx.c @@ -230,8 +230,37 @@ static int build_midx_from_packs( return 0; } - dedupe_and_sort_entries(packs, nr_installed_packs, - midx, &objects, &nr_objects); + if (midx) + nr_objects += midx->num_objects; + + ALLOC_ARRAY(objects, nr_objects); + nr_objects = 0; + + for (i = 0; midx && i < midx->num_objects; i++) + nth_midxed_object_entry(midx, i, &objects[nr_objects++]); + + for (i = pack_offset; i < nr_installed_packs; i++) { + struct packed_git *p = packs[i]; + + for (j = 0; j < p->num_objects; j++) { + struct pack_midx_entry entry; + + if (!nth_packed_object_oid(&entry.oid, p, j)) + die("unable to get sha1 of object %u in %s", + i, p->pack_name); + + entry.pack_int_id = i; + entry.offset = nth_packed_object_offset(p, j); + + objects[nr_objects] = entry; + nr_objects++; + } + } + + ALLOC_ARRAY(obj_ptrs, nr_objects); + for (i = 0; i < nr_objects; i++) + obj_ptrs[i] = &objects[i]; +>>>>>>> parent of 376b3fe192... 
midx: choose most-recent pack containing duplicate objects *midx_id = write_midx_file(pack_dir, NULL, installed_pack_names, nr_installed_packs, diff --git a/midx.c b/midx.c index ba239f8652a5cc..4da6f9cca977e2 100644 --- a/midx.c +++ b/midx.c @@ -305,9 +305,6 @@ struct pack_midx_entry *nth_midxed_object_entry(struct midxed_git *m, e->pack_int_id = details.pack_int_id; e->offset = details.offset; - /* Use zero for mtime so this entry is "older" than new duplicates */ - e->pack_mtime = 0; - return e; } diff --git a/midx.h b/midx.h index 7acac014f7b137..54801ede10b413 100644 --- a/midx.h +++ b/midx.h @@ -16,7 +16,6 @@ struct pack_midx_entry { struct object_id oid; uint32_t pack_int_id; off_t offset; - timestamp_t pack_mtime; }; struct pack_midx_header { From 1207a5f91d7f1714e1d54e37823e25bc3909ab56 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:12:04 -0400 Subject: [PATCH 72/92] fixup! packfile: remove reprepare_packed_git()/midx loop This reverts commit 19db1026f758b168a499f0f33800aa2d787d4470. 
--- midx.c | 9 +---- packfile.c | 96 ++++++++++++----------------------------------------- packfile.h | 8 ----- sha1-file.c | 2 +- sha1-name.c | 4 +-- 5 files changed, 26 insertions(+), 93 deletions(-) diff --git a/midx.c b/midx.c index 4da6f9cca977e2..04217363149a4c 100644 --- a/midx.c +++ b/midx.c @@ -230,18 +230,11 @@ struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *oid) static int prepare_midxed_git_head(char *pack_dir, int local) { struct midxed_git *m = midxed_git; - struct midxed_git *m_search; - char *midx_head_path; + char *midx_head_path = get_midx_head_filename_dir(pack_dir); if (!core_midx) return 1; - for (m_search = midxed_git; m_search; m_search = m_search->next) { - if (!strcmp(pack_dir, m_search->pack_dir)) - return 1; - } - - midx_head_path = get_midx_head_filename_dir(pack_dir); if (midx_head_path) { midxed_git = load_midxed_git_one(midx_head_path, pack_dir); midxed_git->next = m; diff --git a/packfile.c b/packfile.c index ec31461700f9fc..cd6cd790515e43 100644 --- a/packfile.c +++ b/packfile.c @@ -844,7 +844,7 @@ unsigned long approximate_object_count(void) struct packed_git *p; struct midxed_git *m; - prepare_packed_git_internal(the_repository, USE_MIDX); + prepare_packed_git(the_repository); count = 0; for (m = midxed_git; m; m = m->next) count += m->num_objects; @@ -913,87 +913,39 @@ static void prepare_packed_git_mru(struct repository *r) list_add_tail(&p->mru, &r->objects->packed_git_mru); } -/** - * We have a few states that we can be in. - * - * N: No MIDX or packfiles loaded - * P: No MIDX loaded, all packfiles loaded into packed_git - * M: MIDX loaded, packfiles not in MIDX loaded into packed_git - * - * In state M, we load the MIDX first and only load packfiles - * that are not in the MIDX. - * - * We begin in state N. - * - * We can change states with a call to - * prepare_packed_git_internal(use_midx), depending on the value - * of use_midx. 
- * - * Here are the transition cases: - * - * - State N, use_midx = 0 -> P - * (only load packfiles, skip MIDX) - * - State N, use_midx = 1 -> M - * (load both packfiles and MIDX) - * - State M, use_midx = 0 -> P - * (unload MIDX and add packfiles to packed_git) - * - State M, use_midx = 1 -> M - * (no-op, unless refresh = 1) - * - State P, use_midx = 0 -> P - * (no-op, unless refresh = 1) - * - State P, use_midx = 1 -> P - * (no-op, unless refresh = 1) - * - * We prevent the P -> M transition by setting - * prepare_packed_git_midx_state to 0 when transitioning to P. - * - * Calling reprepare_packed_git_internal(use_midx) signals that we - * want to check the ODB for more packfiles or MIDX files, but - * should not unload the existing files. However, we do trigger - * some transitions. For instance, use_midx = 0 will trigger the - * M -> P transition (if we are in state M). - */ -static int prepare_packed_git_midx_state = 1; -static void prepare_packed_git_with_refresh(struct repository *r, int use_midx, int refresh) +static int prepare_midxed_git_run_once = 0; +void prepare_packed_git_internal(struct repository *r, int use_midx) { struct alternate_object_database *alt; char *obj_dir; - if (!use_midx && prepare_packed_git_midx_state) { - /* - * If this is the first time called with - * use_midx = 0, then close any MIDX that - * may exist and reprepare the packs. 
- */ - close_all_midx(); - prepare_packed_git_midx_state = 0; - refresh = 1; + if (prepare_midxed_git_run_once) { + if (!use_midx) { + prepare_midxed_git_run_once = 0; + close_all_midx(); + reprepare_packed_git(r); + } + return; } - if (r->objects->packed_git_initialized && !refresh) + if (r->objects->packed_git_initialized) return; - r->objects->approximate_object_count_valid = 0; obj_dir = r->objects->objectdir; - if (prepare_packed_git_midx_state) { + if (use_midx) prepare_midxed_git_objdir(obj_dir, 1); - prepare_alt_odb(r); - for (alt = r->objects->alt_odb_list; alt; alt = alt->next) - prepare_midxed_git_objdir(alt->path, 0); - } prepare_packed_git_one(r, obj_dir, 1); prepare_alt_odb(r); - for (alt = r->objects->alt_odb_list; alt; alt = alt->next) + for (alt = r->objects->alt_odb_list; alt; alt = alt->next) { + if (use_midx) + prepare_midxed_git_objdir(alt->path, 0); prepare_packed_git_one(r, alt->path, 0); + } rearrange_packed_git(r); prepare_packed_git_mru(r); r->objects->packed_git_initialized = 1; -} - -void prepare_packed_git_internal(struct repository *r, int use_midx) -{ - prepare_packed_git_with_refresh(r, use_midx, 0); + prepare_midxed_git_run_once = use_midx; } static void prepare_packed_git(struct repository *r) @@ -1001,19 +953,16 @@ static void prepare_packed_git(struct repository *r) prepare_packed_git_internal(r, 0); } -void reprepare_packed_git_internal(struct repository *r, int use_midx) -{ - prepare_packed_git_with_refresh(r, use_midx, 1); -} - void reprepare_packed_git(struct repository *r) { - prepare_packed_git_with_refresh(r, 0, 1); + r->objects->approximate_object_count_valid = 0; + r->objects->packed_git_initialized = 0; + prepare_packed_git(r); } struct packed_git *get_packed_git(struct repository *r) { - prepare_packed_git_with_refresh(r, 0, 0); + prepare_packed_git(r); return r->objects->packed_git; } @@ -1021,7 +970,6 @@ struct list_head *get_packed_git_mru(struct repository *r) { prepare_packed_git(r); return 
&r->objects->packed_git_mru; - prepare_packed_git_with_refresh(r, 0, 1); } unsigned long unpack_object_header_buffer(const unsigned char *buf, @@ -1961,7 +1909,7 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa struct list_head *pos; if (core_midx) { - prepare_packed_git_internal(r, USE_MIDX); + prepare_packed_git_internal(r, 1); if (fill_pack_entry_midx(oid, e)) return 1; } else diff --git a/packfile.h b/packfile.h index 6fec75d274662d..0af3db7ce59a49 100644 --- a/packfile.h +++ b/packfile.h @@ -39,15 +39,7 @@ extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_ #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -/* - * While the MIDX feature is evolving, not all callers are ready to be - * MIDX-aware. In the meantime, call these *_internal(use_midx) methods - * when ready for MIDX (or to specify you are not ready for MIDX). - */ -#define USE_MIDX 1 extern void prepare_packed_git_internal(struct repository *r, int use_midx); -extern void reprepare_packed_git_internal(struct repository *r, int use_midx); - extern void reprepare_packed_git(struct repository *r); extern void install_packed_git(struct repository *r, struct packed_git *pack); diff --git a/sha1-file.c b/sha1-file.c index 593cc4aa1df3b2..2f6395fe806d7a 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -1439,7 +1439,7 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid, /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { - reprepare_packed_git_internal(r, core_midx ? 
USE_MIDX : 0); + reprepare_packed_git(r); if (find_pack_entry(r, real, &e)) break; if (core_virtualize_objects && !tried_hook) { diff --git a/sha1-name.c b/sha1-name.c index 1b4f27dba7203c..f4a325860f5a1e 100644 --- a/sha1-name.c +++ b/sha1-name.c @@ -207,7 +207,7 @@ static void find_short_packed_object(struct disambiguate_state *ds) struct packed_git *p; struct midxed_git *m; - prepare_packed_git_internal(the_repository, USE_MIDX); + prepare_packed_git_internal(the_repository, 1); for (m = midxed_git; m && !ds->ambiguous; m = m->next) unique_in_midx(m, ds); for (p = get_packed_git(the_repository); p && !ds->ambiguous; @@ -631,7 +631,7 @@ static void find_abbrev_len_packed(struct min_abbrev_data *mad) struct packed_git *p; struct midxed_git *m; - prepare_packed_git_internal(the_repository, USE_MIDX); + prepare_packed_git_internal(the_repository, 1); for (m = midxed_git; m; m = m->next) find_abbrev_len_for_midx(m, mad); for (p = get_packed_git(the_repository); p; p = p->next) From 4bb7d1e16c848c1dc6b751d9c17c10042312e6d1 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:12:59 -0400 Subject: [PATCH 73/92] fixup! midx: fix issues with large offsets This reverts commit 7735598aa1d7c2925aafe0b89c771f7886cbf93b. --- midx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/midx.c b/midx.c index 04217363149a4c..0650ebfd88d2c9 100644 --- a/midx.c +++ b/midx.c @@ -725,8 +725,13 @@ const char *write_midx_file(const char *pack_dir, if (large_offset_needed) { chunk_ids[4] = MIDX_CHUNKID_LARGEOFFSETS; +<<<<<<< HEAD chunk_offsets[5] = chunk_offsets[4] + 8 * (uint64_t)nr_large_offset; chunk_ids[5] = MIDX_CHUNKID_PACKNAMES; +======= + chunk_offsets[5] = chunk_offsets[4] + 8 * (uint64_t)nr_large_offset; + chunk_ids[4] = MIDX_CHUNKID_PACKNAMES; +>>>>>>> parent of 7735598aa1... 
midx: fix issues with large offsets chunk_offsets[6] = chunk_offsets[5] + total_name_len; chunk_ids[6] = 0; } else { From ca73ed45bf0255952ce68ccc5877d8ec5e3dbfce Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:15:48 -0400 Subject: [PATCH 74/92] fixup! midx: various cleanups This reverts commit b5a313535786c57f4832aaf044857966ac48ac41. --- Documentation/git-midx.txt | 6 -- builtin/midx.c | 87 +++++++++++++------------- midx.c | 123 +++++++++++++------------------------ midx.h | 35 +++++++---- packfile.c | 27 +++++--- packfile.h | 2 +- t/t5319-midx.sh | 17 +---- 7 files changed, 132 insertions(+), 165 deletions(-) diff --git a/Documentation/git-midx.txt b/Documentation/git-midx.txt index c2b4653228bc34..a38350d0913b36 100644 --- a/Documentation/git-midx.txt +++ b/Documentation/git-midx.txt @@ -51,12 +51,6 @@ OPTIONS EXAMPLES -------- -* Read the midx-head file and output the OID of the head MIDX file. -+ ------------------------------------------------- -$ git midx ------------------------------------------------- - * Write a MIDX file for the packfiles in your local .git folder. 
+ ------------------------------------------------ diff --git a/builtin/midx.c b/builtin/midx.c index ff7300466ca417..a4d425b5d157f2 100644 --- a/builtin/midx.c +++ b/builtin/midx.c @@ -201,9 +201,6 @@ static int build_midx_from_packs( strbuf_setlen(&pack_path, baselen); strbuf_addstr(&pack_path, pack_names[i]); - if (midx && contains_pack(midx, pack_names[i])) - continue; - strbuf_strip_suffix(&pack_path, ".pack"); strbuf_addstr(&pack_path, ".idx"); @@ -221,13 +218,9 @@ static int build_midx_from_packs( strbuf_release(&pack_path); if (!nr_objects || !nr_installed_packs) { - if (opts.has_existing) - *midx_id = oid_to_hex(&opts.old_midx_oid); - else - *midx_id = 0; free(packs); free(installed_pack_names); - return 0; + return 1; } if (midx) @@ -275,34 +268,35 @@ static int build_midx_from_packs( static void update_head_file(const char *pack_dir, const char *midx_id) { struct strbuf head_path = STRBUF_INIT; - int fd; + FILE* f; struct lock_file lk = LOCK_INIT; strbuf_addstr(&head_path, pack_dir); strbuf_addstr(&head_path, "/"); strbuf_addstr(&head_path, "midx-head"); - fd = hold_lock_file_for_update(&lk, head_path.buf, LOCK_DIE_ON_ERROR); + hold_lock_file_for_update(&lk, head_path.buf, LOCK_DIE_ON_ERROR); strbuf_release(&head_path); - if (fd < 0) - die_errno("unable to open midx-head"); + f = fdopen_lock_file(&lk, "w"); + if (!f) + die_errno("unable to fdopen midx-head"); - write_in_full(fd, midx_id, GIT_MAX_HEXSZ); + fprintf(f, "%s", midx_id); commit_lock_file(&lk); } -static int midx_write(void) +static int cmd_midx_write(void) { const char **pack_names = NULL; uint32_t i, nr_packs = 0; - const char *midx_id = 0; + const char *midx_id; DIR *dir; struct dirent *de; - struct midxed_git *midx = NULL; + struct midxed_git *m = NULL; if (opts.has_existing) - midx = get_midxed_git(opts.pack_dir, &opts.old_midx_oid); + m = get_midxed_git(opts.pack_dir, &opts.old_midx_oid); dir = opendir(opts.pack_dir); if (!dir) { @@ -311,7 +305,7 @@ static int midx_write(void) return 1; } 
- nr_packs = 256; + nr_packs = 8; ALLOC_ARRAY(pack_names, nr_packs); i = 0; @@ -320,6 +314,9 @@ static int midx_write(void) continue; if (ends_with(de->d_name, ".pack")) { + if (m && contains_pack(m, de->d_name)) + continue; + ALLOC_GROW(pack_names, i + 1, nr_packs); pack_names[i++] = xstrdup(de->d_name); } @@ -328,14 +325,13 @@ static int midx_write(void) nr_packs = i; closedir(dir); - if (!nr_packs) + if (!nr_packs && opts.has_existing) { + printf("%s\n", oid_to_hex(&opts.old_midx_oid)); goto cleanup; + } - if (build_midx_from_packs(opts.pack_dir, pack_names, nr_packs, &midx_id, midx)) - die("failed to build MIDX"); - - if (!midx_id) - goto cleanup; + if (build_midx_from_packs(opts.pack_dir, pack_names, nr_packs, &midx_id, m)) + die("Failed to build MIDX."); printf("%s\n", midx_id); @@ -344,12 +340,17 @@ static int midx_write(void) if (opts.delete_expired && opts.update_head && opts.has_existing && strcmp(midx_id, oid_to_hex(&opts.old_midx_oid))) { - char *old_path = get_midx_head_filename_oid(opts.pack_dir, &opts.old_midx_oid); - close_midx(midx); - if (remove_path(old_path)) - die("failed to remove path %s", old_path); + struct strbuf old_path = STRBUF_INIT; + strbuf_addstr(&old_path, opts.pack_dir); + strbuf_addstr(&old_path, "/midx-"); + strbuf_addstr(&old_path, oid_to_hex(&opts.old_midx_oid)); + strbuf_addstr(&old_path, ".midx"); + + close_midx(m); + if (remove_path(old_path.buf)) + die("Failed to remove path %s", old_path.buf); - free(old_path); + strbuf_release(&old_path); } cleanup: @@ -358,7 +359,7 @@ static int midx_write(void) return 0; } -static int midx_read(void) +static int cmd_midx_read(void) { struct object_id midx_oid; struct midxed_git *midx; @@ -404,10 +405,10 @@ static int midx_read(void) return 0; } -static int midx_clear(void) +static int cmd_midx_clear(void) { + struct strbuf old_path = STRBUF_INIT; struct strbuf head_path = STRBUF_INIT; - char *old_path; if (!opts.has_existing) return 0; @@ -416,14 +417,18 @@ static int midx_clear(void) 
strbuf_addstr(&head_path, "/"); strbuf_addstr(&head_path, "midx-head"); if (remove_path(head_path.buf)) - die("failed to remove path %s", head_path.buf); - strbuf_release(&head_path); + die("Failed to remove path %s", head_path.buf); + + strbuf_addstr(&old_path, opts.pack_dir); + strbuf_addstr(&old_path, "/midx-"); + strbuf_addstr(&old_path, oid_to_hex(&opts.old_midx_oid)); + strbuf_addstr(&old_path, ".midx"); - old_path = get_midx_head_filename_oid(opts.pack_dir, &opts.old_midx_oid); - if (remove_path(old_path)) - die("failed to remove path %s", old_path); - free(old_path); + if (remove_path(old_path.buf)) + die("Failed to remove path %s", old_path.buf); + strbuf_release(&old_path); + strbuf_release(&head_path); return 0; } @@ -474,11 +479,11 @@ int cmd_midx(int argc, const char **argv, const char *prefix) opts.has_existing = !!get_midx_head_oid(opts.pack_dir, &opts.old_midx_oid); if (opts.write) - return midx_write(); + return cmd_midx_write(); if (opts.read) - return midx_read(); + return cmd_midx_read(); if (opts.clear) - return midx_clear(); + return cmd_midx_clear(); return 0; } diff --git a/midx.c b/midx.c index 0650ebfd88d2c9..b460941fcdb756 100644 --- a/midx.c +++ b/midx.c @@ -4,22 +4,6 @@ #include "packfile.h" #include "midx.h" -#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ -#define MIDX_CHUNKID_PACKLOOKUP 0x504c4f4f /* "PLOO" */ -#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ -#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ -#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ -#define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */ -#define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */ - -#define MIDX_VERSION_GVFS 0x80000001 -#define MIDX_VERSION MIDX_VERSION_GVFS - -#define MIDX_OID_VERSION_SHA1 1 -#define MIDX_OID_LEN_SHA1 20 -#define MIDX_OID_VERSION MIDX_OID_VERSION_SHA1 -#define MIDX_OID_LEN MIDX_OID_LEN_SHA1 - #define MIDX_LARGE_OFFSET_NEEDED 0x80000000 /* MIDX-git global storage */ @@ -50,25 +34,22 @@ struct object_id 
*get_midx_head_oid(const char *pack_dir, struct object_id *oid) return oid; } -char* get_midx_head_filename_oid(const char *pack_dir, - struct object_id *oid) +static const char* get_midx_head_filename_dir(const char *pack_dir) { + struct object_id oid; struct strbuf head_path = STRBUF_INIT; - strbuf_addstr(&head_path, pack_dir); - strbuf_addstr(&head_path, "/midx-"); - strbuf_addstr(&head_path, oid_to_hex(oid)); - strbuf_addstr(&head_path, ".midx"); + const char *result; - return strbuf_detach(&head_path, NULL); -} - -static char* get_midx_head_filename_dir(const char *pack_dir) -{ - struct object_id oid; if (!get_midx_head_oid(pack_dir, &oid)) return 0; - return get_midx_head_filename_oid(pack_dir, &oid); + strbuf_addstr(&head_path, pack_dir); + strbuf_addstr(&head_path, "/midx-"); + strbuf_addstr(&head_path, oid_to_hex(&oid)); + strbuf_addstr(&head_path, ".midx"); + + result = strbuf_detach(&head_path, NULL); + return result; } struct pack_midx_details_internal { @@ -131,17 +112,13 @@ static struct midxed_git *load_midxed_git_one(const char *midx_file, const char hdr = midx_map; if (ntohl(hdr->midx_signature) != MIDX_SIGNATURE) { munmap(midx_map, midx_size); - close(fd); - die("midx signature %X does not match signature %X", + die("MIDX signature %X does not match signature %X", ntohl(hdr->midx_signature), MIDX_SIGNATURE); } - if (ntohl(hdr->midx_version) != MIDX_VERSION) { - munmap(midx_map, midx_size); - close(fd); - die("midx version %X does not match version %X", + if (ntohl(hdr->midx_version) != MIDX_VERSION) + die("MIDX version %X does not match version %X", ntohl(hdr->midx_version), MIDX_VERSION); - } /* Time to fill a midx struct */ midx = alloc_midxed_git(pack_dir); @@ -158,11 +135,8 @@ static struct midxed_git *load_midxed_git_one(const char *midx_file, const char uint32_t chunk_offset2 = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i + 8)); uint64_t chunk_offset = (chunk_offset1 << 32) | chunk_offset2; - if (sizeof(data) == 4 && chunk_offset >> 32) { 
- munmap(midx_map, midx_size); - close(fd); + if (sizeof(data) == 4 && chunk_offset >> 32) die(_("unable to memory-map in 32-bit address space")); - } switch (chunk_id) { case MIDX_CHUNKID_PACKLOOKUP: @@ -193,8 +167,6 @@ static struct midxed_git *load_midxed_git_one(const char *midx_file, const char break; default: - munmap(midx_map, midx_size); - close(fd); die("Unrecognized MIDX chunk id: %08x", chunk_id); } } @@ -218,19 +190,24 @@ static struct midxed_git *load_midxed_git_one(const char *midx_file, const char return midx; } -struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *oid) +struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *midx_oid) { struct midxed_git *m; - char *fname = get_midx_head_filename_oid(pack_dir, oid); - m = load_midxed_git_one(fname, pack_dir); - free(fname); + struct strbuf midx_file = STRBUF_INIT; + strbuf_addstr(&midx_file, pack_dir); + strbuf_addstr(&midx_file, "/midx-"); + strbuf_addstr(&midx_file, oid_to_hex(midx_oid)); + strbuf_addstr(&midx_file, ".midx"); + + m = load_midxed_git_one(midx_file.buf, pack_dir); + strbuf_release(&midx_file); return m; } -static int prepare_midxed_git_head(char *pack_dir, int local) +int prepare_midxed_git_head(char *pack_dir, int local) { struct midxed_git *m = midxed_git; - char *midx_head_path = get_midx_head_filename_dir(pack_dir); + const char *midx_head_path = get_midx_head_filename_dir(pack_dir); if (!core_midx) return 1; @@ -238,7 +215,6 @@ static int prepare_midxed_git_head(char *pack_dir, int local) if (midx_head_path) { midxed_git = load_midxed_git_one(midx_head_path, pack_dir); midxed_git->next = m; - free(midx_head_path); } else if (!m) { midxed_git = load_empty_midxed_git(); } @@ -246,18 +222,6 @@ static int prepare_midxed_git_head(char *pack_dir, int local) return !midxed_git; } -int prepare_midxed_git_objdir(char *obj_dir, int local) -{ - int ret; - struct strbuf pack_dir = STRBUF_INIT; - strbuf_addstr(&pack_dir, obj_dir); - 
strbuf_add(&pack_dir, "/pack", 5); - - ret = prepare_midxed_git_head(pack_dir.buf, local); - strbuf_release(&pack_dir); - return ret; -} - struct pack_midx_details *nth_midxed_object_details(struct midxed_git *m, uint32_t n, struct pack_midx_details *d) @@ -618,12 +582,11 @@ static void sort_packs_by_name(const char **pack_names, uint32_t nr_packs, uint3 } } -const char *write_midx_file(const char *pack_dir, - const char *midx_name, - const char **pack_names, - uint32_t nr_packs, - struct pack_midx_entry *objects, - uint32_t nr_objects) +const char *write_midx_file( + const char *pack_dir, + const char *midx_name, + const char **pack_names, uint32_t nr_packs, + struct pack_midx_entry **objects, uint32_t nr_objects) { struct hashfile *f; int i, chunk, fd; @@ -688,8 +651,8 @@ const char *write_midx_file(const char *pack_dir, hdr.midx_signature = htonl(MIDX_SIGNATURE); hdr.midx_version = htonl(MIDX_VERSION); - hdr.hash_version = MIDX_OID_VERSION; - hdr.hash_len = MIDX_OID_LEN; + hdr.hash_version = MIDX_HASH_VERSION; + hdr.hash_len = MIDX_HASH_LEN; hdr.num_base_midx = 0; hdr.num_packs = htonl(nr_packs); @@ -725,13 +688,8 @@ const char *write_midx_file(const char *pack_dir, if (large_offset_needed) { chunk_ids[4] = MIDX_CHUNKID_LARGEOFFSETS; -<<<<<<< HEAD chunk_offsets[5] = chunk_offsets[4] + 8 * (uint64_t)nr_large_offset; chunk_ids[5] = MIDX_CHUNKID_PACKNAMES; -======= - chunk_offsets[5] = chunk_offsets[4] + 8 * (uint64_t)nr_large_offset; - chunk_ids[4] = MIDX_CHUNKID_PACKNAMES; ->>>>>>> parent of 7735598aa1... 
midx: fix issues with large offsets chunk_offsets[6] = chunk_offsets[5] + total_name_len; chunk_ids[6] = 0; } else { @@ -783,7 +741,7 @@ const char *write_midx_file(const char *pack_dir, break; default: - die("unrecognized MIDX chunk id: %08x", chunk_ids[chunk]); + die("Unrecognized MIDX chunk id: %08x", chunk_ids[chunk]); } } @@ -791,17 +749,18 @@ const char *write_midx_file(const char *pack_dir, if (rename_needed) { - struct object_id oid; - char *fname; + struct strbuf final_name = STRBUF_INIT; - memcpy(oid.hash, final_hash, GIT_MAX_RAWSZ); - fname = get_midx_head_filename_oid(pack_dir, &oid); final_hex = sha1_to_hex(final_hash); + strbuf_addstr(&final_name, pack_dir); + strbuf_addstr(&final_name, "/midx-"); + strbuf_addstr(&final_name, final_hex); + strbuf_addstr(&final_name, ".midx"); - if (rename(midx_name, fname)) - die("failed to rename %s to %s", midx_name, fname); + if (rename(midx_name, final_name.buf)) + die("Failed to rename %s to %s", midx_name, final_name.buf); - free(fname); + strbuf_release(&final_name); } else { final_hex = midx_name; } diff --git a/midx.h b/midx.h index 54801ede10b413..5c8778d47e6f19 100644 --- a/midx.h +++ b/midx.h @@ -5,10 +5,24 @@ #include "object.h" #include "csum-file.h" -extern struct object_id *get_midx_head_oid(const char *pack_dir, - struct object_id *oid); -extern char *get_midx_head_filename_oid(const char *pack_dir, - struct object_id *oid); +#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ +#define MIDX_CHUNKID_PACKLOOKUP 0x504c4f4f /* "PLOO" */ +#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ +#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ +#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ +#define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */ +#define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */ + +#define MIDX_VERSION_GVFS 0x80000001 +#define MIDX_VERSION MIDX_VERSION_GVFS + +#define MIDX_HASH_VERSION_SHA1 1 +#define MIDX_HASH_LEN_SHA1 20 +#define MIDX_HASH_VERSION 
MIDX_HASH_VERSION_SHA1 +#define MIDX_HASH_LEN MIDX_HASH_LEN_SHA1 + +extern struct object_id *get_midx_head_oid(const char *pack_dir, struct object_id *oid); + extern int fill_pack_entry_midx(const struct object_id *oid, struct pack_entry *e); @@ -85,7 +99,7 @@ extern struct midxed_git { } *midxed_git; extern struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *midx_oid); -extern int prepare_midxed_git_objdir(char *obj_dir, int local); +extern int prepare_midxed_git_head(char *pack_dir, int local); struct pack_midx_details { uint32_t pack_int_id; @@ -121,12 +135,11 @@ extern int contains_pack(struct midxed_git *m, const char *pack_name); * * Returns the final name of the MIDX file within pack_dir. */ -extern const char *write_midx_file(const char *pack_dir, - const char *midx_name, - const char **pack_names, - uint32_t nr_packs, - struct pack_midx_entry *objects, - uint32_t nr_objects); +extern const char *write_midx_file( + const char *pack_dir, + const char *midx_name, + const char **pack_names, uint32_t nr_packs, + struct pack_midx_entry **objects, uint32_t nr_objects); extern int close_midx(struct midxed_git *m); extern void close_all_midx(void); diff --git a/packfile.c b/packfile.c index cd6cd790515e43..e0b29a3e6fbad6 100644 --- a/packfile.c +++ b/packfile.c @@ -817,8 +817,7 @@ static void prepare_packed_git_one(struct repository *r, char *objdir, int local ends_with(de->d_name, ".pack") || ends_with(de->d_name, ".bitmap") || ends_with(de->d_name, ".keep") || - ends_with(de->d_name, ".promisor") || - ends_with(de->d_name, ".midx")) + ends_with(de->d_name, ".promisor")) string_list_append(&garbage, path.buf); else report_garbage(PACKDIR_FILE_GARBAGE, path.buf); @@ -914,13 +913,13 @@ static void prepare_packed_git_mru(struct repository *r) } static int prepare_midxed_git_run_once = 0; -void prepare_packed_git_internal(struct repository *r, int use_midx) +void prepare_packed_git_internal(struct repository *r, int midx) { struct 
alternate_object_database *alt; char *obj_dir; if (prepare_midxed_git_run_once) { - if (!use_midx) { + if (!midx) { prepare_midxed_git_run_once = 0; close_all_midx(); reprepare_packed_git(r); @@ -932,20 +931,30 @@ void prepare_packed_git_internal(struct repository *r, int use_midx) return; obj_dir = r->objects->objectdir; - if (use_midx) - prepare_midxed_git_objdir(obj_dir, 1); + if (midx) { + struct strbuf pack_dir = STRBUF_INIT; + strbuf_addstr(&pack_dir, obj_dir); + strbuf_addstr(&pack_dir, "/pack"); + prepare_midxed_git_head(pack_dir.buf, 1); + strbuf_release(&pack_dir); + } prepare_packed_git_one(r, obj_dir, 1); prepare_alt_odb(r); for (alt = r->objects->alt_odb_list; alt; alt = alt->next) { - if (use_midx) - prepare_midxed_git_objdir(alt->path, 0); + if (midx) { + struct strbuf alt_pack_dir = STRBUF_INIT; + strbuf_addstr(&alt_pack_dir, alt->path); + strbuf_addstr(&alt_pack_dir, "/pack"); + prepare_midxed_git_head(alt_pack_dir.buf, 0); + strbuf_release(&alt_pack_dir); + } prepare_packed_git_one(r, alt->path, 0); } rearrange_packed_git(r); prepare_packed_git_mru(r); r->objects->packed_git_initialized = 1; - prepare_midxed_git_run_once = use_midx; + prepare_midxed_git_run_once = midx; } static void prepare_packed_git(struct repository *r) diff --git a/packfile.h b/packfile.h index 0af3db7ce59a49..4994ec4714afe4 100644 --- a/packfile.h +++ b/packfile.h @@ -39,7 +39,7 @@ extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_ #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -extern void prepare_packed_git_internal(struct repository *r, int use_midx); +extern void prepare_packed_git_internal(struct repository *r, int midx); extern void reprepare_packed_git(struct repository *r); extern void install_packed_git(struct repository *r, struct packed_git *pack); diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index a925f6e4681167..cef12cf48bb469 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh 
@@ -8,15 +8,8 @@ test_expect_success \ 'rm -rf .git && git init && git config core.midx true && - git config pack.threads 1' - -test_expect_success \ - 'write-midx with no packs' \ - 'git midx --write --update-head --delete-expired --pack-dir .' - -test_expect_success \ - 'create packs' \ - 'i=1 && + git config pack.threads 1 && + i=1 && while test $i -le 5 do iii=$(printf '%03i' $i) @@ -204,12 +197,6 @@ test_expect_success \ echo 90 >rev-list-expect-7 && cmp -n 2 rev-list-out-7 rev-list-expect-7' -test_expect_success \ - 'write-midx with nothing new' \ - 'midx5=$(git midx --write --update-head --delete-expired --pack-dir .) && - echo ${midx5} > midx-head-5 && - cmp -n 40 midx-head-5 midx-head-expect' - test_expect_success \ 'midx --clear' \ 'git midx --clear --pack-dir . && From 65e523e97687b45cde08b60309296d1776d9d533 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:16:53 -0400 Subject: [PATCH 75/92] fixup! packfile: use midx for object loads This reverts commit 923db4fd482546734f5ea524eb6ebb3ec4ced802. 
--- midx.c | 83 ------------------------------------------------------ midx.h | 3 -- packfile.c | 21 +------------- 3 files changed, 1 insertion(+), 106 deletions(-) diff --git a/midx.c b/midx.c index b460941fcdb756..04249e53bc7eef 100644 --- a/midx.c +++ b/midx.c @@ -308,89 +308,6 @@ int bsearch_midx(struct midxed_git *m, const unsigned char *sha1, uint32_t *pos) return 0; } -static int prepare_midx_pack(struct midxed_git *m, uint32_t pack_int_id) -{ - struct strbuf pack_name = STRBUF_INIT; - - if (pack_int_id >= m->hdr->num_packs) - return 1; - - if (m->packs[pack_int_id]) - return 0; - - strbuf_addstr(&pack_name, m->pack_dir); - strbuf_addstr(&pack_name, "/"); - strbuf_addstr(&pack_name, m->pack_names[pack_int_id]); - strbuf_strip_suffix(&pack_name, ".pack"); - strbuf_addstr(&pack_name, ".idx"); - - m->packs[pack_int_id] = add_packed_git(pack_name.buf, pack_name.len, 1); - strbuf_release(&pack_name); - return !m->packs[pack_int_id]; -} - -static int find_pack_entry_midx(const struct object_id *oid, - struct midxed_git *m, - struct packed_git **p, - off_t *offset) -{ - uint32_t pos; - struct pack_midx_details d; - - if (!bsearch_midx(m, oid->hash, &pos) || - !nth_midxed_object_details(m, pos, &d)) - return 0; - - if (d.pack_int_id >= m->num_packs) - die(_("Bad pack-int-id")); - - /* load packfile, if necessary */ - if (prepare_midx_pack(m, d.pack_int_id)) - return 0; - - *p = m->packs[d.pack_int_id]; - *offset = d.offset; - - return 1; -} - -int fill_pack_entry_midx(const struct object_id *oid, - struct pack_entry *e) -{ - struct packed_git *p; - struct midxed_git *m; - - if (!core_midx) - return 0; - - m = midxed_git; - while (m) - { - off_t offset; - if (!find_pack_entry_midx(oid, m, &p, &offset)) { - m = m->next; - continue; - } - - /* - * We are about to tell the caller where they can locate the - * requested object. 
We better make sure the packfile is - * still here and can be accessed before supplying that - * answer, as it may have been deleted since the MIDX was - * loaded! - */ - if (!is_pack_valid(p)) - return 0; - - e->offset = offset; - e->p = p; - - return 1; - } - - return 0; -} - int contains_pack(struct midxed_git *m, const char *pack_name) { uint32_t first = 0, last = m->num_packs; diff --git a/midx.h b/midx.h index 5c8778d47e6f19..6e7c7b82186441 100644 --- a/midx.h +++ b/midx.h @@ -23,9 +23,6 @@ extern struct object_id *get_midx_head_oid(const char *pack_dir, struct object_id *oid); -extern int fill_pack_entry_midx(const struct object_id *oid, - struct pack_entry *e); - struct pack_midx_entry { struct object_id oid; uint32_t pack_int_id; diff --git a/packfile.c b/packfile.c index e0b29a3e6fbad6..11000c9e54ff83 100644 --- a/packfile.c +++ b/packfile.c @@ -316,18 +316,6 @@ void close_pack(struct packed_git *p) void close_all_packs(struct raw_object_store *o) { struct packed_git *p; - struct midxed_git *m; - - for (m = midxed_git; m; m = m->next) { - int i; - for (i = 0; i < m->num_packs; i++) { - p = m->packs[i]; - if (p && p->do_not_close) - die("BUG: want to close pack marked 'do-not-close'"); - else if (p) - close_pack(p); - } - } for (p = o->packed_git; p; p = p->next) if (p->do_not_close) @@ -954,7 +942,6 @@ void prepare_packed_git_internal(struct repository *r, int midx) rearrange_packed_git(r); prepare_packed_git_mru(r); r->objects->packed_git_initialized = 1; - prepare_midxed_git_run_once = midx; } static void prepare_packed_git(struct repository *r) @@ -1917,13 +1904,7 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa { struct list_head *pos; - if (core_midx) { - prepare_packed_git_internal(r, 1); - if (fill_pack_entry_midx(oid, e)) - return 1; - } else - prepare_packed_git(r); - + prepare_packed_git(r); if (!r->objects->packed_git) return 0; From b1f27838fdf39a017122fe0b5b155704bf7dc413 Mon Sep 17 00:00:00 2001 From: 
Derrick Stolee Date: Mon, 17 Sep 2018 17:17:09 -0400 Subject: [PATCH 76/92] fixup! sha1_name: use midx for abbreviations This reverts commit 50c00add586e877d1b64e8af7ab4f5621b14ea88. --- packfile.c | 2 +- packfile.h | 1 - sha1-name.c | 71 +++-------------------------------------------------- 3 files changed, 4 insertions(+), 70 deletions(-) diff --git a/packfile.c b/packfile.c index 11000c9e54ff83..8a6b95f54bccd6 100644 --- a/packfile.c +++ b/packfile.c @@ -901,7 +901,7 @@ static void prepare_packed_git_mru(struct repository *r) } static int prepare_midxed_git_run_once = 0; -void prepare_packed_git_internal(struct repository *r, int midx) +static void prepare_packed_git_internal(struct repository *r, int midx) { struct alternate_object_database *alt; char *obj_dir; diff --git a/packfile.h b/packfile.h index 4994ec4714afe4..630f35cf31ef74 100644 --- a/packfile.h +++ b/packfile.h @@ -39,7 +39,6 @@ extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_ #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -extern void prepare_packed_git_internal(struct repository *r, int midx); extern void reprepare_packed_git(struct repository *r); extern void install_packed_git(struct repository *r, struct packed_git *pack); diff --git a/sha1-name.c b/sha1-name.c index f4a325860f5a1e..1be9baa2037df6 100644 --- a/sha1-name.c +++ b/sha1-name.c @@ -12,7 +12,6 @@ #include "packfile.h" #include "object-store.h" #include "repository.h" -#include "midx.h" static int get_oid_oneline(const char *, struct object_id *, struct commit_list *); @@ -176,40 +175,10 @@ static void unique_in_pack(struct packed_git *p, } } -static void unique_in_midx(struct midxed_git *m, - struct disambiguate_state *ds) -{ - uint32_t num, i, first = 0; - const struct object_id *current = NULL; - - if (!m->num_objects) - return; - - num = m->num_objects; - bsearch_midx(m, ds->bin_pfx.hash, &first); - - /* - * At this point, "first" is the location 
of the lowest object - * with an object name that could match "bin_pfx". See if we have - * 0, 1 or more objects that actually match(es). - */ - for (i = first; i < num && !ds->ambiguous; i++) { - struct object_id oid; - current = nth_midxed_object_oid(&oid, m, i); - if (!match_sha(ds->len, ds->bin_pfx.hash, current->hash)) - break; - update_candidates(ds, current); - } -} - static void find_short_packed_object(struct disambiguate_state *ds) { struct packed_git *p; - struct midxed_git *m; - prepare_packed_git_internal(the_repository, 1); - for (m = midxed_git; m && !ds->ambiguous; m = m->next) - unique_in_midx(m, ds); for (p = get_packed_git(the_repository); p && !ds->ambiguous; p = p->next) unique_in_pack(p, ds); @@ -560,36 +529,6 @@ static int extend_abbrev_len(const struct object_id *oid, void *cb_data) return 0; } -static void find_abbrev_len_for_midx(struct midxed_git *m, - struct min_abbrev_data *mad) -{ - int match = 0; - uint32_t first = 0; - struct object_id oid; - - if (!m->num_objects) - return; - - match = bsearch_midx(m, mad->oid->hash, &first); - - /* - * first is now the position in the packfile where we would insert - * mad->oid->hash if it does not exist (or the position of - * mad->oid->hash if it does exist). Hence, we consider a maximum of - * three objects nearby for the abbreviation length. 
- */ - mad->init_len = 0; - if (!match && nth_midxed_object_oid(&oid, m, first)) - extend_abbrev_len(&oid, mad); - else if (first < m->num_objects - 1 && - nth_midxed_object_oid(&oid, m, first + 1)) - extend_abbrev_len(&oid, mad); - if (first > 0 && nth_midxed_object_oid(&oid, m, first - 1)) - extend_abbrev_len(&oid, mad); - - mad->init_len = mad->cur_len; -} - static void find_abbrev_len_for_pack(struct packed_git *p, struct min_abbrev_data *mad) { @@ -607,9 +546,9 @@ static void find_abbrev_len_for_pack(struct packed_git *p, /* * first is now the position in the packfile where we would insert - * mad->oid->hash if it does not exist (or the position of - * mad->oid->hash if it does exist). Hence, we consider a maximum of two - * objects nearby for the abbreviation length. + * mad->hash if it does not exist (or the position of mad->hash if + * it does exist). Hence, we consider a maximum of two objects + * nearby for the abbreviation length. */ mad->init_len = 0; if (!match) { @@ -629,11 +568,7 @@ static void find_abbrev_len_for_pack(struct packed_git *p, static void find_abbrev_len_packed(struct min_abbrev_data *mad) { struct packed_git *p; - struct midxed_git *m; - prepare_packed_git_internal(the_repository, 1); - for (m = midxed_git; m; m = m->next) - find_abbrev_len_for_midx(m, mad); for (p = get_packed_git(the_repository); p; p = p->next) find_abbrev_len_for_pack(p, mad); } From d3a986c4b87bb5b231df7ceed0cd5ae49c4f5fba Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:17:21 -0400 Subject: [PATCH 77/92] revert! midx: nth_midxed_object_oid() and bsearch_midx() This reverts commit 7fe5be887c498d363f84e295b279c771e2e02e11. 
--- midx.c | 43 ------------------------------------------- midx.h | 12 ------------ 2 files changed, 55 deletions(-) diff --git a/midx.c b/midx.c index 04249e53bc7eef..62db9c5ceaa024 100644 --- a/midx.c +++ b/midx.c @@ -265,49 +265,6 @@ struct pack_midx_entry *nth_midxed_object_entry(struct midxed_git *m, return e; } -const struct object_id *nth_midxed_object_oid(struct object_id *oid, - struct midxed_git *m, - uint32_t n) -{ - struct pack_midx_entry e; - - if (!nth_midxed_object_entry(m, n, &e)) - return 0; - - hashcpy(oid->hash, e.oid.hash); - return oid; -} - -int bsearch_midx(struct midxed_git *m, const unsigned char *sha1, uint32_t *pos) -{ - uint32_t last, first = 0; - - if (sha1[0]) - first = ntohl(*(uint32_t*)(m->chunk_oid_fanout + 4 * (sha1[0] - 1))); - last = ntohl(*(uint32_t*)(m->chunk_oid_fanout + 4 * sha1[0])); - - while (first < last) { - uint32_t mid = first + (last - first) / 2; - const unsigned char *current; - int cmp; - - current = m->chunk_oid_lookup + m->hdr->hash_len * mid; - cmp = hashcmp(sha1, current); - if (!cmp) { - *pos = mid; - return 1; - } - if (cmp > 0) { - first = mid + 1; - continue; - } - last = mid; - } - - *pos = first; - return 0; -} - int contains_pack(struct midxed_git *m, const char *pack_name) { uint32_t first = 0, last = m->num_packs; diff --git a/midx.h b/midx.h index 6e7c7b82186441..c5b8e4665627d2 100644 --- a/midx.h +++ b/midx.h @@ -109,18 +109,6 @@ extern struct pack_midx_details *nth_midxed_object_details(struct midxed_git *m, extern struct pack_midx_entry *nth_midxed_object_entry(struct midxed_git *m, uint32_t n, struct pack_midx_entry *e); -extern const struct object_id *nth_midxed_object_oid(struct object_id *oid, - struct midxed_git *m, - uint32_t n); - -/* - * Perform a binary search on the object list in a MIDX file for the given sha1. - * - * If the object exists, then return 1 and set *pos to the position of the sha1. 
- * Otherwise, return 0 and set *pos to the position of the lex-first object greater - * than the given sha1. - */ -extern int bsearch_midx(struct midxed_git *m, const unsigned char *sha1, uint32_t *pos); extern int contains_pack(struct midxed_git *m, const char *pack_name); From 5e7f5a14e44f420416700c22a0c998b9c151142d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:18:03 -0400 Subject: [PATCH 78/92] fixup! midx: use midx for approximate object count This reverts commit 7fbc3044ffe351195fcd57b540228d162723bc4c. --- packfile.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/packfile.c b/packfile.c index 8a6b95f54bccd6..877e752e43d5f9 100644 --- a/packfile.c +++ b/packfile.c @@ -829,12 +829,9 @@ unsigned long approximate_object_count(void) if (!the_repository->objects->approximate_object_count_valid) { unsigned long count; struct packed_git *p; - struct midxed_git *m; prepare_packed_git(the_repository); count = 0; - for (m = midxed_git; m; m = m->next) - count += m->num_objects; for (p = the_repository->objects->packed_git; p; p = p->next) { if (open_pack_index(p)) continue; From f8d6af9c685a64864fb0450c789a6df6df586db2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:18:25 -0400 Subject: [PATCH 79/92] fixup! packfile.c: create prepare_packed_git_internal(midx) This reverts commit b842a4c2d949901e6d48a21525ca0c93967752bd. 
--- midx.c | 69 +------------------------------------------------ midx.h | 6 ++--- packfile.c | 52 +++---------------------------------- t/t5319-midx.sh | 54 +++----------------------------------- 4 files changed, 10 insertions(+), 171 deletions(-) diff --git a/midx.c b/midx.c index 62db9c5ceaa024..59d3ebf94d491a 100644 --- a/midx.c +++ b/midx.c @@ -6,9 +6,6 @@ #define MIDX_LARGE_OFFSET_NEEDED 0x80000000 -/* MIDX-git global storage */ -struct midxed_git *midxed_git = 0; - struct object_id *get_midx_head_oid(const char *pack_dir, struct object_id *oid) { struct strbuf head_filename = STRBUF_INIT; @@ -34,24 +31,6 @@ struct object_id *get_midx_head_oid(const char *pack_dir, struct object_id *oid) return oid; } -static const char* get_midx_head_filename_dir(const char *pack_dir) -{ - struct object_id oid; - struct strbuf head_path = STRBUF_INIT; - const char *result; - - if (!get_midx_head_oid(pack_dir, &oid)) - return 0; - - strbuf_addstr(&head_path, pack_dir); - strbuf_addstr(&head_path, "/midx-"); - strbuf_addstr(&head_path, oid_to_hex(&oid)); - strbuf_addstr(&head_path, ".midx"); - - result = strbuf_detach(&head_path, NULL); - return result; -} - struct pack_midx_details_internal { uint32_t pack_int_id; uint32_t internal_offset; @@ -204,24 +183,6 @@ struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *midx_o return m; } -int prepare_midxed_git_head(char *pack_dir, int local) -{ - struct midxed_git *m = midxed_git; - const char *midx_head_path = get_midx_head_filename_dir(pack_dir); - - if (!core_midx) - return 1; - - if (midx_head_path) { - midxed_git = load_midxed_git_one(midx_head_path, pack_dir); - midxed_git->next = m; - } else if (!m) { - midxed_git = load_empty_midxed_git(); - } - - return !midxed_git; -} - struct pack_midx_details *nth_midxed_object_details(struct midxed_git *m, uint32_t n, struct pack_midx_details *d) @@ -642,43 +603,15 @@ const char *write_midx_file( return final_hex; } -int close_midx(struct midxed_git *m) +extern int 
close_midx(struct midxed_git *m) { - int i; if (m->midx_fd < 0) return 0; - for (i = 0; i < m->num_packs; i++) { - if (m->packs[i]) { - close_pack(m->packs[i]); - free(m->packs[i]); - m->packs[i] = NULL; - } - } - munmap((void *)m->data, m->data_len); m->data = 0; close(m->midx_fd); m->midx_fd = -1; - - free(m->packs); - free(m->pack_names); - return 1; } - -void close_all_midx(void) -{ - struct midxed_git *m = midxed_git; - struct midxed_git *next; - - while (m) { - next = m->next; - close_midx(m); - free(m); - m = next; - } - - midxed_git = 0; -} diff --git a/midx.h b/midx.h index c5b8e4665627d2..f2f35da81d6b03 100644 --- a/midx.h +++ b/midx.h @@ -39,7 +39,7 @@ struct pack_midx_header { uint32_t num_packs; }; -extern struct midxed_git { +struct midxed_git { struct midxed_git *next; int midx_fd; @@ -93,10 +93,9 @@ extern struct midxed_git { /* something like ".git/objects/pack" */ char pack_dir[FLEX_ARRAY]; /* more */ -} *midxed_git; +}; extern struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *midx_oid); -extern int prepare_midxed_git_head(char *pack_dir, int local); struct pack_midx_details { uint32_t pack_int_id; @@ -127,6 +126,5 @@ extern const char *write_midx_file( struct pack_midx_entry **objects, uint32_t nr_objects); extern int close_midx(struct midxed_git *m); -extern void close_all_midx(void); #endif diff --git a/packfile.c b/packfile.c index 877e752e43d5f9..ebcb5742ec748d 100644 --- a/packfile.c +++ b/packfile.c @@ -15,7 +15,6 @@ #include "tree-walk.h" #include "tree.h" #include "object-store.h" -#include "midx.h" char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, @@ -761,7 +760,6 @@ static void prepare_packed_git_one(struct repository *r, char *objdir, int local dirnamelen = path.len; while ((de = readdir(dir)) != NULL) { struct packed_git *p; - struct midxed_git *m; size_t base_len; if (is_dot_or_dotdot(de->d_name)) @@ -772,15 +770,7 @@ static void prepare_packed_git_one(struct repository *r, char *objdir, int 
local base_len = path.len; if (strip_suffix_mem(path.buf, &base_len, ".idx")) { - struct strbuf pack_name = STRBUF_INIT; - strbuf_addstr(&pack_name, de->d_name); - strbuf_setlen(&pack_name, pack_name.len - 3); - strbuf_add(&pack_name, "pack", 4); - /* Don't reopen a pack we already have. */ - for (m = midxed_git; m; m = m->next) - if (contains_pack(m, pack_name.buf)) - break; for (p = r->objects->packed_git; p; p = p->next) { size_t len; @@ -789,7 +779,7 @@ static void prepare_packed_git_one(struct repository *r, char *objdir, int local !memcmp(p->pack_name, path.buf, len)) break; } - if (m == NULL && p == NULL && + if (p == NULL && /* * See if it really is a valid .idx file with * corresponding .pack file that we can map. @@ -897,55 +887,21 @@ static void prepare_packed_git_mru(struct repository *r) list_add_tail(&p->mru, &r->objects->packed_git_mru); } -static int prepare_midxed_git_run_once = 0; -static void prepare_packed_git_internal(struct repository *r, int midx) +static void prepare_packed_git(struct repository *r) { struct alternate_object_database *alt; - char *obj_dir; - - if (prepare_midxed_git_run_once) { - if (!midx) { - prepare_midxed_git_run_once = 0; - close_all_midx(); - reprepare_packed_git(r); - } - return; - } if (r->objects->packed_git_initialized) return; - - obj_dir = r->objects->objectdir; - if (midx) { - struct strbuf pack_dir = STRBUF_INIT; - strbuf_addstr(&pack_dir, obj_dir); - strbuf_addstr(&pack_dir, "/pack"); - prepare_midxed_git_head(pack_dir.buf, 1); - strbuf_release(&pack_dir); - } - - prepare_packed_git_one(r, obj_dir, 1); + prepare_packed_git_one(r, r->objects->objectdir, 1); prepare_alt_odb(r); - for (alt = r->objects->alt_odb_list; alt; alt = alt->next) { - if (midx) { - struct strbuf alt_pack_dir = STRBUF_INIT; - strbuf_addstr(&alt_pack_dir, alt->path); - strbuf_addstr(&alt_pack_dir, "/pack"); - prepare_midxed_git_head(alt_pack_dir.buf, 0); - strbuf_release(&alt_pack_dir); - } + for (alt = r->objects->alt_odb_list; alt; alt = 
alt->next) prepare_packed_git_one(r, alt->path, 0); - } rearrange_packed_git(r); prepare_packed_git_mru(r); r->objects->packed_git_initialized = 1; } -static void prepare_packed_git(struct repository *r) -{ - prepare_packed_git_internal(r, 0); -} - void reprepare_packed_git(struct repository *r) { r->objects->approximate_object_count_valid = 0; diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index cef12cf48bb469..230dc4793dc486 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -32,12 +32,6 @@ test_expect_success \ } >obj-list && git update-ref HEAD $commit' -test_expect_success \ - 'Verify normal git operations succeed' \ - 'git rev-list --all --objects | wc -l >rev-list-out-1 && - echo 18 >rev-list-expect-1 && - cmp -n 2 rev-list-out-1 rev-list-expect-1' - test_expect_success \ 'write-midx from index version 1' \ 'pack1=$(git pack-objects --index-version=1 test-1 >midx-read-expect-1 && cmp midx-read-out-1 midx-read-expect-1' -test_expect_success \ - 'Verify normal git operations succeed' \ - 'git rev-list --all --objects | wc -l >rev-list-out-2 && - echo 18 >rev-list-expect-2 && - cmp -n 2 rev-list-out-2 rev-list-expect-2' - test_expect_success \ 'write-midx from index version 2' \ 'rm "test-1-${pack1}.pack" && @@ -77,12 +65,6 @@ test_expect_success \ echo "pack_dir: ." 
>>midx-read-expect-2 && cmp midx-read-out-2 midx-read-expect-2' -test_expect_success \ - 'Verify normal git operations succeed' \ - 'git rev-list --all --objects | wc -l >rev-list-out-3 && - echo 18 >rev-list-expect-3 && - cmp -n 2 rev-list-out-3 rev-list-expect-3' - test_expect_success \ 'Add more objects' \ 'i=6 && @@ -106,18 +88,12 @@ test_expect_success \ echo $tree && git ls-tree $tree | sed -e "s/.* \\([0-9a-f]*\\) .*/\\1/" } >obj-list && - git update-ref HEAD $commit && - pack3=$(git pack-objects --index-version=2 test-pack rev-list-out-4 && - echo 35 >rev-list-expect-4 && - cmp -n 2 rev-list-out-4 rev-list-expect-4' + git update-ref HEAD $commit' test_expect_success \ 'write-midx with two packs' \ - 'midx3=$(git midx --write --update-head --delete-expired --pack-dir .) && + 'pack3=$(git pack-objects --index-version=2 test-pack midx-head-expect && @@ -134,12 +110,6 @@ test_expect_success \ git midx --read --pack-dir . >midx-read-out-3-head && cmp midx-read-out-3-head midx-read-expect-3' -test_expect_success \ - 'Verify normal git operations succeed' \ - 'git rev-list --all --objects | wc -l >rev-list-out-5 && - echo 35 >rev-list-expect-5 && - cmp -n 2 rev-list-out-5 rev-list-expect-5' - test_expect_success \ 'Add more packs' \ 'j=0 && @@ -167,12 +137,6 @@ test_expect_success \ j=$(expr $j + 1) || return 1 done' -test_expect_success \ - 'Verify normal git operations succeed in mixed mode' \ - 'git rev-list --all --objects | wc -l >rev-list-out-6 && - echo 90 >rev-list-expect-6 && - cmp -n 2 rev-list-out-6 rev-list-expect-6' - test_expect_success \ 'write-midx with twelve packs' \ 'midx4=$(git midx --write --update-head --delete-expired --pack-dir .) && @@ -191,22 +155,10 @@ test_expect_success \ git midx --read --pack-dir . 
>midx-read-out-4-head && cmp midx-read-out-4-head midx-read-expect-4' -test_expect_success \ - 'Verify normal git operations succeed' \ - 'git rev-list --all --objects | wc -l >rev-list-out-7 && - echo 90 >rev-list-expect-7 && - cmp -n 2 rev-list-out-7 rev-list-expect-7' - test_expect_success \ 'midx --clear' \ 'git midx --clear --pack-dir . && ! test -f "midx-${midx4}.midx" && ! test -f "midx-head"' -test_expect_success \ - 'Verify normal git operations succeed' \ - 'git rev-list --all --objects | wc -l >rev-list-out-8 && - echo 90 >rev-list-expect-8 && - cmp -n 2 rev-list-out-8 rev-list-expect-8' - test_done From 3e477c505c9afdc82c8d67645710202191265ff0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:18:36 -0400 Subject: [PATCH 80/92] fixup! midx: teach 'git midx --write --update-head --delete-expired' This reverts commit f31bdfd5cb050a3560a35f2c453a6a9548f15cd4. --- Documentation/git-midx.txt | 4 ---- builtin/midx.c | 20 +------------------- midx.c | 13 ------------- midx.h | 2 -- t/t5319-midx.sh | 6 ++---- 5 files changed, 3 insertions(+), 42 deletions(-) diff --git a/Documentation/git-midx.txt b/Documentation/git-midx.txt index a38350d0913b36..f98837ffc18bb1 100644 --- a/Documentation/git-midx.txt +++ b/Documentation/git-midx.txt @@ -44,10 +44,6 @@ OPTIONS If specified with --write, update the midx-head file to point to the written midx file. ---delete-expired:: - If specified with --write and --update-head, delete the midx file - previously pointed to by midx-head (if changed). 
- EXAMPLES -------- diff --git a/builtin/midx.c b/builtin/midx.c index a4d425b5d157f2..2f261a0f7afd49 100644 --- a/builtin/midx.c +++ b/builtin/midx.c @@ -11,7 +11,7 @@ static char const * const builtin_midx_usage[] ={ N_("git midx [--pack-dir ]"), - N_("git midx --write [--pack-dir ] [--update-head] [--delete-expired]"), + N_("git midx --write [--pack-dir ] [--update-head]"), N_("git midx --read [--midx-id=]"), N_("git midx --clear [--pack-dir ]"), NULL @@ -21,7 +21,6 @@ static struct opts_midx { const char *pack_dir; int write; int update_head; - int delete_expired; int read; const char *midx_id; int clear; @@ -338,21 +337,6 @@ static int cmd_midx_write(void) if (opts.update_head) update_head_file(opts.pack_dir, midx_id); - if (opts.delete_expired && opts.update_head && opts.has_existing && - strcmp(midx_id, oid_to_hex(&opts.old_midx_oid))) { - struct strbuf old_path = STRBUF_INIT; - strbuf_addstr(&old_path, opts.pack_dir); - strbuf_addstr(&old_path, "/midx-"); - strbuf_addstr(&old_path, oid_to_hex(&opts.old_midx_oid)); - strbuf_addstr(&old_path, ".midx"); - - close_midx(m); - if (remove_path(old_path.buf)) - die("Failed to remove path %s", old_path.buf); - - strbuf_release(&old_path); - } - cleanup: if (pack_names) FREE_AND_NULL(pack_names); @@ -442,8 +426,6 @@ int cmd_midx(int argc, const char **argv, const char *prefix) N_("write midx file")), OPT_BOOL('u', "update-head", &opts.update_head, N_("update midx-head to written midx file")), - OPT_BOOL('d', "delete-expired", &opts.delete_expired, - N_("delete expired head midx file")), OPT_BOOL('r', "read", &opts.read, N_("read midx file")), OPT_BOOL('c', "clear", &opts.clear, diff --git a/midx.c b/midx.c index 59d3ebf94d491a..b5cf5635318100 100644 --- a/midx.c +++ b/midx.c @@ -602,16 +602,3 @@ const char *write_midx_file( return final_hex; } - -extern int close_midx(struct midxed_git *m) -{ - if (m->midx_fd < 0) - return 0; - - munmap((void *)m->data, m->data_len); - m->data = 0; - - close(m->midx_fd); - m->midx_fd 
= -1; - return 1; -} diff --git a/midx.h b/midx.h index f2f35da81d6b03..54d7a99419bf0d 100644 --- a/midx.h +++ b/midx.h @@ -125,6 +125,4 @@ extern const char *write_midx_file( const char **pack_names, uint32_t nr_packs, struct pack_midx_entry **objects, uint32_t nr_objects); -extern int close_midx(struct midxed_git *m); - #endif diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 230dc4793dc486..684639bec59c42 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -93,9 +93,8 @@ test_expect_success \ test_expect_success \ 'write-midx with two packs' \ 'pack3=$(git pack-objects --index-version=2 test-pack midx-head-expect && cmp -n 40 midx-head midx-head-expect && git midx --read --pack-dir . --midx-id=${midx3} >midx-read-out-3 && @@ -139,9 +138,8 @@ test_expect_success \ test_expect_success \ 'write-midx with twelve packs' \ - 'midx4=$(git midx --write --update-head --delete-expired --pack-dir .) && + 'midx4=$(git midx --write --update-head --pack-dir .) && test -f midx-${midx4}.midx && - ! test -f midx-${midx3}.midx && echo ${midx4} > midx-head-expect && cmp -n 40 midx-head midx-head-expect && git midx --read --pack-dir . --midx-id=${midx4} >midx-read-out-4 && From 548c79ea8c20e247012bb237d8f0cd3dd3a23421 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:19:37 -0400 Subject: [PATCH 81/92] fixup! midx: use existing midx during 'git midx --write' This reverts commit 48064776d2df63ea85e9f8e1dffa3e6086ee325d. 
--- builtin/midx.c | 34 ++++------------------------------ t/t5319-midx.sh | 6 +++--- 2 files changed, 7 insertions(+), 33 deletions(-) diff --git a/builtin/midx.c b/builtin/midx.c index 2f261a0f7afd49..8038efbb3392d5 100644 --- a/builtin/midx.c +++ b/builtin/midx.c @@ -171,7 +171,7 @@ static void dedupe_and_sort_entries( static int build_midx_from_packs( const char *pack_dir, const char **pack_names, uint32_t nr_packs, - const char **midx_id, struct midxed_git *midx) + const char **midx_id) { struct packed_git **packs; const char **installed_pack_names; @@ -182,17 +182,9 @@ static int build_midx_from_packs( struct strbuf pack_path = STRBUF_INIT; int baselen; - if (midx) - nr_total_packs += midx->num_packs; - ALLOC_ARRAY(packs, nr_total_packs); ALLOC_ARRAY(installed_pack_names, nr_total_packs); - if (midx) { - for (i = 0; i < midx->num_packs; i++) - installed_pack_names[nr_installed_packs++] = midx->pack_names[i]; - } - strbuf_addstr(&pack_path, pack_dir); strbuf_addch(&pack_path, '/'); baselen = pack_path.len; @@ -222,15 +214,9 @@ static int build_midx_from_packs( return 1; } - if (midx) - nr_objects += midx->num_objects; - ALLOC_ARRAY(objects, nr_objects); nr_objects = 0; - for (i = 0; midx && i < midx->num_objects; i++) - nth_midxed_object_entry(midx, i, &objects[nr_objects++]); - for (i = pack_offset; i < nr_installed_packs; i++) { struct packed_git *p = packs[i]; @@ -252,7 +238,6 @@ static int build_midx_from_packs( ALLOC_ARRAY(obj_ptrs, nr_objects); for (i = 0; i < nr_objects; i++) obj_ptrs[i] = &objects[i]; ->>>>>>> parent of 376b3fe192... 
midx: choose most-recent pack containing duplicate objects *midx_id = write_midx_file(pack_dir, NULL, installed_pack_names, nr_installed_packs, @@ -292,10 +277,6 @@ static int cmd_midx_write(void) const char *midx_id; DIR *dir; struct dirent *de; - struct midxed_git *m = NULL; - - if (opts.has_existing) - m = get_midxed_git(opts.pack_dir, &opts.old_midx_oid); dir = opendir(opts.pack_dir); if (!dir) { @@ -313,23 +294,17 @@ static int cmd_midx_write(void) continue; if (ends_with(de->d_name, ".pack")) { - if (m && contains_pack(m, de->d_name)) - continue; + char *t = xstrdup(de->d_name); ALLOC_GROW(pack_names, i + 1, nr_packs); - pack_names[i++] = xstrdup(de->d_name); + pack_names[i++] = t; } } nr_packs = i; closedir(dir); - if (!nr_packs && opts.has_existing) { - printf("%s\n", oid_to_hex(&opts.old_midx_oid)); - goto cleanup; - } - - if (build_midx_from_packs(opts.pack_dir, pack_names, nr_packs, &midx_id, m)) + if (build_midx_from_packs(opts.pack_dir, pack_names, nr_packs, &midx_id)) die("Failed to build MIDX."); printf("%s\n", midx_id); @@ -337,7 +312,6 @@ static int cmd_midx_write(void) if (opts.update_head) update_head_file(opts.pack_dir, midx_id); -cleanup: if (pack_names) FREE_AND_NULL(pack_names); return 0; diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 684639bec59c42..14c2941ced93ec 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -92,7 +92,7 @@ test_expect_success \ test_expect_success \ 'write-midx with two packs' \ - 'pack3=$(git pack-objects --index-version=2 test-pack midx-head-expect && @@ -103,7 +103,7 @@ test_expect_success \ echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-3 && echo "pack_names:" >>midx-read-expect-3 && echo "test-2-${pack2}.pack" >>midx-read-expect-3 && - echo "test-pack-${pack3}.pack" >>midx-read-expect-3 && + echo "test-3-${pack3}.pack" >>midx-read-expect-3 && echo "pack_dir: ." >>midx-read-expect-3 && cmp midx-read-out-3 midx-read-expect-3 && git midx --read --pack-dir . 
>midx-read-out-3-head && @@ -131,7 +131,7 @@ test_expect_success \ git ls-tree $tree | sed -e "s/.* \\([0-9a-f]*\\) .*/\\1/" } >obj-list && git update-ref HEAD $commit && - git pack-objects --index-version=2 test-pack Date: Mon, 17 Sep 2018 17:20:42 -0400 Subject: [PATCH 82/92] fixup! midx: read object details from midx files This reverts commit 44d0e3794bb9a9e26b7ff06a7e1b2552f6040555. --- midx.c | 43 ------------------------------------------- midx.h | 12 ------------ 2 files changed, 55 deletions(-) diff --git a/midx.c b/midx.c index b5cf5635318100..c4f42713af6e0b 100644 --- a/midx.c +++ b/midx.c @@ -183,49 +183,6 @@ struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *midx_o return m; } -struct pack_midx_details *nth_midxed_object_details(struct midxed_git *m, - uint32_t n, - struct pack_midx_details *d) -{ - struct pack_midx_details_internal *d_internal; - const unsigned char *details = m->chunk_object_offsets; - - if (n >= m->num_objects) { - return NULL; - } - - d_internal = (struct pack_midx_details_internal*)(details + 8 * n); - d->pack_int_id = ntohl(d_internal->pack_int_id); - d->offset = ntohl(d_internal->internal_offset); - - if (m->chunk_large_offsets && d->offset & MIDX_LARGE_OFFSET_NEEDED) { - uint32_t large_offset = d->offset ^ MIDX_LARGE_OFFSET_NEEDED; - const unsigned char *large_offsets = m->chunk_large_offsets + 8 * large_offset; - - d->offset = (((uint64_t)ntohl(*((uint32_t *)(large_offsets + 0)))) << 32) | - ntohl(*((uint32_t *)(large_offsets + 4))); - } - - return d; -} - -struct pack_midx_entry *nth_midxed_object_entry(struct midxed_git *m, - uint32_t n, - struct pack_midx_entry *e) -{ - struct pack_midx_details details; - const unsigned char *index = m->chunk_oid_lookup; - - if (!nth_midxed_object_details(m, n, &details)) - return NULL; - - memcpy(e->oid.hash, index + m->hdr->hash_len * n, m->hdr->hash_len); - e->pack_int_id = details.pack_int_id; - e->offset = details.offset; - - return e; -} - int 
contains_pack(struct midxed_git *m, const char *pack_name) { uint32_t first = 0, last = m->num_packs; diff --git a/midx.h b/midx.h index 54d7a99419bf0d..53f1882b3ca81e 100644 --- a/midx.h +++ b/midx.h @@ -97,18 +97,6 @@ struct midxed_git { extern struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *midx_oid); -struct pack_midx_details { - uint32_t pack_int_id; - off_t offset; -}; - -extern struct pack_midx_details *nth_midxed_object_details(struct midxed_git *m, - uint32_t n, - struct pack_midx_details *d); -extern struct pack_midx_entry *nth_midxed_object_entry(struct midxed_git *m, - uint32_t n, - struct pack_midx_entry *e); - extern int contains_pack(struct midxed_git *m, const char *pack_name); /* From 7e01313e05413b6c2e69c33daaa1a703f447df0c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:21:32 -0400 Subject: [PATCH 83/92] fixup! midx: binary search into pack names This reverts commit 31277bd6b835273a8fb3f3043dbcd4a4681715bf. --- midx.c | 24 ++++-------------------- midx.h | 2 -- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/midx.c b/midx.c index c4f42713af6e0b..0099883a916df5 100644 --- a/midx.c +++ b/midx.c @@ -183,27 +183,11 @@ struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *midx_o return m; } -int contains_pack(struct midxed_git *m, const char *pack_name) +static int midx_oid_compare(const void *_a, const void *_b) { - uint32_t first = 0, last = m->num_packs; - - while (first < last) { - uint32_t mid = first + (last - first) / 2; - const char *current; - int cmp; - - current = m->pack_names[mid]; - cmp = strcmp(pack_name, current); - if (!cmp) - return 1; - if (cmp > 0) { - first = mid + 1; - continue; - } - last = mid; - } - - return 0; + struct pack_midx_entry *a = *(struct pack_midx_entry **)_a; + struct pack_midx_entry *b = *(struct pack_midx_entry **)_b; + return oidcmp(&a->oid, &b->oid); } static void write_midx_chunk_packlookup( diff --git a/midx.h b/midx.h index 
53f1882b3ca81e..01b9be505b3bfe 100644 --- a/midx.h +++ b/midx.h @@ -97,8 +97,6 @@ struct midxed_git { extern struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *midx_oid); -extern int contains_pack(struct midxed_git *m, const char *pack_name); - /* * Write a single MIDX file storing the given entries for the * given list of packfiles. If midx_name is null, then a temp From 01a7c6117b6180ae753e8c00ca3ebf73559df0f9 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:21:53 -0400 Subject: [PATCH 84/92] fixup! midx: teach git to clear midx files This reverts commit 0dad18b147cc99d760745b83ea001a07c1dd6ab4. --- Documentation/git-midx.txt | 8 ++------ builtin/midx.c | 35 +---------------------------------- t/t5319-midx.sh | 6 ------ 3 files changed, 3 insertions(+), 46 deletions(-) diff --git a/Documentation/git-midx.txt b/Documentation/git-midx.txt index f98837ffc18bb1..100acff674ccd7 100644 --- a/Documentation/git-midx.txt +++ b/Documentation/git-midx.txt @@ -22,14 +22,10 @@ OPTIONS Use given directory for the location of packfiles, pack-indexes, and MIDX files. ---clear:: - If specified, delete the midx file specified by midx-head, and - midx-head. (Cannot be combined with --write or --read.) - --read:: If specified, read a midx file specified by the midx-head file and output basic details about the midx file. (Cannot be combined - with --write or --clear.) + with --write.) --midx-id :: If specified with --read, use the given oid to read midx-[oid].midx @@ -38,7 +34,7 @@ OPTIONS --write:: If specified, write a new midx file to the pack directory using the packfiles present. Outputs the hash of the result midx file. - (Cannot be combined with --read or --clear.) + (Cannot be combined with --read.) 
--update-head:: If specified with --write, update the midx-head file to point to diff --git a/builtin/midx.c b/builtin/midx.c index 8038efbb3392d5..1e31fcfe86a427 100644 --- a/builtin/midx.c +++ b/builtin/midx.c @@ -13,7 +13,6 @@ static char const * const builtin_midx_usage[] ={ N_("git midx [--pack-dir ]"), N_("git midx --write [--pack-dir ] [--update-head]"), N_("git midx --read [--midx-id=]"), - N_("git midx --clear [--pack-dir ]"), NULL }; @@ -23,7 +22,6 @@ static struct opts_midx { int update_head; int read; const char *midx_id; - int clear; int has_existing; struct object_id old_midx_oid; } opts; @@ -363,33 +361,6 @@ static int cmd_midx_read(void) return 0; } -static int cmd_midx_clear(void) -{ - struct strbuf old_path = STRBUF_INIT; - struct strbuf head_path = STRBUF_INIT; - - if (!opts.has_existing) - return 0; - - strbuf_addstr(&head_path, opts.pack_dir); - strbuf_addstr(&head_path, "/"); - strbuf_addstr(&head_path, "midx-head"); - if (remove_path(head_path.buf)) - die("Failed to remove path %s", head_path.buf); - - strbuf_addstr(&old_path, opts.pack_dir); - strbuf_addstr(&old_path, "/midx-"); - strbuf_addstr(&old_path, oid_to_hex(&opts.old_midx_oid)); - strbuf_addstr(&old_path, ".midx"); - - if (remove_path(old_path.buf)) - die("Failed to remove path %s", old_path.buf); - - strbuf_release(&old_path); - strbuf_release(&head_path); - return 0; -} - int cmd_midx(int argc, const char **argv, const char *prefix) { static struct option builtin_midx_options[] = { @@ -402,8 +373,6 @@ int cmd_midx(int argc, const char **argv, const char *prefix) N_("update midx-head to written midx file")), OPT_BOOL('r', "read", &opts.read, N_("read midx file")), - OPT_BOOL('c', "clear", &opts.clear, - N_("clear midx file and midx-head")), { OPTION_STRING, 'M', "midx-id", &opts.midx_id, N_("oid"), N_("An OID for a specific midx file in the pack-dir."), @@ -422,7 +391,7 @@ int cmd_midx(int argc, const char **argv, const char *prefix) builtin_midx_options, builtin_midx_usage, 0); - 
if (opts.write + opts.read + opts.clear > 1) + if (opts.write + opts.read > 1) usage_with_options(builtin_midx_usage, builtin_midx_options); if (!opts.pack_dir) { @@ -438,8 +407,6 @@ int cmd_midx(int argc, const char **argv, const char *prefix) return cmd_midx_write(); if (opts.read) return cmd_midx_read(); - if (opts.clear) - return cmd_midx_clear(); return 0; } diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 14c2941ced93ec..4ad84337656715 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -153,10 +153,4 @@ test_expect_success \ git midx --read --pack-dir . >midx-read-out-4-head && cmp midx-read-out-4-head midx-read-expect-4' -test_expect_success \ - 'midx --clear' \ - 'git midx --clear --pack-dir . && - ! test -f "midx-${midx4}.midx" && - ! test -f "midx-head"' - test_done From df0a171bc543eb43840d7caf63d4d9405ef3e309 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:22:11 -0400 Subject: [PATCH 85/92] fixup! midx: read midx-head for latest midx file This reverts commit 8ab5f90d1f30c6109da8d5b1ba720d4f16b933b1. --- Documentation/git-midx.txt | 8 ++------ builtin/midx.c | 6 ++---- midx.c | 25 ------------------------- midx.h | 2 -- t/t5319-midx.sh | 8 ++------ 5 files changed, 6 insertions(+), 43 deletions(-) diff --git a/Documentation/git-midx.txt b/Documentation/git-midx.txt index 100acff674ccd7..b19ac07a013c6f 100644 --- a/Documentation/git-midx.txt +++ b/Documentation/git-midx.txt @@ -23,14 +23,10 @@ OPTIONS and MIDX files. --read:: - If specified, read a midx file specified by the midx-head file - and output basic details about the midx file. (Cannot be combined + If specified, read a midx file specified by --midx-id and + output basic details about the midx file. (Cannot be combined with --write.) ---midx-id :: - If specified with --read, use the given oid to read midx-[oid].midx - instead of using midx-head. - --write:: If specified, write a new midx file to the pack directory using the packfiles present. 
Outputs the hash of the result midx file. diff --git a/builtin/midx.c b/builtin/midx.c index 1e31fcfe86a427..27f67f03ce065e 100644 --- a/builtin/midx.c +++ b/builtin/midx.c @@ -323,8 +323,8 @@ static int cmd_midx_read(void) if (opts.midx_id && strlen(opts.midx_id) == GIT_MAX_HEXSZ) get_oid_hex(opts.midx_id, &midx_oid); - else if (!get_midx_head_oid(opts.pack_dir, &midx_oid)) - die("No midx-head exists."); + else + die("--read requires a --midx-id parameter"); midx = get_midxed_git(opts.pack_dir, &midx_oid); @@ -401,8 +401,6 @@ int cmd_midx(int argc, const char **argv, const char *prefix) opts.pack_dir = strbuf_detach(&path, NULL); } - opts.has_existing = !!get_midx_head_oid(opts.pack_dir, &opts.old_midx_oid); - if (opts.write) return cmd_midx_write(); if (opts.read) diff --git a/midx.c b/midx.c index 0099883a916df5..d6c88b9254ef30 100644 --- a/midx.c +++ b/midx.c @@ -6,31 +6,6 @@ #define MIDX_LARGE_OFFSET_NEEDED 0x80000000 -struct object_id *get_midx_head_oid(const char *pack_dir, struct object_id *oid) -{ - struct strbuf head_filename = STRBUF_INIT; - char oid_hex[GIT_MAX_HEXSZ + 1]; - FILE *f; - - strbuf_addstr(&head_filename, pack_dir); - strbuf_addstr(&head_filename, "/midx-head"); - - f = fopen(head_filename.buf, "r"); - strbuf_release(&head_filename); - - if (!f) - return 0; - - if (!fgets(oid_hex, sizeof(oid_hex), f)) - die("Failed to read midx-head"); - - fclose(f); - - if (get_oid_hex(oid_hex, oid)) - return 0; - return oid; -} - struct pack_midx_details_internal { uint32_t pack_int_id; uint32_t internal_offset; diff --git a/midx.h b/midx.h index 01b9be505b3bfe..269c716c866816 100644 --- a/midx.h +++ b/midx.h @@ -21,8 +21,6 @@ #define MIDX_HASH_VERSION MIDX_HASH_VERSION_SHA1 #define MIDX_HASH_LEN MIDX_HASH_LEN_SHA1 -extern struct object_id *get_midx_head_oid(const char *pack_dir, struct object_id *oid); - struct pack_midx_entry { struct object_id oid; uint32_t pack_int_id; diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index 4ad84337656715..f658292a953080 
100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -105,9 +105,7 @@ test_expect_success \ echo "test-2-${pack2}.pack" >>midx-read-expect-3 && echo "test-3-${pack3}.pack" >>midx-read-expect-3 && echo "pack_dir: ." >>midx-read-expect-3 && - cmp midx-read-out-3 midx-read-expect-3 && - git midx --read --pack-dir . >midx-read-out-3-head && - cmp midx-read-out-3-head midx-read-expect-3' + cmp midx-read-out-3 midx-read-expect-3' test_expect_success \ 'Add more packs' \ @@ -149,8 +147,6 @@ test_expect_success \ echo "pack_names:" >>midx-read-expect-4 && ls test-*.pack | sort >>midx-read-expect-4 && echo "pack_dir: ." >>midx-read-expect-4 && - cmp midx-read-out-4 midx-read-expect-4 && - git midx --read --pack-dir . >midx-read-out-4-head && - cmp midx-read-out-4-head midx-read-expect-4' + cmp midx-read-out-4 midx-read-expect-4' test_done From 0a9808388d7087865643c859fb276ecbaa2c74d2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:22:36 -0400 Subject: [PATCH 86/92] fixup! midx: teach git midx --write --update-head This reverts commit fdf46db2ae2c16adb22db0543be8a76ec64c1691. --- Documentation/git-midx.txt | 11 ----------- builtin/midx.c | 30 +----------------------------- t/t5319-midx.sh | 14 +++----------- 3 files changed, 4 insertions(+), 51 deletions(-) diff --git a/Documentation/git-midx.txt b/Documentation/git-midx.txt index b19ac07a013c6f..23b5d319b5ccdb 100644 --- a/Documentation/git-midx.txt +++ b/Documentation/git-midx.txt @@ -32,10 +32,6 @@ OPTIONS the packfiles present. Outputs the hash of the result midx file. (Cannot be combined with --read.) ---update-head:: - If specified with --write, update the midx-head file to point to - the written midx file. - EXAMPLES -------- @@ -45,13 +41,6 @@ EXAMPLES $ git midx --write ------------------------------------------------ -* Write a MIDX file for the packfiles in your local .git folder and -* update the midx-head file. 
-+ ------------------------------------------------- -$ git midx --write --update-head ------------------------------------------------- - * Write a MIDX file for the packfiles in a different folder + --------------------------------------------------------- diff --git a/builtin/midx.c b/builtin/midx.c index 27f67f03ce065e..d8bae8fe1f3a63 100644 --- a/builtin/midx.c +++ b/builtin/midx.c @@ -3,7 +3,6 @@ #include "config.h" #include "dir.h" #include "git-compat-util.h" -#include "lockfile.h" #include "packfile.h" #include "parse-options.h" #include "midx.h" @@ -11,7 +10,7 @@ static char const * const builtin_midx_usage[] ={ N_("git midx [--pack-dir ]"), - N_("git midx --write [--pack-dir ] [--update-head]"), + N_("git midx --write [--pack-dir ]"), N_("git midx --read [--midx-id=]"), NULL }; @@ -19,7 +18,6 @@ static char const * const builtin_midx_usage[] ={ static struct opts_midx { const char *pack_dir; int write; - int update_head; int read; const char *midx_id; int has_existing; @@ -247,27 +245,6 @@ static int build_midx_from_packs( return 0; } -static void update_head_file(const char *pack_dir, const char *midx_id) -{ - struct strbuf head_path = STRBUF_INIT; - FILE* f; - struct lock_file lk = LOCK_INIT; - - strbuf_addstr(&head_path, pack_dir); - strbuf_addstr(&head_path, "/"); - strbuf_addstr(&head_path, "midx-head"); - - hold_lock_file_for_update(&lk, head_path.buf, LOCK_DIE_ON_ERROR); - strbuf_release(&head_path); - - f = fdopen_lock_file(&lk, "w"); - if (!f) - die_errno("unable to fdopen midx-head"); - - fprintf(f, "%s", midx_id); - commit_lock_file(&lk); -} - static int cmd_midx_write(void) { const char **pack_names = NULL; @@ -307,9 +284,6 @@ static int cmd_midx_write(void) printf("%s\n", midx_id); - if (opts.update_head) - update_head_file(opts.pack_dir, midx_id); - if (pack_names) FREE_AND_NULL(pack_names); return 0; @@ -369,8 +343,6 @@ int cmd_midx(int argc, const char **argv, const char *prefix) N_("The pack directory containing set of packfile and 
pack-index pairs.") }, OPT_BOOL('w', "write", &opts.write, N_("write midx file")), - OPT_BOOL('u', "update-head", &opts.update_head, - N_("update midx-head to written midx file")), OPT_BOOL('r', "read", &opts.read, N_("read midx file")), { OPTION_STRING, 'M', "midx-id", &opts.midx_id, diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index f658292a953080..c516536549ab38 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -37,7 +37,6 @@ test_expect_success \ 'pack1=$(git pack-objects --index-version=1 test-1 midx-read-out-1 && echo "header: 4d494458 80000001 01 14 00 05 00000001" >midx-read-expect-1 && echo "num_objects: 17" >>midx-read-expect-1 && @@ -51,11 +50,8 @@ test_expect_success \ 'write-midx from index version 2' \ 'rm "test-1-${pack1}.pack" && pack2=$(git pack-objects --index-version=2 test-2 midx-head-expect && - cmp -n 40 midx-head midx-head-expect && git midx --read --pack-dir . --midx-id=${midx2} >midx-read-out-2 && echo "header: 4d494458 80000001 01 14 00 05 00000001" >midx-read-expect-2 && echo "num_objects: 17" >>midx-read-expect-2 && @@ -93,10 +89,8 @@ test_expect_success \ test_expect_success \ 'write-midx with two packs' \ 'pack3=$(git pack-objects --index-version=2 test-3 midx-head-expect && - cmp -n 40 midx-head midx-head-expect && git midx --read --pack-dir . --midx-id=${midx3} >midx-read-out-3 && echo "header: 4d494458 80000001 01 14 00 05 00000002" >midx-read-expect-3 && echo "num_objects: 33" >>midx-read-expect-3 && @@ -136,10 +130,8 @@ test_expect_success \ test_expect_success \ 'write-midx with twelve packs' \ - 'midx4=$(git midx --write --update-head --pack-dir .) && + 'midx4=$(git midx --write --pack-dir .) && test -f midx-${midx4}.midx && - echo ${midx4} > midx-head-expect && - cmp -n 40 midx-head midx-head-expect && git midx --read --pack-dir . 
--midx-id=${midx4} >midx-read-out-4 && echo "header: 4d494458 80000001 01 14 00 05 0000000d" >midx-read-expect-4 && echo "num_objects: 77" >>midx-read-expect-4 && From cf6d32cafe312bd9fc44ded0878f47a3f265999e Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:23:14 -0400 Subject: [PATCH 87/92] fixup! midx: teach git midx --read for midx testing This reverts commit 4710deccb88c734baba9096e0e63cd7415ef4a6c. --- Documentation/git-midx.txt | 15 +-- builtin/midx.c | 67 +------------ midx.c | 190 ++++++++----------------------------- midx.h | 68 ------------- t/t5319-midx.sh | 40 +------- 5 files changed, 48 insertions(+), 332 deletions(-) diff --git a/Documentation/git-midx.txt b/Documentation/git-midx.txt index 23b5d319b5ccdb..0d7066ea34637f 100644 --- a/Documentation/git-midx.txt +++ b/Documentation/git-midx.txt @@ -22,20 +22,15 @@ OPTIONS Use given directory for the location of packfiles, pack-indexes, and MIDX files. ---read:: - If specified, read a midx file specified by --midx-id and - output basic details about the midx file. (Cannot be combined - with --write.) - --write:: If specified, write a new midx file to the pack directory using the packfiles present. Outputs the hash of the result midx file. - (Cannot be combined with --read.) EXAMPLES -------- -* Write a MIDX file for the packfiles in your local .git folder. +* Write a MIDX file for the packfiles in your local .git folder and +* update midx-head to point to that file. + ------------------------------------------------ $ git midx --write @@ -47,12 +42,6 @@ $ git midx --write $ git midx --write --pack-dir ../../.gitObjectCache/pack/ --------------------------------------------------------- -* Read a MIDX file in the local .git folder. 
-+ --------------------------------------------------------------------- -$ git midx --read --midx-id 3e50d982a2257168c7fd0ff12ffe5cf6af38c74e --------------------------------------------------------------------- - CONFIGURATION ------------- diff --git a/builtin/midx.c b/builtin/midx.c index d8bae8fe1f3a63..d8bf3b9e1fc2ec 100644 --- a/builtin/midx.c +++ b/builtin/midx.c @@ -10,16 +10,14 @@ static char const * const builtin_midx_usage[] ={ N_("git midx [--pack-dir ]"), - N_("git midx --write [--pack-dir ]"), - N_("git midx --read [--midx-id=]"), + N_("git midx --write [--pack-dir ] [--update-head]"), NULL }; static struct opts_midx { const char *pack_dir; int write; - int read; - const char *midx_id; + int update_head; int has_existing; struct object_id old_midx_oid; } opts; @@ -289,66 +287,14 @@ static int cmd_midx_write(void) return 0; } -static int cmd_midx_read(void) -{ - struct object_id midx_oid; - struct midxed_git *midx; - uint32_t i; - - if (opts.midx_id && strlen(opts.midx_id) == GIT_MAX_HEXSZ) - get_oid_hex(opts.midx_id, &midx_oid); - else - die("--read requires a --midx-id parameter"); - - midx = get_midxed_git(opts.pack_dir, &midx_oid); - - printf("header: %08x %08x %02x %02x %02x %02x %08x\n", - ntohl(midx->hdr->midx_signature), - ntohl(midx->hdr->midx_version), - midx->hdr->hash_version, - midx->hdr->hash_len, - midx->hdr->num_base_midx, - midx->hdr->num_chunks, - ntohl(midx->hdr->num_packs)); - printf("num_objects: %d\n", midx->num_objects); - printf("chunks:"); - - if (midx->chunk_pack_lookup) - printf(" pack_lookup"); - if (midx->chunk_pack_names) - printf(" pack_names"); - if (midx->chunk_oid_fanout) - printf(" oid_fanout"); - if (midx->chunk_oid_lookup) - printf(" oid_lookup"); - if (midx->chunk_object_offsets) - printf(" object_offsets"); - if (midx->chunk_large_offsets) - printf(" large_offsets"); - printf("\n"); - - printf("pack_names:\n"); - for (i = 0; i < midx->num_packs; i++) - printf("%s\n", midx->pack_names[i]); - - printf("pack_dir: 
%s\n", midx->pack_dir); - return 0; -} - int cmd_midx(int argc, const char **argv, const char *prefix) { static struct option builtin_midx_options[] = { { OPTION_STRING, 'p', "pack-dir", &opts.pack_dir, - N_("dir"), - N_("The pack directory containing set of packfile and pack-index pairs.") }, + N_("dir"), + N_("The pack directory containing set of packfile and pack-index pairs.") }, OPT_BOOL('w', "write", &opts.write, N_("write midx file")), - OPT_BOOL('r', "read", &opts.read, - N_("read midx file")), - { OPTION_STRING, 'M', "midx-id", &opts.midx_id, - N_("oid"), - N_("An OID for a specific midx file in the pack-dir."), - PARSE_OPT_OPTARG, NULL, (intptr_t) "" }, OPT_END(), }; @@ -363,9 +309,6 @@ int cmd_midx(int argc, const char **argv, const char *prefix) builtin_midx_options, builtin_midx_usage, 0); - if (opts.write + opts.read > 1) - usage_with_options(builtin_midx_usage, builtin_midx_options); - if (!opts.pack_dir) { struct strbuf path = STRBUF_INIT; strbuf_addstr(&path, get_object_directory()); @@ -375,8 +318,6 @@ int cmd_midx(int argc, const char **argv, const char *prefix) if (opts.write) return cmd_midx_write(); - if (opts.read) - return cmd_midx_read(); return 0; } diff --git a/midx.c b/midx.c index d6c88b9254ef30..77748b20f0552b 100644 --- a/midx.c +++ b/midx.c @@ -11,152 +11,15 @@ struct pack_midx_details_internal { uint32_t internal_offset; }; -static struct midxed_git *alloc_midxed_git(const char *pack_dir) -{ - struct midxed_git *m = NULL; - - FLEX_ALLOC_MEM(m, pack_dir, pack_dir, strlen(pack_dir)); - - return m; -} - -static struct midxed_git *load_empty_midxed_git(void) -{ - struct midxed_git *midx = alloc_midxed_git(""); - - midx->midx_fd = -1; - midx->data = NULL; - midx->num_objects = 0; - midx->packs = NULL; - - midx->hdr = (void *)midx; - midx->hdr->num_base_midx = 0; - midx->hdr->num_packs = 0; - midx->hdr->num_chunks = 0; - - return 0; -} - -static struct midxed_git *load_midxed_git_one(const char *midx_file, const char *pack_dir) -{ - void 
*midx_map; - const unsigned char *data; - struct pack_midx_header *hdr; - size_t midx_size, packs_len; - struct stat st; - uint32_t i; - struct midxed_git *midx; - int fd = git_open(midx_file); - - if (fd < 0) - return 0; - if (fstat(fd, &st)) { - close(fd); - return 0; - } - midx_size = xsize_t(st.st_size); - - if (midx_size < 16 + 8 * 5 + 4 * 256 + GIT_MAX_RAWSZ) { - close(fd); - die("midx file %s is too small", midx_file); - } - midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0); - data = (const unsigned char *)midx_map; - - hdr = midx_map; - if (ntohl(hdr->midx_signature) != MIDX_SIGNATURE) { - munmap(midx_map, midx_size); - die("MIDX signature %X does not match signature %X", - ntohl(hdr->midx_signature), MIDX_SIGNATURE); - } - - if (ntohl(hdr->midx_version) != MIDX_VERSION) - die("MIDX version %X does not match version %X", - ntohl(hdr->midx_version), MIDX_VERSION); - - /* Time to fill a midx struct */ - midx = alloc_midxed_git(pack_dir); - - midx->hdr = hdr; - midx->midx_fd = fd; - midx->data = midx_map; - midx->data_len = midx_size; - - /* read chunk ids to find pointers */ - for (i = 0; i <= hdr->num_chunks; i++) { - uint32_t chunk_id = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i)); - uint64_t chunk_offset1 = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i + 4)); - uint32_t chunk_offset2 = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i + 8)); - uint64_t chunk_offset = (chunk_offset1 << 32) | chunk_offset2; - - if (sizeof(data) == 4 && chunk_offset >> 32) - die(_("unable to memory-map in 32-bit address space")); - - switch (chunk_id) { - case MIDX_CHUNKID_PACKLOOKUP: - midx->chunk_pack_lookup = data + chunk_offset; - break; - - case MIDX_CHUNKID_PACKNAMES: - midx->chunk_pack_names = data + chunk_offset; - break; - - case MIDX_CHUNKID_OIDFANOUT: - midx->chunk_oid_fanout = data + chunk_offset; - break; - - case MIDX_CHUNKID_OIDLOOKUP: - midx->chunk_oid_lookup = data + chunk_offset; - break; - - case MIDX_CHUNKID_OBJECTOFFSETS: - 
midx->chunk_object_offsets = data + chunk_offset; - break; - - case MIDX_CHUNKID_LARGEOFFSETS: - midx->chunk_large_offsets = data + chunk_offset; - break; - - case 0: - break; - - default: - die("Unrecognized MIDX chunk id: %08x", chunk_id); - } - } - - midx->num_objects = ntohl(*((uint32_t*)(midx->chunk_oid_fanout + 255 * 4))); - midx->num_packs = ntohl(midx->hdr->num_packs); - - packs_len = st_mult(sizeof(struct packed_git*), midx->num_packs); - - if (packs_len) { - ALLOC_ARRAY(midx->packs, midx->num_packs); - ALLOC_ARRAY(midx->pack_names, midx->num_packs); - memset(midx->packs, 0, packs_len); - - for (i = 0; i < midx->num_packs; i++) { - uint32_t name_offset = ntohl(*(uint32_t*)(midx->chunk_pack_lookup + 4 * i)); - midx->pack_names[i] = (const char*)(midx->chunk_pack_names + name_offset); - } - } - - return midx; -} - -struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *midx_oid) -{ - struct midxed_git *m; - struct strbuf midx_file = STRBUF_INIT; - strbuf_addstr(&midx_file, pack_dir); - strbuf_addstr(&midx_file, "/midx-"); - strbuf_addstr(&midx_file, oid_to_hex(midx_oid)); - strbuf_addstr(&midx_file, ".midx"); - - m = load_midxed_git_one(midx_file.buf, pack_dir); - strbuf_release(&midx_file); - return m; -} +struct pack_midx_header { + uint32_t midx_signature; + uint32_t midx_version; + unsigned char hash_version; + unsigned char hash_len; + unsigned char num_base_midx; + unsigned char num_chunks; + uint32_t num_packs; +}; static int midx_oid_compare(const void *_a, const void *_b) { @@ -340,6 +203,7 @@ const char *write_midx_file( struct pack_midx_entry **objects, uint32_t nr_objects) { struct hashfile *f; + struct pack_midx_entry **sorted_by_sha, **list, **last; int i, chunk, fd; struct pack_midx_header hdr; uint32_t chunk_ids[7]; @@ -371,6 +235,34 @@ const char *write_midx_file( pack_perm = 0; } +<<<<<<< HEAD +======= + /* Sort objects */ + if (nr_objects) { + sorted_by_sha = objects; + list = sorted_by_sha; + last = sorted_by_sha + 
nr_objects; + + QSORT(sorted_by_sha, nr_objects, midx_oid_compare); + + count_distinct = 1; + for (i = 0; i < nr_objects; i++) { + if (!i || + !oidcmp(&sorted_by_sha[i-1]->oid, &sorted_by_sha[i]->oid)) + continue; + + count_distinct++; + + if (sorted_by_sha[i]->offset >> 31) + nr_large_offset++; + if (sorted_by_sha[i]->offset >> 32) + large_offset_needed = 1; + } + } else { + sorted_by_sha = list = last = NULL; + } + +>>>>>>> parent of 4710deccb8... midx: teach git midx --read for midx testing if (nr_packs) { for (i = 0; i < nr_packs; i++) { total_name_len += strlen(pack_names[i]) + 1; @@ -487,12 +379,6 @@ const char *write_midx_file( written += write_midx_chunk_largeoffsets(f, nr_large_offset, objects, nr_objects); break; - - case 0: - break; - - default: - die("Unrecognized MIDX chunk id: %08x", chunk_ids[chunk]); } } diff --git a/midx.h b/midx.h index 269c716c866816..aa3efc6d9ad209 100644 --- a/midx.h +++ b/midx.h @@ -27,74 +27,6 @@ struct pack_midx_entry { off_t offset; }; -struct pack_midx_header { - uint32_t midx_signature; - uint32_t midx_version; - unsigned char hash_version; - unsigned char hash_len; - unsigned char num_base_midx; - unsigned char num_chunks; - uint32_t num_packs; -}; - -struct midxed_git { - struct midxed_git *next; - - int midx_fd; - - /* the mmap'd data for the midx file */ - const unsigned char *data; - size_t data_len; - - /* points into the mmap'd data */ - struct pack_midx_header *hdr; - - /* can construct filename from obj_dir + "/packs/midx-" + oid + ".midx" */ - struct object_id oid; - - /* derived from the fanout chunk */ - uint32_t num_objects; - - /* converted number of packs */ - uint32_t num_packs; - - /* hdr->num_packs * 4 bytes */ - const unsigned char *chunk_pack_lookup; - const unsigned char *chunk_pack_names; - - /* 256 * 4 bytes */ - const unsigned char *chunk_oid_fanout; - - /* num_objects * hdr->hash_len bytes */ - const unsigned char *chunk_oid_lookup; - - /* num_objects * 8 bytes */ - const unsigned char 
*chunk_object_offsets; - - /* - * 8 bytes per large offset. - * (Optional: may be null.) - */ - const unsigned char *chunk_large_offsets; - - /* - * Points into mmap'd data storing the pack filenames. - */ - const char **pack_names; - - /* - * Store an array of pack-pointers. If NULL, then the - * pack has not been loaded yet. The array indices - * correspond to the pack_int_ids from the midx storage. - */ - struct packed_git **packs; - - /* something like ".git/objects/pack" */ - char pack_dir[FLEX_ARRAY]; /* more */ -}; - -extern struct midxed_git *get_midxed_git(const char *pack_dir, struct object_id *midx_oid); - /* * Write a single MIDX file storing the given entries for the * given list of packfiles. If midx_name is null, then a temp diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh index c516536549ab38..68495dbf018d64 100755 --- a/t/t5319-midx.sh +++ b/t/t5319-midx.sh @@ -36,15 +36,7 @@ test_expect_success \ 'write-midx from index version 1' \ 'pack1=$(git pack-objects --index-version=1 test-1 midx-read-out-1 && - echo "header: 4d494458 80000001 01 14 00 05 00000001" >midx-read-expect-1 && - echo "num_objects: 17" >>midx-read-expect-1 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-1 && - echo "pack_names:" >>midx-read-expect-1 && - echo "test-1-${pack1}.pack" >>midx-read-expect-1 && - echo "pack_dir: ." >>midx-read-expect-1 && - cmp midx-read-out-1 midx-read-expect-1' + test -f midx-${midx1}.midx' test_expect_success \ 'write-midx from index version 2' \ @@ -52,14 +44,7 @@ test_expect_success \ pack2=$(git pack-objects --index-version=2 test-2 midx-read-out-2 && - echo "header: 4d494458 80000001 01 14 00 05 00000001" >midx-read-expect-2 && - echo "num_objects: 17" >>midx-read-expect-2 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-2 && - echo "pack_names:" >>midx-read-expect-2 && - echo "test-2-${pack2}.pack" >>midx-read-expect-2 && - echo "pack_dir: ." 
>>midx-read-expect-2 && - cmp midx-read-out-2 midx-read-expect-2' + ! test -f midx-head' test_expect_success \ 'Add more objects' \ @@ -90,16 +75,7 @@ test_expect_success \ 'write-midx with two packs' \ 'pack3=$(git pack-objects --index-version=2 test-3 midx-read-out-3 && - echo "header: 4d494458 80000001 01 14 00 05 00000002" >midx-read-expect-3 && - echo "num_objects: 33" >>midx-read-expect-3 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-3 && - echo "pack_names:" >>midx-read-expect-3 && - echo "test-2-${pack2}.pack" >>midx-read-expect-3 && - echo "test-3-${pack3}.pack" >>midx-read-expect-3 && - echo "pack_dir: ." >>midx-read-expect-3 && - cmp midx-read-out-3 midx-read-expect-3' + test -f midx-${midx3}.midx' test_expect_success \ 'Add more packs' \ @@ -131,14 +107,6 @@ test_expect_success \ test_expect_success \ 'write-midx with twelve packs' \ 'midx4=$(git midx --write --pack-dir .) && - test -f midx-${midx4}.midx && - git midx --read --pack-dir . --midx-id=${midx4} >midx-read-out-4 && - echo "header: 4d494458 80000001 01 14 00 05 0000000d" >midx-read-expect-4 && - echo "num_objects: 77" >>midx-read-expect-4 && - echo "chunks: pack_lookup pack_names oid_fanout oid_lookup object_offsets" >>midx-read-expect-4 && - echo "pack_names:" >>midx-read-expect-4 && - ls test-*.pack | sort >>midx-read-expect-4 && - echo "pack_dir: ." >>midx-read-expect-4 && - cmp midx-read-out-4 midx-read-expect-4' + test -f midx-${midx4}.midx' test_done From 798b283b14480c9b0d5db595501bf7aea95a48f1 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:23:33 -0400 Subject: [PATCH 88/92] fixup! midx: create t5319-midx.sh This reverts commit 798dadf325aad642fc6308c6ba24b55fb0368da0. 
--- t/t5319-midx.sh | 112 ------------------------------------------------ 1 file changed, 112 deletions(-) delete mode 100755 t/t5319-midx.sh diff --git a/t/t5319-midx.sh b/t/t5319-midx.sh deleted file mode 100755 index 68495dbf018d64..00000000000000 --- a/t/t5319-midx.sh +++ /dev/null @@ -1,112 +0,0 @@ -#!/bin/sh - -test_description='meta-pack indexes' -. ./test-lib.sh - -test_expect_success \ - 'setup' \ - 'rm -rf .git && - git init && - git config core.midx true && - git config pack.threads 1 && - i=1 && - while test $i -le 5 - do - iii=$(printf '%03i' $i) - test-tool genrandom "bar" 200 > wide_delta_$iii && - test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && - test-tool genrandom "foo"$i 100 > deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && - echo $iii >file_$iii && - test-tool genrandom "$iii" 8192 >>file_$iii && - git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && - i=$(expr $i + 1) || return 1 - done && - { echo 101 && test-tool genrandom 100 8192; } >file_101 && - git update-index --add file_101 && - tree=$(git write-tree) && - commit=$(git commit-tree $tree obj-list && - git update-ref HEAD $commit' - -test_expect_success \ - 'write-midx from index version 1' \ - 'pack1=$(git pack-objects --index-version=1 test-1 wide_delta_$iii && - test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && - test-tool genrandom "foo"$i 100 > deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && - echo $iii >file_$iii && - test-tool genrandom "$iii" 8192 >>file_$iii && - git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && - i=$(expr $i + 1) || return 1 - done && - { echo 101 && test-tool genrandom 100 8192; } >file_101 && - git update-index --add file_101 && - tree=$(git write-tree) && - commit=$(git commit-tree $tree -p 
HEADobj-list && - git update-ref HEAD $commit' - -test_expect_success \ - 'write-midx with two packs' \ - 'pack3=$(git pack-objects --index-version=2 test-3 wide_delta_$iii && - test-tool genrandom "baz $iii" 50 >> wide_delta_$iii && - test-tool genrandom "foo"$i 100 > deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 1) 100 >> deep_delta_$iii && - test-tool genrandom "foo"$(expr $i + 2) 100 >> deep_delta_$iii && - echo $iii >file_$iii && - test-tool genrandom "$iii" 8192 >>file_$iii && - git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && - { echo 101 && test-tool genrandom 100 8192; } >file_101 && - git update-index --add file_101 && - tree=$(git write-tree) && - commit=$(git commit-tree $tree -p HEADobj-list && - git update-ref HEAD $commit && - git pack-objects --index-version=2 test-4 Date: Mon, 17 Sep 2018 17:24:03 -0400 Subject: [PATCH 89/92] fixup! midx: implement write_midx_file() This reverts commit acca518f7c407e4436eb413f58b86273ce842816. --- Makefile | 1 - midx.c | 406 ------------------------------------------------------- midx.h | 44 ------ 3 files changed, 451 deletions(-) delete mode 100644 midx.c delete mode 100644 midx.h diff --git a/Makefile b/Makefile index 74319a98b88c05..4a9479bfef3040 100644 --- a/Makefile +++ b/Makefile @@ -905,7 +905,6 @@ LIB_OBJS += merge.o LIB_OBJS += merge-blobs.o LIB_OBJS += merge-recursive.o LIB_OBJS += mergesort.o -LIB_OBJS += midx.o LIB_OBJS += name-hash.o LIB_OBJS += negotiator/default.o LIB_OBJS += negotiator/skipping.o diff --git a/midx.c b/midx.c deleted file mode 100644 index 77748b20f0552b..00000000000000 --- a/midx.c +++ /dev/null @@ -1,406 +0,0 @@ -#include "cache.h" -#include "git-compat-util.h" -#include "pack.h" -#include "packfile.h" -#include "midx.h" - -#define MIDX_LARGE_OFFSET_NEEDED 0x80000000 - -struct pack_midx_details_internal { - uint32_t pack_int_id; - uint32_t internal_offset; -}; - -struct pack_midx_header { - uint32_t midx_signature; - uint32_t midx_version; - 
unsigned char hash_version; - unsigned char hash_len; - unsigned char num_base_midx; - unsigned char num_chunks; - uint32_t num_packs; -}; - -static int midx_oid_compare(const void *_a, const void *_b) -{ - struct pack_midx_entry *a = *(struct pack_midx_entry **)_a; - struct pack_midx_entry *b = *(struct pack_midx_entry **)_b; - return oidcmp(&a->oid, &b->oid); -} - -static void write_midx_chunk_packlookup( - struct hashfile *f, - const char **pack_names, uint32_t nr_packs) -{ - uint32_t i, cur_len = 0; - - for (i = 0; i < nr_packs; i++) { - uint32_t swap_len = htonl(cur_len); - hashwrite(f, &swap_len, 4); - cur_len += strlen(pack_names[i]) + 1; - } -} - -static void write_midx_chunk_packnames( - struct hashfile *f, - const char **pack_names, uint32_t nr_packs) -{ - uint32_t i; - for (i = 0; i < nr_packs; i++) { - hashwrite(f, pack_names[i], strlen(pack_names[i]) + 1); - } -} - -static void write_midx_chunk_oidfanout( - struct hashfile *f, - struct pack_midx_entry *objects, uint32_t nr_objects) -{ - struct pack_midx_entry *list = objects; - struct pack_midx_entry *last = objects + nr_objects; - uint32_t count_distinct = 0; - uint32_t i; - - /* - * Write the first-level table (the list is sorted, - * but we use a 256-entry lookup to be able to avoid - * having to do eight extra binary search iterations). 
- */ - for (i = 0; i < 256; i++) { - struct pack_midx_entry *next = list; - struct pack_midx_entry *prev = NULL; - uint32_t swap_distinct; - - while (next < last) { - if (next->oid.hash[0] != i) - break; - - if (!prev || oidcmp(&(prev->oid), &(obj->oid))) - { - count_distinct++; - } - - prev = next++; - } - - swap_distinct = htonl(count_distinct); - hashwrite(f, &swap_distinct, 4); - list = next; - } -} - -static void write_midx_chunk_oidlookup( - struct hashfile *f, unsigned char hash_len, - struct pack_midx_entry *objects, uint32_t nr_objects) -{ - struct pack_midx_entry **list = objects; - struct object_id *last_oid = 0; - uint32_t i; - - for (i = 0; i < nr_objects; i++) { - struct pack_midx_entry *obj = list++; - - if (last_oid && !oidcmp(last_oid, &obj->oid)) - continue; - - last_oid = &obj->oid; - hashwrite(f, obj->oid.hash, (int)hash_len); - } -} - -static void write_midx_chunk_objectoffsets( - struct hashfile *f, int large_offset_needed, - struct pack_midx_entry *objects, uint32_t nr_objects, uint32_t *pack_perm) -{ - struct pack_midx_entry *list = objects; - struct object_id *last_oid = 0; - uint32_t i, nr_large_offset = 0; - - for (i = 0; i < nr_objects; i++) { - struct pack_midx_details_internal details; - struct pack_midx_entry *obj = *list++; - - if (last_oid && !oidcmp(last_oid, &obj->oid)) - continue; - - last_oid = &obj->oid; - - details.pack_int_id = htonl(pack_perm[obj->pack_int_id]); - - if (large_offset_needed && obj->offset >> 31) - details.internal_offset = (MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); - else - details.internal_offset = (uint32_t)obj->offset; - - details.internal_offset = htonl(details.internal_offset); - hashwrite(f, &details, 8); - } -} - -static void write_midx_chunk_largeoffsets( - struct hashfile *f, uint32_t nr_large_offset, - struct pack_midx_entry *objects, uint32_t nr_objects) -{ - struct pack_midx_entry *list = objects; - struct object_id *last_oid = 0; - - while (nr_large_offset) { - struct pack_midx_entry *obj = 
list++; - uint64_t offset = obj->offset; - uint32_t split[2]; - - if (last_oid && !oidcmp(last_oid, &obj->oid)) - continue; - - last_oid = &obj->oid; - - if (!(offset >> 31)) - continue; - - split[0] = htonl(offset >> 32); - split[1] = htonl(offset & 0xffffffff); - - hashwrite(f, split, 8); - nr_large_offset--; - } -} - -struct pack_pair { - uint32_t pack_int_id; - const char *pack_name; -}; - -static int pack_pair_compare(const void *_a, const void *_b) -{ - struct pack_pair *a = (struct pack_pair *)_a; - struct pack_pair *b = (struct pack_pair *)_b; - return strcmp(a->pack_name, b->pack_name); -} - -static void sort_packs_by_name(const char **pack_names, uint32_t nr_packs, uint32_t *perm) -{ - uint32_t i; - struct pack_pair *pairs; - - ALLOC_ARRAY(pairs, nr_packs); - - for (i = 0; i < nr_packs; i++) { - pairs[i].pack_int_id = i; - pairs[i].pack_name = pack_names[i]; - } - - QSORT(pairs, nr_packs, pack_pair_compare); - - for (i = 0; i < nr_packs; i++) { - pack_names[i] = pairs[i].pack_name; - perm[pairs[i].pack_int_id] = i; - } -} - -const char *write_midx_file( - const char *pack_dir, - const char *midx_name, - const char **pack_names, uint32_t nr_packs, - struct pack_midx_entry **objects, uint32_t nr_objects) -{ - struct hashfile *f; - struct pack_midx_entry **sorted_by_sha, **list, **last; - int i, chunk, fd; - struct pack_midx_header hdr; - uint32_t chunk_ids[7]; - uint64_t chunk_offsets[7]; - unsigned char large_offset_needed = 0; - unsigned int nr_large_offset = 0; - unsigned char final_hash[GIT_MAX_RAWSZ]; - const char *final_hex; - int rename_needed = 0; - int total_name_len = 0; - uint32_t *pack_perm; - - if (!core_midx) - return 0; - - /* determine if large offsets are required */ - for (i = 0; i < nr_objects; i++) { - if (objects[i].offset > 0x7fffffff) - nr_large_offset++; - if (objects[i].offset > 0xffffffff) - large_offset_needed = 1; - } - - /* Sort packs */ - if (nr_packs) { - ALLOC_ARRAY(pack_perm, nr_packs); - sort_packs_by_name(pack_names, 
nr_packs, pack_perm); - } else { - pack_perm = 0; - } - -<<<<<<< HEAD -======= - /* Sort objects */ - if (nr_objects) { - sorted_by_sha = objects; - list = sorted_by_sha; - last = sorted_by_sha + nr_objects; - - QSORT(sorted_by_sha, nr_objects, midx_oid_compare); - - count_distinct = 1; - for (i = 0; i < nr_objects; i++) { - if (!i || - !oidcmp(&sorted_by_sha[i-1]->oid, &sorted_by_sha[i]->oid)) - continue; - - count_distinct++; - - if (sorted_by_sha[i]->offset >> 31) - nr_large_offset++; - if (sorted_by_sha[i]->offset >> 32) - large_offset_needed = 1; - } - } else { - sorted_by_sha = list = last = NULL; - } - ->>>>>>> parent of 4710deccb8... midx: teach git midx --read for midx testing - if (nr_packs) { - for (i = 0; i < nr_packs; i++) { - total_name_len += strlen(pack_names[i]) + 1; - } - } - - /* open temp file, or direct file if given */ - if (!midx_name) { - struct strbuf tmp_file = STRBUF_INIT; - strbuf_addstr(&tmp_file, pack_dir); - strbuf_addstr(&tmp_file, "/tmp_midx_XXXXXX"); - - fd = git_mkstemp_mode(tmp_file.buf, 0444); - if (fd < 0) - die_errno("unable to create '%s'", tmp_file.buf); - - midx_name = strbuf_detach(&tmp_file, NULL); - rename_needed = 1; - } else { - unlink(midx_name); - fd = open(midx_name, O_CREAT|O_EXCL|O_WRONLY, 0600); - - if (fd < 0) - die_errno("unable to create '%s'", midx_name); - } - f = hashfd(fd, midx_name); - - /* fill header info */ - hdr.midx_signature = htonl(MIDX_SIGNATURE); - hdr.midx_version = htonl(MIDX_VERSION); - - hdr.hash_version = MIDX_HASH_VERSION; - hdr.hash_len = MIDX_HASH_LEN; - hdr.num_base_midx = 0; - hdr.num_packs = htonl(nr_packs); - - /* - * We expect the following chunks, which are required: - * - * Packfile Name Lookup - * Packfile Names - * OID Fanout - * OID Lookup - * Object Offsets - */ - hdr.num_chunks = large_offset_needed ? 
6 : 5; - - /* write header to file */ - assert(sizeof(hdr) == 16); - hashwrite(f, &hdr, sizeof(hdr)); - - /* - * Fill initial chunk values using offsets - * relative to first chunk. - */ - chunk_offsets[0] = sizeof(hdr) + 12 * (hdr.num_chunks + 1); - chunk_ids[0] = MIDX_CHUNKID_PACKLOOKUP; - chunk_offsets[1] = chunk_offsets[0] + nr_packs * 4; - chunk_ids[1] = MIDX_CHUNKID_OIDFANOUT; - chunk_offsets[2] = chunk_offsets[1] + 256 * 4; - chunk_ids[2] = MIDX_CHUNKID_OIDLOOKUP; - chunk_offsets[3] = chunk_offsets[2] + (uint64_t)nr_objects - * (uint64_t)hdr.hash_len; - chunk_ids[3] = MIDX_CHUNKID_OBJECTOFFSETS; - chunk_offsets[4] = chunk_offsets[3] + 8 * (uint64_t)count_distinct; - - if (large_offset_needed) { - chunk_ids[4] = MIDX_CHUNKID_LARGEOFFSETS; - chunk_offsets[5] = chunk_offsets[4] + 8 * (uint64_t)nr_large_offset; - chunk_ids[5] = MIDX_CHUNKID_PACKNAMES; - chunk_offsets[6] = chunk_offsets[5] + total_name_len; - chunk_ids[6] = 0; - } else { - chunk_ids[4] = MIDX_CHUNKID_PACKNAMES; - chunk_offsets[5] = chunk_offsets[4] + total_name_len; - chunk_ids[5] = 0; - } - - for (i = 0; i <= hdr.num_chunks; i++) { - uint32_t chunk_write[3]; - - chunk_write[0] = htonl(chunk_ids[i]); - chunk_write[1] = htonl(chunk_offsets[i] >> 32); - chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff); - hashwrite(f, chunk_write, 12); - } - - for (chunk = 0; chunk < hdr.num_chunks; chunk++) { - switch (chunk_ids[chunk]) { - case MIDX_CHUNKID_PACKLOOKUP: - write_midx_chunk_packlookup(f, pack_names, nr_packs); - break; - - case MIDX_CHUNKID_PACKNAMES: - write_midx_chunk_packnames(f, pack_names, nr_packs); - break; - - case MIDX_CHUNKID_OIDFANOUT: - written += write_midx_chunk_oidfanout(f, objects, nr_objects); - break; - - case MIDX_CHUNKID_OIDLOOKUP: - written += write_midx_chunk_oidlookup(f, hdr.hash_len, objects, - nr_objects); - break; - - case MIDX_CHUNKID_OBJECTOFFSETS: - written += write_midx_chunk_objectoffsets(f, large_offset_needed, - objects, nr_objects, - pack_perm); - break; - - 
case MIDX_CHUNKID_LARGEOFFSETS: - written += write_midx_chunk_largeoffsets(f, nr_large_offset, - objects, nr_objects); - break; - } - } - - finalize_hashfile(f, final_hash, CSUM_CLOSE | CSUM_FSYNC | CSUM_HASH_IN_STREAM); - - if (rename_needed) - { - struct strbuf final_name = STRBUF_INIT; - - final_hex = sha1_to_hex(final_hash); - strbuf_addstr(&final_name, pack_dir); - strbuf_addstr(&final_name, "/midx-"); - strbuf_addstr(&final_name, final_hex); - strbuf_addstr(&final_name, ".midx"); - - if (rename(midx_name, final_name.buf)) - die("Failed to rename %s to %s", midx_name, final_name.buf); - - strbuf_release(&final_name); - } else { - final_hex = midx_name; - } - - return final_hex; -} diff --git a/midx.h b/midx.h deleted file mode 100644 index aa3efc6d9ad209..00000000000000 --- a/midx.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef MIDX_H -#define MIDX_H - -#include "git-compat-util.h" -#include "object.h" -#include "csum-file.h" - -#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ -#define MIDX_CHUNKID_PACKLOOKUP 0x504c4f4f /* "PLOO" */ -#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ -#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ -#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ -#define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */ -#define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */ - -#define MIDX_VERSION_GVFS 0x80000001 -#define MIDX_VERSION MIDX_VERSION_GVFS - -#define MIDX_HASH_VERSION_SHA1 1 -#define MIDX_HASH_LEN_SHA1 20 -#define MIDX_HASH_VERSION MIDX_HASH_VERSION_SHA1 -#define MIDX_HASH_LEN MIDX_HASH_LEN_SHA1 - -struct pack_midx_entry { - struct object_id oid; - uint32_t pack_int_id; - off_t offset; -}; - -/* - * Write a single MIDX file storing the given entries for the - * given list of packfiles. If midx_name is null, then a temp - * file will be created and swapped using the result hash value. - * Otherwise, write directly to midx_name. - * - * Returns the final name of the MIDX file within pack_dir. 
- */ -extern const char *write_midx_file( - const char *pack_dir, - const char *midx_name, - const char **pack_names, uint32_t nr_packs, - struct pack_midx_entry **objects, uint32_t nr_objects); - -#endif From 38087582da7bd7dfe8b00122f91b6f0371f5550b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:24:36 -0400 Subject: [PATCH 90/92] fixup! midx: create core.midx config setting This reverts commit 90b3c0f4021ad06cdb3837b0446e3bdb2fb912da. --- Documentation/config.txt | 3 --- cache.h | 1 - config.c | 5 ----- environment.c | 1 - 4 files changed, 10 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index fa8e7e393f2205..f03a20341817eb 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1014,9 +1014,6 @@ core.gvfs:: and switch to the new ref. -- -core.midx:: - Enable "multi-pack-index" feature. Set to true to read and write MIDX files. - core.sparseCheckout:: Enable "sparse checkout" feature. See section "Sparse checkout" in linkgit:git-read-tree[1] for more information. 
diff --git a/cache.h b/cache.h index 1912a8ea8d1c5f..dc9c155b0d2671 100644 --- a/cache.h +++ b/cache.h @@ -880,7 +880,6 @@ extern int core_preload_index; extern int core_apply_sparse_checkout; extern const char *core_virtualfilesystem; extern int core_gvfs; -extern int core_midx; extern int precomposed_unicode; extern int protect_hfs; extern int protect_ntfs; diff --git a/config.c b/config.c index 4c0b1e2c5c4ee4..8385d51910dde2 100644 --- a/config.c +++ b/config.c @@ -1330,11 +1330,6 @@ static int git_default_core_config(const char *var, const char *value, void *cb) return 0; } - if (!strcmp(var, "core.midx")) { - core_midx = git_config_bool(var, value); - return 0; - } - if (!strcmp(var, "core.sparsecheckout")) { /* virtual file system relies on the sparse checkout logic so force it on */ if (core_virtualfilesystem) diff --git a/environment.c b/environment.c index 2181e89f867ac1..4bdc3c31aa2e64 100644 --- a/environment.c +++ b/environment.c @@ -69,7 +69,6 @@ char *notes_ref_name; int grafts_replace_parents = 1; int core_apply_sparse_checkout; int core_gvfs; -int core_midx; const char *core_virtualfilesystem; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ From b6ff268ad702d75acf417fac893ec39f39f61375 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 17 Sep 2018 17:24:55 -0400 Subject: [PATCH 91/92] fixup! midx: specify midx file format This reverts commit 4f7733100f01ea319e297e46cd2e84fc89ca05ed. --- Documentation/technical/pack-format.txt | 84 ------------------------- 1 file changed, 84 deletions(-) diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt index 1b453da41e0da8..70a99fd1423894 100644 --- a/Documentation/technical/pack-format.txt +++ b/Documentation/technical/pack-format.txt @@ -252,87 +252,3 @@ Pack file entry: <+ corresponding packfile. 20-byte SHA-1-checksum of all of the above. 
- -== midx-*.midx files have the following format: - -The meta-index files refer to multiple pack-files and loose objects. - -In order to allow extensions that add extra data to the MIDX, we organize -the body into "chunks" and provide a lookup table at the beginning of the -body. The header includes certain length values, such as the number of packs, -the number of base MIDX files, hash lengths and types. - -All 4-byte numbers are in network order. - -HEADER: - - 4-byte signature: - The signature is: {'M', 'I', 'D', 'X'} - - 4-byte version number: - We reserve version number 1 for the format accepted by core git, - and instead use version 0x80000001 for gvfs/master. - - 1-byte Object Id Version (1 = SHA-1) - - 1-byte Object Id Length (H) - - 1-byte number (I) of base multi-pack-index files: - This value is currently always zero. - - 1-byte number (C) of "chunks" - - 4-byte number (P) of pack files - -CHUNK LOOKUP: - - (C + 1) * 12 bytes providing the chunk offsets: - First 4 bytes describe chunk id. Value 0 is a terminating label. - Other 8 bytes provide offset in current file for chunk to start. - (Chunks are provided in file-order, so you can infer the length - using the next chunk position if necessary.) - - The remaining data in the body is described one chunk at a time, and - these chunks may be given in any order. Chunks are required unless - otherwise specified. - -CHUNK DATA: - - OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) - The ith entry, F[i], stores the number of OIDs with first - byte at most i. Thus F[255] stores the total - number of objects (N). - - OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) - The OIDs for all objects in the MIDX are stored in lexicographic - order in this chunk. - - Object Offsets (ID: {'O', 'O', 'F', 'F'}) (N * 8 bytes) - Stores two 4-byte values for every object. - 1: The pack-int-id for the pack storing this object. - 2: The offset within the pack. 
- If all offsets are less than 2^31, then the large offset chunk - will not exist and offsets are stored as in IDX v1. - If there is at least one offset value larger than 2^32-1, then - the large offset chunk must exist. If the large offset chunk - exists and the 31st bit is on, then removing that bit reveals - the row in the large offsets containing the 8-byte offset of - this object. - - [Optional] Object Large Offsets (ID: {'L', 'O', 'F', 'F'}) - 8-byte offsets into large packfiles. - - Packfile Name Lookup (ID: {'P', 'L', 'O', 'O'}) (P * 4 bytes) - P * 4 bytes storing the offset in the packfile name chunk for - the null-terminated string containing the filename for the - ith packfile. - - Packfile Names (ID: {'P', 'N', 'A', 'M'}) - Stores the packfile names as concatenated, null-terminated strings. - Packfiles must be listed in lexicographic order for fast lookups by - name. This is the only chunk not guaranteed to be a multiple of four - bytes in length, so should be the last chunk for alignment reasons. - -TRAILER: - - H-byte HASH-checksum of all of the above. From c980871a4bc88e2b72d6aa5c776d880d534c669a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 24 Sep 2018 12:46:03 +0000 Subject: [PATCH 92/92] fixup! 
git-midx: add midx builtin --- .gitignore | 1 - Documentation/git-midx.txt | 55 ------- Makefile | 1 - builtin.h | 1 - builtin/midx.c | 323 ------------------------------------- command-list.txt | 1 - git.c | 1 - 7 files changed, 383 deletions(-) delete mode 100644 Documentation/git-midx.txt delete mode 100644 builtin/midx.c diff --git a/.gitignore b/.gitignore index d9773e1120585f..1943d93c72bdf9 100644 --- a/.gitignore +++ b/.gitignore @@ -100,7 +100,6 @@ /git-merge-subtree /git-mergetool /git-mergetool--lib -/git-midx /git-mktag /git-mktree /git-name-rev diff --git a/Documentation/git-midx.txt b/Documentation/git-midx.txt deleted file mode 100644 index 0d7066ea34637f..00000000000000 --- a/Documentation/git-midx.txt +++ /dev/null @@ -1,55 +0,0 @@ -git-midx(1) -============ - -NAME ----- -git-midx - Write and verify multi-pack-indexes (MIDX files). - - -SYNOPSIS --------- -[verse] -'git midx' [--pack-dir ] - -DESCRIPTION ------------ -Write or verify a MIDX file. - -OPTIONS -------- - ---pack-dir :: - Use given directory for the location of packfiles, pack-indexes, - and MIDX files. - ---write:: - If specified, write a new midx file to the pack directory using - the packfiles present. Outputs the hash of the result midx file. - -EXAMPLES --------- - -* Write a MIDX file for the packfiles in your local .git folder and -* update midx-head to point to that file. -+ ------------------------------------------------- -$ git midx --write ------------------------------------------------- - -* Write a MIDX file for the packfiles in a different folder -+ ---------------------------------------------------------- -$ git midx --write --pack-dir ../../.gitObjectCache/pack/ ---------------------------------------------------------- - -CONFIGURATION -------------- - -core.midx:: - The midx command will fail if core.midx is false. - Also, the written MIDX files will be ignored by other commands - unless core.midx is true. 
- -GIT ---- -Part of the linkgit:git[1] suite diff --git a/Makefile b/Makefile index 4a9479bfef3040..ed941554cfecda 100644 --- a/Makefile +++ b/Makefile @@ -1076,7 +1076,6 @@ BUILTIN_OBJS += builtin/merge-index.o BUILTIN_OBJS += builtin/merge-ours.o BUILTIN_OBJS += builtin/merge-recursive.o BUILTIN_OBJS += builtin/merge-tree.o -BUILTIN_OBJS += builtin/midx.o BUILTIN_OBJS += builtin/mktag.o BUILTIN_OBJS += builtin/mktree.o BUILTIN_OBJS += builtin/mv.o diff --git a/builtin.h b/builtin.h index 1eb16d1ebbf558..97913ada19166f 100644 --- a/builtin.h +++ b/builtin.h @@ -189,7 +189,6 @@ extern int cmd_merge_ours(int argc, const char **argv, const char *prefix); extern int cmd_merge_file(int argc, const char **argv, const char *prefix); extern int cmd_merge_recursive(int argc, const char **argv, const char *prefix); extern int cmd_merge_tree(int argc, const char **argv, const char *prefix); -extern int cmd_midx(int argc, const char **argv, const char *prefix); extern int cmd_mktag(int argc, const char **argv, const char *prefix); extern int cmd_mktree(int argc, const char **argv, const char *prefix); extern int cmd_mv(int argc, const char **argv, const char *prefix); diff --git a/builtin/midx.c b/builtin/midx.c deleted file mode 100644 index d8bf3b9e1fc2ec..00000000000000 --- a/builtin/midx.c +++ /dev/null @@ -1,323 +0,0 @@ -#include "builtin.h" -#include "cache.h" -#include "config.h" -#include "dir.h" -#include "git-compat-util.h" -#include "packfile.h" -#include "parse-options.h" -#include "midx.h" -#include "object-store.h" - -static char const * const builtin_midx_usage[] ={ - N_("git midx [--pack-dir ]"), - N_("git midx --write [--pack-dir ] [--update-head]"), - NULL -}; - -static struct opts_midx { - const char *pack_dir; - int write; - int update_head; - int has_existing; - struct object_id old_midx_oid; -} opts; - -static int midx_oid_compare(const void *_a, const void *_b) -{ - struct pack_midx_entry *a = (struct pack_midx_entry *)_a; - struct pack_midx_entry *b = 
(struct pack_midx_entry *)_b; - int cmp = oidcmp(&a->oid, &b->oid); - - if (cmp) - return cmp; - - if (a->pack_mtime > b->pack_mtime) - return -1; - else if (a->pack_mtime < b->pack_mtime) - return 1; - - return a->pack_int_id - b->pack_int_id; -} - -static uint32_t get_pack_fanout(struct packed_git *p, uint32_t value) -{ - const uint32_t *level1_ofs = p->index_data; - - if (!level1_ofs) { - if (open_pack_index(p)) - return 0; - level1_ofs = p->index_data; - } - - if (p->index_version > 1) { - level1_ofs += 2; - } - - return ntohl(level1_ofs[value]); -} - -/* - * It is possible to artificially get into a state where there are many - * duplicate copies of objects. That can create high memory pressure if - * we are to create a list of all objects before de-duplication. To reduce - * this memory pressure without a significant performance drop, automatically - * group objects by the first byte of their object id. Use the IDX fanout - * tables to group the data, copy to a local array, then sort. - * - * Copy only the de-duplicated entries (selected by most-recent modified time - * of a packfile containing the object). - */ -static void dedupe_and_sort_entries( - struct packed_git **packs, uint32_t nr_packs, - struct midxed_git *midx, - struct pack_midx_entry **objects, uint32_t *nr_objects) -{ - uint32_t first_byte, i; - struct pack_midx_entry *objects_batch = NULL; - uint32_t nr_objects_batch = 0; - uint32_t alloc_objects_batch = 0; - uint32_t alloc_objects; - uint32_t pack_offset = 0; - struct pack_midx_entry *local_objects = NULL; - int nr_local_objects = 0; - - if (midx) { - nr_objects_batch = midx->num_objects; - pack_offset = midx->num_packs; - } - - for (i = pack_offset; i < nr_packs; i++) - nr_objects_batch += packs[i]->num_objects; - - /* - * Predict the size of the batches to be roughly ~1/256 the total - * count, but give some slack as they will not be equally sized. 
- */ - alloc_objects_batch = nr_objects_batch / 200; - ALLOC_ARRAY(objects_batch, alloc_objects_batch); - - *nr_objects = 0; - alloc_objects = alloc_objects_batch; - ALLOC_ARRAY(local_objects, alloc_objects); - - for (first_byte = 0; first_byte < 256; first_byte++) { - nr_objects_batch = 0; - - if (midx) { - uint32_t start, end; - if (first_byte) - start = get_be32(midx->chunk_oid_fanout + 4 * (first_byte - 1)); - else - start = 0; - - end = get_be32(midx->chunk_oid_fanout + 4 * first_byte); - - while (start < end) { - ALLOC_GROW(objects_batch, nr_objects_batch + 1, alloc_objects_batch); - nth_midxed_object_entry(midx, start, &objects_batch[nr_objects_batch]); - nr_objects_batch++; - start++; - } - } - - for (i = pack_offset; i < nr_packs; i++) { - uint32_t start, end; - - if (first_byte) - start = get_pack_fanout(packs[i], first_byte - 1); - else - start = 0; - end = get_pack_fanout(packs[i], first_byte); - - while (start < end) { - struct pack_midx_entry *entry; - ALLOC_GROW(objects_batch, nr_objects_batch + 1, alloc_objects_batch); - entry = &objects_batch[nr_objects_batch++]; - - if (!nth_packed_object_oid(&entry->oid, packs[i], start)) - die("unable to get sha1 of object %u in %s", - start, packs[i]->pack_name); - - entry->pack_int_id = i; - entry->offset = nth_packed_object_offset(packs[i], start); - entry->pack_mtime = packs[i]->mtime; - start++; - } - } - - QSORT(objects_batch, nr_objects_batch, midx_oid_compare); - - /* de-dupe as we copy from the batch in-order */ - for (i = 0; i < nr_objects_batch; i++) { - if (i > 0 && !oidcmp(&objects_batch[i - 1].oid, &objects_batch[i].oid)) - continue; - - ALLOC_GROW(local_objects, nr_local_objects + 1, alloc_objects); - memcpy(&local_objects[nr_local_objects], &objects_batch[i], sizeof(struct pack_midx_entry)); - nr_local_objects++; - } - } - - *nr_objects = nr_local_objects; - *objects = local_objects; -} - -static int build_midx_from_packs( - const char *pack_dir, - const char **pack_names, uint32_t nr_packs, - 
const char **midx_id) -{ - struct packed_git **packs; - const char **installed_pack_names; - uint32_t i, nr_installed_packs = 0; - uint32_t nr_objects = 0; - struct pack_midx_entry *objects = NULL; - uint32_t nr_total_packs = nr_packs; - struct strbuf pack_path = STRBUF_INIT; - int baselen; - - ALLOC_ARRAY(packs, nr_total_packs); - ALLOC_ARRAY(installed_pack_names, nr_total_packs); - - strbuf_addstr(&pack_path, pack_dir); - strbuf_addch(&pack_path, '/'); - baselen = pack_path.len; - for (i = 0; i < nr_packs; i++) { - strbuf_setlen(&pack_path, baselen); - strbuf_addstr(&pack_path, pack_names[i]); - - strbuf_strip_suffix(&pack_path, ".pack"); - strbuf_addstr(&pack_path, ".idx"); - - packs[nr_installed_packs] = add_packed_git(pack_path.buf, pack_path.len, 0); - - if (packs[nr_installed_packs] != NULL) { - if (open_pack_index(packs[nr_installed_packs])) - continue; - - nr_objects += packs[nr_installed_packs]->num_objects; - installed_pack_names[nr_installed_packs] = pack_names[i]; - nr_installed_packs++; - } - } - strbuf_release(&pack_path); - - if (!nr_objects || !nr_installed_packs) { - free(packs); - free(installed_pack_names); - return 1; - } - - ALLOC_ARRAY(objects, nr_objects); - nr_objects = 0; - - for (i = pack_offset; i < nr_installed_packs; i++) { - struct packed_git *p = packs[i]; - - for (j = 0; j < p->num_objects; j++) { - struct pack_midx_entry entry; - - if (!nth_packed_object_oid(&entry.oid, p, j)) - die("unable to get sha1 of object %u in %s", - i, p->pack_name); - - entry.pack_int_id = i; - entry.offset = nth_packed_object_offset(p, j); - - objects[nr_objects] = entry; - nr_objects++; - } - } - - ALLOC_ARRAY(obj_ptrs, nr_objects); - for (i = 0; i < nr_objects; i++) - obj_ptrs[i] = &objects[i]; - - *midx_id = write_midx_file(pack_dir, NULL, - installed_pack_names, nr_installed_packs, - objects, nr_objects); - - FREE_AND_NULL(installed_pack_names); - FREE_AND_NULL(objects); - - return 0; -} - -static int cmd_midx_write(void) -{ - const char **pack_names 
= NULL; - uint32_t i, nr_packs = 0; - const char *midx_id; - DIR *dir; - struct dirent *de; - - dir = opendir(opts.pack_dir); - if (!dir) { - error_errno("unable to open object pack directory: %s", - opts.pack_dir); - return 1; - } - - nr_packs = 8; - ALLOC_ARRAY(pack_names, nr_packs); - - i = 0; - while ((de = readdir(dir)) != NULL) { - if (is_dot_or_dotdot(de->d_name)) - continue; - - if (ends_with(de->d_name, ".pack")) { - char *t = xstrdup(de->d_name); - - ALLOC_GROW(pack_names, i + 1, nr_packs); - pack_names[i++] = t; - } - } - - nr_packs = i; - closedir(dir); - - if (build_midx_from_packs(opts.pack_dir, pack_names, nr_packs, &midx_id)) - die("Failed to build MIDX."); - - printf("%s\n", midx_id); - - if (pack_names) - FREE_AND_NULL(pack_names); - return 0; -} - -int cmd_midx(int argc, const char **argv, const char *prefix) -{ - static struct option builtin_midx_options[] = { - { OPTION_STRING, 'p', "pack-dir", &opts.pack_dir, - N_("dir"), - N_("The pack directory containing set of packfile and pack-index pairs.") }, - OPT_BOOL('w', "write", &opts.write, - N_("write midx file")), - OPT_END(), - }; - - if (argc == 2 && !strcmp(argv[1], "-h")) - usage_with_options(builtin_midx_usage, builtin_midx_options); - - git_config(git_default_config, NULL); - if (!core_midx) - die("git-midx requires core.midx=true."); - - argc = parse_options(argc, argv, prefix, - builtin_midx_options, - builtin_midx_usage, 0); - - if (!opts.pack_dir) { - struct strbuf path = STRBUF_INIT; - strbuf_addstr(&path, get_object_directory()); - strbuf_addstr(&path, "/pack"); - opts.pack_dir = strbuf_detach(&path, NULL); - } - - if (opts.write) - return cmd_midx_write(); - - return 0; -} diff --git a/command-list.txt b/command-list.txt index 4625a8c4f8d531..a9dda3b8af6a75 100644 --- a/command-list.txt +++ b/command-list.txt @@ -123,7 +123,6 @@ git-merge-index plumbingmanipulators git-merge-one-file purehelpers git-mergetool ancillarymanipulators complete git-merge-tree ancillaryinterrogators 
-git-midx plumbingmanipulators git-mktag plumbingmanipulators git-mktree plumbingmanipulators git-mv mainporcelain worktree diff --git a/git.c b/git.c index 2924710db52e79..9bbc121eee49f9 100644 --- a/git.c +++ b/git.c @@ -585,7 +585,6 @@ static struct cmd_struct commands[] = { { "merge-recursive-theirs", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT }, { "merge-subtree", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT }, { "merge-tree", cmd_merge_tree, RUN_SETUP | NO_PARSEOPT }, - { "midx", cmd_midx, RUN_SETUP | NO_PARSEOPT }, { "mktag", cmd_mktag, RUN_SETUP | NO_PARSEOPT }, { "mktree", cmd_mktree, RUN_SETUP }, { "mv", cmd_mv, RUN_SETUP | NEED_WORK_TREE },