From 817e30a287e12ce8e94ce41fcb969dd8ae53b9ce Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 28 Nov 2018 16:21:12 +0000 Subject: [PATCH 1/6] revision: add mark_tree_uninteresting_sparse In preparation for a new algorithm that walks fewer trees when creating a pack from a set of revisions, create a method that takes an oidset of tree oids and marks reachable objects as UNINTERESTING. The current implementation uses the existing mark_tree_uninteresting to recursively walk the trees and blobs. This will walk the same number of trees as the old mechanism. There is one new assumption in this approach: we are also given the oids of the interesting trees. This implementation does not use those trees at the moment, but we will use them in a later rewrite of this method. Signed-off-by: Derrick Stolee --- revision.c | 25 +++++++++++++++++++++++++ revision.h | 2 ++ 2 files changed, 27 insertions(+) diff --git a/revision.c b/revision.c index 13e0519c024163..f9eb6400f11563 100644 --- a/revision.c +++ b/revision.c @@ -99,6 +99,31 @@ void mark_tree_uninteresting(struct repository *r, struct tree *tree) mark_tree_contents_uninteresting(r, tree); } +void mark_trees_uninteresting_sparse(struct repository *r, + struct oidset *set) +{ + struct object_id *oid; + struct oidset_iter iter; + + oidset_iter_init(set, &iter); + while ((oid = oidset_iter_next(&iter))) { + struct tree *tree = lookup_tree(r, oid); + + if (!tree) + continue; + + if (tree->object.flags & UNINTERESTING) { + /* + * Remove the flag so the next call + * is not a no-op. The flag is added + * in mark_tree_unintersting(). + */ + tree->object.flags ^= UNINTERESTING; + mark_tree_uninteresting(r, tree); + } + } +} + struct commit_stack { struct commit **items; size_t nr, alloc; diff --git a/revision.h b/revision.h index 7987bfcd2e9bd6..f828e91ae9f6dc 100644 --- a/revision.h +++ b/revision.h @@ -67,6 +67,7 @@ struct rev_cmdline_info { #define REVISION_WALK_NO_WALK_SORTED 1 #define REVISION_WALK_NO_WALK_UNSORTED 2 +struct oidset; struct topo_walk_info; struct rev_info { @@ -327,6 +328,7 @@ void put_revision_mark(const struct rev_info *revs, void mark_parents_uninteresting(struct commit *commit); void mark_tree_uninteresting(struct repository *r, struct tree *tree); +void mark_trees_uninteresting_sparse(struct repository *r, struct oidset *set); void show_object_with_name(FILE *, struct object *, const char *); From 39dc89beb91ac12c94d13f7931a4d9ebc602681f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 28 Nov 2018 16:25:05 +0000 Subject: [PATCH 2/6] list-objects: consume sparse tree walk When creating a pack-file using 'git pack-objects --revs' we provide a list of interesting and uninteresting commits. For example, a push operation would make the local topic branch be interesting and the known remote refs as uninteresting. We want to discover the set of new objects to send to the server as a thin pack. We walk these commits until we discover a frontier of commits such that every commit walk starting at interesting commits ends in a root commit or unintersting commit. We then need to discover which non-commit objects are reachable from uninteresting commits. This commit walk is not changing during this series. The mark_edges_uninteresting() method in list-objects.c iterates on the commit list and does the following: * If the commit is UNINTERSTING, then mark its root tree and every object it can reach as UNINTERESTING. * If the commit is interesting, then mark the root tree of every UNINTERSTING parent (and all objects that tree can reach) as UNINTERSTING. At the very end, we repeat the process on every commit directly given to the revision walk from stdin. This helps ensure we properly cover shallow commits that otherwise were not included in the frontier. The logic to recursively follow trees is in the mark_tree_uninteresting() method in revision.c. The algorithm avoids duplicate work by not recursing into trees that are already marked UNINTERSTING. Add a new 'sparse' option to the mark_edges_uninteresting() method that performs this logic in a slightly new way. As we iterate over the commits, we add all of the root trees to an oidset. Then, call mark_trees_uninteresting_sparse() on that oidset. Note that we include interesting trees in this process. The current implementation of mark_trees_unintersting_sparse() will walk the same trees as the old logic, but this will be replaced in a later change. The sparse option is not used by any callers at the moment, but will be wired to 'git pack-objects' in the next change. Signed-off-by: Derrick Stolee --- bisect.c | 2 +- builtin/pack-objects.c | 2 +- builtin/rev-list.c | 2 +- http-push.c | 2 +- list-objects.c | 70 +++++++++++++++++++++++++++++++++++------- list-objects.h | 4 ++- 6 files changed, 66 insertions(+), 16 deletions(-) diff --git a/bisect.c b/bisect.c index 487675c67249a3..842f8b4b8f373a 100644 --- a/bisect.c +++ b/bisect.c @@ -656,7 +656,7 @@ static void bisect_common(struct rev_info *revs) if (prepare_revision_walk(revs)) die("revision walk setup failed"); if (revs->tree_objects) - mark_edges_uninteresting(revs, NULL); + mark_edges_uninteresting(revs, NULL, 0); } static void exit_if_skipped_commits(struct commit_list *tried, diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 411aefd6875b2d..5f70d840a707fa 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3135,7 +3135,7 @@ static void get_object_list(int ac, const char **av) if (prepare_revision_walk(&revs)) die(_("revision walk setup failed")); - mark_edges_uninteresting(&revs, show_edge); + mark_edges_uninteresting(&revs, show_edge, 0); if (!fn_show_object) fn_show_object = show_object; diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 2880ed37e3f971..9663cbfae0b8ed 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -543,7 +543,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) if (prepare_revision_walk(&revs)) die("revision walk setup failed"); if (revs.tree_objects) - mark_edges_uninteresting(&revs, show_edge); + mark_edges_uninteresting(&revs, show_edge, 0); if (bisect_list) { int reaches, all; diff --git a/http-push.c b/http-push.c index cd485909127a79..ea52d6f9f64816 100644 --- a/http-push.c +++ b/http-push.c @@ -1933,7 +1933,7 @@ int cmd_main(int argc, const char **argv) pushing = 0; if (prepare_revision_walk(&revs)) die("revision walk setup failed"); - mark_edges_uninteresting(&revs, NULL); + mark_edges_uninteresting(&revs, NULL, 0); objects_to_send = get_delta(&revs, ref_lock); finish_all_active_slots(); diff --git a/list-objects.c b/list-objects.c index c41cc80db5bc86..fb728f78426726 100644 --- a/list-objects.c +++ b/list-objects.c @@ -222,25 +222,73 @@ static void mark_edge_parents_uninteresting(struct commit *commit, } } -void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge) +static void add_edge_parents(struct commit *commit, + struct rev_info *revs, + show_edge_fn show_edge, + struct oidset *set) +{ + struct commit_list *parents; + + for (parents = commit->parents; parents; parents = parents->next) { + struct commit *parent = parents->item; + struct tree *tree = get_commit_tree(parent); + + if (!tree) + continue; + + oidset_insert(set, &tree->object.oid); + + if (!(parent->object.flags & UNINTERESTING)) + continue; + tree->object.flags |= UNINTERESTING; + + if (revs->edge_hint && !(parent->object.flags & SHOWN)) { + parent->object.flags |= SHOWN; + show_edge(parent); + } + } +} + +void mark_edges_uninteresting(struct rev_info *revs, + show_edge_fn show_edge, + int sparse) { struct commit_list *list; int i; - for (list = revs->commits; list; list = list->next) { - struct commit *commit = list->item; + if (sparse) { + struct oidset set; + oidset_init(&set, 16); - if (commit->object.flags & UNINTERESTING) { - mark_tree_uninteresting(revs->repo, - get_commit_tree(commit)); - if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) { - commit->object.flags |= SHOWN; - show_edge(commit); + for (list = revs->commits; list; list = list->next) { + struct commit *commit = list->item; + struct tree *tree = get_commit_tree(commit); + + if (commit->object.flags & UNINTERESTING) + tree->object.flags |= UNINTERESTING; + + oidset_insert(&set, &tree->object.oid); + add_edge_parents(commit, revs, show_edge, &set); + } + + mark_trees_uninteresting_sparse(revs->repo, &set); + oidset_clear(&set); + } else { + for (list = revs->commits; list; list = list->next) { + struct commit *commit = list->item; + if (commit->object.flags & UNINTERESTING) { + mark_tree_uninteresting(revs->repo, + get_commit_tree(commit)); + if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) { + commit->object.flags |= SHOWN; + show_edge(commit); + } + continue; } - continue; + mark_edge_parents_uninteresting(commit, revs, show_edge); } - mark_edge_parents_uninteresting(commit, revs, show_edge); } + if (revs->edge_hint_aggressive) { for (i = 0; i < revs->cmdline.nr; i++) { struct object *obj = revs->cmdline.rev[i].item; diff --git a/list-objects.h b/list-objects.h index ad407629269a7e..a952680e46671d 100644 --- a/list-objects.h +++ b/list-objects.h @@ -10,7 +10,9 @@ typedef void (*show_object_fn)(struct object *, const char *, void *); void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *); typedef void (*show_edge_fn)(struct commit *); -void mark_edges_uninteresting(struct rev_info *, show_edge_fn); +void mark_edges_uninteresting(struct rev_info *revs, + show_edge_fn show_edge, + int sparse); struct oidset; struct list_objects_filter_options; From ab733daff5398fd07ff051c323f51b70efbc2e57 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 28 Nov 2018 17:50:43 +0000 Subject: [PATCH 3/6] pack-objects: add --sparse option Add a '--sparse' option flag to the pack-objects builtin. This allows the user to specify that they want to use the new logic for walking trees. This logic currently does not differ from the existing output, but will in a later change. Create a new test script, t5322-pack-objects-sparse.sh, to ensure the object list that is selected matches what we expect. When we update the logic to walk in a sparse fashion, the final test will be updated to show the extra objects that are added. Signed-off-by: Derrick Stolee --- Documentation/git-pack-objects.txt | 11 ++- builtin/pack-objects.c | 5 +- t/t5322-pack-objects-sparse.sh | 115 +++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 2 deletions(-) create mode 100755 t/t5322-pack-objects-sparse.sh diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index 40c825c38197f4..e45f3e680d3632 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -14,7 +14,7 @@ SYNOPSIS [--local] [--incremental] [--window=] [--depth=] [--revs [--unpacked | --all]] [--keep-pack=] [--stdout [--filter=] | base-name] - [--shallow] [--keep-true-parents] < object-list + [--shallow] [--keep-true-parents] [--sparse] < object-list DESCRIPTION @@ -196,6 +196,15 @@ depth is 4095. Add --no-reuse-object if you want to force a uniform compression level on all data no matter the source. +--sparse:: + Use the "sparse" algorithm to determine which objects to include in + the pack, when combined with the "--revs" option. This algorithm + only walks trees that appear in paths that introduce new objects. + This can have significant performance benefits when computing + a pack to send a small change. However, it is possible that extra + objects are added to the pack-file if the included commits contain + certain types of direct renames. + --thin:: Create a "thin" pack by omitting the common objects between a sender and a receiver in order to reduce network transfer. This diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5f70d840a707fa..7d5b0735e39698 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -84,6 +84,7 @@ static unsigned long pack_size_limit; static int depth = 50; static int delta_search_threads; static int pack_to_stdout; +static int sparse; static int thin; static int num_preferred_base; static struct progress *progress_state; @@ -3135,7 +3136,7 @@ static void get_object_list(int ac, const char **av) if (prepare_revision_walk(&revs)) die(_("revision walk setup failed")); - mark_edges_uninteresting(&revs, show_edge, 0); + mark_edges_uninteresting(&revs, show_edge, sparse); if (!fn_show_object) fn_show_object = show_object; @@ -3292,6 +3293,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) { OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"), N_("unpack unreachable objects newer than