From 5f8e79f515f8a0364f11935b8a3df9cd7899e913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sat, 18 Aug 2018 16:41:22 +0200 Subject: [PATCH 1/8] trace.h: support nested performance tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Performance measurements are listed right now as a flat list, which is fine when we measure big blocks. But when we start adding more and more measurements, some of them could be just part of a bigger measurement and a flat list gives a wrong impression that they are executed at the same level instead of nested. Add trace_performance_enter() and trace_performance_leave() to allow indent these nested measurements. For now it does not help much because the only nested thing is (lazy) name hash initialization (e.g. called in diff-index from "git status"). This will help more because I'm going to add some more tracing that's actually nested. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- diff-lib.c | 3 +-- dir.c | 9 ++++--- name-hash.c | 4 +-- preload-index.c | 4 +-- read-cache.c | 11 ++++---- trace.c | 69 ++++++++++++++++++++++++++++++++++++++++++++----- trace.h | 15 +++++++++++ 7 files changed, 95 insertions(+), 20 deletions(-) diff --git a/diff-lib.c b/diff-lib.c index 88a98b1c06e879..86afb8e2cbc403 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -518,7 +518,6 @@ static int diff_cache(struct rev_info *revs, int run_diff_index(struct rev_info *revs, int cached) { struct object_array_entry *ent; - uint64_t start = getnanotime(); if (revs->pending.nr != 1) BUG("run_diff_index must be passed exactly one tree"); @@ -531,7 +530,7 @@ int run_diff_index(struct rev_info *revs, int cached) diffcore_fix_diff_index(&revs->diffopt); diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); - trace_performance_since(start, "diff-index"); + trace_performance_leave("diff-index"); return 0; } diff --git a/dir.c b/dir.c index 894763721d0fc0..efa7b56d9a07c4 100644 --- a/dir.c +++ b/dir.c @@ -2341,10 +2341,13 @@ int read_directory(struct dir_struct *dir, struct index_state *istate, const char *path, int len, const struct pathspec *pathspec) { struct untracked_cache_dir *untracked; - uint64_t start = getnanotime(); - if (has_symlink_leading_path(path, len)) + trace_performance_enter(); + + if (has_symlink_leading_path(path, len)) { + trace_performance_leave("read directory %.*s", len, path); return dir->nr; + } untracked = validate_untracked_cache(dir, len, pathspec); if (!untracked) @@ -2380,7 +2383,7 @@ int read_directory(struct dir_struct *dir, struct index_state *istate, dir->nr = i; } - trace_performance_since(start, "read directory %.*s", len, path); + trace_performance_leave("read directory %.*s", len, path); if (dir->untracked) { static int force_untracked_cache = -1; static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS); diff --git a/name-hash.c b/name-hash.c index 163849831c9f11..1fcda73cb396e3 100644 --- a/name-hash.c +++ b/name-hash.c @@ -578,10 +578,10 @@ static void threaded_lazy_init_name_hash( static void lazy_init_name_hash(struct index_state *istate) { - uint64_t start = getnanotime(); if (istate->name_hash_initialized) return; + trace_performance_enter(); hashmap_init(&istate->name_hash, cache_entry_cmp, NULL, istate->cache_nr); hashmap_init(&istate->dir_hash, dir_entry_cmp, NULL, istate->cache_nr); @@ -602,7 +602,7 @@ static void lazy_init_name_hash(struct index_state *istate) } istate->name_hash_initialized = 1; - 
trace_performance_since(start, "initialize name hash"); + trace_performance_leave("initialize name hash"); } /* diff --git a/preload-index.c b/preload-index.c index 6857573ce6de0a..230b655d9531fe 100644 --- a/preload-index.c +++ b/preload-index.c @@ -76,7 +76,6 @@ void preload_index(struct index_state *index, const struct pathspec *pathspec) { int threads, i, work, offset; struct thread_data data[MAX_PARALLEL]; - uint64_t start = getnanotime(); if (!core_preload_index) return; @@ -86,6 +85,7 @@ void preload_index(struct index_state *index, const struct pathspec *pathspec) threads = 2; if (threads < 2) return; + trace_performance_enter(); if (threads > MAX_PARALLEL) threads = MAX_PARALLEL; offset = 0; @@ -108,7 +108,7 @@ void preload_index(struct index_state *index, const struct pathspec *pathspec) if (pthread_join(p->pthread, NULL)) die("unable to join threaded lstat"); } - trace_performance_since(start, "preload index"); + trace_performance_leave("preload index"); enable_fscache(0); } #endif diff --git a/read-cache.c b/read-cache.c index 6f708d19432fb5..9aa90e099ff81d 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1478,8 +1478,8 @@ int refresh_index(struct index_state *istate, unsigned int flags, const char *typechange_fmt; const char *added_fmt; const char *unmerged_fmt; - uint64_t start = getnanotime(); + trace_performance_enter(); modified_fmt = (in_porcelain ? "M\t%s\n" : "%s: needs update\n"); deleted_fmt = (in_porcelain ? "D\t%s\n" : "%s: needs update\n"); typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n"); @@ -1549,7 +1549,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, replace_index_entry(istate, i, new_entry); } - trace_performance_since(start, "refresh index"); + trace_performance_leave("refresh index"); return has_errors; } @@ -2008,7 +2008,6 @@ static void freshen_shared_index(const char *shared_index, int warn) int read_index_from(struct index_state *istate, const char *path, const char *gitdir) { - uint64_t start = getnanotime(); struct split_index *split_index; int ret; char *base_oid_hex; @@ -2018,8 +2017,9 @@ int read_index_from(struct index_state *istate, const char *path, if (istate->initialized) return istate->cache_nr; + trace_performance_enter(); ret = do_read_index(istate, path, 0); - trace_performance_since(start, "read cache %s", path); + trace_performance_leave("read cache %s", path); split_index = istate->split_index; if (!split_index || is_null_oid(&split_index->base_oid)) { @@ -2027,6 +2027,7 @@ int read_index_from(struct index_state *istate, const char *path, return ret; } + trace_performance_enter(); if (split_index->base) discard_index(split_index->base); else @@ -2043,8 +2044,8 @@ int read_index_from(struct index_state *istate, const char *path, freshen_shared_index(base_path, 0); merge_base_index(istate); post_read_index_from(istate); - trace_performance_since(start, "read cache %s", base_path); free(base_path); + trace_performance_leave("read cache %s", base_path); return ret; } diff --git a/trace.c b/trace.c index fc623e91fdd7ed..fa4a2e7120e405 100644 --- a/trace.c +++ b/trace.c @@ -176,10 +176,30 @@ void trace_strbuf_fl(const char *file, int line, struct trace_key *key, strbuf_release(&buf); } +static uint64_t perf_start_times[10]; +static int perf_indent; + +uint64_t trace_performance_enter(void) +{ + uint64_t now; + + if (!trace_want(&trace_perf_key)) + return 0; + + now = getnanotime(); + perf_start_times[perf_indent] = now; + if (perf_indent + 1 < ARRAY_SIZE(perf_start_times)) + perf_indent++; + else + 
BUG("Too deep indentation"); + return now; +} + static void trace_performance_vprintf_fl(const char *file, int line, uint64_t nanos, const char *format, va_list ap) { + static const char space[] = " "; struct strbuf buf = STRBUF_INIT; if (!prepare_trace_line(file, line, &trace_perf_key, &buf)) @@ -188,7 +208,10 @@ static void trace_performance_vprintf_fl(const char *file, int line, strbuf_addf(&buf, "performance: %.9f s", (double) nanos / 1000000000); if (format && *format) { - strbuf_addstr(&buf, ": "); + if (perf_indent >= strlen(space)) + BUG("Too deep indentation"); + + strbuf_addf(&buf, ":%.*s ", perf_indent, space); strbuf_vaddf(&buf, format, ap); } @@ -244,6 +267,24 @@ void trace_performance_since(uint64_t start, const char *format, ...) va_end(ap); } +void trace_performance_leave(const char *format, ...) +{ + va_list ap; + uint64_t since; + + if (perf_indent) + perf_indent--; + + if (!format) /* Allow callers to leave without tracing anything */ + return; + + since = perf_start_times[perf_indent]; + va_start(ap, format); + trace_performance_vprintf_fl(NULL, 0, getnanotime() - since, + format, ap); + va_end(ap); +} + #else void trace_printf_key_fl(const char *file, int line, struct trace_key *key, @@ -273,6 +314,24 @@ void trace_performance_fl(const char *file, int line, uint64_t nanos, va_end(ap); } +void trace_performance_leave_fl(const char *file, int line, + uint64_t nanos, const char *format, ...) +{ + va_list ap; + uint64_t since; + + if (perf_indent) + perf_indent--; + + if (!format) /* Allow callers to leave without tracing anything */ + return; + + since = perf_start_times[perf_indent]; + va_start(ap, format); + trace_performance_vprintf_fl(file, line, nanos - since, format, ap); + va_end(ap); +} + #endif /* HAVE_VARIADIC_MACROS */ @@ -411,13 +470,11 @@ uint64_t getnanotime(void) } } -static uint64_t command_start_time; static struct strbuf command_line = STRBUF_INIT; static void print_command_performance_atexit(void) { - trace_performance_since(command_start_time, "git command:%s", - command_line.buf); + trace_performance_leave("git command:%s", command_line.buf); } void trace_command_performance(const char **argv) @@ -425,10 +482,10 @@ void trace_command_performance(const char **argv) if (!trace_want(&trace_perf_key)) return; - if (!command_start_time) + if (!command_line.len) atexit(print_command_performance_atexit); strbuf_reset(&command_line); sq_quote_argv_pretty(&command_line, argv); - command_start_time = getnanotime(); + trace_performance_enter(); } diff --git a/trace.h b/trace.h index 2b6a1bc17c2cc1..171b256d261c77 100644 --- a/trace.h +++ b/trace.h @@ -23,6 +23,7 @@ extern void trace_disable(struct trace_key *key); extern uint64_t getnanotime(void); extern void trace_command_performance(const char **argv); extern void trace_verbatim(struct trace_key *key, const void *buf, unsigned len); +uint64_t trace_performance_enter(void); #ifndef HAVE_VARIADIC_MACROS @@ -45,6 +46,9 @@ extern void trace_performance(uint64_t nanos, const char *format, ...); __attribute__((format (printf, 2, 3))) extern void trace_performance_since(uint64_t start, const char *format, ...); +__attribute__((format (printf, 1, 2))) +void trace_performance_leave(const char *format, ...); + #else /* @@ -118,6 +122,14 @@ extern void trace_performance_since(uint64_t start, const char *format, ...); __VA_ARGS__); \ } while (0) +#define trace_performance_leave(...) 
\ + do { \ + if (trace_pass_fl(&trace_perf_key)) \ + trace_performance_leave_fl(TRACE_CONTEXT, __LINE__, \ + getnanotime(), \ + __VA_ARGS__); \ + } while (0) + /* backend functions, use non-*fl macros instead */ __attribute__((format (printf, 4, 5))) extern void trace_printf_key_fl(const char *file, int line, struct trace_key *key, @@ -130,6 +142,9 @@ extern void trace_strbuf_fl(const char *file, int line, struct trace_key *key, __attribute__((format (printf, 4, 5))) extern void trace_performance_fl(const char *file, int line, uint64_t nanos, const char *fmt, ...); +__attribute__((format (printf, 4, 5))) +extern void trace_performance_leave_fl(const char *file, int line, + uint64_t nanos, const char *fmt, ...); static inline int trace_pass_fl(struct trace_key *key) { return key->fd || !key->initialized; From 4e5927910e37851f93c9aad38996069fa7b9dcbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sat, 18 Aug 2018 16:41:23 +0200 Subject: [PATCH 2/8] unpack-trees: add performance tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're going to optimize unpack_trees() a bit in the following patches. Let's add some tracing to measure how long it takes before and after. This is the baseline ("git checkout -" on webkit.git, 275k files on worktree) performance: 0.056651714 s: read cache .git/index performance: 0.183101080 s: preload index performance: 0.008584433 s: refresh index performance: 0.633767589 s: traverse_trees performance: 0.340265448 s: check_updates performance: 0.381884638 s: cache_tree_update performance: 1.401562947 s: unpack_trees performance: 0.338687914 s: write index, changed mask = 2e performance: 0.411927922 s: traverse_trees performance: 0.000023335 s: check_updates performance: 0.423697246 s: unpack_trees performance: 0.423708360 s: diff-index performance: 2.559524127 s: git command: git checkout - Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- cache-tree.c | 2 ++ unpack-trees.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cache-tree.c b/cache-tree.c index d89d281ca3556b..f37e6a6899872b 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -457,7 +457,9 @@ int cache_tree_update(struct index_state *istate, int flags) if (i) return i; + trace_performance_enter(); i = update_one(it, cache, entries, "", 0, &skip, flags); + trace_performance_leave("cache_tree_update"); if (i < 0) return i; istate->cache_changed |= CACHE_TREE_CHANGED; diff --git a/unpack-trees.c b/unpack-trees.c index 924a484fe757fd..fc6538c53edbdd 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -347,6 +347,7 @@ static int check_updates(struct unpack_trees_options *o) struct checkout state = CHECKOUT_INIT; int i; + trace_performance_enter(); state.force = 1; state.quiet = 1; state.refresh_cache = 1; @@ -416,6 +417,7 @@ static int check_updates(struct unpack_trees_options *o) errs |= finish_delayed_checkout(&state); if (o->update) git_attr_set_direction(GIT_ATTR_CHECKIN); + trace_performance_leave("check_updates"); return errs != 0; } @@ -1301,6 +1303,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options if (len > MAX_UNPACK_TREES) die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES); + trace_performance_enter(); memset(&el, 0, sizeof(el)); if (!core_apply_sparse_checkout || !o->update) o->skip_sparse_checkout = 1; @@ -1377,7 +1380,10 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options } } - if 
(traverse_trees(len, t, &info) < 0) + trace_performance_enter(); + ret = traverse_trees(len, t, &info); + trace_performance_leave("traverse_trees"); + if (ret < 0) goto return_failed; } @@ -1469,6 +1475,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options o->src_index = NULL; done: + trace_performance_leave("unpack_trees"); clear_exclude_list(&el); return ret; From 2771fbbc67ee212a48781388666c4965c0130475 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sat, 18 Aug 2018 16:41:24 +0200 Subject: [PATCH 3/8] unpack-trees: optimize walking same trees with cache-tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to merge one or many trees with the index, unpack-trees code walks multiple trees in parallel with the index and performs n-way merge. If we find out at start of a directory that all trees are the same (by comparing OID) and cache-tree happens to be available for that directory as well, we could avoid walking the trees because we already know what these trees contain: it's flattened in what's called "the index". The upside is of course a lot less I/O since we can potentially skip lots of trees (think subtrees). We also save CPU because we don't have to inflate and apply the deltas. The downside is of course more fragile code since the logic in some functions are now duplicated elsewhere. "checkout -" with this patch on webkit.git (275k files): baseline new -------------------------------------------------------------------- 0.056651714 0.080394752 s: read cache .git/index 0.183101080 0.216010838 s: preload index 0.008584433 0.008534301 s: refresh index 0.633767589 0.251992198 s: traverse_trees 0.340265448 0.377031383 s: check_updates 0.381884638 0.372768105 s: cache_tree_update 1.401562947 1.045887251 s: unpack_trees 0.338687914 0.314983512 s: write index, changed mask = 2e 0.411927922 0.062572653 s: traverse_trees 0.000023335 0.000022544 s: check_updates 0.423697246 0.073795585 s: unpack_trees 0.423708360 0.073807557 s: diff-index 2.559524127 1.938191592 s: git command: git checkout - Another measurement from Ben's running "git checkout" with over 500k trees (on the whole series): baseline new ---------------------------------------------------------------------- 0.535510167 0.556558733 s: read cache .git/index 0.3057373 0.3147105 s: initialize name hash 0.0184082 0.023558433 s: preload index 0.086910967 0.089085967 s: refresh index 7.889590767 2.191554433 s: unpack trees 0.120760833 0.131941267 s: update worktree after a merge 2.2583504 2.572663167 s: repair cache-tree 0.8916137 0.959495233 s: write index, changed mask = 28 3.405199233 0.2710663 s: unpack trees 0.000999667 0.0021554 s: update worktree after a merge 3.4063306 0.273318333 s: diff-index 16.9524923 9.462943133 s: git command: git.exe checkout This command calls unpack_trees() twice, the first time on 2way merge and the second 1way merge. In both times, "unpack trees" time is reduced to one third. Overall time reduction is not that impressive of course because index operations take a big chunk. And there's that repair cache-tree line. PS. A note about cache-tree invalidation and the use of it in this code. We do invalidate cache-tree in _source_ index when we add new entries to the (temporary) "result" index. But we also use the cache-tree from source index in this optimization. Does this mean we end up having no cache-tree in the source index to activate this optimization? 
The answer is twisted: the order of finding a good cache-tree and invalidating it matters. In this case we check for a good cache-tree first in all_trees_same_as_cache_tree(), then we start to merge things and potentially invalidate that same cache-tree in the process. Since cache-tree invalidation happens after the optimization kicks in, we're still good. But we may lose that cache-tree at the very first call_unpack_fn() call in traverse_by_cache_tree(). Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- unpack-trees.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/unpack-trees.c b/unpack-trees.c index fc6538c53edbdd..6b6c8b68393fef 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -639,6 +639,102 @@ static inline int are_same_oid(struct name_entry *name_j, struct name_entry *nam return name_j->oid && name_k->oid && !oidcmp(name_j->oid, name_k->oid); } +static int all_trees_same_as_cache_tree(int n, unsigned long dirmask, + struct name_entry *names, + struct traverse_info *info) +{ + struct unpack_trees_options *o = info->data; + int i; + + if (!o->merge || dirmask != ((1 << n) - 1)) + return 0; + + for (i = 1; i < n; i++) + if (!are_same_oid(names, names + i)) + return 0; + + return cache_tree_matches_traversal(o->src_index->cache_tree, names, info); +} + +static int index_pos_by_traverse_info(struct name_entry *names, + struct traverse_info *info) +{ + struct unpack_trees_options *o = info->data; + int len = traverse_path_len(info, names); + char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */); + int pos; + + make_traverse_path(name, info, names); + name[len++] = '/'; + name[len] = '\0'; + pos = index_name_pos(o->src_index, name, len); + if (pos >= 0) + BUG("This is a directory and should not exist in index"); + pos = -pos - 1; + if (!starts_with(o->src_index->cache[pos]->name, name) || + (pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name))) + BUG("pos must point at the first entry in this directory"); + free(name); + return pos; +} + +/* + * Fast path if we detect that all trees are the same as cache-tree at this + * path. We'll walk these trees recursively using cache-tree/index instead of + * ODB since already know what these trees contain. + */ +static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names, + struct name_entry *names, + struct traverse_info *info) +{ + struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, }; + struct unpack_trees_options *o = info->data; + int i, d; + + if (!o->merge) + BUG("We need cache-tree to do this optimization"); + + /* + * Do what unpack_callback() and unpack_nondirectories() normally + * do. But we walk all paths in an iterative loop instead. + * + * D/F conflicts and higher stage entries are not a concern + * because cache-tree would be invalidated and we would never + * get here in the first place. 
+ */ + for (i = 0; i < nr_entries; i++) { + struct cache_entry *tree_ce; + int len, rc; + + src[0] = o->src_index->cache[pos + i]; + + len = ce_namelen(src[0]); + tree_ce = xcalloc(1, cache_entry_size(len)); + + tree_ce->ce_mode = src[0]->ce_mode; + tree_ce->ce_flags = create_ce_flags(0); + tree_ce->ce_namelen = len; + oidcpy(&tree_ce->oid, &src[0]->oid); + memcpy(tree_ce->name, src[0]->name, len + 1); + + for (d = 1; d <= nr_names; d++) + src[d] = tree_ce; + + rc = call_unpack_fn((const struct cache_entry * const *)src, o); + free(tree_ce); + if (rc < 0) + return rc; + + mark_ce_used(src[0], o); + } + if (o->debug_unpack) + printf("Unpacked %d entries from %s to %s using cache-tree\n", + nr_entries, + o->src_index->cache[pos]->name, + o->src_index->cache[pos + nr_entries - 1]->name); + return 0; +} + static int traverse_trees_recursive(int n, unsigned long dirmask, unsigned long df_conflicts, struct name_entry *names, @@ -650,6 +746,27 @@ static int traverse_trees_recursive(int n, unsigned long dirmask, void *buf[MAX_UNPACK_TREES]; struct traverse_info newinfo; struct name_entry *p; + int nr_entries; + + nr_entries = all_trees_same_as_cache_tree(n, dirmask, names, info); + if (nr_entries > 0) { + struct unpack_trees_options *o = info->data; + int pos = index_pos_by_traverse_info(names, info); + + if (!o->merge || df_conflicts) + BUG("Wrong condition to get here buddy"); + + /* + * All entries up to 'pos' must have been processed + * (i.e. marked CE_UNPACKED) at this point. But to be safe, + * save and restore cache_bottom anyway to not miss + * unprocessed entries before 'pos'. + */ + bottom = o->cache_bottom; + ret = traverse_by_cache_tree(pos, nr_entries, n, names, info); + o->cache_bottom = bottom; + return ret; + } p = names; while (!p->mode) @@ -816,6 +933,11 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info, return ce; } +/* + * Note that traverse_by_cache_tree() duplicates some logic in this function + * without actually calling it. If you change the logic here you may need to + * check and change there as well. + */ static int unpack_nondirectories(int n, unsigned long mask, unsigned long dirmask, struct cache_entry **src, @@ -1008,6 +1130,11 @@ static void debug_unpack_callback(int n, debug_name_entry(i, names + i); } +/* + * Note that traverse_by_cache_tree() duplicates some logic in this function + * without actually calling it. If you change the logic here you may need to + * check and change there as well. + */ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info) { struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, }; From eecef1f7bf9ee941dc7236462eb6f5e6125fed85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sat, 18 Aug 2018 16:41:25 +0200 Subject: [PATCH 4/8] unpack-trees: reduce malloc in cache-tree walk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a micro optimization that probably only shines on repos with deep directory structure. Instead of allocating and freeing a new cache_entry in every iteration, we reuse the last one and only update the parts that are new each iteration. 
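
To make the reuse pattern concrete, here is a free-standing sketch of
the same grow-only scratch buffer idea (a toy program, not the
unpack-trees code itself; the sizes and names are made up):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
        char *scratch = NULL;
        size_t cap = 0;
        size_t need[] = { 16, 8, 64, 32, 128 };
        size_t i;

        for (i = 0; i < sizeof(need) / sizeof(need[0]); i++) {
            if (need[i] > cap) {
                /* grow geometrically, as the patch does with
                 * "new_ce_len <<= 1", so later iterations rarely
                 * reallocate; zero only the freshly grown buffer */
                char *p = realloc(scratch, need[i] * 2);
                if (!p) {
                    free(scratch);
                    return 1;
                }
                scratch = p;
                cap = need[i] * 2;
                memset(scratch, 0, cap);
            }
            /* per-iteration work only touches the fields that change */
            snprintf(scratch, cap, "entry %zu needs %zu bytes", i, need[i]);
            puts(scratch);
        }
        free(scratch);
        return 0;
    }

Once the buffer has seen the largest entry, the realloc branch stops
firing and each iteration is a plain in-place update.
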
Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- unpack-trees.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/unpack-trees.c b/unpack-trees.c index 6b6c8b68393fef..329ddf76695da2 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -689,6 +689,8 @@ static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names, { struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, }; struct unpack_trees_options *o = info->data; + struct cache_entry *tree_ce = NULL; + int ce_len = 0; int i, d; if (!o->merge) @@ -703,30 +705,39 @@ static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names, * get here in the first place. */ for (i = 0; i < nr_entries; i++) { - struct cache_entry *tree_ce; - int len, rc; + int new_ce_len, len, rc; src[0] = o->src_index->cache[pos + i]; len = ce_namelen(src[0]); - tree_ce = xcalloc(1, cache_entry_size(len)); + new_ce_len = cache_entry_size(len); + + if (new_ce_len > ce_len) { + new_ce_len <<= 1; + tree_ce = xrealloc(tree_ce, new_ce_len); + memset(tree_ce, 0, new_ce_len); + ce_len = new_ce_len; + + tree_ce->ce_flags = create_ce_flags(0); + + for (d = 1; d <= nr_names; d++) + src[d] = tree_ce; + } tree_ce->ce_mode = src[0]->ce_mode; - tree_ce->ce_flags = create_ce_flags(0); tree_ce->ce_namelen = len; oidcpy(&tree_ce->oid, &src[0]->oid); memcpy(tree_ce->name, src[0]->name, len + 1); - for (d = 1; d <= nr_names; d++) - src[d] = tree_ce; - rc = call_unpack_fn((const struct cache_entry * const *)src, o); - free(tree_ce); - if (rc < 0) + if (rc < 0) { + free(tree_ce); return rc; + } mark_ce_used(src[0], o); } + free(tree_ce); if (o->debug_unpack) printf("Unpacked %d entries from %s to %s using cache-tree\n", nr_entries, From 248f1173e3b91e71dcdbb60ca938632cd3c36391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sat, 18 Aug 2018 16:41:26 +0200 Subject: [PATCH 5/8] unpack-trees: reuse (still valid) cache-tree from src_index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do n-way merge by walking the source index and n trees at the same time and add merge results to a new temporary index called o->result. The merge result for any given path could be either - keep_entry(): same old index entry in o->src_index is reused - merged_entry(): either a new entry is added, or an existing one updated - deleted_entry(): one entry from o->src_index is removed For some reason [1] we keep making sure that the source index's cache-tree is still valid if used by o->result: for all those merged/deleted entries, we invalidate the same path in o->src_index, so only cache-trees covering the "keep_entry" parts remain good. Because of this, the cache-tree from o->src_index can be perfectly reused in o->result. And in fact we already rely on this logic to reuse untracked cache in edf3b90553 (unpack-trees: preserve index extensions - 2017-05-08). Move the cache-tree to o->result before doing cache_tree_update() to reduce hashing cost. Since cache_tree_update() has risen up as one of the most expensive parts in unpack_trees() after the last few patches. 
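
The hand-off itself is a plain pointer ownership transfer, in the same
spirit as move_index_extensions(); a minimal sketch of the pattern
using a stand-in struct (toy_index is illustrative, not Git's real
index_state):

    #include <stdlib.h>

    struct cache_tree;                  /* opaque here */

    struct toy_index {
        struct cache_tree *cache_tree;  /* lazily built, expensive to redo */
    };

    /* dst adopts the pointer; src forgets it, so discarding src later
     * cannot free an extension that dst still relies on */
    static void move_cache_tree(struct toy_index *dst, struct toy_index *src)
    {
        dst->cache_tree = src->cache_tree;
        src->cache_tree = NULL;
    }

    int main(void)
    {
        struct toy_index src = { (struct cache_tree *)malloc(1) };
        struct toy_index dst = { NULL };

        move_cache_tree(&dst, &src);
        /* src can now be discarded safely; dst owns the cache-tree */
        free(dst.cache_tree);
        return 0;
    }

With the still-valid cache-tree carried over this way,
cache_tree_update() mostly revalidates existing nodes instead of
rehashing everything from scratch.
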
This does help reduce unpack_trees() time significantly (on webkit.git): before after -------------------------------------------------------------------- 0.080394752 0.051258167 s: read cache .git/index 0.216010838 0.212106298 s: preload index 0.008534301 0.280521764 s: refresh index 0.251992198 0.218160442 s: traverse_trees 0.377031383 0.374948191 s: check_updates 0.372768105 0.037040114 s: cache_tree_update 1.045887251 0.672031609 s: unpack_trees 0.314983512 0.317456290 s: write index, changed mask = 2e 0.062572653 0.038382654 s: traverse_trees 0.000022544 0.000042731 s: check_updates 0.073795585 0.050930053 s: unpack_trees 0.073807557 0.051099735 s: diff-index 1.938191592 1.614241153 s: git command: git checkout - [1] I'm pretty sure the reason is an oversight in 34110cd4e3 (Make 'unpack_trees()' have a separate source and destination index - 2008-03-06). That patch aims to _not_ update the source index at all. The invalidation should have been done on o->result in that patch. But then there was no cache-tree on o->result even then so it's pointless to do so. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- read-cache.c | 2 ++ unpack-trees.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/read-cache.c b/read-cache.c index 9aa90e099ff81d..0b7b145c718e44 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2950,6 +2950,8 @@ void move_index_extensions(struct index_state *dst, struct index_state *src) { dst->untracked = src->untracked; src->untracked = NULL; + dst->cache_tree = src->cache_tree; + src->cache_tree = NULL; } struct cache_entry *dup_cache_entry(const struct cache_entry *ce, diff --git a/unpack-trees.c b/unpack-trees.c index 329ddf76695da2..2f4862fd63c585 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1596,6 +1596,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options ret = check_updates(o) ? (-2) : 0; if (o->dst_index) { + move_index_extensions(&o->result, o->src_index); if (!ret) { if (!o->result.cache_tree) o->result.cache_tree = cache_tree(); @@ -1604,7 +1605,6 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options WRITE_TREE_SILENT | WRITE_TREE_REPAIR); } - move_index_extensions(&o->result, o->src_index); discard_index(o->dst_index); *o->dst_index = o->result; } else { From 41f222875d5c0d5920b1fb9bec681044faa9cbac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sat, 18 Aug 2018 16:41:27 +0200 Subject: [PATCH 6/8] unpack-trees: add missing cache invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Any changes to the output index should be (confusingly) marked in the source index with invalidate_ce_path(). This is used to make sure we still have valid untracked cache and cache-tree extensions in the end. We do a pretty good job of invalidating except in two places. verify_clean_subdirectory() is part of verify_absent() and verify_absent_sparse(). The former is usually called by merged_entry() or directly in threeway_merge(). The latter is obviously used by sparse checkout. In these three call sites, only merged_entry() follows up with invalidate_ce_path(). The other two don't, but they should not trigger this ce removal because this is about D/F conflicts [1]. But let's be safe and invalidate_ce_path() here as well. The second place is keep_entry() which is also used by threeway_merge() to keep higher stage entries. 
In order to reuse cache-tree we need to invalidate these paths as well. It's not a problem in the past because whenever a higher stage entry is present, cache-tree will not be created [2]. Now we salvage cache-tree even when higher stage entries are present, we need more invalidation. [1] c81935348b (Fix switching to a branch with D/F when current branch has file D. - 2007-03-15) [2] This is probably too strict. We should be able to create and save cache-tree for the directories that do not have conflict entries in cache_tree_update(). And this becomes more important when cache-tree plays bigger role in terms of performance. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- unpack-trees.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unpack-trees.c b/unpack-trees.c index 2f4862fd63c585..274808ed270220 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1805,6 +1805,7 @@ static int verify_clean_subdirectory(const struct cache_entry *ce, if (verify_uptodate(ce2, o)) return -1; add_entry(o, ce2, CE_REMOVE, 0); + invalidate_ce_path(ce, o); mark_ce_used(ce2, o); } cnt++; @@ -2085,6 +2086,8 @@ static int keep_entry(const struct cache_entry *ce, struct unpack_trees_options *o) { add_entry(o, ce, 0, 0); + if (ce_stage(ce)) + invalidate_ce_path(ce, o); return 1; } From 6400d134e1e651b999ba574b820353c239d6c5e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sat, 18 Aug 2018 16:41:28 +0200 Subject: [PATCH 7/8] cache-tree: verify valid cache-tree in the test suite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes sure that cache-tree is consistent with the index. The main purpose is to catch potential problems by saving the index in unpack_trees() but the line in write_index() would also help spot missing invalidation in other code. 
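
One mechanism worth spelling out: verify_one() walks the sorted index
and hops over each already-verified subtree in a single step using its
entry_count, rather than revisiting every covered entry. A toy model of
that skip-walk (the paths and the count are made up for illustration):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        /* index entries are sorted by path, as in Git */
        const char *cache[] = {
            "Makefile", "dir/a.c", "dir/b.c", "dir/sub/c.c", "zlib.c",
        };
        int dir_entry_count = 3;  /* cache-tree node for "dir/" covers 3 */
        int i = 0, nr = 5;

        while (i < nr) {
            if (!strncmp(cache[i], "dir/", 4)) {
                /* the subtree is hashed as one unit; advance past all
                 * entries it covers, like
                 * "i += sub->cache_tree->entry_count" in verify_one() */
                printf("subtree dir/ -> skip %d entries\n", dir_entry_count);
                i += dir_entry_count;
            } else {
                printf("blob %s\n", cache[i]);
                i++;
            }
        }
        return 0;
    }

That same entry_count bookkeeping is what lets the verifier rebuild
each tree object's payload and compare its hash against the oid
recorded in the cache-tree node.
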
Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- cache-tree.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++ cache-tree.h | 1 + read-cache.c | 3 ++ t/test-lib.sh | 6 ++++ unpack-trees.c | 2 ++ 5 files changed, 90 insertions(+) diff --git a/cache-tree.c b/cache-tree.c index f37e6a6899872b..a47dcdd3aba030 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -4,6 +4,7 @@ #include "tree-walk.h" #include "cache-tree.h" #include "object-store.h" +#include "replace-object.h" #include "gvfs.h" #ifndef DEBUG_CACHE_TREE @@ -744,3 +745,80 @@ int cache_tree_matches_traversal(struct cache_tree *root, return it->entry_count; return 0; } + +static void verify_one(struct index_state *istate, + struct cache_tree *it, + struct strbuf *path) +{ + int i, pos, len = path->len; + struct strbuf tree_buf = STRBUF_INIT; + struct object_id new_oid; + + for (i = 0; i < it->subtree_nr; i++) { + strbuf_addf(path, "%s/", it->down[i]->name); + verify_one(istate, it->down[i]->cache_tree, path); + strbuf_setlen(path, len); + } + + if (it->entry_count < 0 || + /* no verification on tests (t7003) that replace trees */ + lookup_replace_object(the_repository, &it->oid) != &it->oid) + return; + + if (path->len) { + pos = index_name_pos(istate, path->buf, path->len); + pos = -pos - 1; + } else { + pos = 0; + } + + i = 0; + while (i < it->entry_count) { + struct cache_entry *ce = istate->cache[pos + i]; + const char *slash; + struct cache_tree_sub *sub = NULL; + const struct object_id *oid; + const char *name; + unsigned mode; + int entlen; + + if (ce->ce_flags & (CE_STAGEMASK | CE_INTENT_TO_ADD | CE_REMOVE)) + BUG("%s with flags 0x%x should not be in cache-tree", + ce->name, ce->ce_flags); + name = ce->name + path->len; + slash = strchr(name, '/'); + if (slash) { + entlen = slash - name; + sub = find_subtree(it, ce->name + path->len, entlen, 0); + if (!sub || sub->cache_tree->entry_count < 0) + BUG("bad subtree '%.*s'", entlen, name); + oid = &sub->cache_tree->oid; + mode = S_IFDIR; + i += sub->cache_tree->entry_count; + } else { + oid = &ce->oid; + mode = ce->ce_mode; + entlen = ce_namelen(ce) - path->len; + i++; + } + strbuf_addf(&tree_buf, "%o %.*s%c", mode, entlen, name, '\0'); + strbuf_add(&tree_buf, oid->hash, the_hash_algo->rawsz); + } + hash_object_file(tree_buf.buf, tree_buf.len, tree_type, &new_oid); + if (oidcmp(&new_oid, &it->oid)) + BUG("cache-tree for path %.*s does not match. 
" + "Expected %s got %s", len, path->buf, + oid_to_hex(&new_oid), oid_to_hex(&it->oid)); + strbuf_setlen(path, len); + strbuf_release(&tree_buf); +} + +void cache_tree_verify(struct index_state *istate) +{ + struct strbuf path = STRBUF_INIT; + + if (!istate->cache_tree) + return; + verify_one(istate, istate->cache_tree, &path); + strbuf_release(&path); +} diff --git a/cache-tree.h b/cache-tree.h index fc0c842e773f64..0ab6784ffea5da 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -32,6 +32,7 @@ struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); int cache_tree_fully_valid(struct cache_tree *); int cache_tree_update(struct index_state *, int); +void cache_tree_verify(struct index_state *); /* bitmasks to write_cache_as_tree flags */ #define WRITE_TREE_MISSING_OK 1 diff --git a/read-cache.c b/read-cache.c index 0b7b145c718e44..21f4fbc67c9cd6 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2754,6 +2754,9 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock, int new_shared_index, ret; struct split_index *si = istate->split_index; + if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0)) + cache_tree_verify(istate); + if ((flags & SKIP_IF_UNCHANGED) && !istate->cache_changed) { if (flags & COMMIT_LOCK) rollback_lock_file(lock); diff --git a/t/test-lib.sh b/t/test-lib.sh index f2bd4e6fb86fd4..9e6fdd1a37ac5a 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1116,6 +1116,12 @@ else test_set_prereq C_LOCALE_OUTPUT fi +if test -z "$GIT_TEST_CHECK_CACHE_TREE" +then + GIT_TEST_CHECK_CACHE_TREE=true + export GIT_TEST_CHECK_CACHE_TREE +fi + test_lazy_prereq PIPE ' # test whether the filesystem supports FIFOs test_have_prereq !MINGW,!CYGWIN && diff --git a/unpack-trees.c b/unpack-trees.c index 274808ed270220..495423363ca38f 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1598,6 +1598,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options if (o->dst_index) { move_index_extensions(&o->result, o->src_index); if (!ret) { + if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0)) + cache_tree_verify(&o->result); if (!o->result.cache_tree) o->result.cache_tree = cache_tree(); if (!cache_tree_fully_valid(o->result.cache_tree)) From f7ce28bdea69e5599696a315eeba870a457db775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sat, 25 Aug 2018 15:02:09 +0200 Subject: [PATCH 8/8] Document update for nd/unpack-trees-with-cache-tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix an incorrect comment in the new code added in b4da37380b (unpack-trees: optimize walking same trees with cache-tree - 2018-08-18) and document about the new test variable that is enabled by default in test-lib.sh in 4592e6080f (cache-tree: verify valid cache-tree in the test suite - 2018-08-18) Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- t/README | 4 ++++ unpack-trees.c | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/t/README b/t/README index 9028b47d923ca0..204b9f4cc5cbf8 100644 --- a/t/README +++ b/t/README @@ -319,6 +319,10 @@ GIT_TEST_OE_DELTA_SIZE= exercises the uncomon pack-objects code path where deltas larger than this limit require extra memory allocation for bookkeeping. +GIT_TEST_VALIDATE_INDEX_CACHE_ENTRIES= checks that cache-tree +records are valid when the index is written out or after a merge. This +is mostly to catch missing invalidation. Default is true. 
+ Naming Tests ------------ diff --git a/unpack-trees.c b/unpack-trees.c index 495423363ca38f..852ffa3bc8c072 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -680,8 +680,8 @@ static int index_pos_by_traverse_info(struct name_entry *names, /* * Fast path if we detect that all trees are the same as cache-tree at this - * path. We'll walk these trees recursively using cache-tree/index instead of - * ODB since already know what these trees contain. + * path. We'll walk these trees in an iterative loop using cache-tree/index + * instead of ODB since we already know what these trees contain. */ static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names, struct name_entry *names,
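
As a closing illustration of the enter/leave tracing added in patch 1
and relied on throughout this series, here is a free-standing model of
the indent-stack idea; the timer calls, the 10-slot cap and the output
format only loosely mirror trace.c:

    #define _POSIX_C_SOURCE 199309L  /* for clock_gettime */
    #include <stdio.h>
    #include <time.h>

    /* A small stack of start times plus an indent level, so a nested
     * region is reported indented under its parent. trace.c caps the
     * depth at 10 and BUGs beyond that; this toy assumes well-nested
     * enter/leave pairs. */
    static struct timespec start_times[10];
    static int indent;

    static void perf_enter(void)
    {
        clock_gettime(CLOCK_MONOTONIC, &start_times[indent++]);
    }

    static void perf_leave(const char *label)
    {
        struct timespec now, *start = &start_times[--indent];
        double s;

        clock_gettime(CLOCK_MONOTONIC, &now);
        s = (now.tv_sec - start->tv_sec) +
            (now.tv_nsec - start->tv_nsec) / 1e9;
        printf("performance: %.9f s:%.*s %s\n",
               s, indent, "          ", label);
    }

    int main(void)
    {
        perf_enter();           /* outer region, e.g. unpack_trees */
        perf_enter();           /* nested region, e.g. traverse_trees */
        perf_leave("traverse_trees");
        perf_leave("unpack_trees");
        return 0;
    }

Running it prints the inner region indented one extra space under its
parent, which is how the nested measurements in the commit messages
above (traverse_trees under unpack_trees, for example) relate to each
other.
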