From 522e6417487cc5c3f2f6d49c8f63554af63d8eda Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 24 Jan 2020 21:19:31 +0000 Subject: [PATCH 001/129] t1091: use check_files to reduce boilerplate When testing the sparse-checkout feature, we need to compare the contents of the working-directory against some expected output. Using here-docs was useful in the beginning, but became repetitive as the test script grew. Create a check_files helper to make the tests simpler and easier to extend. It also reduces instances of bad here-doc whitespace. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t1091-sparse-checkout-builtin.sh | 117 ++++++----------------------- 1 file changed, 22 insertions(+), 95 deletions(-) diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index ff7f8f7a1fac66..e058a20ad6c85d 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -12,6 +12,13 @@ list_files() { (cd "$1" && printf '%s\n' *) } +check_files() { + list_files "$1" >actual && + shift && + printf "%s\n" $@ >expect && + test_cmp expect actual +} + test_expect_success 'setup' ' git init repo && ( @@ -58,9 +65,7 @@ test_expect_success 'git sparse-checkout init' ' EOF test_cmp expect repo/.git/info/sparse-checkout && test_cmp_config -C repo true core.sparsecheckout && - list_files repo >dir && - echo a >expect && - test_cmp expect dir + check_files repo a ' test_expect_success 'git sparse-checkout list after init' ' @@ -81,13 +86,7 @@ test_expect_success 'init with existing sparse-checkout' ' *folder* EOF test_cmp expect repo/.git/info/sparse-checkout && - list_files repo >dir && - cat >expect <<-EOF && - a - folder1 - folder2 - EOF - test_cmp expect dir + check_files repo a folder1 folder2 ' test_expect_success 'clone --sparse' ' @@ -98,9 +97,7 @@ test_expect_success 'clone --sparse' ' !/*/ EOF test_cmp expect actual && - list_files clone >dir && - echo a >expect && - test_cmp expect dir + check_files 
clone a ' test_expect_success 'set enables config' ' @@ -127,13 +124,7 @@ test_expect_success 'set sparse-checkout using builtin' ' git -C repo sparse-checkout list >actual && test_cmp expect actual && test_cmp expect repo/.git/info/sparse-checkout && - list_files repo >dir && - cat >expect <<-EOF && - a - folder1 - folder2 - EOF - test_cmp expect dir + check_files repo a folder1 folder2 ' test_expect_success 'set sparse-checkout using --stdin' ' @@ -147,13 +138,7 @@ test_expect_success 'set sparse-checkout using --stdin' ' git -C repo sparse-checkout list >actual && test_cmp expect actual && test_cmp expect repo/.git/info/sparse-checkout && - list_files repo >dir && - cat >expect <<-EOF && - a - folder1 - folder2 - EOF - test_cmp expect dir + check_files repo "a folder1 folder2" ' test_expect_success 'cone mode: match patterns' ' @@ -162,13 +147,7 @@ test_expect_success 'cone mode: match patterns' ' git -C repo read-tree -mu HEAD 2>err && test_i18ngrep ! "disabling cone patterns" err && git -C repo reset --hard && - list_files repo >dir && - cat >expect <<-EOF && - a - folder1 - folder2 - EOF - test_cmp expect dir + check_files repo a folder1 folder2 ' test_expect_success 'cone mode: warn on bad pattern' ' @@ -185,14 +164,7 @@ test_expect_success 'sparse-checkout disable' ' test_path_is_file repo/.git/info/sparse-checkout && git -C repo config --list >config && test_must_fail git config core.sparseCheckout && - list_files repo >dir && - cat >expect <<-EOF && - a - deep - folder1 - folder2 - EOF - test_cmp expect dir + check_files repo a deep folder1 folder2 ' test_expect_success 'cone mode: init and set' ' @@ -204,24 +176,9 @@ test_expect_success 'cone mode: init and set' ' test_cmp expect dir && git -C repo sparse-checkout set deep/deeper1/deepest/ 2>err && test_must_be_empty err && - list_files repo >dir && - cat >expect <<-EOF && - a - deep - EOF - test_cmp expect dir && - list_files repo/deep >dir && - cat >expect <<-EOF && - a - deeper1 - EOF - test_cmp 
expect dir && - list_files repo/deep/deeper1 >dir && - cat >expect <<-EOF && - a - deepest - EOF - test_cmp expect dir && + check_files repo a deep && + check_files repo/deep a deeper1 && + check_files repo/deep/deeper1 a deepest && cat >expect <<-EOF && /* !/*/ @@ -237,13 +194,7 @@ test_expect_success 'cone mode: init and set' ' folder2 EOF test_must_be_empty err && - cat >expect <<-EOF && - a - folder1 - folder2 - EOF - list_files repo >dir && - test_cmp expect dir + check_files repo a folder1 folder2 ' test_expect_success 'cone mode: list' ' @@ -275,13 +226,7 @@ test_expect_success 'revert to old sparse-checkout on bad update' ' test_must_fail git -C repo sparse-checkout set deep/deeper1 2>err && test_i18ngrep "cannot set sparse-checkout patterns" err && test_cmp repo/.git/info/sparse-checkout expect && - list_files repo/deep >dir && - cat >expect <<-EOF && - a - deeper1 - deeper2 - EOF - test_cmp dir expect + check_files repo/deep a deeper1 deeper2 ' test_expect_success 'revert to old sparse-checkout on empty update' ' @@ -332,12 +277,7 @@ test_expect_success 'cone mode: set with core.ignoreCase=true' ' /folder1/ EOF test_cmp expect repo/.git/info/sparse-checkout && - list_files repo >dir && - cat >expect <<-EOF && - a - folder1 - EOF - test_cmp expect dir + check_files repo a folder1 ' test_expect_success 'interaction with submodules' ' @@ -351,21 +291,8 @@ test_expect_success 'interaction with submodules' ' git sparse-checkout init --cone && git sparse-checkout set folder1 ) && - list_files super >dir && - cat >expect <<-\EOF && - a - folder1 - modules - EOF - test_cmp expect dir && - list_files super/modules/child >dir && - cat >expect <<-\EOF && - a - deep - folder1 - folder2 - EOF - test_cmp expect dir + check_files super a folder1 modules && + check_files super/modules/child a deep folder1 folder2 ' test_done From d622c34396b3ea1a81f07d951ee1112f83d9330c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 24 Jan 2020 21:19:32 +0000 Subject: [PATCH 
002/129] t1091: improve here-docs t1091-sparse-checkout-builtin.sh uses here-docs to populate the expected contents of the sparse-checkout file. These do not use shell interpolation, so use "-\EOF" instead of "-EOF". Also use proper tabbing. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t1091-sparse-checkout-builtin.sh | 98 +++++++++++++++--------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index e058a20ad6c85d..e28e1c797f9432 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -46,11 +46,11 @@ test_expect_success 'git sparse-checkout list (empty)' ' test_expect_success 'git sparse-checkout list (populated)' ' test_when_finished rm -f repo/.git/info/sparse-checkout && - cat >repo/.git/info/sparse-checkout <<-EOF && - /folder1/* - /deep/ - **/a - !*bin* + cat >repo/.git/info/sparse-checkout <<-\EOF && + /folder1/* + /deep/ + **/a + !*bin* EOF cp repo/.git/info/sparse-checkout expect && git -C repo sparse-checkout list >list && @@ -59,9 +59,9 @@ test_expect_success 'git sparse-checkout list (populated)' ' test_expect_success 'git sparse-checkout init' ' git -C repo sparse-checkout init && - cat >expect <<-EOF && - /* - !/*/ + cat >expect <<-\EOF && + /* + !/*/ EOF test_cmp expect repo/.git/info/sparse-checkout && test_cmp_config -C repo true core.sparsecheckout && @@ -70,9 +70,9 @@ test_expect_success 'git sparse-checkout init' ' test_expect_success 'git sparse-checkout list after init' ' git -C repo sparse-checkout list >actual && - cat >expect <<-EOF && - /* - !/*/ + cat >expect <<-\EOF && + /* + !/*/ EOF test_cmp expect actual ' @@ -80,10 +80,10 @@ test_expect_success 'git sparse-checkout list after init' ' test_expect_success 'init with existing sparse-checkout' ' echo "*folder*" >> repo/.git/info/sparse-checkout && git -C repo sparse-checkout init && - cat >expect <<-EOF && - /* - !/*/ - *folder* + cat 
>expect <<-\EOF && + /* + !/*/ + *folder* EOF test_cmp expect repo/.git/info/sparse-checkout && check_files repo a folder1 folder2 @@ -92,9 +92,9 @@ test_expect_success 'init with existing sparse-checkout' ' test_expect_success 'clone --sparse' ' git clone --sparse repo clone && git -C clone sparse-checkout list >actual && - cat >expect <<-EOF && - /* - !/*/ + cat >expect <<-\EOF && + /* + !/*/ EOF test_cmp expect actual && check_files clone a @@ -116,10 +116,10 @@ test_expect_success 'set enables config' ' test_expect_success 'set sparse-checkout using builtin' ' git -C repo sparse-checkout set "/*" "!/*/" "*folder*" && - cat >expect <<-EOF && - /* - !/*/ - *folder* + cat >expect <<-\EOF && + /* + !/*/ + *folder* EOF git -C repo sparse-checkout list >actual && test_cmp expect actual && @@ -128,11 +128,11 @@ test_expect_success 'set sparse-checkout using builtin' ' ' test_expect_success 'set sparse-checkout using --stdin' ' - cat >expect <<-EOF && - /* - !/*/ - /folder1/ - /folder2/ + cat >expect <<-\EOF && + /* + !/*/ + /folder1/ + /folder2/ EOF git -C repo sparse-checkout set --stdin actual && @@ -179,28 +179,28 @@ test_expect_success 'cone mode: init and set' ' check_files repo a deep && check_files repo/deep a deeper1 && check_files repo/deep/deeper1 a deepest && - cat >expect <<-EOF && - /* - !/*/ - /deep/ - !/deep/*/ - /deep/deeper1/ - !/deep/deeper1/*/ - /deep/deeper1/deepest/ + cat >expect <<-\EOF && + /* + !/*/ + /deep/ + !/deep/*/ + /deep/deeper1/ + !/deep/deeper1/*/ + /deep/deeper1/deepest/ EOF test_cmp expect repo/.git/info/sparse-checkout && - git -C repo sparse-checkout set --stdin 2>err <<-EOF && - folder1 - folder2 + git -C repo sparse-checkout set --stdin 2>err <<-\EOF && + folder1 + folder2 EOF test_must_be_empty err && check_files repo a folder1 folder2 ' test_expect_success 'cone mode: list' ' - cat >expect <<-EOF && - folder1 - folder2 + cat >expect <<-\EOF && + folder1 + folder2 EOF git -C repo sparse-checkout set --stdin actual 2>err && @@ 
-211,10 +211,10 @@ test_expect_success 'cone mode: list' ' test_expect_success 'cone mode: set with nested folders' ' git -C repo sparse-checkout set deep deep/deeper1/deepest 2>err && test_line_count = 0 err && - cat >expect <<-EOF && - /* - !/*/ - /deep/ + cat >expect <<-\EOF && + /* + !/*/ + /deep/ EOF test_cmp repo/.git/info/sparse-checkout expect ' @@ -271,10 +271,10 @@ test_expect_success 'sparse-checkout (init|set|disable) fails with dirty status' test_expect_success 'cone mode: set with core.ignoreCase=true' ' git -C repo sparse-checkout init --cone && git -C repo -c core.ignoreCase=true sparse-checkout set folder1 && - cat >expect <<-EOF && - /* - !/*/ - /folder1/ + cat >expect <<-\EOF && + /* + !/*/ + /folder1/ EOF test_cmp expect repo/.git/info/sparse-checkout && check_files repo a folder1 From 3c754067a1164ffafd965dcd44a9f004e6100e42 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 24 Jan 2020 21:19:33 +0000 Subject: [PATCH 003/129] sparse-checkout: create leading directories The 'git init' command creates the ".git/info" directory and fills it with some default files. However, 'git worktree add' does not create the info directory for that worktree. This causes a problem when running "git sparse-checkout init" inside a worktree. While care was taken to allow the sparse-checkout config to be specific to a worktree, this initialization was untested. Safely create the leading directories for the sparse-checkout file. This is the safest thing to do even without worktrees, as a user could delete their ".git/info" directory and expect Git to recover safely. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 4 ++++ t/t1091-sparse-checkout-builtin.sh | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index b3bed891cb15e9..3cee8ab46e548f 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -199,6 +199,10 @@ static int write_patterns_and_update(struct pattern_list *pl) int result; sparse_filename = get_sparse_checkout_filename(); + + if (safe_create_leading_directories(sparse_filename)) + die(_("failed to create directory for sparse-checkout file")); + fd = hold_lock_file_for_update(&lk, sparse_filename, LOCK_DIE_ON_ERROR); diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index e28e1c797f9432..43d1f7520ce426 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -295,4 +295,14 @@ test_expect_success 'interaction with submodules' ' check_files super/modules/child a deep folder1 folder2 ' +test_expect_success 'different sparse-checkouts with worktrees' ' + git -C repo worktree add --detach ../worktree && + check_files worktree "a deep folder1 folder2" && + git -C worktree sparse-checkout init --cone && + git -C repo sparse-checkout set folder1 && + git -C worktree sparse-checkout set deep/deeper1 && + check_files repo a folder1 && + check_files worktree a deep +' + test_done From 47dbf10d8a5ce8c9b441a16b7698c7d70585dff0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 24 Jan 2020 21:19:34 +0000 Subject: [PATCH 004/129] clone: fix --sparse option with URLs The --sparse option was added to the clone builtin in d89f09c (clone: add --sparse mode, 2019-11-21) and was tested with a local path clone in t1091-sparse-checkout-builtin.sh. However, due to a difference in how local paths are handled versus URLs, this mechanism does not work with URLs. 
Modify the test to use a "file://" URL, which would output this error before the code change: Cloning into 'clone'... fatal: cannot change to 'file://.../repo': No such file or directory error: failed to initialize sparse-checkout These errors are due to using a "-C " option to call 'git -C sparse-checkout init' but the URL is being given instead of the target directory. Update that target directory to evaluate this correctly. I have also manually tested that https:// URLs are handled correctly as well. Acked-by: Taylor Blau Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- t/t1091-sparse-checkout-builtin.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 4348d962c9c81e..2caefc44fb860f 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1130,7 +1130,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (option_required_reference.nr || option_optional_reference.nr) setup_reference(); - if (option_sparse_checkout && git_sparse_checkout_init(repo)) + if (option_sparse_checkout && git_sparse_checkout_init(dir)) return 1; remote = remote_get(option_origin); diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 43d1f7520ce426..cf4a595c868d8e 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -90,7 +90,7 @@ test_expect_success 'init with existing sparse-checkout' ' ' test_expect_success 'clone --sparse' ' - git clone --sparse repo clone && + git clone --sparse "file://$(pwd)/repo" clone && git -C clone sparse-checkout list >actual && cat >expect <<-\EOF && /* From 7aa9ef2fcaa986d7f11064adab6d1c010d4f2ead Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 24 Jan 2020 21:19:35 +0000 Subject: [PATCH 005/129] sparse-checkout: fix documentation typo for core.sparseCheckoutCone Signed-off-by: Jeff King Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- 
Documentation/git-sparse-checkout.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index 3b341cf0fce3c1..4834fb434d1ddd 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -106,7 +106,7 @@ The full pattern set allows for arbitrary pattern matches and complicated inclusion/exclusion rules. These can result in O(N*M) pattern matches when updating the index, where N is the number of patterns and M is the number of paths in the index. To combat this performance issue, a more restricted -pattern set is allowed when `core.spareCheckoutCone` is enabled. +pattern set is allowed when `core.sparseCheckoutCone` is enabled. The accepted patterns in the cone pattern set are: From 41de0c6fbcc3d2544ebada3a9f26dec0f32f42de Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 24 Jan 2020 21:19:36 +0000 Subject: [PATCH 006/129] sparse-checkout: cone mode does not recognize "**" When core.sparseCheckoutCone is enabled, the 'git sparse-checkout set' command creates a restricted set of possible patterns that are used by a custom algorithm to quickly match those patterns. If a user manually edits the sparse-checkout file, then they could create patterns that do not match these expectations. The cone-mode matching algorithm can return incorrect results. The solution is to detect these incorrect patterns, warn that we do not recognize them, and revert to the standard algorithm. Check each pattern for the "**" substring, and revert to the old logic if seen. While technically a "//**" pattern matches the meaning of "//", it is not one that would be written by the sparse-checkout builtin in cone mode. Attempting to accept that pattern change complicates the logic and instead we punt and do not accept any instance of "**". 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- dir.c | 7 +++++- t/t1091-sparse-checkout-builtin.sh | 34 ++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/dir.c b/dir.c index 22d08e61c296a0..40fed73a947bad 100644 --- a/dir.c +++ b/dir.c @@ -651,11 +651,16 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern return; } + if (strstr(given->pattern, "**")) { + /* Not a cone pattern. */ + warning(_("unrecognized pattern: '%s'"), given->pattern); + goto clear_hashmaps; + } + if (given->patternlen > 2 && !strcmp(given->pattern + given->patternlen - 2, "/*")) { if (!(given->flags & PATTERN_FLAG_NEGATIVE)) { /* Not a cone pattern. */ - pl->use_cone_patterns = 0; warning(_("unrecognized pattern: '%s'"), given->pattern); goto clear_hashmaps; } diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index cf4a595c868d8e..e2e45dc7fd6351 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -305,4 +305,38 @@ test_expect_success 'different sparse-checkouts with worktrees' ' check_files worktree a deep ' +check_read_tree_errors () { + REPO=$1 + FILES=$2 + ERRORS=$3 + git -C $REPO read-tree -mu HEAD 2>err && + if test -z "$ERRORS" + then + test_must_be_empty err + else + test_i18ngrep "$ERRORS" err + fi && + check_files $REPO $FILES +} + +test_expect_success 'pattern-checks: /A/**' ' + cat >repo/.git/info/sparse-checkout <<-\EOF && + /* + !/*/ + /folder1/** + EOF + check_read_tree_errors repo "a folder1" "disabling cone pattern matching" +' + +test_expect_success 'pattern-checks: /A/**/B/' ' + cat >repo/.git/info/sparse-checkout <<-\EOF && + /* + !/*/ + /deep/**/deepest + EOF + check_read_tree_errors repo "a deep" "disabling cone pattern matching" && + check_files repo/deep "deeper1" && + check_files repo/deep/deeper1 "deepest" +' + test_done From 9e6d3e64175713bc0007f3012ea288f4dfc0a399 Mon Sep 17 00:00:00 2001 From: Derrick 
Stolee Date: Fri, 24 Jan 2020 21:19:37 +0000 Subject: [PATCH 007/129] sparse-checkout: detect short patterns In cone mode, the shortest pattern the sparse-checkout command will write into the sparse-checkout file is "/*". This is handled carefully in add_pattern_to_hashsets(), so warn if any other pattern is this short. This will assist future pattern checks by allowing us to assume there are at least three characters in the pattern. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- dir.c | 3 ++- t/t1091-sparse-checkout-builtin.sh | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/dir.c b/dir.c index 40fed73a947bad..c2e585607e1962 100644 --- a/dir.c +++ b/dir.c @@ -651,7 +651,8 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern return; } - if (strstr(given->pattern, "**")) { + if (given->patternlen <= 2 || + strstr(given->pattern, "**")) { /* Not a cone pattern. */ warning(_("unrecognized pattern: '%s'"), given->pattern); goto clear_hashmaps; diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index e2e45dc7fd6351..2e575347995b15 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -339,4 +339,13 @@ test_expect_success 'pattern-checks: /A/**/B/' ' check_files repo/deep/deeper1 "deepest" ' +test_expect_success 'pattern-checks: too short' ' + cat >repo/.git/info/sparse-checkout <<-\EOF && + /* + !/*/ + /a + EOF + check_read_tree_errors repo "a" "disabling cone pattern matching" +' + test_done From 9abc60f8015d060d3f3433b105648a4725c97bd1 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 31 Jan 2020 20:16:08 +0000 Subject: [PATCH 008/129] sparse-checkout: warn on globs in cone patterns In cone mode, the sparse-checkout command will write patterns that allow faster pattern matching. This matching only works if the patterns in the sparse-checkout file are those written by that command. 
Users can edit the sparse-checkout file and create patterns that cause the cone mode matching to fail. The cone mode patterns may end in "/*" but otherwise an un-escaped asterisk or other glob character is invalid. Add checks to disable cone mode when seeing these values. A later change will properly handle escaped globs. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- dir.c | 36 +++++++++++++++++++++++++++ t/t1091-sparse-checkout-builtin.sh | 39 ++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/dir.c b/dir.c index c2e585607e1962..71d28331f35c1a 100644 --- a/dir.c +++ b/dir.c @@ -635,6 +635,7 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern struct pattern_entry *translated; char *truncated; char *data = NULL; + const char *prev, *cur, *next; if (!pl->use_cone_patterns) return; @@ -652,12 +653,47 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern } if (given->patternlen <= 2 || + *given->pattern == '*' || strstr(given->pattern, "**")) { /* Not a cone pattern. */ warning(_("unrecognized pattern: '%s'"), given->pattern); goto clear_hashmaps; } + prev = given->pattern; + cur = given->pattern + 1; + next = given->pattern + 2; + + while (*cur) { + /* Watch for glob characters '*', '\', '[', '?' */ + if (!is_glob_special(*cur)) + goto increment; + + /* But only if *prev != '\\' */ + if (*prev == '\\') + goto increment; + + /* But allow the initial '\' */ + if (*cur == '\\' && + is_glob_special(*next)) + goto increment; + + /* But a trailing '/' then '*' is fine */ + if (*prev == '/' && + *cur == '*' && + *next == 0) + goto increment; + + /* Not a cone pattern. 
*/ + warning(_("unrecognized pattern: '%s'"), given->pattern); + goto clear_hashmaps; + + increment: + prev++; + cur++; + next++; + } + if (given->patternlen > 2 && !strcmp(given->pattern + given->patternlen - 2, "/*")) { if (!(given->flags & PATTERN_FLAG_NEGATIVE)) { diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 2e575347995b15..c732abeacde4d0 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -348,4 +348,43 @@ test_expect_success 'pattern-checks: too short' ' check_read_tree_errors repo "a" "disabling cone pattern matching" ' +test_expect_success 'pattern-checks: trailing "*"' ' + cat >repo/.git/info/sparse-checkout <<-\EOF && + /* + !/*/ + /a* + EOF + check_read_tree_errors repo "a" "disabling cone pattern matching" +' + +test_expect_success 'pattern-checks: starting "*"' ' + cat >repo/.git/info/sparse-checkout <<-\EOF && + /* + !/*/ + *eep/ + EOF + check_read_tree_errors repo "a deep" "disabling cone pattern matching" +' + +test_expect_success 'pattern-checks: contained glob characters' ' + for c in "[a]" "\\" "?" "*" + do + cat >repo/.git/info/sparse-checkout <<-EOF && + /* + !/*/ + something$c-else/ + EOF + check_read_tree_errors repo "a" "disabling cone pattern matching" + done +' + +test_expect_success 'pattern-checks: escaped "*"' ' + cat >repo/.git/info/sparse-checkout <<-\EOF && + /* + !/*/ + /does\*not\*exist/ + EOF + check_read_tree_errors repo "a" "" +' + test_done From 4f52c2ce6c578896964e960f6017510f0efd3f46 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 31 Jan 2020 20:16:09 +0000 Subject: [PATCH 009/129] sparse-checkout: properly match escaped characters In cone mode, the sparse-checkout feature uses hashset containment queries to match paths. Make this algorithm respect escaped asterisk (*) and backslash (\) characters. Create dup_and_filter_pattern() method to convert a pattern by removing escape characters and dropping an optional "/*" at the end. 
This method is available in dir.h as we will use it in builtin/sparse-checkout.c in a later change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- dir.c | 35 +++++++++++++++++++++++++++--- t/t1091-sparse-checkout-builtin.sh | 23 ++++++++++++++++---- 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/dir.c b/dir.c index 71d28331f35c1a..7ac0920b713517 100644 --- a/dir.c +++ b/dir.c @@ -630,6 +630,36 @@ int pl_hashmap_cmp(const void *unused_cmp_data, return strncmp(ee1->pattern, ee2->pattern, min_len); } +static char *dup_and_filter_pattern(const char *pattern) +{ + char *set, *read; + size_t count = 0; + char *result = xstrdup(pattern); + + set = result; + read = result; + + while (*read) { + /* skip escape characters (once) */ + if (*read == '\\') + read++; + + *set = *read; + + set++; + read++; + count++; + } + *set = 0; + + if (count > 2 && + *(set - 1) == '*' && + *(set - 2) == '/') + *(set - 2) = 0; + + return result; +} + static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern *given) { struct pattern_entry *translated; @@ -702,8 +732,7 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern goto clear_hashmaps; } - truncated = xstrdup(given->pattern); - truncated[given->patternlen - 2] = 0; + truncated = dup_and_filter_pattern(given->pattern); translated = xmalloc(sizeof(struct pattern_entry)); translated->pattern = truncated; @@ -737,7 +766,7 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern translated = xmalloc(sizeof(struct pattern_entry)); - translated->pattern = xstrdup(given->pattern); + translated->pattern = dup_and_filter_pattern(given->pattern); translated->patternlen = given->patternlen; hashmap_entry_init(&translated->ent, ignore_case ? 
diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index c732abeacde4d0..9ea700896d831f 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -378,13 +378,28 @@ test_expect_success 'pattern-checks: contained glob characters' ' done ' -test_expect_success 'pattern-checks: escaped "*"' ' - cat >repo/.git/info/sparse-checkout <<-\EOF && +test_expect_success BSLASHPSPEC 'pattern-checks: escaped "*"' ' + git clone repo escaped && + TREEOID=$(git -C escaped rev-parse HEAD:folder1) && + NEWTREE=$(git -C escaped mktree <<-EOF + $(git -C escaped ls-tree HEAD) + 040000 tree $TREEOID zbad\\dir + 040000 tree $TREEOID zdoes*exist + EOF + ) && + COMMIT=$(git -C escaped commit-tree $NEWTREE -p HEAD) && + git -C escaped reset --hard $COMMIT && + check_files escaped "a deep folder1 folder2 zbad\\dir zdoes*exist" && + git -C escaped sparse-checkout init --cone && + cat >escaped/.git/info/sparse-checkout <<-\EOF && /* !/*/ - /does\*not\*exist/ + /zbad\\dir/ + !/zbad\\dir/*/ + /zdoes\*not\*exist/ + /zdoes\*exist/ EOF - check_read_tree_errors repo "a" "" + check_read_tree_errors escaped "a zbad\\dir zdoes*exist" ' test_done From d585f0e7992ea7f025a5a91f46f2baa9e88f19f6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 31 Jan 2020 20:16:10 +0000 Subject: [PATCH 010/129] sparse-checkout: write escaped patterns in cone mode If a user somehow creates a directory with an asterisk (*) or backslash (\), then the "git sparse-checkout set" command will struggle to provide the correct pattern in the sparse-checkout file. When not in cone mode, the provided pattern is written directly into the sparse-checkout file. However, in cone mode we expect a list of paths to directories and then we convert those into patterns. However, there is some care needed for the timing of these escapes. The in-memory pattern list is used to update the working directory before writing the patterns to disk. 
Thus, we need the command to have the unescaped names in the hashsets for the cone comparisons, then escape the patterns later. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 23 +++++++++++++++++++++-- t/t1091-sparse-checkout-builtin.sh | 10 ++++++++-- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 3cee8ab46e548f..cc86b8a0147d74 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -13,6 +13,7 @@ #include "resolve-undo.h" #include "unpack-trees.h" #include "wt-status.h" +#include "quote.h" static const char *empty_base = ""; @@ -140,6 +141,22 @@ static int update_working_directory(struct pattern_list *pl) return result; } +static char *escaped_pattern(char *pattern) +{ + char *p = pattern; + struct strbuf final = STRBUF_INIT; + + while (*p) { + if (*p == '*' || *p == '\\') + strbuf_addch(&final, '\\'); + + strbuf_addch(&final, *p); + p++; + } + + return strbuf_detach(&final, NULL); +} + static void write_cone_to_file(FILE *fp, struct pattern_list *pl) { int i; @@ -164,10 +181,11 @@ static void write_cone_to_file(FILE *fp, struct pattern_list *pl) fprintf(fp, "/*\n!/*/\n"); for (i = 0; i < sl.nr; i++) { - char *pattern = sl.items[i].string; + char *pattern = escaped_pattern(sl.items[i].string); if (strlen(pattern)) fprintf(fp, "%s/\n!%s/*/\n", pattern, pattern); + free(pattern); } string_list_clear(&sl, 0); @@ -185,8 +203,9 @@ static void write_cone_to_file(FILE *fp, struct pattern_list *pl) string_list_remove_duplicates(&sl, 0); for (i = 0; i < sl.nr; i++) { - char *pattern = sl.items[i].string; + char *pattern = escaped_pattern(sl.items[i].string); fprintf(fp, "%s/\n", pattern); + free(pattern); } } diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 9ea700896d831f..fb8718e64a4904 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -309,6 +309,9 @@ 
check_read_tree_errors () { REPO=$1 FILES=$2 ERRORS=$3 + git -C $REPO -c core.sparseCheckoutCone=false read-tree -mu HEAD 2>err && + test_must_be_empty err && + check_files $REPO "$FILES" && git -C $REPO read-tree -mu HEAD 2>err && if test -z "$ERRORS" then @@ -391,14 +394,17 @@ test_expect_success BSLASHPSPEC 'pattern-checks: escaped "*"' ' git -C escaped reset --hard $COMMIT && check_files escaped "a deep folder1 folder2 zbad\\dir zdoes*exist" && git -C escaped sparse-checkout init --cone && - cat >escaped/.git/info/sparse-checkout <<-\EOF && + git -C escaped sparse-checkout set zbad\\dir/bogus "zdoes*not*exist" "zdoes*exist" && + cat >expect <<-\EOF && /* !/*/ /zbad\\dir/ !/zbad\\dir/*/ - /zdoes\*not\*exist/ + /zbad\\dir/bogus/ /zdoes\*exist/ + /zdoes\*not\*exist/ EOF + test_cmp expect escaped/.git/info/sparse-checkout && check_read_tree_errors escaped "a zbad\\dir zdoes*exist" ' From bd64de42de28e5cdda7765d5de1c3ed34d4898cb Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 31 Jan 2020 20:16:11 +0000 Subject: [PATCH 011/129] sparse-checkout: unquote C-style strings over --stdin If a user somehow creates a directory with an asterisk (*) or backslash (\), then the "git sparse-checkout set" command will struggle to provide the correct pattern in the sparse-checkout file. When not in cone mode, the provided pattern is written directly into the sparse-checkout file. However, in cone mode we expect a list of paths to directories and then we convert those into patterns. Even more specifically, the goal is to always allow the following from the root of a repo: git ls-tree --name-only -d HEAD | git sparse-checkout set --stdin The ls-tree command provides directory names with an unescaped asterisk. It also quotes the directories that contain an escaped backslash. We must remove these quotes, then keep the escaped backslashes. Use unquote_c_style() when parsing lines from stdin. 
Command-line arguments will be parsed as-is, assuming the user can do the correct level of escaping from their environment to match the exact directory names. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 15 ++++++++++++++- t/t1091-sparse-checkout-builtin.sh | 14 +++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index cc86b8a0147d74..6083aa10f27cac 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -442,8 +442,21 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) pl.use_cone_patterns = 1; if (set_opts.use_stdin) { - while (!strbuf_getline(&line, stdin)) + struct strbuf unquoted = STRBUF_INIT; + while (!strbuf_getline(&line, stdin)) { + if (line.buf[0] == '"') { + strbuf_reset(&unquoted); + if (unquote_c_style(&unquoted, line.buf, NULL)) + die(_("unable to unquote C-style string '%s'"), + line.buf); + + strbuf_swap(&unquoted, &line); + } + strbuf_to_cone_pattern(&line, &pl); + } + + strbuf_release(&unquoted); } else { for (i = 0; i < argc; i++) { strbuf_setlen(&line, 0); diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index fb8718e64a4904..a46a310740a7f2 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -405,7 +405,19 @@ test_expect_success BSLASHPSPEC 'pattern-checks: escaped "*"' ' /zdoes\*not\*exist/ EOF test_cmp expect escaped/.git/info/sparse-checkout && - check_read_tree_errors escaped "a zbad\\dir zdoes*exist" + check_read_tree_errors escaped "a zbad\\dir zdoes*exist" && + git -C escaped ls-tree -d --name-only HEAD | git -C escaped sparse-checkout set --stdin && + cat >expect <<-\EOF && + /* + !/*/ + /deep/ + /folder1/ + /folder2/ + /zbad\\dir/ + /zdoes\*exist/ + EOF + test_cmp expect escaped/.git/info/sparse-checkout && + check_files escaped "a deep folder1 folder2 zbad\\dir zdoes*exist" ' test_done 
From e55682ea2640dd3aa002a2657c32bdd1d85b44e9 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 31 Jan 2020 20:16:12 +0000 Subject: [PATCH 012/129] sparse-checkout: use C-style quotes in 'list' subcommand When in cone mode, the 'git sparse-checkout list' subcommand lists the directories included in the sparse cone. When these directories contain odd characters, such as a backslash, then we need to use C-style quotes similar to 'git ls-tree'. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 6 ++++-- t/t1091-sparse-checkout-builtin.sh | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 6083aa10f27cac..facdb6bda707de 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -78,8 +78,10 @@ static int sparse_checkout_list(int argc, const char **argv) string_list_sort(&sl); - for (i = 0; i < sl.nr; i++) - printf("%s\n", sl.items[i].string); + for (i = 0; i < sl.nr; i++) { + quote_c_style(sl.items[i].string, NULL, stdout, 0); + printf("\n"); + } return 0; } diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index a46a310740a7f2..545e8d5ebe1de9 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -406,7 +406,8 @@ test_expect_success BSLASHPSPEC 'pattern-checks: escaped "*"' ' EOF test_cmp expect escaped/.git/info/sparse-checkout && check_read_tree_errors escaped "a zbad\\dir zdoes*exist" && - git -C escaped ls-tree -d --name-only HEAD | git -C escaped sparse-checkout set --stdin && + git -C escaped ls-tree -d --name-only HEAD >list-expect && + git -C escaped sparse-checkout set --stdin expect <<-\EOF && /* !/*/ @@ -417,7 +418,9 @@ test_expect_success BSLASHPSPEC 'pattern-checks: escaped "*"' ' /zdoes\*exist/ EOF test_cmp expect escaped/.git/info/sparse-checkout && - check_files escaped "a deep folder1 folder2 zbad\\dir zdoes*exist" + check_files escaped 
"a deep folder1 folder2 zbad\\dir zdoes*exist" && + git -C escaped sparse-checkout list >list-actual && + test_cmp list-expect list-actual ' test_done From e53ffe2704d7e10690f4382e46c1411a482531f1 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 31 Jan 2020 20:16:13 +0000 Subject: [PATCH 013/129] sparse-checkout: escape all glob characters on write The sparse-checkout patterns allow special globs according to fnmatch(3). When writing cone-mode patterns for paths containing these characters, they must be escaped. Use is_glob_special() to check which characters must be escaped this way, and add a path to the tests that contains all glob characters at once. Note that ']' is not special, since the initial bracket '[' is escaped. Reported-by: Jeff King Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 2 +- t/t1091-sparse-checkout-builtin.sh | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index facdb6bda707de..7aeb384362df0d 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -149,7 +149,7 @@ static char *escaped_pattern(char *pattern) struct strbuf final = STRBUF_INIT; while (*p) { - if (*p == '*' || *p == '\\') + if (is_glob_special(*p)) strbuf_addch(&final, '\\'); strbuf_addch(&final, *p); diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 545e8d5ebe1de9..37e9304ef32cf0 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -381,20 +381,21 @@ test_expect_success 'pattern-checks: contained glob characters' ' done ' -test_expect_success BSLASHPSPEC 'pattern-checks: escaped "*"' ' +test_expect_success BSLASHPSPEC 'pattern-checks: escaped characters' ' git clone repo escaped && TREEOID=$(git -C escaped rev-parse HEAD:folder1) && NEWTREE=$(git -C escaped mktree <<-EOF $(git -C escaped ls-tree HEAD) 040000 tree $TREEOID zbad\\dir 040000 tree 
$TREEOID zdoes*exist + 040000 tree $TREEOID zglob[!a]? EOF ) && COMMIT=$(git -C escaped commit-tree $NEWTREE -p HEAD) && git -C escaped reset --hard $COMMIT && - check_files escaped "a deep folder1 folder2 zbad\\dir zdoes*exist" && + check_files escaped "a deep folder1 folder2 zbad\\dir zdoes*exist" zglob[!a]? && git -C escaped sparse-checkout init --cone && - git -C escaped sparse-checkout set zbad\\dir/bogus "zdoes*not*exist" "zdoes*exist" && + git -C escaped sparse-checkout set zbad\\dir/bogus "zdoes*not*exist" "zdoes*exist" "zglob[!a]?" && cat >expect <<-\EOF && /* !/*/ @@ -403,9 +404,10 @@ test_expect_success BSLASHPSPEC 'pattern-checks: escaped "*"' ' /zbad\\dir/bogus/ /zdoes\*exist/ /zdoes\*not\*exist/ + /zglob\[!a]\?/ EOF test_cmp expect escaped/.git/info/sparse-checkout && - check_read_tree_errors escaped "a zbad\\dir zdoes*exist" && + check_read_tree_errors escaped "a zbad\\dir zdoes*exist zglob[!a]?" && git -C escaped ls-tree -d --name-only HEAD >list-expect && git -C escaped sparse-checkout set --stdin expect <<-\EOF && @@ -416,9 +418,10 @@ test_expect_success BSLASHPSPEC 'pattern-checks: escaped "*"' ' /folder2/ /zbad\\dir/ /zdoes\*exist/ + /zglob\[!a]\?/ EOF test_cmp expect escaped/.git/info/sparse-checkout && - check_files escaped "a deep folder1 folder2 zbad\\dir zdoes*exist" && + check_files escaped "a deep folder1 folder2 zbad\\dir zdoes*exist" zglob[!a]? && git -C escaped sparse-checkout list >list-actual && test_cmp list-expect list-actual ' From d2e65f4c9056be72ff8a1f39245c5e1b27d556b2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 31 Jan 2020 20:16:14 +0000 Subject: [PATCH 014/129] sparse-checkout: improve docs around 'set' in cone mode The existing documentation does not clarify how the 'set' subcommand changes when core.sparseCheckoutCone is enabled. Correct this by changing some language around the "A/B/C" example. Also include a description of the input format matching the output of 'git ls-tree --name-only'. 
Helped-by: Jeff King Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-sparse-checkout.txt | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index 4834fb434d1ddd..091461988107b9 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -50,6 +50,14 @@ To avoid interfering with other worktrees, it first enables the + When the `--stdin` option is provided, the patterns are read from standard in as a newline-delimited list instead of from the arguments. ++ +When `core.sparseCheckoutCone` is enabled, the input list is considered a +list of directories instead of sparse-checkout patterns. The command writes +patterns to the sparse-checkout file to include all files contained in those +directories (recursively) as well as files that are siblings of ancestor +directories. The input format matches the output of `git ls-tree --name-only`. +This includes interpreting pathnames that begin with a double quote (") as +C-style quoted strings. 'disable':: Disable the `core.sparseCheckout` config setting, and restore the @@ -128,9 +136,12 @@ the following patterns: ---------------- This says "include everything in root, but nothing two levels below root." -If we then add the folder `A/B/C` as a recursive pattern, the folders `A` and -`A/B` are added as parent patterns. The resulting sparse-checkout file is -now + +When in cone mode, the `git sparse-checkout set` subcommand takes a list of +directories instead of a list of sparse-checkout patterns. In this mode, +the command `git sparse-checkout set A/B/C` sets the directory `A/B/C` as +a recursive pattern, and the directories `A` and `A/B` are added as parent +patterns.
The resulting sparse-checkout file is now ---------------- /* From f998a3f1e588d73ed7285cb14ac4839f63f6dc82 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 31 Jan 2020 20:16:15 +0000 Subject: [PATCH 015/129] sparse-checkout: fix cone mode behavior mismatch The intention of the special "cone mode" in the sparse-checkout feature is to always match the same patterns that are matched by the same sparse-checkout file as when cone mode is disabled. When a file path is given to "git sparse-checkout set" in cone mode, then the cone mode improperly matches the file as a recursive path. When setting the skip-worktree bits, files were not expecting the MATCHED_RECURSIVE response, and hence these were left out of the matched cone. Fix this bug by checking for MATCHED_RECURSIVE in addition to MATCHED and add a test that prevents regression. Reported-by: Finn Bryant Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t1091-sparse-checkout-builtin.sh | 12 ++++++++++++ unpack-trees.c | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 37e9304ef32cf0..7d982096fbf0a2 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -305,6 +305,18 @@ test_expect_success 'different sparse-checkouts with worktrees' ' check_files worktree a deep ' +test_expect_success 'set using filename keeps file on-disk' ' + git -C repo sparse-checkout set a deep && + cat >expect <<-\EOF && + /* + !/*/ + /a/ + /deep/ + EOF + test_cmp expect repo/.git/info/sparse-checkout && + check_files repo a deep +' + check_read_tree_errors () { REPO=$1 FILES=$2 diff --git a/unpack-trees.c b/unpack-trees.c index 3789a22cf0a519..78425ce74b5da2 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1416,7 +1416,7 @@ static int clear_ce_flags_1(struct index_state *istate, name, &dtype, pl, istate); if (ret == UNDECIDED) ret = default_match; - if (ret == MATCHED) + if (ret == 
MATCHED || ret == MATCHED_RECURSIVE) ce->ce_flags &= ~clear_mask; cache++; progress_nr++; From 6fb705abcb6044f07954b486d71c05151262b6b6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 11 Feb 2020 15:02:21 +0000 Subject: [PATCH 016/129] sparse-checkout: extract add_patterns_from_input() In anticipation of extending the sparse-checkout builtin with "add" and "remove" subcommands, extract the code that fills a pattern list based on the input values. The input changes depending on the presence of "--stdin" or the value of core.sparseCheckoutCone. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 64 +++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 7aeb384362df0d..41d8aaf9a22eb3 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -412,36 +412,16 @@ static struct sparse_checkout_set_opts { int use_stdin; } set_opts; -static int sparse_checkout_set(int argc, const char **argv, const char *prefix) +static void add_patterns_from_input(struct pattern_list *pl, + int argc, const char **argv) { int i; - struct pattern_list pl; - int result; - int changed_config = 0; - - static struct option builtin_sparse_checkout_set_options[] = { - OPT_BOOL(0, "stdin", &set_opts.use_stdin, - N_("read patterns from standard in")), - OPT_END(), - }; - - repo_read_index(the_repository); - require_clean_work_tree(the_repository, - N_("set sparse-checkout patterns"), NULL, 1, 0); - - memset(&pl, 0, sizeof(pl)); - - argc = parse_options(argc, argv, prefix, - builtin_sparse_checkout_set_options, - builtin_sparse_checkout_set_usage, - PARSE_OPT_KEEP_UNKNOWN); - if (core_sparse_checkout_cone) { struct strbuf line = STRBUF_INIT; - hashmap_init(&pl.recursive_hashmap, pl_hashmap_cmp, NULL, 0); - hashmap_init(&pl.parent_hashmap, pl_hashmap_cmp, NULL, 0); - pl.use_cone_patterns = 1; + hashmap_init(&pl->recursive_hashmap, 
pl_hashmap_cmp, NULL, 0); + hashmap_init(&pl->parent_hashmap, pl_hashmap_cmp, NULL, 0); + pl->use_cone_patterns = 1; if (set_opts.use_stdin) { struct strbuf unquoted = STRBUF_INIT; @@ -455,7 +435,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) strbuf_swap(&unquoted, &line); } - strbuf_to_cone_pattern(&line, &pl); + strbuf_to_cone_pattern(&line, pl); } strbuf_release(&unquoted); @@ -463,7 +443,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) for (i = 0; i < argc; i++) { strbuf_setlen(&line, 0); strbuf_addstr(&line, argv[i]); - strbuf_to_cone_pattern(&line, &pl); + strbuf_to_cone_pattern(&line, pl); } } } else { @@ -473,13 +453,39 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) while (!strbuf_getline(&line, stdin)) { size_t len; char *buf = strbuf_detach(&line, &len); - add_pattern(buf, empty_base, 0, &pl, 0); + add_pattern(buf, empty_base, 0, pl, 0); } } else { for (i = 0; i < argc; i++) - add_pattern(argv[i], empty_base, 0, &pl, 0); + add_pattern(argv[i], empty_base, 0, pl, 0); } } +} + +static int sparse_checkout_set(int argc, const char **argv, const char *prefix) +{ + struct pattern_list pl; + int result; + int changed_config = 0; + + static struct option builtin_sparse_checkout_set_options[] = { + OPT_BOOL(0, "stdin", &set_opts.use_stdin, + N_("read patterns from standard in")), + OPT_END(), + }; + + repo_read_index(the_repository); + require_clean_work_tree(the_repository, + N_("set sparse-checkout patterns"), NULL, 1, 0); + + memset(&pl, 0, sizeof(pl)); + + argc = parse_options(argc, argv, prefix, + builtin_sparse_checkout_set_options, + builtin_sparse_checkout_set_usage, + PARSE_OPT_KEEP_UNKNOWN); + + add_patterns_from_input(&pl, argc, argv); if (!core_apply_sparse_checkout) { set_config(MODE_ALL_PATTERNS); From 4bf0c06c7169da61de489544207a7659ef31029f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 11 Feb 2020 15:02:22 +0000 Subject: [PATCH 
017/129] sparse-checkout: extract pattern update from 'set' subcommand In anticipation of adding "add" and "remove" subcommands to the sparse-checkout builtin, extract a modify_pattern_list() method from the sparse_checkout_set() method. This command will read input from the command-line or stdin to construct a set of patterns, then modify the existing sparse-checkout patterns after a successful update of the working directory. Currently, the only way to modify the patterns is to replace all of the patterns. This will be extended in a later update. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 44 +++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 41d8aaf9a22eb3..03915dd72963e0 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -462,29 +462,17 @@ static void add_patterns_from_input(struct pattern_list *pl, } } -static int sparse_checkout_set(int argc, const char **argv, const char *prefix) +enum modify_type { + REPLACE, +}; + +static int modify_pattern_list(int argc, const char **argv, enum modify_type m) { - struct pattern_list pl; int result; int changed_config = 0; - - static struct option builtin_sparse_checkout_set_options[] = { - OPT_BOOL(0, "stdin", &set_opts.use_stdin, - N_("read patterns from standard in")), - OPT_END(), - }; - - repo_read_index(the_repository); - require_clean_work_tree(the_repository, - N_("set sparse-checkout patterns"), NULL, 1, 0); - + struct pattern_list pl; memset(&pl, 0, sizeof(pl)); - argc = parse_options(argc, argv, prefix, - builtin_sparse_checkout_set_options, - builtin_sparse_checkout_set_usage, - PARSE_OPT_KEEP_UNKNOWN); - add_patterns_from_input(&pl, argc, argv); if (!core_apply_sparse_checkout) { @@ -502,6 +490,26 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) return result; } +static int sparse_checkout_set(int 
argc, const char **argv, const char *prefix) +{ + static struct option builtin_sparse_checkout_set_options[] = { + OPT_BOOL(0, "stdin", &set_opts.use_stdin, + N_("read patterns from standard in")), + OPT_END(), + }; + + repo_read_index(the_repository); + require_clean_work_tree(the_repository, + N_("set sparse-checkout patterns"), NULL, 1, 0); + + argc = parse_options(argc, argv, prefix, + builtin_sparse_checkout_set_options, + builtin_sparse_checkout_set_usage, + PARSE_OPT_KEEP_UNKNOWN); + + return modify_pattern_list(argc, argv, REPLACE); +} + static int sparse_checkout_disable(int argc, const char **argv) { struct pattern_list pl; From 2631dc879d59aa08095bc4fb5bc9bcc491a787e9 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 11 Feb 2020 15:02:23 +0000 Subject: [PATCH 018/129] sparse-checkout: create 'add' subcommand When using the sparse-checkout feature, a user may want to incrementally grow their sparse-checkout pattern set. Allow adding patterns using a new 'add' subcommand. This is not much different from the 'set' subcommand, because we still want to allow the '--stdin' option and interpret inputs as directories when in cone mode and patterns otherwise. When in cone mode, we are growing the cone. This may actually reduce the set of patterns when adding directory A when A/B is already a directory in the cone. Test the different cases: siblings, parents, ancestors. When not in cone mode, we can only assume the patterns should be appended to the sparse-checkout file. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-sparse-checkout.txt | 7 +++ builtin/sparse-checkout.c | 72 ++++++++++++++++++++++++--- t/t1091-sparse-checkout-builtin.sh | 59 ++++++++++++++++++++++ 3 files changed, 132 insertions(+), 6 deletions(-) diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt index 091461988107b9..746f920d71d6d7 100644 --- a/Documentation/git-sparse-checkout.txt +++ b/Documentation/git-sparse-checkout.txt @@ -59,6 +59,13 @@ directories. The input format matches the output of `git ls-tree --name-only`. This includes interpreting pathnames that begin with a double quote (") as C-style quoted strings. +'add':: + Update the sparse-checkout file to include additional patterns. + By default, these patterns are read from the command-line arguments, + but they can be read from stdin using the `--stdin` option. When + `core.sparseCheckoutCone` is enabled, the given patterns are interpreted + as directory names as in the 'set' subcommand. + 'disable':: Disable the `core.sparseCheckout` config setting, and restore the working directory to include all files. 
Leaves the sparse-checkout diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 03915dd72963e0..af9e3e5123b528 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -18,7 +18,7 @@ static const char *empty_base = ""; static char const * const builtin_sparse_checkout_usage[] = { - N_("git sparse-checkout (init|list|set|disable) "), + N_("git sparse-checkout (init|list|set|add|disable) "), NULL }; @@ -404,7 +404,7 @@ static void strbuf_to_cone_pattern(struct strbuf *line, struct pattern_list *pl) } static char const * const builtin_sparse_checkout_set_usage[] = { - N_("git sparse-checkout set (--stdin | )"), + N_("git sparse-checkout (set|add) (--stdin | )"), NULL }; @@ -464,8 +464,54 @@ static void add_patterns_from_input(struct pattern_list *pl, enum modify_type { REPLACE, + ADD, }; +static void add_patterns_cone_mode(int argc, const char **argv, + struct pattern_list *pl) +{ + struct strbuf buffer = STRBUF_INIT; + struct pattern_entry *pe; + struct hashmap_iter iter; + struct pattern_list existing; + char *sparse_filename = get_sparse_checkout_filename(); + + add_patterns_from_input(pl, argc, argv); + + memset(&existing, 0, sizeof(existing)); + existing.use_cone_patterns = core_sparse_checkout_cone; + + if (add_patterns_from_file_to_list(sparse_filename, "", 0, + &existing, NULL)) + die(_("unable to load existing sparse-checkout patterns")); + free(sparse_filename); + + hashmap_for_each_entry(&existing.recursive_hashmap, &iter, pe, ent) { + if (!hashmap_contains_parent(&pl->recursive_hashmap, + pe->pattern, &buffer) || + !hashmap_contains_parent(&pl->parent_hashmap, + pe->pattern, &buffer)) { + strbuf_reset(&buffer); + strbuf_addstr(&buffer, pe->pattern); + insert_recursive_pattern(pl, &buffer); + } + } + + clear_pattern_list(&existing); + strbuf_release(&buffer); +} + +static void add_patterns_literal(int argc, const char **argv, + struct pattern_list *pl) +{ + char *sparse_filename = get_sparse_checkout_filename(); + if 
(add_patterns_from_file_to_list(sparse_filename, "", 0, + pl, NULL)) + die(_("unable to load existing sparse-checkout patterns")); + free(sparse_filename); + add_patterns_from_input(pl, argc, argv); +} + static int modify_pattern_list(int argc, const char **argv, enum modify_type m) { int result; @@ -473,7 +519,18 @@ static int modify_pattern_list(int argc, const char **argv, enum modify_type m) struct pattern_list pl; memset(&pl, 0, sizeof(pl)); - add_patterns_from_input(&pl, argc, argv); + switch (m) { + case ADD: + if (core_sparse_checkout_cone) + add_patterns_cone_mode(argc, argv, &pl); + else + add_patterns_literal(argc, argv, &pl); + break; + + case REPLACE: + add_patterns_from_input(&pl, argc, argv); + break; + } if (!core_apply_sparse_checkout) { set_config(MODE_ALL_PATTERNS); @@ -490,7 +547,8 @@ static int modify_pattern_list(int argc, const char **argv, enum modify_type m) return result; } -static int sparse_checkout_set(int argc, const char **argv, const char *prefix) +static int sparse_checkout_set(int argc, const char **argv, const char *prefix, + enum modify_type m) { static struct option builtin_sparse_checkout_set_options[] = { OPT_BOOL(0, "stdin", &set_opts.use_stdin, @@ -507,7 +565,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix) builtin_sparse_checkout_set_usage, PARSE_OPT_KEEP_UNKNOWN); - return modify_pattern_list(argc, argv, REPLACE); + return modify_pattern_list(argc, argv, m); } static int sparse_checkout_disable(int argc, const char **argv) @@ -558,7 +616,9 @@ int cmd_sparse_checkout(int argc, const char **argv, const char *prefix) if (!strcmp(argv[0], "init")) return sparse_checkout_init(argc, argv); if (!strcmp(argv[0], "set")) - return sparse_checkout_set(argc, argv, prefix); + return sparse_checkout_set(argc, argv, prefix, REPLACE); + if (!strcmp(argv[0], "add")) + return sparse_checkout_set(argc, argv, prefix, ADD); if (!strcmp(argv[0], "disable")) return sparse_checkout_disable(argc, argv); } diff 
--git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 7d982096fbf0a2..f9265de5e869e0 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -141,6 +141,21 @@ test_expect_success 'set sparse-checkout using --stdin' ' check_files repo "a folder1 folder2" ' +test_expect_success 'add to sparse-checkout' ' + cat repo/.git/info/sparse-checkout >expect && + cat >add <<-\EOF && + pattern1 + /folder1/ + pattern2 + EOF + cat add >>expect && + git -C repo sparse-checkout add --stdin actual && + test_cmp expect actual && + test_cmp expect repo/.git/info/sparse-checkout && + check_files repo "a folder1 folder2" +' + test_expect_success 'cone mode: match patterns' ' git -C repo config --worktree core.sparseCheckoutCone true && rm -rf repo/a repo/folder1 repo/folder2 && @@ -219,8 +234,52 @@ test_expect_success 'cone mode: set with nested folders' ' test_cmp repo/.git/info/sparse-checkout expect ' +test_expect_success 'cone mode: add independent path' ' + git -C repo sparse-checkout set deep/deeper1 && + git -C repo sparse-checkout add folder1 && + cat >expect <<-\EOF && + /* + !/*/ + /deep/ + !/deep/*/ + /deep/deeper1/ + /folder1/ + EOF + test_cmp expect repo/.git/info/sparse-checkout && + check_files repo a deep folder1 +' + +test_expect_success 'cone mode: add sibling path' ' + git -C repo sparse-checkout set deep/deeper1 && + git -C repo sparse-checkout add deep/deeper2 && + cat >expect <<-\EOF && + /* + !/*/ + /deep/ + !/deep/*/ + /deep/deeper1/ + /deep/deeper2/ + EOF + test_cmp expect repo/.git/info/sparse-checkout && + check_files repo a deep +' + +test_expect_success 'cone mode: add parent path' ' + git -C repo sparse-checkout set deep/deeper1 folder1 && + git -C repo sparse-checkout add deep && + cat >expect <<-\EOF && + /* + !/*/ + /deep/ + /folder1/ + EOF + test_cmp expect repo/.git/info/sparse-checkout && + check_files repo a deep folder1 +' + test_expect_success 'revert to old sparse-checkout on bad 
update' ' test_when_finished git -C repo reset --hard && + git -C repo sparse-checkout set deep && echo update >repo/deep/deeper2/a && cp repo/.git/info/sparse-checkout expect && test_must_fail git -C repo sparse-checkout set deep/deeper1 2>err && From ef07659926f64d70e8cb41025c3d7456eecb962e Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 11 Feb 2020 15:02:24 +0000 Subject: [PATCH 019/129] sparse-checkout: work with Windows paths When using Windows, a user may run 'git sparse-checkout set A\B\C' to add the Unix-style path A/B/C to their sparse-checkout patterns. Normalizing the input path converts the backslashes to slashes before we add the string 'A/B/C' to the recursive hashset. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/sparse-checkout.c | 3 +++ t/t1091-sparse-checkout-builtin.sh | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index af9e3e5123b528..3e314e3358223b 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -394,6 +394,9 @@ static void strbuf_to_cone_pattern(struct strbuf *line, struct pattern_list *pl) strbuf_trim_trailing_dir_sep(line); + if (strbuf_normalize_path(line)) + die(_("could not normalize path %s"), line->buf); + if (!line->len) return; diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index f9265de5e869e0..c35cbdef454858 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -497,4 +497,18 @@ test_expect_success BSLASHPSPEC 'pattern-checks: escaped characters' ' test_cmp list-expect list-actual ' +test_expect_success MINGW 'cone mode replaces backslashes with slashes' ' + git -C repo sparse-checkout set deep\\deeper1 && + cat >expect <<-\EOF && + /* + !/*/ + /deep/ + !/deep/*/ + /deep/deeper1/ + EOF + test_cmp expect repo/.git/info/sparse-checkout && + check_files repo a deep && + check_files repo/deep a deeper1 +' + test_done From 
aec00fb6d278d45853a1e59e15ceaf82326f22e2 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Wed, 5 Apr 2017 10:58:09 -0600 Subject: [PATCH 020/129] reset --stdin: trim carriage return from the paths When using the reset --stdin feature on Windows, a supplied path may have a \r at the end that was not being removed, so the path did not match the one in the index and was not reset. Signed-off-by: Kevin Willford --- t/t7108-reset-stdin.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/t/t7108-reset-stdin.sh b/t/t7108-reset-stdin.sh index b7cbcbf869296c..db5483b8f10052 100755 --- a/t/t7108-reset-stdin.sh +++ b/t/t7108-reset-stdin.sh @@ -29,4 +29,13 @@ test_expect_success '--stdin requires --mixed' ' git reset --mixed --stdin list && + git reset --stdin Date: Tue, 24 Jan 2017 17:44:31 +0100 Subject: [PATCH 021/129] gvfs: start by adding the -gvfs suffix to the version Signed-off-by: Saeed Noursalehi --- GIT-VERSION-GEN | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 2a8e7099164496..2f44242989a537 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.25.1 +DEF_VER=v2.25.1.vfs.1.1 LF=' ' From 35222f0a0519f912fadebb1f44e86735de23013d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 4 Apr 2017 12:04:11 +0200 Subject: [PATCH 022/129] gvfs: ensure that the version is based on a GVFS tag Signed-off-by: Johannes Schindelin --- GIT-VERSION-GEN | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 2f44242989a537..0d859db966e9f8 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -12,7 +12,7 @@ if test -f version then VN=$(cat version) || VN="$DEF_VER" elif test -d ${GIT_DIR:-.git} -o -f .git && - VN=$(git describe --match "v[0-9]*" HEAD 2>/dev/null) && + VN=$(git describe --match "v[0-9]*vfs*" HEAD 2>/dev/null) && case "$VN" in *$LF*) (exit 1) ;; v[0-9]*) From
553b86fd980cca27970c51aeabfc9f4eb4b50c42 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Tue, 24 Jan 2017 17:30:59 +0100 Subject: [PATCH 023/129] gvfs: add a GVFS-specific header file This header file will accumulate GVFS-specific definitions. Signed-off-by: Kevin Willford --- gvfs.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 gvfs.h diff --git a/gvfs.h b/gvfs.h new file mode 100644 index 00000000000000..b6dbe85eae4071 --- /dev/null +++ b/gvfs.h @@ -0,0 +1,9 @@ +#ifndef GVFS_H +#define GVFS_H + +/* + * This file is for the specific settings and methods + * used for GVFS functionality + */ + +#endif /* GVFS_H */ From c6ab81b481502233fb79f100fa3d1fef79717712 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Tue, 24 Jan 2017 17:34:12 +0100 Subject: [PATCH 024/129] gvfs: add the core.gvfs config setting This does not do anything yet. The next patches will add various values for that config setting that correspond to the various features offered/required by GVFS. Signed-off-by: Kevin Willford --- Documentation/config/core.txt | 3 +++ cache.h | 1 + config.c | 6 ++++++ environment.c | 1 + gvfs.h | 31 +++++++++++++++++++++++++++++++ 5 files changed, 42 insertions(+) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 4eb3acdf96084c..9a1abce235bfca 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -611,6 +611,9 @@ core.multiPackIndex:: single index. See link:technical/multi-pack-index.html[the multi-pack-index design document]. +core.gvfs:: + Enable the features needed for GVFS. + core.sparseCheckout:: Enable "sparse checkout" feature. See linkgit:git-sparse-checkout[1] for more information. 
diff --git a/cache.h b/cache.h index 6470693e91c384..12cc8aee6b06d0 100644 --- a/cache.h +++ b/cache.h @@ -953,6 +953,7 @@ extern char *git_replace_ref_base; extern int fsync_object_files; extern int core_preload_index; +extern int core_gvfs; extern int precomposed_unicode; extern int protect_hfs; extern int protect_ntfs; diff --git a/config.c b/config.c index 65c19f953ba1bc..da7624c75c43e2 100644 --- a/config.c +++ b/config.c @@ -20,6 +20,7 @@ #include "dir.h" #include "color.h" #include "refs.h" +#include "gvfs.h" struct config_source { struct config_source *prev; @@ -1359,6 +1360,11 @@ static int git_default_core_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.gvfs")) { + gvfs_load_config_value(value); + return 0; + } + if (!strcmp(var, "core.sparsecheckout")) { core_apply_sparse_checkout = git_config_bool(var, value); return 0; diff --git a/environment.c b/environment.c index e72a02d0d577da..71a8daa335b75d 100644 --- a/environment.c +++ b/environment.c @@ -68,6 +68,7 @@ char *notes_ref_name; int grafts_replace_parents = 1; int core_apply_sparse_checkout; int core_sparse_checkout_cone; +int core_gvfs; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ unsigned long pack_size_limit_cfg; diff --git a/gvfs.h b/gvfs.h index b6dbe85eae4071..2e9e10f4e49ef6 100644 --- a/gvfs.h +++ b/gvfs.h @@ -1,9 +1,40 @@ #ifndef GVFS_H #define GVFS_H +#include "cache.h" +#include "config.h" + /* * This file is for the specific settings and methods * used for GVFS functionality */ +static inline int gvfs_config_is_set(int mask) { + return (core_gvfs & mask) == mask; +} + +static inline int gvfs_config_is_set_any(void) { + return core_gvfs > 0; +} + +static inline void gvfs_load_config_value(const char *value) { + int is_bool = 0; + + if (value) + core_gvfs = git_config_bool_or_int("core.gvfs", value, &is_bool); + else + git_config_get_bool_or_int("core.gvfs", &is_bool, &core_gvfs); + + /* Turn 
on all bits if a bool was set in the settings */ + if (is_bool && core_gvfs) + core_gvfs = -1; +} + + +static inline int gvfs_config_load_and_is_set(int mask) { + gvfs_load_config_value(0); + return gvfs_config_is_set(mask); +} + + #endif /* GVFS_H */ From ca9383b610eb5226797f39303507416fe4dad113 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Tue, 24 Jan 2017 17:38:59 +0100 Subject: [PATCH 025/129] gvfs: add the feature to skip writing the index' SHA-1 This takes a substantial amount of time, and if the user is reasonably sure that the files' integrity is not compromised, that time can be saved. Git no longer verifies the SHA-1 by default, anyway. Signed-off-by: Kevin Willford --- Documentation/config/core.txt | 10 +++++++++- gvfs.h | 6 ++++++ read-cache.c | 11 ++++++++--- t/t1016-read-tree-skip-sha-on-read.sh | 22 ++++++++++++++++++++++ 4 files changed, 45 insertions(+), 4 deletions(-) create mode 100755 t/t1016-read-tree-skip-sha-on-read.sh diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 9a1abce235bfca..5062c8445b1760 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -612,7 +612,15 @@ core.multiPackIndex:: multi-pack-index design document]. core.gvfs:: - Enable the features needed for GVFS. + Enable the features needed for GVFS. This value can be set to true + to indicate all features should be turned on or the bit values listed + below can be used to turn on specific features. ++ +-- + GVFS_SKIP_SHA_ON_INDEX:: + Bit value 1 + Disables the calculation of the sha when writing the index +-- core.sparseCheckout:: Enable "sparse checkout" feature. 
See linkgit:git-sparse-checkout[1] diff --git a/gvfs.h b/gvfs.h index 2e9e10f4e49ef6..690419127a72dd 100644 --- a/gvfs.h +++ b/gvfs.h @@ -9,6 +9,12 @@ * used for GVFS functionality */ + +/* + * The list of bits in the core_gvfs setting + */ +#define GVFS_SKIP_SHA_ON_INDEX (1 << 0) + static inline int gvfs_config_is_set(int mask) { return (core_gvfs & mask) == mask; } diff --git a/read-cache.c b/read-cache.c index 7f66e0817148f8..381802b6dcbd64 100644 --- a/read-cache.c +++ b/read-cache.c @@ -25,6 +25,7 @@ #include "fsmonitor.h" #include "thread-utils.h" #include "progress.h" +#include "gvfs.h" /* Mask for the name length in ce_flags in the on-disk index */ @@ -2466,7 +2467,9 @@ static int ce_write_flush(git_hash_ctx *context, int fd) { unsigned int buffered = write_buffer_len; if (buffered) { - the_hash_algo->update_fn(context, write_buffer, buffered); + if (!gvfs_config_is_set(GVFS_SKIP_SHA_ON_INDEX)) + the_hash_algo->update_fn(context, write_buffer, + buffered); if (write_in_full(fd, write_buffer, buffered) < 0) return -1; write_buffer_len = 0; @@ -2515,7 +2518,8 @@ static int ce_flush(git_hash_ctx *context, int fd, unsigned char *hash) if (left) { write_buffer_len = 0; - the_hash_algo->update_fn(context, write_buffer, left); + if (!gvfs_config_is_set(GVFS_SKIP_SHA_ON_INDEX)) + the_hash_algo->update_fn(context, write_buffer, left); } /* Flush first if not enough space for hash signature */ @@ -2526,7 +2530,8 @@ static int ce_flush(git_hash_ctx *context, int fd, unsigned char *hash) } /* Append the hash signature at the end */ - the_hash_algo->final_fn(write_buffer + left, context); + if (!gvfs_config_is_set(GVFS_SKIP_SHA_ON_INDEX)) + the_hash_algo->final_fn(write_buffer + left, context); hashcpy(hash, write_buffer + left); left += the_hash_algo->rawsz; return (write_in_full(fd, write_buffer, left) < 0) ? 
-1 : 0; diff --git a/t/t1016-read-tree-skip-sha-on-read.sh b/t/t1016-read-tree-skip-sha-on-read.sh new file mode 100755 index 00000000000000..5b76a80a0020dc --- /dev/null +++ b/t/t1016-read-tree-skip-sha-on-read.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +test_description='check that read-tree works with core.gvfs config value' + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-read-tree.sh + +test_expect_success setup ' + echo one >a && + git add a && + git commit -m initial +' +test_expect_success 'read-tree without core.gvsf' ' + read_tree_u_must_succeed -m -u HEAD +' + +test_expect_success 'read-tree with core.gvfs set to 1' ' + git config core.gvfs 1 && + read_tree_u_must_succeed -m -u HEAD +' + +test_done From f030eb86e485ab66adc809802d919ce547da1b6b Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Tue, 24 Jan 2017 17:54:55 +0100 Subject: [PATCH 026/129] gvfs: add the feature that blobs may be missing Signed-off-by: Kevin Willford --- Documentation/config/core.txt | 4 ++++ cache-tree.c | 4 +++- gvfs.h | 1 + t/t0000-basic.sh | 5 +++++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 5062c8445b1760..ce7157f81d51df 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -620,6 +620,10 @@ core.gvfs:: GVFS_SKIP_SHA_ON_INDEX:: Bit value 1 Disables the calculation of the sha when writing the index + GVFS_MISSING_OK:: + Bit value 4 + Normally git write-tree ensures that the objects referenced by the + directory exist in the object database. This option disables this check. 
-- core.sparseCheckout:: diff --git a/cache-tree.c b/cache-tree.c index 1bd1b23d38d3f4..6b90e45ed3873f 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -6,6 +6,7 @@ #include "object-store.h" #include "replace-object.h" #include "promisor-remote.h" +#include "gvfs.h" #ifndef DEBUG_CACHE_TREE #define DEBUG_CACHE_TREE 0 @@ -244,7 +245,8 @@ static int update_one(struct cache_tree *it, int flags) { struct strbuf buffer; - int missing_ok = flags & WRITE_TREE_MISSING_OK; + int missing_ok = gvfs_config_is_set(GVFS_MISSING_OK) ? + WRITE_TREE_MISSING_OK : (flags & WRITE_TREE_MISSING_OK); int dryrun = flags & WRITE_TREE_DRY_RUN; int repair = flags & WRITE_TREE_REPAIR; int to_invalidate = 0; diff --git a/gvfs.h b/gvfs.h index 690419127a72dd..dabbf67f94c852 100644 --- a/gvfs.h +++ b/gvfs.h @@ -14,6 +14,7 @@ * The list of bits in the core_gvfs setting */ #define GVFS_SKIP_SHA_ON_INDEX (1 << 0) +#define GVFS_MISSING_OK (1 << 2) static inline int gvfs_config_is_set(int mask) { return (core_gvfs & mask) == mask; diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index 8a81a249d0b496..470b4ab8e53799 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -1168,6 +1168,11 @@ test_expect_success 'writing this tree with --missing-ok' ' git write-tree --missing-ok ' +test_expect_success 'writing this tree with missing ok config value' ' + git config core.gvfs 4 && + git write-tree +' + ################################################################ test_expect_success 'git read-tree followed by write-tree should be idempotent' ' From 1259c93fe4c92a4f5ee3399f4661879c6c095bfd Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Wed, 18 May 2016 13:40:39 +0000 Subject: [PATCH 027/129] gvfs: prevent files to be deleted outside the sparse checkout Prevent the sparse checkout to delete files that were marked with skip-worktree bit and are not in the sparse-checkout file. 
This is because everything with the skip-worktree bit turned on is being virtualized and will be removed with the change of HEAD. There was only one failing test when running with these changes that was checking to make sure the worktree narrows on checkout which was expected since we would no longer be narrowing the worktree. Signed-off-by: Kevin Willford --- Documentation/config/core.txt | 9 +++++++++ gvfs.h | 1 + t/t1090-sparse-checkout-scope.sh | 17 +++++++++++++++++ unpack-trees.c | 22 ++++++++++++++++++++++ 4 files changed, 49 insertions(+) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index ce7157f81d51df..993707146bbd4f 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -624,6 +624,15 @@ core.gvfs:: Bit value 4 Normally git write-tree ensures that the objects referenced by the directory exist in the object database. This option disables this check. + GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT:: + Bit value 8 + When marking entries to remove from the index and the working + directory this option will take into account what the + skip-worktree bit was set to so that if the entry has the + skip-worktree bit set it will not be removed from the working + directory. This will allow virtualized working directories to + detect the change to HEAD and use the new commit tree to show + the files that are in the working directory. 
-- core.sparseCheckout:: diff --git a/gvfs.h b/gvfs.h index dabbf67f94c852..f9144d2ad9ab92 100644 --- a/gvfs.h +++ b/gvfs.h @@ -15,6 +15,7 @@ */ #define GVFS_SKIP_SHA_ON_INDEX (1 << 0) #define GVFS_MISSING_OK (1 << 2) +#define GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT (1 << 3) static inline int gvfs_config_is_set(int mask) { return (core_gvfs & mask) == mask; diff --git a/t/t1090-sparse-checkout-scope.sh b/t/t1090-sparse-checkout-scope.sh index ae723732777348..ac786c7e7af010 100755 --- a/t/t1090-sparse-checkout-scope.sh +++ b/t/t1090-sparse-checkout-scope.sh @@ -82,6 +82,23 @@ test_expect_success 'in partial clone, sparse checkout only fetches needed blobs test_cmp expect actual ' +test_expect_success 'checkout does not delete items outside the sparse checkout file' ' + git checkout master && + git config core.gvfs 8 && + git checkout -b outside && + echo "new file1" >d && + git add d && + git commit -m "branch initial" && + echo "new file1" >e && + git add e && + git commit -m "skipped worktree" && + git update-index --skip-worktree e && + echo "/d" >.git/info/sparse-checkout && + git checkout HEAD^ && + test_path_is_file d && + test_path_is_file e +' + test_expect_success MINGW 'no unnecessary opendir() with fscache' ' git clone . 
fscache-test && ( diff --git a/unpack-trees.c b/unpack-trees.c index 0b5f1c2450303c..a72eba827efd5a 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -16,6 +16,7 @@ #include "fsmonitor.h" #include "object-store.h" #include "promisor-remote.h" +#include "gvfs.h" /* * Error messages expected by scripts out of plumbing commands such as @@ -2128,6 +2129,27 @@ static int deleted_entry(const struct cache_entry *ce, } if (!(old->ce_flags & CE_CONFLICTED) && verify_uptodate(old, o)) return -1; + + /* + * When marking entries to remove from the index and the working + * directory this option will take into account what the + * skip-worktree bit was set to so that if the entry has the + * skip-worktree bit set it will not be removed from the working + * directory. This will allow virtualized working directories to + * detect the change to HEAD and use the new commit tree to show + * the files that are in the working directory. + * + * old is the cache_entry that will have the skip-worktree bit set + * which will need to be preserved when the CE_REMOVE entry is added + */ + if (gvfs_config_is_set(GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT) && + old && + old->ce_flags & CE_SKIP_WORKTREE) { + add_entry(o, old, CE_REMOVE, 0); + invalidate_ce_path(old, o); + return 1; + } + add_entry(o, ce, CE_REMOVE, 0); invalidate_ce_path(ce, o); return 1; From 518cc2cad82176351631f6fb694c92d183e62102 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Mon, 30 May 2016 10:55:53 -0400 Subject: [PATCH 028/129] gvfs: optionally skip reachability checks/upload pack during fetch While performing a fetch with a virtual file system we know that there will be missing objects and we don't want to download them just because of the reachability of the commits. We also don't want to download a pack file with commits, trees, and blobs since these will be downloaded on demand. This flag will skip the first connectivity check and by returning zero will skip the upload pack. 
It will also skip the second connectivity check but continue to update the branches to the latest commit ids. Signed-off-by: Kevin Willford --- Documentation/config/core.txt | 9 +++++++++ connected.c | 19 +++++++++++++++++++ gvfs.h | 1 + t/t5582-vfs.sh | 24 ++++++++++++++++++++++++ 4 files changed, 53 insertions(+) create mode 100755 t/t5582-vfs.sh diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 993707146bbd4f..6782a0f3c06a94 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -633,6 +633,15 @@ core.gvfs:: directory. This will allow virtualized working directories to detect the change to HEAD and use the new commit tree to show the files that are in the working directory. + GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK:: + Bit value 16 + While performing a fetch with a virtual file system we know + that there will be missing objects and we don't want to download + them just because of the reachability of the commits. We also + don't want to download a pack file with commits, trees, and blobs + since these will be downloaded on demand. This flag will skip the + checks on the reachability of objects during a fetch as well as + the upload pack so that extraneous objects don't get downloaded. 
-- core.sparseCheckout:: diff --git a/connected.c b/connected.c index c337f5f7f4dd07..d976b870aa740b 100644 --- a/connected.c +++ b/connected.c @@ -6,6 +6,7 @@ #include "transport.h" #include "packfile.h" #include "promisor-remote.h" +#include "gvfs.h" /* * If we feed all the commits we want to verify to this command @@ -31,6 +32,24 @@ int check_connected(oid_iterate_fn fn, void *cb_data, size_t base_len; const unsigned hexsz = the_hash_algo->hexsz; + /* + * Running a virtual file system there will be objects that are + * missing locally and we don't want to download a bunch of + * commits, trees, and blobs just to make sure everything is + * reachable locally so this option will skip reachability + * checks below that use rev-list. This will stop the check + * before uploadpack runs to determine if there is anything to + * fetch. Returning zero for the first check will also prevent the + * uploadpack from happening. It will also skip the check after + * the fetch is finished to make sure all the objects were + * downloaded in the pack file. This will allow the fetch to + * run and get all the latest tip commit ids for all the branches + * in the fetch but not pull down commits, trees, or blobs via + * upload pack. 
+ */ + if (gvfs_config_is_set(GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK)) + return 0; + if (!opt) opt = &defaults; transport = opt->transport; diff --git a/gvfs.h b/gvfs.h index f9144d2ad9ab92..8ee12df31c8a1d 100644 --- a/gvfs.h +++ b/gvfs.h @@ -16,6 +16,7 @@ #define GVFS_SKIP_SHA_ON_INDEX (1 << 0) #define GVFS_MISSING_OK (1 << 2) #define GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT (1 << 3) +#define GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK (1 << 4) static inline int gvfs_config_is_set(int mask) { return (core_gvfs & mask) == mask; diff --git a/t/t5582-vfs.sh b/t/t5582-vfs.sh new file mode 100755 index 00000000000000..8a703cbb640387 --- /dev/null +++ b/t/t5582-vfs.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +test_description='fetch using the flag to skip reachability and upload pack' + +. ./test-lib.sh + + +test_expect_success setup ' + echo inital >a && + git add a && + git commit -m initial && + git clone . one +' + +test_expect_success "fetch test" ' + cd one && + git config core.gvfs 16 && + rm -rf .git/objects/* && + git -C .. cat-file commit HEAD | git hash-object -w --stdin -t commit && + git fetch && + test_must_fail git rev-parse --verify HEAD^{tree} +' + +test_done \ No newline at end of file From c9984b7562a77d5d93691352fa2d14e4b355dbfa Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 30 Nov 2016 23:11:36 +0100 Subject: [PATCH 029/129] Add a new run_hook_argv() function The two existing members of the run_hook*() family, run_hook_ve() and run_hook_le(), are good for callers that know the precise number of parameters already. Let's introduce a new sibling that takes an argv array for callers that want to pass a variable number of parameters. 
Signed-off-by: Johannes Schindelin --- run-command.c | 20 +++++++++++++++++--- run-command.h | 1 + 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/run-command.c b/run-command.c index f5e1149f9b395e..b708acbd182754 100644 --- a/run-command.c +++ b/run-command.c @@ -1342,7 +1342,8 @@ const char *find_hook(const char *name) return path.buf; } -int run_hook_ve(const char *const *env, const char *name, va_list args) +int run_hook_argv(const char *const *env, const char *name, + const char **argv) { struct child_process hook = CHILD_PROCESS_INIT; const char *p; @@ -1352,8 +1353,7 @@ int run_hook_ve(const char *const *env, const char *name, va_list args) return 0; argv_array_push(&hook.args, p); - while ((p = va_arg(args, const char *))) - argv_array_push(&hook.args, p); + argv_array_pushv(&hook.args, argv); hook.env = env; hook.no_stdin = 1; hook.stdout_to_stderr = 1; @@ -1362,6 +1362,20 @@ int run_hook_ve(const char *const *env, const char *name, va_list args) return run_command(&hook); } +int run_hook_ve(const char *const *env, const char *name, va_list args) +{ + struct argv_array argv = ARGV_ARRAY_INIT; + const char *p; + int ret; + + while ((p = va_arg(args, const char *))) + argv_array_push(&argv, p); + + ret = run_hook_argv(env, name, argv.argv); + argv_array_clear(&argv); + return ret; +} + int run_hook_le(const char *const *env, const char *name, ...) 
{ va_list args; diff --git a/run-command.h b/run-command.h index 592d9dc03584c1..efd2ae691e4292 100644 --- a/run-command.h +++ b/run-command.h @@ -217,6 +217,7 @@ const char *find_hook(const char *name); LAST_ARG_MUST_BE_NULL int run_hook_le(const char *const *env, const char *name, ...); int run_hook_ve(const char *const *env, const char *name, va_list args); +int run_hook_argv(const char *const *env, const char *name, const char **argv); #define RUN_COMMAND_NO_STDIN 1 #define RUN_GIT_CMD 2 /*If this is to be git sub-command */ From 5d0d824db61445b1b386c6ec2304a4ae8f071410 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Wed, 15 Jun 2016 14:59:16 +0000 Subject: [PATCH 030/129] gvfs: ensure all filters and EOL conversions are blocked Ensure all filters and EOL conversions are blocked when running under GVFS so that our projected file sizes will match the actual file size when it is hydrated on the local machine. Signed-off-by: Ben Peart --- Documentation/config/core.txt | 9 +++++++++ convert.c | 22 +++++++++++++++++++++ gvfs.h | 1 + t/t0021-conversion.sh | 37 +++++++++++++++++++++++++++++++++++ t/t0027-auto-crlf.sh | 12 ++++++++++++ 5 files changed, 81 insertions(+) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 6782a0f3c06a94..118eb58fd1a68d 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -642,6 +642,15 @@ core.gvfs:: since these will be downloaded on demand. This flag will skip the checks on the reachability of objects during a fetch as well as the upload pack so that extraneous objects don't get downloaded. + GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS:: + Bit value 64 + With a virtual file system we only know the file size before any + CRLF or smudge/clean filters processing is done on the client. + To prevent file corruption due to truncation or expansion with + garbage at the end, these filters must not run when the file + is first accessed and brought down to the client. 
Git.exe can't + currently tell the first access vs subsequent accesses so this + flag just blocks them from occurring at all. -- core.sparseCheckout:: diff --git a/convert.c b/convert.c index 2b8549e5f19376..22ee2fc72686ad 100644 --- a/convert.c +++ b/convert.c @@ -9,6 +9,7 @@ #include "sub-process.h" #include "utf8.h" #include "ll-merge.h" +#include "gvfs.h" /* * convert.c - convert a file when checking it out and checking it in. @@ -559,6 +560,9 @@ static int crlf_to_git(const struct index_state *istate, if (!buf) return 1; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* only grow if not in place */ if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); @@ -598,6 +602,9 @@ static int crlf_to_worktree(const char *src, size_t len, if (!will_convert_lf_to_crlf(&stats, crlf_action)) return 0; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* are we "faking" in place editing ? 
*/ if (src == buf->buf) to_free = strbuf_detach(buf, NULL); @@ -712,6 +719,9 @@ static int apply_single_file_filter(const char *path, const char *src, size_t le struct async async; struct filter_params params; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("Filter \"%s\" not supported when running under GVFS", cmd); + memset(&async, 0, sizeof(async)); async.proc = filter_buffer_or_fd; async.data = &params; @@ -1096,6 +1106,9 @@ static int ident_to_git(const char *src, size_t len, if (!buf) return 1; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + /* only grow if not in place */ if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); @@ -1143,6 +1156,9 @@ static int ident_to_worktree(const char *src, size_t len, if (!cnt) return 0; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + /* are we "faking" in place editing ? */ if (src == buf->buf) to_free = strbuf_detach(buf, NULL); @@ -1593,6 +1609,9 @@ static int lf_to_crlf_filter_fn(struct stream_filter *filter, size_t count, o = 0; struct lf_to_crlf_filter *lf_to_crlf = (struct lf_to_crlf_filter *)filter; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* * We may be holding onto the CR to see if it is followed by a * LF, in which case we would need to go to the main loop. 
@@ -1837,6 +1856,9 @@ static int ident_filter_fn(struct stream_filter *filter, struct ident_filter *ident = (struct ident_filter *)filter; static const char head[] = "$Id"; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + if (!input) { /* drain upon eof */ switch (ident->state) { diff --git a/gvfs.h b/gvfs.h index 8ee12df31c8a1d..2d6de575bf4a65 100644 --- a/gvfs.h +++ b/gvfs.h @@ -17,6 +17,7 @@ #define GVFS_MISSING_OK (1 << 2) #define GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT (1 << 3) #define GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK (1 << 4) +#define GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS (1 << 6) static inline int gvfs_config_is_set(int mask) { return (core_gvfs & mask) == mask; diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh index 8a2ba61d69b913..d7694fdc003ed0 100755 --- a/t/t0021-conversion.sh +++ b/t/t0021-conversion.sh @@ -314,6 +314,43 @@ test_expect_success "filter: smudge empty file" ' test_cmp expected filtered-empty-in-repo ' +test_expect_success "filter: clean filters blocked when under GVFS" ' + test_config filter.empty-in-repo.clean "cat >/dev/null" && + test_config filter.empty-in-repo.smudge "echo smudged && cat" && + test_config core.gvfs 64 && + + echo dead data walking >empty-in-repo && + test_must_fail git add empty-in-repo +' + +test_expect_success "filter: smudge filters blocked when under GVFS" ' + test_config filter.empty-in-repo.clean "cat >/dev/null" && + test_config filter.empty-in-repo.smudge "echo smudged && cat" && + test_config core.gvfs 64 && + + test_must_fail git checkout +' + +test_expect_success "ident blocked on add when under GVFS" ' + test_config core.gvfs 64 && + test_config core.autocrlf false && + + echo "*.i ident" >.gitattributes && + echo "\$Id\$" > ident.i && + + test_must_fail git add ident.i +' + +test_expect_success "ident blocked when under GVFS" ' + git add ident.i && + + git commit -m "added ident.i" && + test_config 
core.gvfs 64 && + rm ident.i && + + test_must_fail git checkout -- ident.i +' + test_expect_success 'disable filter with empty override' ' test_config_global filter.disable.smudge false && test_config_global filter.disable.clean false && diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh index 9fcd56fab37314..4d43e15bb96727 100755 --- a/t/t0027-auto-crlf.sh +++ b/t/t0027-auto-crlf.sh @@ -333,6 +333,18 @@ checkout_files () { " } +test_expect_success 'crlf conversions blocked when under GVFS' ' + git checkout -b gvfs && + test_commit initial && + rm initial.t && + test_config core.gvfs 64 && + test_config core.autocrlf true && + test_must_fail git read-tree --reset -u HEAD && + + git config core.autocrlf false && + git read-tree --reset -u HEAD +' + # Test control characters # NUL SOH CR EOF==^Z test_expect_success 'ls-files --eol -o Text/Binary' ' From 5703d80417e86b8016df27fe6a2cef50eb3f2ac2 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Tue, 10 Jan 2017 18:47:14 +0000 Subject: [PATCH 031/129] gvfs: allow "virtualizing" objects The idea is to allow blob objects to be missing from the local repository, and to load them lazily on demand. After discussing this idea on the mailing list, we will rename the feature to "lazy clone" and work more on this. 
Signed-off-by: Ben Peart --- cache.h | 2 ++ config.c | 5 +++++ connected.c | 2 ++ environment.c | 1 + sha1-file.c | 22 ++++++++++++++++++++++ 5 files changed, 32 insertions(+) diff --git a/cache.h b/cache.h index 12cc8aee6b06d0..b156194669ecb6 100644 --- a/cache.h +++ b/cache.h @@ -982,6 +982,8 @@ int use_optional_locks(void); extern char comment_line_char; extern int auto_comment_line_char; +extern int core_virtualize_objects; + enum log_refs_config { LOG_REFS_UNSET = -1, LOG_REFS_NONE = 0, diff --git a/config.c b/config.c index da7624c75c43e2..4b5bcd107bfdb2 100644 --- a/config.c +++ b/config.c @@ -1395,6 +1395,11 @@ static int git_default_core_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.virtualizeobjects")) { + core_virtualize_objects = git_config_bool(var, value); + return 0; + } + /* Add other config variables here and to Documentation/config.txt. */ return platform_core_config(var, value, cb); } diff --git a/connected.c b/connected.c index d976b870aa740b..df0c7333e20edb 100644 --- a/connected.c +++ b/connected.c @@ -49,6 +49,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data, */ if (gvfs_config_is_set(GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK)) return 0; + if (core_virtualize_objects) + return 0; if (!opt) opt = &defaults; diff --git a/environment.c b/environment.c index 71a8daa335b75d..4afd7190c4721c 100644 --- a/environment.c +++ b/environment.c @@ -72,6 +72,7 @@ int core_gvfs; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ unsigned long pack_size_limit_cfg; +int core_virtualize_objects; enum log_refs_config log_all_ref_updates = LOG_REFS_UNSET; #ifndef PROTECT_HFS_DEFAULT diff --git a/sha1-file.c b/sha1-file.c index 03ae9ae93a53d9..2c544ff945f8ea 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -1411,6 +1411,21 @@ static int loose_object_info(struct repository *r, return (status < 0) ? 
status : 0; } +static int run_read_object_hook(const struct object_id *oid) +{ + struct argv_array args = ARGV_ARRAY_INIT; + int ret; + uint64_t start; + + start = getnanotime(); + argv_array_push(&args, oid_to_hex(oid)); + ret = run_hook_argv(NULL, "read-object", args.argv); + argv_array_clear(&args); + trace_performance_since(start, "run_read_object_hook"); + + return ret; +} + int fetch_if_missing = 1; int oid_object_info_extended(struct repository *r, const struct object_id *oid, @@ -1422,6 +1437,7 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid, int rtype; const struct object_id *real = oid; int already_retried = 0; + int tried_hook = 0; if (flags & OBJECT_INFO_LOOKUP_REPLACE) real = lookup_replace_object(r, oid); @@ -1432,6 +1448,7 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid, if (!oi) oi = &blank_oi; +retry: co = find_cached_object(real); if (co) { if (oi->typep) @@ -1466,6 +1483,11 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid, reprepare_packed_git(r); if (find_pack_entry(r, real, &e)) break; + if (core_virtualize_objects && !tried_hook) { + tried_hook = 1; + if (!run_read_object_hook(oid)) + goto retry; + } } /* Check if it is a missing object */ From 8aa1a5431729ca867d1baf964116d205804fc214 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Tue, 24 May 2016 00:32:38 +0000 Subject: [PATCH 032/129] gvfs: add global command pre and post hook procs This adds hard-coded call to GVFS.hooks.exe before and after each Git command runs. To make sure that this is only called on repositories cloned with GVFS, we test for the tell-tale .gvfs. 
Signed-off-by: Ben Peart --- git.c | 147 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 144 insertions(+), 3 deletions(-) diff --git a/git.c b/git.c index 7be7ad34bd0538..8dd8f555e63408 100644 --- a/git.c +++ b/git.c @@ -400,6 +400,132 @@ static int handle_alias(int *argcp, const char ***argv) return ret; } +/* + * Runs pre/post-command hook. + */ +struct argv_array sargv = ARGV_ARRAY_INIT; +int run_post_hook = 0; +int exit_code = -1; + +static int is_gvfs_repo(void) +{ + wchar_t pwd[MAX_PATH]; + DWORD dwRet; + WIN32_FIND_DATAW FindFileData; + HANDLE hFind; + wchar_t *lastslash; + + dwRet = GetCurrentDirectoryW(MAX_PATH-7, pwd); + if (dwRet == 0 || dwRet > MAX_PATH) + die("GetCurrentDirectory failed (%d)\n", (int)GetLastError()); + + if ('\\' != pwd[wcslen(pwd) - 1]) + wcscat(pwd, L"\\"); + lastslash = pwd + wcslen(pwd) - 1; + while (1) { + wcscat(lastslash, L".gvfs"); + + hFind = FindFirstFileW(pwd, &FindFileData); + if (hFind != INVALID_HANDLE_VALUE) { + FindClose(hFind); + if (FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + return 1; + } + + lastslash--; + while ((pwd != lastslash) && (*lastslash != '\\')) + lastslash--; + if (pwd == lastslash) + return 0; + *(lastslash + 1) = 0; + }; + + return 0; +} + +static int run_GVFS_Hooks_argv(const char *const *env, const char *name, + const char *const *argv) +{ + struct child_process hook = CHILD_PROCESS_INIT; + + if (!is_gvfs_repo()) + return 0; + + argv_array_push(&hook.args, "GVFS.Hooks.exe"); + argv_array_push(&hook.args, name); + argv_array_pushv(&hook.args, (const char **)argv); + hook.env = env; + hook.no_stdin = 1; + + return run_command(&hook); +} + +static int run_pre_command_hook(const char **argv) +{ + char *lock; + int ret = 0; + + /* + * Ensure the global pre/post command hook is only called for + * the outer command and not when git is called recursively + * or spawns multiple commands (like with the alias command) + */ + lock = getenv("COMMAND_HOOK_LOCK"); + if 
(lock && !strcmp(lock, "true")) + return 0; + setenv("COMMAND_HOOK_LOCK", "true", 1); + + argv_array_pushv(&sargv, argv); + /* + * TODO: This is a temporary hack until we can get config settings + * before executing various git commands without messing up git's state. + * Once we can safely read settings, use the normal hook functions. + * + * ret = run_hook_argv(NULL, "pre-command", sargv.argv); + */ + ret = run_GVFS_Hooks_argv(NULL, "pre-command", sargv.argv); + + if (!ret) + run_post_hook = 1; + return ret; +} + +static int run_post_command_hook(void) +{ + char *lock; + int ret = 0; + + /* + * Only run post_command if pre_command succeeded in this process + */ + if (!run_post_hook) + return 0; + lock = getenv("COMMAND_HOOK_LOCK"); + if (!lock || strcmp(lock, "true")) + return 0; + + argv_array_pushf(&sargv, "--exit_code=%u", exit_code); + /* + * TODO: This is a temporary hack until we can get config settings + * before executing various git commands without messing up git's state. + * Once we can safely read settings, use the normal hook functions. 
+ * + * ret = run_hook_argv(NULL, "post-command", sargv.argv); + */ + ret = run_GVFS_Hooks_argv(NULL, "post-command", sargv.argv); + + run_post_hook = 0; + argv_array_clear(&sargv); + setenv("COMMAND_HOOK_LOCK", "false", 1); + return ret; +} + +static void post_command_hook_atexit(void) +{ + fflush(NULL); + run_post_command_hook(); +} + static int run_builtin(struct cmd_struct *p, int argc, const char **argv) { int status, help; @@ -436,17 +562,22 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) if (!help && p->option & NEED_WORK_TREE) setup_work_tree(); + if (run_pre_command_hook(argv)) + die("pre-command hook aborted command"); + trace_argv_printf(argv, "trace: built-in: git"); trace2_cmd_name(p->cmd); trace2_cmd_list_config(); validate_cache_entries(the_repository->index); - status = p->fn(argc, argv, prefix); + exit_code = status = p->fn(argc, argv, prefix); validate_cache_entries(the_repository->index); if (status) return status; + run_post_command_hook(); + /* Somebody closed stdout? */ if (fstat(fileno(stdout), &st)) return 0; @@ -702,13 +833,16 @@ static void execv_dashed_external(const char **argv) */ trace_argv_printf(cmd.args.argv, "trace: exec:"); + if (run_pre_command_hook(cmd.args.argv)) + die("pre-command hook aborted command"); + /* * If we fail because the command is not found, it is * OK to return. Otherwise, we just pass along the status code, * or our usual generic code if we were not even able to exec * the program. 
*/ - status = run_command(&cmd); + exit_code = status = run_command(&cmd); /* * If the child process ran and we are now going to exit, emit a @@ -719,6 +853,8 @@ static void execv_dashed_external(const char **argv) exit(status); else if (errno != ENOENT) exit(128); + + run_post_command_hook(); } static int run_argv(int *argcp, const char ***argv) @@ -826,6 +962,7 @@ int cmd_main(int argc, const char **argv) } trace_command_performance(argv); + atexit(post_command_hook_atexit); /* * "git-xxxx" is the same as "git xxxx", but we obviously: @@ -853,10 +990,14 @@ int cmd_main(int argc, const char **argv) } else { /* The user didn't specify a command; give them help */ commit_pager_choice(); + if (run_pre_command_hook(argv)) + die("pre-command hook aborted command"); printf(_("usage: %s\n\n"), git_usage_string); list_common_cmds_help(); printf("\n%s\n", _(git_more_info_string)); - exit(1); + exit_code = 1; + run_post_command_hook(); + exit(exit_code); } cmd = argv[0]; From f42366ef1e39509f85043a75e63166d3c028def1 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 6 Mar 2017 12:38:08 +0100 Subject: [PATCH 033/129] Allow hooks to be run before setup_git_directory() The use case here is to allow usage statistics to be gathered by running hooks before and after every command, and to make that configurable via hooks.
Signed-off-by: Johannes Schindelin --- run-command.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/run-command.c b/run-command.c index b708acbd182754..2e6b365891a597 100644 --- a/run-command.c +++ b/run-command.c @@ -7,6 +7,7 @@ #include "strbuf.h" #include "string-list.h" #include "quote.h" +#include "config.h" void child_process_init(struct child_process *child) { @@ -1308,12 +1309,60 @@ int async_with_fork(void) #endif } +static int early_hooks_path_config(const char *var, const char *value, void *data) +{ + if (!strcmp(var, "core.hookspath")) + return git_config_pathname((const char **)data, var, value); + + return 0; +} + +/* Discover the hook before setup_git_directory() was called */ +static const char *hook_path_early(const char *name, struct strbuf *result) +{ + static struct strbuf hooks_dir = STRBUF_INIT; + static int initialized; + + if (initialized < 0) + return NULL; + + if (!initialized) { + struct strbuf gitdir = STRBUF_INIT, commondir = STRBUF_INIT; + const char *early_hooks_dir = NULL; + + if (discover_git_directory(&commondir, &gitdir) < 0) { + initialized = -1; + return NULL; + } + + read_early_config(early_hooks_path_config, &early_hooks_dir); + if (!early_hooks_dir) + strbuf_addf(&hooks_dir, "%s/hooks/", commondir.buf); + else { + strbuf_add_absolute_path(&hooks_dir, early_hooks_dir); + strbuf_addch(&hooks_dir, '/'); + } + + strbuf_release(&gitdir); + strbuf_release(&commondir); + + initialized = 1; + } + + strbuf_addf(result, "%s%s", hooks_dir.buf, name); + return result->buf; +} + const char *find_hook(const char *name) { static struct strbuf path = STRBUF_INIT; strbuf_reset(&path); - strbuf_git_path(&path, "hooks/%s", name); + if (have_git_dir()) + strbuf_git_path(&path, "hooks/%s", name); + else if (!hook_path_early(name, &path)) + return NULL; + if (access(path.buf, X_OK) < 0) { int err = errno; From 50a7858b171e1c820996da5d3a6943a99aa086ce Mon Sep 17 00:00:00 2001 
From: Ben Peart Date: Tue, 24 Jan 2017 20:48:27 +0100 Subject: [PATCH 034/129] gvfs: introduce pre/post command hooks GVFS Git introduced pre-command and post-command hooks, to gather usage statistics and to be able to adjust the worktree if necessary. As run_hooks() implicitly calls setup_git_directory(), and that function does surprising things to the global state (sometimes even changing the current working directory), it cannot be used here. This commit introduces the pre-command/post-command hooks, based on the previous patches that culminate in support for running hooks early, i.e. before setup_git_directory() was called. Signed-off-by: Ben Peart --- git.c | 84 ++++-------------------------------- t/t0400-pre-command-hook.sh | 34 +++++++++++++++ t/t0401-post-command-hook.sh | 32 ++++++++++++++ 3 files changed, 74 insertions(+), 76 deletions(-) create mode 100755 t/t0400-pre-command-hook.sh create mode 100755 t/t0401-post-command-hook.sh diff --git a/git.c b/git.c index 8dd8f555e63408..cbf7b1f5422ba5 100644 --- a/git.c +++ b/git.c @@ -4,6 +4,7 @@ #include "help.h" #include "run-command.h" #include "alias.h" +#include "dir.h" #define RUN_SETUP (1<<0) #define RUN_SETUP_GENTLY (1<<1) @@ -400,65 +401,10 @@ static int handle_alias(int *argcp, const char ***argv) return ret; } -/* - * Runs pre/post-command hook. 
- */ -struct argv_array sargv = ARGV_ARRAY_INIT; -int run_post_hook = 0; -int exit_code = -1; - -static int is_gvfs_repo(void) -{ - wchar_t pwd[MAX_PATH]; - DWORD dwRet; - WIN32_FIND_DATAW FindFileData; - HANDLE hFind; - wchar_t *lastslash; - - dwRet = GetCurrentDirectoryW(MAX_PATH-7, pwd); - if (dwRet == 0 || dwRet > MAX_PATH) - die("GetCurrentDirectory failed (%d)\n", (int)GetLastError()); - - if ('\\' != pwd[wcslen(pwd) - 1]) - wcscat(pwd, L"\\"); - lastslash = pwd + wcslen(pwd) - 1; - while (1) { - wcscat(lastslash, L".gvfs"); - - hFind = FindFirstFileW(pwd, &FindFileData); - if (hFind != INVALID_HANDLE_VALUE) { - FindClose(hFind); - if (FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) - return 1; - } - - lastslash--; - while ((pwd != lastslash) && (*lastslash != '\\')) - lastslash--; - if (pwd == lastslash) - return 0; - *(lastslash + 1) = 0; - }; - - return 0; -} - -static int run_GVFS_Hooks_argv(const char *const *env, const char *name, - const char *const *argv) -{ - struct child_process hook = CHILD_PROCESS_INIT; - - if (!is_gvfs_repo()) - return 0; - - argv_array_push(&hook.args, "GVFS.Hooks.exe"); - argv_array_push(&hook.args, name); - argv_array_pushv(&hook.args, (const char **)argv); - hook.env = env; - hook.no_stdin = 1; - - return run_command(&hook); -} +/* Runs pre/post-command hook */ +static struct argv_array sargv = ARGV_ARRAY_INIT; +static int run_post_hook = 0; +static int exit_code = -1; static int run_pre_command_hook(const char **argv) { @@ -475,15 +421,9 @@ static int run_pre_command_hook(const char **argv) return 0; setenv("COMMAND_HOOK_LOCK", "true", 1); + /* call the hook proc */ argv_array_pushv(&sargv, argv); - /* - * TODO: This is a temporary hack until we can get config settings - * before executing various git commands without messing up git's state. - * Once we can safely read settings, use the normal hook functions. 
- * - * ret = run_hook_argv(NULL, "pre-command", sargv.argv); - */ - ret = run_GVFS_Hooks_argv(NULL, "pre-command", sargv.argv); + ret = run_hook_argv(NULL, "pre-command", sargv.argv); if (!ret) run_post_hook = 1; @@ -505,14 +445,7 @@ static int run_post_command_hook(void) return 0; argv_array_pushf(&sargv, "--exit_code=%u", exit_code); - /* - * TODO: This is a temporary hack until we can get config settings - * before executing various git commands without messing up git's state. - * Once we can safely read settings, use the normal hook functions. - * - * ret = run_hook_argv(NULL, "post-command", sargv.argv); - */ - ret = run_GVFS_Hooks_argv(NULL, "post-command", sargv.argv); + ret = run_hook_argv(NULL, "post-command", sargv.argv); run_post_hook = 0; argv_array_clear(&sargv); @@ -522,7 +455,6 @@ static int run_post_command_hook(void) static void post_command_hook_atexit(void) { - fflush(NULL); run_post_command_hook(); } diff --git a/t/t0400-pre-command-hook.sh b/t/t0400-pre-command-hook.sh new file mode 100755 index 00000000000000..4f4f610b52b0a0 --- /dev/null +++ b/t/t0400-pre-command-hook.sh @@ -0,0 +1,34 @@ +#!/bin/sh + +test_description='pre-command hook' + +. 
./test-lib.sh + +test_expect_success 'with no hook' ' + echo "first" > file && + git add file && + git commit -m "first" +' + +test_expect_success 'with succeeding hook' ' + mkdir -p .git/hooks && + write_script .git/hooks/pre-command <<-EOF && + echo "\$*" >\$(git rev-parse --git-dir)/pre-command.out + EOF + echo "second" >> file && + git add file && + test "add file" = "$(cat .git/pre-command.out)" && + echo Hello | git hash-object --stdin && + test "hash-object --stdin" = "$(cat .git/pre-command.out)" +' + +test_expect_success 'with failing hook' ' + write_script .git/hooks/pre-command <<-EOF && + exit 1 + EOF + echo "third" >> file && + test_must_fail git add file && + test_path_is_missing "$(cat .git/pre-command.out)" +' + +test_done diff --git a/t/t0401-post-command-hook.sh b/t/t0401-post-command-hook.sh new file mode 100755 index 00000000000000..64646f7ad03b57 --- /dev/null +++ b/t/t0401-post-command-hook.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +test_description='post-command hook' + +. ./test-lib.sh + +test_expect_success 'with no hook' ' + echo "first" > file && + git add file && + git commit -m "first" +' + +test_expect_success 'with succeeding hook' ' + mkdir -p .git/hooks && + write_script .git/hooks/post-command <<-EOF && + echo "\$*" >\$(git rev-parse --git-dir)/post-command.out + EOF + echo "second" >> file && + git add file && + test "add file --exit_code=0" = "$(cat .git/post-command.out)" +' + +test_expect_success 'with failing pre-command hook' ' + write_script .git/hooks/pre-command <<-EOF && + exit 1 + EOF + echo "third" >> file && + test_must_fail git add file && + test_path_is_missing "$(cat .git/post-command.out)" +' + +test_done From 60b95f24c46dd52310f8f8de43d2d806ed76c2b1 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Wed, 15 Mar 2017 18:43:05 +0000 Subject: [PATCH 035/129] Hydrate missing loose objects in check_and_freshen() Hydrate missing loose objects in check_and_freshen() when running virtualized. 
Add test cases to verify read-object hook works when running virtualized. This hook is called in check_and_freshen() rather than check_and_freshen_local() to make the hook work also with alternates. Helped-by: Kevin Willford Signed-off-by: Ben Peart --- sha1-file.c | 46 +++++++++++++++++++++++++++--------------- t/t0411-read-object.sh | 27 +++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 16 deletions(-) create mode 100755 t/t0411-read-object.sh diff --git a/sha1-file.c b/sha1-file.c index 2c544ff945f8ea..aeddd0b2ef0229 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -865,6 +865,24 @@ void prepare_alt_odb(struct repository *r) r->objects->loaded_alternates = 1; } +static int run_read_object_hook(const struct object_id *oid) +{ + struct child_process hook = CHILD_PROCESS_INIT; + const char *p; + + p = find_hook("read-object"); + if (!p) + return 1; + + argv_array_push(&hook.args, p); + argv_array_push(&hook.args, oid_to_hex(oid)); + hook.env = NULL; + hook.no_stdin = 1; + hook.stdout_to_stderr = 1; + + return run_command(&hook); +} + /* Returns 1 if we have successfully freshened the file, 0 otherwise. */ static int freshen_file(const char *fn) { @@ -917,8 +935,19 @@ static int check_and_freshen_nonlocal(const struct object_id *oid, int freshen) static int check_and_freshen(const struct object_id *oid, int freshen) { - return check_and_freshen_local(oid, freshen) || + int ret; + int tried_hook = 0; + +retry: + ret = check_and_freshen_local(oid, freshen) || check_and_freshen_nonlocal(oid, freshen); + if (!ret && core_virtualize_objects && !tried_hook) { + tried_hook = 1; + if (!run_read_object_hook(oid)) + goto retry; + } + + return ret; } int has_loose_object_nonlocal(const struct object_id *oid) @@ -1411,21 +1440,6 @@ static int loose_object_info(struct repository *r, return (status < 0) ? 
status : 0; } -static int run_read_object_hook(const struct object_id *oid) -{ - struct argv_array args = ARGV_ARRAY_INIT; - int ret; - uint64_t start; - - start = getnanotime(); - argv_array_push(&args, oid_to_hex(oid)); - ret = run_hook_argv(NULL, "read-object", args.argv); - argv_array_clear(&args); - trace_performance_since(start, "run_read_object_hook"); - - return ret; -} - int fetch_if_missing = 1; int oid_object_info_extended(struct repository *r, const struct object_id *oid, diff --git a/t/t0411-read-object.sh b/t/t0411-read-object.sh new file mode 100755 index 00000000000000..0c3bfea1886cf9 --- /dev/null +++ b/t/t0411-read-object.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +test_description='tests for read-object hook' + +. ./test-lib.sh + +test_expect_success 'setup host and guest repos' ' + test_commit zero && + hash1=$(git ls-tree HEAD | grep zero.t | cut -f1 | cut -d\ -f3) && + git init guest-repo && + cd guest-repo && + git config core.virtualizeobjects true && + write_script .git/hooks/read-object <<-\EOF + # pass core.virtualizeobjects=false so we dont end up calling the hook proc recursively + git --git-dir=../.git/ cat-file blob "$1" | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1 + EOF +' + +test_expect_success 'blobs can be retrieved from the host repo' ' + git cat-file blob "$hash1" +' + +test_expect_success 'invalid blobs generate errors' ' + test_must_fail git cat-file blob "invalid" +' + +test_done From 38d8dc46cb63274bac027e4b4cacdae1b8426240 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Wed, 22 Feb 2017 12:50:43 -0700 Subject: [PATCH 036/129] sparse-checkout: update files with a modify/delete conflict When using the sparse-checkout feature, the file might not be on disk because the skip-worktree bit is on. 
Signed-off-by: Kevin Willford --- merge-recursive.c | 2 +- t/t7615-merge-sparse-checkout.sh | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100755 t/t7615-merge-sparse-checkout.sh diff --git a/merge-recursive.c b/merge-recursive.c index 6a7cf5ca31eedf..028558beb2bdd8 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -1497,7 +1497,7 @@ static int handle_change_delete(struct merge_options *opt, * path. We could call update_file_flags() with update_cache=0 * and update_wd=0, but that's a no-op. */ - if (change_branch != opt->branch1 || alt_path) + if (change_branch != opt->branch1 || alt_path || !file_exists(update_path)) ret = update_file(opt, 0, changed, update_path); } free(alt_path); diff --git a/t/t7615-merge-sparse-checkout.sh b/t/t7615-merge-sparse-checkout.sh new file mode 100755 index 00000000000000..8e8208a61d39eb --- /dev/null +++ b/t/t7615-merge-sparse-checkout.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +test_description='merge can handle sparse-checkout' + +. ./test-lib.sh + +# merges with conflicts + +test_expect_success 'setup' ' + test_commit a && + test_commit file && + git checkout -b delete-file && + git rm file.t && + test_tick && + git commit -m "remove file" && + git checkout master && + test_commit modify file.t changed +' + +test_expect_success 'merge conflict deleted file and modified' ' + echo "/a.t" >.git/info/sparse-checkout && + test_config core.sparsecheckout true && + git checkout -f && + test_path_is_missing file.t && + test_must_fail git merge delete-file && + test_path_is_file file.t && + test "changed" = "$(cat file.t)" +' + +test_done From c6a4f50b97c7a6d6005ec1d1ac61f7b800212c0d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 16 Mar 2017 21:07:54 +0100 Subject: [PATCH 037/129] t0400: verify that the hook is called correctly from a subdirectory Suggested by Ben Peart. 
Signed-off-by: Johannes Schindelin --- t/t0400-pre-command-hook.sh | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/t/t0400-pre-command-hook.sh b/t/t0400-pre-command-hook.sh index 4f4f610b52b0a0..83c453c9643eae 100755 --- a/t/t0400-pre-command-hook.sh +++ b/t/t0400-pre-command-hook.sh @@ -31,4 +31,27 @@ test_expect_success 'with failing hook' ' test_path_is_missing "$(cat .git/pre-command.out)" ' +test_expect_success 'in a subdirectory' ' + echo touch i-was-here | write_script .git/hooks/pre-command && + mkdir sub && + ( + cd sub && + git version + ) && + test_path_is_file sub/i-was-here +' + +test_expect_success 'in a subdirectory, using an alias' ' + git reset --hard && + echo "echo \"\$@; \$(pwd)\" >>log" | + write_script .git/hooks/pre-command && + mkdir -p sub && + ( + cd sub && + git -c alias.v="version" v + ) && + test_path_is_missing log && + test_line_count = 2 sub/log +' + test_done From 00c1a61fae398a09e19dab0bffcffffedcd738d8 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Tue, 18 Jul 2017 12:04:44 +0200 Subject: [PATCH 038/129] Add support for read-object as a background process to retrieve missing objects This commit converts the existing read_object hook proc model for downloading missing blobs to use a background process that is started the first time git encounters a missing blob and stays running until git exits. Git and the read-object process communicate via stdin/stdout and a versioned, capability negotiated interface as documented in Documentation/technical/read-object-protocol.txt. The advantage of this over the previous hook proc is that it saves the overhead of spawning a new hook process for every missing blob. The model for the background process was refactored from the recent git LFS work. I refactored that code into a shared module (sub-process.c/h) and then updated convert.c to consume the new library. I then used the same sub-process module when implementing the read-object background process. 
Signed-off-by: Ben Peart --- .../technical/read-object-protocol.txt | 102 +++++++++++++++ contrib/long-running-read-object/example.pl | 114 ++++++++++++++++ sha1-file.c | 122 ++++++++++++++++-- t/t0410/read-object | 114 ++++++++++++++++ t/t0411-read-object.sh | 24 ++-- 5 files changed, 450 insertions(+), 26 deletions(-) create mode 100644 Documentation/technical/read-object-protocol.txt create mode 100644 contrib/long-running-read-object/example.pl create mode 100755 t/t0410/read-object diff --git a/Documentation/technical/read-object-protocol.txt b/Documentation/technical/read-object-protocol.txt new file mode 100644 index 00000000000000..a893b46e7c28a9 --- /dev/null +++ b/Documentation/technical/read-object-protocol.txt @@ -0,0 +1,102 @@ +Read Object Process +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The read-object process enables Git to read all missing blobs with a +single process invocation for the entire life of a single Git command. +This is achieved by using a packet format (pkt-line, see technical/ +protocol-common.txt) based protocol over standard input and standard +output as follows. All packets, except for the "*CONTENT" packets and +the "0000" flush packet, are considered text and therefore are +terminated by a LF. + +Git starts the process when it encounters the first missing object that +needs to be retrieved. After the process is started, Git sends a welcome +message ("git-read-object-client"), a list of supported protocol version +numbers, and a flush packet. Git expects to read a welcome response +message ("git-read-object-server"), exactly one protocol version number +from the previously sent list, and a flush packet. All further +communication will be based on the selected version. + +The remaining protocol description below documents "version=1". Please +note that "version=42" in the example below does not exist and is only +there to illustrate how the protocol would look with more than one +version. 
+ +After the version negotiation Git sends a list of all capabilities that +it supports and a flush packet. Git expects to read a list of desired +capabilities, which must be a subset of the supported capabilities list, +and a flush packet as response: +------------------------ +packet: git> git-read-object-client +packet: git> version=1 +packet: git> version=42 +packet: git> 0000 +packet: git< git-read-object-server +packet: git< version=1 +packet: git< 0000 +packet: git> capability=get +packet: git> capability=have +packet: git> capability=put +packet: git> capability=not-yet-invented +packet: git> 0000 +packet: git< capability=get +packet: git< 0000 +------------------------ +The only supported capability in version 1 is "get". + +Afterwards Git sends a list of "key=value" pairs terminated with a flush +packet. The list will contain at least the command (based on the +supported capabilities) and the sha1 of the object to retrieve. Please +note, that the process must not send any response before it received the +final flush packet. + +When the process receives the "get" command, it should make the requested +object available in the git object store and then return success. Git will +then check the object store again and this time find it and proceed. +------------------------ +packet: git> command=get +packet: git> sha1=0a214a649e1b3d5011e14a3dc227753f2bd2be05 +packet: git> 0000 +------------------------ + +The process is expected to respond with a list of "key=value" pairs +terminated with a flush packet. If the process does not experience +problems then the list must contain a "success" status. +------------------------ +packet: git< status=success +packet: git< 0000 +------------------------ + +In case the process cannot or does not want to process the content, it +is expected to respond with an "error" status. 
+------------------------ +packet: git< status=error +packet: git< 0000 +------------------------ + +In case the process cannot or does not want to process the content as +well as any future content for the lifetime of the Git process, then it +is expected to respond with an "abort" status at any point in the +protocol. +------------------------ +packet: git< status=abort +packet: git< 0000 +------------------------ + +Git neither stops nor restarts the process in case the "error"/"abort" +status is set. + +If the process dies during the communication or does not adhere to the +protocol then Git will stop the process and restart it with the next +object that needs to be processed. + +After the read-object process has processed an object it is expected to +wait for the next "key=value" list containing a command. Git will close +the command pipe on exit. The process is expected to detect EOF and exit +gracefully on its own. Git will wait until the process has stopped. + +A long running read-object process demo implementation can be found in +`contrib/long-running-read-object/example.pl` located in the Git core +repository. If you develop your own long running process then the +`GIT_TRACE_PACKET` environment variables can be very helpful for +debugging (see linkgit:git[1]). diff --git a/contrib/long-running-read-object/example.pl b/contrib/long-running-read-object/example.pl new file mode 100644 index 00000000000000..b8f37f836a813c --- /dev/null +++ b/contrib/long-running-read-object/example.pl @@ -0,0 +1,114 @@ +#!/usr/bin/perl +# +# Example implementation for the Git read-object protocol version 1 +# See Documentation/technical/read-object-protocol.txt +# +# Allows you to test the ability for blobs to be pulled from a host git repo +# "on demand." Called when git needs a blob it couldn't find locally due to +# a lazy clone that only cloned the commits and trees. 
+# +# A lazy clone can be simulated via the following commands from the host repo +# you wish to create a lazy clone of: +# +# cd /host_repo +# git rev-parse HEAD +# git init /guest_repo +# git cat-file --batch-check --batch-all-objects | grep -v 'blob' | +# cut -d' ' -f1 | git pack-objects /guest_repo/.git/objects/pack/noblobs +# cd /guest_repo +# git config core.virtualizeobjects true +# git reset --hard +# +# Please note, this sample is a minimal skeleton. No proper error handling +# was implemented. +# + +use strict; +use warnings; + +# +# Point $DIR to the folder where your host git repo is located so we can pull +# missing objects from it +# +my $DIR = "/host_repo/.git/"; + +sub packet_bin_read { + my $buffer; + my $bytes_read = read STDIN, $buffer, 4; + if ( $bytes_read == 0 ) { + + # EOF - Git stopped talking to us! + exit(); + } + elsif ( $bytes_read != 4 ) { + die "invalid packet: '$buffer'"; + } + my $pkt_size = hex($buffer); + if ( $pkt_size == 0 ) { + return ( 1, "" ); + } + elsif ( $pkt_size > 4 ) { + my $content_size = $pkt_size - 4; + $bytes_read = read STDIN, $buffer, $content_size; + if ( $bytes_read != $content_size ) { + die "invalid packet ($content_size bytes expected; $bytes_read bytes read)"; + } + return ( 0, $buffer ); + } + else { + die "invalid packet size: $pkt_size"; + } +} + +sub packet_txt_read { + my ( $res, $buf ) = packet_bin_read(); + unless ( $buf =~ s/\n$// ) { + die "A non-binary line MUST be terminated by an LF."; + } + return ( $res, $buf ); +} + +sub packet_bin_write { + my $buf = shift; + print STDOUT sprintf( "%04x", length($buf) + 4 ); + print STDOUT $buf; + STDOUT->flush(); +} + +sub packet_txt_write { + packet_bin_write( $_[0] . 
"\n" ); +} + +sub packet_flush { + print STDOUT sprintf( "%04x", 0 ); + STDOUT->flush(); +} + +( packet_txt_read() eq ( 0, "git-read-object-client" ) ) || die "bad initialize"; +( packet_txt_read() eq ( 0, "version=1" ) ) || die "bad version"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad version end"; + +packet_txt_write("git-read-object-server"); +packet_txt_write("version=1"); +packet_flush(); + +( packet_txt_read() eq ( 0, "capability=get" ) ) || die "bad capability"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad capability end"; + +packet_txt_write("capability=get"); +packet_flush(); + +while (1) { + my ($command) = packet_txt_read() =~ /^command=([^=]+)$/; + + if ( $command eq "get" ) { + my ($sha1) = packet_txt_read() =~ /^sha1=([0-9a-f]{40})$/; + packet_bin_read(); + + system ('git --git-dir="' . $DIR . '" cat-file blob ' . $sha1 . ' | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1'); + packet_txt_write(($?) ? "status=error" : "status=success"); + packet_flush(); + } else { + die "bad command '$command'"; + } +} diff --git a/sha1-file.c b/sha1-file.c index aeddd0b2ef0229..de1717c1da4b31 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -32,6 +32,9 @@ #include "packfile.h" #include "object-store.h" #include "promisor-remote.h" +#include "sigchain.h" +#include "sub-process.h" +#include "pkt-line.h" /* The maximum size for an object header. 
*/ #define MAX_HEADER_LEN 32 @@ -865,22 +868,113 @@ void prepare_alt_odb(struct repository *r) r->objects->loaded_alternates = 1; } -static int run_read_object_hook(const struct object_id *oid) +#define CAP_GET (1u<<0) + +static int subprocess_map_initialized; +static struct hashmap subprocess_map; + +struct read_object_process { + struct subprocess_entry subprocess; + unsigned int supported_capabilities; +}; + +static int start_read_object_fn(struct subprocess_entry *subprocess) { - struct child_process hook = CHILD_PROCESS_INIT; - const char *p; + struct read_object_process *entry = (struct read_object_process *)subprocess; + static int versions[] = {1, 0}; + static struct subprocess_capability capabilities[] = { + { "get", CAP_GET }, + { NULL, 0 } + }; - p = find_hook("read-object"); - if (!p) - return 1; + return subprocess_handshake(subprocess, "git-read-object", versions, + NULL, capabilities, + &entry->supported_capabilities); +} + +static int read_object_process(const struct object_id *oid) +{ + int err; + struct read_object_process *entry; + struct child_process *process; + struct strbuf status = STRBUF_INIT; + const char *cmd = find_hook("read-object"); + uint64_t start; + + start = getnanotime(); + + if (!subprocess_map_initialized) { + subprocess_map_initialized = 1; + hashmap_init(&subprocess_map, (hashmap_cmp_fn)cmd2process_cmp, + NULL, 0); + entry = NULL; + } else { + entry = (struct read_object_process *) subprocess_find_entry(&subprocess_map, cmd); + } + + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->supported_capabilities = 0; + + if (subprocess_start(&subprocess_map, &entry->subprocess, cmd, + start_read_object_fn)) { + free(entry); + return -1; + } + } + process = &entry->subprocess.process; + + if (!(CAP_GET & entry->supported_capabilities)) + return -1; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = packet_write_fmt_gently(process->in, "command=get\n"); + if (err) + goto done; + + err = packet_write_fmt_gently(process->in, 
"sha1=%s\n", oid_to_hex(oid)); + if (err) + goto done; + + err = packet_flush_gently(process->in); + if (err) + goto done; + + err = subprocess_read_status(process->out, &status); + err = err ? err : strcmp(status.buf, "success"); + +done: + sigchain_pop(SIGPIPE); + + if (err || errno == EPIPE) { + err = err ? err : errno; + if (!strcmp(status.buf, "error")) { + /* The process signaled a problem with the file. */ + } + else if (!strcmp(status.buf, "abort")) { + /* + * The process signaled a permanent problem. Don't try to read + * objects with the same command for the lifetime of the current + * Git process. + */ + entry->supported_capabilities &= ~CAP_GET; + } + else { + /* + * Something went wrong with the read-object process. + * Force shutdown and restart if needed. + */ + error("external process '%s' failed", cmd); + subprocess_stop(&subprocess_map, + (struct subprocess_entry *)entry); + free(entry); + } + } - argv_array_push(&hook.args, p); - argv_array_push(&hook.args, oid_to_hex(oid)); - hook.env = NULL; - hook.no_stdin = 1; - hook.stdout_to_stderr = 1; + trace_performance_since(start, "read_object_process"); - return run_command(&hook); + return err; } /* Returns 1 if we have successfully freshened the file, 0 otherwise. 
*/ @@ -943,7 +1037,7 @@ static int check_and_freshen(const struct object_id *oid, int freshen) check_and_freshen_nonlocal(oid, freshen); if (!ret && core_virtualize_objects && !tried_hook) { tried_hook = 1; - if (!run_read_object_hook(oid)) + if (!read_object_process(oid)) goto retry; } @@ -1499,7 +1593,7 @@ int oid_object_info_extended(struct repository *r, const struct object_id *oid, break; if (core_virtualize_objects && !tried_hook) { tried_hook = 1; - if (!run_read_object_hook(oid)) + if (!read_object_process(oid)) goto retry; } } diff --git a/t/t0410/read-object b/t/t0410/read-object new file mode 100755 index 00000000000000..85e997c930581c --- /dev/null +++ b/t/t0410/read-object @@ -0,0 +1,114 @@ +#!/usr/bin/perl +# +# Example implementation for the Git read-object protocol version 1 +# See Documentation/technical/read-object-protocol.txt +# +# Allows you to test the ability for blobs to be pulled from a host git repo +# "on demand." Called when git needs a blob it couldn't find locally due to +# a lazy clone that only cloned the commits and trees. +# +# A lazy clone can be simulated via the following commands from the host repo +# you wish to create a lazy clone of: +# +# cd /host_repo +# git rev-parse HEAD +# git init /guest_repo +# git cat-file --batch-check --batch-all-objects | grep -v 'blob' | +# cut -d' ' -f1 | git pack-objects /guest_repo/.git/objects/pack/noblobs +# cd /guest_repo +# git config core.virtualizeobjects true +# git reset --hard +# +# Please note, this sample is a minimal skeleton. No proper error handling +# was implemented. +# + +use strict; +use warnings; + +# +# Point $DIR to the folder where your host git repo is located so we can pull +# missing objects from it +# +my $DIR = "../.git/"; + +sub packet_bin_read { + my $buffer; + my $bytes_read = read STDIN, $buffer, 4; + if ( $bytes_read == 0 ) { + + # EOF - Git stopped talking to us! 
+ exit(); + } + elsif ( $bytes_read != 4 ) { + die "invalid packet: '$buffer'"; + } + my $pkt_size = hex($buffer); + if ( $pkt_size == 0 ) { + return ( 1, "" ); + } + elsif ( $pkt_size > 4 ) { + my $content_size = $pkt_size - 4; + $bytes_read = read STDIN, $buffer, $content_size; + if ( $bytes_read != $content_size ) { + die "invalid packet ($content_size bytes expected; $bytes_read bytes read)"; + } + return ( 0, $buffer ); + } + else { + die "invalid packet size: $pkt_size"; + } +} + +sub packet_txt_read { + my ( $res, $buf ) = packet_bin_read(); + unless ( $buf =~ s/\n$// ) { + die "A non-binary line MUST be terminated by an LF."; + } + return ( $res, $buf ); +} + +sub packet_bin_write { + my $buf = shift; + print STDOUT sprintf( "%04x", length($buf) + 4 ); + print STDOUT $buf; + STDOUT->flush(); +} + +sub packet_txt_write { + packet_bin_write( $_[0] . "\n" ); +} + +sub packet_flush { + print STDOUT sprintf( "%04x", 0 ); + STDOUT->flush(); +} + +( packet_txt_read() eq ( 0, "git-read-object-client" ) ) || die "bad initialize"; +( packet_txt_read() eq ( 0, "version=1" ) ) || die "bad version"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad version end"; + +packet_txt_write("git-read-object-server"); +packet_txt_write("version=1"); +packet_flush(); + +( packet_txt_read() eq ( 0, "capability=get" ) ) || die "bad capability"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad capability end"; + +packet_txt_write("capability=get"); +packet_flush(); + +while (1) { + my ($command) = packet_txt_read() =~ /^command=([^=]+)$/; + + if ( $command eq "get" ) { + my ($sha1) = packet_txt_read() =~ /^sha1=([0-9a-f]{40})$/; + packet_bin_read(); + + system ('git --git-dir="' . $DIR . '" cat-file blob ' . $sha1 . ' | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1'); + packet_txt_write(($?) ? 
"status=error" : "status=success"); + packet_flush(); + } else { + die "bad command '$command'"; + } +} diff --git a/t/t0411-read-object.sh b/t/t0411-read-object.sh index 0c3bfea1886cf9..b8d7521c2c9106 100755 --- a/t/t0411-read-object.sh +++ b/t/t0411-read-object.sh @@ -1,27 +1,27 @@ #!/bin/sh -test_description='tests for read-object hook' +test_description='tests for long running read-object process' . ./test-lib.sh -test_expect_success 'setup host and guest repos' ' +test_expect_success 'setup host repo with a root commit' ' test_commit zero && - hash1=$(git ls-tree HEAD | grep zero.t | cut -f1 | cut -d\ -f3) && - git init guest-repo && - cd guest-repo && - git config core.virtualizeobjects true && - write_script .git/hooks/read-object <<-\EOF - # pass core.virtualizeobjects=false so we dont end up calling the hook proc recursively - git --git-dir=../.git/ cat-file blob "$1" | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1 - EOF + hash1=$(git ls-tree HEAD | grep zero.t | cut -f1 | cut -d\ -f3) ' test_expect_success 'blobs can be retrieved from the host repo' ' - git cat-file blob "$hash1" + git init guest-repo && + (cd guest-repo && + mkdir -p .git/hooks && + cp $TEST_DIRECTORY/t0410/read-object .git/hooks/ && + git config core.virtualizeobjects true && + git cat-file blob "$hash1") ' test_expect_success 'invalid blobs generate errors' ' - test_must_fail git cat-file blob "invalid" + (cd guest-repo && + test_must_fail git cat-file blob "invalid") ' + test_done From 6774bf9482164fbce5c18681743bad03278fd11c Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Wed, 1 Mar 2017 15:17:12 -0800 Subject: [PATCH 039/129] sparse-checkout: avoid writing entries with the skip-worktree bit When using the sparse-checkout feature git should not write to the working directory for files with the skip-worktree bit on. 
With the skip-worktree bit on the file may or may not be in the working directory and if it is not we don't want or need to create it by calling checkout_entry. There are two callers of checkout_target. Both of which check that the file does not exist before calling checkout_target. load_current which makes a call to lstat right before calling checkout_target and check_preimage which will only run checkout_target if stat_ret is less than zero. It sets stat_ret to zero and only if !stat->cached will it lstat the file and set stat_ret to something other than zero. This patch checks if skip-worktree bit is on in checkout_target and just returns so that the entry does not end up in the working directory. This is so that apply will not create a file in the working directory, then update the index but not keep the working directory up to date with the changes that happened in the index. Signed-off-by: Kevin Willford --- apply.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/apply.c b/apply.c index c98d444e2475e6..bb2979f5c87b8f 100644 --- a/apply.c +++ b/apply.c @@ -3344,6 +3344,24 @@ static int checkout_target(struct index_state *istate, { struct checkout costate = CHECKOUT_INIT; + /* + * Do not checkout the entry if the skipworktree bit is set + * + * Both callers of this method (check_preimage and load_current) + * check for the existance of the file before calling this + * method so we know that the file doesn't exist at this point + * and we don't need to perform that check again here. + * We just need to check the skip-worktree and return. + * + * This is to prevent git from creating a file in the + * working directory that has the skip-worktree bit on, + * then updating the index from the patch and not keeping + * the working directory version up to date with what it + * changed the index version to be. 
+ */ + if (ce_skip_worktree(ce)) + return 0; + costate.refresh_cache = 1; costate.istate = istate; if (checkout_entry(ce, &costate, NULL, NULL) || From abe3c843965a46af972b9bd99268a193aebd81ee Mon Sep 17 00:00:00 2001 From: Alejandro Pauly Date: Mon, 10 Apr 2017 13:26:14 -0400 Subject: [PATCH 040/129] Pass PID of git process to hooks. Signed-off-by: Alejandro Pauly --- git.c | 1 + t/t0400-pre-command-hook.sh | 3 ++- t/t0401-post-command-hook.sh | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/git.c b/git.c index cbf7b1f5422ba5..e4ae4fd7b7f8bb 100644 --- a/git.c +++ b/git.c @@ -423,6 +423,7 @@ static int run_pre_command_hook(const char **argv) /* call the hook proc */ argv_array_pushv(&sargv, argv); + argv_array_pushf(&sargv, "--git-pid=%"PRIuMAX, (uintmax_t)getpid()); ret = run_hook_argv(NULL, "pre-command", sargv.argv); if (!ret) diff --git a/t/t0400-pre-command-hook.sh b/t/t0400-pre-command-hook.sh index 83c453c9643eae..f04a55a695bc97 100755 --- a/t/t0400-pre-command-hook.sh +++ b/t/t0400-pre-command-hook.sh @@ -13,7 +13,8 @@ test_expect_success 'with no hook' ' test_expect_success 'with succeeding hook' ' mkdir -p .git/hooks && write_script .git/hooks/pre-command <<-EOF && - echo "\$*" >\$(git rev-parse --git-dir)/pre-command.out + echo "\$*" | sed "s/ --git-pid=[0-9]*//" \ + >\$(git rev-parse --git-dir)/pre-command.out EOF echo "second" >> file && git add file && diff --git a/t/t0401-post-command-hook.sh b/t/t0401-post-command-hook.sh index 64646f7ad03b57..fcbfc4a0c79c1e 100755 --- a/t/t0401-post-command-hook.sh +++ b/t/t0401-post-command-hook.sh @@ -13,7 +13,8 @@ test_expect_success 'with no hook' ' test_expect_success 'with succeeding hook' ' mkdir -p .git/hooks && write_script .git/hooks/post-command <<-EOF && - echo "\$*" >\$(git rev-parse --git-dir)/post-command.out + echo "\$*" | sed "s/ --git-pid=[0-9]*//" \ + >\$(git rev-parse --git-dir)/post-command.out EOF echo "second" >> file && git add file && From 
5e513a18f4cdd69ab02829261b778f93a9127f0d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 8 Sep 2017 11:32:43 +0200 Subject: [PATCH 041/129] sha1_file: when writing objects, skip the read_object_hook If we are going to write an object there is no use in calling the read object hook to get an object from a potentially remote source. We would rather just write out the object and avoid the potential round trip for an object that doesn't exist. This change adds a flag to the check_and_freshen() and freshen_loose_object() functions' signatures so that the hook is bypassed when the functions are called before writing loose objects. The check for a local object is still performed so we don't overwrite something that has already been written to one of the objects directories. Based on a patch by Kevin Willford. Signed-off-by: Johannes Schindelin --- sha1-file.c | 17 ++++++++++------- t/t0410/read-object | 4 ++++ t/t0411-read-object.sh | 7 +++++++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/sha1-file.c b/sha1-file.c index de1717c1da4b31..85fca17d471aa6 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -1027,7 +1027,8 @@ static int check_and_freshen_nonlocal(const struct object_id *oid, int freshen) return 0; } -static int check_and_freshen(const struct object_id *oid, int freshen) +static int check_and_freshen(const struct object_id *oid, int freshen, + int skip_virtualized_objects) { int ret; int tried_hook = 0; @@ -1035,7 +1036,8 @@ static int check_and_freshen(const struct object_id *oid, int freshen) retry: ret = check_and_freshen_local(oid, freshen) || check_and_freshen_nonlocal(oid, freshen); - if (!ret && core_virtualize_objects && !tried_hook) { + if (!ret && core_virtualize_objects && !skip_virtualized_objects && + !tried_hook) { tried_hook = 1; if (!read_object_process(oid)) goto retry; @@ -1051,7 +1053,7 @@ int has_loose_object_nonlocal(const struct object_id *oid) static int has_loose_object(const struct object_id *oid) { - return 
check_and_freshen(oid, 0); + return check_and_freshen(oid, 0, 0); } static void mmap_limit_check(size_t length) @@ -1979,9 +1981,10 @@ static int write_loose_object(const struct object_id *oid, char *hdr, return finalize_object_file(tmp_file.buf, filename.buf); } -static int freshen_loose_object(const struct object_id *oid) +static int freshen_loose_object(const struct object_id *oid, + int skip_virtualized_objects) { - return check_and_freshen(oid, 1); + return check_and_freshen(oid, 1, skip_virtualized_objects); } static int freshen_packed_object(const struct object_id *oid) @@ -2007,7 +2010,7 @@ int write_object_file(const void *buf, unsigned long len, const char *type, * it out into .git/objects/??/?{38} file. */ write_object_file_prepare(buf, len, type, oid, hdr, &hdrlen); - if (freshen_packed_object(oid) || freshen_loose_object(oid)) + if (freshen_packed_object(oid) || freshen_loose_object(oid, 1)) return 0; return write_loose_object(oid, hdr, hdrlen, buf, len, 0); } @@ -2026,7 +2029,7 @@ int hash_object_file_literally(const void *buf, unsigned long len, if (!(flags & HASH_WRITE_OBJECT)) goto cleanup; - if (freshen_packed_object(oid) || freshen_loose_object(oid)) + if (freshen_packed_object(oid) || freshen_loose_object(oid, 1)) goto cleanup; status = write_loose_object(oid, header, hdrlen, buf, len, 0); diff --git a/t/t0410/read-object b/t/t0410/read-object index 85e997c930581c..2360d4916ee81c 100755 --- a/t/t0410/read-object +++ b/t/t0410/read-object @@ -108,6 +108,10 @@ while (1) { system ('git --git-dir="' . $DIR . '" cat-file blob ' . $sha1 . ' | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1'); packet_txt_write(($?) ? 
"status=error" : "status=success"); packet_flush(); + + open my $log, '>>.git/read-object-hook.log'; + print $log "Read object $sha1, exit code $?\n"; + close $log; } else { die "bad command '$command'"; } diff --git a/t/t0411-read-object.sh b/t/t0411-read-object.sh index b8d7521c2c9106..af97ec5b50f356 100755 --- a/t/t0411-read-object.sh +++ b/t/t0411-read-object.sh @@ -23,5 +23,12 @@ test_expect_success 'invalid blobs generate errors' ' test_must_fail git cat-file blob "invalid") ' +test_expect_success 'read-object-hook is bypassed when writing objects' ' + (cd guest-repo && + echo hello >hello.txt && + git add hello.txt && + hash="$(git rev-parse --verify :hello.txt)" && + ! grep "$hash" .git/read-object-hook.log) +' test_done From abce59c890f5eedf8f9215495939cfa60700c925 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Wed, 15 Mar 2017 16:36:53 -0600 Subject: [PATCH 042/129] Fix reset when using the sparse-checkout feature. When using the sparse checkout feature the git reset command will add entries to the index that will have the skip-worktree bit off but will leave the working directory empty. File data is lost because the index version of the files has been changed but there is nothing that is in the working directory. This will cause the next status call to show either deleted for files modified or deleting or nothing for files added. The added files should be shown as untracked and modified files should be shown as modified. To fix this when the reset is running if there is not a file in the working directory and if it will be missing with the new index entry or was not missing in the previous version, we create the previous index version of the file in the working directory so that status will report correctly and the files will be availble for the user to deal with. 
Signed-off-by: Kevin Willford --- builtin/reset.c | 34 +++++++++++++++++++ t/t7114-reset-sparse-checkout.sh | 58 ++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100755 t/t7114-reset-sparse-checkout.sh diff --git a/builtin/reset.c b/builtin/reset.c index e13519c9fe322a..f39d24995c00b1 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -27,6 +27,7 @@ #include "submodule-config.h" #include "strbuf.h" #include "quote.h" +#include "dir.h" #define REFRESH_INDEX_DELAY_WARNING_IN_MS (2 * 1000) @@ -129,12 +130,45 @@ static void update_index_from_diff(struct diff_queue_struct *q, struct diff_options *opt, void *data) { int i; + int pos; int intent_to_add = *(int *)data; for (i = 0; i < q->nr; i++) { struct diff_filespec *one = q->queue[i]->one; + struct diff_filespec *two = q->queue[i]->two; int is_missing = !(one->mode && !is_null_oid(&one->oid)); + int was_missing = !two->mode && is_null_oid(&two->oid); struct cache_entry *ce; + struct cache_entry *ceBefore; + struct checkout state = CHECKOUT_INIT; + + /* + * When using the sparse-checkout feature the cache entries that are + * added here will not have the skip-worktree bit set. + * Without this code there is data that is lost because the files that + * would normally be in the working directory are not there and show as + * deleted for the next status or in the case of added files just disappear. + * We need to create the previous version of the files in the working + * directory so that they will have the right content and the next + * status call will show modified or untracked files correctly. 
+ */ + if (core_apply_sparse_checkout && !file_exists(two->path)) + { + pos = cache_name_pos(two->path, strlen(two->path)); + if ((pos >= 0 && ce_skip_worktree(active_cache[pos])) && (is_missing || !was_missing)) + { + state.force = 1; + state.refresh_cache = 1; + state.istate = &the_index; + ceBefore = make_cache_entry(&the_index, two->mode, &two->oid, two->path, + 0, 0); + if (!ceBefore) + die(_("make_cache_entry failed for path '%s'"), + two->path); + + checkout_entry(ceBefore, &state, NULL, NULL); + } + } if (is_missing && !intent_to_add) { remove_file_from_cache(one->path); diff --git a/t/t7114-reset-sparse-checkout.sh b/t/t7114-reset-sparse-checkout.sh new file mode 100755 index 00000000000000..c46cbdb64e4ebc --- /dev/null +++ b/t/t7114-reset-sparse-checkout.sh @@ -0,0 +1,58 @@ +#!/bin/sh + +test_description='reset when using a sparse-checkout' + +. ./test-lib.sh + +# reset using a sparse-checkout file + +test_expect_success 'setup' ' + test_tick && + echo "checkout file" >c && + echo "modify file" >m && + echo "delete file" >d && + git add . && + git commit -m "initial commit" && + echo "added file" >a && + echo "modification of a file" >m && + git rm d && + git add . 
&& + git commit -m "second commit" && + git checkout -b endCommit +' + +test_expect_success 'reset when there is a sparse-checkout' ' + echo "/c" >.git/info/sparse-checkout && + test_config core.sparsecheckout true && + git checkout -B resetBranch && + test_path_is_missing m && + test_path_is_missing a && + test_path_is_missing d && + git reset HEAD~1 && + test "checkout file" = "$(cat c)" && + test "modification of a file" = "$(cat m)" && + test "added file" = "$(cat a)" && + test_path_is_missing d +' + +test_expect_success 'reset after deleting file without skip-worktree bit' ' + git checkout -f endCommit && + git clean -xdf && + echo "/c +/m" >.git/info/sparse-checkout && + test_config core.sparsecheckout true && + git checkout -B resetAfterDelete && + test_path_is_file m && + test_path_is_missing a && + test_path_is_missing d && + rm -f m && + git reset HEAD~1 && + test "checkout file" = "$(cat c)" && + test "added file" = "$(cat a)" && + test_path_is_missing m && + test_path_is_missing d +' + + + +test_done From 251a28ab3dd0c9259d5bffec9c8ab44925ba5232 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 8 Aug 2017 00:27:50 +0200 Subject: [PATCH 043/129] pre-command: always respect core.hooksPath We need to respect that config setting even if we already know that we have a repository, but have not yet read the config. The regression test was written by Alejandro Pauly. 
Signed-off-by: Johannes Schindelin --- run-command.c | 13 +++++++++++-- t/t0400-pre-command-hook.sh | 11 +++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/run-command.c b/run-command.c index 2e6b365891a597..deb4af053db4e0 100644 --- a/run-command.c +++ b/run-command.c @@ -1358,9 +1358,18 @@ const char *find_hook(const char *name) static struct strbuf path = STRBUF_INIT; strbuf_reset(&path); - if (have_git_dir()) + if (have_git_dir()) { + static int forced_config; + + if (!forced_config) { + if (!git_hooks_path) + git_config_get_pathname("core.hookspath", + &git_hooks_path); + forced_config = 1; + } + strbuf_git_path(&path, "hooks/%s", name); - else if (!hook_path_early(name, &path)) + } else if (!hook_path_early(name, &path)) return NULL; if (access(path.buf, X_OK) < 0) { diff --git a/t/t0400-pre-command-hook.sh b/t/t0400-pre-command-hook.sh index f04a55a695bc97..f2a9115e299385 100755 --- a/t/t0400-pre-command-hook.sh +++ b/t/t0400-pre-command-hook.sh @@ -55,4 +55,15 @@ test_expect_success 'in a subdirectory, using an alias' ' test_line_count = 2 sub/log ' +test_expect_success 'with core.hooksPath' ' + mkdir -p .git/alternateHooks && + write_script .git/alternateHooks/pre-command <<-EOF && + echo "alternate" >\$(git rev-parse --git-dir)/pre-command.out + EOF + write_script .git/hooks/pre-command <<-EOF && + echo "original" >\$(git rev-parse --git-dir)/pre-command.out + EOF + git -c core.hooksPath=.git/alternateHooks status && + test "alternate" = "$(cat .git/pre-command.out)" +' test_done From ee592f65e9d6277d24f1b18e91e1bc8f310f1624 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Wed, 5 Apr 2017 10:55:32 -0600 Subject: [PATCH 044/129] Do not remove files outside the sparse-checkout Signed-off-by: Kevin Willford --- unpack-trees.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/unpack-trees.c b/unpack-trees.c index 0b5f1c2450303c..308148a3cfb429 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -504,7 +504,9 @@ 
static int apply_sparse_checkout(struct index_state *istate, */ if (!(ce->ce_flags & CE_UPDATE) && verify_uptodate_sparse(ce, o)) return -1; - ce->ce_flags |= CE_WT_REMOVE; + if (!gvfs_config_is_set(GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT)) + ce->ce_flags |= CE_WT_REMOVE; + ce->ce_flags &= ~CE_UPDATE; } if (was_skip_worktree && !ce_skip_worktree(ce)) { From f555d493d7ec10e2456a56340af555cbfc818fcd Mon Sep 17 00:00:00 2001 From: Jameson Miller Date: Wed, 10 Jan 2018 11:56:26 -0500 Subject: [PATCH 045/129] Teach ahead-behind and serialized status to play nicely together --- t/t7524-serialized-status.sh | 26 ++++++++++++++++++++++++++ wt-status-deserialize.c | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/t/t7524-serialized-status.sh b/t/t7524-serialized-status.sh index a0fc1bc2daf6ec..6bcd301dbb7913 100755 --- a/t/t7524-serialized-status.sh +++ b/t/t7524-serialized-status.sh @@ -137,4 +137,30 @@ test_expect_success 'verify serialized status handles path scopes' ' test_i18ncmp expect output ' +test_expect_success 'verify no-ahead-behind and serialized status integration' ' + test_when_finished "rm serialized_status.dat new_change.txt output" && + cat >expect <<-\EOF && + # branch.oid 68d4a437ea4c2de65800f48c053d4d543b55c410 + # branch.head alt_branch + # branch.upstream master + # branch.ab +1 -0 + ? expect + ? serialized_status.dat + ? untracked/ + ? 
untracked_1.txt + EOF + + git checkout -b alt_branch master --track >/dev/null && + touch alt_branch_changes.txt && + git add alt_branch_changes.txt && + test_tick && + git commit -m"New commit on alt branch" && + + git status --untracked-files=complete --ignored=matching --serialize >serialized_status.dat && + touch new_change.txt && + + git -c status.aheadBehind=false status --porcelain=v2 --branch --ahead-behind --deserialize=serialized_status.dat >output && + test_i18ncmp expect output +' + test_done diff --git a/wt-status-deserialize.c b/wt-status-deserialize.c index 317b88fddc1ec0..02a1b4cd13cb7d 100644 --- a/wt-status-deserialize.c +++ b/wt-status-deserialize.c @@ -548,6 +548,7 @@ static int wt_deserialize_fd(const struct wt_status *cmd_s, struct wt_status *de des_s->show_branch = cmd_s->show_branch; des_s->show_stash = cmd_s->show_stash; /* hints */ + des_s->ahead_behind_flags = cmd_s->ahead_behind_flags; des_s->status_format = cmd_s->status_format; des_s->fp = cmd_s->fp; if (cmd_s->prefix && *cmd_s->prefix) @@ -556,7 +557,6 @@ static int wt_deserialize_fd(const struct wt_status *cmd_s, struct wt_status *de return DESERIALIZE_OK; } - /* * Read raw serialized status data from the given file * From 1c0c811bfd6e196e69bb97a00eee6329d2ee0e2c Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Fri, 14 Apr 2017 10:59:20 -0600 Subject: [PATCH 046/129] gvfs: refactor loading the core.gvfs config value This code change makes sure that the config value for core_gvfs is always loaded before checking it. 
Signed-off-by: Kevin Willford --- Makefile | 1 + gvfs.c | 38 ++++++++++++++++++++++++++++++++++++++ gvfs.h | 31 ++----------------------------- 3 files changed, 41 insertions(+), 29 deletions(-) create mode 100644 gvfs.c diff --git a/Makefile b/Makefile index 6d585bb56decaf..950f1b9ef52066 100644 --- a/Makefile +++ b/Makefile @@ -893,6 +893,7 @@ LIB_OBJS += gettext.o LIB_OBJS += gpg-interface.o LIB_OBJS += graph.o LIB_OBJS += grep.o +LIB_OBJS += gvfs.o LIB_OBJS += hashmap.o LIB_OBJS += linear-assignment.o LIB_OBJS += help.o diff --git a/gvfs.c b/gvfs.c new file mode 100644 index 00000000000000..7235199c04ac39 --- /dev/null +++ b/gvfs.c @@ -0,0 +1,38 @@ +#include "cache.h" +#include "gvfs.h" +#include "config.h" + +static int gvfs_config_loaded; +static int core_gvfs_is_bool; + +static int early_core_gvfs_config(const char *var, const char *value, void *data) +{ + if (!strcmp(var, "core.gvfs")) + core_gvfs = git_config_bool_or_int("core.gvfs", value, &core_gvfs_is_bool); + return 0; +} + +void gvfs_load_config_value(const char *value) +{ + if (gvfs_config_loaded) + return; + + if (value) + core_gvfs = git_config_bool_or_int("core.gvfs", value, &core_gvfs_is_bool); + else if (startup_info->have_repository == 0) + read_early_config(early_core_gvfs_config, NULL); + else + git_config_get_bool_or_int("core.gvfs", &core_gvfs_is_bool, &core_gvfs); + + /* Turn on all bits if a bool was set in the settings */ + if (core_gvfs_is_bool && core_gvfs) + core_gvfs = -1; + + gvfs_config_loaded = 1; +} + +int gvfs_config_is_set(int mask) +{ + gvfs_load_config_value(0); + return (core_gvfs & mask) == mask; +} diff --git a/gvfs.h b/gvfs.h index 2d6de575bf4a65..7c9367866f502a 100644 --- a/gvfs.h +++ b/gvfs.h @@ -1,8 +1,6 @@ #ifndef GVFS_H #define GVFS_H -#include "cache.h" -#include "config.h" /* * This file is for the specific settings and methods @@ -19,32 +17,7 @@ #define GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK (1 << 4) #define GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS (1 << 6) 
-static inline int gvfs_config_is_set(int mask) { - return (core_gvfs & mask) == mask; -} - -static inline int gvfs_config_is_set_any(void) { - return core_gvfs > 0; -} - -static inline void gvfs_load_config_value(const char *value) { - int is_bool = 0; - - if (value) - core_gvfs = git_config_bool_or_int("core.gvfs", value, &is_bool); - else - git_config_get_bool_or_int("core.gvfs", &is_bool, &core_gvfs); - - /* Turn on all bits if a bool was set in the settings */ - if (is_bool && core_gvfs) - core_gvfs = -1; -} - - -static inline int gvfs_config_load_and_is_set(int mask) { - gvfs_load_config_value(0); - return gvfs_config_is_set(mask); -} - +void gvfs_load_config_value(const char *value); +int gvfs_config_is_set(int mask); #endif /* GVFS_H */ From d4336edc6da51ed3419399bd7d87b742718791b4 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 2 Feb 2018 14:17:05 -0500 Subject: [PATCH 047/129] status: serialize to path Teach status serialization to take an optional pathname on the command line to direct that cache data be written there rather than to stdout. When used this way, normal status results will still be written to stdout. When no path is given, only binary serialization data is written to stdout. Usage: git status --serialize[=<path>] Signed-off-by: Jeff Hostetler --- Documentation/git-status.txt | 10 ++++++---- builtin/commit.c | 36 +++++++++++++++++++++++++++--------- t/t7524-serialized-status.sh | 23 +++++++++++++++++++++++ wt-status-serialize.c | 5 ++--- wt-status.c | 2 +- wt-status.h | 2 +- 6 files changed, 60 insertions(+), 18 deletions(-) diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt index 990bba01a980c7..c5b21372c2f301 100644 --- a/Documentation/git-status.txt +++ b/Documentation/git-status.txt @@ -156,10 +156,12 @@ ignored, then the directory is not shown, but all contents are shown. update it afterwards if any changes were detected. Defaults to `--lock-index`. 
---serialize[=<version>]:: - (EXPERIMENTAL) Serialize raw status results to stdout in a - format suitable for use by `--deserialize`. Valid values for - `<version>` are "1" and "v1". +--serialize[=<path>]:: + (EXPERIMENTAL) Serialize raw status results to a file or stdout + in a format suitable for use by `--deserialize`. If a path is + given, serialize data will be written to that path *and* normal + status output will be written to stdout. If path is omitted, + only binary serialization data will be written to stdout. --deserialize[=<path>]:: (EXPERIMENTAL) Deserialize raw status results from a file or diff --git a/builtin/commit.c b/builtin/commit.c index 3852ce811e4676..dd06c99dacfb10 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -147,26 +147,34 @@ static int opt_parse_porcelain(const struct option *opt, const char *arg, int un } static int do_serialize = 0; +static char *serialize_path = NULL; + static int do_implicit_deserialize = 0; static int do_explicit_deserialize = 0; static char *deserialize_path = NULL; /* - * --serialize | --serialize=1 | --serialize=v1 + * --serialize | --serialize=<path> + * + * Request that we serialize status output rather than or in addition to + * printing in any of the established formats. + * + * Without a path, we write binary serialization data to stdout (and omit + * the normal status output). * - * Request that we serialize our output rather than printing in - * any of the established formats. Optionally specify serialization - * version. + * With a path, we write binary serialization data to the <path> and then + * write normal status output. 
*/ static int opt_parse_serialize(const struct option *opt, const char *arg, int unset) { enum wt_status_format *value = (enum wt_status_format *)opt->value; if (unset || !arg) *value = STATUS_FORMAT_SERIALIZE_V1; - else if (!strcmp(arg, "v1") || !strcmp(arg, "1")) - *value = STATUS_FORMAT_SERIALIZE_V1; - else - die("unsupported serialize version '%s'", arg); + + if (arg) { + free(serialize_path); + serialize_path = xstrdup(arg); + } if (do_explicit_deserialize) die("cannot mix --serialize and --deserialize"); @@ -1452,7 +1460,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) N_("version"), N_("machine-readable output"), PARSE_OPT_OPTARG, opt_parse_porcelain }, { OPTION_CALLBACK, 0, "serialize", &status_format, - N_("version"), N_("serialize raw status data to stdout"), + N_("path"), N_("serialize raw status data to path or stdout"), PARSE_OPT_OPTARG | PARSE_OPT_NONEG, opt_parse_serialize }, { OPTION_CALLBACK, 0, "deserialize", NULL, N_("path"), N_("deserialize raw status data from file"), @@ -1594,6 +1602,16 @@ int cmd_status(int argc, const char **argv, const char *prefix) if (s.relative_paths) s.prefix = prefix; + if (serialize_path) { + int fd_serialize = xopen(serialize_path, + O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (fd_serialize < 0) + die_errno(_("could not serialize to '%s'"), + serialize_path); + wt_status_serialize_v1(fd_serialize, &s); + close(fd_serialize); + } + wt_status_print(&s); wt_status_collect_free_buffers(&s); diff --git a/t/t7524-serialized-status.sh b/t/t7524-serialized-status.sh index 6bcd301dbb7913..d1ca3af3f96546 100755 --- a/t/t7524-serialized-status.sh +++ b/t/t7524-serialized-status.sh @@ -163,4 +163,27 @@ test_expect_success 'verify no-ahead-behind and serialized status integration' ' test_i18ncmp expect output ' +test_expect_success 'verify new --serialize=path mode' ' + #test_when_finished "rm serialized_status.dat expect new_change.txt output.1 output.2" && + cat >expect <<-\EOF && + ? expect + ? output.1 + ? 
untracked/ + ? untracked_1.txt + EOF + + git checkout -b serialize_path_branch master --track >/dev/null && + touch alt_branch_changes.txt && + git add alt_branch_changes.txt && + test_tick && + git commit -m"New commit on serialize_path_branch" && + + git status --porcelain=v2 --serialize=serialized_status.dat >output.1 && + touch new_change.txt && + + git status --porcelain=v2 --deserialize=serialized_status.dat >output.2 && + test_i18ncmp expect output.1 && + test_i18ncmp expect output.2 +' + test_done diff --git a/wt-status-serialize.c b/wt-status-serialize.c index 60da2d17d16182..4acb6f3defa055 100644 --- a/wt-status-serialize.c +++ b/wt-status-serialize.c @@ -162,7 +162,7 @@ static inline void wt_serialize_v1_ignored(struct wt_status *s, int fd, } /* - * Serialize the list of changes to stdout. The goal of this + * Serialize the list of changes to the given file. The goal of this * is to just serialize the key fields in wt_status so that a * later command can rebuilt it and do the printing. * @@ -171,9 +171,8 @@ static inline void wt_serialize_v1_ignored(struct wt_status *s, int fd, * is relatively quick for the status consumer to compute * as necessary. */ -void wt_status_serialize_v1(struct wt_status *s) +void wt_status_serialize_v1(int fd, struct wt_status *s) { - int fd = 1; /* we always write to stdout */ struct string_list_item *iter; int k; diff --git a/wt-status.c b/wt-status.c index 5b8e45302eb8b3..68820961d1f439 100644 --- a/wt-status.c +++ b/wt-status.c @@ -2394,7 +2394,7 @@ void wt_status_print(struct wt_status *s) wt_longstatus_print(s); break; case STATUS_FORMAT_SERIALIZE_V1: - wt_status_serialize_v1(s); + wt_status_serialize_v1(1, s); break; } diff --git a/wt-status.h b/wt-status.h index 913bf2750f9bd7..e6b37492853928 100644 --- a/wt-status.h +++ b/wt-status.h @@ -199,7 +199,7 @@ struct wt_status_serialize_data * Serialize computed status scan results using "version 1" format * to the given file. 
*/ -void wt_status_serialize_v1(struct wt_status *s); +void wt_status_serialize_v1(int fd, struct wt_status *s); /* * Deserialize existing status results from the given file and From bbf87397d0f837282b3ca6616e8d4a22a2d451e3 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 22 Aug 2017 11:54:23 -0400 Subject: [PATCH 048/129] status: add status serialization mechanism Teach STATUS to optionally serialize the results of a status computation to a file. Teach STATUS to optionally read an existing serialization file and simply print the results, rather than actually scanning. This is intended for immediate status results on extremely large repos and assumes the use of a service/daemon to maintain a fresh current status snapshot. Signed-off-by: Jeff Hostetler --- Documentation/config/status.txt | 6 + Documentation/git-status.txt | 33 + .../technical/status-serialization-format.txt | 107 ++++ Makefile | 2 + builtin/commit.c | 123 +++- contrib/completion/git-completion.bash | 2 +- pkt-line.c | 2 +- pkt-line.h | 1 + t/t7523-status-complete-untracked.sh | 39 ++ t/t7524-serialized-status.sh | 140 ++++ wt-status-deserialize.c | 597 ++++++++++++++++++ wt-status-serialize.c | 213 +++++++ wt-status.c | 6 + wt-status.h | 52 +- 14 files changed, 1319 insertions(+), 4 deletions(-) create mode 100644 Documentation/technical/status-serialization-format.txt create mode 100755 t/t7523-status-complete-untracked.sh create mode 100755 t/t7524-serialized-status.sh create mode 100644 wt-status-deserialize.c create mode 100644 wt-status-serialize.c diff --git a/Documentation/config/status.txt b/Documentation/config/status.txt index 0fc704ab80b223..65cecc12e80c80 100644 --- a/Documentation/config/status.txt +++ b/Documentation/config/status.txt @@ -75,3 +75,9 @@ status.submoduleSummary:: the --ignore-submodules=dirty command-line option or the 'git submodule summary' command, which shows a similar output but does not honor these settings. 
+ +status.deserializePath:: + EXPERIMENTAL, Pathname to a file containing cached status results + generated by `--serialize`. This will be overridden by + `--deserialize=<path>` on the command line. If the cache file is + invalid or stale, git will fall back and compute status normally. diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt index 0646c606cfa46f..990bba01a980c7 100644 --- a/Documentation/git-status.txt +++ b/Documentation/git-status.txt @@ -156,6 +156,19 @@ ignored, then the directory is not shown, but all contents are shown. update it afterwards if any changes were detected. Defaults to `--lock-index`. +--serialize[=<version>]:: + (EXPERIMENTAL) Serialize raw status results to stdout in a + format suitable for use by `--deserialize`. Valid values for + `<version>` are "1" and "v1". + +--deserialize[=<path>]:: + (EXPERIMENTAL) Deserialize raw status results from a file or + stdin rather than scanning the worktree. If `<path>` is omitted + and `status.deserializePath` is unset, input is read from stdin. +--no-deserialize:: + (EXPERIMENTAL) Disable implicit deserialization of status results + from the value of `status.deserializePath`. + <pathspec>...:: See the 'pathspec' entry in linkgit:gitglossary[7]. @@ -405,6 +418,26 @@ quoted as explained for the configuration variable `core.quotePath` (see linkgit:git-config[1]). +SERIALIZATION and DESERIALIZATION (EXPERIMENTAL) +------------------------------------------------ + +The `--serialize` option allows git to cache the result of a +possibly time-consuming status scan to a binary file. A local +service/daemon watching file system events could use this to +periodically pre-compute a fresh status result. + +Interactive users could then use `--deserialize` to simply +(and immediately) print the last-known-good result without +waiting for the status scan. 
+ +The binary serialization file format includes some worktree state +information allowing `--deserialize` to reject the cached data +and force a normal status scan if, for example, the commit, branch, +or status modes/options change. The format cannot, however, indicate +when the cached data is otherwise stale -- that coordination belongs +to the task driving the serializations. + + CONFIGURATION ------------- diff --git a/Documentation/technical/status-serialization-format.txt b/Documentation/technical/status-serialization-format.txt new file mode 100644 index 00000000000000..475ae814495581 --- /dev/null +++ b/Documentation/technical/status-serialization-format.txt @@ -0,0 +1,107 @@ +Git status serialization format +=============================== + +Git status serialization enables git to dump the results of a status scan +to a binary file. This file can then be loaded by later status invocations +to print the cached status results. + +The file contains the essential fields from: +() the index +() the "struct wt_status" for the overall results +() the contents of "struct wt_status_change_data" for tracked changed files +() the list of untracked and ignored files + +Version 1 Format: +================= + +The V1 file begins with a required header section followed by optional +sections for each type of item (changed, untracked, ignored). Individual +item sections are only present if necessary. Each item section begins +with an item-type header with the number of items in the section. + +Each "line" in the format is encoded using pkt-line with a final LF. +Flush packets are used to terminate sections. + +----------------- +PKT-LINE("version" SP "1") +<v1-header-section> +[<v1-changed-item-section>] +[<v1-untracked-item-section>] +[<v1-ignored-item-section>] +----------------- + + +V1 Header +--------- + +The v1-header-section fields are taken directly from "struct wt_status". +Each field is printed on a separate pkt-line. Lines for NULL string +values are omitted. All integers are printed with "%d". OIDs are +printed in hex. 
+ +v1-header-section = <v1-index-headers> + <v1-wt-status-headers> + PKT-LINE(<flush>) + +v1-index-headers = PKT-LINE("index_mtime" SP <sec> SP <nsec> LF) + +v1-wt-status-headers = PKT-LINE("is_initial" SP <integer> LF) + [ PKT-LINE("branch" SP <branch-name> LF) ] + [ PKT-LINE("reference" SP <reference-name> LF) ] + PKT-LINE("show_ignored_mode" SP <integer> LF) + PKT-LINE("show_untracked_files" SP <integer> LF) + PKT-LINE("show_ignored_directory" SP <integer> LF) + [ PKT-LINE("ignore_submodule_arg" SP <string> LF) ] + PKT-LINE("detect_rename" SP <integer> LF) + PKT-LINE("rename_score" SP <integer> LF) + PKT-LINE("rename_limit" SP <integer> LF) + PKT-LINE("detect_break" SP <integer> LF) + PKT-LINE("sha1_commit" SP <oid> LF) + PKT-LINE("committable" SP <integer> LF) + PKT-LINE("workdir_dirty" SP <integer> LF) + + +V1 Changed Items +---------------- + +The v1-changed-item-section lists all of the changed items with one +item per pkt-line. Each pkt-line contains: a binary block of data +from "struct wt_status_serialize_data_fixed" in a fixed header where +integers are in network byte order and OIDs are in raw (non-hex) form. +This is followed by one or two raw pathnames (not c-quoted) with NUL +terminators (both NULs are always present even if there is no rename). + +v1-changed-item-section = PKT-LINE("changed" SP <count> LF) + [ PKT-LINE(<changed_item> LF) ]+ + PKT-LINE(<flush>) + +changed_item = <uint32 worktree_status> + <uint32 index_status> + <uint32 stagemask> + <uint32 rename_score> + <uint32 mode_head> + <uint32 mode_index> + <uint32 mode_worktree> + <uint32 dirty_submodule> + <uint32 new_submodule_commits> + <raw-oid oid_head> + <raw-oid oid_index> + <path> + NUL + [ <rename_source> ] + NUL + + +V1 Untracked and Ignored Items +------------------------------ + +These sections are simple lists of pathnames. They ARE NOT +c-quoted. 
+ +v1-untracked-item-section = PKT-LINE("untracked" SP <count> LF) + [ PKT-LINE(<pathname> LF) ]+ + PKT-LINE(<flush>) + +v1-ignored-item-section = PKT-LINE("ignored" SP <count> LF) + [ PKT-LINE(<pathname> LF) ]+ + PKT-LINE(<flush>) diff --git a/Makefile b/Makefile index 950f1b9ef52066..305254ca0590d9 100644 --- a/Makefile +++ b/Makefile @@ -1034,6 +1034,8 @@ LIB_OBJS += wrapper.o LIB_OBJS += write-or-die.o LIB_OBJS += ws.o LIB_OBJS += wt-status.o +LIB_OBJS += wt-status-deserialize.o +LIB_OBJS += wt-status-serialize.o LIB_OBJS += xdiff-interface.o LIB_OBJS += zlib.o diff --git a/builtin/commit.c b/builtin/commit.c index e2258de9bd6f4a..3852ce811e4676 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -146,6 +146,70 @@ static int opt_parse_porcelain(const struct option *opt, const char *arg, int un return 0; } +static int do_serialize = 0; +static int do_implicit_deserialize = 0; +static int do_explicit_deserialize = 0; +static char *deserialize_path = NULL; + +/* + * --serialize | --serialize=1 | --serialize=v1 + * + * Request that we serialize our output rather than printing in + * any of the established formats. Optionally specify serialization + * version. + */ +static int opt_parse_serialize(const struct option *opt, const char *arg, int unset) +{ + enum wt_status_format *value = (enum wt_status_format *)opt->value; + if (unset || !arg) + *value = STATUS_FORMAT_SERIALIZE_V1; + else if (!strcmp(arg, "v1") || !strcmp(arg, "1")) + *value = STATUS_FORMAT_SERIALIZE_V1; + else + die("unsupported serialize version '%s'", arg); + + if (do_explicit_deserialize) + die("cannot mix --serialize and --deserialize"); + do_implicit_deserialize = 0; + + do_serialize = 1; + return 0; +} + +/* + * --deserialize | --deserialize=<path> | + * --no-deserialize + * + * Request that we deserialize status data from some existing resource + * rather than performing a status scan. + * + * The input source can come from stdin or a path given here -- or be + * inherited from the config settings. 
+ */ +static int opt_parse_deserialize(const struct option *opt, const char *arg, int unset) +{ + if (unset) { + do_implicit_deserialize = 0; + do_explicit_deserialize = 0; + } else { + if (do_serialize) + die("cannot mix --serialize and --deserialize"); + if (arg) { + /* override config or stdin */ + free(deserialize_path); + deserialize_path = xstrdup(arg); + } + if (deserialize_path && *deserialize_path + && (access(deserialize_path, R_OK) != 0)) + die("cannot find serialization file '%s'", + deserialize_path); + + do_explicit_deserialize = 1; + } + + return 0; +} + static int opt_parse_m(const struct option *opt, const char *arg, int unset) { struct strbuf *buf = opt->value; @@ -1079,6 +1143,8 @@ static void handle_untracked_files_arg(struct wt_status *s) s->show_untracked_files = SHOW_NORMAL_UNTRACKED_FILES; else if (!strcmp(untracked_files_arg, "all")) s->show_untracked_files = SHOW_ALL_UNTRACKED_FILES; + else if (!strcmp(untracked_files_arg,"complete")) + s->show_untracked_files = SHOW_COMPLETE_UNTRACKED_FILES; /* * Please update $__git_untracked_file_modes in * git-completion.bash when you add new options @@ -1314,6 +1380,19 @@ static int git_status_config(const char *k, const char *v, void *cb) s->relative_paths = git_config_bool(k, v); return 0; } + if (!strcmp(k, "status.deserializepath")) { + /* + * Automatically assume deserialization if this is + * set in the config and the file exists. Do not + * complain if the file does not exist, because we + * silently fall back to normal mode. 
+ */ + if (v && *v && access(v, R_OK) == 0) { + do_implicit_deserialize = 1; + deserialize_path = xstrdup(v); + } + return 0; + } if (!strcmp(k, "status.showuntrackedfiles")) { if (!v) return config_error_nonbool(k); @@ -1356,7 +1435,8 @@ int cmd_status(int argc, const char **argv, const char *prefix) static int show_ignored_directory = 0; static struct wt_status s; unsigned int progress_flag = 0; - int fd; + int try_deserialize; + int fd = -1; struct object_id oid; static struct option builtin_status_options[] = { OPT__VERBOSE(&verbose, N_("be verbose")), @@ -1371,6 +1451,12 @@ int cmd_status(int argc, const char **argv, const char *prefix) { OPTION_CALLBACK, 0, "porcelain", &status_format, N_("version"), N_("machine-readable output"), PARSE_OPT_OPTARG, opt_parse_porcelain }, + { OPTION_CALLBACK, 0, "serialize", &status_format, + N_("version"), N_("serialize raw status data to stdout"), + PARSE_OPT_OPTARG | PARSE_OPT_NONEG, opt_parse_serialize }, + { OPTION_CALLBACK, 0, "deserialize", NULL, + N_("path"), N_("deserialize raw status data from file"), + PARSE_OPT_OPTARG, opt_parse_deserialize }, OPT_SET_INT(0, "long", &status_format, N_("show status in long format (default)"), STATUS_FORMAT_LONG), @@ -1431,10 +1517,26 @@ int cmd_status(int argc, const char **argv, const char *prefix) s.show_untracked_files == SHOW_NO_UNTRACKED_FILES) die(_("Unsupported combination of ignored and untracked-files arguments")); + if (s.show_untracked_files == SHOW_COMPLETE_UNTRACKED_FILES && + s.show_ignored_mode == SHOW_NO_IGNORED) + die(_("Complete Untracked only supported with ignored files")); + parse_pathspec(&s.pathspec, 0, PATHSPEC_PREFER_FULL, prefix, argv); + /* + * If we want to try to deserialize status data from a cache file, + * we need to re-order the initialization code. The problem is that + * this makes for a very nasty diff and causes merge conflicts as we + * carry it forward. 
And it is easy to mess up the merge, so we + * duplicate some code here to hopefully reduce conflicts. + */ + try_deserialize = (!do_serialize && + (do_implicit_deserialize || do_explicit_deserialize)); + if (try_deserialize) + goto skip_init; + enable_fscache(0); if (status_format != STATUS_FORMAT_PORCELAIN && status_format != STATUS_FORMAT_PORCELAIN_V2) @@ -1449,6 +1551,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) else fd = -1; +skip_init: s.is_initial = get_oid(s.reference, &oid) ? 1 : 0; if (!s.is_initial) oidcpy(&s.oid_commit, &oid); @@ -1465,6 +1568,24 @@ int cmd_status(int argc, const char **argv, const char *prefix) s.rename_score = parse_rename_score(&rename_score_arg); } + if (try_deserialize) { + if (s.relative_paths) + s.prefix = prefix; + + if (wt_status_deserialize(&s, deserialize_path) == DESERIALIZE_OK) + return 0; + + /* deserialize failed, so force the initialization we skipped above. */ + enable_fscache(1); + read_cache_preload(&s.pathspec); + refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, &s.pathspec, NULL, NULL); + + if (use_optional_locks()) + fd = hold_locked_index(&index_lock, 0); + else + fd = -1; + } + wt_status_collect(&s); if (0 <= fd) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index e4d9ff4a95ceb5..beef71e4d70c41 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -1417,7 +1417,7 @@ _git_clone () esac } -__git_untracked_file_modes="all no normal" +__git_untracked_file_modes="all no normal complete" _git_commit () { diff --git a/pkt-line.c b/pkt-line.c index a0e87b1e81408e..308ab8d5f78781 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ -187,7 +187,7 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...) 
return status; } -static int packet_write_gently(const int fd_out, const char *buf, size_t size) +int packet_write_gently(const int fd_out, const char *buf, size_t size) { static char packet_write_buffer[LARGE_PACKET_MAX]; size_t packet_size; diff --git a/pkt-line.h b/pkt-line.h index fef3a0d792d31b..28b7e785784597 100644 --- a/pkt-line.h +++ b/pkt-line.h @@ -31,6 +31,7 @@ void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((f void packet_buf_write_len(struct strbuf *buf, const char *data, size_t len); int packet_flush_gently(int fd); int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3))); +int packet_write_gently(const int fd_out, const char *buf, size_t size); int write_packetized_from_fd(int fd_in, int fd_out); int write_packetized_from_buf(const char *src_in, size_t len, int fd_out); diff --git a/t/t7523-status-complete-untracked.sh b/t/t7523-status-complete-untracked.sh new file mode 100755 index 00000000000000..2f4476654a5570 --- /dev/null +++ b/t/t7523-status-complete-untracked.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +test_description='git status untracked complete tests' + +. ./test-lib.sh + +test_expect_success 'setup' ' + cat >.gitignore <<-\EOF && + *.ign + ignored_dir/ + EOF + + mkdir tracked ignored_dir && + touch tracked_1.txt tracked/tracked_1.txt && + git add . && + test_tick && + git commit -m"Adding original file." && + mkdir untracked && + touch ignored.ign ignored_dir/ignored_2.txt \ + untracked_1.txt untracked/untracked_2.txt untracked/untracked_3.txt +' + +test_expect_success 'verify untracked-files=complete' ' + cat >expect <<-\EOF && + ? expect + ? output + ? untracked/ + ? untracked/untracked_2.txt + ? untracked/untracked_3.txt + ? untracked_1.txt + ! ignored.ign + ! 
ignored_dir/ + EOF + + git status --porcelain=v2 --untracked-files=complete --ignored >output && + test_i18ncmp expect output +' + +test_done diff --git a/t/t7524-serialized-status.sh b/t/t7524-serialized-status.sh new file mode 100755 index 00000000000000..a0fc1bc2daf6ec --- /dev/null +++ b/t/t7524-serialized-status.sh @@ -0,0 +1,140 @@ +#!/bin/sh + +test_description='git serialized status tests' + +. ./test-lib.sh + +# This file includes tests for serializing / deserializing +# status data. These tests cover two basic features: +# +# [1] Because users can request different types of untracked-file +# and ignored file reporting, the cache data generated by +# serialize must use the same untracked and ignored +# parameters as the later deserialize invocation; otherwise, +# the deserialize invocation must disregard the cached data +# and run a full scan itself. +# +# To increase the number of cases where the cached status can +# be used, we have added a "--untracked-files=complete" option +# that reports a superset or union of the results from the +# "-u normal" and "-u all". We combine this with a filter in +# deserialize to filter the results. +# +# Ignored file reporting is simpler in that it is all or +# nothing; there are no subsets. +# +# The tests here (in addition to confirming that a cache +# file can be generated and used by a subsequent status +# command) need to test this untracked-file filtering. +# +# [2] ensuring the status calls are using data from the status +# cache as expected. This includes verifying cached data +# is used when appropriate as well as falling back to +# performing a new status scan when the data in the cache +# is insufficient/known stale. + +test_expect_success 'setup' ' + cat >.gitignore <<-\EOF && + *.ign + ignored_dir/ + EOF + + mkdir tracked ignored_dir && + touch tracked_1.txt tracked/tracked_1.txt && + git add . && + test_tick && + git commit -m"Adding original file." 
&& + mkdir untracked && + touch ignored.ign ignored_dir/ignored_2.txt \ + untracked_1.txt untracked/untracked_2.txt untracked/untracked_3.txt +' + +test_expect_success 'verify untracked-files=complete with no conversion' ' + test_when_finished "rm serialized_status.dat new_change.txt output" && + cat >expect <<-\EOF && + ? expect + ? serialized_status.dat + ? untracked/ + ? untracked/untracked_2.txt + ? untracked/untracked_3.txt + ? untracked_1.txt + ! ignored.ign + ! ignored_dir/ + EOF + + git status --untracked-files=complete --ignored=matching --serialize >serialized_status.dat && + touch new_change.txt && + + git status --porcelain=v2 --untracked-files=complete --ignored=matching --deserialize=serialized_status.dat >output && + test_i18ncmp expect output +' + +test_expect_success 'verify untracked-files=complete to untracked-files=normal conversion' ' + test_when_finished "rm serialized_status.dat new_change.txt output" && + cat >expect <<-\EOF && + ? expect + ? serialized_status.dat + ? untracked/ + ? untracked_1.txt + EOF + + git status --untracked-files=complete --ignored=matching --serialize >serialized_status.dat && + touch new_change.txt && + + git status --porcelain=v2 --deserialize=serialized_status.dat >output && + test_i18ncmp expect output +' + +test_expect_success 'verify untracked-files=complete to untracked-files=all conversion' ' + test_when_finished "rm serialized_status.dat new_change.txt output" && + cat >expect <<-\EOF && + ? expect + ? serialized_status.dat + ? untracked/untracked_2.txt + ? untracked/untracked_3.txt + ? untracked_1.txt + ! ignored.ign + ! 
ignored_dir/ + EOF + + git status --untracked-files=complete --ignored=matching --serialize >serialized_status.dat && + touch new_change.txt && + + git status --porcelain=v2 --untracked-files=all --ignored=matching --deserialize=serialized_status.dat >output && + test_i18ncmp expect output +' + +test_expect_success 'verify serialized status with non-convertible ignore mode does new scan' ' + test_when_finished "rm serialized_status.dat new_change.txt output" && + cat >expect <<-\EOF && + ? expect + ? new_change.txt + ? output + ? serialized_status.dat + ? untracked/ + ? untracked_1.txt + ! ignored.ign + ! ignored_dir/ + EOF + + git status --untracked-files=complete --ignored=matching --serialize >serialized_status.dat && + touch new_change.txt && + + git status --porcelain=v2 --ignored --deserialize=serialized_status.dat >output && + test_i18ncmp expect output +' + +test_expect_success 'verify serialized status handles path scopes' ' + test_when_finished "rm serialized_status.dat new_change.txt output" && + cat >expect <<-\EOF && + ? 
untracked/ + EOF + + git status --untracked-files=complete --ignored=matching --serialize >serialized_status.dat && + touch new_change.txt && + + git status --porcelain=v2 --deserialize=serialized_status.dat untracked >output && + test_i18ncmp expect output +' + +test_done diff --git a/wt-status-deserialize.c b/wt-status-deserialize.c new file mode 100644 index 00000000000000..317b88fddc1ec0 --- /dev/null +++ b/wt-status-deserialize.c @@ -0,0 +1,597 @@ +#include "cache.h" +#include "wt-status.h" +#include "pkt-line.h" +#include "trace.h" + +static struct trace_key trace_deserialize = TRACE_KEY_INIT(DESERIALIZE); + +enum deserialize_parse_strategy { + DESERIALIZE_STRATEGY_AS_IS, + DESERIALIZE_STRATEGY_SKIP, + DESERIALIZE_STRATEGY_NORMAL, + DESERIALIZE_STRATEGY_ALL +}; + +static int check_path_contains(const char *out, int out_len, const char *in, int in_len) +{ + return (out_len > 0 && + out_len < in_len && + (out[out_len - 1] == '/') && + !memcmp(out, in, out_len)); +} + +static const char *my_packet_read_line(int fd, int *line_len) +{ + static char buf[LARGE_PACKET_MAX]; + + *line_len = packet_read(fd, NULL, NULL, buf, sizeof(buf), + PACKET_READ_CHOMP_NEWLINE | + PACKET_READ_GENTLE_ON_EOF); + return (*line_len > 0) ? buf : NULL; +} + +/* + * mtime_reported contains the mtime of the index when the + * serialization snapshot was computed. + * + * mtime_observed_on_disk contains the mtime of the index now. + * + * If these 2 times are different, then the .git/index has + * changed since the serialization cache was created and we + * must reject the cache because anything could have changed. + * + * If they are the same, we continue trying to use the cache. 
+ */ +static int my_validate_index(const struct cache_time *mtime_reported) +{ + const char *path = get_index_file(); + struct stat st; + struct cache_time mtime_observed_on_disk; + + if (lstat(path, &st)) { + trace_printf_key(&trace_deserialize, "could not stat index"); + return DESERIALIZE_ERR; + } + mtime_observed_on_disk.sec = st.st_mtime; + mtime_observed_on_disk.nsec = ST_MTIME_NSEC(st); + if ((mtime_observed_on_disk.sec != mtime_reported->sec) || + (mtime_observed_on_disk.nsec != mtime_reported->nsec)) { + trace_printf_key(&trace_deserialize, "index mtime changed [des %d.%d][obs %d.%d]", + mtime_reported->sec, mtime_reported->nsec, + mtime_observed_on_disk.sec, mtime_observed_on_disk.nsec); + return DESERIALIZE_ERR; + } + + return DESERIALIZE_OK; +} + +static int wt_deserialize_v1_header(struct wt_status *s, int fd) +{ + struct cache_time index_mtime; + int line_len, nr_fields; + const char *line; + const char *arg; + + /* + * parse header lines up to the first flush packet. + */ + while ((line = my_packet_read_line(fd, &line_len))) { + + if (skip_prefix(line, "index_mtime ", &arg)) { + nr_fields = sscanf(arg, "%d %d", + &index_mtime.sec, + &index_mtime.nsec); + if (nr_fields != 2) { + trace_printf_key(&trace_deserialize, "invalid index_mtime (%d) '%s'", + nr_fields, line); + return DESERIALIZE_ERR; + } + continue; + } + + if (skip_prefix(line, "is_initial ", &arg)) { + s->is_initial = (int)strtol(arg, NULL, 10); + continue; + } + if (skip_prefix(line, "branch ", &arg)) { + s->branch = xstrdup(arg); + continue; + } + if (skip_prefix(line, "reference ", &arg)) { + s->reference = xstrdup(arg); + continue; + } + /* pathspec */ + /* verbose */ + /* amend */ + if (skip_prefix(line, "whence ", &arg)) { + s->whence = (int)strtol(arg, NULL, 10); + continue; + } + /* nowarn */ + /* use_color */ + /* no_gettext */ + /* display_comment_prefix */ + /* relative_paths */ + /* submodule_summary */ + if (skip_prefix(line, "show_ignored_mode ", &arg)) { + 
s->show_ignored_mode = (int)strtol(arg, NULL, 10); + continue; + } + if (skip_prefix(line, "show_untracked_files ", &arg)) { + s->show_untracked_files = (int)strtol(arg, NULL, 10); + continue; + } + if (skip_prefix(line, "ignore_submodule_arg ", &arg)) { + s->ignore_submodule_arg = xstrdup(arg); + continue; + } + /* color_palette */ + /* colopts */ + /* null_termination */ + /* commit_template */ + /* show_branch */ + /* show_stash */ + if (skip_prefix(line, "hints ", &arg)) { + s->hints = (int)strtol(arg, NULL, 10); + continue; + } + if (skip_prefix(line, "detect_rename ", &arg)) { + s->detect_rename = (int)strtol(arg, NULL, 10); + continue; + } + if (skip_prefix(line, "rename_score ", &arg)) { + s->rename_score = (int)strtol(arg, NULL, 10); + continue; + } + if (skip_prefix(line, "rename_limit ", &arg)) { + s->rename_limit = (int)strtol(arg, NULL, 10); + continue; + } + /* status_format */ + if (skip_prefix(line, "sha1_commit ", &arg)) { + if (get_oid_hex(arg, &s->oid_commit)) { + trace_printf_key(&trace_deserialize, "invalid sha1_commit"); + return DESERIALIZE_ERR; + } + continue; + } + if (skip_prefix(line, "committable ", &arg)) { + s->committable = (int)strtol(arg, NULL, 10); + continue; + } + if (skip_prefix(line, "workdir_dirty ", &arg)) { + s->workdir_dirty = (int)strtol(arg, NULL, 10); + continue; + } + /* prefix */ + + trace_printf_key(&trace_deserialize, "unexpected line '%s'", line); + return DESERIALIZE_ERR; + } + + return my_validate_index(&index_mtime); +} + +/* + * Build a string-list of (count) lines from the input. 
+ */ +static int wt_deserialize_v1_changed_items(struct wt_status *s, int fd, int count) +{ + struct wt_status_serialize_data *sd; + char *p; + int line_len; + const char *line; + struct string_list_item *item; + + string_list_init(&s->change, 1); + + /* + * + + * + * + * NUL [] NUL + */ + while ((line = my_packet_read_line(fd, &line_len))) { + struct wt_status_change_data *d = xcalloc(1, sizeof(*d)); + sd = (struct wt_status_serialize_data *)line; + + d->worktree_status = ntohl(sd->fixed.worktree_status); + d->index_status = ntohl(sd->fixed.index_status); + d->stagemask = ntohl(sd->fixed.stagemask); + d->rename_score = ntohl(sd->fixed.rename_score); + d->mode_head = ntohl(sd->fixed.mode_head); + d->mode_index = ntohl(sd->fixed.mode_index); + d->mode_worktree = ntohl(sd->fixed.mode_worktree); + d->dirty_submodule = ntohl(sd->fixed.dirty_submodule); + d->new_submodule_commits = ntohl(sd->fixed.new_submodule_commits); + oidcpy(&d->oid_head, &sd->fixed.oid_head); + oidcpy(&d->oid_index, &sd->fixed.oid_index); + + p = sd->variant; + item = string_list_append(&s->change, p); + p += strlen(p) + 1; + if (*p) + d->rename_source = xstrdup(p); + item->util = d; + + trace_printf_key( + &trace_deserialize, + "change: %d %d %d %d %o %o %o %d %d %s %s '%s' '%s'", + d->worktree_status, + d->index_status, + d->stagemask, + d->rename_score, + d->mode_head, + d->mode_index, + d->mode_worktree, + d->dirty_submodule, + d->new_submodule_commits, + oid_to_hex(&d->oid_head), + oid_to_hex(&d->oid_index), + item->string, + (d->rename_source ? 
d->rename_source : "")); + } + + return DESERIALIZE_OK; +} + +static int wt_deserialize_v1_untracked_items(struct wt_status *s, + int fd, + int count, + enum deserialize_parse_strategy strategy) +{ + int line_len; + const char *line; + char *out = NULL; + int out_len = 0; + + string_list_init(&s->untracked, 1); + + /* + * + + * + */ + while ((line = my_packet_read_line(fd, &line_len))) { + if (strategy == DESERIALIZE_STRATEGY_AS_IS) + string_list_append(&s->untracked, line); + if (strategy == DESERIALIZE_STRATEGY_SKIP) + continue; + if (strategy == DESERIALIZE_STRATEGY_NORMAL) { + + /* Only add "normal" entries to list */ + if (out && + check_path_contains(out, out_len, line, line_len)) { + continue; + } + else { + out = string_list_append(&s->untracked, line)->string; + out_len = line_len; + } + } + if (strategy == DESERIALIZE_STRATEGY_ALL) { + /* Only add "all" entries to list */ + if (line[line_len - 1] != '/') + string_list_append(&s->untracked, line); + } + } + + return DESERIALIZE_OK; +} + +static int wt_deserialize_v1_ignored_items(struct wt_status *s, + int fd, + int count, + enum deserialize_parse_strategy strategy) +{ + int line_len; + const char *line; + + string_list_init(&s->ignored, 1); + + /* + * + + * + */ + while ((line = my_packet_read_line(fd, &line_len))) { + if (strategy == DESERIALIZE_STRATEGY_AS_IS) + string_list_append(&s->ignored, line); + else + continue; + } + + return DESERIALIZE_OK; +} + +static int validate_untracked_files_arg(enum untracked_status_type cmd, + enum untracked_status_type des, + enum deserialize_parse_strategy *strategy) +{ + *strategy = DESERIALIZE_STRATEGY_AS_IS; + + if (cmd == des) { + *strategy = DESERIALIZE_STRATEGY_AS_IS; + } else if (cmd == SHOW_NO_UNTRACKED_FILES) { + *strategy = DESERIALIZE_STRATEGY_SKIP; + } else if (des == SHOW_COMPLETE_UNTRACKED_FILES) { + if (cmd == SHOW_ALL_UNTRACKED_FILES) + *strategy = DESERIALIZE_STRATEGY_ALL; + else if (cmd == SHOW_NORMAL_UNTRACKED_FILES) + *strategy = 
DESERIALIZE_STRATEGY_NORMAL; + } else { + return DESERIALIZE_ERR; + } + + return DESERIALIZE_OK; +} + +static int validate_ignored_files_arg(enum show_ignored_type cmd, + enum show_ignored_type des, + enum deserialize_parse_strategy *strategy) +{ + *strategy = DESERIALIZE_STRATEGY_AS_IS; + + if (cmd == SHOW_NO_IGNORED) { + *strategy = DESERIALIZE_STRATEGY_SKIP; + } + else if (cmd != des) { + return DESERIALIZE_ERR; + } + + return DESERIALIZE_OK; +} + +static int wt_deserialize_v1(const struct wt_status *cmd_s, struct wt_status *s, int fd) +{ + int line_len; + const char *line; + const char *arg; + int nr_changed = 0; + int nr_untracked = 0; + int nr_ignored = 0; + + enum deserialize_parse_strategy ignored_strategy = DESERIALIZE_STRATEGY_AS_IS, untracked_strategy = DESERIALIZE_STRATEGY_AS_IS; + + if (wt_deserialize_v1_header(s, fd) == DESERIALIZE_ERR) + return DESERIALIZE_ERR; + + /* + * We now have the header parsed. Look at the command args (as passed in), and see how to parse + * the serialized data + */ + if (validate_untracked_files_arg(cmd_s->show_untracked_files, s->show_untracked_files, &untracked_strategy)) { + trace_printf_key(&trace_deserialize, "reject: show_untracked_file: command: %d, serialized : %d", + cmd_s->show_untracked_files, + s->show_untracked_files); + return DESERIALIZE_ERR; + } + + if (validate_ignored_files_arg(cmd_s->show_ignored_mode, s->show_ignored_mode, &ignored_strategy)) { + trace_printf_key(&trace_deserialize, "reject: show_ignored_mode: command: %d, serialized: %d", + cmd_s->show_ignored_mode, + s->show_ignored_mode); + return DESERIALIZE_ERR; + } + + /* + * [ [+] ] + * [ [+] ] + * [ [+] ] + */ + while ((line = my_packet_read_line(fd, &line_len))) { + if (skip_prefix(line, "changed ", &arg)) { + nr_changed = (int)strtol(arg, NULL, 10); + if (wt_deserialize_v1_changed_items(s, fd, nr_changed) + == DESERIALIZE_ERR) + return DESERIALIZE_ERR; + continue; + } + if (skip_prefix(line, "untracked ", &arg)) { + nr_untracked = 
(int)strtol(arg, NULL, 10); + if (wt_deserialize_v1_untracked_items(s, fd, nr_untracked, untracked_strategy) + == DESERIALIZE_ERR) + return DESERIALIZE_ERR; + continue; + } + if (skip_prefix(line, "ignored ", &arg)) { + nr_ignored = (int)strtol(arg, NULL, 10); + if (wt_deserialize_v1_ignored_items(s, fd, nr_ignored, ignored_strategy) + == DESERIALIZE_ERR) + return DESERIALIZE_ERR; + continue; + } + trace_printf_key(&trace_deserialize, "unexpected line '%s'", line); + return DESERIALIZE_ERR; + } + + return DESERIALIZE_OK; +} + +static int wt_deserialize_parse(const struct wt_status *cmd_s, struct wt_status *s, int fd) +{ + int line_len; + const char *line; + const char *arg; + + memset(s, 0, sizeof(*s)); + + if ((line = my_packet_read_line(fd, &line_len)) && + (skip_prefix(line, "version ", &arg))) { + int version = (int)strtol(arg, NULL, 10); + if (version == 1) + return wt_deserialize_v1(cmd_s, s, fd); + } + trace_printf_key(&trace_deserialize, "missing/unsupported version"); + return DESERIALIZE_ERR; +} + +static inline int my_strcmp_null(const char *a, const char *b) +{ + const char *alt_a = (a) ? a : ""; + const char *alt_b = (b) ? b : ""; + + return strcmp(alt_a, alt_b); +} + +static int wt_deserialize_fd(const struct wt_status *cmd_s, struct wt_status *des_s, int fd) +{ + /* + * Check the path spec on the current command + */ + if (cmd_s->pathspec.nr > 1) { + trace_printf_key(&trace_deserialize, "reject: multiple pathspecs"); + return DESERIALIZE_ERR; + } + + /* + * If we have a pathspec, but it matches the root (i.e. no filtering) + * then this is OK. 
+ */ + if (cmd_s->pathspec.nr == 1 && + my_strcmp_null(cmd_s->pathspec.items[0].match, "")) { + trace_printf_key(&trace_deserialize, "reject: pathspec"); + return DESERIALIZE_ERR; + } + + /* + * Deserialize cached status + */ + if (wt_deserialize_parse(cmd_s, des_s, fd) == DESERIALIZE_ERR) + return DESERIALIZE_ERR; + + /* + * Compare fields in cmd_s with those observed in des_s and + * complain if they are incompatible (such as different "-u" + * or "--ignored" settings). + */ + if (cmd_s->is_initial != des_s->is_initial) { + trace_printf_key(&trace_deserialize, "reject: is_initial"); + return DESERIALIZE_ERR; + } + if (my_strcmp_null(cmd_s->branch, des_s->branch)) { + trace_printf_key(&trace_deserialize, "reject: branch"); + return DESERIALIZE_ERR; + } + if (my_strcmp_null(cmd_s->reference, des_s->reference)) { + trace_printf_key(&trace_deserialize, "reject: reference"); + return DESERIALIZE_ERR; + } + /* verbose */ + /* amend */ + if (cmd_s->whence != des_s->whence) { + trace_printf_key(&trace_deserialize, "reject: whence"); + return DESERIALIZE_ERR; + } + /* nowarn */ + /* use_color */ + /* no_gettext */ + /* display_comment_prefix */ + /* relative_paths */ + /* submodule_summary */ + + /* show_ignored_mode - already validated */ + /* show_untracked_files - already validated */ + + /* + * Submodules are not supported by status serialization. + * The status will not be serialized if it contains submodules, + * and so this check is not needed. 
+ * + * if (my_strcmp_null(cmd_s->ignore_submodule_arg, des_s->ignore_submodule_arg)) { + * trace_printf_key(&trace_deserialize, "reject: ignore_submodule_arg"); + * return DESERIALIZE_ERR; + * } + */ + + /* color_palette */ + /* colopts */ + /* null_termination */ + /* commit_template */ + /* show_branch */ + /* show_stash */ + /* hints */ + if (cmd_s->detect_rename != des_s->detect_rename) { + trace_printf_key(&trace_deserialize, "reject: detect_rename"); + return DESERIALIZE_ERR; + } + if (cmd_s->rename_score != des_s->rename_score) { + trace_printf_key(&trace_deserialize, "reject: rename_score"); + return DESERIALIZE_ERR; + } + if (cmd_s->rename_limit != des_s->rename_limit) { + trace_printf_key(&trace_deserialize, "reject: rename_limit"); + return DESERIALIZE_ERR; + } + /* status_format */ + if (!oideq(&cmd_s->oid_commit, &des_s->oid_commit)) { + trace_printf_key(&trace_deserialize, "reject: sha1_commit"); + return DESERIALIZE_ERR; + } + + /* + * Copy over display-related fields from the current command. 
+ */ + des_s->verbose = cmd_s->verbose; + /* amend */ + /* whence */ + des_s->nowarn = cmd_s->nowarn; + des_s->use_color = cmd_s->use_color; + des_s->no_gettext = cmd_s->no_gettext; + des_s->display_comment_prefix = cmd_s->display_comment_prefix; + des_s->relative_paths = cmd_s->relative_paths; + des_s->submodule_summary = cmd_s->submodule_summary; + memcpy(des_s->color_palette, cmd_s->color_palette, + sizeof(char)*WT_STATUS_MAXSLOT*COLOR_MAXLEN); + des_s->colopts = cmd_s->colopts; + des_s->null_termination = cmd_s->null_termination; + /* commit_template */ + des_s->show_branch = cmd_s->show_branch; + des_s->show_stash = cmd_s->show_stash; + /* hints */ + des_s->status_format = cmd_s->status_format; + des_s->fp = cmd_s->fp; + if (cmd_s->prefix && *cmd_s->prefix) + des_s->prefix = xstrdup(cmd_s->prefix); + + return DESERIALIZE_OK; +} + + +/* + * Read raw serialized status data from the given file + * + * Verify that the args specified in the current command + * are compatible with the deserialized data (such as "-uno"). + * + * Copy display-related fields from the current command + * into the deserialized data (so that the user can request + * long or short as they please). 
+ */ +int wt_status_deserialize(const struct wt_status *cmd_s, + const char *path) +{ + struct wt_status des_s; + int result; + + if (path && *path && strcmp(path, "0")) { + int fd = xopen(path, O_RDONLY); + if (fd == -1) { + trace_printf_key(&trace_deserialize, "could not read '%s'", path); + return DESERIALIZE_ERR; + } + trace_printf_key(&trace_deserialize, "reading serialization file '%s'", path); + result = wt_deserialize_fd(cmd_s, &des_s, fd); + close(fd); + } else { + trace_printf_key(&trace_deserialize, "reading stdin"); + result = wt_deserialize_fd(cmd_s, &des_s, 0); + } + + if (result == DESERIALIZE_OK) { + wt_status_get_state(cmd_s->repo, &des_s.state, des_s.branch && + !strcmp(des_s.branch, "HEAD")); + wt_status_print(&des_s); + } + + return result; +} diff --git a/wt-status-serialize.c b/wt-status-serialize.c new file mode 100644 index 00000000000000..60da2d17d16182 --- /dev/null +++ b/wt-status-serialize.c @@ -0,0 +1,213 @@ +#include "cache.h" +#include "wt-status.h" +#include "pkt-line.h" + +static struct trace_key trace_serialize = TRACE_KEY_INIT(SERIALIZE); + +/* + * Write V1 header fields. + */ +static void wt_serialize_v1_header(struct wt_status *s, int fd) +{ + /* + * Write select fields from the current index to help + * the deserializer recognize a stale data set. + */ + packet_write_fmt(fd, "index_mtime %d %d\n", + s->repo->index->timestamp.sec, + s->repo->index->timestamp.nsec); + + /* + * Write data from wt_status to qualify this status report. + * That is, if this run specified "-uno", the consumer of + * our serialization should know that. 
+ */ + packet_write_fmt(fd, "is_initial %d\n", s->is_initial); + if (s->branch) + packet_write_fmt(fd, "branch %s\n", s->branch); + if (s->reference) + packet_write_fmt(fd, "reference %s\n", s->reference); + /* pathspec */ + /* verbose */ + /* amend */ + packet_write_fmt(fd, "whence %d\n", s->whence); + /* nowarn */ + /* use_color */ + /* no_gettext */ + /* display_comment_prefix */ + /* relative_paths */ + /* submodule_summary */ + packet_write_fmt(fd, "show_ignored_mode %d\n", s->show_ignored_mode); + packet_write_fmt(fd, "show_untracked_files %d\n", s->show_untracked_files); + if (s->ignore_submodule_arg) + packet_write_fmt(fd, "ignore_submodule_arg %s\n", s->ignore_submodule_arg); + /* color_palette */ + /* colopts */ + /* null_termination */ + /* commit_template */ + /* show_branch */ + /* show_stash */ + packet_write_fmt(fd, "hints %d\n", s->hints); + packet_write_fmt(fd, "detect_rename %d\n", s->detect_rename); + packet_write_fmt(fd, "rename_score %d\n", s->rename_score); + packet_write_fmt(fd, "rename_limit %d\n", s->rename_limit); + /* status_format */ + packet_write_fmt(fd, "sha1_commit %s\n", oid_to_hex(&s->oid_commit)); + packet_write_fmt(fd, "committable %d\n", s->committable); + packet_write_fmt(fd, "workdir_dirty %d\n", s->workdir_dirty); + /* prefix */ + packet_flush(fd); +} + +/* + * Print changed/unmerged items. + * We write raw (not c-quoted) pathname(s). The rename_source is only + * set when status computed a rename/copy. + * + * We ALWAYS write a final LF to the packet-line (for debugging) + * even though Linux pathnames allow LFs. 
+ */ +static inline void wt_serialize_v1_changed(struct wt_status *s, int fd, + struct string_list_item *item) +{ + struct wt_status_change_data *d = item->util; + struct wt_status_serialize_data sd; + char *begin; + char *end; + char *p; + int len_path, len_rename_source; + + trace_printf_key(&trace_serialize, + "change: %d %d %d %d %o %o %o %d %d %s %s '%s' '%s'", + d->worktree_status, + d->index_status, + d->stagemask, + d->rename_score, + d->mode_head, + d->mode_index, + d->mode_worktree, + d->dirty_submodule, + d->new_submodule_commits, + oid_to_hex(&d->oid_head), + oid_to_hex(&d->oid_index), + item->string, + (d->rename_source ? d->rename_source : "")); + + sd.fixed.worktree_status = htonl(d->worktree_status); + sd.fixed.index_status = htonl(d->index_status); + sd.fixed.stagemask = htonl(d->stagemask); + sd.fixed.rename_score = htonl(d->rename_score); + sd.fixed.mode_head = htonl(d->mode_head); + sd.fixed.mode_index = htonl(d->mode_index); + sd.fixed.mode_worktree = htonl(d->mode_worktree); + sd.fixed.dirty_submodule = htonl(d->dirty_submodule); + sd.fixed.new_submodule_commits = htonl(d->new_submodule_commits); + oidcpy(&sd.fixed.oid_head, &d->oid_head); + oidcpy(&sd.fixed.oid_index, &d->oid_index); + + begin = (char *)&sd; + end = begin + sizeof(sd); + + p = sd.variant; + + /* + * Write NUL [] NUL LF at the end of the buffer. + */ + len_path = strlen(item->string); + len_rename_source = d->rename_source ? strlen(d->rename_source) : 0; + + /* + * This is a bit of a hack, but I don't want to split the + * status detail record across multiple pkt-lines. 
+ */ + if (p + len_path + 1 + len_rename_source + 1 + 1 >= end) + BUG("path to long to serialize '%s'", item->string); + + memcpy(p, item->string, len_path); + p += len_path; + *p++ = '\0'; + + if (len_rename_source) { + memcpy(p, d->rename_source, len_rename_source); + p += len_rename_source; + } + *p++ = '\0'; + *p++ = '\n'; + + if (packet_write_gently(fd, begin, (p - begin))) + BUG("cannot serialize '%s'", item->string); +} + +/* + * Write raw (not c-quoted) pathname for an untracked item. + * We ALWAYS write a final LF to the packet-line (for debugging) + * even though Linux pathnames allow LFs. That is, deserialization + * should use the packet-line length and omit the final LF. + */ +static inline void wt_serialize_v1_untracked(struct wt_status *s, int fd, + struct string_list_item *item) +{ + packet_write_fmt(fd, "%s\n", item->string); +} + +/* + * Write raw (not c-quoted) pathname for an ignored item. + * We ALWAYS write a final LF to the packet-line (for debugging) + * even though Linux pathnames allow LFs. + */ +static inline void wt_serialize_v1_ignored(struct wt_status *s, int fd, + struct string_list_item *item) +{ + packet_write_fmt(fd, "%s\n", item->string); +} + +/* + * Serialize the list of changes to stdout. The goal of this + * is to just serialize the key fields in wt_status so that a + * later command can rebuild it and do the printing. + * + * We DO NOT include the contents of wt_status_state NOR + * current branch info. This info easily gets stale and + * is relatively quick for the status consumer to compute + * as necessary. + */ +void wt_status_serialize_v1(struct wt_status *s) +{ + int fd = 1; /* we always write to stdout */ + struct string_list_item *iter; + int k; + + /* + * version header must be first line. 
+ */ + packet_write_fmt(fd, "version 1\n"); + + wt_serialize_v1_header(s, fd); + + if (s->change.nr > 0) { + packet_write_fmt(fd, "changed %d\n", s->change.nr); + for (k = 0; k < s->change.nr; k++) { + iter = &(s->change.items[k]); + wt_serialize_v1_changed(s, fd, iter); + } + packet_flush(fd); + } + + if (s->untracked.nr > 0) { + packet_write_fmt(fd, "untracked %d\n", s->untracked.nr); + for (k = 0; k < s->untracked.nr; k++) { + iter = &(s->untracked.items[k]); + wt_serialize_v1_untracked(s, fd, iter); + } + packet_flush(fd); + } + + if (s->ignored.nr > 0) { + packet_write_fmt(fd, "ignored %d\n", s->ignored.nr); + for (k = 0; k < s->ignored.nr; k++) { + iter = &(s->ignored.items[k]); + wt_serialize_v1_ignored(s, fd, iter); + } + packet_flush(fd); + } +} diff --git a/wt-status.c b/wt-status.c index cc6f94504d9fa9..5b8e45302eb8b3 100644 --- a/wt-status.c +++ b/wt-status.c @@ -707,6 +707,9 @@ static void wt_status_collect_untracked(struct wt_status *s) if (s->show_untracked_files != SHOW_ALL_UNTRACKED_FILES) dir.flags |= DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES; + if (s->show_untracked_files == SHOW_COMPLETE_UNTRACKED_FILES) + dir.flags |= DIR_KEEP_UNTRACKED_CONTENTS; + if (s->show_ignored_mode) { dir.flags |= DIR_SHOW_IGNORED_TOO; @@ -2390,6 +2393,9 @@ void wt_status_print(struct wt_status *s) case STATUS_FORMAT_LONG: wt_longstatus_print(s); break; + case STATUS_FORMAT_SERIALIZE_V1: + wt_status_serialize_v1(s); + break; } trace2_region_leave("status", "print", s->repo); diff --git a/wt-status.h b/wt-status.h index 71c3f25f43deff..913bf2750f9bd7 100644 --- a/wt-status.h +++ b/wt-status.h @@ -4,6 +4,7 @@ #include "string-list.h" #include "color.h" #include "pathspec.h" +#include "pkt-line.h" #include "remote.h" struct repository; @@ -25,7 +26,8 @@ enum color_wt_status { enum untracked_status_type { SHOW_NO_UNTRACKED_FILES, SHOW_NORMAL_UNTRACKED_FILES, - SHOW_ALL_UNTRACKED_FILES + SHOW_ALL_UNTRACKED_FILES, + SHOW_COMPLETE_UNTRACKED_FILES, }; enum 
show_ignored_type { @@ -60,6 +62,7 @@ enum wt_status_format { STATUS_FORMAT_SHORT, STATUS_FORMAT_PORCELAIN, STATUS_FORMAT_PORCELAIN_V2, + STATUS_FORMAT_SERIALIZE_V1, STATUS_FORMAT_UNSPECIFIED }; @@ -161,4 +164,51 @@ int require_clean_work_tree(struct repository *repo, int ignore_submodules, int gently); +#define DESERIALIZE_OK 0 +#define DESERIALIZE_ERR 1 + +struct wt_status_serialize_data_fixed +{ + uint32_t worktree_status; + uint32_t index_status; + uint32_t stagemask; + uint32_t rename_score; + uint32_t mode_head; + uint32_t mode_index; + uint32_t mode_worktree; + uint32_t dirty_submodule; + uint32_t new_submodule_commits; + struct object_id oid_head; + struct object_id oid_index; +}; + +/* + * Consume the maximum amount of data possible in a + * packet-line record. This is overkill because we + * have at most 2 relative pathnames, but means we + * don't need to allocate a variable length structure. + */ +struct wt_status_serialize_data +{ + struct wt_status_serialize_data_fixed fixed; + char variant[LARGE_PACKET_DATA_MAX + - sizeof(struct wt_status_serialize_data_fixed)]; +}; + +/* + * Serialize computed status scan results using "version 1" format + * to the given file. + */ +void wt_status_serialize_v1(struct wt_status *s); + +/* + * Deserialize existing status results from the given file and + * populate a (new) "struct wt_status". Use the contents of "cmd_s" + * (computed from the command line arguments) to verify that the + * cached data is compatible and overlay various display-related + * fields. + */ +int wt_status_deserialize(const struct wt_status *cmd_s, + const char *path); + #endif /* STATUS_H */ From 630ecb7d6a2132023345646e699f02aada8199db Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Mon, 3 Jul 2017 13:39:45 -0600 Subject: [PATCH 049/129] cache-tree: remove use of strbuf_addf in update_one String formatting can be a performance issue when there are hundreds of thousands of trees. 
Change to stop using the strbuf_addf and just add the strings or characters individually. There are a limited number of modes, so add a switch for the known ones and a default case in case something comes through that is not a known one for git. In one scenario regarding a huge worktree, this reduces the time required for a `git checkout ` from 44 seconds to 38 seconds, i.e. it is a non-negligible performance improvement. Signed-off-by: Kevin Willford --- cache-tree.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/cache-tree.c b/cache-tree.c index 6b90e45ed3873f..2213f4632ed0b1 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -398,7 +398,29 @@ static int update_one(struct cache_tree *it, continue; strbuf_grow(&buffer, entlen + 100); - strbuf_addf(&buffer, "%o %.*s%c", mode, entlen, path + baselen, '\0'); + + switch (mode) { + case 0100644: + strbuf_add(&buffer, "100644 ", 7); + break; + case 0100664: + strbuf_add(&buffer, "100664 ", 7); + break; + case 0100755: + strbuf_add(&buffer, "100755 ", 7); + break; + case 0120000: + strbuf_add(&buffer, "120000 ", 7); + break; + case 0160000: + strbuf_add(&buffer, "160000 ", 7); + break; + default: + strbuf_addf(&buffer, "%o ", mode); + break; + } + strbuf_add(&buffer, path + baselen, entlen); + strbuf_addch(&buffer, '\0'); strbuf_add(&buffer, oid->hash, the_hash_algo->rawsz); #if DEBUG_CACHE_TREE From c9e80e2154c0c6218c31d79c958faa2f160bd08a Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 7 Feb 2018 10:59:03 -0500 Subject: [PATCH 050/129] status: reject deserialize in V2 and conflicts Teach status deserialize code to reject status cache when printing in porcelain V2 and there are unresolved conflicts in the cache file. A follow-on task might extend the cache format to include this additional data. See code for longer explanation. 
Signed-off-by: Jeff Hostetler --- t/t7524-serialized-status.sh | 84 +++++++++++++++++++++++++++++++++++- wt-status-deserialize.c | 28 +++++++++++- 2 files changed, 109 insertions(+), 3 deletions(-) diff --git a/t/t7524-serialized-status.sh b/t/t7524-serialized-status.sh index d1ca3af3f96546..6ac29ba3e85b45 100755 --- a/t/t7524-serialized-status.sh +++ b/t/t7524-serialized-status.sh @@ -164,7 +164,7 @@ test_expect_success 'verify no-ahead-behind and serialized status integration' ' ' test_expect_success 'verify new --serialize=path mode' ' - #test_when_finished "rm serialized_status.dat expect new_change.txt output.1 output.2" && + test_when_finished "rm serialized_status.dat expect new_change.txt output.1 output.2" && cat >expect <<-\EOF && ? expect ? output.1 @@ -186,4 +186,86 @@ test_expect_success 'verify new --serialize=path mode' ' test_i18ncmp expect output.2 ' +test_expect_success 'merge conflicts' ' + + # create a merge conflict. + + git init conflicts && + echo x >conflicts/x.txt && + git -C conflicts add x.txt && + git -C conflicts commit -m x && + git -C conflicts branch a && + git -C conflicts branch b && + git -C conflicts checkout a && + echo y >conflicts/x.txt && + git -C conflicts add x.txt && + git -C conflicts commit -m a && + git -C conflicts checkout b && + echo z >conflicts/x.txt && + git -C conflicts add x.txt && + git -C conflicts commit -m b && + test_must_fail git -C conflicts merge --no-commit a && + + # verify that regular status correctly identifies it + # in each format. + + cat >expect.v2 <observed.v2 && + test_cmp expect.v2 observed.v2 && + + cat >expect.long <..." to mark resolution) + both modified: x.txt + +no changes added to commit (use "git add" and/or "git commit -a") +EOF + git -C conflicts status --long >observed.long && + test_cmp expect.long observed.long && + + cat >expect.short <observed.short && + test_cmp expect.short observed.short && + + # save status data in serialized cache. 
+ + git -C conflicts status --serialize >serialized && + + # make some dirt in the worktree so we can tell whether subsequent + # status commands used the cached data or did a fresh status. + + echo dirt >conflicts/dirt.txt && + + # run status using the cached data. + + git -C conflicts status --long --deserialize=../serialized >observed.long && + test_cmp expect.long observed.long && + + git -C conflicts status --short --deserialize=../serialized >observed.short && + test_cmp expect.short observed.short && + + # currently, the cached data does not have enough information about + # merge conflicts for porcelain V2 format. (And V2 format looks at + # the index to get that data, but the whole point of the serialization + # is to avoid reading the index unnecessarily.) So V2 always rejects + # the cached data when there is an unresolved conflict. + + cat >expect.v2.dirty <observed.v2 && + test_cmp expect.v2.dirty observed.v2 + +' + test_done diff --git a/wt-status-deserialize.c b/wt-status-deserialize.c index 02a1b4cd13cb7d..924df8df736fce 100644 --- a/wt-status-deserialize.c +++ b/wt-status-deserialize.c @@ -176,7 +176,8 @@ static int wt_deserialize_v1_header(struct wt_status *s, int fd) /* * Build a string-list of (count) lines from the input. */ -static int wt_deserialize_v1_changed_items(struct wt_status *s, int fd, int count) +static int wt_deserialize_v1_changed_items(const struct wt_status *cmd_s, + struct wt_status *s, int fd, int count) { struct wt_status_serialize_data *sd; char *p; @@ -231,6 +232,29 @@ static int wt_deserialize_v1_changed_items(struct wt_status *s, int fd, int coun oid_to_hex(&d->oid_index), item->string, (d->rename_source ? d->rename_source : "")); + + if (d->stagemask && + cmd_s->status_format == STATUS_FORMAT_PORCELAIN_V2) { + /* + * We have an unresolved conflict and the user wants + * to see porcelain V2 output. 
The cached status data + * does not contain enough information for V2 (because + * the main status computation does not capture it). + * We only get a single change record for the file with + * a single SHA -- we don't get the stage [123] mode + * and SHA data. The V2 detail-line print code looks + * up this information directly from the index. The + * whole point of this serialization cache is to avoid + * reading the index, so the V2 print code gets zeros. + * So we reject the status cache and let the fallback + * code run. + */ + trace_printf_key( + &trace_deserialize, + "reject: V2 format and umerged file: %s", + item->string); + return DESERIALIZE_ERR; + } } return DESERIALIZE_OK; @@ -381,7 +405,7 @@ static int wt_deserialize_v1(const struct wt_status *cmd_s, struct wt_status *s, while ((line = my_packet_read_line(fd, &line_len))) { if (skip_prefix(line, "changed ", &arg)) { nr_changed = (int)strtol(arg, NULL, 10); - if (wt_deserialize_v1_changed_items(s, fd, nr_changed) + if (wt_deserialize_v1_changed_items(cmd_s, s, fd, nr_changed) == DESERIALIZE_ERR) return DESERIALIZE_ERR; continue; From b7d8c5802a2f952a995d1c29dc7c6aa60328f928 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 24 Sep 2018 19:07:28 +0000 Subject: [PATCH 051/129] fsck: use ERROR_MULTI_PACK_INDEX The multi-pack-index was added to the data verified by git-fsck in ea5ae6c3 "fsck: verify multi-pack-index". This implementation was based on the implementation for verifying the commit-graph, and a copy-paste error kept the ERROR_COMMIT_GRAPH flag as the bit set when an error appears in the multi-pack-index. Add a new flag, ERROR_MULTI_PACK_INDEX, and use that instead. 
Signed-off-by: Derrick Stolee --- builtin/fsck.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 8d13794b1412c8..f02cbdb439b210 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -49,6 +49,7 @@ static int name_objects; #define ERROR_PACK 04 #define ERROR_REFS 010 #define ERROR_COMMIT_GRAPH 020 +#define ERROR_MULTI_PACK_INDEX 040 static const char *describe_object(const struct object_id *oid) { @@ -952,7 +953,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) midx_argv[2] = "--object-dir"; midx_argv[3] = odb->path; if (run_command(&midx_verify)) - errors_found |= ERROR_COMMIT_GRAPH; + errors_found |= ERROR_MULTI_PACK_INDEX; } } From 3eae1576e8a35bee96ed9b23e9270fffe1103ec2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 21 Aug 2019 19:31:12 +0000 Subject: [PATCH 052/129] t5516: relax error message pattern This test in t5516 is sometimes failing due to a race condition in the stderr output. The remote is sending an error line in the middle of the local client writing an error. This causes the exact line match specified by the pattern to be incorrect. Relax the condition a little for now, and consider a better fix to the race condition later. 
Signed-off-by: Derrick Stolee --- t/t5516-fetch-push.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index c81ca360ac4ac9..35100cf9c47796 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1253,7 +1253,7 @@ do git cat-file commit $SHA1_2 && test_must_fail env GIT_TEST_PROTOCOL_VERSION= \ git fetch ../testrepo/.git $SHA1_3 2>err && - test_i18ngrep "remote error:.*not our ref.*$SHA1_3\$" err + test_i18ngrep "not our ref.*$SHA1_3" err ) ' done From 694acebed55534a85b9d17e81b95f18365cbbcf1 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 27 Aug 2019 15:30:37 -0400 Subject: [PATCH 053/129] upload-pack: fix race condition in error messages Test t5516-fetch-push.sh has a test 'deny fetch unreachable SHA1, allowtipsha1inwant=true' that checks stderr for a specific error string from the remote. In some build environments the error sent over the remote connection gets mingled with the error from the die() statement. Since both signals are being output to the same file descriptor (but from parent and child processes), the output we are matching with grep gets split. To reduce the risk of this failure, follow this process instead: 1. Write an error message to stderr. 2. Write an error message across the connection. 3. exit(1). This reorders the events so the error is written entirely before the client receives a message from the remote, removing the race condition. 
Signed-off-by: Derrick Stolee --- upload-pack.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index a00d7ece6b9c9a..36b6cf3561c615 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -612,11 +612,12 @@ static void check_non_tip(struct object_array *want_obj, for (i = 0; i < want_obj->nr; i++) { struct object *o = want_obj->objects[i].item; if (!is_our_ref(o)) { + warning("git upload-pack: not our ref %s", + oid_to_hex(&o->oid)); packet_writer_error(writer, "upload-pack: not our ref %s", oid_to_hex(&o->oid)); - die("git upload-pack: not our ref %s", - oid_to_hex(&o->oid)); + exit(1); } } } From 6c11c6a124a0175f4d1b94f0fa077ed1c098339b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 20 Feb 2020 20:07:06 +0000 Subject: [PATCH 054/129] sparse-checkout: allow one-character directories in cone mode In 9e6d3e64 (sparse-checkout: detect short patterns, 2020-01-24), a condition on the minimum length of a cone-mode pattern was introduced. However, this condition was off-by-one. If we have a directory with a single character, say "b", then the command git sparse-checkout set b will correctly add the pattern "/b/" to the sparse-checkout file. When this is interpreted in dir.c, the pattern is "/b" with the PATTERN_FLAG_MUSTBEDIR flag. This string has length two, which satisfies our inclusive inequality (<= 2). The reason for this inequality is that we will start to read the pattern string character-by-character using three char pointers: prev, cur, next. In particular, next is set to the current pattern plus two. The mistake was that next will still be a valid pointer when the pattern length is two, since the string is null-terminated. Make this inequality strict so these patterns work. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- dir.c | 2 +- t/t1091-sparse-checkout-builtin.sh | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/dir.c b/dir.c index 7ac0920b713517..a87900d43a2ea8 100644 --- a/dir.c +++ b/dir.c @@ -682,7 +682,7 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern return; } - if (given->patternlen <= 2 || + if (given->patternlen < 2 || *given->pattern == '*' || strstr(given->pattern, "**")) { /* Not a cone pattern. */ diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index c35cbdef454858..b4c9c32a037251 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -417,10 +417,20 @@ test_expect_success 'pattern-checks: too short' ' cat >repo/.git/info/sparse-checkout <<-\EOF && /* !/*/ - /a + / EOF check_read_tree_errors repo "a" "disabling cone pattern matching" ' +test_expect_success 'pattern-checks: not too short' ' + cat >repo/.git/info/sparse-checkout <<-\EOF && + /* + !/*/ + /b/ + EOF + git -C repo read-tree -mu HEAD 2>err && + test_must_be_empty err && + check_files repo a +' test_expect_success 'pattern-checks: trailing "*"' ' cat >repo/.git/info/sparse-checkout <<-\EOF && From 0fab055dc8676935c73030d3981a60b9075b942f Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Thu, 11 Jan 2018 16:25:08 -0500 Subject: [PATCH 055/129] Add virtual file system settings and hook proc On index load, clear/set the skip worktree bits based on the virtual file system data. Use virtual file system data to update skip-worktree bit in unpack-trees. Use virtual file system data to exclude files and folders not explicitly requested. 
Signed-off-by: Ben Peart --- Documentation/config/core.txt | 8 + Documentation/githooks.txt | 20 ++ Makefile | 1 + cache.h | 1 + config.c | 23 ++- config.h | 1 + dir.c | 33 +++- environment.c | 1 + read-cache.c | 2 + t/t1092-virtualfilesystem.sh | 349 ++++++++++++++++++++++++++++++++++ unpack-trees.c | 26 ++- virtualfilesystem.c | 312 ++++++++++++++++++++++++++++++ virtualfilesystem.h | 25 +++ 13 files changed, 794 insertions(+), 8 deletions(-) create mode 100755 t/t1092-virtualfilesystem.sh create mode 100644 virtualfilesystem.c create mode 100644 virtualfilesystem.h diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 118eb58fd1a68d..8fccebd7d7faca 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -68,6 +68,14 @@ core.fsmonitor:: avoiding unnecessary processing of files that have not changed. See the "fsmonitor-watchman" section of linkgit:githooks[5]. +core.virtualFilesystem:: + If set, the value of this variable is used as a command which + will identify all files and directories that are present in + the working directory. Git will only track and update files + listed in the virtual file system. Using the virtual file system + will supersede the sparse-checkout settings which will be ignored. + See the "virtual file system" section of linkgit:githooks[5]. + core.trustctime:: If false, the ctime differences between the index and the working tree are ignored; useful when the inode change time diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt index 50365f2914e04f..15401ad21bfb73 100644 --- a/Documentation/githooks.txt +++ b/Documentation/githooks.txt @@ -540,6 +540,26 @@ and "0" meaning they were not. Only one parameter should be set to "1" when the hook runs. The hook running passing "1", "1" should not be possible. +virtualFilesystem +~~~~~~~~~~~~~~~~~~ + +"Virtual File System" allows populating the working directory sparsely. 
+The projection data is typically automatically generated by an external +process. Git will limit what files it checks for changes as well as which +directories are checked for untracked files based on the path names given. +Git will also only update those files listed in the projection. + +The hook is invoked when the configuration option core.virtualFilesystem +is set. It takes one argument, a version (currently 1). + +The hook should output to stdout the list of all files in the working +directory that git should track. The paths are relative to the root +of the working directory and are separated by a single NUL. Full paths +('dir1/a.txt') as well as directories are supported (ie 'dir1/'). + +The exit status determines whether git will use the data from the +hook. On error, git will abort the command with an error message. + GIT --- Part of the linkgit:git[1] suite diff --git a/Makefile b/Makefile index 305254ca0590d9..587baa23645d41 100644 --- a/Makefile +++ b/Makefile @@ -1027,6 +1027,7 @@ LIB_OBJS += utf8.o LIB_OBJS += varint.o LIB_OBJS += version.o LIB_OBJS += versioncmp.o +LIB_OBJS += virtualfilesystem.o LIB_OBJS += walker.o LIB_OBJS += wildmatch.o LIB_OBJS += worktree.o diff --git a/cache.h b/cache.h index b156194669ecb6..f69a412f8596bc 100644 --- a/cache.h +++ b/cache.h @@ -953,6 +953,7 @@ extern char *git_replace_ref_base; extern int fsync_object_files; extern int core_preload_index; +extern const char *core_virtualfilesystem; extern int core_gvfs; extern int precomposed_unicode; extern int protect_hfs; diff --git a/config.c b/config.c index 4b5bcd107bfdb2..3332359419f197 100644 --- a/config.c +++ b/config.c @@ -1366,7 +1366,11 @@ static int git_default_core_config(const char *var, const char *value, void *cb) } if (!strcmp(var, "core.sparsecheckout")) { - core_apply_sparse_checkout = git_config_bool(var, value); + /* virtual file system relies on the sparse checkout logic so force it on */ + if (core_virtualfilesystem) + core_apply_sparse_checkout = 1; 
+ else + core_apply_sparse_checkout = git_config_bool(var, value); return 0; } @@ -2342,6 +2346,23 @@ int git_config_get_fsmonitor(void) return 0; } +int git_config_get_virtualfilesystem(void) +{ + if (git_config_get_pathname("core.virtualfilesystem", &core_virtualfilesystem)) + core_virtualfilesystem = getenv("GIT_VIRTUALFILESYSTEM_TEST"); + + if (core_virtualfilesystem && !*core_virtualfilesystem) + core_virtualfilesystem = NULL; + + /* virtual file system relies on the sparse checkout logic so force it on */ + if (core_virtualfilesystem) { + core_apply_sparse_checkout = 1; + return 1; + } + + return 0; +} + int git_config_get_index_threads(int *dest) { int is_bool, val; diff --git a/config.h b/config.h index 91fd4c5e96ae79..034ee76bbcf984 100644 --- a/config.h +++ b/config.h @@ -579,6 +579,7 @@ int git_config_get_untracked_cache(void); int git_config_get_split_index(void); int git_config_get_max_percent_split_change(void); int git_config_get_fsmonitor(void); +int git_config_get_virtualfilesystem(void); /* This dies if the configured or default date is in the future */ int git_config_get_expiry(const char *key, const char **output); diff --git a/dir.c b/dir.c index e8952d05148346..e2098739978126 100644 --- a/dir.c +++ b/dir.c @@ -18,6 +18,7 @@ #include "ewah/ewok.h" #include "fsmonitor.h" #include "submodule-config.h" +#include "virtualfilesystem.h" /* * Tells read_directory_recursive how a file or directory should be treated. @@ -1306,6 +1307,17 @@ enum pattern_match_result path_matches_pattern_list( int result = NOT_MATCHED; const char *slash_pos; + /* + * The virtual file system data is used to prevent git from traversing + * any part of the tree that is not in the virtual file system. Return + * 1 to exclude the entry if it is not found in the virtual file system, + * else fall through to the regular excludes logic as it may further exclude. 
+ */ + if (*dtype == DT_UNKNOWN) + *dtype = resolve_dtype(DT_UNKNOWN, istate, pathname, pathlen); + if (is_excluded_from_virtualfilesystem(pathname, pathlen, *dtype) > 0) + return 1; + if (!pl->use_cone_patterns) { pattern = last_matching_pattern_from_list(pathname, pathlen, basename, dtype, pl, istate); @@ -1564,8 +1576,20 @@ struct path_pattern *last_matching_pattern(struct dir_struct *dir, int is_excluded(struct dir_struct *dir, struct index_state *istate, const char *pathname, int *dtype_p) { - struct path_pattern *pattern = - last_matching_pattern(dir, istate, pathname, dtype_p); + struct path_pattern *pattern; + + /* + * The virtual file system data is used to prevent git from traversing + * any part of the tree that is not in the virtual file system. Return + * 1 to exclude the entry if it is not found in the virtual file system, + * else fall through to the regular excludes logic as it may further exclude. + */ + if (*dtype_p == DT_UNKNOWN) + *dtype_p = resolve_dtype(DT_UNKNOWN, istate, pathname, strlen(pathname)); + if (is_excluded_from_virtualfilesystem(pathname, strlen(pathname), *dtype_p) > 0) + return 1; + + pattern = last_matching_pattern(dir, istate, pathname, dtype_p); if (pattern) return pattern->flags & PATTERN_FLAG_NEGATIVE ? 
0 : 1; return 0; @@ -1924,6 +1948,9 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, if (dtype != DT_DIR && has_path_in_index) return path_none; + if (is_excluded_from_virtualfilesystem(path->buf, path->len, dtype) > 0) + return path_excluded; + /* * When we are looking at a directory P in the working tree, * there are three cases: @@ -2183,6 +2210,8 @@ static void add_path_to_appropriate_result_list(struct dir_struct *dir, /* add the path to the appropriate result list */ switch (state) { case path_excluded: + if (is_excluded_from_virtualfilesystem(path->buf, path->len, DT_DIR) > 0) + break; if (dir->flags & DIR_SHOW_IGNORED) dir_add_name(dir, istate, path->buf, path->len); else if ((dir->flags & DIR_SHOW_IGNORED_TOO) || diff --git a/environment.c b/environment.c index 4afd7190c4721c..3fa6678758ee10 100644 --- a/environment.c +++ b/environment.c @@ -69,6 +69,7 @@ int grafts_replace_parents = 1; int core_apply_sparse_checkout; int core_sparse_checkout_cone; int core_gvfs; +const char *core_virtualfilesystem; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ unsigned long pack_size_limit_cfg; diff --git a/read-cache.c b/read-cache.c index 381802b6dcbd64..fd3853217dc1c5 100644 --- a/read-cache.c +++ b/read-cache.c @@ -25,6 +25,7 @@ #include "fsmonitor.h" #include "thread-utils.h" #include "progress.h" +#include "virtualfilesystem.h" #include "gvfs.h" /* Mask for the name length in ce_flags in the on-disk index */ @@ -1920,6 +1921,7 @@ static void post_read_index_from(struct index_state *istate) tweak_untracked_cache(istate); tweak_split_index(istate); tweak_fsmonitor(istate); + apply_virtualfilesystem(istate); } static size_t estimate_cache_size_from_compressed(unsigned int entries) diff --git a/t/t1092-virtualfilesystem.sh b/t/t1092-virtualfilesystem.sh new file mode 100755 index 00000000000000..940ed47940521f --- /dev/null +++ b/t/t1092-virtualfilesystem.sh @@ -0,0 +1,349 @@ +#!/bin/sh + 
+test_description='virtual file system tests' + +. ./test-lib.sh + +clean_repo () { + rm .git/index && + git -c core.virtualfilesystem= reset --hard HEAD && + git -c core.virtualfilesystem= clean -fd && + touch untracked.txt && + touch dir1/untracked.txt && + touch dir2/untracked.txt +} + +test_expect_success 'setup' ' + mkdir -p .git/hooks/ && + cat > .gitignore <<-\EOF && + .gitignore + expect* + actual* + EOF + mkdir -p dir1 && + touch dir1/file1.txt && + touch dir1/file2.txt && + mkdir -p dir2 && + touch dir2/file1.txt && + touch dir2/file2.txt && + git add . && + git commit -m "initial" && + git config --local core.virtualfilesystem .git/hooks/virtualfilesystem +' + +test_expect_success 'test hook parameters and version' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + if test "$#" -ne 1 + then + echo "$0: Exactly 1 argument expected" >&2 + exit 2 + fi + + if test "$1" != 1 + then + echo "$0: Unsupported hook version." >&2 + exit 1 + fi + EOF + git status && + write_script .git/hooks/virtualfilesystem <<-\EOF && + exit 3 + EOF + test_must_fail git status +' + +test_expect_success 'verify status is clean' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir2/file1.txt\0" + EOF + rm -f .git/index && + git checkout -f && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir2/file1.txt\0" + printf "dir1/file1.txt\0" + printf "dir1/file2.txt\0" + EOF + git status > actual && + cat > expected <<-\EOF && + On branch master + nothing to commit, working tree clean + EOF + test_cmp expected actual +' + +test_expect_success 'verify skip-worktree bit is set for absolute path' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file1.txt\0" + EOF + git ls-files -v > actual && + cat > expected <<-\EOF && + H dir1/file1.txt + S dir1/file2.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + +test_expect_success 'verify skip-worktree bit is 
cleared for absolute path' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file2.txt\0" + EOF + git ls-files -v > actual && + cat > expected <<-\EOF && + S dir1/file1.txt + H dir1/file2.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + +test_expect_success 'verify folder wild cards' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/\0" + EOF + git ls-files -v > actual && + cat > expected <<-\EOF && + H dir1/file1.txt + H dir1/file2.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + +test_expect_success 'verify folders not included are ignored' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file1.txt\0" + printf "dir1/file2.txt\0" + EOF + mkdir -p dir1/dir2 && + touch dir1/a && + touch dir1/b && + touch dir1/dir2/a && + touch dir1/dir2/b && + git add . && + git ls-files -v > actual && + cat > expected <<-\EOF && + H dir1/file1.txt + H dir1/file2.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + +test_expect_success 'verify including one file doesnt include the rest' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file1.txt\0" + printf "dir1/file2.txt\0" + printf "dir1/dir2/a\0" + EOF + mkdir -p dir1/dir2 && + touch dir1/a && + touch dir1/b && + touch dir1/dir2/a && + touch dir1/dir2/b && + git add . && + git ls-files -v > actual && + cat > expected <<-\EOF && + H dir1/dir2/a + H dir1/file1.txt + H dir1/file2.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + +test_expect_success 'verify files not listed are ignored by git clean -f -x' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "untracked.txt\0" + printf "dir1/\0" + EOF + mkdir -p dir3 && + touch dir3/untracked.txt && + git clean -f -x && + test ! 
-f untracked.txt && + test -d dir1 && + test -f dir1/file1.txt && + test -f dir1/file2.txt && + test ! -f dir1/untracked.txt && + test -f dir2/file1.txt && + test -f dir2/file2.txt && + test -f dir2/untracked.txt && + test -d dir3 && + test -f dir3/untracked.txt +' + +test_expect_success 'verify files not listed are ignored by git clean -f -d -x' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "untracked.txt\0" + printf "dir1/\0" + printf "dir3/\0" + EOF + mkdir -p dir3 && + touch dir3/untracked.txt && + git clean -f -d -x && + test ! -f untracked.txt && + test -d dir1 && + test -f dir1/file1.txt && + test -f dir1/file2.txt && + test ! -f dir1/untracked.txt && + test -f dir2/file1.txt && + test -f dir2/file2.txt && + test -f dir2/untracked.txt && + test ! -d dir3 && + test ! -f dir3/untracked.txt +' + +test_expect_success 'verify folder entries include all files' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/\0" + EOF + mkdir -p dir1/dir2 && + touch dir1/a && + touch dir1/b && + touch dir1/dir2/a && + touch dir1/dir2/b && + git status -su > actual && + cat > expected <<-\EOF && + ?? dir1/a + ?? dir1/b + ?? dir1/untracked.txt + EOF + test_cmp expected actual +' + +test_expect_success 'verify case insensitivity of virtual file system entries' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/a\0" + printf "Dir1/Dir2/a\0" + printf "DIR2/\0" + EOF + mkdir -p dir1/dir2 && + touch dir1/a && + touch dir1/b && + touch dir1/dir2/a && + touch dir1/dir2/b && + git -c core.ignorecase=false status -su > actual && + cat > expected <<-\EOF && + ?? dir1/a + EOF + test_cmp expected actual && + git -c core.ignorecase=true status -su > actual && + cat > expected <<-\EOF && + ?? dir1/a + ?? dir1/dir2/a + ?? 
dir2/untracked.txt + EOF + test_cmp expected actual +' + +test_expect_success 'on file created' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file3.txt\0" + EOF + touch dir1/file3.txt && + git add . && + git ls-files -v > actual && + cat > expected <<-\EOF && + S dir1/file1.txt + S dir1/file2.txt + H dir1/file3.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + +test_expect_success 'on file renamed' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file1.txt\0" + printf "dir1/file3.txt\0" + EOF + mv dir1/file1.txt dir1/file3.txt && + git status -su > actual && + cat > expected <<-\EOF && + D dir1/file1.txt + ?? dir1/file3.txt + EOF + test_cmp expected actual +' + +test_expect_success 'on file deleted' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file1.txt\0" + EOF + rm dir1/file1.txt && + git status -su > actual && + cat > expected <<-\EOF && + D dir1/file1.txt + EOF + test_cmp expected actual +' + +test_expect_success 'on file overwritten' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file1.txt\0" + EOF + echo "overwritten" > dir1/file1.txt && + git status -su > actual && + cat > expected <<-\EOF && + M dir1/file1.txt + EOF + test_cmp expected actual +' + +test_expect_success 'on folder created' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/dir1/\0" + EOF + mkdir -p dir1/dir1 && + git status -su > actual && + cat > expected <<-\EOF && + EOF + test_cmp expected actual && + git clean -fd && + test ! 
-d "/dir1/dir1" +' + +test_expect_success 'on folder renamed' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir3/\0" + printf "dir1/file1.txt\0" + printf "dir1/file2.txt\0" + printf "dir3/file1.txt\0" + printf "dir3/file2.txt\0" + EOF + mv dir1 dir3 && + git status -su > actual && + cat > expected <<-\EOF && + D dir1/file1.txt + D dir1/file2.txt + ?? dir3/file1.txt + ?? dir3/file2.txt + ?? dir3/untracked.txt + EOF + test_cmp expected actual +' + +test_done diff --git a/unpack-trees.c b/unpack-trees.c index 945de23170b526..f5fcea5f998d84 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -17,6 +17,7 @@ #include "object-store.h" #include "promisor-remote.h" #include "gvfs.h" +#include "virtualfilesystem.h" /* * Error messages expected by scripts out of plumbing commands such as @@ -1373,6 +1374,14 @@ static int clear_ce_flags_1(struct index_state *istate, continue; } + /* if it's not in the virtual file system, exit early */ + if (core_virtualfilesystem) { + if (is_included_in_virtualfilesystem(ce->name, ce->ce_namelen) > 0) + ce->ce_flags &= ~clear_mask; + cache++; + continue; + } + if (prefix->len && strncmp(ce->name, prefix->buf, prefix->len)) break; @@ -1521,11 +1530,18 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options if (!o->skip_sparse_checkout && !o->pl) { char *sparse = git_pathdup("info/sparse-checkout"); pl.use_cone_patterns = core_sparse_checkout_cone; - if (add_patterns_from_file_to_list(sparse, "", 0, &pl, NULL) < 0) - o->skip_sparse_checkout = 1; - else + if (core_virtualfilesystem) o->pl = &pl; - free(sparse); + else if (add_patterns_from_file_to_list(sparse, "", 0, &pl, NULL) < 0) + o->skip_sparse_checkout = 1; + else { + char *sparse = git_pathdup("info/sparse-checkout"); + if (add_patterns_from_file_to_list(sparse, "", 0, &pl, NULL) < 0) + o->skip_sparse_checkout = 1; + else + o->pl = &pl; + free(sparse); + } } memset(&o->result, 0, sizeof(o->result)); @@ -1621,7 +1637,7 @@ int 
unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options /* * Sparse checkout loop #2: set NEW_SKIP_WORKTREE on entries not in loop #1 - * If the will have NEW_SKIP_WORKTREE, also set CE_SKIP_WORKTREE + * If they will have NEW_SKIP_WORKTREE, also set CE_SKIP_WORKTREE * so apply_sparse_checkout() won't attempt to remove it from worktree */ mark_new_skip_worktree(o->pl, &o->result, diff --git a/virtualfilesystem.c b/virtualfilesystem.c new file mode 100644 index 00000000000000..9aa2ca239015dd --- /dev/null +++ b/virtualfilesystem.c @@ -0,0 +1,312 @@ +#include "cache.h" +#include "config.h" +#include "dir.h" +#include "hashmap.h" +#include "run-command.h" +#include "virtualfilesystem.h" + +#define HOOK_INTERFACE_VERSION (1) + +static struct strbuf virtual_filesystem_data = STRBUF_INIT; +static struct hashmap virtual_filesystem_hashmap; +static struct hashmap parent_directory_hashmap; + +struct virtualfilesystem { + struct hashmap_entry ent; /* must be the first member! */ + const char *pattern; + int patternlen; +}; + +static unsigned int(*vfshash)(const void *buf, size_t len); +static int(*vfscmp)(const char *a, const char *b, size_t len); + +static int vfs_hashmap_cmp(const void *unused_cmp_data, + const struct hashmap_entry *he1, + const struct hashmap_entry *he2, + const void *key) +{ + const struct virtualfilesystem *vfs1 = + container_of(he1, const struct virtualfilesystem, ent); + const struct virtualfilesystem *vfs2 = + container_of(he2, const struct virtualfilesystem, ent); + + return vfscmp(vfs1->pattern, vfs2->pattern, vfs1->patternlen); +} + +static void get_virtual_filesystem_data(struct strbuf *vfs_data) +{ + struct child_process cp = CHILD_PROCESS_INIT; + char ver[64]; + const char *argv[3]; + int err; + + strbuf_init(vfs_data, 0); + + snprintf(ver, sizeof(ver), "%d", HOOK_INTERFACE_VERSION); + argv[0] = core_virtualfilesystem; + argv[1] = ver; + argv[2] = NULL; + cp.argv = argv; + cp.use_shell = 1; + + err = capture_command(&cp, 
vfs_data, 1024); + if (err) + die("unable to load virtual file system"); +} + +static int check_includes_hashmap(struct hashmap *map, const char *pattern, int patternlen) +{ + struct strbuf sb = STRBUF_INIT; + struct virtualfilesystem vfs; + char *slash; + + /* Check straight mapping */ + strbuf_reset(&sb); + strbuf_add(&sb, pattern, patternlen); + vfs.pattern = sb.buf; + vfs.patternlen = sb.len; + hashmap_entry_init(&vfs.ent, vfshash(vfs.pattern, vfs.patternlen)); + if (hashmap_get_entry(map, &vfs, ent, NULL)) { + strbuf_release(&sb); + return 1; + } + + /* + * Check to see if it matches a directory or any path + * underneath it. In other words, 'a/b/foo.txt' will match + * '/', 'a/', and 'a/b/'. + */ + slash = strchr(sb.buf, '/'); + while (slash) { + vfs.pattern = sb.buf; + vfs.patternlen = slash - sb.buf + 1; + hashmap_entry_init(&vfs.ent, vfshash(vfs.pattern, vfs.patternlen)); + if (hashmap_get_entry(map, &vfs, ent, NULL)) { + strbuf_release(&sb); + return 1; + } + slash = strchr(slash + 1, '/'); + } + + strbuf_release(&sb); + return 0; +} + +static void includes_hashmap_add(struct hashmap *map, const char *pattern, const int patternlen) +{ + struct virtualfilesystem *vfs; + + vfs = xmalloc(sizeof(struct virtualfilesystem)); + vfs->pattern = pattern; + vfs->patternlen = patternlen; + hashmap_entry_init(&vfs->ent, vfshash(vfs->pattern, vfs->patternlen)); + hashmap_add(map, &vfs->ent); +} + +static void initialize_includes_hashmap(struct hashmap *map, struct strbuf *vfs_data) +{ + char *buf, *entry; + size_t len; + int i; + + /* + * Build a hashmap of the virtual file system data we can use to look + * for cache entry matches quickly + */ + vfshash = ignore_case ? memihash : memhash; + vfscmp = ignore_case ? 
strncasecmp : strncmp; + hashmap_init(map, vfs_hashmap_cmp, NULL, 0); + + entry = buf = vfs_data->buf; + len = vfs_data->len; + for (i = 0; i < len; i++) { + if (buf[i] == '\0') { + includes_hashmap_add(map, entry, buf + i - entry); + entry = buf + i + 1; + } + } +} + +/* + * Return 1 if the requested item is found in the virtual file system, + * 0 for not found and -1 for undecided. + */ +int is_included_in_virtualfilesystem(const char *pathname, int pathlen) +{ + if (!core_virtualfilesystem) + return -1; + + if (!virtual_filesystem_hashmap.tablesize && virtual_filesystem_data.len) + initialize_includes_hashmap(&virtual_filesystem_hashmap, &virtual_filesystem_data); + if (!virtual_filesystem_hashmap.tablesize) + return -1; + + return check_includes_hashmap(&virtual_filesystem_hashmap, pathname, pathlen); +} + +static void parent_directory_hashmap_add(struct hashmap *map, const char *pattern, const int patternlen) +{ + char *slash; + struct virtualfilesystem *vfs; + + /* + * Add any directories leading up to the file as the excludes logic + * needs to match directories leading up to the files as well. Detect + * and prevent unnecessary duplicate entries which will be common. + */ + if (patternlen > 1) { + slash = strchr(pattern + 1, '/'); + while (slash) { + vfs = xmalloc(sizeof(struct virtualfilesystem)); + vfs->pattern = pattern; + vfs->patternlen = slash - pattern + 1; + hashmap_entry_init(&vfs->ent, vfshash(vfs->pattern, vfs->patternlen)); + if (hashmap_get_entry(map, vfs, ent, NULL)) + free(vfs); + else + hashmap_add(map, &vfs->ent); + slash = strchr(slash + 1, '/'); + } + } +} + +static void initialize_parent_directory_hashmap(struct hashmap *map, struct strbuf *vfs_data) +{ + char *buf, *entry; + size_t len; + int i; + + /* + * Build a hashmap of the parent directories contained in the virtual + * file system data we can use to look for matches quickly + */ + vfshash = ignore_case ? memihash : memhash; + vfscmp = ignore_case ? 
strncasecmp : strncmp; + hashmap_init(map, vfs_hashmap_cmp, NULL, 0); + + entry = buf = vfs_data->buf; + len = vfs_data->len; + for (i = 0; i < len; i++) { + if (buf[i] == '\0') { + parent_directory_hashmap_add(map, entry, buf + i - entry); + entry = buf + i + 1; + } + } +} + +static int check_directory_hashmap(struct hashmap *map, const char *pathname, int pathlen) +{ + struct strbuf sb = STRBUF_INIT; + struct virtualfilesystem vfs; + + /* Check for directory */ + strbuf_reset(&sb); + strbuf_add(&sb, pathname, pathlen); + strbuf_addch(&sb, '/'); + vfs.pattern = sb.buf; + vfs.patternlen = sb.len; + hashmap_entry_init(&vfs.ent, vfshash(vfs.pattern, vfs.patternlen)); + if (hashmap_get_entry(map, &vfs, ent, NULL)) { + strbuf_release(&sb); + return 0; + } + + strbuf_release(&sb); + return 1; +} + +/* + * Return 1 for exclude, 0 for include and -1 for undecided. + */ +int is_excluded_from_virtualfilesystem(const char *pathname, int pathlen, int dtype) +{ + if (!core_virtualfilesystem) + return -1; + + if (dtype != DT_REG && dtype != DT_DIR && dtype != DT_LNK) + die(_("is_excluded_from_virtualfilesystem passed unhandled dtype")); + + if (dtype == DT_REG) { + int ret = is_included_in_virtualfilesystem(pathname, pathlen); + if (ret > 0) + return 0; + if (ret == 0) + return 1; + return ret; + } + + if (dtype == DT_DIR || dtype == DT_LNK) { + if (!parent_directory_hashmap.tablesize && virtual_filesystem_data.len) + initialize_parent_directory_hashmap(&parent_directory_hashmap, &virtual_filesystem_data); + if (!parent_directory_hashmap.tablesize) + return -1; + + return check_directory_hashmap(&parent_directory_hashmap, pathname, pathlen); + } + + return -1; +} + +/* + * Update the CE_SKIP_WORKTREE bits based on the virtual file system. 
+ */ +void apply_virtualfilesystem(struct index_state *istate) +{ + char *buf, *entry; + int i; + + if (!git_config_get_virtualfilesystem()) + return; + + if (!virtual_filesystem_data.len) + get_virtual_filesystem_data(&virtual_filesystem_data); + + /* set CE_SKIP_WORKTREE bit on all entries */ + for (i = 0; i < istate->cache_nr; i++) + istate->cache[i]->ce_flags |= CE_SKIP_WORKTREE; + + /* clear CE_SKIP_WORKTREE bit for everything in the virtual file system */ + entry = buf = virtual_filesystem_data.buf; + for (i = 0; i < virtual_filesystem_data.len; i++) { + if (buf[i] == '\0') { + int pos, len; + + len = buf + i - entry; + + /* look for a directory wild card (ie "dir1/") */ + if (buf[i - 1] == '/') { + if (ignore_case) + adjust_dirname_case(istate, entry); + pos = index_name_pos(istate, entry, len - 1); + if (pos < 0) { + pos = -pos - 1; + while (pos < istate->cache_nr && !fspathncmp(istate->cache[pos]->name, entry, len)) { + istate->cache[pos]->ce_flags &= ~CE_SKIP_WORKTREE; + pos++; + } + } + } else { + if (ignore_case) { + struct cache_entry *ce = index_file_exists(istate, entry, len, ignore_case); + if (ce) + ce->ce_flags &= ~CE_SKIP_WORKTREE; + } else { + int pos = index_name_pos(istate, entry, len); + if (pos >= 0) + istate->cache[pos]->ce_flags &= ~CE_SKIP_WORKTREE; + } + } + + entry += len + 1; + } + } +} + +/* + * Free the virtual file system data structures. + */ +void free_virtualfilesystem(void) { + hashmap_free_entries(&virtual_filesystem_hashmap, struct virtualfilesystem, ent); + hashmap_free_entries(&parent_directory_hashmap, struct virtualfilesystem, ent); + strbuf_release(&virtual_filesystem_data); +} diff --git a/virtualfilesystem.h b/virtualfilesystem.h new file mode 100644 index 00000000000000..5e8c5b096df09a --- /dev/null +++ b/virtualfilesystem.h @@ -0,0 +1,25 @@ +#ifndef VIRTUALFILESYSTEM_H +#define VIRTUALFILESYSTEM_H + +/* + * Update the CE_SKIP_WORKTREE bits based on the virtual file system. 
+ */ +void apply_virtualfilesystem(struct index_state *istate); + +/* + * Return 1 if the requested item is found in the virtual file system, + * 0 for not found and -1 for undecided. + */ +int is_included_in_virtualfilesystem(const char *pathname, int pathlen); + +/* + * Return 1 for exclude, 0 for include and -1 for undecided. + */ +int is_excluded_from_virtualfilesystem(const char *pathname, int pathlen, int dtype); + +/* + * Free the virtual file system data structures. + */ +void free_virtualfilesystem(void); + +#endif From 162ac55ecf2abe8506de65f80b05f84c74addec5 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Thu, 7 Jun 2018 13:49:01 -0400 Subject: [PATCH 056/129] Update the virtualfilesystem support We now specify that it needs to be run from the root of the git work tree. This enables the hook to be found even if the current working directory is not the root of the repo (like when running 'git diff' with Beyond Compare configured as the diff tool). Also simplify how the argv[] parameter is created.
Signed-off-by: Ben Peart --- virtualfilesystem.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/virtualfilesystem.c b/virtualfilesystem.c index 9aa2ca239015dd..de160cd5ee865f 100644 --- a/virtualfilesystem.c +++ b/virtualfilesystem.c @@ -36,18 +36,14 @@ static int vfs_hashmap_cmp(const void *unused_cmp_data, static void get_virtual_filesystem_data(struct strbuf *vfs_data) { struct child_process cp = CHILD_PROCESS_INIT; - char ver[64]; - const char *argv[3]; int err; strbuf_init(vfs_data, 0); - snprintf(ver, sizeof(ver), "%d", HOOK_INTERFACE_VERSION); - argv[0] = core_virtualfilesystem; - argv[1] = ver; - argv[2] = NULL; - cp.argv = argv; + argv_array_push(&cp.args, core_virtualfilesystem); + argv_array_pushf(&cp.args, "%d", HOOK_INTERFACE_VERSION); cp.use_shell = 1; + cp.dir = get_git_work_tree(); err = capture_command(&cp, vfs_data, 1024); if (err) From 85aef2e9da8027cfc8b60818c73d9516a6f6f638 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Wed, 1 Aug 2018 13:26:22 -0400 Subject: [PATCH 057/129] virtualfilesystem: don't run the virtual file system hook if the index has been redirected Fixes #13 Some git commands spawn helpers and redirect the index to a different location. These include "difftool -d" and the sequencer (i.e. `git rebase -i`, `git cherry-pick` and `git revert`) and others. In those instances we don't want to update their temporary index with our virtualization data. 
Helped-by: Johannes Schindelin Signed-off-by: Ben Peart --- config.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/config.c b/config.c index 3332359419f197..801184d0a36856 100644 --- a/config.c +++ b/config.c @@ -2354,10 +2354,24 @@ int git_config_get_virtualfilesystem(void) if (core_virtualfilesystem && !*core_virtualfilesystem) core_virtualfilesystem = NULL; - /* virtual file system relies on the sparse checkout logic so force it on */ if (core_virtualfilesystem) { - core_apply_sparse_checkout = 1; - return 1; + /* + * Some git commands spawn helpers and redirect the index to a different + * location. These include "difftool -d" and the sequencer + * (i.e. `git rebase -i`, `git cherry-pick` and `git revert`) and others. + * In those instances we don't want to update their temporary index with + * our virtualization data. + */ + char *default_index_file = xstrfmt("%s/%s", the_repository->gitdir, "index"); + int should_run_hook = !strcmp(default_index_file, the_repository->index_file); + + free(default_index_file); + if (should_run_hook) { + /* virtual file system relies on the sparse checkout logic so force it on */ + core_apply_sparse_checkout = 1; + return 1; + } + core_virtualfilesystem = NULL; } return 0; From 42230ccc524a742d109033c1f46adfa872f4c558 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Tue, 25 Sep 2018 16:28:16 -0400 Subject: [PATCH 058/129] virtualfilesystem: fix bug with symlinks being ignored The virtual file system code incorrectly treated symlinks as directories instead of regular files. This meant symlinks were not included even if they are listed in the list of files returned by the core.virtualFilesystem hook proc. 
Fixes #25 Signed-off-by: Ben Peart --- virtualfilesystem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virtualfilesystem.c b/virtualfilesystem.c index de160cd5ee865f..e2102ca4f803ef 100644 --- a/virtualfilesystem.c +++ b/virtualfilesystem.c @@ -222,7 +222,7 @@ int is_excluded_from_virtualfilesystem(const char *pathname, int pathlen, int dt if (dtype != DT_REG && dtype != DT_DIR && dtype != DT_LNK) die(_("is_excluded_from_virtualfilesystem passed unhandled dtype")); - if (dtype == DT_REG) { + if (dtype == DT_REG || dtype == DT_LNK) { int ret = is_included_in_virtualfilesystem(pathname, pathlen); if (ret > 0) return 0; @@ -231,7 +231,7 @@ int is_excluded_from_virtualfilesystem(const char *pathname, int pathlen, int dt return ret; } - if (dtype == DT_DIR || dtype == DT_LNK) { + if (dtype == DT_DIR) { if (!parent_directory_hashmap.tablesize && virtual_filesystem_data.len) initialize_parent_directory_hashmap(&parent_directory_hashmap, &virtual_filesystem_data); if (!parent_directory_hashmap.tablesize) From f41adf20a624af4923c58b4ebf4febca773481f4 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Tue, 9 Oct 2018 10:19:14 -0600 Subject: [PATCH 059/129] virtualfilesystem: check if directory is included Add check to see if a directory is included in the virtualfilesystem before checking the directory hashmap. This allows a directory entry like foo/ to find all untracked files in subdirectories. --- t/t1092-virtualfilesystem.sh | 2 ++ virtualfilesystem.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/t/t1092-virtualfilesystem.sh b/t/t1092-virtualfilesystem.sh index 940ed47940521f..bc5449ae4745b8 100755 --- a/t/t1092-virtualfilesystem.sh +++ b/t/t1092-virtualfilesystem.sh @@ -221,6 +221,8 @@ test_expect_success 'verify folder entries include all files' ' cat > expected <<-\EOF && ?? dir1/a ?? dir1/b + ?? dir1/dir2/a + ?? dir1/dir2/b ?? 
dir1/untracked.txt EOF test_cmp expected actual diff --git a/virtualfilesystem.c b/virtualfilesystem.c index e2102ca4f803ef..c50ee2d29bfdaf 100644 --- a/virtualfilesystem.c +++ b/virtualfilesystem.c @@ -232,6 +232,10 @@ int is_excluded_from_virtualfilesystem(const char *pathname, int pathlen, int dt } if (dtype == DT_DIR) { + int ret = is_included_in_virtualfilesystem(pathname, pathlen); + if (ret > 0) + return 0; + if (!parent_directory_hashmap.tablesize && virtual_filesystem_data.len) initialize_parent_directory_hashmap(&parent_directory_hashmap, &virtual_filesystem_data); if (!parent_directory_hashmap.tablesize) From 4a54e81089ba5217766cf6b9fcb99bad9a743033 Mon Sep 17 00:00:00 2001 From: Jameson Miller Date: Tue, 20 Nov 2018 11:53:53 -0500 Subject: [PATCH 060/129] vfs: fix case where directories not handled correctly The vfs does not correctly handle the case when there is a file that begins with the same prefix as a directory. For example, the following setup would encounter this issue: A directory contains a file named `dir1.sln` and a directory named `dir1/`. The directory `dir1` contains other files. The directory `dir1` is in the virtual file system list. The contents of `dir1` should be in the virtual file system, but they are not. The contents of this directory do not have the skip worktree bit cleared as expected. The problem is in the `apply_virtualfilesystem(...)` function where it does not include the trailing slash of the directory name when looking up the position in the index to start clearing the skip worktree bit. The fix is to include the trailing slash when finding the first index entry from `index_name_pos(...)`.
--- t/t1092-virtualfilesystem.sh | 19 +++++++++++++++++++ virtualfilesystem.c | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/t/t1092-virtualfilesystem.sh b/t/t1092-virtualfilesystem.sh index bc5449ae4745b8..2379bca25a6426 100755 --- a/t/t1092-virtualfilesystem.sh +++ b/t/t1092-virtualfilesystem.sh @@ -348,4 +348,23 @@ test_expect_success 'on folder renamed' ' test_cmp expected actual ' +test_expect_success 'folder with same prefix as file' ' + clean_repo && + touch dir1.sln && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/\0" + printf "dir1.sln\0" + EOF + git add dir1.sln && + git ls-files -v > actual && + cat > expected <<-\EOF && + H dir1.sln + H dir1/file1.txt + H dir1/file2.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + test_done diff --git a/virtualfilesystem.c b/virtualfilesystem.c index c50ee2d29bfdaf..c93357e1508a51 100644 --- a/virtualfilesystem.c +++ b/virtualfilesystem.c @@ -277,7 +277,7 @@ void apply_virtualfilesystem(struct index_state *istate) if (buf[i - 1] == '/') { if (ignore_case) adjust_dirname_case(istate, entry); - pos = index_name_pos(istate, entry, len - 1); + pos = index_name_pos(istate, entry, len); if (pos < 0) { pos = -pos - 1; while (pos < istate->cache_nr && !fspathncmp(istate->cache[pos]->name, entry, len)) { From 92e1e1338885cbf0473906350494f602821f7f99 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 25 Jul 2018 12:03:22 -0400 Subject: [PATCH 061/129] status: fix rename reporting when using serialization cache Fix "git status --deserialize" to correctly report both pathnames for renames. Add a test case to verify. A change was made upstream that added an additional "rename_status" field to the "struct wt_status_change_data" structure. It is used during the various print routines to decide if 2 pathnames need to be printed. 
5134ccde642ae9ed6a244c92864c26734d100f4c wt-status.c: rename rename-related fields in wt_status_change_data The fix here is to add that field to the status cache data. Signed-off-by: Jeff Hostetler --- t/t7524-serialized-status.sh | 12 ++++++++++++ wt-status-deserialize.c | 4 +++- wt-status-serialize.c | 4 +++- wt-status.h | 1 + 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/t/t7524-serialized-status.sh b/t/t7524-serialized-status.sh index 6ac29ba3e85b45..872e9629417347 100755 --- a/t/t7524-serialized-status.sh +++ b/t/t7524-serialized-status.sh @@ -268,4 +268,16 @@ EOF ' +test_expect_success 'renames' ' + git init rename_test && + echo OLDNAME >rename_test/OLDNAME && + git -C rename_test add OLDNAME && + git -C rename_test commit -m OLDNAME && + git -C rename_test mv OLDNAME NEWNAME && + git -C rename_test status --serialize=renamed.dat >output.1 && + echo DIRT >rename_test/DIRT && + git -C rename_test status --deserialize=renamed.dat >output.2 && + test_i18ncmp output.1 output.2 +' + test_done diff --git a/wt-status-deserialize.c b/wt-status-deserialize.c index 924df8df736fce..1da78249265d23 100644 --- a/wt-status-deserialize.c +++ b/wt-status-deserialize.c @@ -200,6 +200,7 @@ static int wt_deserialize_v1_changed_items(const struct wt_status *cmd_s, d->worktree_status = ntohl(sd->fixed.worktree_status); d->index_status = ntohl(sd->fixed.index_status); d->stagemask = ntohl(sd->fixed.stagemask); + d->rename_status = ntohl(sd->fixed.rename_status); d->rename_score = ntohl(sd->fixed.rename_score); d->mode_head = ntohl(sd->fixed.mode_head); d->mode_index = ntohl(sd->fixed.mode_index); @@ -218,10 +219,11 @@ static int wt_deserialize_v1_changed_items(const struct wt_status *cmd_s, trace_printf_key( &trace_deserialize, - "change: %d %d %d %d %o %o %o %d %d %s %s '%s' '%s'", + "change: %d %d %d %d %d %o %o %o %d %d %s %s '%s' '%s'", d->worktree_status, d->index_status, d->stagemask, + d->rename_status, d->rename_score, d->mode_head, d->mode_index, diff 
--git a/wt-status-serialize.c b/wt-status-serialize.c index 4acb6f3defa055..60364855e189aa 100644 --- a/wt-status-serialize.c +++ b/wt-status-serialize.c @@ -78,10 +78,11 @@ static inline void wt_serialize_v1_changed(struct wt_status *s, int fd, int len_path, len_rename_source; trace_printf_key(&trace_serialize, - "change: %d %d %d %d %o %o %o %d %d %s %s '%s' '%s'", + "change: %d %d %d %d %d %o %o %o %d %d %s %s '%s' '%s'", d->worktree_status, d->index_status, d->stagemask, + d->rename_status, d->rename_score, d->mode_head, d->mode_index, @@ -96,6 +97,7 @@ static inline void wt_serialize_v1_changed(struct wt_status *s, int fd, sd.fixed.worktree_status = htonl(d->worktree_status); sd.fixed.index_status = htonl(d->index_status); sd.fixed.stagemask = htonl(d->stagemask); + sd.fixed.rename_status = htonl(d->rename_status); sd.fixed.rename_score = htonl(d->rename_score); sd.fixed.mode_head = htonl(d->mode_head); sd.fixed.mode_index = htonl(d->mode_index); diff --git a/wt-status.h b/wt-status.h index e6b37492853928..30a86f28dfe55d 100644 --- a/wt-status.h +++ b/wt-status.h @@ -172,6 +172,7 @@ struct wt_status_serialize_data_fixed uint32_t worktree_status; uint32_t index_status; uint32_t stagemask; + uint32_t rename_status; uint32_t rename_score; uint32_t mode_head; uint32_t mode_index; From c5bf92afc41596edbe53c506b4840a028a1eec94 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 13 Jun 2018 14:10:26 -0400 Subject: [PATCH 062/129] commit: add generation to pop_most_recent_commit() The method pop_most_recent_commit() is confusingly named, in that it pops the most-recent commit, but also adds that commit's parents to the list. This is used by a few commit walks, especially the one in ref_newer(). 'git push' uses ref_newer() to check if a force-push is necessary, and in the case of a force-push being needed, the current algorithm walks every reachable commit. 
This is especially severe in the case of an amended commit: they have the same parent, but we still walk to the very end of the graph! Add a 'min_generation' parameter to pop_most_recent_commit() to limit the commits that are walked to those with generation number at least 'min_generation'. This greatly reduces the number of commits walked by a force-push. There may be more work to improve this algorithm in the future, but for now this is enough for most cases. This direction has the benefit that it does not affect the non-force-push case at all. Future directions should consider improving that case as well. Signed-off-by: Derrick Stolee --- commit.c | 7 +++++-- commit.h | 6 +++++- fetch-pack.c | 3 ++- sha1-name.c | 3 ++- walker.c | 3 ++- 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/commit.c b/commit.c index 434ec030d6b2a0..d4b991845f3b9d 100644 --- a/commit.c +++ b/commit.c @@ -620,7 +620,8 @@ void commit_list_sort_by_date(struct commit_list **list) } struct commit *pop_most_recent_commit(struct commit_list **list, - unsigned int mark) + unsigned int mark, + uint32_t min_generation) { struct commit *ret = pop_commit(list); struct commit_list *parents = ret->parents; @@ -629,7 +630,9 @@ struct commit *pop_most_recent_commit(struct commit_list **list, struct commit *commit = parents->item; if (!parse_commit(commit) && !(commit->object.flags & mark)) { commit->object.flags |= mark; - commit_list_insert_by_date(commit, list); + + if (commit->generation >= min_generation) + commit_list_insert_by_date(commit, list); } parents = parents->next; } diff --git a/commit.h b/commit.h index 221cdaa34b826b..11e42b7367da18 100644 --- a/commit.h +++ b/commit.h @@ -200,9 +200,13 @@ const char *skip_blank_lines(const char *msg); /** Removes the first commit from a list sorted by date, and adds all * of its parents. + * + * The parents are not added if their generation number is strictly + * lower than min_generation. 
**/ struct commit *pop_most_recent_commit(struct commit_list **list, - unsigned int mark); + unsigned int mark, + uint32_t min_generation); struct commit *pop_commit(struct commit_list **stack); diff --git a/fetch-pack.c b/fetch-pack.c index ede4fbd50c2c02..b8fd60b8558ede 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -532,7 +532,8 @@ static void mark_recent_complete_commits(struct fetch_pack_args *args, while (complete && cutoff <= complete->item->date) { print_verbose(args, _("Marking %s as complete"), oid_to_hex(&complete->item->object.oid)); - pop_most_recent_commit(&complete, COMPLETE); + pop_most_recent_commit(&complete, COMPLETE, + GENERATION_NUMBER_ZERO); } } diff --git a/sha1-name.c b/sha1-name.c index 200eb373ad80fb..5027dd5ba1764c 100644 --- a/sha1-name.c +++ b/sha1-name.c @@ -1271,7 +1271,8 @@ static int get_oid_oneline(struct repository *r, struct commit *commit; int matches; - commit = pop_most_recent_commit(&list, ONELINE_SEEN); + commit = pop_most_recent_commit(&list, ONELINE_SEEN, + GENERATION_NUMBER_ZERO); if (!parse_object(r, &commit->object.oid)) continue; buf = get_commit_buffer(commit, NULL); diff --git a/walker.c b/walker.c index 06cd2bd5691a5d..9e1c5db0d61ea4 100644 --- a/walker.c +++ b/walker.c @@ -82,7 +82,8 @@ static int process_commit(struct walker *walker, struct commit *commit) return -1; while (complete && complete->item->date >= commit->date) { - pop_most_recent_commit(&complete, COMPLETE); + pop_most_recent_commit(&complete, COMPLETE, + GENERATION_NUMBER_ZERO); } if (commit->object.flags & COMPLETE) From 3c674868e98680d1e3dfbc3f392504af76543ab0 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 25 Jul 2018 12:28:42 -0400 Subject: [PATCH 063/129] status: add comments for ahead_behind_flags in serialization The "ahead_behind_flags" field of "struct wt_status" does not need to be stored in the serialization cache file, since it is a display property. Update the code comments in both serialize and deserialize to reflect that. 
Signed-off-by: Jeff Hostetler --- wt-status-deserialize.c | 1 + wt-status-serialize.c | 1 + 2 files changed, 2 insertions(+) diff --git a/wt-status-deserialize.c b/wt-status-deserialize.c index 1da78249265d23..ee00c1b3a366f8 100644 --- a/wt-status-deserialize.c +++ b/wt-status-deserialize.c @@ -536,6 +536,7 @@ static int wt_deserialize_fd(const struct wt_status *cmd_s, struct wt_status *de /* show_branch */ /* show_stash */ /* hints */ + /* ahead_behind_flags */ if (cmd_s->detect_rename != des_s->detect_rename) { trace_printf_key(&trace_deserialize, "reject: detect_rename"); return DESERIALIZE_ERR; diff --git a/wt-status-serialize.c b/wt-status-serialize.c index 60364855e189aa..6e0d85c60b3e74 100644 --- a/wt-status-serialize.c +++ b/wt-status-serialize.c @@ -48,6 +48,7 @@ static void wt_serialize_v1_header(struct wt_status *s, int fd) /* show_branch */ /* show_stash */ packet_write_fmt(fd, "hints %d\n", s->hints); + /* ahead_behind_flags */ packet_write_fmt(fd, "detect_rename %d\n", s->detect_rename); packet_write_fmt(fd, "rename_score %d\n", s->rename_score); packet_write_fmt(fd, "rename_limit %d\n", s->rename_limit); From 81921ed70a1ee0f902ceda159386722ef841e39e Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 20 Jul 2018 12:08:50 -0400 Subject: [PATCH 064/129] serialize-status: serialize global and repo-local exclude file metadata Changes to the global or repo-local excludes files can change the results returned by "git status" for untracked files. Therefore, it is important that the exclude-file values used during serialization are still current at the time of deserialization. Teach "git status --serialize" to report metadata on the user's global exclude file (which defaults to "$XDG_HOME/git/ignore") and for the repo-local excludes file (which is in ".git/info/exclude"). Serialize will record the pathnames and mtimes for these files in the serialization header (next to the mtime data for the .git/index file).
Teach "git status --deserialize" to validate this new metadata. If either exclude file has changed since the serialization-cache-file was written, then deserialize will reject the cache file and force a full/normal status run. Signed-off-by: Jeff Hostetler --- wt-status-deserialize.c | 84 ++++++++++++++++++++++++++++ wt-status-serialize.c | 118 ++++++++++++++++++++++++++++++++++++++++ wt-status.h | 8 +++ 3 files changed, 210 insertions(+) diff --git a/wt-status-deserialize.c b/wt-status-deserialize.c index ee00c1b3a366f8..5c47bf756b20da 100644 --- a/wt-status-deserialize.c +++ b/wt-status-deserialize.c @@ -65,12 +65,69 @@ static int my_validate_index(const struct cache_time *mtime_reported) return DESERIALIZE_OK; } +/* + * Use the given key and exclude pathname to compute a serialization header + * reflecting the current contents on disk. See if that matches the value + * computed for this key when the cache was written. Reject the cache if + * anything has changed. + */ +static int my_validate_excludes(const char *path, const char *key, const char *line) +{ + struct strbuf sb = STRBUF_INIT; + int r; + + wt_serialize_compute_exclude_header(&sb, key, path); + + r = (strcmp(line, sb.buf) ? DESERIALIZE_ERR : DESERIALIZE_OK); + + if (r == DESERIALIZE_ERR) + trace_printf_key(&trace_deserialize, + "%s changed [cached '%s'][observed '%s']", + key, line, sb.buf); + + strbuf_release(&sb); + return r; +} + +static int my_parse_core_excludes(const char *line) +{ + /* + * In dir.c:setup_standard_excludes() they use either the value of + * the "core.excludefile" variable (stored in the global "excludes_file" + * variable) -or- the default value "$XDG_HOME/git/ignore". This is done + * during wt_status_collect_untracked() which we are hoping to not call. + * + * Fake the setup here. 
+ */ + + if (excludes_file) { + return my_validate_excludes(excludes_file, "core_excludes", line); + } else { + char *path = xdg_config_home("ignore"); + int r = my_validate_excludes(path, "core_excludes", line); + free(path); + return r; + } +} + +static int my_parse_repo_excludes(const char *line) +{ + char *path = git_pathdup("info/exclude"); + int r = my_validate_excludes(path, "repo_excludes", line); + free(path); + + return r; +} + static int wt_deserialize_v1_header(struct wt_status *s, int fd) { struct cache_time index_mtime; int line_len, nr_fields; const char *line; const char *arg; + int have_required_index_mtime = 0; + int have_required_core_excludes = 0; + int have_required_repo_excludes = 0; /* * parse header lines up to the first flush packet. @@ -86,6 +143,20 @@ static int wt_deserialize_v1_header(struct wt_status *s, int fd) nr_fields, line); return DESERIALIZE_ERR; } + have_required_index_mtime = 1; + continue; + } + + if (skip_prefix(line, "core_excludes ", &arg)) { + if (my_parse_core_excludes(line) != DESERIALIZE_OK) + return DESERIALIZE_ERR; + have_required_core_excludes = 1; + continue; + } + if (skip_prefix(line, "repo_excludes ", &arg)) { + if (my_parse_repo_excludes(line) != DESERIALIZE_OK) + return DESERIALIZE_ERR; + have_required_repo_excludes = 1; continue; } @@ -170,6 +241,19 @@ static int wt_deserialize_v1_header(struct wt_status *s, int fd) return DESERIALIZE_ERR; } + if (!have_required_index_mtime) { + trace_printf_key(&trace_deserialize, "missing '%s'", "index_mtime"); + return DESERIALIZE_ERR; + } + if (!have_required_core_excludes) { + trace_printf_key(&trace_deserialize, "missing '%s'", "core_excludes"); + return DESERIALIZE_ERR; + } + if (!have_required_repo_excludes) { + trace_printf_key(&trace_deserialize, "missing '%s'", "repo_excludes"); + return DESERIALIZE_ERR; + } + return my_validate_index(&index_mtime); } diff --git a/wt-status-serialize.c b/wt-status-serialize.c index 6e0d85c60b3e74..f68235593997e4 100644 --- 
a/wt-status-serialize.c +++ b/wt-status-serialize.c @@ -4,6 +4,122 @@ static struct trace_key trace_serialize = TRACE_KEY_INIT(SERIALIZE); +/* + * Compute header record for exclude file using format: + * SP SP LF + */ +void wt_serialize_compute_exclude_header(struct strbuf *sb, + const char *key, + const char *path) +{ + struct stat st; + struct stat_data sd; + + memset(&sd, 0, sizeof(sd)); + + strbuf_setlen(sb, 0); + + if (!path || !*path) { + strbuf_addf(sb, "%s U (unset)", key); + } else if (lstat(path, &st) == -1) { + if (is_missing_file_error(errno)) + strbuf_addf(sb, "%s E (not-found) %s", key, path); + else + strbuf_addf(sb, "%s E (other) %s", key, path); + } else { + fill_stat_data(&sd, &st); + strbuf_addf(sb, "%s F %d %d %s", + key, sd.sd_mtime.sec, sd.sd_mtime.nsec, path); + } +} + +static void append_exclude_info(int fd, const char *path, const char *key) +{ + struct strbuf sb = STRBUF_INIT; + + wt_serialize_compute_exclude_header(&sb, key, path); + + packet_write_fmt(fd, "%s\n", sb.buf); + + strbuf_release(&sb); +} + +static void append_core_excludes_file_info(int fd) +{ + /* + * Write pathname and mtime of the core/global excludes file to + * the status cache header. Since a change in the global excludes + * will/may change the results reported by status, the deserialize + * code should be able to reject the status cache if the excludes + * file changes since when the cache was written. + * + * The "core.excludefile" setting defaults to $XDG_HOME/git/ignore + * and uses a global variable which should have been set during + * wt_status_collect_untracked(). + * + * See dir.c:setup_standard_excludes() + */ + append_exclude_info(fd, excludes_file, "core_excludes"); +} + +static void append_repo_excludes_file_info(int fd) +{ + /* + * Likewise, there is a per-repo excludes file in .git/info/excludes + * that can change the results reported by status. And the deserialize + * code needs to be able to reject the status cache if this file + * changes. 
+ * + * See dir.c:setup_standard_excludes() and git_path_info_excludes(). + * We replicate the pathname construction here because of the static + * variables/functions used in dir.c. + */ + char *path = git_pathdup("info/exclude"); + + append_exclude_info(fd, path, "repo_excludes"); + + free(path); +} + +/* + * WARNING: The status cache attempts to preserve the essential in-memory + * status data after a status scan into a "serialization" (aka "status cache") + * file. It allows later "git status --deserialize=" instances to + * just print the cached status results without scanning the workdir (and + * without reading the index). + * + * The status cache file is valid as long as: + * [1] the set of functional command line options are the same (think "-u"). + * [2] repo-local and user-global configuration settings are compatible. + * [3] nothing in the workdir has changed. + * + * We rely on: + * [1.a] We remember the relevant (functional, non-display) command line + * arguments in the status cache header. + * [2.a] We use the mtime of the .git/index to detect staging changes. + * [2.b] We use the mtimes of the excludes files to detect changes that + * might affect untracked file reporting. + * + * But we need external help to verify [3]. + * [] This includes changes to tracked files. + * [] This includes changes to tracked .gitignore files that might change + * untracked file reporting. + * [] This includes the creation of new, untracked per-directory .gitignore + * files that might change untracked file reporting. + * + * [3.a] On GVFS repos, we rely on the GVFS service (mount) daemon to + * watch the filesystem and invalidate (delete) the status cache + * when anything changes inside the workdir. + * + * [3.b] TODO This problem is not solved for non-GVFS repos. + * [] It is possible that the untracked-cache index extension + * could help with this but that requires status to read the + * index to load the extension. 
+ * [] It is possible that the new fsmonitor facility could also + * provide this information, but that to requires reading the + * index. + */ + /* * Write V1 header fields. */ @@ -16,6 +132,8 @@ static void wt_serialize_v1_header(struct wt_status *s, int fd) packet_write_fmt(fd, "index_mtime %d %d\n", s->repo->index->timestamp.sec, s->repo->index->timestamp.nsec); + append_core_excludes_file_info(fd); + append_repo_excludes_file_info(fd); /* * Write data from wt_status to qualify this status report. diff --git a/wt-status.h b/wt-status.h index 30a86f28dfe55d..0f127ff24bea07 100644 --- a/wt-status.h +++ b/wt-status.h @@ -212,4 +212,12 @@ void wt_status_serialize_v1(int fd, struct wt_status *s); int wt_status_deserialize(const struct wt_status *cmd_s, const char *path); +/* + * A helper routine for serialize and deserialize to compute + * metadata for the user-global and repo-local excludes files. + */ +void wt_serialize_compute_exclude_header(struct strbuf *sb, + const char *key, + const char *path); + #endif /* STATUS_H */ From c5ef02ae1a44a752fe341741816990e427f15a9d Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 25 Jul 2018 14:49:37 -0400 Subject: [PATCH 065/129] status: deserialization wait Teach `git status --deserialize` to either wait indefinitely or immediately fail if the status serialization cache file is stale. Signed-off-by: Jeff Hostetler --- Documentation/config/status.txt | 16 +++++ builtin/commit.c | 59 +++++++++++++++- t/t7524-serialized-status.sh | 52 ++++++++++++++ wt-status-deserialize.c | 119 +++++++++++++++++++++++++++++--- wt-status.h | 12 +++- 5 files changed, 245 insertions(+), 13 deletions(-) diff --git a/Documentation/config/status.txt b/Documentation/config/status.txt index 65cecc12e80c80..af043d7e26f269 100644 --- a/Documentation/config/status.txt +++ b/Documentation/config/status.txt @@ -81,3 +81,19 @@ status.deserializePath:: generated by `--serialize`. This will be overridden by `--deserialize=` on the command line.
If the cache file is invalid or stale, git will fall-back and compute status normally. + +status.deserializeWait:: + EXPERIMENTAL, Specifies what `git status --deserialize` should do + if the serialization cache file is stale and whether it should + fall-back and compute status normally. This will be overridden by + `--deserialize-wait=` on the command line. ++ +-- +* `fail` - cause git to exit with an error when the status cache file +is stale; this is intended for testing and debugging. +* `block` - cause git to spin and periodically retry the cache file +every 100 ms; this is intended to help coordinate with another git +instance concurrently computing the cache file. +* `no` - to immediately fall-back if cache file is stale. This is the default. +* `` - time (in tenths of a second) to spin and retry. +-- diff --git a/builtin/commit.c b/builtin/commit.c index dd06c99dacfb10..b4a75e609d038c 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -153,6 +153,9 @@ static int do_implicit_deserialize = 0; static int do_explicit_deserialize = 0; static char *deserialize_path = NULL; +static enum wt_status_deserialize_wait implicit_deserialize_wait = DESERIALIZE_WAIT__UNSET; +static enum wt_status_deserialize_wait explicit_deserialize_wait = DESERIALIZE_WAIT__UNSET; + /* * --serialize | --serialize= * @@ -218,6 +221,40 @@ static int opt_parse_deserialize(const struct option *opt, const char *arg, int return 0; } +static enum wt_status_deserialize_wait parse_dw(const char *arg) +{ + int tenths; + + if (!strcmp(arg, "fail")) + return DESERIALIZE_WAIT__FAIL; + else if (!strcmp(arg, "block")) + return DESERIALIZE_WAIT__BLOCK; + else if (!strcmp(arg, "no")) + return DESERIALIZE_WAIT__NO; + + /* + * Otherwise, assume it is a timeout in tenths of a second. + * If it contains a bogus value, atol() will return zero + * which is OK. 
+ */ + tenths = atol(arg); + if (tenths < 0) + tenths = DESERIALIZE_WAIT__NO; + return tenths; +} + +static int opt_parse_deserialize_wait(const struct option *opt, + const char *arg, + int unset) +{ + if (unset) + explicit_deserialize_wait = DESERIALIZE_WAIT__UNSET; + else + explicit_deserialize_wait = parse_dw(arg); + + return 0; +} + static int opt_parse_m(const struct option *opt, const char *arg, int unset) { struct strbuf *buf = opt->value; @@ -1401,6 +1438,13 @@ static int git_status_config(const char *k, const char *v, void *cb) } return 0; } + if (!strcmp(k, "status.deserializewait")) { + if (!v || !*v) + implicit_deserialize_wait = DESERIALIZE_WAIT__UNSET; + else + implicit_deserialize_wait = parse_dw(v); + return 0; + } if (!strcmp(k, "status.showuntrackedfiles")) { if (!v) return config_error_nonbool(k); @@ -1465,6 +1509,9 @@ int cmd_status(int argc, const char **argv, const char *prefix) { OPTION_CALLBACK, 0, "deserialize", NULL, N_("path"), N_("deserialize raw status data from file"), PARSE_OPT_OPTARG, opt_parse_deserialize }, + { OPTION_CALLBACK, 0, "deserialize-wait", NULL, + N_("fail|block|no"), N_("how to wait if status cache file is invalid"), + PARSE_OPT_OPTARG, opt_parse_deserialize_wait }, OPT_SET_INT(0, "long", &status_format, N_("show status in long format (default)"), STATUS_FORMAT_LONG), @@ -1577,11 +1624,21 @@ int cmd_status(int argc, const char **argv, const char *prefix) } if (try_deserialize) { + int result; + enum wt_status_deserialize_wait dw = implicit_deserialize_wait; + if (explicit_deserialize_wait != DESERIALIZE_WAIT__UNSET) + dw = explicit_deserialize_wait; + if (dw == DESERIALIZE_WAIT__UNSET) + dw = DESERIALIZE_WAIT__NO; + if (s.relative_paths) s.prefix = prefix; - if (wt_status_deserialize(&s, deserialize_path) == DESERIALIZE_OK) + result = wt_status_deserialize(&s, deserialize_path, dw); + if (result == DESERIALIZE_OK) return 0; + if (dw == DESERIALIZE_WAIT__FAIL) + die(_("Rejected status serialization cache")); /* 
deserialize failed, so force the initialization we skipped above. */ enable_fscache(1); diff --git a/t/t7524-serialized-status.sh b/t/t7524-serialized-status.sh index 872e9629417347..8e9d191136598e 100755 --- a/t/t7524-serialized-status.sh +++ b/t/t7524-serialized-status.sh @@ -186,6 +186,58 @@ test_expect_success 'verify new --serialize=path mode' ' test_i18ncmp expect output.2 ' +test_expect_success 'try deserialize-wait feature' ' + test_when_finished "rm -f serialized_status.dat dirt expect.* output.* trace.*" && + + git status --serialize=serialized_status.dat >output.1 && + + # make status cache stale by updating the mtime on the index. confirm that + # deserialize fails when requested. + sleep 1 && + touch .git/index && + test_must_fail git status --deserialize=serialized_status.dat --deserialize-wait=fail && + test_must_fail git -c status.deserializeWait=fail status --deserialize=serialized_status.dat && + + cat >expect.1 <<-\EOF && + ? expect.1 + ? output.1 + ? serialized_status.dat + ? untracked/ + ? untracked_1.txt + EOF + + # refresh the status cache. + git status --porcelain=v2 --serialize=serialized_status.dat >output.1 && + test_cmp expect.1 output.1 && + + # create some dirt. confirm deserialize used the existing status cache. + echo x >dirt && + git status --porcelain=v2 --deserialize=serialized_status.dat >output.2 && + test_cmp output.1 output.2 && + + # make the cache stale and try the timeout feature and wait upto + # 2 tenths of a second. confirm deserialize timed out and rejected + # the status cache and did a normal scan. + + cat >expect.2 <<-\EOF && + ? dirt + ? expect.1 + ? expect.2 + ? output.1 + ? output.2 + ? serialized_status.dat + ? trace.2 + ? untracked/ + ? 
untracked_1.txt + EOF + + sleep 1 && + touch .git/index && + GIT_TRACE_DESERIALIZE=1 git status --porcelain=v2 --deserialize=serialized_status.dat --deserialize-wait=2 >output.2 2>trace.2 && + test_cmp expect.2 output.2 && + grep "wait polled=2 result=1" trace.2 >trace.2g +' + test_expect_success 'merge conflicts' ' # create a merge conflict. diff --git a/wt-status-deserialize.c b/wt-status-deserialize.c index 5c47bf756b20da..61c8fbc44bffa1 100644 --- a/wt-status-deserialize.c +++ b/wt-status-deserialize.c @@ -56,7 +56,8 @@ static int my_validate_index(const struct cache_time *mtime_reported) mtime_observed_on_disk.nsec = ST_MTIME_NSEC(st); if ((mtime_observed_on_disk.sec != mtime_reported->sec) || (mtime_observed_on_disk.nsec != mtime_reported->nsec)) { - trace_printf_key(&trace_deserialize, "index mtime changed [des %d.%d][obs %d.%d]", + trace_printf_key(&trace_deserialize, + "index mtime changed [des %d %d][obs %d %d]", mtime_reported->sec, mtime_reported->nsec, mtime_observed_on_disk.sec, mtime_observed_on_disk.nsec); return DESERIALIZE_ERR; @@ -545,6 +546,8 @@ static inline int my_strcmp_null(const char *a, const char *b) static int wt_deserialize_fd(const struct wt_status *cmd_s, struct wt_status *des_s, int fd) { + memset(des_s, 0, sizeof(*des_s)); + /* * Check the path spec on the current command */ @@ -668,8 +671,101 @@ static int wt_deserialize_fd(const struct wt_status *cmd_s, struct wt_status *de return DESERIALIZE_OK; } +static struct cache_time deserialize_prev_mtime = { 0, 0 }; + +static int try_deserialize_read_from_file_1(const struct wt_status *cmd_s, + const char *path, + struct wt_status *des_s) +{ + struct stat st; + int result; + int fd; + + /* + * If we are spinning waiting for the status cache to become + * valid, skip re-reading it if the mtime has not changed + * since the last time we read it. 
+ */ + if (lstat(path, &st)) { + trace_printf_key(&trace_deserialize, + "could not lstat '%s'", path); + return DESERIALIZE_ERR; + } + if (st.st_mtime == deserialize_prev_mtime.sec && + ST_MTIME_NSEC(st) == deserialize_prev_mtime.nsec) { + trace_printf_key(&trace_deserialize, + "mtime has not changed '%s'", path); + return DESERIALIZE_ERR; + } + + fd = xopen(path, O_RDONLY); + if (fd == -1) { + trace_printf_key(&trace_deserialize, + "could not read '%s'", path); + return DESERIALIZE_ERR; + } + + deserialize_prev_mtime.sec = st.st_mtime; + deserialize_prev_mtime.nsec = ST_MTIME_NSEC(st); + + trace_printf_key(&trace_deserialize, + "reading serialization file (%d %d) '%s'", + deserialize_prev_mtime.sec, + deserialize_prev_mtime.nsec, + path); + + result = wt_deserialize_fd(cmd_s, des_s, fd); + close(fd); + + return result; +} + +static int try_deserialize_read_from_file(const struct wt_status *cmd_s, + const char *path, + enum wt_status_deserialize_wait dw, + struct wt_status *des_s) +{ + int k, limit; + int result = DESERIALIZE_ERR; + + /* + * For "fail" or "no", try exactly once to read the status cache. + * Return an error if the file is stale. + */ + if (dw == DESERIALIZE_WAIT__FAIL || dw == DESERIALIZE_WAIT__NO) + return try_deserialize_read_from_file_1(cmd_s, path, des_s); + + /* + * Wait for the status cache file to refresh. Wait duration can + * be in tenths of a second or unlimited. Poll every 100ms. + */ + if (dw == DESERIALIZE_WAIT__BLOCK) { + /* + * Convert "unlimited" to 1 day. 
+ */ + limit = 10 * 60 * 60 * 24; + } else { + /* spin for dw tenths of a second */ + limit = dw; + } + for (k = 0; k < limit; k++) { + result = try_deserialize_read_from_file_1( + cmd_s, path, des_s); + + if (result == DESERIALIZE_OK) + break; + + sleep_millisec(100); + } + + trace_printf_key(&trace_deserialize, + "wait polled=%d result=%d '%s'", + k, result, path); + return result; +} + /* - * Read raw serialized status data from the given file + * Read raw serialized status data from the given file (or STDIN). * * Verify that the args specified in the current command * are compatible with the deserialized data (such as "-uno"). @@ -677,24 +773,25 @@ static int wt_deserialize_fd(const struct wt_status *cmd_s, struct wt_status *de * Copy display-related fields from the current command * into the deserialized data (so that the user can request * long or short as they please). + * + * Print status report using cached data. */ int wt_status_deserialize(const struct wt_status *cmd_s, - const char *path) + const char *path, + enum wt_status_deserialize_wait dw) { struct wt_status des_s; int result; if (path && *path && strcmp(path, "0")) { - int fd = xopen(path, O_RDONLY); - if (fd == -1) { - trace_printf_key(&trace_deserialize, "could not read '%s'", path); - return DESERIALIZE_ERR; - } - trace_printf_key(&trace_deserialize, "reading serialization file '%s'", path); - result = wt_deserialize_fd(cmd_s, &des_s, fd); - close(fd); + result = try_deserialize_read_from_file(cmd_s, path, dw, &des_s); } else { trace_printf_key(&trace_deserialize, "reading stdin"); + + /* + * Read status cache data from stdin. Ignore the deserialize-wait + * term, since we cannot read stdin multiple times. 
+ */ result = wt_deserialize_fd(cmd_s, &des_s, 0); } diff --git a/wt-status.h b/wt-status.h index 0f127ff24bea07..2199f502300197 100644 --- a/wt-status.h +++ b/wt-status.h @@ -196,6 +196,15 @@ struct wt_status_serialize_data - sizeof(struct wt_status_serialize_data_fixed)]; }; +enum wt_status_deserialize_wait +{ + DESERIALIZE_WAIT__UNSET = -3, + DESERIALIZE_WAIT__FAIL = -2, /* return error, do not fallback */ + DESERIALIZE_WAIT__BLOCK = -1, /* unlimited timeout */ + DESERIALIZE_WAIT__NO = 0, /* immediately fallback */ + /* any positive value is a timeout in tenths of a second */ +}; + /* * Serialize computed status scan results using "version 1" format * to the given file. @@ -210,7 +219,8 @@ void wt_status_serialize_v1(int fd, struct wt_status *s); * fields. */ int wt_status_deserialize(const struct wt_status *cmd_s, - const char *path); + const char *path, + enum wt_status_deserialize_wait dw); /* * A helper routine for serialize and deserialize to compute From 0636e7b621f137df8c2d30d48a07a05239f73f36 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 12 Oct 2018 14:07:56 +0200 Subject: [PATCH 066/129] rebase/stash: make post-command hook work again William Baker reported that the non-built-in rebase and stash fail to run the post-command hook (which is important for VFS for Git, though). The reason is that an `exec()` will replace the current process by the newly-exec'ed one (our Windows-specific emulation cannot do that, and does not even try, so this is only an issue on Linux/macOS). As a consequence, not even the atexit() handlers are run, including the one running the post-command hook. To work around that, let's spawn the legacy rebase/stash and exit with the reported exit code. 
--- builtin/stash.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/builtin/stash.c b/builtin/stash.c index daa3c70c18c4d1..5b53a4842e00a7 100644 --- a/builtin/stash.c +++ b/builtin/stash.c @@ -1560,13 +1560,17 @@ int cmd_stash(int argc, const char **argv, const char *prefix) }; if (!use_builtin_stash()) { - const char *path = mkpath("%s/git-legacy-stash", - git_exec_path()); - - if (sane_execvp(path, (char **)argv) < 0) - die_errno(_("could not exec %s"), path); - else - BUG("sane_execvp() returned???"); + struct argv_array args = ARGV_ARRAY_INIT; + int code; + + argv_array_push(&args, mkpath("%s/git-legacy-stash", + git_exec_path())); + argv_array_pushv(&args, argv + 1); + code = run_command_v_opt(args.argv, 0); + if (code < 0) + die_errno(_("could not exec %s"), args.argv[0]); + argv_array_clear(&args); + exit(code); } prefix = setup_git_directory(); From 64f57427b9e1e211bcc3c32a3d6f01b28d930615 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Fri, 16 Nov 2018 11:28:59 -0700 Subject: [PATCH 067/129] send-pack: do not check for sha1 file when GVFS_MISSING_OK set --- send-pack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/send-pack.c b/send-pack.c index 0a2b7a46630c26..1bd79cbe1559b0 100644 --- a/send-pack.c +++ b/send-pack.c @@ -15,6 +15,7 @@ #include "sha1-array.h" #include "gpg-interface.h" #include "cache.h" +#include "gvfs.h" int option_parse_push_signed(const struct option *opt, const char *arg, int unset) @@ -50,7 +51,7 @@ static int send_pack_config(const char *var, const char *value, void *unused) static void feed_object(const struct object_id *oid, FILE *fh, int negative) { - if (negative && + if (negative && !gvfs_config_is_set(GVFS_MISSING_OK) && !has_object_file_with_flags(oid, OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK)) From d34347add034a190132066b2159e683e77f9300d Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Thu, 6 Dec 2018 11:09:19 -0500 Subject: [PATCH 068/129] gvfs: block 
unsupported commands when running in a GVFS repo The following commands and options are not currently supported when working in a GVFS repo. Add code to detect and block these commands from executing. 1) fsck 2) gc 4) prune 5) repack 6) submodule 8) update-index --split-index 9) update-index --index-version (other than 4) 10) update-index --[no-]skip-worktree 11) worktree Signed-off-by: Ben Peart --- builtin/gc.c | 4 ++++ builtin/update-index.c | 10 ++++++++ git.c | 15 ++++++++---- gvfs.h | 1 + t/t0402-block-command-on-gvfs.sh | 39 ++++++++++++++++++++++++++++++++ 5 files changed, 64 insertions(+), 5 deletions(-) create mode 100755 t/t0402-block-command-on-gvfs.sh diff --git a/builtin/gc.c b/builtin/gc.c index 3f76bf4aa73d1a..3b42ccb0f31b71 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -28,6 +28,7 @@ #include "blob.h" #include "tree.h" #include "promisor-remote.h" +#include "gvfs.h" #define FAILED_RUN "failed to run %s" @@ -585,6 +586,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (quiet) argv_array_push(&repack, "-q"); + if ((!auto_gc || (auto_gc && gc_auto_threshold > 0)) && gvfs_config_is_set(GVFS_BLOCK_COMMANDS)) + die(_("'git gc' is not supported on a GVFS repo")); + if (auto_gc) { /* * Auto-gc should be least intrusive as possible. diff --git a/builtin/update-index.c b/builtin/update-index.c index d527b8f1066859..02420e85398c27 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -18,6 +18,7 @@ #include "dir.h" #include "split-index.h" #include "fsmonitor.h" +#include "gvfs.h" /* * Default to not allowing changes to the list of files. The @@ -1133,7 +1134,13 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) argc = parse_options_end(&ctx); getline_fn = nul_term_line ? 
strbuf_getline_nul : strbuf_getline_lf; + if (mark_skip_worktree_only && gvfs_config_is_set(GVFS_BLOCK_COMMANDS)) + die(_("modifying the skip worktree bit is not supported on a GVFS repo")); + if (preferred_index_format) { + if (preferred_index_format != 4 && gvfs_config_is_set(GVFS_BLOCK_COMMANDS)) + die(_("changing the index version is not supported on a GVFS repo")); + if (preferred_index_format < INDEX_FORMAT_LB || INDEX_FORMAT_UB < preferred_index_format) die("index-version %d not in range: %d..%d", @@ -1169,6 +1176,9 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) } if (split_index > 0) { + if (gvfs_config_is_set(GVFS_BLOCK_COMMANDS)) + die(_("split index is not supported on a GVFS repo")); + if (git_config_get_split_index() == 0) warning(_("core.splitIndex is set to false; " "remove or change it, if you really want to " diff --git a/git.c b/git.c index e4ae4fd7b7f8bb..404eac1384edcc 100644 --- a/git.c +++ b/git.c @@ -5,6 +5,7 @@ #include "run-command.h" #include "alias.h" #include "dir.h" +#include "gvfs.h" #define RUN_SETUP (1<<0) #define RUN_SETUP_GENTLY (1<<1) @@ -17,6 +18,7 @@ #define SUPPORT_SUPER_PREFIX (1<<4) #define DELAY_PAGER_CONFIG (1<<5) #define NO_PARSEOPT (1<<6) /* parse-options is not used */ +#define BLOCK_ON_GVFS_REPO (1<<7) /* command not allowed in GVFS repos */ struct cmd_struct { const char *cmd; @@ -495,6 +497,9 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) if (!help && p->option & NEED_WORK_TREE) setup_work_tree(); + if (!help && p->option & BLOCK_ON_GVFS_REPO && gvfs_config_is_set(GVFS_BLOCK_COMMANDS)) + die("'git %s' is not supported on a GVFS repo", p->cmd); + if (run_pre_command_hook(argv)) die("pre-command hook aborted command"); @@ -570,7 +575,7 @@ static struct cmd_struct commands[] = { { "fmt-merge-msg", cmd_fmt_merge_msg, RUN_SETUP }, { "for-each-ref", cmd_for_each_ref, RUN_SETUP }, { "format-patch", cmd_format_patch, RUN_SETUP }, - { "fsck", cmd_fsck, RUN_SETUP }, + { 
"fsck", cmd_fsck, RUN_SETUP | BLOCK_ON_GVFS_REPO}, { "fsck-objects", cmd_fsck, RUN_SETUP }, { "gc", cmd_gc, RUN_SETUP }, { "get-tar-commit-id", cmd_get_tar_commit_id, NO_PARSEOPT }, @@ -608,7 +613,7 @@ static struct cmd_struct commands[] = { { "pack-refs", cmd_pack_refs, RUN_SETUP }, { "patch-id", cmd_patch_id, RUN_SETUP_GENTLY | NO_PARSEOPT }, { "pickaxe", cmd_blame, RUN_SETUP }, - { "prune", cmd_prune, RUN_SETUP }, + { "prune", cmd_prune, RUN_SETUP | BLOCK_ON_GVFS_REPO}, { "prune-packed", cmd_prune_packed, RUN_SETUP }, { "pull", cmd_pull, RUN_SETUP | NEED_WORK_TREE }, { "push", cmd_push, RUN_SETUP }, @@ -621,7 +626,7 @@ static struct cmd_struct commands[] = { { "remote", cmd_remote, RUN_SETUP }, { "remote-ext", cmd_remote_ext, NO_PARSEOPT }, { "remote-fd", cmd_remote_fd, NO_PARSEOPT }, - { "repack", cmd_repack, RUN_SETUP }, + { "repack", cmd_repack, RUN_SETUP | BLOCK_ON_GVFS_REPO }, { "replace", cmd_replace, RUN_SETUP }, { "rerere", cmd_rerere, RUN_SETUP }, { "reset", cmd_reset, RUN_SETUP }, @@ -646,7 +651,7 @@ static struct cmd_struct commands[] = { { "stash", cmd_stash }, { "status", cmd_status, RUN_SETUP | NEED_WORK_TREE }, { "stripspace", cmd_stripspace }, - { "submodule--helper", cmd_submodule__helper, RUN_SETUP | SUPPORT_SUPER_PREFIX | NO_PARSEOPT }, + { "submodule--helper", cmd_submodule__helper, RUN_SETUP | SUPPORT_SUPER_PREFIX | NO_PARSEOPT | BLOCK_ON_GVFS_REPO }, { "switch", cmd_switch, RUN_SETUP | NEED_WORK_TREE }, { "symbolic-ref", cmd_symbolic_ref, RUN_SETUP }, { "tag", cmd_tag, RUN_SETUP | DELAY_PAGER_CONFIG }, @@ -664,7 +669,7 @@ static struct cmd_struct commands[] = { { "verify-tag", cmd_verify_tag, RUN_SETUP }, { "version", cmd_version }, { "whatchanged", cmd_whatchanged, RUN_SETUP }, - { "worktree", cmd_worktree, RUN_SETUP | NO_PARSEOPT }, + { "worktree", cmd_worktree, RUN_SETUP | NO_PARSEOPT | BLOCK_ON_GVFS_REPO }, { "write-tree", cmd_write_tree, RUN_SETUP }, }; diff --git a/gvfs.h b/gvfs.h index 7c9367866f502a..e193502151467a 100644 --- 
a/gvfs.h +++ b/gvfs.h @@ -12,6 +12,7 @@ * The list of bits in the core_gvfs setting */ #define GVFS_SKIP_SHA_ON_INDEX (1 << 0) +#define GVFS_BLOCK_COMMANDS (1 << 1) #define GVFS_MISSING_OK (1 << 2) #define GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT (1 << 3) #define GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK (1 << 4) diff --git a/t/t0402-block-command-on-gvfs.sh b/t/t0402-block-command-on-gvfs.sh new file mode 100755 index 00000000000000..3370abad464d50 --- /dev/null +++ b/t/t0402-block-command-on-gvfs.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +test_description='block commands in GVFS repo' + +. ./test-lib.sh + +not_with_gvfs () { + command=$1 && + shift && + test_expect_success "test $command $*" " + test_config alias.g4rbled $command && + test_config core.gvfs true && + test_must_fail git $command $* && + test_must_fail git g4rbled $* && + test_unconfig core.gvfs && + test_must_fail git -c core.gvfs=true $command $* && + test_must_fail git -c core.gvfs=true g4rbled $* + " +} + +not_with_gvfs fsck +not_with_gvfs gc +not_with_gvfs gc --auto +not_with_gvfs prune +not_with_gvfs repack +not_with_gvfs submodule status +not_with_gvfs update-index --index-version 2 +not_with_gvfs update-index --skip-worktree +not_with_gvfs update-index --no-skip-worktree +not_with_gvfs update-index --split-index +not_with_gvfs worktree list + +test_expect_success 'test gc --auto succeeds when disabled via config' ' + test_config core.gvfs true && + test_config gc.auto 0 && + git gc --auto +' + +test_done From db1cc6d0e0adbc45c7569c4e64450769a72b61b4 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 26 Sep 2018 12:29:26 -0400 Subject: [PATCH 069/129] gvfs:trace2:data: add trace2 tracing around read_object_process Add trace2 region around read_object_process to collect time spent waiting for missing objects to be dynamically fetched. 
Signed-off-by: Jeff Hostetler --- sha1-file.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/sha1-file.c b/sha1-file.c index 85fca17d471aa6..0589dfcb50f372 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -903,6 +903,8 @@ static int read_object_process(const struct object_id *oid) start = getnanotime(); + trace2_region_enter("subprocess", "read_object", the_repository); + if (!subprocess_map_initialized) { subprocess_map_initialized = 1; hashmap_init(&subprocess_map, (hashmap_cmp_fn)cmd2process_cmp, @@ -919,13 +921,16 @@ static int read_object_process(const struct object_id *oid) if (subprocess_start(&subprocess_map, &entry->subprocess, cmd, start_read_object_fn)) { free(entry); - return -1; + err = -1; + goto leave_region; } } process = &entry->subprocess.process; - if (!(CAP_GET & entry->supported_capabilities)) - return -1; + if (!(CAP_GET & entry->supported_capabilities)) { + err = -1; + goto leave_region; + } sigchain_push(SIGPIPE, SIG_IGN); @@ -974,6 +979,10 @@ static int read_object_process(const struct object_id *oid) trace_performance_since(start, "read_object_process"); +leave_region: + trace2_region_leave_printf("subprocess", "read_object", the_repository, + "result %d", err); + return err; } From aae47c68768d0aaa2ab31bd2495c19df18f7b383 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 26 Sep 2018 11:21:22 -0400 Subject: [PATCH 070/129] gvfs:trace2:data: status deserialization information Add trace2 region and data events describing attempts to deserialize status data using a status cache. A category:status, label:deserialize region is pushed around the deserialize code. 
Deserialization results when reading from a file are: category:status, path = <path> category:status, polled = <number_of_attempts> category:status, result = "ok" | "reject" When reading from STDIN are: category:status, path = "STDIN" category:status, result = "ok" | "reject" Status will fall back and run a normal status scan when a "reject" is reported (unless "--deserialize-wait=fail"). If "ok" is reported, status was able to use the status cache and avoid scanning the workdir. Additionally, a cmd_mode is emitted for each step: collection, deserialization, and serialization. For example, if deserialization is attempted and fails and status falls back to actually computing the status, a cmd_mode message containing "deserialize" is issued and then a cmd_mode for "collect" is issued. Also, if deserialization fails, a data message containing the rejection reason is emitted. Signed-off-by: Jeff Hostetler --- builtin/commit.c | 19 +++++++++++- wt-status-deserialize.c | 66 ++++++++++++++++++++++++++++++++++++++--- wt-status.h | 2 ++ 3 files changed, 82 insertions(+), 5 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index b4a75e609d038c..4f93d8951e79be 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -149,6 +149,7 @@ static int opt_parse_porcelain(const struct option *opt, const char *arg, int un static int do_serialize = 0; static char *serialize_path = NULL; +static int reject_implicit = 0; static int do_implicit_deserialize = 0; static int do_explicit_deserialize = 0; static char *deserialize_path = NULL; @@ -211,7 +212,7 @@ static int opt_parse_deserialize(const struct option *opt, const char *arg, int deserialize_path = xstrdup(arg); } if (deserialize_path && *deserialize_path - && (access(deserialize_path, R_OK) != 0)) + && (wt_status_deserialize_access(deserialize_path, R_OK) != 0)) die("cannot find serialization file '%s'", deserialize_path); @@ -1435,6 +1436,8 @@ static int git_status_config(const char *k, const char *v, void *cb) if (v && *v && access(v, R_OK) == 0) {
do_implicit_deserialize = 1; deserialize_path = xstrdup(v); + } else { + reject_implicit = 1; } return 0; } @@ -1591,6 +1594,17 @@ int cmd_status(int argc, const char **argv, const char *prefix) (do_implicit_deserialize || do_explicit_deserialize)); if (try_deserialize) goto skip_init; + /* + * If we implicitly received a status cache pathname from the config + * and the file does not exist, we silently reject it and do the normal + * status "collect". Fake up some trace2 messages to reflect this and + * assist post-processors know this case is different. + */ + if (!do_serialize && reject_implicit) { + trace2_cmd_mode("implicit-deserialize"); + trace2_data_string("status", the_repository, "deserialize/reject", + "status-cache/access"); + } enable_fscache(0); if (status_format != STATUS_FORMAT_PORCELAIN && @@ -1634,6 +1648,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) if (s.relative_paths) s.prefix = prefix; + trace2_cmd_mode("deserialize"); result = wt_status_deserialize(&s, deserialize_path, dw); if (result == DESERIALIZE_OK) return 0; @@ -1651,6 +1666,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) fd = -1; } + trace2_cmd_mode("collect"); wt_status_collect(&s); if (0 <= fd) @@ -1665,6 +1681,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) if (fd_serialize < 0) die_errno(_("could not serialize to '%s'"), serialize_path); + trace2_cmd_mode("serialize"); wt_status_serialize_v1(fd_serialize, &s); close(fd_serialize); } diff --git a/wt-status-deserialize.c b/wt-status-deserialize.c index 61c8fbc44bffa1..5641922d884d6a 100644 --- a/wt-status-deserialize.c +++ b/wt-status-deserialize.c @@ -3,6 +3,22 @@ #include "pkt-line.h" #include "trace.h" +static void set_deserialize_reject_reason(const char *reason) +{ + trace2_data_string("status", the_repository, "deserialize/reject", + reason); +} + +int wt_status_deserialize_access(const char *path, int mode) +{ + int a = access(path, mode); + + if (a != 0) + 
set_deserialize_reject_reason("status-cache/access"); + + return a; +} + static struct trace_key trace_deserialize = TRACE_KEY_INIT(DESERIALIZE); enum deserialize_parse_strategy { @@ -49,6 +65,7 @@ static int my_validate_index(const struct cache_time *mtime_reported) struct cache_time mtime_observed_on_disk; if (lstat(path, &st)) { + set_deserialize_reject_reason("index/not-found"); trace_printf_key(&trace_deserialize, "could not stat index"); return DESERIALIZE_ERR; } @@ -56,6 +73,7 @@ static int my_validate_index(const struct cache_time *mtime_reported) mtime_observed_on_disk.nsec = ST_MTIME_NSEC(st); if ((mtime_observed_on_disk.sec != mtime_reported->sec) || (mtime_observed_on_disk.nsec != mtime_reported->nsec)) { + set_deserialize_reject_reason("index/mtime-changed"); trace_printf_key(&trace_deserialize, "index mtime changed [des %d %d][obs %d %d]", mtime_reported->sec, mtime_reported->nsec, @@ -81,10 +99,12 @@ static int my_validate_excludes(const char *path, const char *key, const char *l r = (strcmp(line, sb.buf) ? 
DESERIALIZE_ERR : DESERIALIZE_OK); - if (r == DESERIALIZE_ERR) + if (r == DESERIALIZE_ERR) { + set_deserialize_reject_reason("excludes/changed"); trace_printf_key(&trace_deserialize, "%s changed [cached '%s'][observed '%s']", key, line, sb.buf); + } strbuf_release(&sb); return r; @@ -140,6 +160,7 @@ static int wt_deserialize_v1_header(struct wt_status *s, int fd) &index_mtime.sec, &index_mtime.nsec); if (nr_fields != 2) { + set_deserialize_reject_reason("v1-header/invalid-index-mtime"); trace_printf_key(&trace_deserialize, "invalid index_mtime (%d) '%s'", nr_fields, line); return DESERIALIZE_ERR; @@ -223,6 +244,7 @@ static int wt_deserialize_v1_header(struct wt_status *s, int fd) /* status_format */ if (skip_prefix(line, "sha1_commit ", &arg)) { if (get_oid_hex(arg, &s->oid_commit)) { + set_deserialize_reject_reason("v1-header/invalid-commit-sha"); trace_printf_key(&trace_deserialize, "invalid sha1_commit"); return DESERIALIZE_ERR; } @@ -238,19 +260,23 @@ static int wt_deserialize_v1_header(struct wt_status *s, int fd) } /* prefix */ + set_deserialize_reject_reason("v1-header/unexpected-line"); trace_printf_key(&trace_deserialize, "unexpected line '%s'", line); return DESERIALIZE_ERR; } if (!have_required_index_mtime) { + set_deserialize_reject_reason("v1-header/missing-index-mtime"); trace_printf_key(&trace_deserialize, "missing '%s'", "index_mtime"); return DESERIALIZE_ERR; } if (!have_required_core_excludes) { + set_deserialize_reject_reason("v1-header/missing-core-excludes"); trace_printf_key(&trace_deserialize, "missing '%s'", "core_excludes"); return DESERIALIZE_ERR; } if (!have_required_repo_excludes) { + set_deserialize_reject_reason("v1-header/missing-repo-excludes"); trace_printf_key(&trace_deserialize, "missing '%s'", "repo_excludes"); return DESERIALIZE_ERR; } @@ -336,6 +362,7 @@ static int wt_deserialize_v1_changed_items(const struct wt_status *cmd_s, * So we reject the status cache and let the fallback * code run. 
*/ + set_deserialize_reject_reason("v1-data/unmerged"); trace_printf_key( &trace_deserialize, "reject: V2 format and umerged file: %s", @@ -471,6 +498,7 @@ static int wt_deserialize_v1(const struct wt_status *cmd_s, struct wt_status *s, * the serialized data */ if (validate_untracked_files_arg(cmd_s->show_untracked_files, s->show_untracked_files, &untracked_strategy)) { + set_deserialize_reject_reason("args/untracked-files"); trace_printf_key(&trace_deserialize, "reject: show_untracked_file: command: %d, serialized : %d", cmd_s->show_untracked_files, s->show_untracked_files); @@ -478,6 +506,7 @@ static int wt_deserialize_v1(const struct wt_status *cmd_s, struct wt_status *s, } if (validate_ignored_files_arg(cmd_s->show_ignored_mode, s->show_ignored_mode, &ignored_strategy)) { + set_deserialize_reject_reason("args/ignored-mode"); trace_printf_key(&trace_deserialize, "reject: show_ignored_mode: command: %d, serialized: %d", cmd_s->show_ignored_mode, s->show_ignored_mode); @@ -511,6 +540,7 @@ static int wt_deserialize_v1(const struct wt_status *cmd_s, struct wt_status *s, return DESERIALIZE_ERR; continue; } + set_deserialize_reject_reason("v1-data/unexpected-line"); trace_printf_key(&trace_deserialize, "unexpected line '%s'", line); return DESERIALIZE_ERR; } @@ -532,6 +562,7 @@ static int wt_deserialize_parse(const struct wt_status *cmd_s, struct wt_status if (version == 1) return wt_deserialize_v1(cmd_s, s, fd); } + set_deserialize_reject_reason("status-cache/unsupported-version"); trace_printf_key(&trace_deserialize, "missing/unsupported version"); return DESERIALIZE_ERR; } @@ -552,6 +583,7 @@ static int wt_deserialize_fd(const struct wt_status *cmd_s, struct wt_status *de * Check the path spec on the current command */ if (cmd_s->pathspec.nr > 1) { + set_deserialize_reject_reason("args/multiple-pathspecs"); trace_printf_key(&trace_deserialize, "reject: multiple pathspecs"); return DESERIALIZE_ERR; } @@ -562,6 +594,7 @@ static int wt_deserialize_fd(const struct 
wt_status *cmd_s, struct wt_status *de */ if (cmd_s->pathspec.nr == 1 && my_strcmp_null(cmd_s->pathspec.items[0].match, "")) { + set_deserialize_reject_reason("args/root-pathspec"); trace_printf_key(&trace_deserialize, "reject: pathspec"); return DESERIALIZE_ERR; } @@ -578,20 +611,24 @@ static int wt_deserialize_fd(const struct wt_status *cmd_s, struct wt_status *de * or "--ignored" settings). */ if (cmd_s->is_initial != des_s->is_initial) { + set_deserialize_reject_reason("args/is-initial-changed"); trace_printf_key(&trace_deserialize, "reject: is_initial"); return DESERIALIZE_ERR; } if (my_strcmp_null(cmd_s->branch, des_s->branch)) { + set_deserialize_reject_reason("args/branch-changed"); trace_printf_key(&trace_deserialize, "reject: branch"); return DESERIALIZE_ERR; } if (my_strcmp_null(cmd_s->reference, des_s->reference)) { + set_deserialize_reject_reason("args/reference-changed"); trace_printf_key(&trace_deserialize, "reject: reference"); return DESERIALIZE_ERR; } /* verbose */ /* amend */ if (cmd_s->whence != des_s->whence) { + set_deserialize_reject_reason("args/whence-changed"); trace_printf_key(&trace_deserialize, "reject: whence"); return DESERIALIZE_ERR; } @@ -625,19 +662,23 @@ static int wt_deserialize_fd(const struct wt_status *cmd_s, struct wt_status *de /* hints */ /* ahead_behind_flags */ if (cmd_s->detect_rename != des_s->detect_rename) { + set_deserialize_reject_reason("args/detect-rename-changed"); trace_printf_key(&trace_deserialize, "reject: detect_rename"); return DESERIALIZE_ERR; } if (cmd_s->rename_score != des_s->rename_score) { + set_deserialize_reject_reason("args/rename-score-changed"); trace_printf_key(&trace_deserialize, "reject: rename_score"); return DESERIALIZE_ERR; } if (cmd_s->rename_limit != des_s->rename_limit) { + set_deserialize_reject_reason("args/rename-limit-changed"); trace_printf_key(&trace_deserialize, "reject: rename_limit"); return DESERIALIZE_ERR; } /* status_format */ if (!oideq(&cmd_s->oid_commit, 
&des_s->oid_commit)) { + set_deserialize_reject_reason("args/commit-changed"); trace_printf_key(&trace_deserialize, "reject: sha1_commit"); return DESERIALIZE_ERR; } @@ -725,15 +766,18 @@ static int try_deserialize_read_from_file(const struct wt_status *cmd_s, enum wt_status_deserialize_wait dw, struct wt_status *des_s) { - int k, limit; + int k = 0; + int limit; int result = DESERIALIZE_ERR; /* * For "fail" or "no", try exactly once to read the status cache. * Return an error if the file is stale. */ - if (dw == DESERIALIZE_WAIT__FAIL || dw == DESERIALIZE_WAIT__NO) - return try_deserialize_read_from_file_1(cmd_s, path, des_s); + if (dw == DESERIALIZE_WAIT__FAIL || dw == DESERIALIZE_WAIT__NO) { + result = try_deserialize_read_from_file_1(cmd_s, path, des_s); + goto done; + } /* * Wait for the status cache file to refresh. Wait duration can @@ -758,6 +802,12 @@ static int try_deserialize_read_from_file(const struct wt_status *cmd_s, sleep_millisec(100); } +done: + trace2_data_string("status", the_repository, "deserialize/path", path); + trace2_data_intmax("status", the_repository, "deserialize/polled", k); + trace2_data_string("status", the_repository, "deserialize/result", + ((result == DESERIALIZE_OK) ? "ok" : "reject")); + trace_printf_key(&trace_deserialize, "wait polled=%d result=%d '%s'", k, result, path); @@ -783,6 +833,8 @@ int wt_status_deserialize(const struct wt_status *cmd_s, struct wt_status des_s; int result; + trace2_region_enter("status", "deserialize", the_repository); + if (path && *path && strcmp(path, "0")) { result = try_deserialize_read_from_file(cmd_s, path, dw, &des_s); } else { @@ -793,8 +845,14 @@ int wt_status_deserialize(const struct wt_status *cmd_s, * term, since we cannot read stdin multiple times. */ result = wt_deserialize_fd(cmd_s, &des_s, 0); + + trace2_data_string("status", the_repository, "deserialize/path", "STDIN"); + trace2_data_string("status", the_repository, "deserialize/result", + ((result == DESERIALIZE_OK) ? 
"ok" : "reject")); } + trace2_region_leave("status", "deserialize", the_repository); + if (result == DESERIALIZE_OK) { wt_status_get_state(cmd_s->repo, &des_s.state, des_s.branch && !strcmp(des_s.branch, "HEAD")); diff --git a/wt-status.h b/wt-status.h index 2199f502300197..a892ecd46aae6b 100644 --- a/wt-status.h +++ b/wt-status.h @@ -222,6 +222,8 @@ int wt_status_deserialize(const struct wt_status *cmd_s, const char *path, enum wt_status_deserialize_wait dw); +int wt_status_deserialize_access(const char *path, int mode); + /* * A helper routine for serialize and deserialize to compute * metadata for the user-global and repo-local excludes files. From 0e538f6fcd4aebeb5fdc17af12b48ad90c344657 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 7 Jan 2019 12:45:48 -0500 Subject: [PATCH 071/129] gvfs:trace2:data: status serialization Add trace information around status serialization. Signed-off-by: Jeff Hostetler --- wt-status-serialize.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wt-status-serialize.c b/wt-status-serialize.c index f68235593997e4..54a365e5a020eb 100644 --- a/wt-status-serialize.c +++ b/wt-status-serialize.c @@ -297,6 +297,8 @@ void wt_status_serialize_v1(int fd, struct wt_status *s) struct string_list_item *iter; int k; + trace2_region_enter("status", "serialize", the_repository); + /* * version header must be first line. */ @@ -330,4 +332,6 @@ void wt_status_serialize_v1(int fd, struct wt_status *s) } packet_flush(fd); } + + trace2_region_leave("status", "serialize", the_repository); } From d906aaf7e577891991a41b28b68d5143ad81dd23 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 19 Nov 2018 16:26:37 -0500 Subject: [PATCH 072/129] gvfs:trace2:data: add vfs stats Report virtual filesystem summary data. 
Signed-off-by: Jeff Hostetler --- virtualfilesystem.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/virtualfilesystem.c b/virtualfilesystem.c index c93357e1508a51..7a107c3e2e107b 100644 --- a/virtualfilesystem.c +++ b/virtualfilesystem.c @@ -254,6 +254,11 @@ void apply_virtualfilesystem(struct index_state *istate) { char *buf, *entry; int i; + int nr_unknown = 0; + int nr_vfs_dirs = 0; + int nr_vfs_rows = 0; + int nr_bulk_skip = 0; + int nr_explicit_skip = 0; if (!git_config_get_virtualfilesystem()) return; @@ -271,16 +276,21 @@ void apply_virtualfilesystem(struct index_state *istate) if (buf[i] == '\0') { int pos, len; + nr_vfs_rows++; + len = buf + i - entry; /* look for a directory wild card (ie "dir1/") */ if (buf[i - 1] == '/') { + nr_vfs_dirs++; if (ignore_case) adjust_dirname_case(istate, entry); pos = index_name_pos(istate, entry, len); if (pos < 0) { pos = -pos - 1; while (pos < istate->cache_nr && !fspathncmp(istate->cache[pos]->name, entry, len)) { + if (istate->cache[pos]->ce_flags & CE_SKIP_WORKTREE) + nr_bulk_skip++; istate->cache[pos]->ce_flags &= ~CE_SKIP_WORKTREE; pos++; } @@ -288,18 +298,41 @@ void apply_virtualfilesystem(struct index_state *istate) } else { if (ignore_case) { struct cache_entry *ce = index_file_exists(istate, entry, len, ignore_case); - if (ce) + if (ce) { + if (ce->ce_flags & CE_SKIP_WORKTREE) + nr_explicit_skip++; ce->ce_flags &= ~CE_SKIP_WORKTREE; + } + else { + nr_unknown++; + } } else { int pos = index_name_pos(istate, entry, len); - if (pos >= 0) + if (pos >= 0) { + if (istate->cache[pos]->ce_flags & CE_SKIP_WORKTREE) + nr_explicit_skip++; istate->cache[pos]->ce_flags &= ~CE_SKIP_WORKTREE; + } + else { + nr_unknown++; + } } } entry += len + 1; } } + + if (nr_vfs_rows > 0) { + trace2_data_intmax("vfs", the_repository, "apply/tracked", nr_bulk_skip + nr_explicit_skip); + + trace2_data_intmax("vfs", the_repository, "apply/vfs_rows", nr_vfs_rows); + 
trace2_data_intmax("vfs", the_repository, "apply/vfs_dirs", nr_vfs_dirs); + + trace2_data_intmax("vfs", the_repository, "apply/nr_unknown", nr_unknown); + trace2_data_intmax("vfs", the_repository, "apply/nr_bulk_skip", nr_bulk_skip); + trace2_data_intmax("vfs", the_repository, "apply/nr_explicit_skip", nr_explicit_skip); + } } /* From fc0fe94b6b97984afdcd1c6cf773250cf8361edc Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 15 Apr 2019 13:39:43 -0700 Subject: [PATCH 073/129] trace2: refactor setting process starting time Create trace2_initialize_clock() and call from main() to capture process start time in isolation and before other sub-systems are ready. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano --- compat/mingw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 518f2e0bae7c6d..49bf251f59d1c5 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -3631,6 +3631,8 @@ int wmain(int argc, const wchar_t **wargv) SetConsoleCtrlHandler(handle_ctrl_c, TRUE); + trace2_initialize_clock(); + maybe_redirect_std_handles(); adjust_symlink_flags(); fsync_object_files = 1; From 597a455020893fe5c1029323f670223dcb048a8e Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 30 Apr 2019 14:12:51 -0400 Subject: [PATCH 074/129] trace2:gvfs:experiment: clear_ce_flags_1 Signed-off-by: Jeff Hostetler --- unpack-trees.c | 1 + 1 file changed, 1 insertion(+) diff --git a/unpack-trees.c b/unpack-trees.c index f5fcea5f998d84..f181264f5dcb18 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1458,6 +1458,7 @@ static int clear_ce_flags(struct index_state *istate, xsnprintf(label, sizeof(label), "clear_ce_flags(0x%08lx,0x%08lx)", (unsigned long)select_mask, (unsigned long)clear_mask); trace2_region_enter("unpack_trees", label, the_repository); + rval = clear_ce_flags_1(istate, istate->cache, istate->cache_nr, From 997365974df259f295e2dd256c0300895a329914 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 30 Apr 2019 
15:27:40 -0400 Subject: [PATCH 075/129] trace2:gvfs:experiment: traverse_trees Signed-off-by: Jeff Hostetler --- unpack-trees.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unpack-trees.c b/unpack-trees.c index f181264f5dcb18..aa9c8fb947ef07 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1610,7 +1610,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options } trace_performance_enter(); + trace2_region_enter("exp", "traverse_trees", the_repository); ret = traverse_trees(o->src_index, len, t, &info); + trace2_region_leave("exp", "traverse_trees", the_repository); trace_performance_leave("traverse_trees"); if (ret < 0) goto return_failed; From 375785868dfb5a6e4a0647fb7a8f8371305d4a52 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 30 Apr 2019 16:02:39 -0400 Subject: [PATCH 076/129] trace2:gvfs:experiment: report_tracking Signed-off-by: Jeff Hostetler --- builtin/checkout.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index 08db2eb3bab7c6..cd718ea538fb2d 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -909,8 +909,11 @@ static void update_refs_for_switch(const struct checkout_opts *opts, remove_branch_state(the_repository, !opts->quiet); strbuf_release(&msg); if (!opts->quiet && - (new_branch_info->path || (!opts->force_detach && !strcmp(new_branch_info->name, "HEAD")))) + (new_branch_info->path || (!opts->force_detach && !strcmp(new_branch_info->name, "HEAD")))) { + trace2_region_enter("exp", "report_tracking", the_repository); report_tracking(new_branch_info); + trace2_region_leave("exp", "report_tracking", the_repository); + } } static int add_pending_uninteresting_ref(const char *refname, From 72284e22c6541d2eae6a00550e27294353ed8538 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 14 Jun 2019 12:38:31 -0400 Subject: [PATCH 077/129] trace2:gvfs:experiment: read_cache: annotate thread usage in read-cache Add trace2_thread_start() and 
trace2_thread_exit() events to the worker threads used to read the index. This gives per-thread perf data. These workers were introduced in: abb4bb83845 read-cache: load cache extensions on a worker thread 77ff1127a4c read-cache: load cache entries on worker threads Signed-off-by: Jeff Hostetler --- read-cache.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/read-cache.c b/read-cache.c index fd3853217dc1c5..6034b337bd2abe 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1994,6 +1994,17 @@ static void *load_index_extensions(void *_data) return NULL; } +static void *load_index_extensions_threadproc(void *_data) +{ + void *result; + + trace2_thread_start("load_index_extensions"); + result = load_index_extensions(_data); + trace2_thread_exit(); + + return result; +} + /* * A helper function that will load the specified range of cache entries * from the memory mapped file and add them to the given index. @@ -2069,12 +2080,17 @@ static void *load_cache_entries_thread(void *_data) struct load_cache_entries_thread_data *p = _data; int i; + trace2_thread_start("load_cache_entries"); + /* iterate across all ieot blocks assigned to this thread */ for (i = p->ieot_start; i < p->ieot_start + p->ieot_blocks; i++) { p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool, p->offset, p->ieot->entries[i].nr, p->mmap, p->ieot->entries[i].offset, NULL); p->offset += p->ieot->entries[i].nr; } + + trace2_thread_exit(); + return NULL; } @@ -2224,7 +2240,7 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) int err; p.src_offset = extension_offset; - err = pthread_create(&p.pthread, NULL, load_index_extensions, &p); + err = pthread_create(&p.pthread, NULL, load_index_extensions_threadproc, &p); if (err) die(_("unable to create load_index_extensions thread: %s"), strerror(err)); From c57a07b9134f69ec4f23ab81d3f3a28bf45e3f41 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 9 Jul 2019 14:43:47 -0400 
Subject: [PATCH 078/129] trace2:gvfs:experiment: read-cache: time read/write of cache-tree extension Add regions around code to read and write the cache-tree extension when the index is read or written. This is an experiment and may be dropped in future releases if we don't need it anymore. This experiment demonstrates that it takes more time to parse and deserialize the cache-tree extension than it does to read the cache-entries. Commits [1] and [2] spread cache-entry reading across N-1 cores and dedicate a single core to simultaneously read the index extensions. Local testing (on my machine) shows that reading the cache-tree extension takes ~0.28 seconds. The 11 cache-entry threads take ~0.08 seconds. The main thread is blocked for 0.15 to 0.20 seconds waiting for the extension thread to finish. Let's use this commit to gather some telemetry and confirm this. My point is that improvements, such as index V5, which makes the cache entries smaller, may improve performance, but the gains may be limited because of this extension, and that we may need to look inside the cache-tree extension to truly improve do_read_index() performance.
[1] abb4bb83845 read-cache: load cache extensions on a worker thread [2] 77ff1127a4c read-cache: load cache entries on worker threads Signed-off-by: Jeff Hostetler --- read-cache.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/read-cache.c b/read-cache.c index 6034b337bd2abe..7d2ceb926ec8a5 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1739,7 +1739,10 @@ static int read_index_extension(struct index_state *istate, { switch (CACHE_EXT(ext)) { case CACHE_EXT_TREE: + trace2_region_enter("index", "read/extension/cache_tree", NULL); istate->cache_tree = cache_tree_read(data, sz); + trace2_data_intmax("index", NULL, "read/extension/cache_tree/bytes", (intmax_t)sz); + trace2_region_leave("index", "read/extension/cache_tree", NULL); break; case CACHE_EXT_RESOLVE_UNDO: istate->resolve_undo = resolve_undo_read(data, sz); @@ -2988,9 +2991,13 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, if (!strip_extensions && !drop_cache_tree && istate->cache_tree) { struct strbuf sb = STRBUF_INIT; + trace2_region_enter("index", "write/extension/cache_tree", NULL); cache_tree_write(&sb, istate->cache_tree); err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_TREE, sb.len) < 0 || ce_write(&c, newfd, sb.buf, sb.len) < 0; + trace2_data_intmax("index", NULL, "write/extension/cache_tree/bytes", (intmax_t)sb.len); + trace2_region_leave("index", "write/extension/cache_tree", NULL); + strbuf_release(&sb); if (err) return -1; From b77de011069efeb8344213eadab6351df30340fa Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 3 Jun 2019 11:53:08 -0400 Subject: [PATCH 079/129] trace2:gvfs:experiment: add prime_cache_tree region Signed-off-by: Jeff Hostetler --- cache-tree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cache-tree.c b/cache-tree.c index 2213f4632ed0b1..d94c7e24c3edcb 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -755,10 +755,14 @@ void prime_cache_tree(struct repository *r, struct index_state *istate, struct tree 
*tree) { + trace2_region_enter("cache_tree", "prime_cache_tree", r); + cache_tree_free(&istate->cache_tree); istate->cache_tree = cache_tree(); prime_cache_tree_rec(r, istate->cache_tree, tree); istate->cache_changed |= CACHE_TREE_CHANGED; + + trace2_region_leave("cache_tree", "prime_cache_tree", r); } /* From adf312780fdc409810d813ab5a0da7d5f00223da Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 16 Jul 2019 09:09:53 -0400 Subject: [PATCH 080/129] trace2:gvfs:experiment: add region to apply_virtualfilesystem() Signed-off-by: Jeff Hostetler --- virtualfilesystem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/virtualfilesystem.c b/virtualfilesystem.c index 7a107c3e2e107b..a7d2ff103a2fc8 100644 --- a/virtualfilesystem.c +++ b/virtualfilesystem.c @@ -263,6 +263,8 @@ void apply_virtualfilesystem(struct index_state *istate) if (!git_config_get_virtualfilesystem()) return; + trace2_region_enter("vfs", "apply", the_repository); + if (!virtual_filesystem_data.len) get_virtual_filesystem_data(&virtual_filesystem_data); @@ -333,6 +335,8 @@ void apply_virtualfilesystem(struct index_state *istate) trace2_data_intmax("vfs", the_repository, "apply/nr_bulk_skip", nr_bulk_skip); trace2_data_intmax("vfs", the_repository, "apply/nr_explicit_skip", nr_explicit_skip); } + + trace2_region_leave("vfs", "apply", the_repository); } /* From 16a1faf04e425cd2a07943127ac522924421188e Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 16 Jul 2019 10:08:08 -0400 Subject: [PATCH 081/129] trace2:gvfs:experiment: add region around unpack_trees() Signed-off-by: Jeff Hostetler --- unpack-trees.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unpack-trees.c b/unpack-trees.c index aa9c8fb947ef07..ba4dcbde5c9a3f 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1524,6 +1524,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options if (len > MAX_UNPACK_TREES) die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES); + 
trace2_region_enter("exp", "unpack_trees", NULL); + trace_performance_enter(); memset(&pl, 0, sizeof(pl)); if (!core_apply_sparse_checkout || !o->update) @@ -1715,6 +1717,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options trace_performance_leave("unpack_trees"); if (!o->keep_pattern_list) clear_pattern_list(&pl); + trace2_region_leave("exp", "unpack_trees", NULL); return ret; return_failed: From 304c14e15b8dd0068625f8341bbf6a4f18962df6 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 16 Jul 2019 10:16:37 -0400 Subject: [PATCH 082/129] trace2:gvfs:experiment: add region to cache_tree_fully_valid() Signed-off-by: Jeff Hostetler --- cache-tree.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index d94c7e24c3edcb..5e9f8cf779b618 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -222,7 +222,7 @@ static void discard_unused_subtrees(struct cache_tree *it) } } -int cache_tree_fully_valid(struct cache_tree *it) +static int cache_tree_fully_valid_1(struct cache_tree *it) { int i; if (!it) @@ -230,12 +230,23 @@ int cache_tree_fully_valid(struct cache_tree *it) if (it->entry_count < 0 || !has_object_file(&it->oid)) return 0; for (i = 0; i < it->subtree_nr; i++) { - if (!cache_tree_fully_valid(it->down[i]->cache_tree)) + if (!cache_tree_fully_valid_1(it->down[i]->cache_tree)) return 0; } return 1; } +int cache_tree_fully_valid(struct cache_tree *it) +{ + int result; + + trace2_region_enter("cache_tree", "fully_valid", NULL); + result = cache_tree_fully_valid_1(it); + trace2_region_leave("cache_tree", "fully_valid", NULL); + + return result; +} + static int update_one(struct cache_tree *it, struct cache_entry **cache, int entries, From cc2047bc959e2b72005b1e482f6a8f881a22a17f Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Tue, 16 Jul 2019 10:40:56 -0400 Subject: [PATCH 083/129] trace2:gvfs:experiment: add unpack_entry() counter to unpack_trees() and report_tracking() 
Signed-off-by: Jeff Hostetler --- builtin/checkout.c | 6 ++++++ packfile.c | 9 +++++++++ packfile.h | 5 +++++ unpack-trees.c | 5 +++++ 4 files changed, 25 insertions(+) diff --git a/builtin/checkout.c b/builtin/checkout.c index cd718ea538fb2d..39c73c3568ed63 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -26,6 +26,7 @@ #include "unpack-trees.h" #include "wt-status.h" #include "xdiff-interface.h" +#include "packfile.h" static const char * const checkout_usage[] = { N_("git checkout [] "), @@ -910,8 +911,13 @@ static void update_refs_for_switch(const struct checkout_opts *opts, strbuf_release(&msg); if (!opts->quiet && (new_branch_info->path || (!opts->force_detach && !strcmp(new_branch_info->name, "HEAD")))) { + unsigned long nr_unpack_entry_at_start; + trace2_region_enter("exp", "report_tracking", the_repository); + nr_unpack_entry_at_start = get_nr_unpack_entry(); report_tracking(new_branch_info); + trace2_data_intmax("exp", NULL, "report_tracking/nr_unpack_entries", + (intmax_t)(get_nr_unpack_entry() - nr_unpack_entry_at_start)); trace2_region_leave("exp", "report_tracking", the_repository); } } diff --git a/packfile.c b/packfile.c index 7e7c04e4d802e2..471d57ce1e6f59 100644 --- a/packfile.c +++ b/packfile.c @@ -1623,6 +1623,13 @@ static void *read_object(struct repository *r, return content; } +static unsigned long g_nr_unpack_entry; + +unsigned long get_nr_unpack_entry(void) +{ + return g_nr_unpack_entry; +} + void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, enum object_type *final_type, unsigned long *final_size) { @@ -1636,6 +1643,8 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC; int base_from_cache = 0; + g_nr_unpack_entry++; + write_pack_access_log(p, obj_offset); /* PHASE 1: drill down to the innermost base object */ diff --git a/packfile.h b/packfile.h index fc7904ec814700..13a5368c9f06e9 100644 --- 
a/packfile.h +++ b/packfile.h @@ -193,4 +193,9 @@ int is_promisor_object(const struct object_id *oid); int load_idx(const char *path, const unsigned int hashsz, void *idx_map, size_t idx_size, struct packed_git *p); +/* + * Return the number of objects fetched from a packfile. + */ +unsigned long get_nr_unpack_entry(void); + #endif diff --git a/unpack-trees.c b/unpack-trees.c index ba4dcbde5c9a3f..829221c5d79646 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -18,6 +18,7 @@ #include "promisor-remote.h" #include "gvfs.h" #include "virtualfilesystem.h" +#include "packfile.h" /* * Error messages expected by scripts out of plumbing commands such as @@ -1520,11 +1521,13 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options int i, ret; static struct cache_entry *dfc; struct pattern_list pl; + unsigned long nr_unpack_entry_at_start; if (len > MAX_UNPACK_TREES) die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES); trace2_region_enter("exp", "unpack_trees", NULL); + nr_unpack_entry_at_start = get_nr_unpack_entry(); trace_performance_enter(); memset(&pl, 0, sizeof(pl)); @@ -1717,6 +1720,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options trace_performance_leave("unpack_trees"); if (!o->keep_pattern_list) clear_pattern_list(&pl); + trace2_data_intmax("unpack_trees", NULL, "unpack_trees/nr_unpack_entries", + (intmax_t)(get_nr_unpack_entry() - nr_unpack_entry_at_start)); trace2_region_leave("exp", "unpack_trees", NULL); return ret; From a82ad4f69217589e253ae95738bc0ce40597b06e Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 25 Jul 2019 15:43:50 -0400 Subject: [PATCH 084/129] trace2:gvfs:experiment: increase default event depth for unpack-tree data --- trace2/tr2_tgt_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trace2/tr2_tgt_event.c b/trace2/tr2_tgt_event.c index 6353e8ad915610..e3193651f00697 100644 --- a/trace2/tr2_tgt_event.c +++ b/trace2/tr2_tgt_event.c @@ -33,7 
+33,7 @@ static struct tr2_dst tr2dst_event = { TR2_SYSENV_EVENT, 0, 0, 0, 0 }; * event target. Use the TR2_SYSENV_EVENT_NESTING setting to increase * region details in the event target. */ -static int tr2env_event_max_nesting_levels = 2; +static int tr2env_event_max_nesting_levels = 4; /* * Use the TR2_SYSENV_EVENT_BRIEF to omit the