From 679bf4e0dd16f7d6f4ba1f05af50cca24b5abee4 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Mon, 25 Nov 2019 11:37:21 -0700 Subject: [PATCH 1/4] fsmonitor: change last update timestamp on the index_state to opaque token Some file system monitors might not use or take a timestamp for processing and in the case of watchman could have race conditions with using a timestamp. Watchman uses something called a clockid that is used for race free queries to it. The clockid for watchman is simply a string. Change the fsmonitor_last_update from being a uint64_t to a char pointer so that any arbitrary data can be stored in it and passed back to the fsmonitor. Signed-off-by: Kevin Willford --- cache.h | 2 +- fsmonitor.c | 49 ++++++++++++++++++++++------------ t/helper/test-dump-fsmonitor.c | 2 +- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/cache.h b/cache.h index 1554488d669c99..170c125db38d6a 100644 --- a/cache.h +++ b/cache.h @@ -324,7 +324,7 @@ struct index_state { struct hashmap dir_hash; struct object_id oid; struct untracked_cache *untracked; - uint64_t fsmonitor_last_update; + char *fsmonitor_last_update; struct ewah_bitmap *fsmonitor_dirty; struct mem_pool *ce_mem_pool; struct progress *progress; diff --git a/fsmonitor.c b/fsmonitor.c index 868cca01e25089..98605872259d0d 100644 --- a/fsmonitor.c +++ b/fsmonitor.c @@ -6,8 +6,9 @@ #include "run-command.h" #include "strbuf.h" -#define INDEX_EXTENSION_VERSION (1) -#define HOOK_INTERFACE_VERSION (1) +#define INDEX_EXTENSION_VERSION1 (1) +#define INDEX_EXTENSION_VERSION2 (2) +#define HOOK_INTERFACE_VERSION (1) struct trace_key trace_fsmonitor = TRACE_KEY_INIT(FSMONITOR); @@ -32,17 +33,26 @@ int read_fsmonitor_extension(struct index_state *istate, const void *data, uint32_t ewah_size; struct ewah_bitmap *fsmonitor_dirty; int ret; + uint64_t timestamp; + struct strbuf last_update = STRBUF_INIT; - if (sz < sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint32_t)) + if (sz < sizeof(uint32_t) + 1 + sizeof(uint32_t)) return error("corrupt fsmonitor extension (too short)"); hdr_version = get_be32(index); index += sizeof(uint32_t); - if (hdr_version != INDEX_EXTENSION_VERSION) + if (hdr_version == INDEX_EXTENSION_VERSION1) { + timestamp = get_be64(index); + strbuf_addf(&last_update, "%"PRIu64"", timestamp); + index += sizeof(uint64_t); + } else if (hdr_version == INDEX_EXTENSION_VERSION2) { + strbuf_addstr(&last_update, index); + index += last_update.len + 1; + } else { return error("bad fsmonitor version %d", hdr_version); + } - istate->fsmonitor_last_update = get_be64(index); - index += sizeof(uint64_t); + istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL); ewah_size = get_be32(index); index += sizeof(uint32_t); @@ -79,7 +89,6 @@ void fill_fsmonitor_bitmap(struct index_state *istate) void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate) { uint32_t hdr_version; - uint64_t tm; uint32_t ewah_start; uint32_t ewah_size = 0; int fixup = 0; @@ -89,11 +98,12 @@ void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate) BUG("fsmonitor_dirty has more entries than the index (%"PRIuMAX" > %u)", (uintmax_t)istate->fsmonitor_dirty->bit_size, istate->cache_nr); - put_be32(&hdr_version, INDEX_EXTENSION_VERSION); + put_be32(&hdr_version, INDEX_EXTENSION_VERSION2); strbuf_add(sb, &hdr_version, sizeof(uint32_t)); - put_be64(&tm, istate->fsmonitor_last_update); - strbuf_add(sb, &tm, sizeof(uint64_t)); + strbuf_addstr(sb, istate->fsmonitor_last_update); + strbuf_addch(sb, 0); /* Want to keep a NUL */ + fixup = sb->len; strbuf_add(sb, &ewah_size, sizeof(uint32_t)); /* we'll fix this up later */ @@ -110,9 +120,9 @@ void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate) } /* - * Call the query-fsmonitor hook passing the time of the last saved results. + * Call the query-fsmonitor hook passing the last update token of the saved results. */ -static int query_fsmonitor(int version, uint64_t last_update, struct strbuf *query_result) +static int query_fsmonitor(int version, const char *last_update, struct strbuf *query_result) { struct child_process cp = CHILD_PROCESS_INIT; @@ -121,7 +131,7 @@ static int query_fsmonitor(int version, uint64_t last_update, struct strbuf *que argv_array_push(&cp.args, core_fsmonitor); argv_array_pushf(&cp.args, "%d", version); - argv_array_pushf(&cp.args, "%" PRIuMAX, (uintmax_t)last_update); + argv_array_pushf(&cp.args, "%s", last_update); cp.use_shell = 1; cp.dir = get_git_work_tree(); @@ -151,6 +161,7 @@ void refresh_fsmonitor(struct index_state *istate) int query_success = 0; size_t bol; /* beginning of line */ uint64_t last_update; + struct strbuf last_update_token = STRBUF_INIT; char *buf; unsigned int i; @@ -164,6 +175,7 @@ void refresh_fsmonitor(struct index_state *istate) * should be inclusive to ensure we don't miss potential changes. */ last_update = getnanotime(); + strbuf_addf(&last_update_token, "%"PRIu64"", last_update); /* * If we have a last update time, call query_fsmonitor for the set of @@ -217,18 +229,21 @@ void refresh_fsmonitor(struct index_state *istate) } strbuf_release(&query_result); - /* Now that we've updated istate, save the last_update time */ - istate->fsmonitor_last_update = last_update; + /* Now that we've updated istate, save the last_update_token */ + FREE_AND_NULL(istate->fsmonitor_last_update); + istate->fsmonitor_last_update = strbuf_detach(&last_update_token, NULL); } void add_fsmonitor(struct index_state *istate) { unsigned int i; + struct strbuf last_update = STRBUF_INIT; if (!istate->fsmonitor_last_update) { trace_printf_key(&trace_fsmonitor, "add fsmonitor"); istate->cache_changed |= FSMONITOR_CHANGED; - istate->fsmonitor_last_update = getnanotime(); + strbuf_addf(&last_update, "%"PRIu64"", getnanotime()); + istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL); /* reset the fsmonitor state */ for (i = 0; i < istate->cache_nr; i++) @@ -250,7 +265,7 @@ void remove_fsmonitor(struct index_state *istate) if (istate->fsmonitor_last_update) { trace_printf_key(&trace_fsmonitor, "remove fsmonitor"); istate->cache_changed |= FSMONITOR_CHANGED; - istate->fsmonitor_last_update = 0; + FREE_AND_NULL(istate->fsmonitor_last_update); } } diff --git a/t/helper/test-dump-fsmonitor.c b/t/helper/test-dump-fsmonitor.c index 2786f47088e975..975f0ac8905125 100644 --- a/t/helper/test-dump-fsmonitor.c +++ b/t/helper/test-dump-fsmonitor.c @@ -13,7 +13,7 @@ int cmd__dump_fsmonitor(int ac, const char **av) printf("no fsmonitor\n"); return 0; } - printf("fsmonitor last update %"PRIuMAX"\n", (uintmax_t)istate->fsmonitor_last_update); + printf("fsmonitor last update %s\n", istate->fsmonitor_last_update); for (i = 0; i < istate->cache_nr; i++) printf((istate->cache[i]->ce_flags & CE_FSMONITOR_VALID) ? "+" : "-"); From f969c4bc17b79f7c857987793cb0c23ad3f4e899 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Mon, 9 Dec 2019 14:00:07 -0700 Subject: [PATCH 2/4] fsmonitor: handle version 2 of the hooks that will use opaque token Some file monitors like watchman will use something other than a timestamp to keep better track of what changes happen in between calls to query the fsmonitor. The clockid in watchman is a string. Now that the index is storing an opaque token for the last update the code needs to be updated to pass that opaque token to a verion 2 of the fsmonitor hook. Because there are repos that already have version 1 of the hook and we want them to continue to work when git is updated, we need to handle both version 1 and version 2 of the hook. In order to do that a config value is being added core.fsmonitorHookVersion to force what version of the hook should be used. When this is not set it will default to -1 and then the code will attempt to call version 2 of the hook first. If that fails it will fallback to trying version 1. Signed-off-by: Kevin Willford --- fsmonitor.c | 75 +++++++++++++++++++++++++++++++------ t/t7519-status-fsmonitor.sh | 7 +++- t/t7519/fsmonitor-all | 1 - t/t7519/fsmonitor-watchman | 3 +- 4 files changed, 71 insertions(+), 15 deletions(-) diff --git a/fsmonitor.c b/fsmonitor.c index 98605872259d0d..932bd9012daf9e 100644 --- a/fsmonitor.c +++ b/fsmonitor.c @@ -8,7 +8,8 @@ #define INDEX_EXTENSION_VERSION1 (1) #define INDEX_EXTENSION_VERSION2 (2) -#define HOOK_INTERFACE_VERSION (1) +#define HOOK_INTERFACE_VERSION1 (1) +#define HOOK_INTERFACE_VERSION2 (2) struct trace_key trace_fsmonitor = TRACE_KEY_INIT(FSMONITOR); @@ -25,6 +26,22 @@ static void fsmonitor_ewah_callback(size_t pos, void *is) ce->ce_flags &= ~CE_FSMONITOR_VALID; } +static int fsmonitor_hook_version(void) +{ + int hook_version; + + if (git_config_get_int("core.fsmonitorhookversion", &hook_version)) + return -1; + + if (hook_version == HOOK_INTERFACE_VERSION1 || + hook_version == HOOK_INTERFACE_VERSION2) + return hook_version; + + warning("Invalid hook version '%i' in core.fsmonitorhookversion. " + "Must be 1 or 2.", hook_version); + return -1; +} + int read_fsmonitor_extension(struct index_state *istate, const void *data, unsigned long sz) { @@ -158,8 +175,8 @@ static void fsmonitor_refresh_callback(struct index_state *istate, const char *n void refresh_fsmonitor(struct index_state *istate) { struct strbuf query_result = STRBUF_INIT; - int query_success = 0; - size_t bol; /* beginning of line */ + int query_success = 0, hook_version = -1; + size_t bol = 0; /* beginning of line */ uint64_t last_update; struct strbuf last_update_token = STRBUF_INIT; char *buf; @@ -167,6 +184,9 @@ void refresh_fsmonitor(struct index_state *istate) if (!core_fsmonitor || istate->fsmonitor_has_run_once) return; + + hook_version = fsmonitor_hook_version(); + istate->fsmonitor_has_run_once = 1; trace_printf_key(&trace_fsmonitor, "refresh fsmonitor"); @@ -175,27 +195,60 @@ void refresh_fsmonitor(struct index_state *istate) * should be inclusive to ensure we don't miss potential changes. */ last_update = getnanotime(); - strbuf_addf(&last_update_token, "%"PRIu64"", last_update); + if (hook_version == HOOK_INTERFACE_VERSION1) + strbuf_addf(&last_update_token, "%"PRIu64"", last_update); /* - * If we have a last update time, call query_fsmonitor for the set of - * changes since that time, else assume everything is possibly dirty + * If we have a last update token, call query_fsmonitor for the set of + * changes since that token, else assume everything is possibly dirty * and check it all. */ if (istate->fsmonitor_last_update) { - query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION, - istate->fsmonitor_last_update, &query_result); + if (hook_version == -1 || hook_version == HOOK_INTERFACE_VERSION2) { + query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION2, + istate->fsmonitor_last_update, &query_result); + + if (query_success) { + if (hook_version < 0) + hook_version = HOOK_INTERFACE_VERSION2; + + /* + * First entry will be the last update token + * Need to use a char * variable because static + * analysis was suggesting to use strbuf_addbuf + * but we don't want to copy the entire strbuf + * only the the chars up to the first NUL + */ + buf = query_result.buf; + strbuf_addstr(&last_update_token, buf); + if (!last_update_token.len) { + warning("Empty last update token."); + query_success = 0; + } else { + bol = last_update_token.len + 1; + } + } else if (hook_version < 0) { + hook_version = HOOK_INTERFACE_VERSION1; + if (!last_update_token.len) + strbuf_addf(&last_update_token, "%"PRIu64"", last_update); + } + } + + if (hook_version == HOOK_INTERFACE_VERSION1) { + query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION1, + istate->fsmonitor_last_update, &query_result); + } + trace_performance_since(last_update, "fsmonitor process '%s'", core_fsmonitor); trace_printf_key(&trace_fsmonitor, "fsmonitor process '%s' returned %s", core_fsmonitor, query_success ? "success" : "failure"); } /* a fsmonitor process can return '/' to indicate all entries are invalid */ - if (query_success && query_result.buf[0] != '/') { + if (query_success && query_result.buf[bol] != '/') { /* Mark all entries returned by the monitor as dirty */ buf = query_result.buf; - bol = 0; - for (i = 0; i < query_result.len; i++) { + for (i = bol; i < query_result.len; i++) { if (buf[i] != '\0') continue; fsmonitor_refresh_callback(istate, buf + bol); diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh index cf0fda2d5aa534..fbfdcca0007774 100755 --- a/t/t7519-status-fsmonitor.sh +++ b/t/t7519-status-fsmonitor.sh @@ -32,11 +32,12 @@ write_integration_script () { echo "$0: exactly 2 arguments expected" exit 2 fi - if test "$1" != 1 + if test "$1" != 2 then echo "Unsupported core.fsmonitor hook version." >&2 exit 1 fi + printf "last_update_token\0" printf "untracked\0" printf "dir1/untracked\0" printf "dir2/untracked\0" @@ -107,6 +108,7 @@ EOF # test that "update-index --fsmonitor-valid" sets the fsmonitor valid bit test_expect_success 'update-index --fsmonitor-valid" sets the fsmonitor valid bit' ' write_script .git/hooks/fsmonitor-test<<-\EOF && + printf "last_update_token\0" EOF git update-index --fsmonitor && git update-index --fsmonitor-valid dir1/modified && @@ -167,6 +169,7 @@ EOF # test that newly added files are marked valid test_expect_success 'newly added files are marked valid' ' write_script .git/hooks/fsmonitor-test<<-\EOF && + printf "last_update_token\0" EOF git add new && git add dir1/new && @@ -207,6 +210,7 @@ EOF # test that *only* files returned by the integration script get flagged as invalid test_expect_success '*only* files returned by the integration script get flagged as invalid' ' write_script .git/hooks/fsmonitor-test<<-\EOF && + printf "last_update_token\0" printf "dir1/modified\0" EOF clean_repo && @@ -276,6 +280,7 @@ do # (if enabled) files unless it is told about them. test_expect_success "status doesn't detect unreported modifications" ' write_script .git/hooks/fsmonitor-test<<-\EOF && + printf "last_update_token\0" :>marker EOF clean_repo && diff --git a/t/t7519/fsmonitor-all b/t/t7519/fsmonitor-all index 691bc94dc2c8e8..94ab66bd3d8686 100755 --- a/t/t7519/fsmonitor-all +++ b/t/t7519/fsmonitor-all @@ -17,7 +17,6 @@ fi if test "$1" != 1 then - echo "Unsupported core.fsmonitor hook version." >&2 exit 1 fi diff --git a/t/t7519/fsmonitor-watchman b/t/t7519/fsmonitor-watchman index d8e7a1e5ba85c0..264b9daf834ec8 100755 --- a/t/t7519/fsmonitor-watchman +++ b/t/t7519/fsmonitor-watchman @@ -26,8 +26,7 @@ if ($version == 1) { # subtract one second to make sure watchman will return all changes $time = int ($time / 1000000000) - 1; } else { - die "Unsupported query-fsmonitor hook version '$version'.\n" . - "Falling back to scanning...\n"; + exit 1; } my $git_work_tree; From ecc288c84811efb537ef7d8b10068e3a633c9273 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Mon, 9 Dec 2019 18:20:00 -0700 Subject: [PATCH 3/4] fsmonitor: add fsmonitor hook scripts for version 2 Version 2 of the fsmonitor hooks is passed the version and an update token and must pass back a last update token to use for subsequent calls to the hook. Signed-off-by: Kevin Willford --- t/t7519/fsmonitor-all-v2 | 21 +++ t/t7519/fsmonitor-watchman-v2 | 173 +++++++++++++++++++++ templates/hooks--fsmonitor-watchman.sample | 168 +++++++++++++------- 3 files changed, 310 insertions(+), 52 deletions(-) create mode 100755 t/t7519/fsmonitor-all-v2 create mode 100755 t/t7519/fsmonitor-watchman-v2 diff --git a/t/t7519/fsmonitor-all-v2 b/t/t7519/fsmonitor-all-v2 new file mode 100755 index 00000000000000..061907e88b658a --- /dev/null +++ b/t/t7519/fsmonitor-all-v2 @@ -0,0 +1,21 @@ +#!/usr/bin/perl + +use strict; +use warnings; +# +# An test hook script to integrate with git to test fsmonitor. +# +# The hook is passed a version (currently 2) and since token +# formatted as a string and outputs to stdout all files that have been +# modified since the given time. Paths must be relative to the root of +# the working tree and separated by a single NUL. +# +#echo "$0 $*" >&2 +my ($version, $last_update_token) = @ARGV; + +if ($version ne 2) { + print "Unsupported query-fsmonitor hook version '$version'.\n"; + exit 1; +} + +print "last_update_token\0/\0" diff --git a/t/t7519/fsmonitor-watchman-v2 b/t/t7519/fsmonitor-watchman-v2 new file mode 100755 index 00000000000000..14ed0aa42de0f2 --- /dev/null +++ b/t/t7519/fsmonitor-watchman-v2 @@ -0,0 +1,173 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use IPC::Open2; + +# An example hook script to integrate Watchman +# (https://facebook.github.io/watchman/) with git to speed up detecting +# new and modified files. +# +# The hook is passed a version (currently 2) and last update token +# formatted as a string and outputs to stdout a new update token and +# all files that have been modified since the update token. Paths must +# be relative to the root of the working tree and separated by a single NUL. +# +# To enable this hook, rename this file to "query-watchman" and set +# 'git config core.fsmonitor .git/hooks/query-watchman' +# +my ($version, $last_update_token) = @ARGV; + +# Uncomment for debugging +# print STDERR "$0 $version $last_update_token\n"; + +# Check the hook interface version +if ($version ne 2) { + die "Unsupported query-fsmonitor hook version '$version'.\n" . + "Falling back to scanning...\n"; +} + +my $git_work_tree = get_working_dir(); + +my $retry = 1; + +my $json_pkg; +eval { + require JSON::XS; + $json_pkg = "JSON::XS"; + 1; +} or do { + require JSON::PP; + $json_pkg = "JSON::PP"; +}; + +launch_watchman(); + +sub launch_watchman { + my $o = watchman_query(); + if (is_work_tree_watched($o)) { + output_result($o->{clock}, @{$o->{files}}); + } +} + +sub output_result { + my ($clockid, @files) = @_; + + # Uncomment for debugging watchman output + # open (my $fh, ">", ".git/watchman-output.out"); + # binmode $fh, ":utf8"; + # print $fh "$clockid\n@files\n"; + # close $fh; + + binmode STDOUT, ":utf8"; + print $clockid; + print "\0"; + local $, = "\0"; + print @files; +} + +sub watchman_clock { + my $response = qx/watchman clock "$git_work_tree"/; + die "Failed to get clock id on '$git_work_tree'.\n" . + "Falling back to scanning...\n" if $? != 0; + + return $json_pkg->new->utf8->decode($response); +} + +sub watchman_query { + my $pid = open2(\*CHLD_OUT, \*CHLD_IN, 'watchman -j --no-pretty') + or die "open2() failed: $!\n" . + "Falling back to scanning...\n"; + + # In the query expression below we're asking for names of files that + # changed since $last_update_token but not from the .git folder. + # + # To accomplish this, we're using the "since" generator to use the + # recency index to select candidate nodes and "fields" to limit the + # output to file names only. Then we're using the "expression" term to + # further constrain the results. + if (substr($last_update_token, 0, 1) eq "c") { + $last_update_token = "\"$last_update_token\""; + } + my $query = <<" END"; + ["query", "$git_work_tree", { + "since": $last_update_token, + "fields": ["name"], + "expression": ["not", ["dirname", ".git"]] + }] + END + + # Uncomment for debugging the watchman query + # open (my $fh, ">", ".git/watchman-query.json"); + # print $fh $query; + # close $fh; + + print CHLD_IN $query; + close CHLD_IN; + my $response = do {local $/; }; + + # Uncomment for debugging the watch response + # open ($fh, ">", ".git/watchman-response.json"); + # print $fh $response; + # close $fh; + + die "Watchman: command returned no output.\n" . + "Falling back to scanning...\n" if $response eq ""; + die "Watchman: command returned invalid output: $response\n" . + "Falling back to scanning...\n" unless $response =~ /^\{/; + + return $json_pkg->new->utf8->decode($response); +} + +sub is_work_tree_watched { + my ($output) = @_; + my $error = $output->{error}; + if ($retry > 0 and $error and $error =~ m/unable to resolve root .* directory (.*) is not watched/) { + $retry--; + my $response = qx/watchman watch "$git_work_tree"/; + die "Failed to make watchman watch '$git_work_tree'.\n" . + "Falling back to scanning...\n" if $? != 0; + $output = $json_pkg->new->utf8->decode($response); + $error = $output->{error}; + die "Watchman: $error.\n" . + "Falling back to scanning...\n" if $error; + + # Uncomment for debugging watchman output + # open (my $fh, ">", ".git/watchman-output.out"); + # close $fh; + + # Watchman will always return all files on the first query so + # return the fast "everything is dirty" flag to git and do the + # Watchman query just to get it over with now so we won't pay + # the cost in git to look up each individual file. + my $o = watchman_clock(); + $error = $output->{error}; + + die "Watchman: $error.\n" . + "Falling back to scanning...\n" if $error; + + output_result($o->{clock}, ("/")); + $last_update_token = $o->{clock}; + + eval { launch_watchman() }; + return 0; + } + + die "Watchman: $error.\n" . + "Falling back to scanning...\n" if $error; + + return 1; +} + +sub get_working_dir { + my $working_dir; + if ($^O =~ 'msys' || $^O =~ 'cygwin') { + $working_dir = Win32::GetCwd(); + $working_dir =~ tr/\\/\//; + } else { + require Cwd; + $working_dir = Cwd::cwd(); + } + + return $working_dir; +} diff --git a/templates/hooks--fsmonitor-watchman.sample b/templates/hooks--fsmonitor-watchman.sample index ef94fa293800b3..14ed0aa42de0f2 100755 --- a/templates/hooks--fsmonitor-watchman.sample +++ b/templates/hooks--fsmonitor-watchman.sample @@ -8,102 +8,166 @@ use IPC::Open2; # (https://facebook.github.io/watchman/) with git to speed up detecting # new and modified files. # -# The hook is passed a version (currently 1) and a time in nanoseconds -# formatted as a string and outputs to stdout all files that have been -# modified since the given time. Paths must be relative to the root of -# the working tree and separated by a single NUL. +# The hook is passed a version (currently 2) and last update token +# formatted as a string and outputs to stdout a new update token and +# all files that have been modified since the update token. Paths must +# be relative to the root of the working tree and separated by a single NUL. # # To enable this hook, rename this file to "query-watchman" and set # 'git config core.fsmonitor .git/hooks/query-watchman' # -my ($version, $time) = @ARGV; +my ($version, $last_update_token) = @ARGV; -# Check the hook interface version +# Uncomment for debugging +# print STDERR "$0 $version $last_update_token\n"; -if ($version == 1) { - # convert nanoseconds to seconds - # subtract one second to make sure watchman will return all changes - $time = int ($time / 1000000000) - 1; -} else { +# Check the hook interface version +if ($version ne 2) { die "Unsupported query-fsmonitor hook version '$version'.\n" . "Falling back to scanning...\n"; } -my $git_work_tree; -if ($^O =~ 'msys' || $^O =~ 'cygwin') { - $git_work_tree = Win32::GetCwd(); - $git_work_tree =~ tr/\\/\//; -} else { - require Cwd; - $git_work_tree = Cwd::cwd(); -} +my $git_work_tree = get_working_dir(); my $retry = 1; +my $json_pkg; +eval { + require JSON::XS; + $json_pkg = "JSON::XS"; + 1; +} or do { + require JSON::PP; + $json_pkg = "JSON::PP"; +}; + launch_watchman(); sub launch_watchman { + my $o = watchman_query(); + if (is_work_tree_watched($o)) { + output_result($o->{clock}, @{$o->{files}}); + } +} + +sub output_result { + my ($clockid, @files) = @_; + # Uncomment for debugging watchman output + # open (my $fh, ">", ".git/watchman-output.out"); + # binmode $fh, ":utf8"; + # print $fh "$clockid\n@files\n"; + # close $fh; + + binmode STDOUT, ":utf8"; + print $clockid; + print "\0"; + local $, = "\0"; + print @files; +} + +sub watchman_clock { + my $response = qx/watchman clock "$git_work_tree"/; + die "Failed to get clock id on '$git_work_tree'.\n" . + "Falling back to scanning...\n" if $? != 0; + + return $json_pkg->new->utf8->decode($response); +} + +sub watchman_query { my $pid = open2(\*CHLD_OUT, \*CHLD_IN, 'watchman -j --no-pretty') - or die "open2() failed: $!\n" . - "Falling back to scanning...\n"; + or die "open2() failed: $!\n" . + "Falling back to scanning...\n"; # In the query expression below we're asking for names of files that - # changed since $time but were not transient (ie created after - # $time but no longer exist). + # changed since $last_update_token but not from the .git folder. # # To accomplish this, we're using the "since" generator to use the # recency index to select candidate nodes and "fields" to limit the - # output to file names only. - + # output to file names only. Then we're using the "expression" term to + # further constrain the results. + if (substr($last_update_token, 0, 1) eq "c") { + $last_update_token = "\"$last_update_token\""; + } my $query = <<" END"; ["query", "$git_work_tree", { - "since": $time, - "fields": ["name"] + "since": $last_update_token, + "fields": ["name"], + "expression": ["not", ["dirname", ".git"]] }] END + # Uncomment for debugging the watchman query + # open (my $fh, ">", ".git/watchman-query.json"); + # print $fh $query; + # close $fh; + print CHLD_IN $query; close CHLD_IN; my $response = do {local $/; }; + # Uncomment for debugging the watch response + # open ($fh, ">", ".git/watchman-response.json"); + # print $fh $response; + # close $fh; + die "Watchman: command returned no output.\n" . - "Falling back to scanning...\n" if $response eq ""; + "Falling back to scanning...\n" if $response eq ""; die "Watchman: command returned invalid output: $response\n" . - "Falling back to scanning...\n" unless $response =~ /^\{/; - - my $json_pkg; - eval { - require JSON::XS; - $json_pkg = "JSON::XS"; - 1; - } or do { - require JSON::PP; - $json_pkg = "JSON::PP"; - }; - - my $o = $json_pkg->new->utf8->decode($response); - - if ($retry > 0 and $o->{error} and $o->{error} =~ m/unable to resolve root .* directory (.*) is not watched/) { - print STDERR "Adding '$git_work_tree' to watchman's watch list.\n"; + "Falling back to scanning...\n" unless $response =~ /^\{/; + + return $json_pkg->new->utf8->decode($response); +} + +sub is_work_tree_watched { + my ($output) = @_; + my $error = $output->{error}; + if ($retry > 0 and $error and $error =~ m/unable to resolve root .* directory (.*) is not watched/) { $retry--; - qx/watchman watch "$git_work_tree"/; + my $response = qx/watchman watch "$git_work_tree"/; die "Failed to make watchman watch '$git_work_tree'.\n" . "Falling back to scanning...\n" if $? != 0; + $output = $json_pkg->new->utf8->decode($response); + $error = $output->{error}; + die "Watchman: $error.\n" . + "Falling back to scanning...\n" if $error; + + # Uncomment for debugging watchman output + # open (my $fh, ">", ".git/watchman-output.out"); + # close $fh; # Watchman will always return all files on the first query so # return the fast "everything is dirty" flag to git and do the # Watchman query just to get it over with now so we won't pay # the cost in git to look up each individual file. - print "/\0"; + my $o = watchman_clock(); + $error = $output->{error}; + + die "Watchman: $error.\n" . + "Falling back to scanning...\n" if $error; + + output_result($o->{clock}, ("/")); + $last_update_token = $o->{clock}; + eval { launch_watchman() }; - exit 0; + return 0; } - die "Watchman: $o->{error}.\n" . - "Falling back to scanning...\n" if $o->{error}; + die "Watchman: $error.\n" . + "Falling back to scanning...\n" if $error; - binmode STDOUT, ":utf8"; - local $, = "\0"; - print @{$o->{files}}; + return 1; +} + +sub get_working_dir { + my $working_dir; + if ($^O =~ 'msys' || $^O =~ 'cygwin') { + $working_dir = Win32::GetCwd(); + $working_dir =~ tr/\\/\//; + } else { + require Cwd; + $working_dir = Cwd::cwd(); + } + + return $working_dir; } From 1db2a699ba3749bfb4ab1c048492f76f82830ee6 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Thu, 12 Dec 2019 14:06:17 -0700 Subject: [PATCH 4/4] fsmonitor: update documentation for hook version and watchman hooks A new config value for core.fsmonitorHookVersion was added to be able to force the version of the fsmonitor hook. Possible values are 1 or 2. When this is not set the code will use a value of -1 and attempt to use version 2 of the hook first and if that fails will attempt version 1. Signed-off-by: Kevin Willford --- Documentation/config/core.txt | 11 +++++++++++ Documentation/githooks.txt | 13 ++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 9e440b160d9b3f..74619a9c03bb17 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -68,6 +68,17 @@ core.fsmonitor:: avoiding unnecessary processing of files that have not changed. See the "fsmonitor-watchman" section of linkgit:githooks[5]. +core.fsmonitorHookVersion:: + Sets the version of hook that is to be used when calling fsmonitor. + There are currently versions 1 and 2. When this is not set, + version 2 will be tried first and if it fails then version 1 + will be tried. Version 1 uses a timestamp as input to determine + which files have changes since that time but some monitors + like watchman have race conditions when used with a timestamp. + Version 2 uses an opaque string so that the monitor can return + something that can be used to determine what files have changed + without race conditions. + core.trustctime:: If false, the ctime differences between the index and the working tree are ignored; useful when the inode change time diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt index 50365f2914e04f..3dccab53758717 100644 --- a/Documentation/githooks.txt +++ b/Documentation/githooks.txt @@ -490,9 +490,16 @@ fsmonitor-watchman ~~~~~~~~~~~~~~~~~~ This hook is invoked when the configuration option `core.fsmonitor` is -set to `.git/hooks/fsmonitor-watchman`. It takes two arguments, a version -(currently 1) and the time in elapsed nanoseconds since midnight, -January 1, 1970. +set to `.git/hooks/fsmonitor-watchman` or `.git/hooks/fsmonitor-watchmanv2` +depending on the version of the hook to use. + +Version 1 takes two arguments, a version (1) and the time in elapsed +nanoseconds since midnight, January 1, 1970. + +Version 2 takes two arguments, a version (2) and a token that is used +for identifying changes since the token. For watchman this would be +a clock id. This version must output to stdout the new token followed +by a NUL before the list of files. The hook should output to stdout the list of all files in the working directory that may have changed since the requested time. The logic