From f197975c6e6c45e5008b711e304581d6486e0489 Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Fri, 22 May 2020 07:52:29 -0700 Subject: [PATCH 01/12] Extract packages more intelligently We now extract packages that have the same module root as the specified packages, as determined by the `go list` command. --- extractor/extractor.go | 87 ++++++++++++++++++++++++++++++++++-------- extractor/util/util.go | 66 +++++++++++++++++++++++++++++++- 2 files changed, 136 insertions(+), 17 deletions(-) diff --git a/extractor/extractor.go b/extractor/extractor.go index 65c5501d6..339c51a35 100644 --- a/extractor/extractor.go +++ b/extractor/extractor.go @@ -52,12 +52,42 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { extractUniverseScope() + // a map of package path to package root directory (currently the module root or the source directory) + pkgRoots := make(map[string]string) + // a map of package path to source code directory + pkgDirs := make(map[string]string) + // root directories of packages that we want to extract + wantedRoots := make(map[string]bool) + for _, pkg := range pkgs { + mdir := util.GetModDir(pkg.PkgPath) + pdir := util.GetPkgDir(pkg.PkgPath) + if mdir == "" { + mdir = pdir + } + if mdir == "" { + log.Fatalf("Unable to get a source directory for input package %s.", pkg.PkgPath) + } + pkgRoots[pkg.PkgPath] = mdir + pkgDirs[pkg.PkgPath] = pdir + wantedRoots[mdir] = true + } + // recursively visit all packages in depth-first order; // on the way down, associate each package scope with its corresponding package, // and on the way up extract the package's scope packages.Visit(pkgs, func(pkg *packages.Package) bool { return true }, func(pkg *packages.Package) { + if _, ok := pkgRoots[pkg.PkgPath]; !ok { + mdir := util.GetModDir(pkg.PkgPath) + pdir := util.GetPkgDir(pkg.PkgPath) + if mdir == "" { + mdir = pdir + } + pkgRoots[pkg.PkgPath] = mdir + pkgDirs[pkg.PkgPath] = pdir + } + tw, err := trap.NewWriter(pkg.PkgPath, pkg) if err != nil { log.Fatal(err) @@ -101,22 +131,6 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { log.Printf("Max goroutines set to %d", maxgoroutines) } - var wg sync.WaitGroup - // this semaphore is used to limit the number of files that are open at once; - // this is to prevent the extractor from running into issues with caps on the - // number of open files that can be held by one process - fdSem := newSemaphore(100) - // this semaphore is used to limit the number of goroutines spawned, so we - // don't run into memory issues - goroutineSem := newSemaphore(maxgoroutines) - - // extract AST information for all packages - for _, pkg := range pkgs { - extractPackage(pkg, &wg, goroutineSem, fdSem) - } - - wg.Wait() - cwd, err := os.Getwd() if err != nil { log.Printf("Warning: unable to get working directory: %s", err.Error()) @@ -154,6 +168,47 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { log.Printf("Done extracting %s (%dms)", path, end.Nanoseconds()/1000000) } + var wg sync.WaitGroup + // this semaphore is used to limit the number of files that are open at once; + // this is to prevent the extractor from running into issues with caps on the + // number of open files that can be held by one process + fdSem := newSemaphore(100) + // this semaphore is used to limit the number of goroutines spawned, so we + // don't run into memory issues + goroutineSem := newSemaphore(maxgoroutines) + + // extract AST information for all packages + packages.Visit(pkgs, func(pkg *packages.Package) bool { + return wantedRoots[pkgRoots[pkg.PkgPath]] + }, func(pkg *packages.Package) { + rootLoop: + for root, _ := range wantedRoots { + relDir, err := filepath.Rel(root, pkgDirs[pkg.PkgPath]) + if err != nil { + // if the paths can't be made relative, skip it + continue + } + dirList := strings.Split(relDir, string(filepath.Separator)) + if len(dirList) == 0 || dirList[0] != ".." { + // if dirList is empty, root is the same as the source dir + // if dirList starts with `".."`, it is not inside the root dir + for _, dir := range dirList { + if dir == "vendor" { + // if the path relative to the root contains vendor, continue + // + // we may want to extract the package if it's been explicitly included + // (i.e. it has been passed directly), but we shouldn't include it for + // this root + continue rootLoop + } + } + extractPackage(pkg, &wg, goroutineSem, fdSem) + } + } + }) + + wg.Wait() + return nil } diff --git a/extractor/util/util.go b/extractor/util/util.go index fdbc76c35..070833218 100644 --- a/extractor/util/util.go +++ b/extractor/util/util.go @@ -1,6 +1,12 @@ package util -import "os" +import ( + "log" + "os" + "os/exec" + "path/filepath" + "strings" +) // Getenv retrieves the value of the environment variable named by the key. // If that variable is not present, it iterates over the given aliases until @@ -18,3 +24,61 @@ func Getenv(key string, aliases ...string) string { } return "" } + +// GetModDir gets directory of the module containing the package with path `pkgpath`. +func GetModDir(pkgpath string) string { + mod, err := exec.Command("go", "list", "-e", "-f", "{{.Module}}", pkgpath).Output() + if err != nil { + if err, ok := err.(*exec.ExitError); ok { + log.Printf("Warning: go list command failed, output below:\n%s%s", mod, err.Stderr) + } else { + log.Printf("Warning: Failed to run go list: %s", err.Error()) + } + + return "" + } + + if strings.TrimSpace(string(mod)) == "" { + // if modules aren't being used, return nothing + return "" + } + + modDir, err := exec.Command("go", "list", "-e", "-f", "{{.Module.Dir}}", pkgpath).Output() + if err != nil { + if err, ok := err.(*exec.ExitError); ok { + log.Printf("Warning: go list command failed, output below:\n%s%s", modDir, err.Stderr) + } else { + log.Printf("Warning: Failed to run go list: %s", err.Error()) + } + + return "" + } + + trimmed := strings.TrimSpace(string(modDir)) + abs, err := filepath.Abs(trimmed) + if err != nil { + log.Printf("Warning: unable to make %s absolute: %s", trimmed, err.Error()) + } + return abs +} + +// GetPkgDir gets directory containing the package with path `pkgpath`. +func GetPkgDir(pkgpath string) string { + pkgDir, err := exec.Command("go", "list", "-e", "-f", "{{.Dir}}", pkgpath).Output() + if err != nil { + if err, ok := err.(*exec.ExitError); ok { + log.Printf("Warning: go list command failed, output below:\n%s%s", pkgDir, err.Stderr) + } else { + log.Printf("Warning: Failed to run go list: %s", err.Error()) + } + + return "" + } + + trimmed := strings.TrimSpace(string(pkgDir)) + abs, err := filepath.Abs(trimmed) + if err != nil { + log.Printf("Warning: unable to make %s absolute: %s", trimmed, err.Error()) + } + return abs +} From 3513c352e62846a00cbaf8dfe5fb694770094053 Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Fri, 22 May 2020 09:53:40 -0700 Subject: [PATCH 02/12] extractor: Factor out FileExists utility function --- .../cli/go-autobuilder/go-autobuilder.go | 20 ++++++------------- extractor/util/util.go | 9 +++++++++ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/extractor/cli/go-autobuilder/go-autobuilder.go b/extractor/cli/go-autobuilder/go-autobuilder.go index 13c6f36a6..0ababcf21 100644 --- a/extractor/cli/go-autobuilder/go-autobuilder.go +++ b/extractor/cli/go-autobuilder/go-autobuilder.go @@ -52,14 +52,6 @@ func getEnvGoVersion() string { return strings.Fields(string(gover))[2] } -func fileExists(filename string) bool { - _, err := os.Stat(filename) - if err != nil && !os.IsNotExist(err) { - log.Printf("Unable to stat %s: %s\n", filename, err.Error()) - } - return err == nil -} - func run(cmd *exec.Cmd) bool { cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr @@ -80,7 +72,7 @@ func run(cmd *exec.Cmd) bool { } func tryBuild(buildFile, cmd string, args ...string) bool { - if fileExists(buildFile) { + if util.FileExists(buildFile) { log.Printf("%s found, running %s\n", buildFile, cmd) return run(exec.Command(cmd, args...)) } @@ -168,21 +160,21 @@ func main() { // extraction depMode := GoGetNoModules needGopath := true - if fileExists("go.mod") { + if util.FileExists("go.mod") { depMode = GoGetWithModules needGopath = false log.Println("Found go.mod, enabling go modules") - } else if fileExists("Gopkg.toml") { + } else if util.FileExists("Gopkg.toml") { depMode = Dep log.Println("Found Gopkg.toml, using dep instead of go get") - } else if fileExists("glide.yaml") { + } else if util.FileExists("glide.yaml") { depMode = Glide log.Println("Found glide.yaml, enabling go modules") } // if a vendor/modules.txt file exists, we assume that there are vendored Go dependencies, and // skip the dependency installation step and run the extractor with `-mod=vendor` - hasVendor := fileExists("vendor/modules.txt") + hasVendor := util.FileExists("vendor/modules.txt") // if `LGTM_INDEX_NEED_GOPATH` is set, it overrides the value for `needGopath` inferred above if needGopathOverride := os.Getenv("LGTM_INDEX_NEED_GOPATH"); needGopathOverride != "" { @@ -328,7 +320,7 @@ func main() { } } - if fileExists("Gopkg.lock") { + if util.FileExists("Gopkg.lock") { // if Gopkg.lock exists, don't update it and only vendor dependencies install = exec.Command("dep", "ensure", "-v", "-vendor-only") } else { diff --git a/extractor/util/util.go b/extractor/util/util.go index 070833218..a3c85788c 100644 --- a/extractor/util/util.go +++ b/extractor/util/util.go @@ -82,3 +82,12 @@ func GetPkgDir(pkgpath string) string { } return abs } + +// FileExists tests whether the file at `filename` exists. +func FileExists(filename string) bool { + _, err := os.Stat(filename) + if err != nil && !os.IsNotExist(err) { + log.Printf("Unable to stat %s: %s\n", filename, err.Error()) + } + return err == nil +} From 296d2d5fd31d1ea8f210ddd2443bc7e7ba8f12a9 Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Fri, 29 May 2020 03:41:03 -0700 Subject: [PATCH 03/12] extractor: modify FileExists to check that the path isn't a directory --- extractor/util/util.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/util/util.go b/extractor/util/util.go index a3c85788c..21a7d4ef2 100644 --- a/extractor/util/util.go +++ b/extractor/util/util.go @@ -85,9 +85,9 @@ func GetPkgDir(pkgpath string) string { // FileExists tests whether the file at `filename` exists. func FileExists(filename string) bool { - _, err := os.Stat(filename) + info, err := os.Stat(filename) if err != nil && !os.IsNotExist(err) { log.Printf("Unable to stat %s: %s\n", filename, err.Error()) } - return err == nil + return err == nil && !info.IsDir() } From 7863bb656ee2598f037c699aeb99f6c2dad59c9b Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Thu, 11 Jun 2020 03:51:32 -0700 Subject: [PATCH 04/12] Use the -mod argument from the build when calling go list --- extractor/extractor.go | 15 +++++++++++---- extractor/util/util.go | 35 ++++++++++++++++++++++++++++------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/extractor/extractor.go b/extractor/extractor.go index 339c51a35..6a2c994d2 100644 --- a/extractor/extractor.go +++ b/extractor/extractor.go @@ -42,6 +42,13 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { } pkgs, err := packages.Load(cfg, patterns...) + var modFlag string + for _, flag := range buildFlags { + if strings.HasPrefix(flag, "-mod=") { + modFlag = flag + } + } + if err != nil { return err } @@ -59,8 +66,8 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { // root directories of packages that we want to extract wantedRoots := make(map[string]bool) for _, pkg := range pkgs { - mdir := util.GetModDir(pkg.PkgPath) - pdir := util.GetPkgDir(pkg.PkgPath) + mdir := util.GetModDir(pkg.PkgPath, modFlag) + pdir := util.GetPkgDir(pkg.PkgPath, modFlag) if mdir == "" { mdir = pdir } @@ -79,8 +86,8 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { return true }, func(pkg *packages.Package) { if _, ok := pkgRoots[pkg.PkgPath]; !ok { - mdir := util.GetModDir(pkg.PkgPath) - pdir := util.GetPkgDir(pkg.PkgPath) + mdir := util.GetModDir(pkg.PkgPath, modFlag) + pdir := util.GetPkgDir(pkg.PkgPath, modFlag) if mdir == "" { mdir = pdir } diff --git a/extractor/util/util.go b/extractor/util/util.go index 21a7d4ef2..a4f44afd8 100644 --- a/extractor/util/util.go +++ b/extractor/util/util.go @@ -25,9 +25,17 @@ func Getenv(key string, aliases ...string) string { return "" } -// GetModDir gets directory of the module containing the package with path `pkgpath`. -func GetModDir(pkgpath string) string { - mod, err := exec.Command("go", "list", "-e", "-f", "{{.Module}}", pkgpath).Output() +// GetModDir gets directory of the module containing the package with path `pkgpath`. It passes the +// `go list` command `modflag`, which should be of the form `-mod=`, as described by `go +// help modules`. +func GetModDir(pkgpath string, modflag string) string { + var cmd *exec.Cmd + if modflag != "" { + cmd = exec.Command("go", "list", "-e", "-f", "{{.Module}}", modflag, pkgpath) + } else { + cmd = exec.Command("go", "list", "-e", "-f", "{{.Module}}", pkgpath) + } + mod, err := cmd.Output() if err != nil { if err, ok := err.(*exec.ExitError); ok { log.Printf("Warning: go list command failed, output below:\n%s%s", mod, err.Stderr) @@ -43,7 +51,12 @@ func GetModDir(pkgpath string) string { return "" } - modDir, err := exec.Command("go", "list", "-e", "-f", "{{.Module.Dir}}", pkgpath).Output() + if modflag != "" { + cmd = exec.Command("go", "list", "-e", "-f", "{{.Module.Dir}}", modflag, pkgpath) + } else { + cmd = exec.Command("go", "list", "-e", "-f", "{{.Module.Dir}}", pkgpath) + } + modDir, err := cmd.Output() if err != nil { if err, ok := err.(*exec.ExitError); ok { log.Printf("Warning: go list command failed, output below:\n%s%s", modDir, err.Stderr) @@ -62,9 +75,17 @@ func GetModDir(pkgpath string) string { return abs } -// GetPkgDir gets directory containing the package with path `pkgpath`. -func GetPkgDir(pkgpath string) string { - pkgDir, err := exec.Command("go", "list", "-e", "-f", "{{.Dir}}", pkgpath).Output() +// GetPkgDir gets directory containing the package with path `pkgpath`. It passes the `go list` +// command `modflag`, which should be of the form `-mod=`, as described by `go help +// modules`. +func GetPkgDir(pkgpath string, modflag string) string { + var cmd *exec.Cmd + if modflag != "" { + cmd = exec.Command("go", "list", "-e", "-f", "{{.Dir}}", modflag, pkgpath) + } else { + cmd = exec.Command("go", "list", "-e", "-f", "{{.Dir}}", pkgpath) + } + pkgDir, err := cmd.Output() if err != nil { if err, ok := err.(*exec.ExitError); ok { log.Printf("Warning: go list command failed, output below:\n%s%s", pkgDir, err.Stderr) From de2f407c691193b9f868f6aae4787b618a144a90 Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Fri, 12 Jun 2020 10:20:42 -0700 Subject: [PATCH 05/12] Add change note for more dependency AST extraction --- change-notes/2020-06-12-more-dependency-extraction.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 change-notes/2020-06-12-more-dependency-extraction.md diff --git a/change-notes/2020-06-12-more-dependency-extraction.md b/change-notes/2020-06-12-more-dependency-extraction.md new file mode 100644 index 000000000..419e0994e --- /dev/null +++ b/change-notes/2020-06-12-more-dependency-extraction.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* The extractor now attempts to extract the AST of all dependencies that are related to the packages passed explicitly on the commandline, which is determined by using the module root or, if not using modules, the directory containing the source for those packages. From 9bd1f87d668b1c6efb70ff1cab3d9195c81749e1 Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Fri, 12 Jun 2020 10:53:35 -0700 Subject: [PATCH 06/12] Address review comments --- extractor/extractor.go | 96 ++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 50 deletions(-) diff --git a/extractor/extractor.go b/extractor/extractor.go index 6a2c994d2..1895f2b4c 100644 --- a/extractor/extractor.go +++ b/extractor/extractor.go @@ -65,19 +65,6 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { pkgDirs := make(map[string]string) // root directories of packages that we want to extract wantedRoots := make(map[string]bool) - for _, pkg := range pkgs { - mdir := util.GetModDir(pkg.PkgPath, modFlag) - pdir := util.GetPkgDir(pkg.PkgPath, modFlag) - if mdir == "" { - mdir = pdir - } - if mdir == "" { - log.Fatalf("Unable to get a source directory for input package %s.", pkg.PkgPath) - } - pkgRoots[pkg.PkgPath] = mdir - pkgDirs[pkg.PkgPath] = pdir - wantedRoots[mdir] = true - } // recursively visit all packages in depth-first order; // on the way down, associate each package scope with its corresponding package, @@ -88,6 +75,8 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { if _, ok := pkgRoots[pkg.PkgPath]; !ok { mdir := util.GetModDir(pkg.PkgPath, modFlag) pdir := util.GetPkgDir(pkg.PkgPath, modFlag) + // GetModDir returns the empty string if the module directory cannot be determined, e.g. if the package + // is not using modules. If this is the case, fall back to the package directory if mdir == "" { mdir = pdir } @@ -115,6 +104,13 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { } }) + for _, pkg := range pkgs { + if pkgRoots[pkg.PkgPath] == "" { + log.Fatalf("Unable to get a source directory for input package %s.", pkg.PkgPath) + } + wantedRoots[pkgRoots[pkg.PkgPath]] = true + } + // this sets the number of threads that the Go runtime will spawn; this is separate // from the number of goroutines that the program spawns, which are scheduled into // the system threads by the Go runtime scheduler @@ -138,43 +134,6 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { log.Printf("Max goroutines set to %d", maxgoroutines) } - cwd, err := os.Getwd() - if err != nil { - log.Printf("Warning: unable to get working directory: %s", err.Error()) - log.Println("Skipping go.mod extraction") - } - rcwd, err := filepath.EvalSymlinks(cwd) - if err == nil { - cwd = rcwd - } - - goModPaths := make([]string, 0, 10) - - filepath.Walk(cwd, func(path string, info os.FileInfo, err error) error { - if filepath.Base(path) == "go.mod" && info != nil && info.Mode().IsRegular() { - if err != nil { - log.Printf("Found go.mod with path %s, but encountered error %s", path, err.Error()) - } - - goModPaths = append(goModPaths, path) - } - - return nil - }) - - for _, path := range goModPaths { - log.Printf("Extracting %s", path) - start := time.Now() - - err := extractGoMod(path) - if err != nil { - log.Printf("Failed to extract go.mod: %s", err.Error()) - } - - end := time.Since(start) - log.Printf("Done extracting %s (%dms)", path, end.Nanoseconds()/1000000) - } - var wg sync.WaitGroup // this semaphore is used to limit the number of files that are open at once; // this is to prevent the extractor from running into issues with caps on the @@ -216,6 +175,43 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { wg.Wait() + cwd, err := os.Getwd() + if err != nil { + log.Printf("Warning: unable to get working directory: %s", err.Error()) + log.Println("Skipping go.mod extraction") + } + rcwd, err := filepath.EvalSymlinks(cwd) + if err == nil { + cwd = rcwd + } + + goModPaths := make([]string, 0, 10) + + filepath.Walk(cwd, func(path string, info os.FileInfo, err error) error { + if filepath.Base(path) == "go.mod" && info != nil && info.Mode().IsRegular() { + if err != nil { + log.Printf("Found go.mod with path %s, but encountered error %s", path, err.Error()) + } + + goModPaths = append(goModPaths, path) + } + + return nil + }) + + for _, path := range goModPaths { + log.Printf("Extracting %s", path) + start := time.Now() + + err := extractGoMod(path) + if err != nil { + log.Printf("Failed to extract go.mod: %s", err.Error()) + } + + end := time.Since(start) + log.Printf("Done extracting %s (%dms)", path, end.Nanoseconds()/1000000) + } + return nil } From e25b882e42365f67a7030435e3fa213e93460701 Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Mon, 15 Jun 2020 23:14:39 -0700 Subject: [PATCH 07/12] Clarify some comments As suggested in code review Co-authored-by: Max Schaefer <54907921+max-schaefer@users.noreply.github.com> --- extractor/util/util.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/util/util.go b/extractor/util/util.go index a4f44afd8..de48d999e 100644 --- a/extractor/util/util.go +++ b/extractor/util/util.go @@ -47,7 +47,7 @@ func GetModDir(pkgpath string, modflag string) string { } if strings.TrimSpace(string(mod)) == "" { - // if modules aren't being used, return nothing + // if modules aren't being used, return the empty string return "" } @@ -104,7 +104,7 @@ func GetPkgDir(pkgpath string, modflag string) string { return abs } -// FileExists tests whether the file at `filename` exists. +// FileExists tests whether the file at `filename` exists and is not a directory. func FileExists(filename string) bool { info, err := os.Stat(filename) if err != nil && !os.IsNotExist(err) { From ebdd724b75dbb0c47c7764e21703c7ed10b93004 Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Mon, 15 Jun 2020 23:47:49 -0700 Subject: [PATCH 08/12] Simplify logic for deciding whether to extract a package --- extractor/extractor.go | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/extractor/extractor.go b/extractor/extractor.go index 1895f2b4c..b547710d4 100644 --- a/extractor/extractor.go +++ b/extractor/extractor.go @@ -145,31 +145,21 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { // extract AST information for all packages packages.Visit(pkgs, func(pkg *packages.Package) bool { - return wantedRoots[pkgRoots[pkg.PkgPath]] + return true }, func(pkg *packages.Package) { - rootLoop: + sep := regexp.QuoteMeta(string(filepath.Separator)) + relativeOrVendorRe := regexp.MustCompile(`(^\.\.` + sep + ")|((^|.*" + sep + ")vendor" + sep + ").*") + for root, _ := range wantedRoots { relDir, err := filepath.Rel(root, pkgDirs[pkg.PkgPath]) - if err != nil { - // if the paths can't be made relative, skip it + if err != nil || relativeOrVendorRe.MatchString(relDir) { + // if the paths can't be made relative or the relative path starts with `".."` or contains a + // directory called `"vendor"`, skip it continue } - dirList := strings.Split(relDir, string(filepath.Separator)) - if len(dirList) == 0 || dirList[0] != ".." { - // if dirList is empty, root is the same as the source dir - // if dirList starts with `".."`, it is not inside the root dir - for _, dir := range dirList { - if dir == "vendor" { - // if the path relative to the root contains vendor, continue - // - // we may want to extract the package if it's been explicitly included - // (i.e. it has been passed directly), but we shouldn't include it for - // this root - continue rootLoop - } - } - extractPackage(pkg, &wg, goroutineSem, fdSem) - } + + extractPackage(pkg, &wg, goroutineSem, fdSem) + return } }) From fa391b15163db9a77de598065d2a9ee09a559e5e Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Mon, 15 Jun 2020 23:48:11 -0700 Subject: [PATCH 09/12] extractor: Factor out common bits for running `go list` --- extractor/extractor.go | 8 ++--- extractor/util/util.go | 79 ++++++++++++++++-------------------------- 2 files changed, 33 insertions(+), 54 deletions(-) diff --git a/extractor/extractor.go b/extractor/extractor.go index b547710d4..659a69dde 100644 --- a/extractor/extractor.go +++ b/extractor/extractor.go @@ -42,10 +42,10 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { } pkgs, err := packages.Load(cfg, patterns...) - var modFlag string + modFlags := make([]string, 0, 1) for _, flag := range buildFlags { if strings.HasPrefix(flag, "-mod=") { - modFlag = flag + modFlags = append(modFlags, flag) } } @@ -73,8 +73,8 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { return true }, func(pkg *packages.Package) { if _, ok := pkgRoots[pkg.PkgPath]; !ok { - mdir := util.GetModDir(pkg.PkgPath, modFlag) - pdir := util.GetPkgDir(pkg.PkgPath, modFlag) + mdir := util.GetModDir(pkg.PkgPath, modFlags...) + pdir := util.GetPkgDir(pkg.PkgPath, modFlags...) // GetModDir returns the empty string if the module directory cannot be determined, e.g. if the package // is not using modules. If this is the case, fall back to the package directory if mdir == "" { diff --git a/extractor/util/util.go b/extractor/util/util.go index de48d999e..ca31da5f4 100644 --- a/extractor/util/util.go +++ b/extractor/util/util.go @@ -25,81 +25,60 @@ func Getenv(key string, aliases ...string) string { return "" } -// GetModDir gets directory of the module containing the package with path `pkgpath`. It passes the -// `go list` command `modflag`, which should be of the form `-mod=`, as described by `go -// help modules`. -func GetModDir(pkgpath string, modflag string) string { - var cmd *exec.Cmd - if modflag != "" { - cmd = exec.Command("go", "list", "-e", "-f", "{{.Module}}", modflag, pkgpath) - } else { - cmd = exec.Command("go", "list", "-e", "-f", "{{.Module}}", pkgpath) - } - mod, err := cmd.Output() +// runGoList is a helper function for running go list with format `format` and flags `flags` on +// package `pkgpath`. +func runGoList(format string, pkgpath string, flags ...string) (string, error) { + args := append([]string{"list", "-e", "-f", format}, flags...) + args = append(args, pkgpath) + cmd := exec.Command("go", args...) + out, err := cmd.Output() + if err != nil { if err, ok := err.(*exec.ExitError); ok { - log.Printf("Warning: go list command failed, output below:\n%s%s", mod, err.Stderr) + log.Printf("Warning: go list command failed, output below:\nstdout:\n%s\nstderr:\n%s\n", out, err.Stderr) } else { log.Printf("Warning: Failed to run go list: %s", err.Error()) } - - return "" + return "", err } - if strings.TrimSpace(string(mod)) == "" { - // if modules aren't being used, return the empty string + return strings.TrimSpace(string(out)), nil +} + +// GetModDir gets directory of the module containing the package with path `pkgpath`. It passes the +// `go list` the flags specified by `flags`. +func GetModDir(pkgpath string, flags ...string) string { + mod, err := runGoList("{{.Module}}", pkgpath, flags...) + if err != nil || mod == "" { + // if the command errors or modules aren't being used, return the empty string return "" } - if modflag != "" { - cmd = exec.Command("go", "list", "-e", "-f", "{{.Module.Dir}}", modflag, pkgpath) - } else { - cmd = exec.Command("go", "list", "-e", "-f", "{{.Module.Dir}}", pkgpath) - } - modDir, err := cmd.Output() + modDir, err := runGoList("{{.Module.Dir}}", pkgpath, flags...) if err != nil { - if err, ok := err.(*exec.ExitError); ok { - log.Printf("Warning: go list command failed, output below:\n%s%s", modDir, err.Stderr) - } else { - log.Printf("Warning: Failed to run go list: %s", err.Error()) - } - return "" } - trimmed := strings.TrimSpace(string(modDir)) - abs, err := filepath.Abs(trimmed) + abs, err := filepath.Abs(modDir) if err != nil { - log.Printf("Warning: unable to make %s absolute: %s", trimmed, err.Error()) + log.Printf("Warning: unable to make %s absolute: %s", modDir, err.Error()) + return "" } return abs } // GetPkgDir gets directory containing the package with path `pkgpath`. It passes the `go list` -// command `modflag`, which should be of the form `-mod=`, as described by `go help -// modules`. -func GetPkgDir(pkgpath string, modflag string) string { - var cmd *exec.Cmd - if modflag != "" { - cmd = exec.Command("go", "list", "-e", "-f", "{{.Dir}}", modflag, pkgpath) - } else { - cmd = exec.Command("go", "list", "-e", "-f", "{{.Dir}}", pkgpath) - } - pkgDir, err := cmd.Output() +// command the flags specified by `flags`. +func GetPkgDir(pkgpath string, flags ...string) string { + pkgDir, err := runGoList("{{.Dir}}", pkgpath, flags...) if err != nil { - if err, ok := err.(*exec.ExitError); ok { - log.Printf("Warning: go list command failed, output below:\n%s%s", pkgDir, err.Stderr) - } else { - log.Printf("Warning: Failed to run go list: %s", err.Error()) - } - return "" } - trimmed := strings.TrimSpace(string(pkgDir)) - abs, err := filepath.Abs(trimmed) + abs, err := filepath.Abs(pkgDir) if err != nil { - log.Printf("Warning: unable to make %s absolute: %s", trimmed, err.Error()) + log.Printf("Warning: unable to make %s absolute: %s", pkgDir, err.Error()) + return "" } return abs } From 9e8d386f3ce649c5c7e76044b5fedef184cbf90a Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Mon, 15 Jun 2020 23:51:43 -0700 Subject: [PATCH 10/12] Clarify change note --- change-notes/2020-06-12-more-dependency-extraction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/change-notes/2020-06-12-more-dependency-extraction.md b/change-notes/2020-06-12-more-dependency-extraction.md index 419e0994e..76523ced9 100644 --- a/change-notes/2020-06-12-more-dependency-extraction.md +++ b/change-notes/2020-06-12-more-dependency-extraction.md @@ -1,2 +1,2 @@ lgtm,codescanning -* The extractor now attempts to extract the AST of all dependencies that are related to the packages passed explicitly on the commandline, which is determined by using the module root or, if not using modules, the directory containing the source for those packages. +* The extractor now attempts to extract the AST of all dependencies that are related to the packages passed explicitly on the commandline, which is determined by using the module root or, if not using modules, the directory containing the source for those packages. In particular, this means if a package passed to the extractor depends on another package inside the same module, the dependency's AST will now be extracted. From 380060c7e451ac07dc72b8713a9d80eb37a00cde Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Tue, 16 Jun 2020 12:45:13 -0700 Subject: [PATCH 11/12] extractor: Refactor regexp compilation for the relative directory check --- extractor/extractor.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/extractor/extractor.go b/extractor/extractor.go index 659a69dde..1f878cb81 100644 --- a/extractor/extractor.go +++ b/extractor/extractor.go @@ -143,18 +143,20 @@ func ExtractWithFlags(buildFlags []string, patterns []string) error { // don't run into memory issues goroutineSem := newSemaphore(maxgoroutines) + sep := regexp.QuoteMeta(string(filepath.Separator)) + // if a path matches this regexp, we don't want to extract this package. Currently, it checks + // - that the path does not contain a `..` segment, and + // - the path does not contain a `vendor` directory. + noExtractRe := regexp.MustCompile(`.*(^|` + sep + `)(\.\.|vendor)($|` + sep + `).*`) + // extract AST information for all packages packages.Visit(pkgs, func(pkg *packages.Package) bool { return true }, func(pkg *packages.Package) { - sep := regexp.QuoteMeta(string(filepath.Separator)) - relativeOrVendorRe := regexp.MustCompile(`(^\.\.` + sep + ")|((^|.*" + sep + ")vendor" + sep + ").*") - for root, _ := range wantedRoots { relDir, err := filepath.Rel(root, pkgDirs[pkg.PkgPath]) - if err != nil || relativeOrVendorRe.MatchString(relDir) { - // if the paths can't be made relative or the relative path starts with `".."` or contains a - // directory called `"vendor"`, skip it + if err != nil || noExtractRe.MatchString(relDir) { + // if the path can't be made relative or matches the noExtract regexp skip it continue } From 6e5e9ce5de7330354207c7dae8d846b578d94a30 Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Tue, 30 Jun 2020 11:44:10 -0700 Subject: [PATCH 12/12] Improve comments for extractor utility functions --- extractor/util/util.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/extractor/util/util.go b/extractor/util/util.go index ca31da5f4..c8eeb33f4 100644 --- a/extractor/util/util.go +++ b/extractor/util/util.go @@ -45,8 +45,8 @@ func runGoList(format string, pkgpath string, flags ...string) (string, error) { return strings.TrimSpace(string(out)), nil } -// GetModDir gets directory of the module containing the package with path `pkgpath`. It passes the -// `go list` the flags specified by `flags`. +// GetModDir gets the absolute directory of the module containing the package with path +// `pkgpath`. It passes the `go list` the flags specified by `flags`. func GetModDir(pkgpath string, flags ...string) string { mod, err := runGoList("{{.Module}}", pkgpath, flags...) if err != nil || mod == "" { @@ -67,8 +67,8 @@ func GetModDir(pkgpath string, flags ...string) string { return abs } -// GetPkgDir gets directory containing the package with path `pkgpath`. It passes the `go list` -// command the flags specified by `flags`. +// GetPkgDir gets the absolute directory containing the package with path `pkgpath`. It passes the +// `go list` command the flags specified by `flags`. func GetPkgDir(pkgpath string, flags ...string) string { pkgDir, err := runGoList("{{.Dir}}", pkgpath, flags...) if err != nil {