From 2ed37903d87765adfdab7937f0e69ca2dbac6d52 Mon Sep 17 00:00:00 2001
From: Max Schaefer
Date: Wed, 27 Feb 2019 11:54:59 +0000
Subject: [PATCH 1/2] JavaScript: Include list of relevant environment
variables in Javadoc for `AutoBuild`.
---
.../src/com/semmle/js/extractor/AutoBuild.java | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
index 42b969f191ee..762df7fe8e58 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
@@ -58,6 +58,23 @@
*
*
*
+ * Additionally, the following environment variables may be set to customise extraction
+ * (explained in more detail below):
+ *
+ *
+ *
+ * LGTM_INDEX_INCLUDE: a newline-separated list of paths to include
+ * LGTM_INDEX_EXCLUDE: a newline-separated list of paths to exclude
+ * LGTM_REPOSITORY_FOLDERS_CSV: the path of a CSV file containing file classifications
+ * LGTM_INDEX_FILTERS: a newline-separated list of {@link ProjectLayout}-style
+ * patterns that can be used to refine the list of files to include and exclude
+ * LGTM_INDEX_TYPESCRIPT: whether to extract TypeScript
+ * LGTM_INDEX_THREADS: the maximum number of files to extract in parallel
+ * LGTM_TRAP_CACHE: the path of a directory to use for trap caching
+ * LGTM_TRAP_CACHE_BOUND: the size to bound the trap cache to
+
+ *
+ *
* It extracts the following:
*
*
From 9d77619afc9364957dc69a1480a97b30f7e5bb2e Mon Sep 17 00:00:00 2001
From: Max Schaefer
Date: Wed, 27 Feb 2019 12:02:01 +0000
Subject: [PATCH 2/2] JavaScript: Make file types customisable in AutoBuild.
Every once in a while we encounter projects using some custom file extension for files that we could in principle extract, but since the extractor doesn't know about the extension, the files are skipped.
To handle this, the legacy extractor has a `--file-type` option that one can use to specify a file type to use for all files in that particular extraction. So far, `AutoBuild` has nothing of the sort.
This PR proposes to introduce an environment variable `LGTM_INDEX_FILETYPES` to allow a similar customisation. In the fullness of time, this variable would be set through `lgtm.yml` in the usual way, but for now it is undocumented and for internal use only.
Specifically, `LGTM_INDEX_FILETYPES` is a newline-separated list of ".extension:filetype" pairs, specifying that files with the given `.extension` should be extracted as type `filetype`, where
`filetype` is one of `js`, `html`, `json`, `typescript` or `yaml`.
For example, `.jsm:js` causes all `.jsm` files to be extracted as JavaScript.
This can also be used to override default file types: for example, specifying `.js:typescript` causes all JavaScript files to be extracted as TypeScript.
---
.../com/semmle/js/extractor/AutoBuild.java | 82 +++++++++++++++++--
.../js/extractor/test/AutoBuildTests.java | 61 ++++++++++++--
2 files changed, 128 insertions(+), 15 deletions(-)
diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
index 762df7fe8e58..71bb515ec54c 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
@@ -16,8 +16,10 @@
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -69,6 +71,8 @@
* LGTM_INDEX_FILTERS: a newline-separated list of {@link ProjectLayout}-style
* patterns that can be used to refine the list of files to include and exclude
* LGTM_INDEX_TYPESCRIPT: whether to extract TypeScript
+ * LGTM_INDEX_FILETYPES: a newline-separated list of ".extension:filetype" pairs
+ * specifying which {@link FileType} to use for the given extension
* LGTM_INDEX_THREADS: the maximum number of files to extract in parallel
* LGTM_TRAP_CACHE: the path of a directory to use for trap caching
* LGTM_TRAP_CACHE_BOUND: the size to bound the trap cache to
@@ -160,6 +164,12 @@
*
*
*
+ * The environment variable LGTM_INDEX_FILETYPES may be set to a newline-separated
+ * list of file type specifications of the form .extension:filetype, where filetype
+ * is one of js, html, json, typescript or yaml, causing all files whose name ends in
+ * .extension to also be included by default.
+ *
+ *
+ *
* The default exclusion patterns cause the following files to be excluded:
*
*
@@ -174,6 +184,11 @@
*
*
*
+ * The file type with which a file is extracted can be customised via the
+ * LGTM_INDEX_FILETYPES environment variable explained above.
+ *
+ *
+ *
* Note that all these customisations only apply to LGTM_SRC. Extraction of
* externs is not customisable.
*
@@ -193,6 +208,7 @@
public class AutoBuild {
private final ExtractorOutputConfig outputConfig;
private final ITrapCache trapCache;
+ private final Map fileTypes = new LinkedHashMap<>();
private final Set includes = new LinkedHashSet<>();
private final Set excludes = new LinkedHashSet<>();
private ProjectLayout filters;
@@ -208,6 +224,7 @@ public AutoBuild() {
this.trapCache = mkTrapCache();
this.typeScriptMode = getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.BASIC);
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
+ setupFileTypes();
setupMatchers();
}
@@ -277,6 +294,25 @@ private ITrapCache mkTrapCache() {
return trapCache;
}
+ /**
+ * Populate {@link #fileTypes} from the LGTM_INDEX_FILETYPES environment variable, a
+ * newline-separated list of ".extension:filetype" pairs. Blank lines are skipped.
+ *
+ * @throws UserError if an entry is not of the form ".extension:filetype" or names an
+ * unknown file type
+ */
+ private void setupFileTypes() {
+ for (String spec : Main.NEWLINE.split(getEnvVar("LGTM_INDEX_FILETYPES", ""))) {
+ spec = spec.trim();
+ if (spec.isEmpty())
+ continue;
+ String[] fields = spec.split(":");
+ // Reject malformed entries instead of silently dropping them: a typo would otherwise
+ // go unnoticed, and an empty extension would turn into the catch-all pattern "**/*".
+ if (fields.length != 2 || fields[0].trim().isEmpty())
+ throw new UserError("Invalid file type specification '" + spec + "'.");
+ String extension = fields[0].trim();
+ String fileType = fields[1].trim();
+ try {
+ fileTypes.put(extension, FileType.valueOf(StringUtil.uc(fileType)));
+ } catch (IllegalArgumentException e) {
+ Exceptions.ignore(e, "We construct a better error message.");
+ throw new UserError("Invalid file type '" + fileType + "'.");
+ }
+ }
+ }
+
/**
* Set up include and exclude matchers based on environment variables.
*/
@@ -350,6 +386,10 @@ private void setupFilters() {
patterns.add("**/.eslintrc*");
patterns.add("**/package.json");
+ // include any explicitly specified extensions
+ for (String extension : fileTypes.keySet())
+ patterns.add("**/*" + extension);
+
// exclude files whose name strongly suggests they are minified
patterns.add("-**/*.min.js");
patterns.add("-**/*-min.js");
@@ -483,28 +523,48 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
* Extract all supported candidate files that pass the filters.
*/
private void extractSource() throws IOException {
- ExtractorConfig config = new ExtractorConfig(true);
- config = config.withSourceType(getSourceType());
- config = config.withTypeScriptMode(typeScriptMode);
- if (defaultEncoding != null)
- config = config.withDefaultEncoding(defaultEncoding);
- FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
+ // default extractor
+ FileExtractor defaultExtractor = new FileExtractor(mkExtractorConfig(), outputConfig, trapCache);
+
+ // custom extractor for explicitly specified file types
+ Map customExtractors = new LinkedHashMap<>();
+ for (Map.Entry spec : fileTypes.entrySet()) {
+ String extension = spec.getKey();
+ String fileType = spec.getValue().name();
+ ExtractorConfig extractorConfig = mkExtractorConfig().withFileType(fileType);
+ customExtractors.put(extension, new FileExtractor(extractorConfig, outputConfig, trapCache));
+ }
Set filesToExtract = new LinkedHashSet<>();
List tsconfigFiles = new ArrayList<>();
- findFilesToExtract(extractor, filesToExtract, tsconfigFiles);
+ findFilesToExtract(defaultExtractor, filesToExtract, tsconfigFiles);
// extract TypeScript projects and files
- Set extractedFiles = extractTypeScript(extractor, filesToExtract, tsconfigFiles);
+ Set extractedFiles = extractTypeScript(defaultExtractor, filesToExtract, tsconfigFiles);
// extract remaining files
for (Path f : filesToExtract) {
if (extractedFiles.add(f)) {
+ FileExtractor extractor = defaultExtractor;
+ if (!fileTypes.isEmpty()) {
+ String extension = FileUtil.extension(f);
+ if (customExtractors.containsKey(extension))
+ extractor = customExtractors.get(extension);
+ }
extract(extractor, f, null);
}
}
}
+ /**
+ * Build an extractor configuration carrying the source type, the TypeScript mode and,
+ * if one was specified, the default encoding.
+ */
+ private ExtractorConfig mkExtractorConfig() {
+ ExtractorConfig base = new ExtractorConfig(true)
+ .withSourceType(getSourceType())
+ .withTypeScriptMode(typeScriptMode);
+ return defaultEncoding == null ? base : base.withDefaultEncoding(defaultEncoding);
+ }
+
private Set extractTypeScript(FileExtractor extractor, Set files, List tsconfig) {
Set extractedFiles = new LinkedHashSet<>();
@@ -591,7 +651,11 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
return FileVisitResult.SKIP_SUBTREE;
// extract files that are supported and pass the include/exclude patterns
- if (extractor.supports(file.toFile()) && isFileIncluded(file)) {
+ boolean supported = extractor.supports(file.toFile());
+ if (!supported && !fileTypes.isEmpty()) {
+ supported = fileTypes.containsKey(FileUtil.extension(file));
+ }
+ if (supported && isFileIncluded(file)) {
filesToExtract.add(normalizePath(file));
}
diff --git a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java
index c2c156ee003c..77b3e5119604 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java
@@ -23,6 +23,7 @@
import com.semmle.js.extractor.AutoBuild;
import com.semmle.js.extractor.ExtractorState;
import com.semmle.js.extractor.FileExtractor;
+import com.semmle.js.extractor.FileExtractor.FileType;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.UserError;
import com.semmle.util.files.FileUtil8;
@@ -74,15 +75,31 @@ public void teardown() throws IOException {
/**
* Add a file under {@code root} that we either do or don't expect to be extracted,
* depending on the value of {@code extracted}. If the file is expected to be
- * extracted, its path is added to {@link #expected}.
+ * extracted, its path is added to {@link #expected}. If non-null, parameter
+ * {@code fileType} indicates the file type with which we expect the file to be extracted.
+ */
+ private Path addFile(boolean extracted, FileType fileType, Path root, String... components) throws IOException {
+ Path f = addFile(root, components);
+ if (extracted) {
+ expected.add(f + (fileType == null ? "" : ":" + fileType.toString()));
+ }
+ return f;
+ }
+
+ /**
+ * Add a file with default file type; see {@link #addFile(boolean, FileType, Path, String...)}.
*/
private Path addFile(boolean extracted, Path root, String... components) throws IOException {
+ return addFile(extracted, null, root, components);
+ }
+
+ /**
+ * Create a file at the specified path under {@code root} and return it.
+ */
+ private Path addFile(Path root, String... components) throws IOException {
Path p = Paths.get(root.toString(), components);
Files.createDirectories(p.getParent());
- Path f = Files.createFile(p);
- if (extracted)
- expected.add(f.toString());
- return f;
+ return Files.createFile(p);
}
/**
@@ -96,7 +113,10 @@ private void runTest() throws IOException {
new AutoBuild() {
@Override
protected void extract(FileExtractor extractor, Path file, ExtractorState state) {
- actual.add(file.toString());
+ String extracted = file.toString();
+ if (extractor.getConfig().hasFileType())
+ extracted += ":" + extractor.getFileType(file.toFile());
+ actual.add(extracted);
}
@Override
@@ -453,4 +473,33 @@ public void minifiedFilesCanBeReIncluded() throws IOException {
addFile(true, LGTM_SRC, "compute_min.js");
runTest();
}
+
+ /**
+ * Files with extensions registered via LGTM_INDEX_FILETYPES are extracted with the
+ * requested file type, while files with standard extensions are still extracted.
+ */
+ @Test
+ public void customExtensions() throws IOException {
+ envVars.put("LGTM_INDEX_FILETYPES", ".jsm:js\n.soy:html");
+ addFile(true, FileType.JS, LGTM_SRC, "tst.jsm");
+ // no dot before "jsm", so this file does not have the registered extension
+ addFile(false, LGTM_SRC, "tstjsm");
+ addFile(true, FileType.HTML, LGTM_SRC, "tst.soy");
+ // standard extensions: expected to be extracted without an explicit file type
+ addFile(true, LGTM_SRC, "tst.html");
+ addFile(true, LGTM_SRC, "tst.js");
+ runTest();
+ }
+
+ /**
+ * LGTM_INDEX_FILETYPES can override the file type of a standard extension: here
+ * a .js file is expected to be extracted as TypeScript.
+ */
+ @Test
+ public void overrideExtension() throws IOException {
+ envVars.put("LGTM_INDEX_FILETYPES", ".js:typescript");
+ addFile(true, FileType.TYPESCRIPT, LGTM_SRC, "tst.js");
+ runTest();
+ }
+
+ /**
+ * An unknown file type name in LGTM_INDEX_FILETYPES is expected to be reported as a
+ * {@link UserError} whose message names the offending file type.
+ */
+ @Test
+ public void invalidFileType() throws IOException {
+ // "javascript" is not accepted as a file type name; the expected spelling is "js"
+ envVars.put("LGTM_INDEX_FILETYPES", ".jsm:javascript");
+ try {
+ runTest();
+ Assert.fail("expected UserError");
+ } catch (UserError ue) {
+ Assert.assertEquals("Invalid file type 'javascript'.", ue.getMessage());
+ }
+ }
}