diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
index 42b969f191ee..71bb515ec54c 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
@@ -16,8 +16,10 @@
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -58,6 +60,25 @@
*
*
*
+ * Additionally, the following environment variables may be set to customise extraction
+ * (explained in more detail below):
+ *
+ *
+ *
+ * LGTM_INDEX_INCLUDE: a newline-separated list of paths to include
+ * LGTM_INDEX_EXCLUDE: a newline-separated list of paths to exclude
+ * LGTM_REPOSITORY_FOLDERS_CSV: the path of a CSV file containing file classifications
+ * LGTM_INDEX_FILTERS: a newline-separated list of {@link ProjectLayout}-style
+ * patterns that can be used to refine the list of files to include and exclude
+ * LGTM_INDEX_TYPESCRIPT: whether to extract TypeScript
+ * LGTM_INDEX_FILETYPES: a newline-separated list of ".extension:filetype" pairs
+ * specifying which {@link FileType} to use for the given extension
+ * LGTM_INDEX_THREADS: the maximum number of files to extract in parallel
+ * LGTM_TRAP_CACHE: the path of a directory to use for trap caching
+ * LGTM_TRAP_CACHE_BOUND: the size to bound the trap cache to
+
+ *
+ *
* It extracts the following:
*
*
@@ -143,6 +164,12 @@
*
*
*
+ * The environment variable LGTM_INDEX_FILETYPES may be set to a newline-separated
+ * list of file type specifications of the form .extension:filetype, causing all
+ * files whose name ends in .extension to also be included by default.
+ *
+ *
+ *
* The default exclusion patterns cause the following files to be excluded:
*
*
@@ -157,6 +184,11 @@
*
*
*
+ * The file type with which a file is extracted can be customised via the
+ * LGTM_INDEX_FILETYPES environment variable explained above.
+ *
+ *
+ *
* Note that all these customisations only apply to LGTM_SRC. Extraction of
* externs is not customisable.
*
@@ -176,6 +208,7 @@
public class AutoBuild {
private final ExtractorOutputConfig outputConfig;
private final ITrapCache trapCache;
+ /** Maps each file extension listed in LGTM_INDEX_FILETYPES to the file type it should be extracted as. */
+ private final Map<String, FileType> fileTypes = new LinkedHashMap<>();
private final Set includes = new LinkedHashSet<>();
private final Set excludes = new LinkedHashSet<>();
private ProjectLayout filters;
@@ -191,6 +224,7 @@ public AutoBuild() {
this.trapCache = mkTrapCache();
this.typeScriptMode = getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.BASIC);
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
+ setupFileTypes();
setupMatchers();
}
@@ -260,6 +294,25 @@ private ITrapCache mkTrapCache() {
return trapCache;
}
+ /**
+  * Populate {@link #fileTypes} from the LGTM_INDEX_FILETYPES environment variable.
+  *
+  * Each non-blank line is expected to have the form {@code extension:filetype}; lines
+  * that do not split into exactly two fields on ':' are skipped. The file type name is
+  * passed through {@code StringUtil.uc} (presumably upper-casing it) and must then name
+  * a {@link FileType} constant.
+  *
+  * @throws UserError if a file type name does not correspond to a {@link FileType}
+  */
+ private void setupFileTypes() {
+   String rawSpecs = getEnvVar("LGTM_INDEX_FILETYPES", "");
+   for (String line : Main.NEWLINE.split(rawSpecs)) {
+     String entry = line.trim();
+     if (entry.isEmpty()) {
+       continue;
+     }
+     String[] parts = entry.split(":");
+     if (parts.length != 2) {
+       // not of the form "extension:filetype"; ignored rather than reported
+       continue;
+     }
+     String ext = parts[0].trim();
+     String typeName = parts[1].trim();
+     FileType type;
+     try {
+       type = FileType.valueOf(StringUtil.uc(typeName));
+     } catch (IllegalArgumentException e) {
+       Exceptions.ignore(e, "We construct a better error message.");
+       throw new UserError("Invalid file type '" + typeName + "'.");
+     }
+     fileTypes.put(ext, type);
+   }
+ }
+
/**
* Set up include and exclude matchers based on environment variables.
*/
@@ -333,6 +386,10 @@ private void setupFilters() {
patterns.add("**/.eslintrc*");
patterns.add("**/package.json");
+ // include any explicitly specified extensions
+ for (String extension : fileTypes.keySet())
+ patterns.add("**/*" + extension);
+
// exclude files whose name strongly suggests they are minified
patterns.add("-**/*.min.js");
patterns.add("-**/*-min.js");
@@ -466,28 +523,48 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
* Extract all supported candidate files that pass the filters.
*/
private void extractSource() throws IOException {
- ExtractorConfig config = new ExtractorConfig(true);
- config = config.withSourceType(getSourceType());
- config = config.withTypeScriptMode(typeScriptMode);
- if (defaultEncoding != null)
- config = config.withDefaultEncoding(defaultEncoding);
- FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
+ // default extractor: used for file discovery, TypeScript extraction,
+ // and any remaining file without a custom file type
+ FileExtractor defaultExtractor = new FileExtractor(mkExtractorConfig(), outputConfig, trapCache);
+
+ // one custom extractor per explicitly specified extension, keyed by that extension
+ Map<String, FileExtractor> customExtractors = new LinkedHashMap<>();
+ for (Map.Entry<String, FileType> spec : fileTypes.entrySet()) {
+ String extension = spec.getKey();
+ String fileType = spec.getValue().name();
+ ExtractorConfig extractorConfig = mkExtractorConfig().withFileType(fileType);
+ customExtractors.put(extension, new FileExtractor(extractorConfig, outputConfig, trapCache));
+ }
Set<Path> filesToExtract = new LinkedHashSet<>();
List tsconfigFiles = new ArrayList<>();
- findFilesToExtract(extractor, filesToExtract, tsconfigFiles);
+ findFilesToExtract(defaultExtractor, filesToExtract, tsconfigFiles);
// extract TypeScript projects and files
- Set<Path> extractedFiles = extractTypeScript(extractor, filesToExtract, tsconfigFiles);
+ Set<Path> extractedFiles = extractTypeScript(defaultExtractor, filesToExtract, tsconfigFiles);
// extract remaining files
for (Path f : filesToExtract) {
if (extractedFiles.add(f)) {
+ // prefer the custom extractor registered for this file's extension, if any
+ FileExtractor extractor = defaultExtractor;
+ if (!fileTypes.isEmpty()) {
+ String extension = FileUtil.extension(f);
+ if (customExtractors.containsKey(extension))
+ extractor = customExtractors.get(extension);
+ }
extract(extractor, f, null);
}
}
}
+ /**
+  * Build a fresh extractor configuration carrying the source type, the TypeScript
+  * mode and, if one is set, the default encoding of this auto-build.
+  */
+ private ExtractorConfig mkExtractorConfig() {
+   ExtractorConfig base = new ExtractorConfig(true)
+       .withSourceType(getSourceType())
+       .withTypeScriptMode(typeScriptMode);
+   // the encoding is only overridden when a default encoding is available (non-null)
+   return defaultEncoding == null ? base : base.withDefaultEncoding(defaultEncoding);
+ }
+
private Set extractTypeScript(FileExtractor extractor, Set files, List tsconfig) {
Set extractedFiles = new LinkedHashSet<>();
@@ -574,7 +651,11 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
return FileVisitResult.SKIP_SUBTREE;
// extract files that are supported and pass the include/exclude patterns
- if (extractor.supports(file.toFile()) && isFileIncluded(file)) {
+ boolean supported = extractor.supports(file.toFile());
+ if (!supported && !fileTypes.isEmpty()) {
+ supported = fileTypes.containsKey(FileUtil.extension(file));
+ }
+ if (supported && isFileIncluded(file)) {
filesToExtract.add(normalizePath(file));
}
diff --git a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java
index c2c156ee003c..77b3e5119604 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/test/AutoBuildTests.java
@@ -23,6 +23,7 @@
import com.semmle.js.extractor.AutoBuild;
import com.semmle.js.extractor.ExtractorState;
import com.semmle.js.extractor.FileExtractor;
+import com.semmle.js.extractor.FileExtractor.FileType;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.UserError;
import com.semmle.util.files.FileUtil8;
@@ -74,15 +75,31 @@ public void teardown() throws IOException {
/**
* Add a file under {@code root} that we either do or don't expect to be extracted,
* depending on the value of {@code extracted}. If the file is expected to be
- * extracted, its path is added to {@link #expected}.
+ * extracted, its path is added to {@link #expected}. If non-null, parameter
+ * {@code fileType} indicates the file type with which we expect the file to be extracted.
+ */
+ private Path addFile(boolean extracted, FileType fileType, Path root, String... components) throws IOException {
+   Path created = addFile(root, components);
+   if (!extracted) {
+     return created;
+   }
+   // expected entries are either "path" or "path:FILETYPE"
+   String entry = created.toString();
+   if (fileType != null) {
+     entry += ":" + fileType.toString();
+   }
+   expected.add(entry);
+   return created;
+ }
+
+ /**
+ * Add a file with default file type; see {@link #addFile(boolean, FileType, Path, String...)}.
*/
private Path addFile(boolean extracted, Path root, String... components) throws IOException {
+ // a null file type means the expected entry is the bare path, without a ":TYPE" suffix
+ return addFile(extracted, null, root, components);
+ }
+
+ /**
+ * Create a file at the specified path under {@code root} and return it.
+ */
+ private Path addFile(Path root, String... components) throws IOException {
Path p = Paths.get(root.toString(), components);
+ // create any missing parent directories first, then the (empty) file itself
Files.createDirectories(p.getParent());
- Path f = Files.createFile(p);
- if (extracted)
- expected.add(f.toString());
- return f;
+ return Files.createFile(p);
}
/**
@@ -96,7 +113,10 @@ private void runTest() throws IOException {
new AutoBuild() {
@Override
protected void extract(FileExtractor extractor, Path file, ExtractorState state) {
- actual.add(file.toString());
+ String extracted = file.toString();
+ if (extractor.getConfig().hasFileType())
+ extracted += ":" + extractor.getFileType(file.toFile());
+ actual.add(extracted);
}
@Override
@@ -453,4 +473,33 @@ public void minifiedFilesCanBeReIncluded() throws IOException {
addFile(true, LGTM_SRC, "compute_min.js");
runTest();
}
+
+ /**
+ * Extensions registered via LGTM_INDEX_FILETYPES are extracted with the requested
+ * file type; files with other extensions are unaffected.
+ */
+ @Test
+ public void customExtensions() throws IOException {
+ envVars.put("LGTM_INDEX_FILETYPES", ".jsm:js\n.soy:html");
+ // ".jsm" is mapped to JS, so this file is expected as "path:JS"
+ addFile(true, FileType.JS, LGTM_SRC, "tst.jsm");
+ // the name merely ends in "jsm" without the dot, so it is not expected to be extracted
+ addFile(false, LGTM_SRC, "tstjsm");
+ // ".soy" is mapped to HTML
+ addFile(true, FileType.HTML, LGTM_SRC, "tst.soy");
+ // these are expected as bare paths, i.e. without a ":TYPE" suffix
+ addFile(true, LGTM_SRC, "tst.html");
+ addFile(true, LGTM_SRC, "tst.js");
+ runTest();
+ }
+
+ /**
+ * An LGTM_INDEX_FILETYPES entry may also name an extension that is extracted by
+ * default; the file is then expected with the file type given in the entry.
+ */
+ @Test
+ public void overrideExtension() throws IOException {
+ envVars.put("LGTM_INDEX_FILETYPES", ".js:typescript");
+ // expected as "path:TYPESCRIPT" rather than as a bare path
+ addFile(true, FileType.TYPESCRIPT, LGTM_SRC, "tst.js");
+ runTest();
+ }
+
+ /**
+ * A file type name that is not accepted ("javascript" here) must be reported
+ * as a {@link UserError} with a message quoting the offending name.
+ */
+ @Test
+ public void invalidFileType() throws IOException {
+ envVars.put("LGTM_INDEX_FILETYPES", ".jsm:javascript");
+ try {
+ runTest();
+ // reaching this point means no error was raised for the invalid file type
+ Assert.fail("expected UserError");
+ } catch (UserError ue) {
+ Assert.assertEquals("Invalid file type 'javascript'.", ue.getMessage());
+ }
+ }
}