Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 90 additions & 9 deletions javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
Expand Down Expand Up @@ -58,6 +60,25 @@
* </ul>
*
* <p>
* Additionally, the following environment variables may be set to customise extraction
* (explained in more detail below):
* </p>
*
* <ul>
* <li><code>LGTM_INDEX_INCLUDE</code>: a newline-separated list of paths to include</li>
* <li><code>LGTM_INDEX_EXCLUDE</code>: a newline-separated list of paths to exclude</li>
* <li><code>LGTM_REPOSITORY_FOLDERS_CSV</code>: the path of a CSV file containing file classifications</li>
* <li><code>LGTM_INDEX_FILTERS</code>: a newline-separated list of {@link ProjectLayout}-style
* patterns that can be used to refine the list of files to include and exclude</li>
* <li><code>LGTM_INDEX_TYPESCRIPT</code>: whether to extract TypeScript</li>
* <li><code>LGTM_INDEX_FILETYPES</code>: a newline-separated list of ".extension:filetype" pairs
* specifying which {@link FileType} to use for the given extension</li>
* <li><code>LGTM_INDEX_THREADS</code>: the maximum number of files to extract in parallel</li>
* <li><code>LGTM_TRAP_CACHE</code>: the path of a directory to use for trap caching</li>
* <li><code>LGTM_TRAP_CACHE_BOUND</code>: the size to bound the trap cache to</li>
* </ul>
*
* <p>
* It extracts the following:
* </p>
*
Expand Down Expand Up @@ -143,6 +164,12 @@
* </p>
*
* <p>
* The environment variable <code>LGTM_INDEX_FILETYPES</code> may be set to a newline-separated
* list of file type specifications of the form <code>.extension:filetype</code>, causing all
* files whose name ends in <code>.extension</code> to also be included by default.
* </p>
*
* <p>
* The default exclusion patterns cause the following files to be excluded:
* </p>
* <ul>
Expand All @@ -157,6 +184,11 @@
* </p>
*
* <p>
* The file type used to extract a file can be customised via the <code>LGTM_INDEX_FILETYPES</code>
* environment variable explained above.
* </p>
*
* <p>
* Note that all these customisations only apply to <code>LGTM_SRC</code>. Extraction of
* externs is not customisable.
* </p>
Expand All @@ -176,6 +208,7 @@
public class AutoBuild {
private final ExtractorOutputConfig outputConfig;
private final ITrapCache trapCache;
private final Map<String, FileType> fileTypes = new LinkedHashMap<>();
private final Set<Path> includes = new LinkedHashSet<>();
private final Set<Path> excludes = new LinkedHashSet<>();
private ProjectLayout filters;
Expand All @@ -191,6 +224,7 @@ public AutoBuild() {
this.trapCache = mkTrapCache();
this.typeScriptMode = getEnumFromEnvVar("LGTM_INDEX_TYPESCRIPT", TypeScriptMode.class, TypeScriptMode.BASIC);
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
setupFileTypes();
setupMatchers();
}

Expand Down Expand Up @@ -260,6 +294,25 @@ private ITrapCache mkTrapCache() {
return trapCache;
}

/**
 * Populate {@link #fileTypes} from the <code>LGTM_INDEX_FILETYPES</code> environment variable.
 *
 * <p>
 * The value is split into lines; each non-blank line is split on <code>:</code>, and lines
 * that do not yield exactly two fields are skipped without an error. The first field is used
 * as the extension key, the second (after <code>StringUtil.uc</code>) must name a
 * {@link FileType} constant.
 * </p>
 *
 * @throws UserError if the second field does not name a {@link FileType}
 */
private void setupFileTypes() {
  for (String rawSpec : Main.NEWLINE.split(getEnvVar("LGTM_INDEX_FILETYPES", ""))) {
    String entry = rawSpec.trim();
    if (entry.isEmpty())
      continue;

    // anything that is not exactly "<extension>:<filetype>" is ignored
    String[] parts = entry.split(":");
    if (parts.length != 2)
      continue;

    String ext = parts[0].trim();
    String typeName = parts[1].trim();

    FileType resolved;
    try {
      resolved = FileType.valueOf(StringUtil.uc(typeName));
    } catch (IllegalArgumentException e) {
      Exceptions.ignore(e, "We construct a better error message.");
      throw new UserError("Invalid file type '" + typeName + "'.");
    }
    fileTypes.put(ext, resolved);
  }
}

/**
* Set up include and exclude matchers based on environment variables.
*/
Expand Down Expand Up @@ -333,6 +386,10 @@ private void setupFilters() {
patterns.add("**/.eslintrc*");
patterns.add("**/package.json");

// include any explicitly specified extensions
for (String extension : fileTypes.keySet())
patterns.add("**/*" + extension);

// exclude files whose name strongly suggests they are minified
patterns.add("-**/*.min.js");
patterns.add("-**/*-min.js");
Expand Down Expand Up @@ -466,28 +523,48 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
* Extract all supported candidate files that pass the filters.
*/
private void extractSource() throws IOException {
// NOTE(review): this span comes from a diff view and appears to interleave pre-change and
// post-change lines (e.g. `extractedFiles` is declared twice below, and `extractor` is
// declared both here and inside the final loop) -- confirm against the merged file.
ExtractorConfig config = new ExtractorConfig(true);
config = config.withSourceType(getSourceType());
config = config.withTypeScriptMode(typeScriptMode);
if (defaultEncoding != null)
config = config.withDefaultEncoding(defaultEncoding);
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
// default extractor, used for every file whose extension has no custom file type
FileExtractor defaultExtractor = new FileExtractor(mkExtractorConfig(), outputConfig, trapCache);

// custom extractors for explicitly specified file types: one per configured extension,
// each built from the common configuration plus the configured file type's name
Map<String, FileExtractor> customExtractors = new LinkedHashMap<>();
for (Map.Entry<String, FileType> spec : fileTypes.entrySet()) {
String extension = spec.getKey();
String fileType = spec.getValue().name();
ExtractorConfig extractorConfig = mkExtractorConfig().withFileType(fileType);
customExtractors.put(extension, new FileExtractor(extractorConfig, outputConfig, trapCache));
}

// collect candidate files and tsconfig files
Set<Path> filesToExtract = new LinkedHashSet<>();
List<Path> tsconfigFiles = new ArrayList<>();
findFilesToExtract(extractor, filesToExtract, tsconfigFiles);
findFilesToExtract(defaultExtractor, filesToExtract, tsconfigFiles);

// extract TypeScript projects and files
Set<Path> extractedFiles = extractTypeScript(extractor, filesToExtract, tsconfigFiles);
Set<Path> extractedFiles = extractTypeScript(defaultExtractor, filesToExtract, tsconfigFiles);

// extract remaining files
for (Path f : filesToExtract) {
// `add` returns false for files already in the set returned by extractTypeScript,
// so those are not extracted a second time
if (extractedFiles.add(f)) {
FileExtractor extractor = defaultExtractor;
// only compute the extension when custom file types were configured
if (!fileTypes.isEmpty()) {
String extension = FileUtil.extension(f);
if (customExtractors.containsKey(extension))
extractor = customExtractors.get(extension);
}
extract(extractor, f, null);
}
}
}

/**
 * Build the extractor configuration shared by all extractors: source type, TypeScript mode
 * and, if one was configured, the default encoding.
 */
private ExtractorConfig mkExtractorConfig() {
  ExtractorConfig base = new ExtractorConfig(true)
      .withSourceType(getSourceType())
      .withTypeScriptMode(typeScriptMode);
  // the default encoding is only applied when one has been set
  return defaultEncoding == null ? base : base.withDefaultEncoding(defaultEncoding);
}

private Set<Path> extractTypeScript(FileExtractor extractor, Set<Path> files, List<Path> tsconfig) {
Set<Path> extractedFiles = new LinkedHashSet<>();

Expand Down Expand Up @@ -574,7 +651,11 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
return FileVisitResult.SKIP_SUBTREE;

// extract files that are supported and pass the include/exclude patterns
if (extractor.supports(file.toFile()) && isFileIncluded(file)) {
boolean supported = extractor.supports(file.toFile());
if (!supported && !fileTypes.isEmpty()) {
supported = fileTypes.containsKey(FileUtil.extension(file));
}
if (supported && isFileIncluded(file)) {
filesToExtract.add(normalizePath(file));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.semmle.js.extractor.AutoBuild;
import com.semmle.js.extractor.ExtractorState;
import com.semmle.js.extractor.FileExtractor;
import com.semmle.js.extractor.FileExtractor.FileType;
import com.semmle.util.data.StringUtil;
import com.semmle.util.exception.UserError;
import com.semmle.util.files.FileUtil8;
Expand Down Expand Up @@ -74,15 +75,31 @@ public void teardown() throws IOException {
/**
* Add a file under {@code root} that we either do or don't expect to be extracted,
* depending on the value of {@code extracted}. If the file is expected to be
* extracted, its path is added to {@link #expected}.
* extracted, its path is added to {@link #expected}. If non-null, parameter
* {@code fileType} indicates the file type with which we expect the file to be extracted.
*/
private Path addFile(boolean extracted, FileType fileType, Path root, String... components) throws IOException {
  Path created = addFile(root, components);
  // files we do not expect to see extracted are created but not recorded
  if (!extracted)
    return created;

  // expected entries are "<path>" or, with an explicit file type, "<path>:<fileType>"
  String entry = created.toString();
  if (fileType != null)
    entry = entry + ":" + fileType.toString();
  expected.add(entry);
  return created;
}

/**
* Add a file with default file type; see {@link #addFile(boolean, FileType, Path, String...)}.
*/
private Path addFile(boolean extracted, Path root, String... components) throws IOException {
  // a null file type means no ":<fileType>" suffix is expected for this file
  FileType noExplicitType = null;
  return addFile(extracted, noExplicitType, root, components);
}

/**
* Create a file at the specified path under {@code root} and return it.
*/
private Path addFile(Path root, String... components) throws IOException {
// NOTE(review): this span comes from a diff view and appears to interleave removed and added
// lines (two return statements, and `extracted` is not a parameter of this overload) --
// confirm against the merged file.
Path p = Paths.get(root.toString(), components);
// make sure the parent directory exists before creating the file itself
Files.createDirectories(p.getParent());
Path f = Files.createFile(p);
if (extracted)
expected.add(f.toString());
return f;
return Files.createFile(p);
}

/**
Expand All @@ -96,7 +113,10 @@ private void runTest() throws IOException {
new AutoBuild() {
// Test stub: records the file in `actual` rather than extracting it.
// NOTE(review): this span comes from a diff view and appears to contain both the old
// unconditional `actual.add(file.toString())` and its replacement -- confirm against the
// merged file.
@Override
protected void extract(FileExtractor extractor, Path file, ExtractorState state) {
actual.add(file.toString());
String extracted = file.toString();
// when the extractor's config carries a file type, record it as a ":<fileType>" suffix
if (extractor.getConfig().hasFileType())
extracted += ":" + extractor.getFileType(file.toFile());
actual.add(extracted);
}

@Override
Expand Down Expand Up @@ -453,4 +473,33 @@ public void minifiedFilesCanBeReIncluded() throws IOException {
addFile(true, LGTM_SRC, "compute_min.js");
runTest();
}

@Test
public void customExtensions() throws IOException {
  // two custom mappings, one per line
  String fileTypeSpecs = ".jsm:js\n.soy:html";
  envVars.put("LGTM_INDEX_FILETYPES", fileTypeSpecs);

  // files with a custom extension are expected with the configured file type
  addFile(true, FileType.JS, LGTM_SRC, "tst.jsm");
  // a name that merely ends in "jsm" without the dot is not expected to be extracted
  addFile(false, LGTM_SRC, "tstjsm");
  addFile(true, FileType.HTML, LGTM_SRC, "tst.soy");

  // files with standard extensions are still expected, without an explicit file type
  addFile(true, LGTM_SRC, "tst.html");
  addFile(true, LGTM_SRC, "tst.js");

  runTest();
}

@Test
public void overrideExtension() throws IOException {
  // remap the standard ".js" extension to the TypeScript file type
  String fileTypeSpec = ".js:typescript";
  envVars.put("LGTM_INDEX_FILETYPES", fileTypeSpec);

  addFile(true, FileType.TYPESCRIPT, LGTM_SRC, "tst.js");

  runTest();
}

@Test
public void invalidFileType() throws IOException {
  // "javascript" is expected to be rejected as a file type name
  envVars.put("LGTM_INDEX_FILETYPES", ".jsm:javascript");

  UserError caught = null;
  try {
    runTest();
  } catch (UserError ue) {
    caught = ue;
  }

  if (caught == null)
    Assert.fail("expected UserError");
  Assert.assertEquals("Invalid file type 'javascript'.", caught.getMessage());
}
}