From 38a38ab780f60eb3e439252d8e6c72e71c097ab9 Mon Sep 17 00:00:00 2001
From: Max Schaefer <max@semmle.com>
Date: Tue, 28 May 2019 10:53:02 +0100
Subject: [PATCH] JavaScript: Make autobuilder fail if no JS/TS code was seen.

In particular, the autobuilder will no longer succeed for projects that
contain HTML or YAML files but no JS/TS code. Further down the line,
this prevents LGTM.com from classifying such projects as "JavaScript"
projects.
---
 change-notes/1.21/extractor-javascript.md     |  2 +-
 .../com/semmle/js/extractor/AutoBuild.java    | 57 +++++++++++--------
 .../semmle/js/extractor/FileExtractor.java    | 12 ++--
 3 files changed, 42 insertions(+), 29 deletions(-)

diff --git a/change-notes/1.21/extractor-javascript.md b/change-notes/1.21/extractor-javascript.md
index 5a18c906c304..99dc32dac0cc 100644
--- a/change-notes/1.21/extractor-javascript.md
+++ b/change-notes/1.21/extractor-javascript.md
@@ -5,5 +5,5 @@
 ## Changes to code extraction
 
 * ECMAScript 2019 support is now enabled by default.
-
+* On LGTM, JavaScript extraction for projects that do not contain any JavaScript or TypeScript code will now fail, even if the project contains other file types (such as HTML or YAML) recognized by the JavaScript extractor.
 * YAML files are now extracted by default on LGTM. You can specify exclusion filters in your `lgtm.yml` file to override this behavior.
diff --git a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
index f5a7721984c0..0544290ebd65 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java
@@ -1,26 +1,5 @@
 package com.semmle.js.extractor;
 
-import com.semmle.js.extractor.ExtractorConfig.SourceType;
-import com.semmle.js.extractor.FileExtractor.FileType;
-import com.semmle.js.extractor.trapcache.DefaultTrapCache;
-import com.semmle.js.extractor.trapcache.DummyTrapCache;
-import com.semmle.js.extractor.trapcache.ITrapCache;
-import com.semmle.js.parser.ParsedProject;
-import com.semmle.js.parser.TypeScriptParser;
-import com.semmle.ts.extractor.TypeExtractor;
-import com.semmle.ts.extractor.TypeTable;
-import com.semmle.util.data.StringUtil;
-import com.semmle.util.exception.CatastrophicError;
-import com.semmle.util.exception.Exceptions;
-import com.semmle.util.exception.ResourceError;
-import com.semmle.util.exception.UserError;
-import com.semmle.util.extraction.ExtractorOutputConfig;
-import com.semmle.util.files.FileUtil;
-import com.semmle.util.io.csv.CSVReader;
-import com.semmle.util.language.LegacyLanguage;
-import com.semmle.util.process.Env;
-import com.semmle.util.projectstructure.ProjectLayout;
-import com.semmle.util.trap.TrapWriter;
 import java.io.File;
 import java.io.IOException;
 import java.io.Reader;
@@ -48,6 +27,28 @@
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Stream;
 
+import com.semmle.js.extractor.ExtractorConfig.SourceType;
+import com.semmle.js.extractor.FileExtractor.FileType;
+import com.semmle.js.extractor.trapcache.DefaultTrapCache;
+import com.semmle.js.extractor.trapcache.DummyTrapCache;
+import com.semmle.js.extractor.trapcache.ITrapCache;
+import com.semmle.js.parser.ParsedProject;
+import com.semmle.js.parser.TypeScriptParser;
+import com.semmle.ts.extractor.TypeExtractor;
+import com.semmle.ts.extractor.TypeTable;
+import com.semmle.util.data.StringUtil;
+import com.semmle.util.exception.CatastrophicError;
+import com.semmle.util.exception.Exceptions;
+import com.semmle.util.exception.ResourceError;
+import com.semmle.util.exception.UserError;
+import com.semmle.util.extraction.ExtractorOutputConfig;
+import com.semmle.util.files.FileUtil;
+import com.semmle.util.io.csv.CSVReader;
+import com.semmle.util.language.LegacyLanguage;
+import com.semmle.util.process.Env;
+import com.semmle.util.projectstructure.ProjectLayout;
+import com.semmle.util.trap.TrapWriter;
+
 /**
  * An alternative entry point to the JavaScript extractor.
  *
@@ -194,6 +195,7 @@ public class AutoBuild {
   private final TypeScriptMode typeScriptMode;
   private final String defaultEncoding;
   private ExecutorService threadPool;
+  private volatile boolean seenCode = false;
 
   public AutoBuild() {
     this.LGTM_SRC = toRealPath(getPathFromEnvVar("LGTM_SRC"));
@@ -425,7 +427,7 @@ private boolean addPathPattern(Set<Path> patterns, Path base, String pattern) {
   }
 
   /** Perform extraction. */
-  public void run() throws IOException {
+  public int run() throws IOException {
     startThreadPool();
     try {
       extractSource();
@@ -434,6 +436,11 @@ public void run() throws IOException {
     } finally {
       shutdownThreadPool();
     }
+    if (!seenCode) {
+      warn("No JavaScript or TypeScript code found.");
+      return -1;
+    }
+    return 0;
   }
 
   private void startThreadPool() {
@@ -736,7 +743,9 @@ private void doExtract(FileExtractor extractor, Path file, ExtractorState state)
 
     try {
       long start = logBeginProcess("Extracting " + file);
-      extractor.extract(f, state);
+      Integer loc = extractor.extract(f, state);
+      if (!extractor.getConfig().isExterns() && (loc == null || loc != 0))
+        seenCode = true;
       logEndProcess(start, "Done extracting " + file);
     } catch (Throwable t) {
       System.err.println("Exception while extracting " + file + ".");
@@ -787,7 +796,7 @@ protected void extractXml() throws IOException {
 
   public static void main(String[] args) {
     try {
-      new AutoBuild().run();
+      System.exit(new AutoBuild().run());
     } catch (IOException | UserError | CatastrophicError e) {
       System.err.println(e.toString());
       System.exit(1);
diff --git a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java
index b192e37760c4..f70454c8daa6 100644
--- a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java
+++ b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java
@@ -384,7 +384,10 @@ public boolean supports(File f) {
     return config.hasFileType() || FileType.forFile(f, config) != null;
   }
 
-  public void extract(File f, ExtractorState state) throws IOException {
+  /**
+   * @return the number of lines of code extracted, or {@code null} if a cached result was reused
+   */
+  public Integer extract(File f, ExtractorState state) throws IOException {
     // populate source archive
     String source = new WholeIO(config.getDefaultEncoding()).strictread(f);
     outputConfig.getSourceArchive().add(f, source);
@@ -396,7 +399,7 @@ public void extract(File f, ExtractorState state) throws IOException {
     locationManager.emitFileLocation(fileLabel, 0, 0, 0, 0);
 
     // now extract the contents
-    extractContents(f, fileLabel, source, locationManager, state);
+    return extractContents(f, fileLabel, source, locationManager, state);
   }
 
   /**
@@ -420,7 +423,7 @@ public void extract(File f, ExtractorState state) throws IOException {
    * <p>Also note that we support extraction with TRAP writer factories that are not file-backed;
    * obviously, no caching is done in that scenario.
    */
-  private void extractContents(
+  private Integer extractContents(
       File f, Label fileLabel, String source, LocationManager locationManager, ExtractorState state)
       throws IOException {
     TrapWriter trapwriter = locationManager.getTrapWriter();
@@ -440,7 +443,7 @@ private void extractContents(
 
       if (cacheFile.exists()) {
         FileUtil.append(cacheFile, resultFile);
-        return;
+        return null;
       }
 
       // not in the cache yet, so use a caching TRAP writer to
@@ -463,6 +466,7 @@ private void extractContents(
       trapwriter.addTuple("numlines", fileLabel, numLines, linesOfCode, linesOfComments);
       trapwriter.addTuple("filetype", fileLabel, fileType.toString());
       successful = true;
+      return linesOfCode;
     } finally {
       if (!successful && trapwriter instanceof CachingTrapWriter)
         ((CachingTrapWriter) trapwriter).discard();