Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -38,6 +39,11 @@ public class FileExtractor {
*/
public static final Pattern JSON_OBJECT_START = Pattern.compile("^(?s)\\s*\\{\\s*\"([^\"]|\\\\.)*\"\\s*:.*");

/**
* The charset for decoding UTF-8 strings.
*/
private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");

/**
* Information about supported file types.
*/
Expand Down Expand Up @@ -169,6 +175,11 @@ private boolean hasBadFileHeader(File f, String lcExt, ExtractorConfig config) {
if (isXml(bytes, length))
return true;

// Avoid files with an unrecognized shebang header.
if (hasUnrecognizedShebang(bytes, length)) {
return true;
}

return false;
} catch (IOException e) {
Exceptions.ignore(e, "Let extractor handle this one.");
Expand Down Expand Up @@ -249,6 +260,38 @@ private boolean hasUnprintableUtf8(byte[] bytes, int length) {
return false;
}

/**
 * Decides whether the buffer opens with a "#!" line that does not name a
 * JavaScript interpreter.
 *
 * A leading BOM, if any, is skipped first. The result is false when the buffer
 * does not begin with "#!" or holds nothing after those two characters. When no
 * CR or LF occurs within the first {@code length} bytes, the shebang is reported
 * as unrecognized. Otherwise the text between "#!" and the first line terminator
 * is decoded as UTF-8 and searched with NODE_INVOCATION.
 */
private boolean hasUnrecognizedShebang(byte[] bytes, int length) {
  // UNIX ignores a shebang that follows a BOM, yet the BOM may live only in the
  // source file and be removed by a build step, so look past it.
  final int shebangStart = skipBOM(bytes, length);
  final int textStart = shebangStart + 2;
  if (textStart >= length || bytes[shebangStart] != '#' || bytes[shebangStart + 1] != '!') {
    return false;
  }

  // Walk forward to the first line terminator; '#' and '!' can never be one,
  // so scanning may begin right after them.
  int lineEnd = textStart;
  while (lineEnd < length && bytes[lineEnd] != '\r' && bytes[lineEnd] != '\n') {
    lineEnd++;
  }
  if (lineEnd == length) {
    // Either the shebang line is very long or the file has no further lines;
    // both cases count as unrecognized.
    return true;
  }

  // Decode the interpreter part of the line and test it against the known
  // JavaScript interpreter pattern.
  String interpreter = new String(bytes, textStart, lineEnd - textStart, UTF8_CHARSET);
  return !NODE_INVOCATION.matcher(interpreter).find();
}

@Override
public IExtractor mkExtractor(ExtractorConfig config, ExtractorState state) {
return new TypeScriptExtractor(config, state.getTypeScriptParser());
Expand Down
5 changes: 5 additions & 0 deletions javascript/extractor/tests/shebang/input/not-typescript.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env perl

use strict;

exit 0;
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env node
interface Foo {
x: number;
}
3 changes: 3 additions & 0 deletions javascript/extractor/tests/shebang/input/typescript.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
interface Foo {
x: number;
}
3 changes: 3 additions & 0 deletions javascript/extractor/tests/shebang/options.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"typescript": true
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#10000=@"/typescript-with-shebang.ts;sourcefile"
files(#10000,"/typescript-with-shebang.ts","typescript-with-shebang","ts",0)
#10001=@"/;folder"
folders(#10001,"/","")
containerparent(#10001,#10000)
#10002=@"loc,{#10000},0,0,0,0"
locations_default(#10002,#10000,0,0,0,0)
hasLocation(#10000,#10002)
#20000=@"global_scope"
scopes(#20000,0)
#20001=@"script;{#10000},1,1"
toplevels(#20001,0)
#20002=@"loc,{#10000},1,1,5,0"
locations_default(#20002,#10000,1,1,5,0)
hasLocation(#20001,#20002)
#20003=@"local_type_name;{Foo};{#20000}"
local_type_names(#20003,"Foo",#20000)
#20004=*
stmts(#20004,34,#20001,0,"#!/usr/ ... mber;\n}")
#20005=@"loc,{#10000},1,1,4,1"
locations_default(#20005,#10000,1,1,4,1)
hasLocation(#20004,#20005)
stmtContainers(#20004,#20001)
#20006=*
typeexprs(#20006,1,#20004,0,"Foo")
#20007=@"loc,{#10000},2,11,2,13"
locations_default(#20007,#10000,2,11,2,13)
hasLocation(#20006,#20007)
enclosingStmt(#20006,#20004)
exprContainers(#20006,#20001)
literals("Foo","Foo",#20006)
typedecl(#20006,#20003)
#20008=*
properties(#20008,#20004,2,8,"x: number;")
#20009=@"loc,{#10000},3,3,3,12"
locations_default(#20009,#10000,3,3,3,12)
hasLocation(#20008,#20009)
#20010=*
exprs(#20010,0,#20008,0,"x")
#20011=@"loc,{#10000},3,3,3,3"
locations_default(#20011,#10000,3,3,3,3)
hasLocation(#20010,#20011)
enclosingStmt(#20010,#20004)
exprContainers(#20010,#20001)
literals("x","x",#20010)
isAbstractMember(#20008)
#20012=*
typeexprs(#20012,2,#20008,2,"number")
#20013=@"loc,{#10000},3,6,3,11"
locations_default(#20013,#10000,3,6,3,11)
hasLocation(#20012,#20013)
enclosingStmt(#20012,#20004)
exprContainers(#20012,#20001)
literals("number","number",#20012)
#20014=*
lines(#20014,#20001,"#!/usr/bin/env node","
")
#20015=@"loc,{#10000},1,1,1,19"
locations_default(#20015,#10000,1,1,1,19)
hasLocation(#20014,#20015)
#20016=*
lines(#20016,#20001,"interface Foo {","
")
#20017=@"loc,{#10000},2,1,2,15"
locations_default(#20017,#10000,2,1,2,15)
hasLocation(#20016,#20017)
#20018=*
lines(#20018,#20001," x: number;","
")
#20019=@"loc,{#10000},3,1,3,12"
locations_default(#20019,#10000,3,1,3,12)
hasLocation(#20018,#20019)
indentation(#10000,3," ",2)
#20020=*
lines(#20020,#20001,"}","
")
#20021=@"loc,{#10000},4,1,4,1"
locations_default(#20021,#10000,4,1,4,1)
hasLocation(#20020,#20021)
numlines(#20001,4,3,0)
#20022=*
tokeninfo(#20022,7,#20001,0,"interface")
#20023=@"loc,{#10000},2,1,2,9"
locations_default(#20023,#10000,2,1,2,9)
hasLocation(#20022,#20023)
#20024=*
tokeninfo(#20024,6,#20001,1,"Foo")
hasLocation(#20024,#20007)
#20025=*
tokeninfo(#20025,8,#20001,2,"{")
#20026=@"loc,{#10000},2,15,2,15"
locations_default(#20026,#10000,2,15,2,15)
hasLocation(#20025,#20026)
#20027=*
tokeninfo(#20027,6,#20001,3,"x")
hasLocation(#20027,#20011)
#20028=*
tokeninfo(#20028,8,#20001,4,":")
#20029=@"loc,{#10000},3,4,3,4"
locations_default(#20029,#10000,3,4,3,4)
hasLocation(#20028,#20029)
#20030=*
tokeninfo(#20030,7,#20001,5,"number")
hasLocation(#20030,#20013)
#20031=*
tokeninfo(#20031,8,#20001,6,";")
#20032=@"loc,{#10000},3,12,3,12"
locations_default(#20032,#10000,3,12,3,12)
hasLocation(#20031,#20032)
#20033=*
tokeninfo(#20033,8,#20001,7,"}")
hasLocation(#20033,#20021)
#20034=*
tokeninfo(#20034,0,#20001,8,"")
#20035=@"loc,{#10000},5,1,5,0"
locations_default(#20035,#10000,5,1,5,0)
hasLocation(#20034,#20035)
#20036=*
entry_cfg_node(#20036,#20001)
#20037=@"loc,{#10000},1,1,1,0"
locations_default(#20037,#10000,1,1,1,0)
hasLocation(#20036,#20037)
#20038=*
exit_cfg_node(#20038,#20001)
hasLocation(#20038,#20035)
successor(#20004,#20038)
successor(#20036,#20004)
numlines(#10000,4,3,0)
filetype(#10000,"typescript")
123 changes: 123 additions & 0 deletions javascript/extractor/tests/shebang/output/trap/typescript.ts.trap
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#10000=@"/typescript.ts;sourcefile"
files(#10000,"/typescript.ts","typescript","ts",0)
#10001=@"/;folder"
folders(#10001,"/","")
containerparent(#10001,#10000)
#10002=@"loc,{#10000},0,0,0,0"
locations_default(#10002,#10000,0,0,0,0)
hasLocation(#10000,#10002)
#20000=@"global_scope"
scopes(#20000,0)
#20001=@"script;{#10000},1,1"
toplevels(#20001,0)
#20002=@"loc,{#10000},1,1,4,0"
locations_default(#20002,#10000,1,1,4,0)
hasLocation(#20001,#20002)
#20003=@"local_type_name;{Foo};{#20000}"
local_type_names(#20003,"Foo",#20000)
#20004=*
stmts(#20004,34,#20001,0,"interfa ... mber;\n}")
#20005=@"loc,{#10000},1,1,3,1"
locations_default(#20005,#10000,1,1,3,1)
hasLocation(#20004,#20005)
stmtContainers(#20004,#20001)
#20006=*
typeexprs(#20006,1,#20004,0,"Foo")
#20007=@"loc,{#10000},1,11,1,13"
locations_default(#20007,#10000,1,11,1,13)
hasLocation(#20006,#20007)
enclosingStmt(#20006,#20004)
exprContainers(#20006,#20001)
literals("Foo","Foo",#20006)
typedecl(#20006,#20003)
#20008=*
properties(#20008,#20004,2,8,"x: number;")
#20009=@"loc,{#10000},2,3,2,12"
locations_default(#20009,#10000,2,3,2,12)
hasLocation(#20008,#20009)
#20010=*
exprs(#20010,0,#20008,0,"x")
#20011=@"loc,{#10000},2,3,2,3"
locations_default(#20011,#10000,2,3,2,3)
hasLocation(#20010,#20011)
enclosingStmt(#20010,#20004)
exprContainers(#20010,#20001)
literals("x","x",#20010)
isAbstractMember(#20008)
#20012=*
typeexprs(#20012,2,#20008,2,"number")
#20013=@"loc,{#10000},2,6,2,11"
locations_default(#20013,#10000,2,6,2,11)
hasLocation(#20012,#20013)
enclosingStmt(#20012,#20004)
exprContainers(#20012,#20001)
literals("number","number",#20012)
#20014=*
lines(#20014,#20001,"interface Foo {","
")
#20015=@"loc,{#10000},1,1,1,15"
locations_default(#20015,#10000,1,1,1,15)
hasLocation(#20014,#20015)
#20016=*
lines(#20016,#20001," x: number;","
")
#20017=@"loc,{#10000},2,1,2,12"
locations_default(#20017,#10000,2,1,2,12)
hasLocation(#20016,#20017)
indentation(#10000,2," ",2)
#20018=*
lines(#20018,#20001,"}","
")
#20019=@"loc,{#10000},3,1,3,1"
locations_default(#20019,#10000,3,1,3,1)
hasLocation(#20018,#20019)
numlines(#20001,3,3,0)
#20020=*
tokeninfo(#20020,7,#20001,0,"interface")
#20021=@"loc,{#10000},1,1,1,9"
locations_default(#20021,#10000,1,1,1,9)
hasLocation(#20020,#20021)
#20022=*
tokeninfo(#20022,6,#20001,1,"Foo")
hasLocation(#20022,#20007)
#20023=*
tokeninfo(#20023,8,#20001,2,"{")
#20024=@"loc,{#10000},1,15,1,15"
locations_default(#20024,#10000,1,15,1,15)
hasLocation(#20023,#20024)
#20025=*
tokeninfo(#20025,6,#20001,3,"x")
hasLocation(#20025,#20011)
#20026=*
tokeninfo(#20026,8,#20001,4,":")
#20027=@"loc,{#10000},2,4,2,4"
locations_default(#20027,#10000,2,4,2,4)
hasLocation(#20026,#20027)
#20028=*
tokeninfo(#20028,7,#20001,5,"number")
hasLocation(#20028,#20013)
#20029=*
tokeninfo(#20029,8,#20001,6,";")
#20030=@"loc,{#10000},2,12,2,12"
locations_default(#20030,#10000,2,12,2,12)
hasLocation(#20029,#20030)
#20031=*
tokeninfo(#20031,8,#20001,7,"}")
hasLocation(#20031,#20019)
#20032=*
tokeninfo(#20032,0,#20001,8,"")
#20033=@"loc,{#10000},4,1,4,0"
locations_default(#20033,#10000,4,1,4,0)
hasLocation(#20032,#20033)
#20034=*
entry_cfg_node(#20034,#20001)
#20035=@"loc,{#10000},1,1,1,0"
locations_default(#20035,#10000,1,1,1,0)
hasLocation(#20034,#20035)
#20036=*
exit_cfg_node(#20036,#20001)
hasLocation(#20036,#20033)
successor(#20004,#20036)
successor(#20034,#20004)
numlines(#10000,3,3,0)
filetype(#10000,"typescript")