Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 20 additions & 7 deletions java/ql/lib/semmle/code/java/Expr.qll
Original file line number Diff line number Diff line change
Expand Up @@ -298,18 +298,15 @@ class CompileTimeConstantExpr extends Expr {
*
* Note that this does not handle the following cases:
*
* - values of type `long`,
* - `char` literals.
* - values of type `long`.
*/
cached
int getIntValue() {
exists(IntegralType t | this.getType() = t | t.getName().toLowerCase() != "long") and
(
exists(string lit | lit = this.(Literal).getValue() |
// `char` literals may get parsed incorrectly, so disallow.
not this instanceof CharacterLiteral and
result = lit.toInt()
)
result = this.(IntegerLiteral).getIntValue()
or
result = this.(CharacterLiteral).getCodePointValue()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably don't want to surprise users by exposing this change through an existing API

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure this is really a surprise. If you perform an arithmetic operation on a char literal in Java, it actually uses the character's code point, e.g. `'a' + 1` is 98.
If a new predicate were added for this, a lot of the logic for the arithmetic operations of `CompileTimeConstantExpr` would have to be duplicated.

Additionally, `getIntValue()` already covers a cast to `char`, so the difference in behavior is small.

or
exists(CastExpr cast, int val |
cast = this and val = cast.getExpr().(CompileTimeConstantExpr).getIntValue()
Expand Down Expand Up @@ -719,6 +716,22 @@ class DoubleLiteral extends Literal, @doubleliteral {
/**
* A character literal. For example, `'\n'`.
*
* The represented character is available as a string through `getValue()`
* and as an integer through `getCodePointValue()`.
*/
class CharacterLiteral extends Literal, @characterliteral {
override string getAPrimaryQlClass() { result = "CharacterLiteral" }

/**
* Gets a string which consists of the single character represented by
* this literal.
*
* Unicode surrogate characters (U+D800 to U+DFFF) have the replacement character
* U+FFFD as result instead.
*
* This override only forwards to the inherited `getValue()` and does not
* change the result; presumably it exists so that the note above can be
* documented on this class.
*/
override string getValue() { result = super.getValue() }

/**
* Gets the Unicode code point value of the character represented by
* this literal. The result is the same as if the Java code had cast
* the character to an `int`.
*
* The result is defined as the integer whose `toUnicode()` string equals
* `getValue()`. Because `getValue()` yields U+FFFD for surrogate characters
* (U+D800 to U+DFFF), the result for such literals is 65533 rather than the
* value a Java cast would produce (for example, 55296 for `'\uD800'`).
*/
int getCodePointValue() { result.toUnicode() = this.getValue() }
}

/**
Expand Down
5 changes: 4 additions & 1 deletion java/ql/lib/semmle/code/java/Type.qll
Original file line number Diff line number Diff line change
Expand Up @@ -1123,7 +1123,10 @@ predicate erasedHaveIntersection(RefType t1, RefType t2) {
t2 = erase(_)
}

/** An integral type, which may be either a primitive or a boxed type. */
/**
* An integral type, which may be either a primitive or a boxed type.
* This includes the types `char` and `Character`.
*/
class IntegralType extends Type {
IntegralType() {
exists(string name |
Expand Down
4 changes: 2 additions & 2 deletions java/ql/test/library-tests/constants/constants/Values.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ void values(final int notConstant) {
int binary_literal = 0b101010; //42
int negative_binary_literal = -0b101010; //-42
int binary_literal_underscores = 0b1_0101_0; //42
char char_literal = '*'; //Not handled
char char_literal = '*'; //42
long long_literal = 42L; //Not handled
boolean boolean_literal = true; //true
Integer boxed_int = new Integer(42); //Not handled
Expand All @@ -30,7 +30,7 @@ void values(final int notConstant) {
byte downcast_byte_4 = (byte) 214; // -42
byte downcast_byte_5 = (byte) (-214); // 42
short downcast_short = (short) 32768; // -32768
int cast_of_non_constant = (int) '*'; //Not handled
int cast_of_non_constant = (int) '*'; //42
long cast_to_long = (long) 42; //Not handled

int unary_plus = +42; //42
Expand Down
2 changes: 2 additions & 0 deletions java/ql/test/library-tests/constants/getIntValue.expected
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
| constants/Values.java:16:30:16:37 | 0b101010 | 42 |
| constants/Values.java:17:39:17:47 | -... | -42 |
| constants/Values.java:18:42:18:51 | 0b1_0101_0 | 42 |
| constants/Values.java:19:29:19:31 | '*' | 42 |
| constants/Values.java:25:20:25:27 | (...)... | 42 |
| constants/Values.java:26:25:26:33 | (...)... | 42 |
| constants/Values.java:27:32:27:43 | (...)... | -42 |
Expand All @@ -17,6 +18,7 @@
| constants/Values.java:30:32:30:41 | (...)... | -42 |
| constants/Values.java:31:32:31:44 | (...)... | 42 |
| constants/Values.java:32:32:32:44 | (...)... | -32768 |
| constants/Values.java:33:36:33:44 | (...)... | 42 |
| constants/Values.java:36:26:36:28 | +... | 42 |
| constants/Values.java:39:27:39:29 | -... | -42 |
| constants/Values.java:43:27:43:28 | ~... | -1 |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ public class CharLiterals {
'\\',
'\'',
'\123', // octal escape sequence for 'S'
// CodeQL uses U+FFFD for unpaired surrogates, see https://github.com/github/codeql/issues/6611
'\uD800', // high surrogate
'\uDC00', // low surrogate
// Using Unicode escapes (which are handled during pre-processing)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
| CharLiterals.java:5:3:5:5 | 'a' | a |
| CharLiterals.java:6:3:6:10 | '\\u0061' | a |
| CharLiterals.java:7:3:7:10 | '\\u0000' | \u0000 |
| CharLiterals.java:8:3:8:10 | '\\uFFFF' | \uffff |
| CharLiterals.java:9:3:9:10 | '\\ufFfF' | \uffff |
| CharLiterals.java:10:3:10:6 | '\\0' | \u0000 |
| CharLiterals.java:11:3:11:6 | '\\n' | \n |
| CharLiterals.java:12:3:12:5 | '"' | " |
| CharLiterals.java:13:3:13:6 | '\\\\' | \\ |
| CharLiterals.java:14:3:14:6 | '\\'' | ' |
| CharLiterals.java:15:3:15:8 | '\\123' | S |
| CharLiterals.java:16:3:16:10 | '\\uD800' | \ufffd |
| CharLiterals.java:17:3:17:10 | '\\uDC00' | \ufffd |
| CharLiterals.java:19:3:19:16 | '\\u005C\\u005C' | \\ |
| CharLiterals.java:20:3:20:16 | '\\u005C\\u0027' | ' |
| CharLiterals.java:21:8:21:15 | 7a\\u0027 | a |
| CharLiterals.java:26:4:26:6 | 'a' | a |
| CharLiterals.java:27:4:27:6 | 'a' | a |
| CharLiterals.java:32:3:32:5 | 'a' | a |
| CharLiterals.java:32:9:32:11 | 'b' | b |
| CharLiterals.java:5:3:5:5 | 'a' | a | 97 |
| CharLiterals.java:6:3:6:10 | '\\u0061' | a | 97 |
| CharLiterals.java:7:3:7:10 | '\\u0000' | \u0000 | 0 |
| CharLiterals.java:8:3:8:10 | '\\uFFFF' | \uffff | 65535 |
| CharLiterals.java:9:3:9:10 | '\\ufFfF' | \uffff | 65535 |
| CharLiterals.java:10:3:10:6 | '\\0' | \u0000 | 0 |
| CharLiterals.java:11:3:11:6 | '\\n' | \n | 10 |
| CharLiterals.java:12:3:12:5 | '"' | " | 34 |
| CharLiterals.java:13:3:13:6 | '\\\\' | \\ | 92 |
| CharLiterals.java:14:3:14:6 | '\\'' | ' | 39 |
| CharLiterals.java:15:3:15:8 | '\\123' | S | 83 |
| CharLiterals.java:17:3:17:10 | '\\uD800' | \ufffd | 65533 |
| CharLiterals.java:18:3:18:10 | '\\uDC00' | \ufffd | 65533 |
| CharLiterals.java:20:3:20:16 | '\\u005C\\u005C' | \\ | 92 |
| CharLiterals.java:21:3:21:16 | '\\u005C\\u0027' | ' | 39 |
| CharLiterals.java:22:8:22:15 | 7a\\u0027 | a | 97 |
| CharLiterals.java:27:4:27:6 | 'a' | a | 97 |
| CharLiterals.java:28:4:28:6 | 'a' | a | 97 |
| CharLiterals.java:33:3:33:5 | 'a' | a | 97 |
| CharLiterals.java:33:9:33:11 | 'b' | b | 98 |
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import semmle.code.java.Expr

from CharacterLiteral lit
select lit, lit.getValue()
select lit, lit.getValue(), lit.getCodePointValue()
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public class StringLiterals {
"\uD800\uDC00", // surrogate pair
"\uDBFF\uDFFF", // U+10FFFF
// Unpaired surrogates
// CodeQL uses U+FFFD for them, see https://github.com/github/codeql/issues/6611
"\uD800",
"\uDC00",
"hello\uD800hello\uDC00world", // malformed surrogates
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,32 @@
| StringLiterals.java:23:3:23:18 | "\\uaBcDeF\\u0aB1" | \uabcdeF\u0ab1 | \uabcdeF\u0ab1 | |
| StringLiterals.java:24:3:24:16 | "\\uD800\\uDC00" | \ud800\udc00 | \ud800\udc00 | |
| StringLiterals.java:25:3:25:16 | "\\uDBFF\\uDFFF" | \udbff\udfff | \udbff\udfff | |
| StringLiterals.java:27:3:27:10 | "\\uD800" | \ufffd | \ufffd | |
| StringLiterals.java:28:3:28:10 | "\\uDC00" | \ufffd | \ufffd | |
| StringLiterals.java:29:3:29:31 | "hello\\uD800hello\\uDC00world" | hello\ufffdhello\ufffdworld | hello\ufffdhello\ufffdworld | |
| StringLiterals.java:31:3:31:16 | "\\u005C\\u0022" | " | " | |
| StringLiterals.java:32:8:32:20 | 2\\u0061\\u0022 | a | a | |
| StringLiterals.java:37:3:39:5 | """ \t \n\t\ttest "text" and escaped \\u0022\n\t\t""" | test "text" and escaped "\n | test "text" and escaped "\n | text-block |
| StringLiterals.java:41:3:43:5 | """\n\t\t\tindented\n\t\t""" | \tindented\n | \tindented\n | text-block |
| StringLiterals.java:44:3:46:5 | """\n\tno indentation last line\n\t\t""" | no indentation last line\n | no indentation last line\n | text-block |
| StringLiterals.java:47:3:49:7 | """\n\tindentation last line\n\t\t\\s""" | indentation last line\n\t | indentation last line\n\t | text-block |
| StringLiterals.java:50:3:52:6 | """\n\t\t\tnot-indented\n\t\t\t""" | not-indented\n | not-indented\n | text-block |
| StringLiterals.java:53:3:55:4 | """\n\t\tindented\n\t""" | \tindented\n | \tindented\n | text-block |
| StringLiterals.java:56:4:58:5 | """\n\t\tnot-indented\n\t\t""" | not-indented\n | not-indented\n | text-block |
| StringLiterals.java:59:3:62:6 | """\n\t\t spaces (only single space is trimmed)\n\t\t\ttab\n\t\t\t""" | spaces (only single space is trimmed)\ntab\n | spaces (only single space is trimmed)\ntab\n | text-block |
| StringLiterals.java:63:3:64:22 | """\n\t\t\tend on same line""" | end on same line | end on same line | text-block |
| StringLiterals.java:65:3:68:5 | """\n\t\ttrailing spaces ignored: \t \n\t\tnot ignored: \t \\s\n\t\t""" | trailing spaces ignored:\nnot ignored: \t \n | trailing spaces ignored:\nnot ignored: \t \n | text-block |
| StringLiterals.java:69:3:70:18 | """\n\t\t3 quotes:""\\"""" | 3 quotes:""" | 3 quotes:""" | text-block |
| StringLiterals.java:71:3:74:5 | """\n\t\tline \\\n\t\tcontinuation \\\n\t\t""" | line continuation | line continuation | text-block |
| StringLiterals.java:75:3:79:5 | """\n\t\tExplicit line breaks:\\n\n\t\t\\r\\n\n\t\t\\r\n\t\t""" | Explicit line breaks:\n\n\r\n\n\r\n | Explicit line breaks:\n\n\r\n\n\r\n | text-block |
| StringLiterals.java:82:10:84:16 | 2"\\u0022\n\t\ttest\n\t\t\\u0022\\uu0022" | test\n | test\n | |
| StringLiterals.java:90:3:90:19 | "hello" + "world" | helloworld | helloworld | |
| StringLiterals.java:91:3:92:20 | """\n\t\thello""" + "world" | helloworld | helloworld | text-block |
| StringLiterals.java:93:10:93:12 | "a" | a | a | |
| StringLiterals.java:94:3:94:5 | "a" | a | a | |
| StringLiterals.java:28:3:28:10 | "\\uD800" | \ufffd | \ufffd | |
| StringLiterals.java:29:3:29:10 | "\\uDC00" | \ufffd | \ufffd | |
| StringLiterals.java:30:3:30:31 | "hello\\uD800hello\\uDC00world" | hello\ufffdhello\ufffdworld | hello\ufffdhello\ufffdworld | |
| StringLiterals.java:32:3:32:16 | "\\u005C\\u0022" | " | " | |
| StringLiterals.java:33:8:33:20 | 2\\u0061\\u0022 | a | a | |
| StringLiterals.java:38:3:40:5 | """ \t \n\t\ttest "text" and escaped \\u0022\n\t\t""" | test "text" and escaped "\n | test "text" and escaped "\n | text-block |
| StringLiterals.java:42:3:44:5 | """\n\t\t\tindented\n\t\t""" | \tindented\n | \tindented\n | text-block |
| StringLiterals.java:45:3:47:5 | """\n\tno indentation last line\n\t\t""" | no indentation last line\n | no indentation last line\n | text-block |
| StringLiterals.java:48:3:50:7 | """\n\tindentation last line\n\t\t\\s""" | indentation last line\n\t | indentation last line\n\t | text-block |
| StringLiterals.java:51:3:53:6 | """\n\t\t\tnot-indented\n\t\t\t""" | not-indented\n | not-indented\n | text-block |
| StringLiterals.java:54:3:56:4 | """\n\t\tindented\n\t""" | \tindented\n | \tindented\n | text-block |
| StringLiterals.java:57:4:59:5 | """\n\t\tnot-indented\n\t\t""" | not-indented\n | not-indented\n | text-block |
| StringLiterals.java:60:3:63:6 | """\n\t\t spaces (only single space is trimmed)\n\t\t\ttab\n\t\t\t""" | spaces (only single space is trimmed)\ntab\n | spaces (only single space is trimmed)\ntab\n | text-block |
| StringLiterals.java:64:3:65:22 | """\n\t\t\tend on same line""" | end on same line | end on same line | text-block |
| StringLiterals.java:66:3:69:5 | """\n\t\ttrailing spaces ignored: \t \n\t\tnot ignored: \t \\s\n\t\t""" | trailing spaces ignored:\nnot ignored: \t \n | trailing spaces ignored:\nnot ignored: \t \n | text-block |
| StringLiterals.java:70:3:71:18 | """\n\t\t3 quotes:""\\"""" | 3 quotes:""" | 3 quotes:""" | text-block |
| StringLiterals.java:72:3:75:5 | """\n\t\tline \\\n\t\tcontinuation \\\n\t\t""" | line continuation | line continuation | text-block |
| StringLiterals.java:76:3:80:5 | """\n\t\tExplicit line breaks:\\n\n\t\t\\r\\n\n\t\t\\r\n\t\t""" | Explicit line breaks:\n\n\r\n\n\r\n | Explicit line breaks:\n\n\r\n\n\r\n | text-block |
| StringLiterals.java:83:10:85:16 | 2"\\u0022\n\t\ttest\n\t\t\\u0022\\uu0022" | test\n | test\n | |
| StringLiterals.java:91:3:91:19 | "hello" + "world" | helloworld | helloworld | |
| StringLiterals.java:92:3:93:20 | """\n\t\thello""" + "world" | helloworld | helloworld | text-block |
| StringLiterals.java:94:10:94:12 | "a" | a | a | |
| StringLiterals.java:95:3:95:5 | "a" | a | a | |
| StringLiterals.java:96:7:96:9 | "a" | a | a | |
| StringLiterals.java:97:3:97:5 | "a" | a | a | |
| StringLiterals.java:98:10:98:12 | "a" | a | a | |
| StringLiterals.java:99:3:99:5 | "a" | a | a | |
| StringLiterals.java:100:9:100:11 | "a" | a | a | |
| StringLiterals.java:96:3:96:5 | "a" | a | a | |
| StringLiterals.java:97:7:97:9 | "a" | a | a | |
| StringLiterals.java:98:3:98:5 | "a" | a | a | |
| StringLiterals.java:99:10:99:12 | "a" | a | a | |
| StringLiterals.java:100:3:100:5 | "a" | a | a | |
| StringLiterals.java:101:9:101:11 | "a" | a | a | |