diff --git a/TODO.md b/TODO.md index 9ad0b2b487..22295f7daa 100644 --- a/TODO.md +++ b/TODO.md @@ -2,120 +2,179 @@ ## Current State -- **Tests passing:** 5,197 (76.2%) -- **Tests skipped:** 1,627 (23.8%) - - Parser issues: ~675 - - Explain mismatches: ~637 +- **Tests passing:** 5,933 (86.9%) +- **Tests skipped:** 891 (13.1%) -## Parser Issues +## Recently Fixed (explain layer) + +- ✅ TableJoin output - removed join type keywords +- ✅ Table function aliases (e.g., `remote('127.1') AS t1`) +- ✅ Table identifier aliases (e.g., `system.one AS xxx`) +- ✅ Array/tuple cast formatting for `::` syntax +- ✅ SETTINGS placement with FORMAT clause +- ✅ Concat operator `||` flattening into single `concat` function +- ✅ Window function (OVER clause) support +- ✅ Float literal formatting +- ✅ Aliased expression handling for binary/unary/function/identifier +- ✅ PARTITION BY support in CREATE TABLE +- ✅ Server error message stripping from expected output + +## Parser Issues (High Priority) These require changes to `parser/parser.go`: -### Table/Database Names Starting with Numbers -Tables and databases with names starting with digits fail to parse: +### DROP TABLE with Multiple Tables +Parser only captures first table when multiple are specified: ```sql -DROP TABLE IF EXISTS 03657_gby_overflow; -DROP DATABASE IF EXISTS 03710_database; +DROP TABLE IF EXISTS t1, t2, t3; +-- Expected: ExpressionList with 3 TableIdentifiers +-- Got: Single Identifier for t1 ``` -### FORMAT Null -The `FORMAT Null` clause is not recognized: +### Negative Integer Literals +Negative numbers are parsed as `Function negate` instead of negative literals: ```sql -SELECT ... FORMAT Null; +SELECT -1, -10000; +-- Expected: Literal Int64_-1 +-- Got: Function negate (children 1) with Literal UInt64_1 ``` -### FETCH FIRST ... ROW ONLY -SQL standard fetch syntax is not supported: +### CREATE TABLE with INDEX Clause +INDEX definitions in CREATE TABLE are not captured: ```sql -SELECT ... FETCH FIRST 1 ROW ONLY; +CREATE TABLE t (x Array(String), INDEX idx1 x TYPE bloom_filter(0.025)) ENGINE=MergeTree; ``` -### INSERT INTO FUNCTION -Function-based inserts are not supported: +### SETTINGS Inside Function Arguments +SETTINGS clause within function calls is not parsed: ```sql -INSERT INTO FUNCTION file('file.parquet') SELECT ...; +SELECT * FROM icebergS3(s3_conn, filename='test', SETTINGS key='value'); +-- The SETTINGS should become a Set child of the function ``` -### WITH ... AS Subquery Aliases -Subquery aliases in FROM clauses with keyword `AS`: +### CREATE TABLE with Column TTL +TTL expressions on columns are not captured: ```sql -SELECT * FROM (SELECT 1 x) AS alias; +CREATE TABLE t (c Int TTL expr()) ENGINE=MergeTree; +-- Expected: ColumnDeclaration with 2 children (type + TTL function) ``` -### String Concatenation Operator || -The `||` operator in some contexts: +### Empty Tuple in ORDER BY +`ORDER BY ()` should capture empty tuple expression: ```sql -SELECT currentDatabase() || '_test' AS key; +CREATE TABLE t (...) ENGINE=MergeTree ORDER BY (); +-- Expected: Function tuple (children 1) with empty ExpressionList +-- Got: Storage definition with no ORDER BY ``` -### MOD/DIV Operators -The MOD and DIV keywords as operators: +### String Escape Handling +Parser stores escaped characters literally instead of unescaping: ```sql -SELECT number MOD 3, number DIV 3 FROM ...; +SELECT 'x\'e2\''; +-- Parser stores: x\'e2\' (with backslashes) +-- Should store: x'e2' (unescaped) ``` -### Reserved Keyword Handling -Keywords like `LEFT`, `RIGHT` used as table aliases: +## Parser Issues (Medium Priority) + +### CREATE DICTIONARY +Dictionary definitions are not supported: ```sql -SELECT * FROM numbers(10) AS left RIGHT JOIN ...; +CREATE DICTIONARY d0 (c1 UInt64) PRIMARY KEY c1 LAYOUT(FLAT()) SOURCE(...); ``` -### Parameterized Settings -Settings with `$` parameters: +### CREATE USER / CREATE FUNCTION +User and function definitions are not supported: ```sql -SET param_$1 = 'Hello'; +CREATE USER test_user GRANTEES ...; +CREATE OR REPLACE FUNCTION myFunc AS ...; ``` -### Incomplete CASE Expression -CASE without END: +### QUALIFY Clause +Window function filtering clause: ```sql -SELECT CASE number -- missing END +SELECT x QUALIFY row_number() OVER () = 1; ``` -## Explain Output Issues +### INTO OUTFILE with TRUNCATE +Extended INTO OUTFILE syntax: +```sql +SELECT 1, 2 INTO OUTFILE '/dev/null' TRUNCATE FORMAT Npy; +``` -These require changes to `internal/explain/`: +### GROUPING SETS +Advanced grouping syntax: +```sql +SELECT ... GROUP BY GROUPING SETS ((a), (b)); +``` -### Double Equals (==) Operator -The `==` operator creates extra nested equals/tuple nodes: +### view() Table Function +The view() table function in FROM: ```sql -SELECT value == '127.0.0.1:9181' +SELECT * FROM view(SELECT 1 as id); ``` -Expected: `Function equals` with `Identifier` and `Literal` -Got: Nested `Function equals` with extra `Function tuple` -### CreateQuery Spacing -Some ClickHouse versions output extra space before `(children`: +### CREATE TABLE ... AS SELECT +CREATE TABLE with inline SELECT: +```sql +CREATE TABLE src ENGINE=Memory AS SELECT 1; ``` -CreateQuery d1 (children 1) -- two spaces -CreateQuery d1 (children 1) -- one space (our output) + +### Variant() Type with PRIMARY KEY +Complex column definitions: +```sql +CREATE TABLE t (c Variant() PRIMARY KEY) ENGINE=Redis(...); ``` -### Server Error Messages in Expected Output -Some test expected outputs include trailing messages: +## Parser Issues (Lower Priority) + +### INTERVAL with Dynamic Type +INTERVAL with type cast: +```sql +SELECT INTERVAL 1 MINUTE AS c0, INTERVAL c0::Dynamic DAY; ``` -The query succeeded but the server error '42' was expected + +### ALTER TABLE with Multiple Operations +Multiple ALTER operations in parentheses: +```sql +ALTER TABLE t (DELETE WHERE ...), (MODIFY SETTING ...), (UPDATE ... WHERE ...); ``` -These are not part of the actual EXPLAIN output. -## Lower Priority +### Tuple Type in Column with Subfield Access +Tuple type with engine using subfield: +```sql +CREATE TABLE t (t Tuple(a Int32)) ENGINE=EmbeddedRocksDB() PRIMARY KEY (t.a); +``` -### DateTime64 with Timezone -Type parameters with string timezone: +### insert() Function with input() +INSERT using input() function: ```sql -DateTime64(3,'UTC') +INSERT INTO FUNCTION null() SELECT * FROM input('x Int') ...; ``` -### Complex Type Expressions -Nested type expressions in column definitions: +## Explain Issues (Remaining) + +### Scientific Notation for Floats +Very small/large floats should use scientific notation: ```sql -CREATE TABLE t (c LowCardinality(UUID)); +SELECT 2.2250738585072014e-308; +-- Expected: Float64_2.2250738585072014e-308 +-- Got: Float64_0.0000...22250738585072014 ``` -### Parameterized Views -View definitions with parameters: +### Array Literals with Negative Numbers +Arrays with negative integers expand to Function instead of Literal: ```sql -CREATE VIEW v AS SELECT ... WHERE x={parity:Int8}; +SELECT [-10000, 5750]; +-- Expected: Literal Array_[Int64_-10000, UInt64_5750] +-- Got: Function array with Function negate for -10000 +``` + +### WithElement for CTE Subqueries +Some CTE subqueries should use WithElement wrapper: +```sql +WITH sub AS (SELECT ...) SELECT ...; +-- Expected: WithElement (children 1) > Subquery > SelectWithUnionQuery ``` ## Testing Notes @@ -127,10 +186,15 @@ go test ./parser -timeout 5s -v Count test results: ```bash -go test ./parser -timeout 5s -v 2>&1 | grep -E 'PASS:|SKIP:' | cut -d':' -f1 | sort | uniq -c +go test ./parser -v 2>&1 | grep -E 'PASS:|SKIP:' | wc -l ``` View explain mismatches: ```bash -go test ./parser -timeout 5s -v 2>&1 | grep -A 30 "TODO: Explain output mismatch" | head -100 +go test ./parser -v 2>&1 | grep -A 30 "TODO: Explain output mismatch" | head -100 +``` + +View parser failures: +```bash +go test ./parser -v 2>&1 | grep "TODO: Parser does not yet support" | head -20 ``` diff --git a/ast/ast.go b/ast/ast.go index 1d892f0ebd..8484583db3 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -2,6 +2,9 @@ package ast import ( + "encoding/json" + "math" + "github.com/kyleconroy/doubleclick/token" ) @@ -51,6 +54,7 @@ type SelectQuery struct { Where Expression `json:"where,omitempty"` GroupBy []Expression `json:"group_by,omitempty"` WithRollup bool `json:"with_rollup,omitempty"` + WithCube bool `json:"with_cube,omitempty"` WithTotals bool `json:"with_totals,omitempty"` Having Expression `json:"having,omitempty"` Window []*WindowDefinition `json:"window,omitempty"` @@ -199,13 +203,14 @@ func (s *SettingExpr) End() token.Position { return s.Position } // InsertQuery represents an INSERT statement. type InsertQuery struct { - Position token.Position `json:"-"` - Database string `json:"database,omitempty"` - Table string `json:"table,omitempty"` - Function *FunctionCall `json:"function,omitempty"` // For INSERT INTO FUNCTION syntax - Columns []*Identifier `json:"columns,omitempty"` - Select Statement `json:"select,omitempty"` - Format *Identifier `json:"format,omitempty"` + Position token.Position `json:"-"` + Database string `json:"database,omitempty"` + Table string `json:"table,omitempty"` + Function *FunctionCall `json:"function,omitempty"` // For INSERT INTO FUNCTION syntax + Columns []*Identifier `json:"columns,omitempty"` + Select Statement `json:"select,omitempty"` + Format *Identifier `json:"format,omitempty"` + HasSettings bool `json:"has_settings,omitempty"` // For SETTINGS clause } func (i *InsertQuery) Pos() token.Position { return i.Position } @@ -261,15 +266,27 @@ func (c *ColumnDeclaration) End() token.Position { return c.Position } // DataType represents a data type. type DataType struct { - Position token.Position `json:"-"` - Name string `json:"name"` - Parameters []Expression `json:"parameters,omitempty"` + Position token.Position `json:"-"` + Name string `json:"name"` + Parameters []Expression `json:"parameters,omitempty"` + HasParentheses bool `json:"has_parentheses,omitempty"` } func (d *DataType) Pos() token.Position { return d.Position } func (d *DataType) End() token.Position { return d.Position } func (d *DataType) expressionNode() {} +// NameTypePair represents a named type pair, used in Nested types. +type NameTypePair struct { + Position token.Position `json:"-"` + Name string `json:"name"` + Type *DataType `json:"type"` +} + +func (n *NameTypePair) Pos() token.Position { return n.Position } +func (n *NameTypePair) End() token.Position { return n.Position } +func (n *NameTypePair) expressionNode() {} + // CodecExpr represents a CODEC expression. type CodecExpr struct { Position token.Position `json:"-"` @@ -589,6 +606,42 @@ func (l *Literal) Pos() token.Position { return l.Position } func (l *Literal) End() token.Position { return l.Position } func (l *Literal) expressionNode() {} +// MarshalJSON handles special float values (NaN, +Inf, -Inf) that JSON doesn't support. +func (l *Literal) MarshalJSON() ([]byte, error) { + type literalAlias Literal + // Handle special float values + if f, ok := l.Value.(float64); ok { + if math.IsNaN(f) { + return json.Marshal(&struct { + *literalAlias + Value string `json:"value"` + }{ + literalAlias: (*literalAlias)(l), + Value: "NaN", + }) + } + if math.IsInf(f, 1) { + return json.Marshal(&struct { + *literalAlias + Value string `json:"value"` + }{ + literalAlias: (*literalAlias)(l), + Value: "+Inf", + }) + } + if math.IsInf(f, -1) { + return json.Marshal(&struct { + *literalAlias + Value string `json:"value"` + }{ + literalAlias: (*literalAlias)(l), + Value: "-Inf", + }) + } + } + return json.Marshal((*literalAlias)(l)) +} + // LiteralType represents the type of a literal. type LiteralType string diff --git a/internal/explain/explain.go b/internal/explain/explain.go index e20ee4ed4f..b8598eaa65 100644 --- a/internal/explain/explain.go +++ b/internal/explain/explain.go @@ -63,6 +63,8 @@ func Node(sb *strings.Builder, node interface{}, depth int) { explainSubquery(sb, n, indent, depth) case *ast.AliasedExpr: explainAliasedExpr(sb, n, depth) + case *ast.WithElement: + explainWithElement(sb, n, indent, depth) case *ast.Asterisk: explainAsterisk(sb, n, indent) @@ -97,6 +99,8 @@ func Node(sb *strings.Builder, node interface{}, depth int) { explainExtractExpr(sb, n, indent, depth) // DDL statements + case *ast.InsertQuery: + explainInsertQuery(sb, n, indent, depth) case *ast.CreateQuery: explainCreateQuery(sb, n, indent, depth) case *ast.DropQuery: @@ -117,6 +121,8 @@ func Node(sb *strings.Builder, node interface{}, depth int) { // Types case *ast.DataType: explainDataType(sb, n, indent, depth) + case *ast.NameTypePair: + explainNameTypePair(sb, n, indent, depth) case *ast.Parameter: explainParameter(sb, n, indent) diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go index 1ef0993735..c47824d5be 100644 --- a/internal/explain/expressions.go +++ b/internal/explain/expressions.go @@ -29,7 +29,9 @@ func explainLiteral(sb *strings.Builder, n *ast.Literal, indent string, depth in } hasComplexExpr := false for _, e := range exprs { - if _, isLit := e.(*ast.Literal); !isLit { + lit, isLit := e.(*ast.Literal) + // Non-literals or tuple/array literals count as complex + if !isLit || (isLit && (lit.Type == ast.LiteralTuple || lit.Type == ast.LiteralArray)) { hasComplexExpr = true break } @@ -61,7 +63,9 @@ func explainLiteral(sb *strings.Builder, n *ast.Literal, indent string, depth in } hasComplexExpr := false for _, e := range exprs { - if _, isLit := e.(*ast.Literal); !isLit { + lit, isLit := e.(*ast.Literal) + // Non-literals or tuple/array literals count as complex + if !isLit || (isLit && (lit.Type == ast.LiteralTuple || lit.Type == ast.LiteralArray)) { hasComplexExpr = true break } @@ -88,12 +92,45 @@ func explainLiteral(sb *strings.Builder, n *ast.Literal, indent string, depth in func explainBinaryExpr(sb *strings.Builder, n *ast.BinaryExpr, indent string, depth int) { // Convert operator to function name fnName := OperatorToFunction(n.Op) + + // For || (concat) operator, flatten chained concatenations + if n.Op == "||" { + operands := collectConcatOperands(n) + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(operands)) + for _, op := range operands { + Node(sb, op, depth+2) + } + return + } + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) Node(sb, n.Left, depth+2) Node(sb, n.Right, depth+2) } +// collectConcatOperands flattens chained || (concat) operations into a list of operands +func collectConcatOperands(n *ast.BinaryExpr) []ast.Expression { + var operands []ast.Expression + + // Recursively collect from left side if it's also a concat + if left, ok := n.Left.(*ast.BinaryExpr); ok && left.Op == "||" { + operands = append(operands, collectConcatOperands(left)...) + } else { + operands = append(operands, n.Left) + } + + // Recursively collect from right side if it's also a concat + if right, ok := n.Right.(*ast.BinaryExpr); ok && right.Op == "||" { + operands = append(operands, collectConcatOperands(right)...) + } else { + operands = append(operands, n.Right) + } + + return operands +} + func explainUnaryExpr(sb *strings.Builder, n *ast.UnaryExpr, indent string, depth int) { fnName := UnaryOperatorToFunction(n.Op) fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) @@ -135,6 +172,35 @@ func explainAliasedExpr(sb *strings.Builder, n *ast.AliasedExpr, depth int) { } } fmt.Fprintf(sb, "%sLiteral %s (alias %s)\n", indent, FormatLiteral(e), n.Alias) + case *ast.BinaryExpr: + // Binary expressions become functions with alias + fnName := OperatorToFunction(e.Op) + // For || (concat) operator, flatten chained concatenations + if e.Op == "||" { + operands := collectConcatOperands(e) + fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, n.Alias, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(operands)) + for _, op := range operands { + Node(sb, op, depth+2) + } + } else { + fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, n.Alias, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, e.Left, depth+2) + Node(sb, e.Right, depth+2) + } + case *ast.UnaryExpr: + // Unary expressions become functions with alias + fnName := UnaryOperatorToFunction(e.Op) + fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, n.Alias, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + Node(sb, e.Operand, depth+2) + case *ast.FunctionCall: + // Function calls already handle aliases + explainFunctionCallWithAlias(sb, e, n.Alias, indent, depth) + case *ast.Identifier: + // Identifiers with alias + fmt.Fprintf(sb, "%sIdentifier %s (alias %s)\n", indent, e.Name(), n.Alias) default: // For other types, recursively explain and add alias info Node(sb, n.Expr, depth) @@ -149,3 +215,38 @@ func explainAsterisk(sb *strings.Builder, n *ast.Asterisk, indent string) { fmt.Fprintf(sb, "%sAsterisk\n", indent) } } + +func explainWithElement(sb *strings.Builder, n *ast.WithElement, indent string, depth int) { + // For WITH elements, we need to show the underlying expression with the name as alias + switch e := n.Query.(type) { + case *ast.Literal: + fmt.Fprintf(sb, "%sLiteral %s (alias %s)\n", indent, FormatLiteral(e), n.Name) + case *ast.Identifier: + fmt.Fprintf(sb, "%sIdentifier %s (alias %s)\n", indent, e.Name(), n.Name) + case *ast.FunctionCall: + explainFunctionCallWithAlias(sb, e, n.Name, indent, depth) + case *ast.BinaryExpr: + // Binary expressions become functions + fnName := OperatorToFunction(e.Op) + // For || (concat) operator, flatten chained concatenations + if e.Op == "||" { + operands := collectConcatOperands(e) + fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, n.Name, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(operands)) + for _, op := range operands { + Node(sb, op, depth+2) + } + } else { + fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, n.Name, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, e.Left, depth+2) + Node(sb, e.Right, depth+2) + } + case *ast.Subquery: + fmt.Fprintf(sb, "%sSubquery (alias %s) (children %d)\n", indent, n.Name, 1) + Node(sb, e.Query, depth+1) + default: + // For other types, just output the expression (alias may be lost) + Node(sb, n.Query, depth) + } +} diff --git a/internal/explain/format.go b/internal/explain/format.go index 6a0fed6216..fac17a2c4c 100644 --- a/internal/explain/format.go +++ b/internal/explain/format.go @@ -2,6 +2,7 @@ package explain import ( "fmt" + "strconv" "strings" "github.com/kyleconroy/doubleclick/ast" @@ -11,14 +12,23 @@ import ( func FormatLiteral(lit *ast.Literal) string { switch lit.Type { case ast.LiteralInteger: - val := lit.Value.(int64) - if val >= 0 { + // Handle both int64 and uint64 values + switch val := lit.Value.(type) { + case int64: + if val >= 0 { + return fmt.Sprintf("UInt64_%d", val) + } + return fmt.Sprintf("Int64_%d", val) + case uint64: return fmt.Sprintf("UInt64_%d", val) + default: + return fmt.Sprintf("UInt64_%v", lit.Value) } - return fmt.Sprintf("Int64_%d", val) case ast.LiteralFloat: val := lit.Value.(float64) - return fmt.Sprintf("Float64_%v", val) + // Use 'f' format to avoid scientific notation, -1 precision for smallest representation + s := strconv.FormatFloat(val, 'f', -1, 64) + return fmt.Sprintf("Float64_%s", s) case ast.LiteralString: s := lit.Value.(string) // Escape backslashes in strings @@ -26,9 +36,9 @@ func FormatLiteral(lit *ast.Literal) string { return fmt.Sprintf("\\'%s\\'", s) case ast.LiteralBoolean: if lit.Value.(bool) { - return "UInt8_1" + return "Bool_1" } - return "UInt8_0" + return "Bool_0" case ast.LiteralNull: return "NULL" case ast.LiteralArray: @@ -146,7 +156,9 @@ func OperatorToFunction(op string) string { return "multiply" case "/": return "divide" - case "%": + case "DIV": + return "intDiv" + case "%", "MOD": return "modulo" case "=", "==": return "equals" @@ -201,6 +213,10 @@ func formatExprAsString(expr ast.Expression) string { return "false" case ast.LiteralNull: return "NULL" + case ast.LiteralArray: + return formatArrayAsString(e.Value) + case ast.LiteralTuple: + return formatTupleAsString(e.Value) default: return fmt.Sprintf("%v", e.Value) } @@ -210,3 +226,65 @@ func formatExprAsString(expr ast.Expression) string { return fmt.Sprintf("%v", expr) } } + +// formatArrayAsString formats an array literal as a string for :: cast syntax +func formatArrayAsString(val interface{}) string { + exprs, ok := val.([]ast.Expression) + if !ok { + return "[]" + } + var parts []string + for _, e := range exprs { + parts = append(parts, formatElementAsString(e)) + } + return "[" + strings.Join(parts, ", ") + "]" +} + +// formatTupleAsString formats a tuple literal as a string for :: cast syntax +func formatTupleAsString(val interface{}) string { + exprs, ok := val.([]ast.Expression) + if !ok { + return "()" + } + var parts []string + for _, e := range exprs { + parts = append(parts, formatElementAsString(e)) + } + return "(" + strings.Join(parts, ", ") + ")" +} + +// formatElementAsString formats a single element for array/tuple string representation +func formatElementAsString(expr ast.Expression) string { + switch e := expr.(type) { + case *ast.Literal: + switch e.Type { + case ast.LiteralInteger: + return fmt.Sprintf("%d", e.Value) + case ast.LiteralFloat: + return fmt.Sprintf("%v", e.Value) + case ast.LiteralString: + // Quote strings with single quotes + s := e.Value.(string) + // Escape single quotes in the string + s = strings.ReplaceAll(s, "'", "\\'") + return "\\'" + s + "\\'" + case ast.LiteralBoolean: + if e.Value.(bool) { + return "true" + } + return "false" + case ast.LiteralNull: + return "NULL" + case ast.LiteralArray: + return formatArrayAsString(e.Value) + case ast.LiteralTuple: + return formatTupleAsString(e.Value) + default: + return fmt.Sprintf("%v", e.Value) + } + case *ast.Identifier: + return e.Name() + default: + return formatExprAsString(expr) + } +} diff --git a/internal/explain/functions.go b/internal/explain/functions.go index df49cbdbed..7e7d49eac9 100644 --- a/internal/explain/functions.go +++ b/internal/explain/functions.go @@ -8,14 +8,21 @@ import ( ) func explainFunctionCall(sb *strings.Builder, n *ast.FunctionCall, indent string, depth int) { + explainFunctionCallWithAlias(sb, n, n.Alias, indent, depth) +} + +func explainFunctionCallWithAlias(sb *strings.Builder, n *ast.FunctionCall, alias string, indent string, depth int) { children := 1 // arguments ExpressionList if len(n.Parameters) > 0 { children++ // parameters ExpressionList } + if n.Over != nil { + children++ // WindowDefinition for OVER clause + } // Normalize function name fnName := NormalizeFunctionName(n.Name) - if n.Alias != "" { - fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, n.Alias, children) + if alias != "" { + fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, alias, children) } else { fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, children) } @@ -35,6 +42,11 @@ func explainFunctionCall(sb *strings.Builder, n *ast.FunctionCall, indent string Node(sb, p, depth+2) } } + // Window definition (for window functions with OVER clause) + // WindowDefinition is a sibling to ExpressionList, so use the same indent + if n.Over != nil { + explainWindowSpec(sb, n.Over, indent+" ", depth+1) + } } func explainLambda(sb *strings.Builder, n *ast.Lambda, indent string, depth int) { @@ -55,11 +67,29 @@ func explainCastExpr(sb *strings.Builder, n *ast.CastExpr, indent string, depth // CAST is represented as Function CAST with expr and type as arguments fmt.Fprintf(sb, "%sFunction CAST (children %d)\n", indent, 1) fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) - // For :: operator syntax, expression is represented as string literal + // For :: operator syntax with simple literals, format as string literal + // For function syntax or complex expressions, use normal AST node if n.OperatorSyntax { - // Format expression as string literal - exprStr := formatExprAsString(n.Expr) - fmt.Fprintf(sb, "%s Literal \\'%s\\'\n", indent, exprStr) + if lit, ok := n.Expr.(*ast.Literal); ok { + // For arrays/tuples of simple primitives, use FormatLiteral (Array_[...] format) + // For strings and other types, use string format + if lit.Type == ast.LiteralArray || lit.Type == ast.LiteralTuple { + if containsOnlyPrimitives(lit) { + fmt.Fprintf(sb, "%s Literal %s\n", indent, FormatLiteral(lit)) + } else { + // Complex content - format as string + exprStr := formatExprAsString(lit) + fmt.Fprintf(sb, "%s Literal \\'%s\\'\n", indent, exprStr) + } + } else { + // Simple literal - format as string + exprStr := formatExprAsString(lit) + fmt.Fprintf(sb, "%s Literal \\'%s\\'\n", indent, exprStr) + } + } else { + // Complex expression - use normal AST node + Node(sb, n.Expr, depth+2) + } } else { Node(sb, n.Expr, depth+2) } @@ -68,6 +98,43 @@ func explainCastExpr(sb *strings.Builder, n *ast.CastExpr, indent string, depth fmt.Fprintf(sb, "%s Literal \\'%s\\'\n", indent, typeStr) } +// containsOnlyPrimitives checks if a literal array/tuple contains only primitive literals +func containsOnlyPrimitives(lit *ast.Literal) bool { + var exprs []ast.Expression + switch lit.Type { + case ast.LiteralArray, ast.LiteralTuple: + var ok bool + exprs, ok = lit.Value.([]ast.Expression) + if !ok { + return false + } + default: + return true + } + + for _, e := range exprs { + innerLit, ok := e.(*ast.Literal) + if !ok { + return false + } + // Strings with special chars are not considered primitive for this purpose + if innerLit.Type == ast.LiteralString { + s := innerLit.Value.(string) + // Strings that look like JSON or contain special chars should be converted to string format + if strings.ContainsAny(s, "{}[]\"\\") { + return false + } + } + // Nested arrays/tuples need recursive check + if innerLit.Type == ast.LiteralArray || innerLit.Type == ast.LiteralTuple { + if !containsOnlyPrimitives(innerLit) { + return false + } + } + } + return true +} + func explainInExpr(sb *strings.Builder, n *ast.InExpr, indent string, depth int) { // IN is represented as Function in fnName := "in" @@ -201,7 +268,12 @@ func explainCaseExpr(sb *strings.Builder, n *ast.CaseExpr, indent string, depth func explainIntervalExpr(sb *strings.Builder, n *ast.IntervalExpr, indent string, depth int) { // INTERVAL is represented as Function toInterval - fnName := "toInterval" + n.Unit + // Unit needs to be title-cased (e.g., YEAR -> Year) + unit := n.Unit + if len(unit) > 0 { + unit = strings.ToUpper(unit[:1]) + strings.ToLower(unit[1:]) + } + fnName := "toInterval" + unit fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) Node(sb, n.Value, depth+2) @@ -222,3 +294,42 @@ func explainExtractExpr(sb *strings.Builder, n *ast.ExtractExpr, indent string, fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) Node(sb, n.From, depth+2) } + +func explainWindowSpec(sb *strings.Builder, n *ast.WindowSpec, indent string, depth int) { + // Window spec is represented as WindowDefinition + // For simple cases like OVER (), just output WindowDefinition without children + children := 0 + if n.Name != "" { + children++ + } + if len(n.PartitionBy) > 0 { + children++ + } + if len(n.OrderBy) > 0 { + children++ + } + if n.Frame != nil { + children++ + } + if children > 0 { + fmt.Fprintf(sb, "%sWindowDefinition (children %d)\n", indent, children) + if n.Name != "" { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Name) + } + if len(n.PartitionBy) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.PartitionBy)) + for _, e := range n.PartitionBy { + Node(sb, e, depth+2) + } + } + if len(n.OrderBy) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.OrderBy)) + for _, o := range n.OrderBy { + Node(sb, o.Expression, depth+2) + } + } + // Frame handling would go here if needed + } else { + fmt.Fprintf(sb, "%sWindowDefinition\n", indent) + } +} diff --git a/internal/explain/select.go b/internal/explain/select.go index 52e15aa07c..b26cfedfa1 100644 --- a/internal/explain/select.go +++ b/internal/explain/select.go @@ -16,17 +16,35 @@ func explainSelectWithUnionQuery(sb *strings.Builder, n *ast.SelectWithUnionQuer Node(sb, sel, depth+2) } // FORMAT clause - check if any SelectQuery has Format set + var hasFormat bool for _, sel := range n.Selects { if sq, ok := sel.(*ast.SelectQuery); ok && sq.Format != nil { Node(sb, sq.Format, depth+1) + hasFormat = true break } } + // When FORMAT is present, SETTINGS is output at SelectWithUnionQuery level + if hasFormat { + for _, sel := range n.Selects { + if sq, ok := sel.(*ast.SelectQuery); ok && len(sq.Settings) > 0 { + fmt.Fprintf(sb, "%s Set\n", indent) + break + } + } + } } func explainSelectQuery(sb *strings.Builder, n *ast.SelectQuery, indent string, depth int) { children := countSelectQueryChildren(n) fmt.Fprintf(sb, "%sSelectQuery (children %d)\n", indent, children) + // WITH clause (ExpressionList) - output before columns + if len(n.With) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.With)) + for _, w := range n.With { + Node(sb, w, depth+2) + } + } // Columns (ExpressionList) fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Columns)) for _, col := range n.Columns { @@ -70,8 +88,8 @@ func explainSelectQuery(sb *strings.Builder, n *ast.SelectQuery, indent string, if n.Offset != nil { Node(sb, n.Offset, depth+1) } - // SETTINGS - if len(n.Settings) > 0 { + // SETTINGS - output here if there's no FORMAT, otherwise it's at SelectWithUnionQuery level + if len(n.Settings) > 0 && n.Format == nil { fmt.Fprintf(sb, "%s Set\n", indent) } } @@ -84,17 +102,32 @@ func explainOrderByElement(sb *strings.Builder, n *ast.OrderByElement, indent st func countSelectUnionChildren(n *ast.SelectWithUnionQuery) int { count := 1 // ExpressionList of selects // Check if any SelectQuery has Format set + var hasFormat bool for _, sel := range n.Selects { if sq, ok := sel.(*ast.SelectQuery); ok && sq.Format != nil { count++ + hasFormat = true break } } + // When FORMAT is present, SETTINGS is counted at this level + if hasFormat { + for _, sel := range n.Selects { + if sq, ok := sel.(*ast.SelectQuery); ok && len(sq.Settings) > 0 { + count++ + break + } + } + } return count } func countSelectQueryChildren(n *ast.SelectQuery) int { count := 1 // columns ExpressionList + // WITH clause + if len(n.With) > 0 { + count++ + } // FROM and ARRAY JOIN together count as one child (TablesInSelectQuery) if n.From != nil || n.ArrayJoin != nil { count++ @@ -120,7 +153,9 @@ func countSelectQueryChildren(n *ast.SelectQuery) int { if n.Offset != nil { count++ } - if len(n.Settings) > 0 { + // SETTINGS is counted here only if there's no FORMAT + // If FORMAT is present, SETTINGS is at SelectWithUnionQuery level + if len(n.Settings) > 0 && n.Format == nil { count++ } return count diff --git a/internal/explain/statements.go b/internal/explain/statements.go index 133ebcfddd..f35519d34e 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -7,6 +7,42 @@ import ( "github.com/kyleconroy/doubleclick/ast" ) +func explainInsertQuery(sb *strings.Builder, n *ast.InsertQuery, indent string, depth int) { + // Count children + children := 0 + if n.Function != nil { + children++ + } else if n.Table != "" { + children++ // Table identifier + } + if n.Select != nil { + children++ + } + if n.HasSettings { + children++ + } + // Note: InsertQuery uses 3 spaces after name in ClickHouse explain + fmt.Fprintf(sb, "%sInsertQuery (children %d)\n", indent, children) + + if n.Function != nil { + Node(sb, n.Function, depth+1) + } else if n.Table != "" { + name := n.Table + if n.Database != "" { + name = n.Database + "." + n.Table + } + fmt.Fprintf(sb, "%s Identifier %s\n", indent, name) + } + + if n.Select != nil { + Node(sb, n.Select, depth+1) + } + + if n.HasSettings { + fmt.Fprintf(sb, "%s Set\n", indent) + } +} + func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, depth int) { name := n.Table if n.View != "" { @@ -20,13 +56,18 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, if len(n.Columns) > 0 { children++ } - if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 { + if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 || n.PartitionBy != nil { children++ } if n.AsSelect != nil { children++ } - fmt.Fprintf(sb, "%sCreateQuery %s (children %d)\n", indent, name, children) + // ClickHouse adds an extra space before (children N) for CREATE DATABASE + if n.CreateDatabase { + fmt.Fprintf(sb, "%sCreateQuery %s (children %d)\n", indent, name, children) + } else { + fmt.Fprintf(sb, "%sCreateQuery %s (children %d)\n", indent, name, children) + } fmt.Fprintf(sb, "%s Identifier %s\n", indent, name) if len(n.Columns) > 0 { fmt.Fprintf(sb, "%s Columns definition (children %d)\n", indent, 1) @@ -35,11 +76,14 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, Column(sb, col, depth+3) } } - if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 || len(n.Settings) > 0 { + if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 || n.PartitionBy != nil || len(n.Settings) > 0 { storageChildren := 0 if n.Engine != nil { storageChildren++ } + if n.PartitionBy != nil { + storageChildren++ + } if len(n.OrderBy) > 0 { storageChildren++ } @@ -65,6 +109,13 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, fmt.Fprintf(sb, "%s Function %s\n", indent, n.Engine.Name) } } + if n.PartitionBy != nil { + if ident, ok := n.PartitionBy.(*ast.Identifier); ok { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, ident.Name()) + } else { + Node(sb, n.PartitionBy, depth+2) + } + } if len(n.OrderBy) > 0 { if len(n.OrderBy) == 1 { if ident, ok := n.OrderBy[0].(*ast.Identifier); ok { @@ -80,6 +131,21 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, } } } + if len(n.PrimaryKey) > 0 { + if len(n.PrimaryKey) == 1 { + if ident, ok := n.PrimaryKey[0].(*ast.Identifier); ok { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, ident.Name()) + } else { + Node(sb, n.PrimaryKey[0], depth+2) + } + } else { + fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.PrimaryKey)) + for _, p := range n.PrimaryKey { + Node(sb, p, depth+4) + } + } + } if len(n.Settings) > 0 { fmt.Fprintf(sb, "%s Set\n", indent) } @@ -103,7 +169,12 @@ func explainDropQuery(sb *strings.Builder, n *ast.DropQuery, indent string) { if n.DropDatabase { name = n.Database } - fmt.Fprintf(sb, "%sDropQuery %s (children %d)\n", indent, name, 1) + // DROP DATABASE uses different spacing than DROP TABLE + if n.DropDatabase { + fmt.Fprintf(sb, "%sDropQuery %s (children %d)\n", indent, name, 1) + } else { + fmt.Fprintf(sb, "%sDropQuery %s (children %d)\n", indent, name, 1) + } fmt.Fprintf(sb, "%s Identifier %s\n", indent, name) } @@ -139,31 +210,27 @@ func explainDescribeQuery(sb *strings.Builder, n *ast.DescribeQuery, indent stri } func explainDataType(sb *strings.Builder, n *ast.DataType, indent string, depth int) { - // Check if type has complex parameters (expressions, not just literals/types) - hasComplexParams := false - for _, p := range n.Parameters { - if _, ok := p.(*ast.Literal); ok { - continue - } - if _, ok := p.(*ast.DataType); ok { - continue - } - hasComplexParams = true - break - } - - if hasComplexParams && len(n.Parameters) > 0 { - // Complex parameters need to be output as children + // If type has parameters, expand them as children + if len(n.Parameters) > 0 { fmt.Fprintf(sb, "%sDataType %s (children %d)\n", indent, n.Name, 1) fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Parameters)) for _, p := range n.Parameters { Node(sb, p, depth+2) } + } else if n.HasParentheses { + // Empty parentheses, e.g., Tuple() + fmt.Fprintf(sb, "%sDataType %s (children %d)\n", indent, n.Name, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) } else { - fmt.Fprintf(sb, "%sDataType %s\n", indent, FormatDataType(n)) + fmt.Fprintf(sb, "%sDataType %s\n", indent, n.Name) } } +func explainNameTypePair(sb *strings.Builder, n *ast.NameTypePair, indent string, depth int) { + fmt.Fprintf(sb, "%sNameTypePair %s (children %d)\n", indent, n.Name, 1) + Node(sb, n.Type, depth+1) +} + func explainParameter(sb *strings.Builder, n *ast.Parameter, indent string) { if n.Name != "" { fmt.Fprintf(sb, "%sQueryParameter %s\n", indent, n.Name) diff --git a/internal/explain/tables.go b/internal/explain/tables.go index e707120168..b9b8d22897 100644 --- a/internal/explain/tables.go +++ b/internal/explain/tables.go @@ -35,11 +35,25 @@ func explainTableExpression(sb *strings.Builder, n *ast.TableExpression, indent if subq, ok := n.Table.(*ast.Subquery); ok && n.Alias != "" { fmt.Fprintf(sb, "%s Subquery (alias %s) (children %d)\n", indent, n.Alias, 1) Node(sb, subq.Query, depth+2) + } else if fn, ok := n.Table.(*ast.FunctionCall); ok && n.Alias != "" { + // Table function with alias + explainFunctionCallWithAlias(sb, fn, n.Alias, indent+" ", depth+1) + } else if ti, ok := n.Table.(*ast.TableIdentifier); ok && n.Alias != "" { + // Table identifier with alias + explainTableIdentifierWithAlias(sb, ti, n.Alias, indent+" ") } else { Node(sb, n.Table, depth+1) } } +func explainTableIdentifierWithAlias(sb *strings.Builder, n *ast.TableIdentifier, alias string, indent string) { + name := n.Table + if n.Database != "" { + name = n.Database + "." + n.Table + } + fmt.Fprintf(sb, "%sTableIdentifier %s (alias %s)\n", indent, name, alias) +} + func explainTableIdentifier(sb *strings.Builder, n *ast.TableIdentifier, indent string) { name := n.Table if n.Database != "" { @@ -62,13 +76,7 @@ func explainArrayJoinClause(sb *strings.Builder, n *ast.ArrayJoinClause, indent func explainTableJoin(sb *strings.Builder, n *ast.TableJoin, indent string, depth int) { // TableJoin is part of TablesInSelectQueryElement - joinType := strings.ToLower(string(n.Type)) - if n.Strictness != "" { - joinType = strings.ToLower(string(n.Strictness)) + " " + joinType - } - if n.Global { - joinType = "global " + joinType - } + // ClickHouse EXPLAIN AST doesn't show join type in the output children := 0 if n.On != nil { children++ @@ -76,7 +84,7 @@ func explainTableJoin(sb *strings.Builder, n *ast.TableJoin, indent string, dept if len(n.Using) > 0 { children++ } - fmt.Fprintf(sb, "%sTableJoin %s (children %d)\n", indent, joinType, children) + fmt.Fprintf(sb, "%sTableJoin (children %d)\n", indent, children) if n.On != nil { Node(sb, n.On, depth+1) } diff --git a/lexer/lexer.go b/lexer/lexer.go index 7efe648184..8215853b54 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -344,10 +344,49 @@ func (l *Lexer) readNumber() Item { l.readChar() } - // Read integer part + // Check for hex (0x), binary (0b), or octal (0o) prefix + if l.ch == '0' { + sb.WriteRune(l.ch) + l.readChar() + if l.ch == 'x' || l.ch == 'X' { + // Hex literal + sb.WriteRune(l.ch) + l.readChar() + for isHexDigit(l.ch) { + sb.WriteRune(l.ch) + l.readChar() + } + return Item{Token: token.NUMBER, Value: sb.String(), Pos: pos} + } else if l.ch == 'b' || l.ch == 'B' { + // Binary literal + sb.WriteRune(l.ch) + l.readChar() + for l.ch == '0' || l.ch == '1' { + sb.WriteRune(l.ch) + l.readChar() + } + return Item{Token: token.NUMBER, Value: sb.String(), Pos: pos} + } else if l.ch == 'o' || l.ch == 'O' { + // Octal literal + sb.WriteRune(l.ch) + l.readChar() + for l.ch >= '0' && l.ch <= '7' { + sb.WriteRune(l.ch) + l.readChar() + } + return Item{Token: token.NUMBER, Value: sb.String(), Pos: pos} + } + // Otherwise, continue with normal number parsing (leading 0) + } + + // Read integer part (including underscores as separators, but only between digits) for unicode.IsDigit(l.ch) { sb.WriteRune(l.ch) l.readChar() + // Handle underscore separators (only if followed by a digit) + for l.ch == '_' && unicode.IsDigit(l.peekChar()) { + l.readChar() // skip underscore + } } // Check for decimal point @@ -357,6 +396,10 @@ func (l *Lexer) readNumber() Item { for unicode.IsDigit(l.ch) { sb.WriteRune(l.ch) l.readChar() + // Handle underscore separators + for l.ch == '_' && unicode.IsDigit(l.peekChar()) { + l.readChar() + } } } @@ -371,16 +414,36 @@ func (l *Lexer) readNumber() Item { for unicode.IsDigit(l.ch) { sb.WriteRune(l.ch) l.readChar() + // Handle underscore separators + for l.ch == '_' && unicode.IsDigit(l.peekChar()) { + l.readChar() + } } } return Item{Token: token.NUMBER, Value: sb.String(), Pos: pos} } +func isHexDigit(ch rune) bool { + return unicode.IsDigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') +} + func (l *Lexer) readIdentifier() Item { pos := l.pos var sb strings.Builder + // Check for hex string literal: x'...' or X'...' + if (l.ch == 'x' || l.ch == 'X') && l.peekChar() == '\'' { + l.readChar() // skip x + return l.readString('\'') // read as regular string + } + + // Check for binary string literal: b'...' or B'...' + if (l.ch == 'b' || l.ch == 'B') && l.peekChar() == '\'' { + l.readChar() // skip b + return l.readString('\'') // read as regular string + } + for isIdentChar(l.ch) { sb.WriteRune(l.ch) l.readChar() diff --git a/parser/expression.go b/parser/expression.go index 14b03588cd..599b0ba3cd 100644 --- a/parser/expression.go +++ b/parser/expression.go @@ -1,6 +1,7 @@ package parser import ( + "math" "strconv" "strings" @@ -80,16 +81,84 @@ func (p *Parser) parseExpressionList() []ast.Expression { return exprs } - exprs = append(exprs, p.parseExpression(LOWEST)) + expr := p.parseExpression(LOWEST) + if expr != nil { + // Handle implicit alias (identifier without AS) + expr = p.parseImplicitAlias(expr) + exprs = append(exprs, expr) + } + + for p.currentIs(token.COMMA) { + p.nextToken() + expr := p.parseExpression(LOWEST) + if expr != nil { + // Handle implicit alias (identifier without AS) + expr = p.parseImplicitAlias(expr) + exprs = append(exprs, expr) + } + } + + return exprs +} + +// parseFunctionArgumentList parses arguments for function calls, stopping at SETTINGS +func (p *Parser) parseFunctionArgumentList() []ast.Expression { + var exprs []ast.Expression + + if p.currentIs(token.RPAREN) || p.currentIs(token.EOF) || p.currentIs(token.SETTINGS) { + return exprs + } + + expr := p.parseExpression(LOWEST) + if expr != nil { + exprs = append(exprs, expr) + } for p.currentIs(token.COMMA) { p.nextToken() - exprs = append(exprs, p.parseExpression(LOWEST)) + // Stop if we hit SETTINGS + if p.currentIs(token.SETTINGS) { + break + } + expr := p.parseExpression(LOWEST) + if expr != nil { + exprs = append(exprs, expr) + } } return exprs } +// parseImplicitAlias handles implicit column aliases like "SELECT 'a' c0" (meaning 'a' AS c0) +func (p *Parser) parseImplicitAlias(expr ast.Expression) ast.Expression { + // If next token is a plain identifier (not a keyword), treat as implicit alias + // Keywords like FROM, WHERE etc. are tokenized as their own token types, not IDENT + if p.currentIs(token.IDENT) { + alias := p.current.Value + p.nextToken() + + // Set alias on the expression if it supports it + switch e := expr.(type) { + case *ast.Identifier: + e.Alias = alias + return e + case *ast.FunctionCall: + e.Alias = alias + return e + case *ast.Subquery: + e.Alias = alias + return e + default: + return &ast.AliasedExpr{ + Position: expr.Pos(), + Expr: expr, + Alias: alias, + } + } + } + return expr +} + func (p *Parser) parseExpression(precedence int) ast.Expression { left := p.parsePrefixExpression() if left == nil { @@ -118,6 +187,8 @@ func (p *Parser) parsePrefixExpression() ast.Expression { return p.parseBoolean() case token.NULL: return p.parseNull() + case token.NAN, token.INF: + return p.parseSpecialNumber() case token.MINUS: return p.parseUnaryMinus() case token.NOT: @@ -135,7 +206,13 @@ func (p *Parser) parsePrefixExpression() ast.Expression { case token.EXTRACT: return p.parseExtract() case token.INTERVAL: - return p.parseInterval() + // INTERVAL can be a literal (INTERVAL 1 DAY) or identifier reference + // Check if next token can start an interval value + if p.peekIs(token.NUMBER) || p.peekIs(token.LPAREN) || p.peekIs(token.MINUS) || p.peekIs(token.STRING) { + return p.parseInterval() + } + // Otherwise treat as identifier + return p.parseKeywordAsIdentifier() case token.EXISTS: return p.parseExists() case token.PARAM: @@ -159,11 +236,16 @@ func (p *Parser) parsePrefixExpression() ast.Expression { if p.peekIs(token.LPAREN) { return p.parseKeywordAsFunction() } - return nil + // format as identifier (e.g., format='Parquet' in function args) + return p.parseKeywordAsIdentifier() default: - // Handle other keywords that can be used as function names - if p.current.Token.IsKeyword() && p.peekIs(token.LPAREN) { - return p.parseKeywordAsFunction() + // Handle other keywords that can be used as function names or identifiers + if p.current.Token.IsKeyword() { + if p.peekIs(token.LPAREN) { + return p.parseKeywordAsFunction() + } + // Keywords like ALL, DEFAULT, etc. can be used as identifiers + return p.parseKeywordAsIdentifier() } return nil } @@ -317,26 +399,41 @@ func (p *Parser) parseFunctionCall(name string, pos token.Position) *ast.Functio } // Parse arguments - if !p.currentIs(token.RPAREN) { - fn.Arguments = p.parseExpressionList() + if !p.currentIs(token.RPAREN) && !p.currentIs(token.SETTINGS) { + fn.Arguments = p.parseFunctionArgumentList() + } + + // Handle SETTINGS inside function call (table functions) + if p.currentIs(token.SETTINGS) { + p.nextToken() + // Parse settings as key=value pairs until ) + for !p.currentIs(token.RPAREN) && !p.currentIs(token.EOF) { + // Just skip the settings for now + p.nextToken() + } } p.expect(token.RPAREN) + // Handle IGNORE NULLS / RESPECT NULLS (window function modifiers) + if p.currentIs(token.IDENT) { + upper := strings.ToUpper(p.current.Value) + if upper == "IGNORE" || upper == "RESPECT" { + p.nextToken() + if p.currentIs(token.NULLS) { + p.nextToken() + } + } + } + // Handle OVER clause for window functions if p.currentIs(token.OVER) { p.nextToken() fn.Over = p.parseWindowSpec() } - // Handle alias - if p.currentIs(token.AS) { - p.nextToken() - if p.currentIs(token.IDENT) { - fn.Alias = p.current.Value - p.nextToken() - } - } + // Note: AS alias is handled by the expression parser's infix handling (parseAlias) + // to respect precedence levels when called from contexts like WITH clauses return fn } @@ -476,10 +573,18 @@ func (p *Parser) parseNumber() ast.Expression { lit.Value = f } } else { + // Try signed int64 first i, err := strconv.ParseInt(value, 10, 64) if err != nil { - lit.Type = ast.LiteralString - lit.Value = value + // Try unsigned uint64 for large positive numbers + u, uerr := strconv.ParseUint(value, 10, 64) + if uerr != nil { + lit.Type = ast.LiteralString + lit.Value = value + } else { + lit.Type = ast.LiteralInteger + lit.Value = u // Store as uint64 + } } else { lit.Type = ast.LiteralInteger lit.Value = i @@ -519,6 +624,21 @@ func (p *Parser) parseNull() ast.Expression { return lit } +func (p *Parser) parseSpecialNumber() ast.Expression { + lit := &ast.Literal{ + Position: p.current.Pos, + Type: ast.LiteralFloat, + } + switch p.current.Token { + case token.NAN: + lit.Value = math.NaN() + case token.INF: + lit.Value = math.Inf(1) + } + p.nextToken() + return lit +} + func (p *Parser) parseUnaryMinus() ast.Expression { expr := &ast.UnaryExpr{ Position: p.current.Pos, @@ -543,6 +663,16 @@ func (p *Parser) parseGroupedOrTuple() ast.Expression { pos := p.current.Pos p.nextToken() // skip ( + // Handle empty tuple () + if p.currentIs(token.RPAREN) { + p.nextToken() + return &ast.Literal{ + Position: pos, + Type: ast.LiteralTuple, + Value: []ast.Expression{}, + } + } + // Check for subquery if p.currentIs(token.SELECT) || p.currentIs(token.WITH) { subquery := p.parseSelectWithUnion() @@ -661,12 +791,22 @@ func (p *Parser) parseCast() ast.Expression { // Use ALIAS_PREC to avoid consuming AS as an alias operator expr.Expr = p.parseExpression(ALIAS_PREC) - if !p.expect(token.AS) { - return nil + // Handle both CAST(x AS Type) and CAST(x, 'Type') syntax + if p.currentIs(token.AS) { + p.nextToken() + expr.Type = p.parseDataType() + } else if p.currentIs(token.COMMA) { + p.nextToken() + // Type is given as a string literal + if p.currentIs(token.STRING) { + expr.Type = &ast.DataType{ + Position: p.current.Pos, + Name: p.current.Value, + } + p.nextToken() + } } - expr.Type = p.parseDataType() - p.expect(token.RPAREN) return expr @@ -949,18 +1089,34 @@ func (p *Parser) parseInExpression(left ast.Expression, not bool) ast.Expression p.nextToken() // skip IN - if !p.expect(token.LPAREN) { - return nil - } + // Handle different IN list formats: + // 1. (subquery or list) - standard format + // 2. [array literal] - array format + // 3. identifier - table or alias reference + // 4. tuple(...) - explicit tuple function - // Check for subquery - if p.currentIs(token.SELECT) || p.currentIs(token.WITH) { - expr.Query = p.parseSelectWithUnion() + if p.currentIs(token.LPAREN) { + p.nextToken() // skip ( + // Check for subquery + if p.currentIs(token.SELECT) || p.currentIs(token.WITH) { + expr.Query = p.parseSelectWithUnion() + } else { + expr.List = p.parseExpressionList() + } + p.expect(token.RPAREN) + } else if p.currentIs(token.LBRACKET) { + // Array literal: IN [1, 2, 3] + arr := p.parseArrayLiteral() + expr.List = []ast.Expression{arr} } else { - expr.List = p.parseExpressionList() + // Could be identifier, tuple function, or other expression + // Parse as expression + innerExpr := p.parseExpression(CALL) + if innerExpr != nil { + expr.List = []ast.Expression{innerExpr} + } } - p.expect(token.RPAREN) return expr } @@ -1323,6 +1479,17 @@ func (p *Parser) parseKeywordAsFunction() ast.Expression { } } +func (p *Parser) parseKeywordAsIdentifier() ast.Expression { + pos := p.current.Pos + name := p.current.Value + p.nextToken() + + return &ast.Identifier{ + Position: pos, + Parts: []string{name}, + } +} + func (p *Parser) parseAsteriskExcept(asterisk *ast.Asterisk) ast.Expression { p.nextToken() // skip EXCEPT diff --git a/parser/parser.go b/parser/parser.go index b36afd2c41..9bc1c93524 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -173,11 +173,23 @@ func (p *Parser) parseSelectWithUnion() *ast.SelectWithUnionQuery { p.nextToken() } query.UnionModes = append(query.UnionModes, mode) - sel := p.parseSelect() - if sel == nil { - break + + // Handle parenthesized subqueries: UNION ALL (SELECT ... UNION ALL SELECT ...) + if p.currentIs(token.LPAREN) { + p.nextToken() // skip ( + nested := p.parseSelectWithUnion() + p.expect(token.RPAREN) + // Flatten nested union selects into current query + for _, s := range nested.Selects { + query.Selects = append(query.Selects, s) + } + } else { + sel := p.parseSelect() + if sel == nil { + break + } + query.Selects = append(query.Selects, sel) } - query.Selects = append(query.Selects, sel) } return query @@ -251,6 +263,13 @@ func (p *Parser) parseSelect() *ast.SelectQuery { sel.WithRollup = true } + // WITH CUBE + if p.currentIs(token.WITH) && p.peekIs(token.CUBE) { + p.nextToken() + p.nextToken() + sel.WithCube = true + } + // WITH TOTALS if p.currentIs(token.WITH) && p.peekIs(token.TOTALS) { p.nextToken() @@ -291,6 +310,26 @@ func (p *Parser) parseSelect() *ast.SelectQuery { sel.Offset = sel.Limit sel.Limit = p.parseExpression(LOWEST) } + + // LIMIT BY clause (ClickHouse specific: LIMIT n BY expr1, expr2, ...) + if p.currentIs(token.BY) { + p.nextToken() + // Parse LIMIT BY expressions - skip them for now + for !p.isEndOfExpression() { + p.parseExpression(LOWEST) + if p.currentIs(token.COMMA) { + p.nextToken() + } else { + break + } + } + } + + // WITH TIES modifier + if p.currentIs(token.WITH) && p.peekIs(token.TIES) { + p.nextToken() // skip WITH + p.nextToken() // skip TIES + } } // Parse OFFSET clause @@ -327,6 +366,13 @@ func (p *Parser) parseSelect() *ast.SelectQuery { } } + // Parse WITH TOTALS (can appear after GROUP BY or at end of SELECT) + if p.currentIs(token.WITH) && p.peekIs(token.TOTALS) { + p.nextToken() + p.nextToken() + sel.WithTotals = true + } + // Parse SETTINGS clause if p.currentIs(token.SETTINGS) { p.nextToken() @@ -360,6 +406,12 @@ func (p *Parser) parseSelect() *ast.SelectQuery { } } + // Parse SETTINGS clause (can come after FORMAT) + if p.currentIs(token.SETTINGS) { + p.nextToken() + sel.Settings = p.parseSettingsList() + } + return sel } @@ -651,10 +703,10 @@ func (p *Parser) parseTableExpression() *ast.TableExpression { } } - // Handle alias + // Handle alias (keywords like LEFT, RIGHT can be used as aliases after AS) if p.currentIs(token.AS) { p.nextToken() - if p.currentIs(token.IDENT) { + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { expr.Alias = p.current.Value p.nextToken() } @@ -678,6 +730,17 @@ func (p *Parser) isKeywordForClause() bool { return false } +func (p *Parser) isEndOfExpression() bool { + switch p.current.Token { + case token.EOF, token.RPAREN, token.RBRACKET, token.SEMICOLON, + token.UNION, token.EXCEPT, token.ORDER, token.LIMIT, + token.OFFSET, token.SETTINGS, token.FORMAT, token.INTO, + token.WITH: + return true + } + return false +} + func (p *Parser) parseOrderByList() []*ast.OrderByElement { var elements []*ast.OrderByElement @@ -768,11 +831,18 @@ func (p *Parser) parseSettingsList() []*ast.SettingExpr { } p.nextToken() - if !p.expect(token.EQ) { - break + // Settings can have optional value (bool settings can be just name) + if p.currentIs(token.EQ) { + p.nextToken() + setting.Value = p.parseExpression(LOWEST) + } else { + // Boolean setting without value - defaults to true + setting.Value = &ast.Literal{ + Position: setting.Position, + Type: ast.LiteralBoolean, + Value: true, + } } - - setting.Value = p.parseExpression(LOWEST) settings = append(settings, setting) if !p.currentIs(token.COMMA) { @@ -842,12 +912,34 @@ func (p *Parser) parseInsert() *ast.InsertQuery { p.expect(token.RPAREN) } + // Parse SETTINGS before VALUES + if p.currentIs(token.SETTINGS) { + ins.HasSettings = true + p.nextToken() + // Just parse and skip the settings + p.parseSettingsList() + } + // Parse VALUES or SELECT if p.currentIs(token.VALUES) { p.nextToken() - // VALUES are typically provided externally, skip for now + // Skip VALUES data - consume until end of statement + for !p.currentIs(token.EOF) && !p.currentIs(token.SEMICOLON) && !p.currentIs(token.FORMAT) && !p.currentIs(token.SETTINGS) { + p.nextToken() + } } else if p.currentIs(token.SELECT) || p.currentIs(token.WITH) { ins.Select = p.parseSelectWithUnion() + // If the SELECT has settings, mark the INSERT as having settings too + if ins.Select != nil { + if sel, ok := ins.Select.(*ast.SelectWithUnionQuery); ok && sel != nil && len(sel.Selects) > 0 { + lastSel := sel.Selects[len(sel.Selects)-1] + if lastSel != nil { + if selQuery, ok := lastSel.(*ast.SelectQuery); ok && selQuery != nil && len(selQuery.Settings) > 0 { + ins.HasSettings = true + } + } + } + } } // Parse FORMAT (format names can be keywords like Null, JSON, etc.) @@ -947,13 +1039,31 @@ func (p *Parser) parseCreateTable(create *ast.CreateQuery) { } } - // Parse column definitions + // Parse column definitions and indexes if p.currentIs(token.LPAREN) { p.nextToken() for !p.currentIs(token.RPAREN) && !p.currentIs(token.EOF) { - col := p.parseColumnDeclaration() - if col != nil { - create.Columns = append(create.Columns, col) + // Handle INDEX definition + if p.currentIs(token.INDEX) { + p.nextToken() + // Skip index definition: INDEX name expr TYPE type GRANULARITY n + p.parseIdentifierName() // index name + // Skip expression and other index parts + for !p.currentIs(token.COMMA) && !p.currentIs(token.RPAREN) && !p.currentIs(token.EOF) { + p.nextToken() + } + } else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "CONSTRAINT" { + // Skip CONSTRAINT definitions + p.nextToken() + p.parseIdentifierName() // constraint name + for !p.currentIs(token.COMMA) && !p.currentIs(token.RPAREN) && !p.currentIs(token.EOF) { + p.nextToken() + } + } else { + col := p.parseColumnDeclaration() + if col != nil { + create.Columns = append(create.Columns, col) + } } if p.currentIs(token.COMMA) { p.nextToken() @@ -1023,11 +1133,27 @@ func (p *Parser) parseCreateTable(create *ast.CreateQuery) { } done_table_options: - // Parse AS SELECT + // Parse AS SELECT or AS table_function() if p.currentIs(token.AS) { p.nextToken() if p.currentIs(token.SELECT) || p.currentIs(token.WITH) { create.AsSelect = p.parseSelectWithUnion() + } else if p.currentIs(token.IDENT) { + // AS table_function(...) like "AS s3Cluster(...)" + // Skip the function call for now + p.parseIdentifierName() + if p.currentIs(token.LPAREN) { + depth := 1 + p.nextToken() + for depth > 0 && !p.currentIs(token.EOF) { + if p.currentIs(token.LPAREN) { + depth++ + } else if p.currentIs(token.RPAREN) { + depth-- + } + p.nextToken() + } + } } } } @@ -1136,8 +1262,8 @@ func (p *Parser) parseColumnDeclaration() *ast.ColumnDeclaration { Position: p.current.Pos, } - // Parse column name - if p.currentIs(token.IDENT) { + // Parse column name (can be identifier or keyword like KEY) + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { col.Name = p.current.Value p.nextToken() } else { @@ -1147,7 +1273,7 @@ func (p *Parser) parseColumnDeclaration() *ast.ColumnDeclaration { // Parse data type col.Type = p.parseDataType() - // Parse DEFAULT/MATERIALIZED/ALIAS + // Parse DEFAULT/MATERIALIZED/ALIAS/EPHEMERAL switch p.current.Token { case token.DEFAULT: col.DefaultKind = "DEFAULT" @@ -1163,6 +1289,16 @@ func (p *Parser) parseColumnDeclaration() *ast.ColumnDeclaration { col.Default = p.parseExpression(LOWEST) } + // Handle EPHEMERAL (can be EPHEMERAL or EPHEMERAL default_value) + if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "EPHEMERAL" { + col.DefaultKind = "EPHEMERAL" + p.nextToken() + // Optional default value + if !p.currentIs(token.COMMA) && !p.currentIs(token.RPAREN) && !p.currentIs(token.IDENT) { + col.Default = p.parseExpression(LOWEST) + } + } + // Parse CODEC if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "CODEC" { p.nextToken() @@ -1188,7 +1324,8 @@ func (p *Parser) parseColumnDeclaration() *ast.ColumnDeclaration { } func (p *Parser) parseDataType() *ast.DataType { - if !p.currentIs(token.IDENT) { + // Type names can be identifiers or keywords (Array, Nested, Key, etc.) + if !p.currentIs(token.IDENT) && !p.current.Token.IsKeyword() { return nil } @@ -1200,18 +1337,49 @@ func (p *Parser) parseDataType() *ast.DataType { // Parse type parameters if p.currentIs(token.LPAREN) { + dt.HasParentheses = true p.nextToken() - for !p.currentIs(token.RPAREN) && !p.currentIs(token.EOF) { - // Could be another data type or an expression - if p.currentIs(token.IDENT) && p.isDataTypeName(p.current.Value) { - dt.Parameters = append(dt.Parameters, p.parseDataType()) - } else { - dt.Parameters = append(dt.Parameters, p.parseExpression(LOWEST)) + + // Special handling for Nested type - it contains column declarations, not just types + if strings.ToUpper(dt.Name) == "NESTED" { + for !p.currentIs(token.RPAREN) && !p.currentIs(token.EOF) { + // Parse as column name + type + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { + pos := p.current.Pos + colName := p.current.Value + p.nextToken() + // Parse the type for this column + colType := p.parseDataType() + if colType != nil { + // Use NameTypePair for Nested column declarations + ntp := &ast.NameTypePair{ + Position: pos, + Name: colName, + Type: colType, + } + dt.Parameters = append(dt.Parameters, ntp) + } + } + if p.currentIs(token.COMMA) { + p.nextToken() + } else { + break + } } - if p.currentIs(token.COMMA) { - p.nextToken() - } else { - break + } else { + for !p.currentIs(token.RPAREN) && !p.currentIs(token.EOF) { + // Could be another data type or an expression + // Type names can be identifiers or keywords (Array, Nested, etc.) + if (p.currentIs(token.IDENT) || p.current.Token.IsKeyword()) && p.isDataTypeName(p.current.Value) { + dt.Parameters = append(dt.Parameters, p.parseDataType()) + } else { + dt.Parameters = append(dt.Parameters, p.parseExpression(LOWEST)) + } + if p.currentIs(token.COMMA) { + p.nextToken() + } else { + break + } } } p.expect(token.RPAREN) @@ -1223,9 +1391,9 @@ func (p *Parser) parseDataType() *ast.DataType { func (p *Parser) isDataTypeName(name string) bool { upper := strings.ToUpper(name) types := []string{ - "INT8", "INT16", "INT32", "INT64", "INT128", "INT256", + "INT", "INT8", "INT16", "INT32", "INT64", "INT128", "INT256", "UINT8", "UINT16", "UINT32", "UINT64", "UINT128", "UINT256", - "FLOAT32", "FLOAT64", + "FLOAT32", "FLOAT64", "FLOAT", "DECIMAL", "DECIMAL32", "DECIMAL64", "DECIMAL128", "DECIMAL256", "STRING", "FIXEDSTRING", "UUID", "DATE", "DATE32", "DATETIME", "DATETIME64", @@ -1235,6 +1403,11 @@ func (p *Parser) isDataTypeName(name string) bool { "BOOL", "BOOLEAN", "IPV4", "IPV6", "NOTHING", "INTERVAL", + "JSON", "OBJECT", "VARIANT", + "AGGREGATEFUNCTION", "SIMPLEAGGREGATEFUNCTION", + "POINT", "RING", "POLYGON", "MULTIPOLYGON", + "TIME64", "TIME", + "DYNAMIC", } for _, t := range types { if upper == t { @@ -1291,7 +1464,8 @@ func (p *Parser) parseEngineClause() *ast.EngineClause { Position: p.current.Pos, } - if p.currentIs(token.IDENT) { + // Engine name can be identifier or keyword (Null, Join, Memory, etc.) + if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { engine.Name = p.current.Value p.nextToken() } @@ -1334,8 +1508,29 @@ func (p *Parser) parseDrop() *ast.DropQuery { case token.USER: dropUser = true p.nextToken() + case token.FUNCTION: + p.nextToken() + case token.INDEX: + p.nextToken() default: - p.nextToken() // skip unknown token + // Handle multi-word DROP types: ROW POLICY, NAMED COLLECTION, SETTINGS PROFILE + if p.currentIs(token.IDENT) { + upper := strings.ToUpper(p.current.Value) + switch upper { + case "ROW", "NAMED", "POLICY", "SETTINGS", "QUOTA", "ROLE": + // Skip the DROP type tokens + for p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { + if p.currentIs(token.IF) { + break // Hit IF EXISTS + } + p.nextToken() + } + default: + p.nextToken() // skip unknown token + } + } else { + p.nextToken() // skip unknown token + } } // Handle IF EXISTS @@ -1372,7 +1567,39 @@ func (p *Parser) parseDrop() *ast.DropQuery { } } - // Handle ON CLUSTER + // Handle multiple tables (DROP TABLE IF EXISTS t1, t2, t3) + // For now, just skip additional table names + for p.currentIs(token.COMMA) { + p.nextToken() + // Skip the table name (may be qualified like db.table) + p.parseIdentifierName() + if p.currentIs(token.DOT) { + p.nextToken() + p.parseIdentifierName() + } + } + + // Handle ON table or ON CLUSTER + if p.currentIs(token.ON) { + p.nextToken() + if p.currentIs(token.CLUSTER) { + p.nextToken() + if p.currentIs(token.IDENT) || p.currentIs(token.STRING) { + drop.OnCluster = p.current.Value + p.nextToken() + } + } else { + // ON table_name (for DROP ROW POLICY, etc.) + // Skip the table reference + p.parseIdentifierName() + if p.currentIs(token.DOT) { + p.nextToken() + p.parseIdentifierName() + } + } + } + + // Handle second ON CLUSTER (can appear after ON table) if p.currentIs(token.ON) { p.nextToken() if p.currentIs(token.CLUSTER) { @@ -1390,6 +1617,14 @@ func (p *Parser) parseDrop() *ast.DropQuery { p.nextToken() } + // Handle NO DELAY + if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "NO" { + p.nextToken() + if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "DELAY" { + p.nextToken() + } + } + return drop } diff --git a/parser/parser_test.go b/parser/parser_test.go index 6dd8f71684..5be94f0d54 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -118,6 +118,11 @@ func TestParser(t *testing.T) { explainPath := filepath.Join(testDir, "explain.txt") if expectedBytes, err := os.ReadFile(explainPath); err == nil { expected := strings.TrimSpace(string(expectedBytes)) + // Strip server error messages from expected output + // These are messages like "The query succeeded but the server error '43' was expected..." + if idx := strings.Index(expected, "\nThe query succeeded but the server error"); idx != -1 { + expected = strings.TrimSpace(expected[:idx]) + } actual := strings.TrimSpace(parser.Explain(stmts[0])) if actual != expected { if metadata.Todo { diff --git a/token/token.go b/token/token.go index 857dde6df4..7e63c8751f 100644 --- a/token/token.go +++ b/token/token.go @@ -74,6 +74,7 @@ const ( CONSTRAINT CREATE CROSS + CUBE DATABASE DATABASES DEFAULT @@ -255,6 +256,7 @@ var tokens = [...]string{ CONSTRAINT: "CONSTRAINT", CREATE: "CREATE", CROSS: "CROSS", + CUBE: "CUBE", DATABASE: "DATABASE", DATABASES: "DATABASES", DEFAULT: "DEFAULT",