diff --git a/Specifications/Language/5_Grammar/QSharpLexer.g4 b/Specifications/Language/5_Grammar/QSharpLexer.g4
new file mode 100644
index 0000000..3f54bc9
--- /dev/null
+++ b/Specifications/Language/5_Grammar/QSharpLexer.g4
@@ -0,0 +1,191 @@
+lexer grammar QSharpLexer;
+
+// Keywords
+
+Adj : 'Adj';
+AdjointFunctor : 'Adjoint';
+AdjointGenerator : 'adjoint';
+And : 'and';
+Apply : 'apply';
+As : 'as';
+Auto : 'auto';
+BigInt : 'BigInt';
+Body : 'body';
+Bool : 'Bool';
+Borrowing : 'borrowing';
+ControlledFunctor : 'Controlled';
+ControlledGenerator : 'controlled';
+Ctl : 'Ctl';
+Distribute : 'distribute';
+Double : 'Double';
+Elif : 'elif';
+Else : 'else';
+Fail : 'fail';
+False : 'false';
+Fixup : 'fixup';
+For : 'for';
+Function : 'function';
+If : 'if';
+In : 'in';
+Int : 'Int';
+Internal : 'internal';
+Intrinsic : 'intrinsic';
+Invert : 'invert';
+Is : 'is';
+Let : 'let';
+Mutable : 'mutable';
+Namespace : 'namespace';
+New : 'new';
+Newtype : 'newtype';
+Not : 'not';
+One : 'One';
+Open : 'open';
+Operation : 'operation';
+Or : 'or';
+Pauli : 'Pauli';
+PauliI : 'PauliI';
+PauliX : 'PauliX';
+PauliY : 'PauliY';
+PauliZ : 'PauliZ';
+Qubit : 'Qubit';
+Range : 'Range';
+Repeat : 'repeat';
+Result : 'Result';
+Return : 'return';
+Self : 'self';
+Set : 'set';
+String : 'String';
+True : 'true';
+Unit : 'Unit';
+Until : 'until';
+Using : 'using';
+While : 'while';
+Within : 'within';
+Zero : 'Zero';
+
+// Operators
+
+AndEqual : 'and=';
+ArrowLeft : '<-';
+ArrowRight : '->';
+Asterisk : '*';
+AsteriskEqual : '*=';
+At : '@';
+Bang : '!';
+BraceLeft : '{' -> pushMode(DEFAULT_MODE); // Every '{' pushes a mode so that the matching '}' pops exactly one level.
+BraceRight : '}' { if (!_modeStack.isEmpty()) popMode(); }; // Guarded: a stray '}' with an empty mode stack does not pop.
+BracketLeft : '[';
+BracketRight : ']';
+Caret : '^';
+CaretEqual : '^=';
+Colon : ':';
+Comma : ',';
+DollarQuote : '$"' -> pushMode(INTERPOLATED); // Opens an interpolated string; closed by InterpDoubleQuote.
+Dot : '.';
+DoubleColon : '::';
+DoubleDot : '..';
+DoubleEqual : '==';
+DoubleQuote : '"' -> pushMode(STRING); // Opens a plain string; closed by StringDoubleQuote.
+Ellipsis : '...';
+Equal : '=';
+FatArrowRight : '=>';
+Greater : '>';
+GreaterEqual : '>=';
+Less : '<';
+LessEqual : '<=';
+Minus : '-';
+MinusEqual : '-=';
+NotEqual : '!=';
+OrEqual : 'or=';
+ParenLeft : '(';
+ParenRight : ')';
+Percent : '%';
+PercentEqual : '%=';
+Pipe : '|';
+Plus : '+';
+PlusEqual : '+=';
+Question : '?';
+Semicolon : ';';
+Slash : '/';
+SlashEqual : '/=';
+TripleAmpersand : '&&&';
+TripleAmpersandEqual : '&&&=';
+TripleCaret : '^^^';
+TripleCaretEqual : '^^^=';
+TripleGreater : '>>>';
+TripleGreaterEqual : '>>>=';
+TripleLess : '<<<';
+TripleLessEqual : '<<<=';
+TriplePipe : '|||';
+TriplePipeEqual : '|||=';
+TripleTilde : '~~~';
+Underscore : '_';
+With : 'w/';
+WithEqual : 'w/=';
+
+// Literals
+
+fragment Digit : [0-9];
+
+IntegerLiteral
+    : Digit+
+    | ('0x' | '0X') [0-9a-fA-F]+
+    | ('0o' | '0O') [0-7]+
+    | ('0b' | '0B') [0-1]+
+    ;
+
+BigIntegerLiteral : IntegerLiteral ('L' | 'l');
+
+DoubleLiteral
+    : Digit+ '.' Digit+ (('e' | 'E') ('+' | '-')? Digit+)? // "1.5" and "1.5e-3"; the exponent and its sign are optional.
+    | '.' Digit+ (('e' | 'E') ('+' | '-')? Digit+)? // ".5" and ".5e3"
+    // "n.." should be interpreted as an integer range, not the double "n." followed by a dot.
+    | Digit+ '.' { _input.LA(1) != '.' }?
+    | Digit+ ('e' | 'E') ('+' | '-')? Digit+ // "4e7", "4e-7", "4e+7"
+    ;
+
+Identifier : IdentifierStart IdentifierContinuation*;
+
+IdentifierStart // NOTE(review): not declared 'fragment', so this is also a token type of its own - confirm that is intended.
+    : Underscore
+    | [\p{Letter}]
+    | [\p{Letter_Number}]
+    ;
+
+IdentifierContinuation // NOTE(review): not declared 'fragment' either - confirm that is intended.
+    : [\p{Connector_Punctuation}]
+    | [\p{Decimal_Number}]
+    | [\p{Format}]
+    | [\p{Letter}]
+    | [\p{Letter_Number}]
+    | [\p{Nonspacing_Mark}]
+    | [\p{Spacing_Mark}]
+    ;
+
+TypeParameter : '\'' Identifier;
+
+Whitespace : (' ' | '\n' | '\r' | '\t')+ -> channel(HIDDEN);
+
+Comment : '//' ~('\n' | '\r')* -> channel(HIDDEN);
+
+Invalid : . -> channel(HIDDEN); // Catch-all: any character no other rule matches is routed to the hidden channel.
+
+// Strings
+
+mode STRING;
+
+StringEscape : '\\' .;
+
+StringText : ~('"' | '\\')+;
+
+StringDoubleQuote : '"' -> popMode;
+
+mode INTERPOLATED;
+
+InterpStringEscape : '\\' .;
+
+InterpBraceLeft : '{' -> pushMode(DEFAULT_MODE); // Start of an expression hole; the closing '}' is lexed as BraceRight, which pops back here.
+
+InterpStringText : ~('\\' | '"' | '{')+;
+
+InterpDoubleQuote : '"' -> popMode;
diff --git a/Specifications/Language/5_Grammar/QSharpParser.g4 b/Specifications/Language/5_Grammar/QSharpParser.g4
new file mode 100644
index 0000000..ca91aa7
--- /dev/null
+++ b/Specifications/Language/5_Grammar/QSharpParser.g4
@@ -0,0 +1,257 @@
+parser grammar QSharpParser;
+
+options {
+    tokenVocab = QSharpLexer;
+}
+
+program : namespace* EOF;
+
+// Namespace
+
+namespace : 'namespace' qualifiedName BraceLeft namespaceElement* BraceRight;
+
+qualifiedName : Identifier ('.' Identifier)*;
+
+namespaceElement
+    : openDirective
+    | typeDeclaration
+    | callableDeclaration
+    ;
+
+// Open Directive
+
+openDirective : 'open' qualifiedName ('as' qualifiedName)? ';';
+
+// Declaration
+
+attribute : '@' expression;
+
+access : 'internal';
+
+declarationPrefix : attribute* access?;
+
+// Type Declaration
+
+typeDeclaration : declarationPrefix 'newtype' Identifier '=' underlyingType ';';
+
+underlyingType
+    : typeDeclarationTuple
+    | type
+    ;
+
+typeDeclarationTuple : '(' (typeTupleItem (',' typeTupleItem)*)? ')';
+
+typeTupleItem
+    : namedItem
+    | underlyingType
+    ;
+
+namedItem : Identifier ':' type;
+
+// Callable Declaration
+
+callableDeclaration
+    : declarationPrefix ('function' | 'operation')
+      Identifier typeParameterBinding? parameterTuple
+      ':' type characteristics?
+      callableBody
+    ;
+
+typeParameterBinding : '<' (TypeParameter (',' TypeParameter)*)? '>';
+
+parameterTuple : '(' (parameter (',' parameter)*)? ')';
+
+parameter
+    : namedItem
+    | parameterTuple
+    ;
+
+characteristics : 'is' characteristicsExpression;
+
+characteristicsExpression
+    : 'Adj'
+    | 'Ctl'
+    | '(' characteristicsExpression ')'
+    | characteristicsExpression '*' characteristicsExpression
+    | characteristicsExpression '+' characteristicsExpression
+    ;
+
+callableBody
+    : BraceLeft specialization* BraceRight
+    | scope
+    ;
+
+specialization : specializationName+ specializationGenerator;
+
+specializationName
+    : 'body'
+    | 'adjoint'
+    | 'controlled'
+    ;
+
+specializationGenerator
+    : 'auto' ';'
+    | 'self' ';'
+    | 'invert' ';'
+    | 'distribute' ';'
+    | 'intrinsic' ';'
+    | providedSpecialization
+    ;
+
+providedSpecialization : specializationParameterTuple? scope;
+
+specializationParameterTuple : '(' (specializationParameter (',' specializationParameter)*)? ')';
+
+specializationParameter
+    : Identifier
+    | '...'
+    ;
+
+// Type
+
+type
+    : '_'
+    | TypeParameter
+    | 'BigInt'
+    | 'Bool'
+    | 'Double'
+    | 'Int'
+    | 'Pauli'
+    | 'Qubit'
+    | 'Range'
+    | 'Result'
+    | 'String'
+    | 'Unit'
+    | qualifiedName
+    | '(' (type (',' type)* ','?)? ')'
+    | '(' arrowType characteristics? ')'
+    | type '[' ']'
+    ;
+
+arrowType
+    : '(' type ('->' | '=>') type ')'
+    | type ('->' | '=>') type
+    ;
+
+// Statement
+
+statement
+    : expression ';'
+    | 'return' expression ';'
+    | 'fail' expression ';'
+    | 'let' symbolBinding '=' expression ';'
+    | 'mutable' symbolBinding '=' expression ';'
+    | 'set' symbolBinding '=' expression ';'
+    | 'set' Identifier updateOperator expression ';'
+    | 'set' Identifier 'w/=' expression '<-' expression ';'
+    | 'if' '(' expression ')' scope
+    | 'elif' '(' expression ')' scope
+    | 'else' scope
+    | 'for' '(' symbolBinding 'in' expression ')' scope
+    | 'while' '(' expression ')' scope
+    | 'repeat' scope
+    | 'until' '(' expression ')' (';' | 'fixup' scope)
+    | 'within' scope
+    | 'apply' scope
+    | 'using' '(' symbolBinding '=' qubitInitializer ')' scope
+    | 'borrowing' '(' symbolBinding '=' qubitInitializer ')' scope
+    ;
+
+scope : BraceLeft statement* BraceRight;
+
+symbolBinding
+    : '_'
+    | Identifier
+    | '(' (symbolBinding (',' symbolBinding)* ','?)? ')'
+    ;
+
+updateOperator
+    : '^='
+    | '*='
+    | '/='
+    | '%='
+    | '+='
+    | '-='
+    | '>>>='
+    | '<<<='
+    | '&&&='
+    | '^^^='
+    | '|||='
+    | 'and='
+    | 'or='
+    ;
+
+qubitInitializer
+    : 'Qubit' '(' ')'
+    | 'Qubit' '[' expression ']'
+    | '(' (qubitInitializer (',' qubitInitializer)* ','?)? ')'
+    ;
+
+// Expression
+
+expression
+    : '_'
+    | qualifiedName ('<' (type (',' type)* ','?)? '>')?
+    | IntegerLiteral
+    | BigIntegerLiteral
+    | DoubleLiteral
+    | DoubleQuote stringContent* StringDoubleQuote
+    | DollarQuote interpStringContent* InterpDoubleQuote
+    | boolLiteral
+    | resultLiteral
+    | pauliLiteral
+    | '(' (expression (',' expression)* ','?)? ')'
+    | '[' (expression (',' expression)* ','?)? ']'
+    | 'new' type '[' expression ']'
+    | expression ('::' Identifier | '[' expression ']')
+    | expression '!'
+    | 'Controlled' expression
+    | 'Adjoint' expression
+    | expression '(' (expression (',' expression)* ','?)? ')'
+    | ('-' | 'not' | '~~~') expression
+    | expression '^' expression
+    | expression ('*' | '/' | '%') expression
+    | expression ('+' | '-') expression
+    | expression ('>>>' | '<<<') expression
+    | expression ('>' | '<' | '>=' | '<=') expression
+    | expression ('==' | '!=') expression
+    | expression '&&&' expression
+    | expression '^^^' expression
+    | expression '|||' expression
+    | expression 'and' expression
+    | expression 'or' expression
+    | expression '?' expression '|' expression
+    | expression '..' expression
+    | expression '...'
+    | '...' expression
+    | '...'
+    | expression 'w/' expression '<-' expression
+    ;
+
+boolLiteral
+    : 'false'
+    | 'true'
+    ;
+
+resultLiteral
+    : 'Zero'
+    | 'One'
+    ;
+
+pauliLiteral
+    : 'PauliI'
+    | 'PauliX'
+    | 'PauliY'
+    | 'PauliZ'
+    ;
+
+stringContent
+    : StringEscape
+    | StringText
+    ;
+
+interpStringContent
+    : InterpStringEscape
+    | InterpBraceLeft expression BraceRight // "{expr}" hole: the lexer is in default mode between the braces.
+    | InterpStringText
+    ;
diff --git a/Specifications/Language/5_Grammar/README.md b/Specifications/Language/5_Grammar/README.md
new file mode 100644
index 0000000..5ae103f
--- /dev/null
+++ b/Specifications/Language/5_Grammar/README.md
@@ -0,0 +1,12 @@
+# Grammar
+
+A reference implementation of the Q# grammar is available in the [ANTLR4](https://www.antlr.org/) format.
+The grammar source files are listed below:
+
+* [**QSharpLexer.g4**](https://github.com/microsoft/qsharp-language/blob/main/Specifications/Language/5_Grammar/QSharpLexer.g4) describes the lexical structure of Q#.
+* [**QSharpParser.g4**](https://github.com/microsoft/qsharp-language/blob/main/Specifications/Language/5_Grammar/QSharpParser.g4) describes the syntax of Q#.
+
+The Q# grammar uses [*actions*](https://github.com/antlr/antlr4/blob/master/doc/actions.md) and [*semantic predicates*](https://github.com/antlr/antlr4/blob/master/doc/predicates.md).
+These features allow grammars to include custom source code in the ANTLR-generated parser, which means that the code needs to be written in the same language as the ANTLR target language. +If you are using the Q# grammar to generate parsers in a language other than Java, you may need to update the code used by the actions and semantic predicates to match the target language. +Target-specific code is marked by curly braces `{` `}` in the grammar. diff --git a/Specifications/Language/README.md b/Specifications/Language/README.md index e27d4eb..9b031a0 100644 --- a/Specifications/Language/README.md +++ b/Specifications/Language/README.md @@ -62,3 +62,4 @@ programs are implemented in terms of statements and expressions, much like in cl 1. [Type Parameterizations](https://github.com/microsoft/qsharp-language/blob/main/Specifications/Language/4_TypeSystem/TypeParameterizations.md#type-parameterizations) 1. [Type Inference](https://github.com/microsoft/qsharp-language/blob/main/Specifications/Language/4_TypeSystem/TypeInference.md#type-inference) +1. [Grammar](https://github.com/microsoft/qsharp-language/tree/main/Specifications/Language/5_Grammar#grammar)