From 5640d1758d4915ddad3ae33bb51f4531d0d00024 Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Sat, 9 Nov 2019 06:05:36 +0200 Subject: [PATCH 01/16] Added token for regexp. --- token/token.go | 1 + 1 file changed, 1 insertion(+) diff --git a/token/token.go b/token/token.go index 21180f2..6f49fe5 100644 --- a/token/token.go +++ b/token/token.go @@ -55,6 +55,7 @@ const ( EQ = "==" NOT_EQ = "!=" STRING = "STRING" + REGEXP = "REGEXP" LBRACKET = "[" RBRACKET = "]" COLON = ":" From 346a87de29b706eb784eb493f5a7132a507bb863 Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Sat, 9 Nov 2019 06:17:41 +0200 Subject: [PATCH 02/16] Lex regular expressions --- lexer/lexer.go | 125 +++++++++++++++++++++++++++++++++++++++++--- lexer/lexer_test.go | 116 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 231 insertions(+), 10 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index 99afb81..f981517 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -1,17 +1,28 @@ package lexer import ( + "fmt" "strings" "github.com/skx/monkey/token" ) -// Lexer used to be as lexer for monkey programming language. +// Lexer holds our object-state. type Lexer struct { - position int //current character position - readPosition int //next character position - ch rune //current character - characters []rune //rune slice of input string + // The current character position + position int + + // The next character position + readPosition int + + //The current character + ch rune + + // A rune slice of our input string + characters []rune + + // Previous token. + prevToken token.Token } // New a Lexer instance from string input. 
@@ -140,7 +151,34 @@ func (l *Lexer) NextToken() token.Token { l.readChar() tok = token.Token{Type: token.SLASH_EQUALS, Literal: string(ch) + string(l.ch)} } else { - tok = newToken(token.SLASH, l.ch) + // slash is mostly division, but could + // be the start of a regular expression + + // We exclude: + // a[b] / c -> RBRACKET + // ( a + b ) / c -> RPAREN + // a / c -> IDENT + // 3.2 / c -> FLOAT + // 1 / c -> INT + // + if l.prevToken.Type == token.RBRACKET || + l.prevToken.Type == token.RPAREN || + l.prevToken.Type == token.IDENT || + l.prevToken.Type == token.INT || + l.prevToken.Type == token.FLOAT { + + tok = newToken(token.SLASH, l.ch) + } else { + str, err := l.readRegexp() + if err == nil { + tok.Type = token.REGEXP + tok.Literal = str + } else { + fmt.Printf("%s\n", err.Error()) + tok.Type = token.REGEXP + tok.Literal = str + } + } } case rune('*'): if l.peekChar() == rune('*') { @@ -170,13 +208,27 @@ func (l *Lexer) NextToken() token.Token { } else { tok = newToken(token.GT, l.ch) } + case rune('~'): + if l.peekChar() == rune('=') { + ch := l.ch + l.readChar() + tok = token.Token{Type: token.CONTAINS, Literal: string(ch) + string(l.ch)} + } + case rune('!'): if l.peekChar() == rune('=') { ch := l.ch l.readChar() tok = token.Token{Type: token.NOT_EQ, Literal: string(ch) + string(l.ch)} } else { - tok = newToken(token.BANG, l.ch) + if l.peekChar() == rune('~') { + ch := l.ch + l.readChar() + tok = token.Token{Type: token.NOT_CONTAINS, Literal: string(ch) + string(l.ch)} + + } else { + tok = newToken(token.BANG, l.ch) + } } case rune('"'): tok.Type = token.STRING @@ -194,14 +246,21 @@ func (l *Lexer) NextToken() token.Token { tok.Literal = "" tok.Type = token.EOF default: + if isDigit(l.ch) { - return l.readDecimal() + tok := l.readDecimal() + l.prevToken = tok + return tok + } tok.Literal = l.readIdentifier() tok.Type = token.LookupIdentifier(tok.Literal) + l.prevToken = tok + return tok } l.readChar() + l.prevToken = tok return tok } @@ -470,6 +529,56 @@ 
func (l *Lexer) readString() string { return out } +// read a regexp, including flags. +func (l *Lexer) readRegexp() (string, error) { + out := "" + + for { + l.readChar() + + if l.ch == rune(0) { + return "unterminated regular expression", fmt.Errorf("unterminated regular expression") + } + if l.ch == '/' { + + // consume the terminating "/". + l.readChar() + + // prepare to look for flags + flags := "" + + // two flags are supported: + // i -> Ignore-case + // m -> Multiline + // + for l.ch == rune('i') || l.ch == rune('m') { + + // save the char - unless it is a repeat + if !strings.Contains(flags, string(l.ch)) { + + // append the flag, keeping first-seen order + tmp := strings.Split(flags, "") + tmp = append(tmp, string(l.ch)) + flags = strings.Join(tmp, "") + + } + + // read the next + l.readChar() + } + + // convert the regexp to go-lang + if len(flags) > 0 { + out = "(?" + flags + ")" + out + } + break + } + out = out + string(l.ch) + } + + return out, nil +} + // read the end of a backtick-quoted string func (l *Lexer) readBacktick() string { position := l.position + 1 diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index cfb6a26..253c9b4 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -42,7 +42,7 @@ let add = fn(x, y){ x+y; }; let result = add(five, ten); -!-/ *5; +!- *5; 5<10>5; if(5<10){ @@ -104,7 +104,6 @@ for {token.SEMICOLON, ";"}, {token.BANG, "!"}, {token.MINUS, "-"}, - {token.SLASH, "/"}, {token.ASTERISK, "*"}, {token.INT, "5"}, {token.SEMICOLON, ";"}, @@ -492,3 +491,116 @@ func TestIntDotMethod(t *testing.T) { } } } + +// TestRegexp ensures a simple regexp can be parsed. 
+func TestRegexp(t *testing.T) { + input := `if ( f ~= /steve/i ) +if ( f ~= /steve/m ) +if ( f ~= /steve/mi ) +if ( f ~= /steve/miiiiiiiiiiiiiiiiimmmmmmmmmmmmmiiiii )` + + tests := []struct { + expectedType token.Type + expectedLiteral string + }{ + {token.IF, "if"}, + {token.LPAREN, "("}, + {token.IDENT, "f"}, + {token.CONTAINS, "~="}, + {token.REGEXP, "(?i)steve"}, + {token.RPAREN, ")"}, + {token.IF, "if"}, + {token.LPAREN, "("}, + {token.IDENT, "f"}, + {token.CONTAINS, "~="}, + {token.REGEXP, "(?m)steve"}, + {token.RPAREN, ")"}, + {token.IF, "if"}, + {token.LPAREN, "("}, + {token.IDENT, "f"}, + {token.CONTAINS, "~="}, + {token.REGEXP, "(?mi)steve"}, + {token.RPAREN, ")"}, + {token.IF, "if"}, + {token.LPAREN, "("}, + {token.IDENT, "f"}, + {token.CONTAINS, "~="}, + {token.REGEXP, "(?mi)steve"}, + {token.RPAREN, ")"}, + {token.EOF, ""}, + } + l := New(input) + for i, tt := range tests { + tok := l.NextToken() + if tok.Type != tt.expectedType { + t.Fatalf("tests[%d] - tokentype wrong, expected=%q, got=%q", i, tt.expectedType, tok.Type) + } + if tok.Literal != tt.expectedLiteral { + t.Fatalf("tests[%d] - Literal wrong, expected=%q, got=%q", i, tt.expectedLiteral, tok.Literal) + } + } +} + +// TestIllegalRegexp is designed to look for an unterminated/illegal regexp +func TestIllegalRegexp(t *testing.T) { + input := `if ( f ~= /steve )` + + tests := []struct { + expectedType token.Type + expectedLiteral string + }{ + {token.IF, "if"}, + {token.LPAREN, "("}, + {token.IDENT, "f"}, + {token.CONTAINS, "~="}, + {token.REGEXP, "unterminated regular expression"}, + {token.EOF, ""}, + } + l := New(input) + for i, tt := range tests { + tok := l.NextToken() + if tok.Type != tt.expectedType { + t.Fatalf("tests[%d] - tokentype wrong, expected=%q, got=%q", i, tt.expectedType, tok.Type) + } + if tok.Literal != tt.expectedLiteral { + t.Fatalf("tests[%d] - Literal wrong, expected=%q, got=%q", i, tt.expectedLiteral, tok.Literal) + } + } +} + +// TestDiv is designed to test that a 
division is recognized; that it is +// not confused with a regular-expression. +func TestDiv(t *testing.T) { + input := `a = b / c; +a = 3/4; +` + + tests := []struct { + expectedType token.Type + expectedLiteral string + }{ + {token.IDENT, "a"}, + {token.ASSIGN, "="}, + {token.IDENT, "b"}, + {token.SLASH, "/"}, + {token.IDENT, "c"}, + {token.SEMICOLON, ";"}, + {token.IDENT, "a"}, + {token.ASSIGN, "="}, + {token.INT, "3"}, + {token.SLASH, "/"}, + {token.INT, "4"}, + {token.SEMICOLON, ";"}, + {token.EOF, ""}, + } + l := New(input) + for i, tt := range tests { + tok := l.NextToken() + if tok.Type != tt.expectedType { + t.Fatalf("tests[%d] - tokentype wrong, expected=%q, got=%q", i, tt.expectedType, tok.Type) + } + if tok.Literal != tt.expectedLiteral { + t.Fatalf("tests[%d] - Literal wrong, expected=%q, got=%q", i, tt.expectedLiteral, tok.Literal) + } + } +} From 45eca7a1a4c09e1712cb2d7ffc88df4faa28ec75 Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Sat, 9 Nov 2019 06:17:53 +0200 Subject: [PATCH 03/16] Added new token types for regexp match/not-match --- token/token.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/token/token.go b/token/token.go index 6f49fe5..c2231a8 100644 --- a/token/token.go +++ b/token/token.go @@ -60,6 +60,9 @@ const ( RBRACKET = "]" COLON = ":" PERIOD = "." 
+ CONTAINS = "~=" + NOT_CONTAINS = "!~" + ILLEGAL = "ILLEGAL" ) // reversed keywords From a8a5ada608d77a87233da9fb37d9ad4351a74afd Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Sat, 9 Nov 2019 06:20:39 +0200 Subject: [PATCH 04/16] Added regular expression object --- object/object.go | 1 + object/object_regexp.go | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 object/object_regexp.go diff --git a/object/object.go b/object/object.go index f2b8da7..e8ee107 100644 --- a/object/object.go +++ b/object/object.go @@ -18,6 +18,7 @@ const ( ARRAY_OBJ = "ARRAY" HASH_OBJ = "HASH" FILE_OBJ = "FILE" + REGEXP_OBJ = "REGEXP" ) // Object is the interface that all of our various object-types must implmenet. diff --git a/object/object_regexp.go b/object/object_regexp.go new file mode 100644 index 0000000..95ebea5 --- /dev/null +++ b/object/object_regexp.go @@ -0,0 +1,25 @@ +// The implementation of our regular-expression object. + +package object + +// Regexp wraps regular-expressions and implements Object and Hashable interfaces. +type Regexp struct { + // Value holds the string value this object wraps. + Value string +} + +// Type returns the type of this object. +func (s *Regexp) Type() Type { + return REGEXP_OBJ +} + +// Inspect returns a string-representation of the given object. +func (r *Regexp) Inspect() string { + return r.Value +} + +// InvokeMethod invokes a method against the object. +// (Built-in methods only.) 
+func (s *Regexp) InvokeMethod(method string, env Environment, args ...Object) Object { + return nil +} From bfc98b119140f2b516bb9e054954aa4a3ef7248c Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Sat, 9 Nov 2019 06:23:28 +0200 Subject: [PATCH 05/16] Added AST node for regexp --- ast/ast.go | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/ast/ast.go b/ast/ast.go index b4ee4b0..0246c0e 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -561,6 +561,36 @@ func (sl *StringLiteral) TokenLiteral() string { return sl.Token.Literal } // String returns this object as a string. func (sl *StringLiteral) String() string { return sl.Token.Literal } +// RegexpLiteral holds a regular-expression. +type RegexpLiteral struct { + // Token is the token + Token token.Token + + // Value is the value of the regular expression. + Value string +} + +func (rl *RegexpLiteral) expressionNode() {} + +// TokenLiteral returns the literal token. +func (rl *RegexpLiteral) TokenLiteral() string { return rl.Token.Literal } + +// String returns this object as a string. 
+func (rl *RegexpLiteral) String() string { + + start := "/" + val := rl.Token.Literal + end := "/" + + if strings.HasPrefix(rl.Token.Literal, "(?i)") { + end = "/i" + val = strings.TrimPrefix(val, "(?i)") + } + + str := start + val + end + return str +} + // BacktickLiteral holds details of a command to be executed type BacktickLiteral struct { // Token is the actual token From daeaa4938cd4a128eca8e17ef2ad171abf31008f Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Sat, 9 Nov 2019 06:23:39 +0200 Subject: [PATCH 06/16] Parse Regexp into AST --- parser/parser.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/parser/parser.go b/parser/parser.go index 47622ee..5dc3d5f 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -116,6 +116,7 @@ func New(l *lexer.Lexer) *Parser { p.registerPrefix(token.BACKTICK, p.parseBacktickLiteral) p.registerPrefix(token.LBRACKET, p.parseArrayLiteral) p.registerPrefix(token.LBRACE, p.parseHashLiteral) + p.registerPrefix(token.REGEXP, p.parseRegexpLiteral) p.infixParseFns = make(map[token.Type]infixParseFn) p.registerInfix(token.ASSIGN, p.parseAssignExpression) @@ -519,6 +520,11 @@ func (p *Parser) parseStringLiteral() ast.Expression { return &ast.StringLiteral{Token: p.curToken, Value: p.curToken.Literal} } +// parseRegexpLiteral parses a regular-expression. +func (p *Parser) parseRegexpLiteral() ast.Expression { + return &ast.RegexpLiteral{Token: p.curToken, Value: p.curToken.Literal} +} + // parseBacktickLiteral parses a backtick-expression. 
func (p *Parser) parseBacktickLiteral() ast.Expression { return &ast.BacktickLiteral{Token: p.curToken, Value: p.curToken.Literal} From 89140ace02c627d03425a80086cc72e57641c8d5 Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Sat, 9 Nov 2019 06:24:08 +0200 Subject: [PATCH 07/16] Instantiate a regexp when we see ast.RegexpLiteral --- evaluator/evaluator.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/evaluator/evaluator.go b/evaluator/evaluator.go index 5262cfc..979a63e 100644 --- a/evaluator/evaluator.go +++ b/evaluator/evaluator.go @@ -142,6 +142,8 @@ func Eval(node ast.Node, env *object.Environment) object.Object { return &object.Array{Elements: elements} case *ast.StringLiteral: return &object.String{Value: node.Value} + case *ast.RegexpLiteral: + return &object.Regexp{Value: node.Value} case *ast.BacktickLiteral: return backTickOperation(node.Value) case *ast.IndexExpression: From cca1e326aa52f0be4f6c08b5c47f10947969c30b Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Sat, 9 Nov 2019 06:35:28 +0200 Subject: [PATCH 08/16] Updated comment --- object/object_regexp.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object/object_regexp.go b/object/object_regexp.go index 95ebea5..bd70f8e 100644 --- a/object/object_regexp.go +++ b/object/object_regexp.go @@ -2,7 +2,7 @@ package object -// Regexp wraps regular-expressions and implements Object and Hashable interfaces. +// Regexp wraps regular-expressions and implements the Object interface. type Regexp struct { // Value holds the string value this object wraps. 
Value string From 5b3afe69a206c313c71b2ff63ec5a32562de46ad Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Sat, 9 Nov 2019 06:35:39 +0200 Subject: [PATCH 09/16] Add null-test on regexp --- evaluator/evaluator.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/evaluator/evaluator.go b/evaluator/evaluator.go index 979a63e..666f8bf 100644 --- a/evaluator/evaluator.go +++ b/evaluator/evaluator.go @@ -1012,6 +1012,8 @@ func objectToNativeBoolean(o object.Object) bool { return obj.Value case *object.String: return obj.Value != "" + case *object.Regexp: + return obj.Value != "" case *object.Null: return false case *object.Integer: From d7c2b2a4e32deb4a030dc859f8e77387411442d5 Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Sat, 9 Nov 2019 06:35:48 +0200 Subject: [PATCH 10/16] type() now works on a regexp too --- evaluator/stdlib_core.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/evaluator/stdlib_core.go b/evaluator/stdlib_core.go index f407fa5..269101e 100644 --- a/evaluator/stdlib_core.go +++ b/evaluator/stdlib_core.go @@ -510,6 +510,8 @@ func typeFun(args ...object.Object) object.Object { switch args[0].(type) { case *object.String: return &object.String{Value: "string"} + case *object.Regexp: + return &object.String{Value: "regexp"} case *object.Boolean: return &object.String{Value: "bool"} case *object.Builtin: From ed32d4849bce26744b81d447efafadfc78aa6d9d Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Sat, 9 Nov 2019 13:06:45 +0200 Subject: [PATCH 11/16] Handle '\!~' + '~=' --- parser/parser.go | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 5dc3d5f..4d38928 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -23,28 +23,32 @@ type ( const ( _ int = iota LOWEST - COND // OR or AND - ASSIGN // = - EQUALS // == or != - LESSGREATER // > or < - SUM // + or - - PRODUCT // * or / - POWER // ** - MOD // % - PREFIX // -X or !X - CALL // myFunction(X) - 
INDEX // array[index], map[key] + COND // OR or AND + ASSIGN // = + EQUALS // == or != + REGEXP_MATCH // !~ ~= + LESSGREATER // > or < + SUM // + or - + PRODUCT // * or / + POWER // ** + MOD // % + PREFIX // -X or !X + CALL // myFunction(X) + INDEX // array[index], map[key] ) // each token precedence var precedences = map[token.Type]int{ - token.ASSIGN: ASSIGN, - token.EQ: EQUALS, - token.NOT_EQ: EQUALS, - token.LT: LESSGREATER, - token.LT_EQUALS: LESSGREATER, - token.GT: LESSGREATER, - token.GT_EQUALS: LESSGREATER, + token.ASSIGN: ASSIGN, + token.EQ: EQUALS, + token.NOT_EQ: EQUALS, + token.LT: LESSGREATER, + token.LT_EQUALS: LESSGREATER, + token.GT: LESSGREATER, + token.GT_EQUALS: LESSGREATER, + token.CONTAINS: REGEXP_MATCH, + token.NOT_CONTAINS: REGEXP_MATCH, + token.PLUS: SUM, token.PLUS_EQUALS: SUM, token.MINUS: SUM, @@ -102,6 +106,7 @@ func New(l *lexer.Lexer) *Parser { p.prefixParseFns = make(map[token.Type]prefixParseFn) p.registerPrefix(token.IDENT, p.parseIdentifier) p.registerPrefix(token.INT, p.parseIntegerLiteral) + p.registerPrefix(token.REGEXP, p.parseRegexpLiteral) p.registerPrefix(token.FLOAT, p.parseFloatLiteral) p.registerPrefix(token.TRUE, p.parseBoolean) p.registerPrefix(token.FALSE, p.parseBoolean) @@ -141,6 +146,8 @@ func New(l *lexer.Lexer) *Parser { p.registerInfix(token.MINUS_EQUALS, p.parseAssignExpression) p.registerInfix(token.ASTERISK_EQUALS, p.parseAssignExpression) p.registerInfix(token.SLASH_EQUALS, p.parseAssignExpression) + p.registerInfix(token.CONTAINS, p.parseInfixExpression) + p.registerInfix(token.NOT_CONTAINS, p.parseInfixExpression) p.postfixParseFns = make(map[token.Type]postfixParseFn) p.registerPostfix(token.PLUS_PLUS, p.parsePostfixExpression) From 912ff7099be9cf43606335d1c51a01d0b5401a17 Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Sat, 9 Nov 2019 13:06:57 +0200 Subject: [PATCH 12/16] Handle regexp matches --- evaluator/evaluator.go | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff 
--git a/evaluator/evaluator.go b/evaluator/evaluator.go index 666f8bf..d118d6f 100644 --- a/evaluator/evaluator.go +++ b/evaluator/evaluator.go @@ -275,8 +275,14 @@ func evalInfixExpression(operator string, left, right object.Object) object.Obje return nativeBoolToBooleanObject(objectToNativeBoolean(left) && objectToNativeBoolean(right)) case operator == "||": return nativeBoolToBooleanObject(objectToNativeBoolean(left) || objectToNativeBoolean(right)) + case operator == "!~": + return notMatches(left, right) + case operator == "~=": + return matches(left, right) + case operator == "==": return nativeBoolToBooleanObject(left == right) + case operator == "!=": return nativeBoolToBooleanObject(left != right) case left.Type() == object.BOOLEAN_OBJ && right.Type() == object.BOOLEAN_OBJ: @@ -290,6 +296,45 @@ func evalInfixExpression(operator string, left, right object.Object) object.Obje } } +func matches(left, right object.Object) object.Object { + + str := left.Inspect() + + // Compile the regular expression. + r, err := regexp.Compile(right.Inspect()) + + // Ensure it compiled + if err != nil { + return newError("error compiling regexp '%s': %s", right.Inspect(), err) + } + + // Test if it matched + if r.MatchString(str) { + return TRUE + } + + return FALSE +} + +func notMatches(left, right object.Object) object.Object { + str := left.Inspect() + + // Compile the regular expression. + r, err := regexp.Compile(right.Inspect()) + + // Ensure it compiled + if err != nil { + return newError("error compiling regexp '%s': %s", right.Inspect(), err) + } + + // Test if it matched + if r.MatchString(str) { + return FALSE + } + + return TRUE +} + // boolean operations func evalBooleanInfixExpression(operator string, left, right object.Object) object.Object { // convert the bools to strings. 
From 657b705570edc89407444f7207bffec979ed96d9 Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Mon, 11 Nov 2019 22:09:18 +0200 Subject: [PATCH 13/16] Regexp object has distinct flags --- ast/ast.go | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 0246c0e..51d5434 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -2,6 +2,7 @@ package ast import ( "bytes" + "fmt" "strings" "github.com/skx/monkey/token" @@ -568,6 +569,9 @@ type RegexpLiteral struct { // Value is the value of the regular expression. Value string + + // Flags contains any flags associated with the regexp. + Flags string } func (rl *RegexpLiteral) expressionNode() {} @@ -578,17 +582,7 @@ func (rl *RegexpLiteral) TokenLiteral() string { return rl.Token.Literal } // String returns this object as a string. func (rl *RegexpLiteral) String() string { - start := "/" - val := rl.Token.Literal - end := "/" - - if strings.HasPrefix(rl.Token.Literal, "(?i)") { - end = "/i" - val = strings.TrimPrefix(val, "(?i)") - } - - str := start + val + end - return str + return (fmt.Sprintf("/%s/%s", rl.Value, rl.Flags)) } // BacktickLiteral holds details of a command to be executed From 2df056cd8df8371820c867bee3ccabc6305401a9 Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Mon, 11 Nov 2019 22:09:30 +0200 Subject: [PATCH 14/16] Setup regexp-flags when parsing regexp --- parser/parser.go | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/parser/parser.go b/parser/parser.go index 4d38928..7ccc421 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -529,7 +529,28 @@ func (p *Parser) parseStringLiteral() ast.Expression { // parseRegexpLiteral parses a regular-expression. 
func (p *Parser) parseRegexpLiteral() ast.Expression { - return &ast.RegexpLiteral{Token: p.curToken, Value: p.curToken.Literal} + + flags := "" + + val := p.curToken.Literal + if strings.HasPrefix(val, "(?") { + val = strings.TrimPrefix(val, "(?") + + i := 0 + for i < len(val) { + + if val[i] == ')' { + + val = val[i+1:] + break + } else { + flags += string(val[i]) + } + + i++ + } + } + return &ast.RegexpLiteral{Token: p.curToken, Value: val, Flags: flags} } // parseBacktickLiteral parses a backtick-expression. From cc0faff0ae051292d3b6a2f9396f612de77c11c1 Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Mon, 11 Nov 2019 22:09:45 +0200 Subject: [PATCH 15/16] Process regexp matches correctly --- evaluator/evaluator.go | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/evaluator/evaluator.go b/evaluator/evaluator.go index d118d6f..216a137 100644 --- a/evaluator/evaluator.go +++ b/evaluator/evaluator.go @@ -143,7 +143,7 @@ func Eval(node ast.Node, env *object.Environment) object.Object { case *ast.StringLiteral: return &object.String{Value: node.Value} case *ast.RegexpLiteral: - return &object.Regexp{Value: node.Value} + return &object.Regexp{Value: node.Value, Flags: node.Flags} case *ast.BacktickLiteral: return backTickOperation(node.Value) case *ast.IndexExpression: @@ -300,8 +300,17 @@ func matches(left, right object.Object) object.Object { str := left.Inspect() + if right.Type() != object.REGEXP_OBJ { + return newError("regexp required for regexp-match, given %s", right.Type()) + } + + val := right.(*object.Regexp).Value + if right.(*object.Regexp).Flags != "" { + val = "(?" + right.(*object.Regexp).Flags + ")" + val + } + // Compile the regular expression. 
- r, err := regexp.Compile(right.Inspect()) + r, err := regexp.Compile(val) // Ensure it compiled if err != nil { @@ -319,8 +328,17 @@ func matches(left, right object.Object) object.Object { func notMatches(left, right object.Object) object.Object { str := left.Inspect() + if right.Type() != object.REGEXP_OBJ { + return newError("regexp required for regexp-match, given %s", right.Type()) + } + + val := right.(*object.Regexp).Value + if right.(*object.Regexp).Flags != "" { + val = "(?" + right.(*object.Regexp).Flags + ")" + val + } + // Compile the regular expression. - r, err := regexp.Compile(right.Inspect()) + r, err := regexp.Compile(val) // Ensure it compiled if err != nil { From 13ba3996955fd50ee1373c110adaaa12c4a83955 Mon Sep 17 00:00:00 2001 From: Steve Kemp Date: Mon, 11 Nov 2019 22:09:54 +0200 Subject: [PATCH 16/16] Store flags along/within regexp object. --- object/object_regexp.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/object/object_regexp.go b/object/object_regexp.go index bd70f8e..8dca87b 100644 --- a/object/object_regexp.go +++ b/object/object_regexp.go @@ -6,6 +6,9 @@ package object type Regexp struct { // Value holds the string value this object wraps. Value string + + // Flags holds the flags for the object + Flags string } // Type returns the type of this object.