Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ast

import (
"bytes"
"fmt"
"strings"

"github.com/skx/monkey/token"
Expand Down Expand Up @@ -561,6 +562,29 @@ func (sl *StringLiteral) TokenLiteral() string { return sl.Token.Literal }
// String returns this object as a string.
func (sl *StringLiteral) String() string { return sl.Token.Literal }

// RegexpLiteral holds a regular-expression.
type RegexpLiteral struct {
	// Token is the token
	Token token.Token

	// Value is the value of the regular expression.
	Value string

	// Flags contains any flags associated with the regexp.
	Flags string
}

// expressionNode marks RegexpLiteral as an AST expression node.
func (rl *RegexpLiteral) expressionNode() {}

// TokenLiteral returns the literal token.
func (rl *RegexpLiteral) TokenLiteral() string { return rl.Token.Literal }

// String returns this object as a string, in "/value/flags" form.
func (rl *RegexpLiteral) String() string {
	return fmt.Sprintf("/%s/%s", rl.Value, rl.Flags)
}

// BacktickLiteral holds details of a command to be executed
type BacktickLiteral struct {
// Token is the actual token
Expand Down
67 changes: 67 additions & 0 deletions evaluator/evaluator.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ func Eval(node ast.Node, env *object.Environment) object.Object {
return &object.Array{Elements: elements}
case *ast.StringLiteral:
return &object.String{Value: node.Value}
case *ast.RegexpLiteral:
return &object.Regexp{Value: node.Value, Flags: node.Flags}
case *ast.BacktickLiteral:
return backTickOperation(node.Value)
case *ast.IndexExpression:
Expand Down Expand Up @@ -273,8 +275,14 @@ func evalInfixExpression(operator string, left, right object.Object) object.Obje
return nativeBoolToBooleanObject(objectToNativeBoolean(left) && objectToNativeBoolean(right))
case operator == "||":
return nativeBoolToBooleanObject(objectToNativeBoolean(left) || objectToNativeBoolean(right))
case operator == "!~":
return notMatches(left, right)
case operator == "~=":
return matches(left, right)

case operator == "==":
return nativeBoolToBooleanObject(left == right)

case operator == "!=":
return nativeBoolToBooleanObject(left != right)
case left.Type() == object.BOOLEAN_OBJ && right.Type() == object.BOOLEAN_OBJ:
Expand All @@ -288,6 +296,63 @@ func evalInfixExpression(operator string, left, right object.Object) object.Obje
}
}

// matches implements the "~=" infix operator: it tests whether the
// printable form of the left operand matches the regexp on the right.
//
// The right operand must be a Regexp object; any flags it carries are
// folded into the pattern using Go's inline "(?flags)" syntax.  Returns
// TRUE/FALSE on success, or an error object when the right operand is
// not a regexp, or when the pattern fails to compile.
func matches(left, right object.Object) object.Object {

	// The subject we test is the printable form of the left operand.
	str := left.Inspect()

	// A single comma-ok assertion replaces the separate Type() check
	// plus repeated unchecked assertions.
	re, ok := right.(*object.Regexp)
	if !ok {
		return newError("regexp required for regexp-match, given %s", right.Type())
	}

	// Fold any flags into the pattern, e.g. "(?i)foo".
	val := re.Value
	if re.Flags != "" {
		val = "(?" + re.Flags + ")" + val
	}

	// Compile the regular expression.
	r, err := regexp.Compile(val)

	// Ensure it compiled
	if err != nil {
		return newError("error compiling regexp '%s': %s", right.Inspect(), err)
	}

	// Test if it matched
	if r.MatchString(str) {
		return TRUE
	}

	return FALSE
}

// notMatches implements the "!~" infix operator: the logical negation
// of matches().
//
// Rather than duplicating the type-check/compile/match logic we
// delegate to matches() and invert only the boolean outcomes; error
// objects (wrong operand type, bad pattern) are returned unchanged.
func notMatches(left, right object.Object) object.Object {
	result := matches(left, right)

	switch result {
	case TRUE:
		return FALSE
	case FALSE:
		return TRUE
	default:
		// An error object - propagate it as-is.
		return result
	}
}

// boolean operations
func evalBooleanInfixExpression(operator string, left, right object.Object) object.Object {
// convert the bools to strings.
Expand Down Expand Up @@ -1010,6 +1075,8 @@ func objectToNativeBoolean(o object.Object) bool {
return obj.Value
case *object.String:
return obj.Value != ""
case *object.Regexp:
return obj.Value != ""
case *object.Null:
return false
case *object.Integer:
Expand Down
2 changes: 2 additions & 0 deletions evaluator/stdlib_core.go
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,8 @@ func typeFun(args ...object.Object) object.Object {
switch args[0].(type) {
case *object.String:
return &object.String{Value: "string"}
case *object.Regexp:
return &object.String{Value: "regexp"}
case *object.Boolean:
return &object.String{Value: "bool"}
case *object.Builtin:
Expand Down
125 changes: 117 additions & 8 deletions lexer/lexer.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,28 @@
package lexer

import (
"fmt"
"strings"

"github.com/skx/monkey/token"
)

// Lexer used to be as lexer for monkey programming language.
// Lexer holds our object-state.
type Lexer struct {
position int //current character position
readPosition int //next character position
ch rune //current character
characters []rune //rune slice of input string
// The current character position
position int

// The next character position
readPosition int

//The current character
ch rune

// A rune slice of our input string
characters []rune

// Previous token.
prevToken token.Token
}

// New a Lexer instance from string input.
Expand Down Expand Up @@ -140,7 +151,34 @@ func (l *Lexer) NextToken() token.Token {
l.readChar()
tok = token.Token{Type: token.SLASH_EQUALS, Literal: string(ch) + string(l.ch)}
} else {
tok = newToken(token.SLASH, l.ch)
// slash is mostly division, but could
// be the start of a regular expression

// We exclude:
// a[b] / c -> RBRACKET
// ( a + b ) / c -> RPAREN
// a / c -> IDENT
// 3.2 / c -> FLOAT
// 1 / c -> IDENT
//
if l.prevToken.Type == token.RBRACKET ||
l.prevToken.Type == token.RPAREN ||
l.prevToken.Type == token.IDENT ||
l.prevToken.Type == token.INT ||
l.prevToken.Type == token.FLOAT {

tok = newToken(token.SLASH, l.ch)
} else {
str, err := l.readRegexp()
if err == nil {
tok.Type = token.REGEXP
tok.Literal = str
} else {
fmt.Printf("%s\n", err.Error())
tok.Type = token.REGEXP
tok.Literal = str
}
}
}
case rune('*'):
if l.peekChar() == rune('*') {
Expand Down Expand Up @@ -170,13 +208,27 @@ func (l *Lexer) NextToken() token.Token {
} else {
tok = newToken(token.GT, l.ch)
}
case rune('~'):
if l.peekChar() == rune('=') {
ch := l.ch
l.readChar()
tok = token.Token{Type: token.CONTAINS, Literal: string(ch) + string(l.ch)}
}

case rune('!'):
if l.peekChar() == rune('=') {
ch := l.ch
l.readChar()
tok = token.Token{Type: token.NOT_EQ, Literal: string(ch) + string(l.ch)}
} else {
tok = newToken(token.BANG, l.ch)
if l.peekChar() == rune('~') {
ch := l.ch
l.readChar()
tok = token.Token{Type: token.NOT_CONTAINS, Literal: string(ch) + string(l.ch)}

} else {
tok = newToken(token.BANG, l.ch)
}
}
case rune('"'):
tok.Type = token.STRING
Expand All @@ -194,14 +246,21 @@ func (l *Lexer) NextToken() token.Token {
tok.Literal = ""
tok.Type = token.EOF
default:

if isDigit(l.ch) {
return l.readDecimal()
tok := l.readDecimal()
l.prevToken = tok
return tok

}
tok.Literal = l.readIdentifier()
tok.Type = token.LookupIdentifier(tok.Literal)
l.prevToken = tok

return tok
}
l.readChar()
l.prevToken = tok
return tok
}

Expand Down Expand Up @@ -470,6 +529,56 @@ func (l *Lexer) readString() string {
return out
}

// readRegexp reads a regexp-literal, including any trailing flags,
// and rewrites it into Go's native "(?flags)pattern" form.
//
// The leading "/" has already been consumed by the caller; we read up
// to, and consume, the terminating "/".  An unterminated literal
// returns a non-nil error.
func (l *Lexer) readRegexp() (string, error) {
	out := ""

	for {
		l.readChar()

		// Hitting end-of-input before the closing "/" is an error.
		if l.ch == rune(0) {
			return "unterminated regular expression", fmt.Errorf("unterminated regular expression")
		}

		if l.ch != '/' {
			out = out + string(l.ch)
			continue
		}

		// consume the terminating "/".
		l.readChar()

		// Two flags are supported, duplicates are ignored:
		//   i -> Ignore-case
		//   m -> Multiline
		//
		ignoreCase := false
		multiline := false
		for l.ch == rune('i') || l.ch == rune('m') {
			if l.ch == rune('i') {
				ignoreCase = true
			} else {
				multiline = true
			}

			// read the next
			l.readChar()
		}

		// Emit the flags in a canonical (sorted) order so that
		// "/x/mi" and "/x/im" produce the same literal, then
		// convert the regexp to go-lang form.
		flags := ""
		if ignoreCase {
			flags += "i"
		}
		if multiline {
			flags += "m"
		}
		if flags != "" {
			out = "(?" + flags + ")" + out
		}
		break
	}

	return out, nil
}

// read the end of a backtick-quoted string
func (l *Lexer) readBacktick() string {
position := l.position + 1
Expand Down
Loading