From 66048ae334a9fa2bec2b47e706b3c8691daf311c Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Tue, 19 Apr 2016 13:48:32 +0300
Subject: [PATCH 01/23] [Refactor] Generalize kickstart engines, in preparation
 for more to come

---
 posix.mak                                     |   2 +-
 std/regex/internal/backtracking.d             |   2 +-
 std/regex/internal/ir.d                       |  16 +-
 std/regex/internal/parser.d                   |   6 +-
 std/regex/internal/{kickstart.d => shiftor.d} | 162 ++++++++++--------
 std/regex/internal/thompson.d                 |   2 +-
 win32.mak                                     |   2 +-
 win64.mak                                     |   2 +-
 8 files changed, 111 insertions(+), 83 deletions(-)
 rename std/regex/internal/{kickstart.d => shiftor.d} (82%)

diff --git a/posix.mak b/posix.mak
index 8b9f55ea391..749ce932736 100644
--- a/posix.mak
+++ b/posix.mak
@@ -192,7 +192,7 @@ PACKAGE_std_experimental_ndslice = package iteration selection slice
 PACKAGE_std_net = curl isemail
 PACKAGE_std_range = interfaces package primitives
 PACKAGE_std_regex = package $(addprefix internal/,generator ir parser \
-  backtracking kickstart tests thompson)
+  backtracking shiftor tests thompson)
 
 # Modules in std (including those in packages)
 STD_MODULES=$(call P2MODULES,$(STD_PACKAGES))
diff --git a/std/regex/internal/backtracking.d b/std/regex/internal/backtracking.d
index a7c360c5e88..5427b1b380e 100644
--- a/std/regex/internal/backtracking.d
+++ b/std/regex/internal/backtracking.d
@@ -216,7 +216,7 @@ template BacktrackingMatcher(bool CTregex)
             }
             static if (kicked)
             {
-                if (!re.kickstart.empty)
+                if (re.kickstart)
                 {
                     for (;;)
                     {
diff --git a/std/regex/internal/ir.d b/std/regex/internal/ir.d
index b5d3417b950..912b435b87d 100644
--- a/std/regex/internal/ir.d
+++ b/std/regex/internal/ir.d
@@ -452,6 +452,17 @@ struct Group(DataIndex)
         writeln("\t", disassemble(slice, pc, dict));
 }
 
+/+
+    Generic interface for kickstart engine components.
+    The goal of kickstart is to advance input to the next potential match,
+    the more accurate & fast the better.
++/
+interface Kickstart(Char){
+@trusted:
+    bool opCall(ref Input!Char input);
+    @property bool empty() const;
+}
+
 /++
     $(D Regex) object holds regular expression pattern in compiled form.
     Instances of this object are constructed via calls to $(D regex).
@@ -513,7 +524,6 @@ struct Regex(Char)
     }
 
 package(std.regex):
-    import std.regex.internal.kickstart : Kickstart; //TODO: get rid of this dependency
     NamedGroup[] dict;                     // maps name -> user group number
     uint ngroup;                           // number of internal groups
     uint maxCounterDepth;                  // max depth of nested {n,m} repetitions
@@ -622,10 +632,10 @@ struct Input(Char)
     @property bool atEnd(){
         return _index == _origin.length;
     }
+
     bool search(Kickstart)(ref Kickstart kick, ref dchar res, ref size_t pos)
     {
-        size_t idx = kick.search(_origin, _index);
-        _index = idx;
+        kick(this);
         return nextChar(res, pos);
     }
 
diff --git a/std/regex/internal/parser.d b/std/regex/internal/parser.d
index 49f6b45573f..ade9c8a3e65 100644
--- a/std/regex/internal/parser.d
+++ b/std/regex/internal/parser.d
@@ -1604,7 +1604,11 @@ struct Parser(R, Generator)
         }
         checkIfOneShot();
         if (!(flags & RegexInfo.oneShot))
-            kickstart = Kickstart!Char(zis, new uint[](256));
+        {
+            kickstart = new ShiftOr!Char(zis);
+            if(kickstart.empty)
+                kickstart = null;
+        }
         debug(std_regex_allocation) writefln("IR processed, max threads: %d", threadCount);
         optimize(zis);
     }
diff --git a/std/regex/internal/kickstart.d b/std/regex/internal/shiftor.d
similarity index 82%
rename from std/regex/internal/kickstart.d
rename to std/regex/internal/shiftor.d
index f052a955509..f57dbe20420 100644
--- a/std/regex/internal/kickstart.d
+++ b/std/regex/internal/shiftor.d
@@ -2,7 +2,7 @@
     Kickstart is a coarse-grained "filter" engine that finds likely matches
     to be verified by full-blown matcher.
 */
-module std.regex.internal.kickstart;
+module std.regex.internal.shiftor;
 
 package(std.regex):
 
@@ -26,7 +26,7 @@ uint effectiveSize(Char)()
     Kickstart engine using ShiftOr algorithm,
     a bit parallel technique for inexact string searching.
 */
-struct ShiftOr(Char)
+class ShiftOr(Char) : Kickstart!Char
 {
 private:
     uint[] table;
@@ -127,13 +127,13 @@ private:
     }
 
 public:
-    @trusted this(ref Regex!Char re, uint[] memory)
+    @trusted this(ref Regex!Char re)
     {
         static import std.algorithm.comparison;
         import std.algorithm.searching : countUntil;
         import std.conv : text;
         import std.range : assumeSorted;
-        assert(memory.length == 256);
+        uint[] memory = new uint[256];
         fChar = uint.max;
         // FNV-1a flavored hash (uses 32bits at a time)
         ulong hash(uint[] tab)
@@ -385,22 +385,23 @@ public:
         }
     }
 
-    @property bool empty() const {  return n_length == 0; }
+    final @property bool empty() const {  return n_length == 0; }
 
-    @property uint length() const{ return n_length/charSize; }
+    final @property uint length() const{ return n_length/charSize; }
 
     // lookup compatible bit pattern in haystack, return starting index
     // has a useful trait: if supplied with valid UTF indexes,
     // returns only valid UTF indexes
     // (that given the haystack in question is valid UTF string)
-    @trusted size_t search(const(Char)[] haystack, size_t idx)
+    final @trusted bool opCall(ref Input!Char s)
     {//@BUG: apparently assumes little endian machines
         import std.conv : text;
         import core.stdc.string : memchr;
         assert(!empty);
-        auto p = cast(const(ubyte)*)(haystack.ptr+idx);
+        auto haystack = s._origin;
         uint state = uint.max;
         uint limit = 1u<<(n_length - 1u);
+        auto p = cast(const(ubyte)*)(haystack.ptr+s._index);
         debug(std_regex_search) writefln("Limit: %32b",limit);
         if (fChar != uint.max)
         {
@@ -415,11 +416,17 @@ public:
                         assert(p <= end, text(p," vs ", end));
                         p = cast(ubyte*)memchr(p, fChar, end - p);
                         if (!p)
-                            return haystack.length;
+                        {
+                            s._index = haystack.length;
+                            return false;
+                        }
                         if ((cast(size_t)p & (Char.sizeof-1)) == orginalAlign)
                             break;
                         if (++p == end)
-                            return haystack.length;
+                        {
+                            s._index = haystack.length;
+                            return false;
+                        }
                     }
                     state = ~1u;
                     assert((cast(size_t)p & (Char.sizeof-1)) == orginalAlign);
@@ -433,8 +440,10 @@ public:
                         p++;
                     //first char is tested, see if that's all
                     if (!(state & limit))
-                        return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
-                            -length;
+                    {
+                        s._index =  (p-cast(ubyte*)haystack.ptr)/Char.sizeof-length;
+                        return true;
+                    }
                 }
                 else
                 {//have some bits/states for possible matches,
@@ -452,8 +461,10 @@ public:
                         p++;
                     }
                     if (!(state & limit))
-                        return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
-                            -length;
+                    {
+                        s._index = (p-cast(ubyte*)haystack.ptr)/Char.sizeof-length;
+                        return true;
+                    }
                 }
                 debug(std_regex_search) writefln("State: %32b", state);
             }
@@ -471,8 +482,10 @@ public:
                     state = (state<<1) | table[p[2]];
                     p += 4;
                     if (!(state & limit))//division rounds down for dchar
-                        return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
-                        -length;
+                    {
+                        s._index = (p-cast(ubyte*)haystack.ptr)/Char.sizeof-length;
+                        return true;
+                    }
                 }
             }
             else
@@ -483,23 +496,31 @@ public:
                 {
                     state = (state<<1) | table[p[i++]];
                     if (!(state & limit))
-                        return idx+i/Char.sizeof-length;
+                    {
+                        s._index += i/Char.sizeof-length;
+                        return true;
+                    }
                 }
                 while (i < len)
                 {
                     state = (state<<1) | table[p[i++]];
                     if (!(state & limit))
-                        return idx+i/Char.sizeof
-                            -length;
+                    {
+                        s._index += i/Char.sizeof-length;
+                        return true;
+                    }
                     state = (state<<1) | table[p[i++]];
                     if (!(state & limit))
-                        return idx+i/Char.sizeof
-                            -length;
+                    {
+                        s._index += i/Char.sizeof-length;
+                        return true;
+                    }
                     debug(std_regex_search) writefln("State: %32b", state);
                 }
             }
         }
-        return haystack.length;
+        s._index = haystack.length;
+        return false;
     }
 
     @system debug static void dump(uint[] table)
@@ -515,65 +536,58 @@ public:
 unittest
 {
     import std.conv, std.regex;
-    @trusted void test_fixed(alias Kick)()
+    auto shiftOrLength(C)(const(C)[] pat, uint length)
+    {
+        auto r = regex(pat);
+        auto kick = new ShiftOr!C(r);
+        assert(kick.length == length, text(C.stringof, " == ", kick.length));
+        return kick;
+    }
+    auto searches(C)(const (C)[] source, ShiftOr!C kick, uint[] results...)
     {
-        foreach (i, v; AliasSeq!(char, wchar, dchar))
+        auto inp = Input!C(source);
+        foreach(r; results)
         {
-            alias Char = v;
-            alias String = immutable(v)[];
-            auto r = regex(to!String(`abc$`));
-            auto kick = Kick!Char(r, new uint[256]);
-            assert(kick.length == 3, text(Kick.stringof," ",v.stringof, " == ", kick.length));
-            auto r2 = regex(to!String(`(abc){2}a+`));
-            kick = Kick!Char(r2, new uint[256]);
-            assert(kick.length == 7, text(Kick.stringof,v.stringof," == ", kick.length));
-            auto r3 = regex(to!String(`\b(a{2}b{3}){2,4}`));
-            kick = Kick!Char(r3, new uint[256]);
-            assert(kick.length == 10, text(Kick.stringof,v.stringof," == ", kick.length));
-            auto r4 = regex(to!String(`\ba{2}c\bxyz`));
-            kick = Kick!Char(r4, new uint[256]);
-            assert(kick.length == 6, text(Kick.stringof,v.stringof, " == ", kick.length));
-            auto r5 = regex(to!String(`\ba{2}c\b`));
-            kick = Kick!Char(r5, new uint[256]);
-            size_t x = kick.search("aabaacaa", 0);
-            assert(x == 3, text(Kick.stringof,v.stringof," == ", kick.length));
-            x = kick.search("aabaacaa", x+1);
-            assert(x == 8, text(Kick.stringof,v.stringof," == ", kick.length));
+            kick(inp);
+            dchar ch;
+            size_t idx;
+            assert(inp._index == r, text(inp._index, " vs ", r));
+            inp.nextChar(ch, idx);
         }
     }
-    @trusted void test_flex(alias Kick)()
+
+    foreach(i, Char; AliasSeq!(char, wchar, dchar))
     {
-        foreach (i, v; AliasSeq!(char, wchar, dchar))
-        {
-            alias Char = v;
-            alias String = immutable(v)[];
-            auto r = regex(to!String(`abc[a-z]`));
-            auto kick = Kick!Char(r, new uint[256]);
-            auto x = kick.search(to!String("abbabca"), 0);
-            assert(x == 3, text("real x is ", x, " ",v.stringof));
+        alias String = immutable(Char)[];
+        shiftOrLength(`abc`.to!String, 3);
+        shiftOrLength(`abc$`.to!String, 3);
+        shiftOrLength(`(abc){2}a+`.to!String, 7);
+        shiftOrLength(`\b(a{2}b{3}){2,4}`.to!String, 10);
+        shiftOrLength(`\ba{2}c\bxyz`.to!String, 6);
+        auto kick = shiftOrLength(`\ba{2}c\b`.to!String, 3);
+        auto inp = Input!Char("aabaacaa");
+        assert(kick(inp));
+        assert(inp._index == 3, text(Char.stringof," == ", kick.length));
+        dchar ch;
+        size_t idx;
+        inp.nextChar(ch, idx);
+        assert(!kick(inp));
+        assert(inp._index == 8, text(Char.stringof," == ", kick.length));
+    }
 
-            auto r2 = regex(to!String(`(ax|bd|cdy)`));
-            String s2 = to!String("abdcdyabax");
-            kick = Kick!Char(r2, new uint[256]);
-            x = kick.search(s2, 0);
-            assert(x == 1, text("real x is ", x));
-            x = kick.search(s2, x+1);
-            assert(x == 3, text("real x is ", x));
-            x = kick.search(s2, x+1);
-            assert(x == 8, text("real x is ", x));
-            auto rdot = regex(to!String(`...`));
-            kick = Kick!Char(rdot, new uint[256]);
-            assert(kick.length == 0);
-            auto rN = regex(to!String(`a(b+|c+)x`));
-            kick = Kick!Char(rN, new uint[256]);
-            assert(kick.length == 3, to!string(kick.length));
-            assert(kick.search("ababx",0) == 2);
-            assert(kick.search("abaacba",0) == 3);//expected inexact
+    foreach(i, Char; AliasSeq!(char, wchar, dchar))
+    {
+        alias String = immutable(Char)[];
+        auto kick = shiftOrLength(`abc[a-z]`.to!String, 4);
+        searches("abbabca".to!String, kick, 3);
+        kick = shiftOrLength(`(ax|bd|cdy)`.to!String, 2);
+        searches("abdcdyabax".to!String, kick, 1, 3, 8);
+
+        shiftOrLength(`...`.to!String, 0);
+        kick = shiftOrLength(`a(b+|c+)x`.to!String, 3);
+        searches("ababx".to!String, kick, 2);
+        searches("abaacba".to!String, kick, 3); //expected inexact
 
-        }
     }
-    test_fixed!(ShiftOr)();
-    test_flex!(ShiftOr)();
 }
 
-alias Kickstart = ShiftOr;
diff --git a/std/regex/internal/thompson.d b/std/regex/internal/thompson.d
index 3065ee6fc2a..530e5c503e0 100644
--- a/std/regex/internal/thompson.d
+++ b/std/regex/internal/thompson.d
@@ -922,7 +922,7 @@ template ThompsonOps(E,S, bool withInput:false)
             return matchOneShot(matches);
         }
         static if (kicked)
-            if (!re.kickstart.empty)
+            if (re.kickstart)
                 return matchImpl!(true)(matches);
         return matchImpl!(false)(matches);
     }
diff --git a/win32.mak b/win32.mak
index 38cc8242fc8..b093f880b27 100644
--- a/win32.mak
+++ b/win32.mak
@@ -224,7 +224,7 @@ SRC_STD_REGEX= \
 	std\regex\internal\tests.d \
 	std\regex\internal\backtracking.d \
 	std\regex\internal\thompson.d \
-	std\regex\internal\kickstart.d \
+	std\regex\internal\shiftor.d \
 	std\regex\internal\generator.d
 
 SRC_STD_C= \
diff --git a/win64.mak b/win64.mak
index 838595e31c2..fe496708924 100644
--- a/win64.mak
+++ b/win64.mak
@@ -243,7 +243,7 @@ SRC_STD_REGEX= \
 	std\regex\internal\tests.d \
 	std\regex\internal\backtracking.d \
 	std\regex\internal\thompson.d \
-	std\regex\internal\kickstart.d \
+	std\regex\internal\shiftor.d \
 	std\regex\internal\generator.d
 
 SRC_STD_C= \

From 1416ddb0183553f291e09c009d4bcded1d549c29 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Mon, 11 Apr 2016 18:09:25 +0300
Subject: [PATCH 02/23] A start on bit-NFA

---
 std/regex/internal/bitnfa.d | 60 +++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 std/regex/internal/bitnfa.d

diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
new file mode 100644
index 00000000000..045a547dfce
--- /dev/null
+++ b/std/regex/internal/bitnfa.d
@@ -0,0 +1,60 @@
+//Written in the D programming language
+/*
+    Implementation of a concept "NFA in a word" which is
+    bit-parallel impementation of regex where each bit represents 
+    a state in an NFA. Execution is Thompson-style achieved via bit tricks.
+
+    There is a great number of limitations inlcuding not tracking any state (captures)
+    and not supporting even basic assertions such as ^, $  or \b.
+*/
+import std.regex.internal.ir;
+
+// since there is no way to mark a starting position
+// need 2 instance of BitNfa - one to find the end, and the other
+// to run backwards to find the start.
+struct BitNfa
+{
+    uint        asciiTab[128];    // state mask for ascii characters
+    UintTrie2   uniTab;           // state mask for unicode characters
+    uint[uint]  controlFlow;      // maps each bit pattern to resulting jumps pattern
+    uint        controlFlowMask;  // masks all control flow bits
+    uint        finalMask;        // marks final states terminating the NFA
+
+    bool opCall(Input)(ref Input r)
+    {
+        dchar ch;
+        size_t idx;
+        uint word = ~0u;
+        while(r.nextChar(ch, idx)){
+            word <<= 1; // shift - create a state
+            // cfMask has 1 for each control-flow op
+            uint cflow = ~word  & controlFlowMask; 
+            word = word | controlFlowMask; // kill cflow
+            word |= controlFlow[cflow]; // map normal ops
+            if(word & finalMask != finalMask)
+                return true;
+            // mask away failing states
+            if(ch < 0x80)
+                word |= assciiTab[ch];
+            else
+                word |= uniTab[ch];
+        }
+        return false;
+    }
+}
+
+final class BitMatcher
+{
+    BitNfa forward, backward;
+    bool opCall(Input)(ref Input r)
+    {
+        bool res = forward(r);
+        if(res){
+            auto backward = r.loopBack
+            backward(backward);
+            r.reset(backward._index);
+        }
+        return res;
+    }
+}
+

From 99095eebfc7d8af7181b11c20a289c9cbad58916 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Wed, 13 Apr 2016 17:13:56 +0300
Subject: [PATCH 03/23] ASCII-only version of  Bit-NFA

---
 posix.mak                   |   2 +-
 std/regex/internal/bitnfa.d | 512 ++++++++++++++++++++++++++++++++++--
 std/regex/internal/ir.d     | 108 ++++++++
 std/regex/internal/parser.d |   2 +-
 4 files changed, 607 insertions(+), 17 deletions(-)

diff --git a/posix.mak b/posix.mak
index 749ce932736..b97bd52d3c6 100644
--- a/posix.mak
+++ b/posix.mak
@@ -192,7 +192,7 @@ PACKAGE_std_experimental_ndslice = package iteration selection slice
 PACKAGE_std_net = curl isemail
 PACKAGE_std_range = interfaces package primitives
 PACKAGE_std_regex = package $(addprefix internal/,generator ir parser \
-  backtracking shiftor tests thompson)
+  backtracking bitnfa tests thompson shiftor)
 
 # Modules in std (including those in packages)
 STD_MODULES=$(call P2MODULES,$(STD_PACKAGES))
diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
index 045a547dfce..049f5fdac61 100644
--- a/std/regex/internal/bitnfa.d
+++ b/std/regex/internal/bitnfa.d
@@ -1,60 +1,542 @@
 //Written in the D programming language
 /*
     Implementation of a concept "NFA in a word" which is
-    bit-parallel impementation of regex where each bit represents 
+    bit-parallel impementation of regex where each bit represents
     a state in an NFA. Execution is Thompson-style achieved via bit tricks.
 
     There is a great number of limitations inlcuding not tracking any state (captures)
     and not supporting even basic assertions such as ^, $  or \b.
 */
+module std.regex.internal.bitnfa;
+
+package(std.regex):
+
 import std.regex.internal.ir;
 
-// since there is no way to mark a starting position
-// need 2 instance of BitNfa - one to find the end, and the other
+debug(std_regex_bitnfa) import std.stdio;
+
+
+
+struct HashTab()
+{
+    @disable this(this);
+
+    uint opIndex(uint key)
+    {
+        auto p = locate(key, table);
+        assert(p.occupied);
+        return p.value;
+    }
+
+    void opIndexAssign(uint value, uint key)
+    {
+        if(table.length == 0) grow();
+        auto p = locate(key, table);
+        if(!p.occupied)
+        {
+            items++;
+            if(4*items >= table.length*3)
+            {
+                grow();
+                p = locate(key, table);
+            }
+            p.occupied = true;
+            p.key = key;
+        }
+        p.value = value;
+    }
+
+    auto keys()
+    {
+        auto app = appender!(uint[])();
+        foreach(i, v; table)
+        {
+            if(v.occupied)
+                app.put(v.key);
+        }
+        return app.data;
+    }
+
+    auto values()
+    {
+        auto app = appender!(uint[])();
+        foreach(i, v; table)
+        {
+            if(v.occupied)
+                app.put(v.value);
+        }
+        return app.data;
+    }
+
+private:
+    static uint hashOf(uint val)
+    {
+        return (val >> 20) ^ (val>>8) ^ val;
+    }
+
+    struct Node
+    {
+        uint key;
+        uint value;
+        bool occupied;
+    }
+    Node[] table;
+    size_t items;
+
+    static Node* locate(uint key, Node[] table)
+    {
+        size_t slot = hashOf(key) & (table.length-1);
+        while(table.ptr[slot].occupied)
+        {
+            if(table.ptr[slot].key == key)
+                break;
+            slot += 1;
+            if(slot == table.length)
+                slot = 0;
+        }
+        return table.ptr+slot;
+    }
+
+    void grow()
+    {
+        Node[] newTable = new Node[table.length ? table.length*2 : 4];
+        foreach(i, v; table)
+        {
+            if(v.occupied)
+            {
+                auto p = locate(v.key, newTable);
+                *p = v;
+            }
+        }
+    }
+}
+
+
+// Specialized 2-level trie of uint masks for BitNfa.
+// Uses the concept of CoW: a page gets modified in place
+// if the block's ref-count is 1, else a newblock is allocated
+// and ref count is decreased
+struct UIntTrie2
+{
+    ushort[] index;             // pages --> blocks
+    ushort[] refCounts;         // ref counts for each block
+    uint[]   hashes;            // hashes of blocks
+    uint[]   blocks;            // linear array with blocks
+    uint[]   scratch;           // temporary block
+    enum     blockSize = 2<<8;  // size of block
+
+    static uint hash(uint[] data)
+    {
+        uint h = 5183;
+        foreach(v; data)
+        {
+            h = 31*h + v;
+        }
+        return h;
+    }
+
+    static UIntTrie2 opCall()
+    {
+        UIntTrie2 ut;
+        ut.index.length = 2<<13;
+        ut.blocks = new uint[blockSize];
+        ut.blocks[] = uint.max; // all ones
+        ut.scratch = new uint[blockSize];
+        ut.refCounts = new ushort[1];
+        ut.refCounts[0] = 2<<13;
+        ut.hashes = new uint[1];
+        ut.hashes[0] = hash(ut.blocks);
+        return ut;
+    }
+
+    bool opIndex(dchar ch)
+    {
+        return false; // TODO: stub
+    }
+
+    void opIndexOpAssign(string op)(uint val, dchar ch)
+    {
+        // TODO: stub
+    }
+
+    void opSliceOpAssign(string op)(uint val, uint start, uint end)
+    {
+        // TODO: stub
+    }
+}
+
+// Since there is no way to mark a starting position
+// we need 2 instances of BitNfa: one to find the end, and the other
 // to run backwards to find the start.
 struct BitNfa
 {
-    uint        asciiTab[128];    // state mask for ascii characters
-    UintTrie2   uniTab;           // state mask for unicode characters
+    uint[128]   asciiTab;         // state mask for ascii characters
+    UIntTrie2   uniTab;           // state mask for unicode characters
     uint[uint]  controlFlow;      // maps each bit pattern to resulting jumps pattern
     uint        controlFlowMask;  // masks all control flow bits
     uint        finalMask;        // marks final states terminating the NFA
+    bool        empty;            // if this engine is empty
+
+    void combineControlFlow()
+    {
+        uint[] keys = controlFlow.keys;
+        uint[] values = controlFlow.values;
+        auto selection = new bool[keys.length];
+        bool nextChoice()
+        {
+            uint i;
+            for(i=0;i<selection.length; i++)
+            {
+                selection[i] ^= true;
+                if(selection[i])
+                    break;
+            }
+            return i != selection.length;
+        }
+        // first prepare full mask
+        foreach(k; keys) controlFlowMask |= k;
+        // next set all combinations in cf
+        while(nextChoice())
+        {
+            uint kmask = 0, vmask = 0;
+            foreach(i,v; selection)
+                if(v)
+                {
+                    kmask |= keys[i];
+                    vmask |= values[i];
+                }
+            controlFlow[kmask] = vmask;
+        }
+    }
+
+    uint[] collectControlFlow(Bytecode[] ir, uint i)
+    {
+        uint[] result;
+        Stack!uint paths;
+        paths.push(i);
+        while(!paths.empty())
+        {
+            uint j = paths.pop();
+            switch(ir[j].code) with(IR)
+            {
+            case OrStart:
+                j += IRL!OrStart;
+                assert(ir[j].code == Option);
+                while(ir[j].code == Option)
+                {
+                    //import std.stdio;
+                    //writefln("> %d %s", j, ir[j].mnemonic);
+                    paths.push(j+IRL!Option);
+                    //writefln(">> %d", j+IRL!Option);
+                    j = j + ir[j].data + IRL!Option;
+                }
+                break;
+            case GotoEndOr:
+                paths.push(j+IRL!GotoEndOr+ir[j].data);
+                break;
+            case OrEnd, Wordboundary, Notwordboundary, Bol, Eol, Nop, GroupStart, GroupEnd:
+                paths.push(j+ir[j].length);
+                break;
+            case LookaheadStart, NeglookaheadStart, LookbehindStart,
+                NeglookbehindStart:
+                paths.push(j + IRL!LookaheadStart + ir[j].data + IRL!LookaheadEnd);
+                break;
+            case InfiniteStart, InfiniteQStart:
+                paths.push(j+IRL!InfiniteStart);
+                paths.push(j+ir[j].data+IRL!InfiniteEnd);
+                break;
+            case InfiniteBloomStart:
+                paths.push(j+IRL!InfiniteStart);
+                paths.push(j+ir[j].data+IRL!InfiniteBloomEnd);
+                break;
+            case InfiniteEnd, InfiniteQEnd:
+                paths.push(j-ir[j].data);
+                paths.push(j+IRL!InfiniteEnd);
+                break;
+            case InfiniteBloomEnd:
+                paths.push(j-ir[j].data);
+                paths.push(j+IRL!InfiniteBloomEnd);
+                break;
+            default:
+                result ~= j;
+            }
+        }
+        return result;
+    }
+
+    this(Char)(auto ref Regex!Char re)
+    {
+        asciiTab[] = uint.max; // all ones
+        uniTab = UIntTrie2();
+        controlFlow[0] = 0;
+        // pc -> bit number
+        uint[] bitMapping = new uint[re.ir.length];
+        uint bitCount = 0, nesting=0, lastNonnested=0;
+        bool stop = false;
+        with(re)
+outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
+        {
+            if(nesting == 0) lastNonnested = i;
+            if(ir[i].isStart) nesting++;
+            if(ir[i].isEnd) nesting--;
+            switch(ir[i].code)
+            {
+            case Option, OrEnd, Nop, Bol,
+            GroupStart, GroupEnd,
+            Eol, Wordboundary, Notwordboundary:
+                bitMapping[i] = bitCount;
+                break;
+            // skipover complex assertions
+            case LookaheadStart, NeglookaheadStart, LookbehindStart,
+                NeglookbehindStart:
+                bitMapping[i] = bitCount;
+                nesting--;
+                i += IRL!LookbehindStart + ir[i].data; // IRL end gets skiped by 'for'
+                break;
+            // unsupported instructions
+            case RepeatStart, RepeatQStart, Backref:
+                stop = true;
+                break outer;
+            case OrChar:
+                uint s = ir[i].sequence;
+                for(uint j=i; j<i+s; j++)
+                    bitMapping[j] = bitCount;
+                i += (s-1)*IRL!OrChar;
+                bitCount++;
+                if(bitCount == 32)
+                    break outer;
+                break;
+            default:
+                bitMapping[i] = bitCount++;
+                if(bitCount == 32)
+                    break outer;
+            }
+        }
+        if(bitCount == 0)
+            empty = true;
+        debug(std_regex_bitnfa) writeln("LEN:", lastNonnested);
+        // the total processable length
+        uint length=lastNonnested;
+        finalMask |= 1u<<bitMapping[length];
+        if(stop)
+            finalMask <<= 1;
+        with(re)
+        for(uint i=0; i<length; i += ir[i].length)
+        {
+            switch(ir[i].code) with (IR)
+            {
+            case OrStart,GotoEndOr, InfiniteStart,
+            InfiniteBloomStart, InfiniteBloomEnd,
+            InfiniteEnd, InfiniteQEnd, InfiniteQStart:
+                // collect stops across all paths
+                auto rets = collectControlFlow(ir, i);
+                uint mask = 0;
+                debug(std_regex_bitnfa) writeln(rets);
+                foreach(pc; rets) mask |= 1u<<bitMapping[pc];
+                // map this individual c-f to all possible stops
+                controlFlow[1u<<bitMapping[i]] = mask;
+                break;
+            case Option, OrEnd, Nop, Bol,
+                GroupStart, GroupEnd,
+                Eol, Wordboundary, Notwordboundary:
+                break;
+            case LookaheadStart, NeglookaheadStart, LookbehindStart,
+                NeglookbehindStart:
+                i += IRL!LookaheadStart + ir[i].data;
+                break;
+            case End:
+                finalMask |= 1u<<bitMapping[i];
+                break;
+            case Char:
+                uint mask = 1u<<bitMapping[i];
+                auto ch = ir[i].data;
+                //import std.stdio;
+                //writefln("Char %c - %b", cast(dchar)ch, mask);
+                if(ch < 0x80)
+                    asciiTab[ch] &= ~mask;
+                else
+                    uniTab[ch] &= ~mask;
+                break;
+            case OrChar:
+                uint s = ir[i].sequence;
+                for(size_t j=i; j<i+s; j++)
+                {
+                    uint mask = 1u<<bitMapping[i];
+                    auto ch = ir[j].data;
+                    //import std.stdio;
+                    //writefln("OrChar %c - %b", cast(dchar)ch, mask);
+                    if(ch < 0x80)
+                        asciiTab[ch] &= ~mask;
+                    else
+                        uniTab[ch] &= ~mask;
+                }
+                i += s-1;
+                break;
+            case CodepointSet, Trie:
+                auto cset = charsets[ir[i].data];
+                uint mask = 1u<<bitMapping[i];
+                foreach(ival; cset.byInterval)
+                {
+                    if(ival.b < 0x80)
+                        asciiTab[ival.a..ival.b] &= ~mask;
+                    else
+                    {
+                        if(ival.a < 0x80)
+                            asciiTab[ival.a..0x80] &= ~mask;
+                        uniTab[ival.a..ival.b] &= ~mask;
+                    }
+                }
+                break;
+            default:
+                assert(0, "Unexpected instruction in BitNFA: "~ir[i].mnemonic);
+            }
+        }
+        combineControlFlow();
+    }
 
     bool opCall(Input)(ref Input r)
     {
+        bool matched = false;
+        size_t mIdx = 0;
         dchar ch;
         size_t idx;
         uint word = ~0u;
-        while(r.nextChar(ch, idx)){
+        for(;;)
+        {
             word <<= 1; // shift - create a state
             // cfMask has 1 for each control-flow op
-            uint cflow = ~word  & controlFlowMask; 
+            uint cflow = ~word  & controlFlowMask;
             word = word | controlFlowMask; // kill cflow
-            word |= controlFlow[cflow]; // map normal ops
-            if(word & finalMask != finalMask)
-                return true;
+            word &= ~controlFlow[cflow]; // map normal ops
+            debug(std_regex_bitnfa) writefln("%b %b %b %b", word, finalMask, cflow, controlFlowMask);
+            if((word & finalMask) != finalMask)
+            {
+                matched = true; // keep running to see if there is longer match
+                mIdx = r._index;
+            }
+            else if(matched)
+                break;
+            if(!r.nextChar(ch, idx))
+                break;
             // mask away failing states
             if(ch < 0x80)
-                word |= assciiTab[ch];
+                word |= asciiTab[ch];
             else
                 word |= uniTab[ch];
         }
-        return false;
+        if(matched)
+        {
+            r.reset(mIdx);
+        }
+        return matched;
     }
 }
 
 final class BitMatcher
 {
     BitNfa forward, backward;
+
+    this(Char)(auto ref Regex!Char re)
+    {
+        forward = BitNfa(re);
+        //reverse Bytecode
+        auto re2 = re;
+        re2.ir = re2.ir.dup;
+        // keep the end where it belongs
+        reverseBytecode(re2.ir[0..$-1]);
+        // check for the case of multiple patterns as one alternation
+        with(IR) with(re2) if(ir[0].code == OrStart)
+        {
+            size_t pc = IRL!OrStart;
+            while(ir[pc].code == Option)
+            {
+                size_t size = ir[pc].data;
+                if(ir[pc+size-IRL!GotoEndOr].code == GotoEndOr)
+                    size -= IRL!GotoEndOr;
+                size_t j = pc + IRL!Option;
+                if(ir[j].code == End)
+                {
+                    auto save = ir[j];
+                    foreach(k; j+1..j+size)
+                        ir[k-1] = ir[k];
+                    ir[j+size-1] = save;
+                }
+                pc = j + ir[pc].data;
+            }
+        }
+        backward = BitNfa(re2);
+    }
+
     bool opCall(Input)(ref Input r)
     {
         bool res = forward(r);
         if(res){
-            auto backward = r.loopBack
-            backward(backward);
-            r.reset(backward._index);
+            auto back = r.loopBack(r._index);
+            assert(backward(back));
+            r.reset(back._index);
         }
         return res;
     }
 }
 
+version(unittest)
+{
+    template check(alias make)
+    {
+        private void check(T)(string input, T re, size_t idx=uint.max)
+        {
+            import std.regex, std.conv;
+            import std.stdio;
+            auto rex = regex(re);
+            auto m = make(rex);
+            auto s = Input!char(input);
+            assert(m(s), "Failed "~input~" with "~to!string(re));
+            assert(s._index == idx || (idx ==uint.max && s._index == input.length));
+        }
+    }
+
+    template checkFail(alias make)
+    {
+        private void checkFail(T)(string input, T re, size_t idx=uint.max)
+        {
+            import std.regex, std.conv;
+            import std.stdio;
+            auto rex = regex(re);
+            auto m = make(rex);
+            auto s = Input!char(input);
+            assert(!m(s), "Should have failed "~input~" with "~to!string(re));
+            assert(s._index == idx || (idx ==uint.max && s._index == input.length));
+        }
+    }
+
+    alias checkBit = check!BitNfa;
+    alias checkBitFail = checkFail!BitNfa;
+    auto makeMatcher(R)(R regex){ return new BitMatcher(regex); }
+    alias checkM = check!makeMatcher;
+    alias checkMFail = checkFail!makeMatcher;
+}
+
+unittest
+{
+    "xabcd".checkBit("abc", 4);
+    "xabbbcdyy".checkBit("a[b-c]*c", 6);
+    "abc1".checkBit("([a-zA-Z_0-9]*)1");
+    "abbabc".checkBit("(a|b)*",5);
+    "abd".checkBitFail("abc");
+    // check truncation
+    "0123456789_0123456789_0123456789_012"
+        .checkBit("0123456789_0123456789_0123456789_0123456789", 31);
+    "0123456789_0123456789_0123456789_012"
+        .checkBit("0123456789(0123456789_0123456789_0123456789_0123456789|01234)",10);
+    // assertions ignored
+    "0abc1".checkBit("(?<![0-9])[a-c]*$", 4);
+    // stop on repetition
+    "abcdef1".checkBit("a[a-z]{5}", 1);
+    "ads@email.com".checkBit(`\S+@\S+`);
+}
+
+unittest
+{
+    "xxabcy".checkM("abc", 2);
+    "_10bcy".checkM([`\d+`, `[a-z]+`], 1);
+}
\ No newline at end of file
diff --git a/std/regex/internal/ir.d b/std/regex/internal/ir.d
index 912b435b87d..f1ec1571e52 100644
--- a/std/regex/internal/ir.d
+++ b/std/regex/internal/ir.d
@@ -463,6 +463,114 @@ interface Kickstart(Char){
     @property bool empty() const;
 }
 
+//basic stack, just in case it gets used anywhere else then Parser
+@trusted struct Stack(T)
+{
+    T[] data;
+    @property bool empty(){ return data.empty; }
+
+    @property size_t length(){ return data.length; }
+
+    void push(T val){ data ~= val;  }
+
+    T pop()
+    {
+        assert(!empty);
+        auto val = data[$ - 1];
+        data = data[0 .. $ - 1];
+        if(!__ctfe)
+            cast(void)data.assumeSafeAppend();
+        return val;
+    }
+
+    @property ref T top()
+    {
+        assert(!empty);
+        return data[$ - 1];
+    }
+}
+
+@trusted void reverseBytecode()(Bytecode[] code)
+{
+    import std.typecons;
+    Bytecode[] rev = new Bytecode[code.length];
+    uint revPc = cast(uint)rev.length;
+    Stack!(Tuple!(uint, uint, uint)) stack;
+    uint start = 0;
+    uint end = cast(uint)code.length;
+    for(;;)
+    {
+        for(uint pc = start; pc < end; )
+        {
+            uint len = code[pc].length;
+            if(code[pc].code == IR.GotoEndOr)
+                break; //pick next alternation branch
+            if(code[pc].isAtom)
+            {
+                rev[revPc - len .. revPc] = code[pc .. pc + len];
+                revPc -= len;
+                pc += len;
+            }
+            else if(code[pc].isStart || code[pc].isEnd)
+            {
+                //skip over other embedded lookbehinds they are reversed
+                if(code[pc].code == IR.LookbehindStart
+                    || code[pc].code == IR.NeglookbehindStart)
+                {
+                    uint blockLen = len + code[pc].data
+                         + code[pc].pairedLength;
+                    rev[revPc - blockLen .. revPc] = code[pc .. pc + blockLen];
+                    pc += blockLen;
+                    revPc -= blockLen;
+                    continue;
+                }
+                uint second = code[pc].indexOfPair(pc);
+                uint secLen = code[second].length;
+                rev[revPc - secLen .. revPc] = code[second .. second + secLen];
+                revPc -= secLen;
+                if(code[pc].code == IR.OrStart)
+                {
+                    //we pass len bytes forward, but secLen in reverse
+                    uint revStart = revPc - (second + len - secLen - pc);
+                    uint r = revStart;
+                    uint i = pc + IRL!(IR.OrStart);
+                    while(code[i].code == IR.Option)
+                    {
+                        if(code[i - 1].code != IR.OrStart)
+                        {
+                            assert(code[i - 1].code == IR.GotoEndOr);
+                            rev[r - 1] = code[i - 1];
+                        }
+                        rev[r] = code[i];
+                        auto newStart = i + IRL!(IR.Option);
+                        auto newEnd = newStart + code[i].data;
+                        auto newRpc = r + code[i].data + IRL!(IR.Option);
+                        if(code[newEnd].code != IR.OrEnd)
+                        {
+                            newRpc--;
+                        }
+                        stack.push(tuple(newStart, newEnd, newRpc));
+                        r += code[i].data + IRL!(IR.Option);
+                        i += code[i].data + IRL!(IR.Option);
+                    }
+                    pc = i;
+                    revPc = revStart;
+                    assert(code[pc].code == IR.OrEnd);
+                }
+                else
+                    pc += len;
+            }
+        }
+        if(stack.empty)
+            break;
+        start = stack.top[0];
+        end = stack.top[1];
+        revPc = stack.top[2];
+        stack.pop();
+    }
+    code[] = rev[];
+}
+
 /++
     $(D Regex) object holds regular expression pattern in compiled form.
     Instances of this object are constructed via calls to $(D regex).
diff --git a/std/regex/internal/parser.d b/std/regex/internal/parser.d
index ade9c8a3e65..d780b39206f 100644
--- a/std/regex/internal/parser.d
+++ b/std/regex/internal/parser.d
@@ -4,7 +4,7 @@
 */
 module std.regex.internal.parser;
 
-import std.regex.internal.ir;
+import std.regex.internal.ir, std.regex.internal.shiftor;
 import std.range.primitives, std.uni, std.meta,
     std.traits, std.typecons, std.exception;
 static import std.ascii;

From ed4e07c7793b3453e7360022d4c983f4bd3ef505 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Thu, 21 Apr 2016 16:48:19 +0300
Subject: [PATCH 04/23] [std.regex] Bit-NFA: implement unicode trie

---
 std/regex/internal/bitnfa.d | 98 ++++++++++++++++++++++++++++++++-----
 1 file changed, 87 insertions(+), 11 deletions(-)

diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
index 049f5fdac61..3433e4ed62f 100644
--- a/std/regex/internal/bitnfa.d
+++ b/std/regex/internal/bitnfa.d
@@ -14,7 +14,7 @@ package(std.regex):
 import std.regex.internal.ir;
 
 debug(std_regex_bitnfa) import std.stdio;
-
+import std.algorithm;
 
 
 struct HashTab()
@@ -118,12 +118,14 @@ private:
 // and ref count is decreased
 struct UIntTrie2
 {
-    ushort[] index;             // pages --> blocks
-    ushort[] refCounts;         // ref counts for each block
-    uint[]   hashes;            // hashes of blocks
-    uint[]   blocks;            // linear array with blocks
-    uint[]   scratch;           // temporary block
-    enum     blockSize = 2<<8;  // size of block
+    ushort[] index;                       // pages --> blocks
+    ushort[] refCounts;                   // ref counts for each block
+    uint[]   hashes;                      // hashes of blocks
+    uint[]   blocks;                      // linear array with blocks
+    uint[]   scratch;                     // temporary block
+    enum     blockBits = 8;               // size of block in bits
+    enum     blockSize = 1<<blockBits;    // size of block
+
 
     static uint hash(uint[] data)
     {
@@ -149,22 +151,96 @@ struct UIntTrie2
         return ut;
     }
 
-    bool opIndex(dchar ch)
+    uint opIndex(dchar ch)
+    {
+        immutable blk = index[ch>>blockBits];
+        //writeln(">blk = ", blk);
+        return blocks.ptr[blk*blockSize + (ch & (blockSize-1))];
+    }
+
+    void setPageRange(string op)(uint val, uint low, uint high)
     {
-        return false; // TODO: stub
+        immutable blk = index[low>>blockBits];
+        //writeln("<blk = ", blk);
+        if(refCounts[blk] == 1) // modify in-place
+        {
+            immutable lowIdx = blk*blockSize + (low & (blockSize-1));
+            immutable highIdx = high - low + lowIdx;
+            mixin("blocks[lowIdx..highIdx] "~op~"= val;");
+        }
+        else        
+        {
+            // create a new page
+            refCounts[blk]--;
+            immutable lowIdx = low & (blockSize-1);
+            immutable highIdx = high - low + lowIdx;
+            scratch[] = blocks[blk*blockSize..(blk+1)*blockSize];
+            mixin("scratch[lowIdx..highIdx] "~op~"= val;");
+            uint h = hash(scratch);
+            bool found = false;
+            foreach(i,_; hashes.enumerate.filter!(x => x[1] == h))
+            {
+                if(scratch[] == blocks[i*blockSize .. (i+1)*blockSize])
+                {
+                    // re-route to existing page
+                    index[low>>blockBits] = cast(ushort)i;
+                    refCounts[i]++; // inc refs
+                    found = true;
+                    break;
+                }
+            }
+            if(!found)
+            {
+                index[low>>blockBits] = cast(ushort)hashes.length;
+                blocks ~= scratch[];
+                refCounts ~= 1;
+                hashes ~= h;
+            }
+        }
     }
 
     void opIndexOpAssign(string op)(uint val, dchar ch)
     {
-        // TODO: stub
+        setPageRange!op(val, ch, ch+1);
     }
 
     void opSliceOpAssign(string op)(uint val, uint start, uint end)
     {
-        // TODO: stub
+        uint startBlk  = start >> blockBits;
+        uint endBlk = end >> blockBits;
+        uint first = min(startBlk*blockSize+blockSize, end);
+        setPageRange!op(val, start, first);
+        foreach(blk; startBlk..endBlk)
+            setPageRange!op(val, blk*blockSize, (blk+1)*blockSize);
+        if(first != end)
+        {
+            setPageRange!op(val, endBlk*blockSize, end);
+        }
     }
 }
 
+unittest
+{
+    UIntTrie2 trie = UIntTrie2();
+    trie['d'] &= 3;
+    assert(trie['d'] == 3);
+    trie['\u0280'] &= 1;
+    assert(trie['\u0280'] == 1);
+    import std.uni;
+    UIntTrie2 trie2 = UIntTrie2();
+    auto letters = unicode("L");
+    foreach(r; letters.byInterval)
+        trie2[r.a..r.b] &= 1;
+    foreach(ch; letters.byCodepoint)
+        assert(trie2[ch] == 1);
+    auto space = unicode("WhiteSpace");
+    auto trie3 = UIntTrie2();
+    foreach(r; space.byInterval)
+        trie3[r.a..r.b] &= 2;
+    foreach(ch; space.byCodepoint)
+        assert(trie3[ch] == 2);
+}
+
 // Since there is no way to mark a starting position
 // we need 2 instances of BitNfa: one to find the end, and the other
 // to run backwards to find the start.

From 5c21564f6ff23f75b271e58762672857cd7157fa Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Thu, 21 Apr 2016 17:18:54 +0300
Subject: [PATCH 05/23] [std.regex] Improved hash-table for Bit-NFA

---
 std/regex/internal/bitnfa.d | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
index 3433e4ed62f..58164f6000b 100644
--- a/std/regex/internal/bitnfa.d
+++ b/std/regex/internal/bitnfa.d
@@ -17,11 +17,11 @@ debug(std_regex_bitnfa) import std.stdio;
 import std.algorithm;
 
 
-struct HashTab()
+struct HashTab
 {
     @disable this(this);
 
-    uint opIndex(uint key)
+    uint opIndex()(uint key)
     {
         auto p = locate(key, table);
         assert(p.occupied);
@@ -69,7 +69,7 @@ struct HashTab()
     }
 
 private:
-    static uint hashOf(uint val)
+    static uint hashOf()(uint val)
     {
         return (val >> 20) ^ (val>>8) ^ val;
     }
@@ -83,12 +83,12 @@ private:
     Node[] table;
     size_t items;
 
-    static Node* locate(uint key, Node[] table)
+    static Node* locate()(uint key, Node[] table)
     {
         size_t slot = hashOf(key) & (table.length-1);
-        while(table.ptr[slot].occupied)
+        while(table[slot].occupied)
         {
-            if(table.ptr[slot].key == key)
+            if(table[slot].key == key)
                 break;
             slot += 1;
             if(slot == table.length)
@@ -108,6 +108,7 @@ private:
                 *p = v;
             }
         }
+        table = newTable;
     }
 }
 
@@ -248,7 +249,7 @@ struct BitNfa
 {
     uint[128]   asciiTab;         // state mask for ascii characters
     UIntTrie2   uniTab;           // state mask for unicode characters
-    uint[uint]  controlFlow;      // maps each bit pattern to resulting jumps pattern
+    HashTab     controlFlow;      // maps each bit pattern to resulting jumps pattern
     uint        controlFlowMask;  // masks all control flow bits
     uint        finalMask;        // marks final states terminating the NFA
     bool        empty;            // if this engine is empty
@@ -609,6 +610,7 @@ unittest
     // stop on repetition
     "abcdef1".checkBit("a[a-z]{5}", 1);
     "ads@email.com".checkBit(`\S+@\S+`);
+    //"abc".checkBit(`([^ ]*)?`);
 }
 
 unittest

From 7d930c1de845b931d8c8d9572f1befe938cee5d7 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Thu, 21 Apr 2016 17:31:27 +0300
Subject: [PATCH 06/23] [std.regex] Even faster hash table for Bit-NFA

---
 std/regex/internal/bitnfa.d | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
index 58164f6000b..db5ba78fe35 100644
--- a/std/regex/internal/bitnfa.d
+++ b/std/regex/internal/bitnfa.d
@@ -40,8 +40,8 @@ struct HashTab
                 grow();
                 p = locate(key, table);
             }
-            p.occupied = true;
-            p.key = key;
+            p.key_ = key;
+            p.setOccupied();
         }
         p.value = value;
     }
@@ -76,9 +76,12 @@ private:
 
     struct Node
     {
-        uint key;
+        uint key_;
         uint value;
-        bool occupied;
+        @property uint key()(){ return key_ & 0x7fff_ffff; }
+        @property bool occupied()(){ return (key_ & 0x8000_0000) != 0; }
+        void setOccupied(){ key_ |= 0x8000_0000; }
+
     }
     Node[] table;
     size_t items;

From 845551b3d6cc5b8f2aca5aa0074d274eae4947a1 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Fri, 22 Apr 2016 10:40:10 +0300
Subject: [PATCH 07/23] [std.regex] Bit-NFA fix premature stop on repetitions

---
 std/regex/internal/bitnfa.d | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
index db5ba78fe35..0b4f3f244f1 100644
--- a/std/regex/internal/bitnfa.d
+++ b/std/regex/internal/bitnfa.d
@@ -352,7 +352,6 @@ struct BitNfa
         // pc -> bit number
         uint[] bitMapping = new uint[re.ir.length];
         uint bitCount = 0, nesting=0, lastNonnested=0;
-        bool stop = false;
         with(re)
 outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
         {
@@ -375,7 +374,7 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
                 break;
             // unsupported instructions
             case RepeatStart, RepeatQStart, Backref:
-                stop = true;
+                bitMapping[i] = bitCount;
                 break outer;
             case OrChar:
                 uint s = ir[i].sequence;
@@ -398,8 +397,6 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
         // the total processable length
         uint length=lastNonnested;
         finalMask |= 1u<<bitMapping[length];
-        if(stop)
-            finalMask <<= 1;
         with(re)
         for(uint i=0; i<length; i += ir[i].length)
         {
@@ -613,7 +610,7 @@ unittest
     // stop on repetition
     "abcdef1".checkBit("a[a-z]{5}", 1);
     "ads@email.com".checkBit(`\S+@\S+`);
-    //"abc".checkBit(`([^ ]*)?`);
+    "abc@email.com".checkBit(`\S+@\S?1`, 4);
 }
 
 unittest

From 46102312886a8ab11eb335192e6bcfd558649063 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Fri, 22 Apr 2016 11:13:05 +0300
Subject: [PATCH 08/23] [std.regex] Bit-NFA - fix inversion of the right
 sub-portion of regex

---
 std/regex/internal/bitnfa.d | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
index 0b4f3f244f1..cdd1d9b2fc0 100644
--- a/std/regex/internal/bitnfa.d
+++ b/std/regex/internal/bitnfa.d
@@ -255,7 +255,9 @@ struct BitNfa
     HashTab     controlFlow;      // maps each bit pattern to resulting jumps pattern
     uint        controlFlowMask;  // masks all control flow bits
     uint        finalMask;        // marks final states terminating the NFA
-    bool        empty;            // if this engine is empty
+    uint        length;            // if this engine is empty
+
+    @property bool empty() const { return length == 0; }
 
     void combineControlFlow()
     {
@@ -391,12 +393,10 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
                     break outer;
             }
         }
-        if(bitCount == 0)
-            empty = true;
         debug(std_regex_bitnfa) writeln("LEN:", lastNonnested);
         // the total processable length
-        uint length=lastNonnested;
-        finalMask |= 1u<<bitMapping[length];
+        finalMask |= 1u<<bitMapping[lastNonnested];
+        length = lastNonnested;
         with(re)
         for(uint i=0; i<length; i += ir[i].length)
         {
@@ -468,6 +468,7 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
                 assert(0, "Unexpected instruction in BitNFA: "~ir[i].mnemonic);
             }
         }
+        length += re.ir[lastNonnested].length;
         combineControlFlow();
     }
 
@@ -509,19 +510,23 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
     }
 }
 
-final class BitMatcher
+final class BitMatcher(Char) : Kickstart!(Char)
+    if(is(Char : dchar))
 {
+@trusted:
     BitNfa forward, backward;
 
-    this(Char)(auto ref Regex!Char re)
+    this()(auto ref Regex!Char re)
     {
         forward = BitNfa(re);
         //reverse Bytecode
         auto re2 = re;
         re2.ir = re2.ir.dup;
         // keep the end where it belongs
-        reverseBytecode(re2.ir[0..$-1]);
+        uint len = forward.length - 1;
+        reverseBytecode(re2.ir[0..len]);
         // check for the case of multiple patterns as one alternation
+        if(len == re2.ir.length-IRL!(IR.End))
         with(IR) with(re2) if(ir[0].code == OrStart)
         {
             size_t pc = IRL!OrStart;
@@ -544,7 +549,7 @@ final class BitMatcher
         backward = BitNfa(re2);
     }
 
-    bool opCall(Input)(ref Input r)
+    final bool opCall(ref Input!Char r)
     {
         bool res = forward(r);
         if(res){
@@ -554,6 +559,8 @@ final class BitMatcher
         }
         return res;
     }
+
+    final @property bool empty() const{ return forward.empty; }
 }
 
 version(unittest)
@@ -588,7 +595,7 @@ version(unittest)
 
     alias checkBit = check!BitNfa;
     alias checkBitFail = checkFail!BitNfa;
-    auto makeMatcher(R)(R regex){ return new BitMatcher(regex); }
+    auto makeMatcher(Char)(Regex!Char regex){ return new BitMatcher!(Char)(regex); }
     alias checkM = check!makeMatcher;
     alias checkMFail = checkFail!makeMatcher;
 }
@@ -617,4 +624,5 @@ unittest
 {
     "xxabcy".checkM("abc", 2);
     "_10bcy".checkM([`\d+`, `[a-z]+`], 1);
-}
\ No newline at end of file
+    "abc@email.com".checkM(`\S+@\S?1`, 0);
+}

From c27b118954ce6c3fe1a13b17b63233105bded654 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Fri, 22 Apr 2016 11:21:54 +0300
Subject: [PATCH 09/23] [std.regex] Add Bit-NFA to win32/win64 makefiles

---
 win32.mak | 1 +
 win64.mak | 1 +
 2 files changed, 2 insertions(+)

diff --git a/win32.mak b/win32.mak
index b093f880b27..743f6f3260a 100644
--- a/win32.mak
+++ b/win32.mak
@@ -225,6 +225,7 @@ SRC_STD_REGEX= \
 	std\regex\internal\backtracking.d \
 	std\regex\internal\thompson.d \
 	std\regex\internal\shiftor.d \
+	std\regex\internal\bitnfa.d \
 	std\regex\internal\generator.d
 
 SRC_STD_C= \
diff --git a/win64.mak b/win64.mak
index fe496708924..7a7040a7d6b 100644
--- a/win64.mak
+++ b/win64.mak
@@ -244,6 +244,7 @@ SRC_STD_REGEX= \
 	std\regex\internal\backtracking.d \
 	std\regex\internal\thompson.d \
 	std\regex\internal\shiftor.d \
+	std\regex\internal\bitnfa.d \
 	std\regex\internal\generator.d
 
 SRC_STD_C= \

From 138a2f3dcb80a15b9cacdc521024a628855f7d20 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Fri, 22 Apr 2016 11:37:44 +0300
Subject: [PATCH 10/23] [std.regex] Integrate Bit-NFA into std.regex

---
 std/regex/internal/bitnfa.d |  8 +++++++-
 std/regex/internal/parser.d | 11 ++++++++---
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
index cdd1d9b2fc0..6eded48171c 100644
--- a/std/regex/internal/bitnfa.d
+++ b/std/regex/internal/bitnfa.d
@@ -182,8 +182,9 @@ struct UIntTrie2
             mixin("scratch[lowIdx..highIdx] "~op~"= val;");
             uint h = hash(scratch);
             bool found = false;
-            foreach(i,_; hashes.enumerate.filter!(x => x[1] == h))
+            foreach(i,x; hashes)
             {
+                if(x != h) continue;
                 if(scratch[] == blocks[i*blockSize .. (i+1)*blockSize])
                 {
                     // re-route to existing page
@@ -424,6 +425,11 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
             case End:
                 finalMask |= 1u<<bitMapping[i];
                 break;
+            case Any:
+                uint mask = 1u<<bitMapping[i];
+                asciiTab[0..0x80] &= ~mask;
+                uniTab[0..0x11_0000] &= ~mask;
+                break;
             case Char:
                 uint mask = 1u<<bitMapping[i];
                 auto ch = ir[i].data;
diff --git a/std/regex/internal/parser.d b/std/regex/internal/parser.d
index d780b39206f..d25a2a64abb 100644
--- a/std/regex/internal/parser.d
+++ b/std/regex/internal/parser.d
@@ -4,7 +4,8 @@
 */
 module std.regex.internal.parser;
 
-import std.regex.internal.ir, std.regex.internal.shiftor;
+import std.regex.internal.ir, std.regex.internal.shiftor,
+    std.regex.internal.bitnfa;
 import std.range.primitives, std.uni, std.meta,
     std.traits, std.typecons, std.exception;
 static import std.ascii;
@@ -181,7 +182,7 @@ dchar parseUniHex(Char)(ref Char[] str, size_t maxDigit)
     return val;
 }
 
-@system unittest //BUG canFind is system
+@safe unittest
 {
     import std.algorithm.searching : canFind;
     string[] non_hex = [ "000j", "000z", "FffG", "0Z"];
@@ -1607,7 +1608,11 @@ struct Parser(R, Generator)
         {
             kickstart = new ShiftOr!Char(zis);
             if(kickstart.empty)
-                kickstart = null;
+            {
+                kickstart = new BitMatcher!Char(zis);
+                if(kickstart.empty)
+                    kickstart = null;
+            }
         }
         debug(std_regex_allocation) writefln("IR processed, max threads: %d", threadCount);
         optimize(zis);

From 88ce1a12124c9632861e5369986432bfcdacd9ac Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Fri, 22 Apr 2016 12:22:36 +0300
Subject: [PATCH 11/23] [std.regex] Limit ShiftOr to only apply where it's
 better then Bit-NFA

---
 std/regex/internal/shiftor.d | 23 ++---------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

diff --git a/std/regex/internal/shiftor.d b/std/regex/internal/shiftor.d
index f57dbe20420..4c12bec0833 100644
--- a/std/regex/internal/shiftor.d
+++ b/std/regex/internal/shiftor.d
@@ -339,25 +339,6 @@ public:
                         t.pc += IRL!(IR.RepeatEnd);
                     }
                     break;
-                case IR.InfiniteStart, IR.InfiniteQStart:
-                    t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteStart);
-                    goto case IR.InfiniteEnd; //both Q and non-Q
-                case IR.InfiniteEnd:
-                case IR.InfiniteQEnd:
-                    auto slot = re.ir[t.pc+1].raw+t.counter;
-                    auto val = hash(t.tab);
-                    if (val in merge[slot])
-                        goto L_StopThread; // merge equivalent
-                    merge[slot][val] = true;
-                    uint len = re.ir[t.pc].data;
-                    uint pc1, pc2; //branches to take in priority order
-                    if (++t.hops == 32)
-                        goto L_StopThread;
-                    pc1 = t.pc + IRL!(IR.InfiniteEnd);
-                    pc2 = t.pc - len;
-                    trs ~= fork(t, pc2, t.counter);
-                    t.pc = pc1;
-                    break;
                 case IR.GroupStart, IR.GroupEnd:
                     t.pc += IRL!(IR.GroupStart);
                     break;
@@ -385,7 +366,7 @@ public:
         }
     }
 
-    final @property bool empty() const {  return n_length == 0; }
+    final @property bool empty() const {  return n_length < 3 && fChar == uint.max; }
 
     final @property uint length() const{ return n_length/charSize; }
 
@@ -584,7 +565,7 @@ unittest
         searches("abdcdyabax".to!String, kick, 1, 3, 8);
 
         shiftOrLength(`...`.to!String, 0);
-        kick = shiftOrLength(`a(b+|c+)x`.to!String, 3);
+        kick = shiftOrLength(`a(b{1,2}|c{1,2})x`.to!String, 3);
         searches("ababx".to!String, kick, 2);
         searches("abaacba".to!String, kick, 3); //expected inexact
 

From 2011a1a75c2a2f4eb11e7c4cb0c7bc8ddab5b1e6 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Fri, 22 Apr 2016 12:36:55 +0300
Subject: [PATCH 12/23] Trailing whitespace

---
 std/regex/internal/bitnfa.d | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
index 6eded48171c..6e389b5bad1 100644
--- a/std/regex/internal/bitnfa.d
+++ b/std/regex/internal/bitnfa.d
@@ -172,7 +172,7 @@ struct UIntTrie2
             immutable highIdx = high - low + lowIdx;
             mixin("blocks[lowIdx..highIdx] "~op~"= val;");
         }
-        else        
+        else
         {
             // create a new page
             refCounts[blk]--;

From 20c5e399d3923a0804a3a5209eadd136ce94102f Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Fri, 22 Apr 2016 13:18:34 +0300
Subject: [PATCH 13/23] Try to reduce memory usage in CT-regex tests

---
 std/regex/internal/tests.d | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/std/regex/internal/tests.d b/std/regex/internal/tests.d
index a098fcc431c..993164cc2fb 100644
--- a/std/regex/internal/tests.d
+++ b/std/regex/internal/tests.d
@@ -353,7 +353,7 @@ unittest
     void run_tests(alias matchFn)()
     {
         int i;
-        foreach (Char; AliasSeq!( char, wchar, dchar))
+        foreach(Char; AliasSeq!( char, wchar, dchar))
         (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
             alias String = immutable(Char)[];
             String produceExpected(M,Range)(auto ref M m, Range fmt)
@@ -363,7 +363,7 @@ unittest
                 return app.data;
             }
             Regex!(Char) r;
-            foreach (a, tvd; tv)
+            foreach(a, tvd; tv)
             {
                 uint c = tvd.result[0];
                 debug(std_regex_test) writeln(" Test #", a, " pattern: ", tvd.pattern, " with Char = ", Char.stringof);
@@ -380,7 +380,7 @@ unittest
 
                 assert((c == 'c') ? !i : i, "failed to compile pattern "~tvd.pattern);
 
-                if (c != 'c')
+                if(c != 'c')
                 {
                     auto m = matchFn(to!(String)(tvd.input), r);
                     i = !m.empty;
@@ -427,11 +427,11 @@ unittest
             alias Tests = Sequence!(220, tv.length);
         }
         else
-            alias Tests = AliasSeq!(Sequence!(0, 30), Sequence!(235, tv.length-5));
-        foreach (a, v; Tests)
+            alias Tests = AliasSeq!(Sequence!(0, 25), Sequence!(238, tv.length-5));
+        foreach(a, v; Tests)
         (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
             enum tvd = tv[v];
-            static if (tvd.result == "c")
+            static if(tvd.result == "c")
             {
                 static assert(!__traits(compiles, (){
                     enum r = regex(tvd.pattern, tvd.flags);
@@ -449,11 +449,11 @@ unittest
                 bool ok = (c == 'y') ^ m.empty;
                 assert(ok, text("ctRegex: failed to match pattern #",
                     a ,": ", tvd.pattern));
-                if (c == 'y')
+                if(c == 'y')
                 {
                     import std.stdio;
                     auto result = produceExpected(m, tvd.format);
-                    if (result != tvd.replace)
+                    if(result != tvd.replace)
                         writeln("ctRegex mismatch pattern #", a, ": ", tvd.pattern," expected: ",
                                 tvd.replace, " vs ", result);
                 }
@@ -568,7 +568,7 @@ unittest
         string s = "a quick brown fox jumps over a lazy dog";
         auto r1 = regex("\\b[a-z]+\\b","g");
         string[] test;
-        foreach (m; matchFn(s, r1))
+        foreach(m; matchFn(s, r1))
             test ~= m.hit;
         assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));
         auto free_reg = regex(`
@@ -689,7 +689,7 @@ unittest
     {
         import std.uni : toUpper;
 
-        foreach (i, v; AliasSeq!(string, wstring, dstring))
+        foreach(i, v; AliasSeq!(string, wstring, dstring))
         {
             auto baz(Cap)(Cap m)
             if (is(Cap == Captures!(Cap.String)))
@@ -805,7 +805,7 @@ unittest
     auto r = regex(
        r"^NAME   = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
     auto uniCapturesNew = match(uniFileOld, r);
-    for (int i = 0; i < 20; i++)
+    for(int i = 0; i < 20; i++)
         foreach (matchNew; uniCapturesNew) {}
     //a second issue with same symptoms
     auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
@@ -1007,7 +1007,7 @@ unittest
 unittest
 {
     auto ctPat2 = regex(r"^[CDF]$", "i");
-    foreach (v; ["C", "c", "D", "d", "F", "f"])
+    foreach(v; ["C", "c", "D", "d", "F", "f"])
         assert(matchAll(v, ctPat2).front.hit == v);
 }
 

From 3544686b7474507ecee4c1c974f81cf33d6d3df2 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Sun, 24 Apr 2016 17:02:33 +0300
Subject: [PATCH 14/23] WIP fixing multi-pattern match

---
 std/regex/internal/bitnfa.d | 42 +++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
index 6e389b5bad1..49f3f7b6d31 100644
--- a/std/regex/internal/bitnfa.d
+++ b/std/regex/internal/bitnfa.d
@@ -158,14 +158,12 @@ struct UIntTrie2
     uint opIndex(dchar ch)
     {
         immutable blk = index[ch>>blockBits];
-        //writeln(">blk = ", blk);
         return blocks.ptr[blk*blockSize + (ch & (blockSize-1))];
     }
 
     void setPageRange(string op)(uint val, uint low, uint high)
     {
         immutable blk = index[low>>blockBits];
-        //writeln("<blk = ", blk);
         if(refCounts[blk] == 1) // modify in-place
         {
             immutable lowIdx = blk*blockSize + (low & (blockSize-1));
@@ -394,7 +392,7 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
                     break outer;
             }
         }
-        debug(std_regex_bitnfa) writeln("LEN:", lastNonnested);
+        debug(std_regex_bitnfa) __ctfe || writeln("LEN:", lastNonnested);
         // the total processable length
         finalMask |= 1u<<bitMapping[lastNonnested];
         length = lastNonnested;
@@ -409,7 +407,7 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
                 // collect stops across all paths
                 auto rets = collectControlFlow(ir, i);
                 uint mask = 0;
-                debug(std_regex_bitnfa) writeln(rets);
+                debug(std_regex_bitnfa) __ctfe || writeln(rets);
                 foreach(pc; rets) mask |= 1u<<bitMapping[pc];
                 // map this individual c-f to all possible stops
                 controlFlow[1u<<bitMapping[i]] = mask;
@@ -492,7 +490,7 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
             uint cflow = ~word  & controlFlowMask;
             word = word | controlFlowMask; // kill cflow
             word &= ~controlFlow[cflow]; // map normal ops
-            debug(std_regex_bitnfa) writefln("%b %b %b %b", word, finalMask, cflow, controlFlowMask);
+            debug(std_regex_bitnfa) __ctfe || writefln("%b %b %b %b", word, finalMask, cflow, controlFlowMask);
             if((word & finalMask) != finalMask)
             {
                 matched = true; // keep running to see if there is longer match
@@ -533,25 +531,32 @@ final class BitMatcher(Char) : Kickstart!(Char)
         reverseBytecode(re2.ir[0..len]);
         // check for the case of multiple patterns as one alternation
         if(len == re2.ir.length-IRL!(IR.End))
-        with(IR) with(re2) if(ir[0].code == OrStart)
         {
-            size_t pc = IRL!OrStart;
-            while(ir[pc].code == Option)
+            debug(std_regex_bitnfa) __ctfe || writeln("Reverse!");
+            with(IR) with(re2) if(ir[0].code == OrStart)
             {
-                size_t size = ir[pc].data;
-                if(ir[pc+size-IRL!GotoEndOr].code == GotoEndOr)
-                    size -= IRL!GotoEndOr;
-                size_t j = pc + IRL!Option;
-                if(ir[j].code == End)
+                size_t pc = IRL!OrStart;
+                while(ir[pc].code == Option)
                 {
-                    auto save = ir[j];
-                    foreach(k; j+1..j+size)
-                        ir[k-1] = ir[k];
-                    ir[j+size-1] = save;
+                    size_t size = ir[pc].data;
+                    if(ir[pc+size-IRL!GotoEndOr].code == GotoEndOr)
+                    {
+                        ir[pc+size-IRL!(GotoEndOr)].data = ir[pc+size-IRL!(GotoEndOr)].data+1;
+                        size -= IRL!GotoEndOr;
+                    }
+                    size_t j = pc + IRL!Option;
+                    if(ir[j].code == End)
+                    {
+                        auto save = ir[j];
+                        foreach(k; j+1..j+size)
+                            ir[k-1] = ir[k];
+                        ir[j+size-1] = save;
+                    }
+                    pc = j + ir[pc].data;
                 }
-                pc = j + ir[pc].data;
             }
         }
+        debug(std_regex_bitnfa) __ctfe || re2.print();
         backward = BitNfa(re2);
     }
 
@@ -630,5 +635,6 @@ unittest
 {
     "xxabcy".checkM("abc", 2);
     "_10bcy".checkM([`\d+`, `[a-z]+`], 1);
+    "1/03/12 - 3/03/12".checkM([r"(\d+)/(\d+)/(\d+)", "abc"],0);
     "abc@email.com".checkM(`\S+@\S?1`, 0);
 }

From ecd33f148c1cc232fe133cbeac93f05c3ab54c4f Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Wed, 4 May 2016 23:18:14 +0300
Subject: [PATCH 15/23] [std.regex] Get Bit-NFA working on the testsuite

---
 posix.mak                         |   2 +-
 std/regex/internal/backtracking.d |   1 +
 std/regex/internal/bitnfa.d       | 327 ++++++++++-------
 std/regex/internal/ir.d           |   7 +-
 std/regex/internal/parser.d       |  10 +-
 std/regex/internal/shiftor.d      |  15 +-
 std/regex/internal/tests.d        | 573 +-----------------------------
 std/regex/internal/tests2.d       | 268 ++++++++++++++
 std/regex/internal/tests3.d       | 305 ++++++++++++++++
 win32.mak                         |   2 +
 win64.mak                         |   2 +
 11 files changed, 807 insertions(+), 705 deletions(-)
 create mode 100644 std/regex/internal/tests2.d
 create mode 100644 std/regex/internal/tests3.d

diff --git a/posix.mak b/posix.mak
index b97bd52d3c6..5ce6959a644 100644
--- a/posix.mak
+++ b/posix.mak
@@ -192,7 +192,7 @@ PACKAGE_std_experimental_ndslice = package iteration selection slice
 PACKAGE_std_net = curl isemail
 PACKAGE_std_range = interfaces package primitives
 PACKAGE_std_regex = package $(addprefix internal/,generator ir parser \
-  backtracking bitnfa tests thompson shiftor)
+  backtracking bitnfa tests tests2 tests3 thompson shiftor)
 
 # Modules in std (including those in packages)
 STD_MODULES=$(call P2MODULES,$(STD_PACKAGES))
diff --git a/std/regex/internal/backtracking.d b/std/regex/internal/backtracking.d
index 5427b1b380e..2d14f604178 100644
--- a/std/regex/internal/backtracking.d
+++ b/std/regex/internal/backtracking.d
@@ -225,6 +225,7 @@ template BacktrackingMatcher(bool CTregex)
                             return val;
                         else
                         {
+                            import std.stdio;
                             if (atEnd)
                                 break;
                             search();
diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
index 49f3f7b6d31..e1ca33a5e32 100644
--- a/std/regex/internal/bitnfa.d
+++ b/std/regex/internal/bitnfa.d
@@ -1,3 +1,4 @@
+
 //Written in the D programming language
 /*
     Implementation of a concept "NFA in a word" which is
@@ -28,14 +29,20 @@ struct HashTab
         return p.value;
     }
 
+    bool opBinaryRight(string op:"in")(uint key)
+    {
+        auto p = locate(key, table);
+        return p.occupied;
+    }
+
     void opIndexAssign(uint value, uint key)
     {
-        if(table.length == 0) grow();
+        if (table.length == 0) grow();
         auto p = locate(key, table);
-        if(!p.occupied)
+        if (!p.occupied)
         {
             items++;
-            if(4*items >= table.length*3)
+            if (4*items >= table.length*3)
             {
                 grow();
                 p = locate(key, table);
@@ -49,9 +56,9 @@ struct HashTab
     auto keys()
     {
         auto app = appender!(uint[])();
-        foreach(i, v; table)
+        foreach (i, v; table)
         {
-            if(v.occupied)
+            if (v.occupied)
                 app.put(v.key);
         }
         return app.data;
@@ -60,9 +67,9 @@ struct HashTab
     auto values()
     {
         auto app = appender!(uint[])();
-        foreach(i, v; table)
+        foreach (i, v; table)
         {
-            if(v.occupied)
+            if (v.occupied)
                 app.put(v.value);
         }
         return app.data;
@@ -89,12 +96,12 @@ private:
     static Node* locate()(uint key, Node[] table)
     {
         size_t slot = hashOf(key) & (table.length-1);
-        while(table[slot].occupied)
+        while (table[slot].occupied)
         {
-            if(table[slot].key == key)
+            if (table[slot].key == key)
                 break;
             slot += 1;
-            if(slot == table.length)
+            if (slot == table.length)
                 slot = 0;
         }
         return table.ptr+slot;
@@ -103,9 +110,9 @@ private:
     void grow()
     {
         Node[] newTable = new Node[table.length ? table.length*2 : 4];
-        foreach(i, v; table)
+        foreach (i, v; table)
         {
-            if(v.occupied)
+            if (v.occupied)
             {
                 auto p = locate(v.key, newTable);
                 *p = v;
@@ -134,7 +141,7 @@ struct UIntTrie2
     static uint hash(uint[] data)
     {
         uint h = 5183;
-        foreach(v; data)
+        foreach (v; data)
         {
             h = 31*h + v;
         }
@@ -164,7 +171,7 @@ struct UIntTrie2
     void setPageRange(string op)(uint val, uint low, uint high)
     {
         immutable blk = index[low>>blockBits];
-        if(refCounts[blk] == 1) // modify in-place
+        if (refCounts[blk] == 1) // modify in-place
         {
             immutable lowIdx = blk*blockSize + (low & (blockSize-1));
             immutable highIdx = high - low + lowIdx;
@@ -180,10 +187,10 @@ struct UIntTrie2
             mixin("scratch[lowIdx..highIdx] "~op~"= val;");
             uint h = hash(scratch);
             bool found = false;
-            foreach(i,x; hashes)
+            foreach (i,x; hashes)
             {
-                if(x != h) continue;
-                if(scratch[] == blocks[i*blockSize .. (i+1)*blockSize])
+                if (x != h) continue;
+                if (scratch[] == blocks[i*blockSize .. (i+1)*blockSize])
                 {
                     // re-route to existing page
                     index[low>>blockBits] = cast(ushort)i;
@@ -192,7 +199,7 @@ struct UIntTrie2
                     break;
                 }
             }
-            if(!found)
+            if (!found)
             {
                 index[low>>blockBits] = cast(ushort)hashes.length;
                 blocks ~= scratch[];
@@ -213,9 +220,9 @@ struct UIntTrie2
         uint endBlk = end >> blockBits;
         uint first = min(startBlk*blockSize+blockSize, end);
         setPageRange!op(val, start, first);
-        foreach(blk; startBlk..endBlk)
+        foreach (blk; startBlk..endBlk)
             setPageRange!op(val, blk*blockSize, (blk+1)*blockSize);
-        if(first != end)
+        if (first != end)
         {
             setPageRange!op(val, endBlk*blockSize, end);
         }
@@ -232,15 +239,15 @@ unittest
     import std.uni;
     UIntTrie2 trie2 = UIntTrie2();
     auto letters = unicode("L");
-    foreach(r; letters.byInterval)
+    foreach (r; letters.byInterval)
         trie2[r.a..r.b] &= 1;
-    foreach(ch; letters.byCodepoint)
+    foreach (ch; letters.byCodepoint)
         assert(trie2[ch] == 1);
     auto space = unicode("WhiteSpace");
     auto trie3 = UIntTrie2();
-    foreach(r; space.byInterval)
+    foreach (r; space.byInterval)
         trie3[r.a..r.b] &= 2;
-    foreach(ch; space.byCodepoint)
+    foreach (ch; space.byCodepoint)
         assert(trie3[ch] == 2);
 }
 
@@ -266,22 +273,22 @@ struct BitNfa
         bool nextChoice()
         {
             uint i;
-            for(i=0;i<selection.length; i++)
+            for (i=0;i<selection.length; i++)
             {
                 selection[i] ^= true;
-                if(selection[i])
+                if (selection[i])
                     break;
             }
             return i != selection.length;
         }
         // first prepare full mask
-        foreach(k; keys) controlFlowMask |= k;
+        foreach (k; keys) controlFlowMask |= k;
         // next set all combinations in cf
-        while(nextChoice())
+        while (nextChoice())
         {
             uint kmask = 0, vmask = 0;
-            foreach(i,v; selection)
-                if(v)
+            foreach (i,v; selection)
+                if (v)
                 {
                     kmask |= keys[i];
                     vmask |= values[i];
@@ -293,17 +300,20 @@ struct BitNfa
     uint[] collectControlFlow(Bytecode[] ir, uint i)
     {
         uint[] result;
+        bool[] visited = new bool[ir.length];
         Stack!uint paths;
         paths.push(i);
-        while(!paths.empty())
+        while (!paths.empty())
         {
             uint j = paths.pop();
-            switch(ir[j].code) with(IR)
+            if (visited[j]) continue;
+            visited[j] = true;
+            switch (ir[j].code) with(IR)
             {
             case OrStart:
                 j += IRL!OrStart;
                 assert(ir[j].code == Option);
-                while(ir[j].code == Option)
+                while (ir[j].code == Option)
                 {
                     //import std.stdio;
                     //writefln("> %d %s", j, ir[j].mnemonic);
@@ -315,7 +325,7 @@ struct BitNfa
             case GotoEndOr:
                 paths.push(j+IRL!GotoEndOr+ir[j].data);
                 break;
-            case OrEnd, Wordboundary, Notwordboundary, Bol, Eol, Nop, GroupStart, GroupEnd:
+            case OrEnd, Wordboundary, Notwordboundary, Bof, Bol, Eol, Eof, Nop, GroupStart, GroupEnd:
                 paths.push(j+ir[j].length);
                 break;
             case LookaheadStart, NeglookaheadStart, LookbehindStart,
@@ -324,11 +334,11 @@ struct BitNfa
                 break;
             case InfiniteStart, InfiniteQStart:
                 paths.push(j+IRL!InfiniteStart);
-                paths.push(j+ir[j].data+IRL!InfiniteEnd);
+                paths.push(j+IRL!InfiniteStart+ir[j].data+IRL!InfiniteEnd);
                 break;
             case InfiniteBloomStart:
                 paths.push(j+IRL!InfiniteStart);
-                paths.push(j+ir[j].data+IRL!InfiniteBloomEnd);
+                paths.push(j+IRL!InfiniteBloomStart+ir[j].data+IRL!InfiniteBloomEnd);
                 break;
             case InfiniteEnd, InfiniteQEnd:
                 paths.push(j-ir[j].data);
@@ -354,16 +364,16 @@ struct BitNfa
         uint[] bitMapping = new uint[re.ir.length];
         uint bitCount = 0, nesting=0, lastNonnested=0;
         with(re)
-outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
+outer:  for (uint i=0; i<ir.length; i += ir[i].length) with(IR)
         {
-            if(nesting == 0) lastNonnested = i;
-            if(ir[i].isStart) nesting++;
-            if(ir[i].isEnd) nesting--;
-            switch(ir[i].code)
+            if (nesting == 0) lastNonnested = i;
+            if (ir[i].isStart) nesting++;
+            if (ir[i].isEnd) nesting--;
+            switch (ir[i].code)
             {
-            case Option, OrEnd, Nop, Bol,
+            case Option, OrEnd, Nop, Bof, Bol,
             GroupStart, GroupEnd,
-            Eol, Wordboundary, Notwordboundary:
+            Eol, Eof, Wordboundary, Notwordboundary:
                 bitMapping[i] = bitCount;
                 break;
             // skipover complex assertions
@@ -379,16 +389,16 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
                 break outer;
             case OrChar:
                 uint s = ir[i].sequence;
-                for(uint j=i; j<i+s; j++)
+                for (uint j=i; j<i+s; j++)
                     bitMapping[j] = bitCount;
                 i += (s-1)*IRL!OrChar;
                 bitCount++;
-                if(bitCount == 32)
+                if (bitCount == 32)
                     break outer;
                 break;
             default:
                 bitMapping[i] = bitCount++;
-                if(bitCount == 32)
+                if (bitCount == 32)
                     break outer;
             }
         }
@@ -397,24 +407,24 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
         finalMask |= 1u<<bitMapping[lastNonnested];
         length = lastNonnested;
         with(re)
-        for(uint i=0; i<length; i += ir[i].length)
+        for (uint i=0; i<length; i += ir[i].length)
         {
-            switch(ir[i].code) with (IR)
+            switch (ir[i].code) with (IR)
             {
             case OrStart,GotoEndOr, InfiniteStart,
-            InfiniteBloomStart, InfiniteBloomEnd,
-            InfiniteEnd, InfiniteQEnd, InfiniteQStart:
+            InfiniteQStart,InfiniteBloomStart,
+            InfiniteBloomEnd, InfiniteEnd, InfiniteQEnd:
                 // collect stops across all paths
                 auto rets = collectControlFlow(ir, i);
                 uint mask = 0;
                 debug(std_regex_bitnfa) __ctfe || writeln(rets);
-                foreach(pc; rets) mask |= 1u<<bitMapping[pc];
+                foreach (pc; rets) mask |= 1u<<bitMapping[pc];
                 // map this individual c-f to all possible stops
                 controlFlow[1u<<bitMapping[i]] = mask;
                 break;
-            case Option, OrEnd, Nop, Bol,
+            case Option, OrEnd, Nop, Bol, Bof,
                 GroupStart, GroupEnd,
-                Eol, Wordboundary, Notwordboundary:
+                Eol, Eof, Wordboundary, Notwordboundary:
                 break;
             case LookaheadStart, NeglookaheadStart, LookbehindStart,
                 NeglookbehindStart:
@@ -433,20 +443,20 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
                 auto ch = ir[i].data;
                 //import std.stdio;
                 //writefln("Char %c - %b", cast(dchar)ch, mask);
-                if(ch < 0x80)
+                if (ch < 0x80)
                     asciiTab[ch] &= ~mask;
                 else
                     uniTab[ch] &= ~mask;
                 break;
             case OrChar:
                 uint s = ir[i].sequence;
-                for(size_t j=i; j<i+s; j++)
+                for (size_t j=i; j<i+s; j++)
                 {
                     uint mask = 1u<<bitMapping[i];
                     auto ch = ir[j].data;
                     //import std.stdio;
                     //writefln("OrChar %c - %b", cast(dchar)ch, mask);
-                    if(ch < 0x80)
+                    if (ch < 0x80)
                         asciiTab[ch] &= ~mask;
                     else
                         uniTab[ch] &= ~mask;
@@ -456,13 +466,13 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
             case CodepointSet, Trie:
                 auto cset = charsets[ir[i].data];
                 uint mask = 1u<<bitMapping[i];
-                foreach(ival; cset.byInterval)
+                foreach (ival; cset.byInterval)
                 {
-                    if(ival.b < 0x80)
+                    if (ival.b < 0x80)
                         asciiTab[ival.a..ival.b] &= ~mask;
                     else
                     {
-                        if(ival.a < 0x80)
+                        if (ival.a < 0x80)
                             asciiTab[ival.a..0x80] &= ~mask;
                         uniTab[ival.a..ival.b] &= ~mask;
                     }
@@ -474,16 +484,23 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
         }
         length += re.ir[lastNonnested].length;
         combineControlFlow();
+        if (0x1 & finalMask)
+        {
+            length = 0;
+        }
+        else if (0x1 in controlFlow)
+        {
+            if (controlFlow[0x01] & finalMask)
+                length = 0; // set zero-width as empty
+        }
     }
 
-    bool opCall(Input)(ref Input r)
+    bool search(Input)(ref Input r)
     {
-        bool matched = false;
-        size_t mIdx = 0;
         dchar ch;
         size_t idx;
         uint word = ~0u;
-        for(;;)
+        for (;;)
         {
             word <<= 1; // shift - create a state
             // cfMask has 1 for each control-flow op
@@ -491,31 +508,102 @@ outer:  for(uint i=0; i<ir.length; i += ir[i].length) with(IR)
             word = word | controlFlowMask; // kill cflow
             word &= ~controlFlow[cflow]; // map normal ops
             debug(std_regex_bitnfa) __ctfe || writefln("%b %b %b %b", word, finalMask, cflow, controlFlowMask);
-            if((word & finalMask) != finalMask)
+            if ((word & finalMask) != finalMask)
             {
-                matched = true; // keep running to see if there is longer match
+                return true;
+            }
+            if (!r.nextChar(ch, idx))
+                break;
+            // mask away failing states
+            if (ch < 0x80)
+                word |= asciiTab[ch];
+            else
+                word |= uniTab[ch];
+        }
+        return false;
+    }
+
+    bool match(Input)(ref Input r)
+    {
+        dchar ch;
+        size_t idx;
+        uint word = ~1u;
+        size_t mIdx = 0;
+        bool matched = false;
+        auto save = r._index;
+        for (;;)
+        {
+            // cfMask has 1 for each control-flow op
+            uint cflow = ~word  & controlFlowMask;
+            word = word | controlFlowMask; // kill cflow
+            word &= ~controlFlow[cflow]; // map normal ops
+            debug(std_regex_bitnfa) __ctfe || writefln("%b %b %b %b", word, finalMask, cflow, controlFlowMask);
+            if ((word & finalMask) != finalMask)
+            {
+                // keep running to see if there is longer match
+                matched = true;
                 mIdx = r._index;
             }
-            else if(matched)
+            else if (word == ~0u) // no active states
                 break;
-            if(!r.nextChar(ch, idx))
+            if (!r.nextChar(ch, idx))
                 break;
             // mask away failing states
-            if(ch < 0x80)
+            if (ch < 0x80)
                 word |= asciiTab[ch];
             else
                 word |= uniTab[ch];
+            // shift and
+            word = (word<<1) | 1;
+
         }
-        if(matched)
-        {
+        if (matched)
             r.reset(mIdx);
-        }
+        else
+            r.reset(save);
         return matched;
     }
 }
 
+auto reverseBitNfa(Char)(auto ref Regex!Char re, uint length)
+{
+    auto re2 = re;
+    re2.ir = re2.ir.dup;
+    uint len = length - 1;
+    reverseBytecode(re2.ir[0..len]);
+    // check for the case of multiple patterns as one alternation
+    if (len == re2.ir.length-IRL!(IR.End))
+    {
+        debug(std_regex_bitnfa) __ctfe || writeln("Reverse!");
+        with(IR) with(re2) if (ir[0].code == OrStart)
+        {
+            size_t pc = IRL!OrStart;
+            while (ir[pc].code == Option)
+            {
+                size_t size = ir[pc].data;
+                if (ir[pc+size-IRL!GotoEndOr].code == GotoEndOr)
+                {
+                    ir[pc+size-IRL!(GotoEndOr)].data = ir[pc+size-IRL!(GotoEndOr)].data+1;
+                    size -= IRL!GotoEndOr;
+                }
+                size_t j = pc + IRL!Option;
+                if (ir[j].code == End)
+                {
+                    auto save = ir[j];
+                    foreach (k; j+1..j+size)
+                        ir[k-1] = ir[k];
+                    ir[j+size-1] = save;
+                }
+                pc = j + ir[pc].data;
+            }
+        }
+    }
+    debug(std_regex_bitnfa) __ctfe || re2.print();
+    return BitNfa(re2);
+}
+
 final class BitMatcher(Char) : Kickstart!(Char)
-    if(is(Char : dchar))
+    if (is(Char : dchar))
 {
 @trusted:
     BitNfa forward, backward;
@@ -523,54 +611,36 @@ final class BitMatcher(Char) : Kickstart!(Char)
     this()(auto ref Regex!Char re)
     {
         forward = BitNfa(re);
-        //reverse Bytecode
-        auto re2 = re;
-        re2.ir = re2.ir.dup;
         // keep the end where it belongs
-        uint len = forward.length - 1;
-        reverseBytecode(re2.ir[0..len]);
-        // check for the case of multiple patterns as one alternation
-        if(len == re2.ir.length-IRL!(IR.End))
-        {
-            debug(std_regex_bitnfa) __ctfe || writeln("Reverse!");
-            with(IR) with(re2) if(ir[0].code == OrStart)
-            {
-                size_t pc = IRL!OrStart;
-                while(ir[pc].code == Option)
-                {
-                    size_t size = ir[pc].data;
-                    if(ir[pc+size-IRL!GotoEndOr].code == GotoEndOr)
-                    {
-                        ir[pc+size-IRL!(GotoEndOr)].data = ir[pc+size-IRL!(GotoEndOr)].data+1;
-                        size -= IRL!GotoEndOr;
-                    }
-                    size_t j = pc + IRL!Option;
-                    if(ir[j].code == End)
-                    {
-                        auto save = ir[j];
-                        foreach(k; j+1..j+size)
-                            ir[k-1] = ir[k];
-                        ir[j+size-1] = save;
-                    }
-                    pc = j + ir[pc].data;
-                }
-            }
-        }
-        debug(std_regex_bitnfa) __ctfe || re2.print();
-        backward = BitNfa(re2);
+        if (!forward.empty)
+            backward = reverseBitNfa(re, forward.length);
     }
 
-    final bool opCall(ref Input!Char r)
+    final bool search(ref Input!Char r)
     {
-        bool res = forward(r);
-        if(res){
+        auto save = r._index;
+        bool res = forward.search(r);
+        if (res)
+        {
             auto back = r.loopBack(r._index);
-            assert(backward(back));
-            r.reset(back._index);
+            auto t = backward.match(back);
+            assert(t);
+            if (back._index < save)
+                r.reset(save);
+            else
+                r.reset(back._index);
         }
         return res;
     }
 
+    final bool match(ref Input!Char r)
+    {
+        auto save = r._index;
+        bool res = forward.match(r);
+        r.reset(save);
+        return res;
+    }
+
     final @property bool empty() const{ return forward.empty; }
 }
 
@@ -578,32 +648,42 @@ version(unittest)
 {
     template check(alias make)
     {
-        private void check(T)(string input, T re, size_t idx=uint.max)
+        private void check(T)(string input, T re, size_t idx=uint.max, int line=__LINE__)
         {
             import std.regex, std.conv;
             import std.stdio;
             auto rex = regex(re);
             auto m = make(rex);
             auto s = Input!char(input);
-            assert(m(s), "Failed "~input~" with "~to!string(re));
-            assert(s._index == idx || (idx ==uint.max && s._index == input.length));
+            assert(m.search(s), text("Failed @", line, " ", input, " with ", re));
+            assert(s._index == idx || (idx ==uint.max && s._index == input.length),
+                text("Failed @", line, " index=", s._index));
         }
     }
 
     template checkFail(alias make)
     {
-        private void checkFail(T)(string input, T re, size_t idx=uint.max)
+        private void checkFail(T)(string input, T re, size_t idx=uint.max, int line=__LINE__)
         {
             import std.regex, std.conv;
             import std.stdio;
             auto rex = regex(re);
             auto m = make(rex);
             auto s = Input!char(input);
-            assert(!m(s), "Should have failed "~input~" with "~to!string(re));
+            assert(!m.search(s), text("Should have failed @", line, " " , input, " with ", re));
             assert(s._index == idx || (idx ==uint.max && s._index == input.length));
         }
     }
 
+    private void checkEmpty(T)(T re)
+    {
+        import std.regex, std.conv;
+        import std.stdio;
+        auto rex = regex(re);
+        auto m = BitNfa(rex);
+        assert(m.empty, "Should be empty "~to!string(re));
+    }
+
     alias checkBit = check!BitNfa;
     alias checkBitFail = checkFail!BitNfa;
     auto makeMatcher(Char)(Regex!Char regex){ return new BitMatcher!(Char)(regex); }
@@ -616,7 +696,8 @@ unittest
     "xabcd".checkBit("abc", 4);
     "xabbbcdyy".checkBit("a[b-c]*c", 6);
     "abc1".checkBit("([a-zA-Z_0-9]*)1");
-    "abbabc".checkBit("(a|b)*",5);
+    "(a|b)*".checkEmpty;
+    "abbabc".checkBit("(a|b)*c");
     "abd".checkBitFail("abc");
     // check truncation
     "0123456789_0123456789_0123456789_012"
@@ -624,17 +705,23 @@ unittest
     "0123456789_0123456789_0123456789_012"
         .checkBit("0123456789(0123456789_0123456789_0123456789_0123456789|01234)",10);
     // assertions ignored
-    "0abc1".checkBit("(?<![0-9])[a-c]*$", 4);
+    "0abc1".checkBit("(?<![0-9])[a-c]+$", 2);
     // stop on repetition
     "abcdef1".checkBit("a[a-z]{5}", 1);
-    "ads@email.com".checkBit(`\S+@\S+`);
+    "ads@email.com".checkBit(`\S+@\S+`,5);
     "abc@email.com".checkBit(`\S+@\S?1`, 4);
+    "1".checkBit(r"\d+",1);
+    "()*".checkEmpty;
+    "^".checkEmpty;
+    "abc".checkBit(`\w[bc]`, 2);
 }
 
 unittest
 {
     "xxabcy".checkM("abc", 2);
     "_10bcy".checkM([`\d+`, `[a-z]+`], 1);
-    "1/03/12 - 3/03/12".checkM([r"(\d+)/(\d+)/(\d+)", "abc"],0);
+    "1/03/12 - 3/03/12".checkM([r"\d+/\d+/\d+"],0);
     "abc@email.com".checkM(`\S+@\S?1`, 0);
+    "Strap a rocket engine on a chicken.".checkM("[ra]", 2);
+    "abcd".checkM("ab|cd", 0);
 }
diff --git a/std/regex/internal/ir.d b/std/regex/internal/ir.d
index f1ec1571e52..1658831f5bc 100644
--- a/std/regex/internal/ir.d
+++ b/std/regex/internal/ir.d
@@ -459,7 +459,8 @@ struct Group(DataIndex)
 +/
 interface Kickstart(Char){
 @trusted:
-    bool opCall(ref Input!Char input);
+    bool search(ref Input!Char input);
+    bool match(ref Input!Char input);
     @property bool empty() const;
 }
 
@@ -741,9 +742,9 @@ struct Input(Char)
         return _index == _origin.length;
     }
 
-    bool search(Kickstart)(ref Kickstart kick, ref dchar res, ref size_t pos)
+    bool search(Kickstart!Char kick, ref dchar res, ref size_t pos)
     {
-        kick(this);
+        kick.search(this);
         return nextChar(res, pos);
     }
 
diff --git a/std/regex/internal/parser.d b/std/regex/internal/parser.d
index d25a2a64abb..3429dc5e6b6 100644
--- a/std/regex/internal/parser.d
+++ b/std/regex/internal/parser.d
@@ -729,8 +729,6 @@ struct Parser(R, Generator)
 
         while (!empty)
         {
-            debug(std_regex_parser)
-                __ctfe || writeln("*LR*\nSource: ", pat, "\nStack: ",fixupStack.data);
             switch (current)
             {
             case '(':
@@ -1485,11 +1483,13 @@ struct Parser(R, Generator)
             if (current >= privateUseStart && current <= privateUseEnd)
             {
                 g.endPattern(current - privateUseStart + 1);
-                break;
             }
-            auto op = Bytecode(IR.Char, current);
+            else
+            {
+                auto op = Bytecode(IR.Char, current);
+                g.put(op);
+            }
             next();
-            g.put(op);
         }
     }
 
diff --git a/std/regex/internal/shiftor.d b/std/regex/internal/shiftor.d
index 4c12bec0833..6066efd45ed 100644
--- a/std/regex/internal/shiftor.d
+++ b/std/regex/internal/shiftor.d
@@ -374,7 +374,7 @@ public:
     // has a useful trait: if supplied with valid UTF indexes,
     // returns only valid UTF indexes
     // (that given the haystack in question is valid UTF string)
-    final @trusted bool opCall(ref Input!Char s)
+    final @trusted bool search(ref Input!Char s)
     {//@BUG: apparently assumes little endian machines
         import std.conv : text;
         import core.stdc.string : memchr;
@@ -504,6 +504,12 @@ public:
         return false;
     }
 
+    final @trusted bool match(ref Input!Char s)
+    {
+        //TODO: stub
+        return false;
+    }
+
     @system debug static void dump(uint[] table)
     {//@@@BUG@@@ writef(ln) is @system
         import std.stdio : writefln;
@@ -529,7 +535,7 @@ unittest
         auto inp = Input!C(source);
         foreach(r; results)
         {
-            kick(inp);
+            kick.search(inp);
             dchar ch;
             size_t idx;
             assert(inp._index == r, text(inp._index, " vs ", r));
@@ -547,12 +553,12 @@ unittest
         shiftOrLength(`\ba{2}c\bxyz`.to!String, 6);
         auto kick = shiftOrLength(`\ba{2}c\b`.to!String, 3);
         auto inp = Input!Char("aabaacaa");
-        assert(kick(inp));
+        assert(kick.search(inp));
         assert(inp._index == 3, text(Char.stringof," == ", kick.length));
         dchar ch;
         size_t idx;
         inp.nextChar(ch, idx);
-        assert(!kick(inp));
+        assert(!kick.search(inp));
         assert(inp._index == 8, text(Char.stringof," == ", kick.length));
     }
 
@@ -568,7 +574,6 @@ unittest
         kick = shiftOrLength(`a(b{1,2}|c{1,2})x`.to!String, 3);
         searches("ababx".to!String, kick, 2);
         searches("abaacba".to!String, kick, 3); //expected inexact
-
     }
 }
 
diff --git a/std/regex/internal/tests.d b/std/regex/internal/tests.d
index 993164cc2fb..a339e3bed92 100644
--- a/std/regex/internal/tests.d
+++ b/std/regex/internal/tests.d
@@ -8,8 +8,6 @@ package(std.regex):
 import std.conv, std.exception, std.meta, std.range,
     std.typecons, std.regex;
 
-import std.regex.internal.parser : Escapables; // characters that need escaping
-
 alias Sequence(int B, int E) = staticIota!(B, E);
 
 unittest
@@ -315,6 +313,7 @@ unittest
         TestVectors(    `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com",  "y", "$&-$1", "x-@"),
         TestVectors(    `x()(abc)(?=(d)(e)(f)\2)`,   "xabcdefabc", "y", "$&", "xabc"),
         TestVectors(    `x()(abc)(?=(d)(e)(f)()\3\4\5)`,   "xabcdefdef", "y", "$&", "xabc"),
+
 //lookback
         TestVectors(    `(?<=(ab))\d`,    "12ba3ab4",    "y",   "$&-$1", "4-ab",  "i"),
         TestVectors(    `\w(?<!\d)\w`,   "123ab24",  "y",   "$&", "ab"),
@@ -427,7 +426,7 @@ unittest
             alias Tests = Sequence!(220, tv.length);
         }
         else
-            alias Tests = AliasSeq!(Sequence!(0, 25), Sequence!(238, tv.length-5));
+            alias Tests = AliasSeq!(Sequence!(0, 25));
         foreach(a, v; Tests)
         (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
             enum tvd = tv[v];
@@ -481,571 +480,3 @@ unittest
     auto cr4 = ctRegex!(`\b(a\B[a-z]b)\b`);
     assert(array(match("azb",cr4).captures) == ["azb", "azb"]);
 }
-
-unittest
-{
-    auto cr5 = ctRegex!("(?:a{2,4}b{1,3}){1,2}");
-    assert(bmatch("aaabaaaabbb", cr5).hit == "aaabaaaabbb");
-    auto cr6 = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w);
-    assert(bmatch("aaabaaaabbb"w,  cr6).hit == "aaab"w);
-}
-
-unittest
-{
-    auto cr7 = ctRegex!(`\r.*?$`,"sm");
-    assert(bmatch("abc\r\nxy",  cr7).hit == "\r\nxy");
-    auto greed =  ctRegex!("<packet.*?/packet>");
-    assert(bmatch("<packet>text</packet><packet>text</packet>", greed).hit
-            == "<packet>text</packet>");
-}
-
-unittest
-{
-    import std.algorithm.comparison : equal;
-    auto cr8 = ctRegex!("^(a)(b)?(c*)");
-    auto m8 = bmatch("abcc",cr8);
-    assert(m8);
-    assert(m8.captures[1] == "a");
-    assert(m8.captures[2] == "b");
-    assert(m8.captures[3] == "cc");
-    auto cr9 = ctRegex!("q(a|b)*q");
-    auto m9 = match("xxqababqyy",cr9);
-    assert(m9);
-    assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"]));
-}
-
-unittest
-{
-    import std.algorithm.comparison : equal;
-    auto rtr = regex("a|b|c");
-    enum ctr = regex("a|b|c");
-    assert(equal(rtr.ir,ctr.ir));
-    //CTFE parser BUG is triggered by group
-    //in the middle of alternation (at least not first and not last)
-    enum testCT = regex(`abc|(edf)|xyz`);
-    auto testRT = regex(`abc|(edf)|xyz`);
-    assert(equal(testCT.ir,testRT.ir));
-}
-
-unittest
-{
-    import std.algorithm.iteration : map;
-    import std.algorithm.comparison : equal;
-    enum cx = ctRegex!"(A|B|C)";
-    auto mx = match("B",cx);
-    assert(mx);
-    assert(equal(mx.captures, [ "B", "B"]));
-    enum cx2 = ctRegex!"(A|B)*";
-    assert(match("BAAA",cx2));
-
-    enum cx3 = ctRegex!("a{3,4}","i");
-    auto mx3 = match("AaA",cx3);
-    assert(mx3);
-    assert(mx3.captures[0] == "AaA");
-    enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i");
-    auto mx4 = match("aaaabc", cx4);
-    assert(mx4);
-    assert(mx4.captures[0] == "aaaab");
-    auto cr8 = ctRegex!("(a)(b)?(c*)");
-    auto m8 = bmatch("abcc",cr8);
-    assert(m8);
-    assert(m8.captures[1] == "a");
-    assert(m8.captures[2] == "b");
-    assert(m8.captures[3] == "cc");
-    auto cr9 = ctRegex!(".*$", "gm");
-    auto m9 = match("First\rSecond", cr9);
-    assert(m9);
-    assert(equal(map!"a.hit"(m9), ["First", "", "Second"]));
-}
-
-unittest
-{
-    import std.algorithm.iteration : map;
-    import std.algorithm.comparison : equal;
-//global matching
-    void test_body(alias matchFn)()
-    {
-        string s = "a quick brown fox jumps over a lazy dog";
-        auto r1 = regex("\\b[a-z]+\\b","g");
-        string[] test;
-        foreach(m; matchFn(s, r1))
-            test ~= m.hit;
-        assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));
-        auto free_reg = regex(`
-
-            abc
-            \s+
-            "
-            (
-                    [^"]+
-                |   \\ "
-            )+
-            "
-            z
-        `, "x");
-        auto m = match(`abc  "quoted string with \" inside"z`,free_reg);
-        assert(m);
-        string mails = " hey@you.com no@spam.net ";
-        auto rm = regex(`@(?<=\S+@)\S+`,"g");
-        assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"]));
-        auto m2 = matchFn("First line\nSecond line",regex(".*$","gm"));
-        assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"]));
-        auto m2a = matchFn("First line\nSecond line",regex(".+$","gm"));
-        assert(equal(map!"a[0]"(m2a), ["First line", "Second line"]));
-        auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm"));
-        assert(equal(map!"a[0]"(m2b), ["First line", "Second line"]));
-        debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!");
-    }
-    test_body!bmatch();
-    test_body!match();
-}
-
-//tests for accumulated std.regex issues and other regressions
-unittest
-{
-    import std.algorithm.iteration : map;
-    import std.algorithm.comparison : equal;
-    void test_body(alias matchFn)()
-    {
-        //issue 5857
-        //matching goes out of control if ... in (...){x} has .*/.+
-        auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures;
-        assert(c[0] == "axxxzayyyyyzd");
-        assert(c[1] == "ayyyyyz");
-        auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures;
-        assert(c2[0] == "axxxayyyyyd");
-        assert(c2[1] == "ayyyyy");
-        //issue 2108
-        //greedy vs non-greedy
-        auto nogreed = regex("<packet.*?/packet>");
-        assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit
-               == "<packet>text</packet>");
-        auto greed =  regex("<packet.*/packet>");
-        assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit
-               == "<packet>text</packet><packet>text</packet>");
-        //issue 4574
-        //empty successful match still advances the input
-        string[] pres, posts, hits;
-        foreach (m; matchFn("abcabc", regex("","g")))
-        {
-            pres ~= m.pre;
-            posts ~= m.post;
-            assert(m.hit.empty);
-
-        }
-        auto heads = [
-            "abcabc",
-            "abcab",
-            "abca",
-            "abc",
-            "ab",
-            "a",
-            ""
-        ];
-        auto tails = [
-            "abcabc",
-             "bcabc",
-              "cabc",
-               "abc",
-                "bc",
-                 "c",
-                  ""
-        ];
-        assert(pres == array(retro(heads)));
-        assert(posts == tails);
-        //issue 6076
-        //regression on .*
-        auto re = regex("c.*|d");
-        auto m = matchFn("mm", re);
-        assert(!m);
-        debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!");
-        auto rprealloc = regex(`((.){5}.{1,10}){5}`);
-        auto arr = array(repeat('0',100));
-        auto m2 = matchFn(arr, rprealloc);
-        assert(m2);
-        assert(collectException(
-                regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$")
-                ) is null);
-        foreach (ch; [Escapables])
-        {
-            assert(match(to!string(ch),regex(`[\`~ch~`]`)));
-            assert(!match(to!string(ch),regex(`[^\`~ch~`]`)));
-            assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
-        }
-        //bugzilla 7718
-        string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
-        auto reStrCmd = regex (`(".*")|('.*')`, "g");
-        assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
-                     [`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
-    }
-    test_body!bmatch();
-    test_body!match();
-}
-
-// tests for replace
-unittest
-{
-    void test(alias matchFn)()
-    {
-        import std.uni : toUpper;
-
-        foreach(i, v; AliasSeq!(string, wstring, dstring))
-        {
-            auto baz(Cap)(Cap m)
-            if (is(Cap == Captures!(Cap.String)))
-            {
-                return toUpper(m.hit);
-            }
-            alias String = v;
-            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c"))
-                   == to!String("ack rapacity"));
-            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c"))
-                   == to!String("ack capacity"));
-            assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]"))
-                   == to!String("[n]oon"));
-            assert(std.regex.replace!(matchFn)(
-                to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'")
-            ) == to!String(": test2 test1 :"));
-            auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."),
-                    regex(to!String("[ar]"), "g"));
-            assert(s == "StRAp A Rocket engine on A chicken.");
-        }
-        debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~"  !!!");
-    }
-    test!(bmatch)();
-    test!(match)();
-}
-
-// tests for splitter
-unittest
-{
-    import std.algorithm.comparison : equal;
-    auto s1 = ", abc, de,     fg, hi, ";
-    auto sp1 = splitter(s1, regex(", *"));
-    auto w1 = ["", "abc", "de", "fg", "hi", ""];
-    assert(equal(sp1, w1));
-
-    auto s2 = ", abc, de,  fg, hi";
-    auto sp2 = splitter(s2, regex(", *"));
-    auto w2 = ["", "abc", "de", "fg", "hi"];
-
-    uint cnt;
-    foreach (e; sp2)
-    {
-        assert(w2[cnt++] == e);
-    }
-    assert(equal(sp2, w2));
-}
-
-unittest
-{
-    char[] s1 = ", abc, de,  fg, hi, ".dup;
-    auto sp2 = splitter(s1, regex(", *"));
-}
-
-unittest
-{
-    import std.algorithm.comparison : equal;
-    auto s1 = ", abc, de,  fg, hi, ";
-    auto w1 = ["", "abc", "de", "fg", "hi", ""];
-    assert(equal(split(s1, regex(", *")), w1[]));
-}
-
-unittest
-{ // bugzilla 7141
-    string pattern = `[a\--b]`;
-    assert(match("-", pattern));
-    assert(match("b", pattern));
-    string pattern2 = `[&-z]`;
-    assert(match("b", pattern2));
-}
-unittest
-{//bugzilla 7111
-    assert(match("", regex("^")));
-}
-unittest
-{//bugzilla 7300
-    assert(!match("a"d, "aa"d));
-}
-
-// bugzilla 7551
-unittest
-{
-    auto r = regex("[]abc]*");
-    assert("]ab".matchFirst(r).hit == "]ab");
-    assertThrown(regex("[]"));
-    auto r2 = regex("[]abc--ab]*");
-    assert("]ac".matchFirst(r2).hit == "]");
-}
-
-unittest
-{//bugzilla 7674
-    assert("1234".replace(regex("^"), "$$") == "$1234");
-    assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?");
-    assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?");
-}
-unittest
-{// bugzilla 7679
-    import std.algorithm.comparison : equal;
-    foreach (S; AliasSeq!(string, wstring, dstring))
-    (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
-        enum re = ctRegex!(to!S(r"\."));
-        auto str = to!S("a.b");
-        assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")]));
-        assert(split(str, re) == [to!S("a"), to!S("b")]);
-    }();
-}
-unittest
-{//bugzilla 8203
-    string data = "
-    NAME   = XPAW01_STA:STATION
-    NAME   = XPAW01_STA
-    ";
-    auto uniFileOld = data;
-    auto r = regex(
-       r"^NAME   = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
-    auto uniCapturesNew = match(uniFileOld, r);
-    for(int i = 0; i < 20; i++)
-        foreach (matchNew; uniCapturesNew) {}
-    //a second issue with same symptoms
-    auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
-    match("аллея Театральная", r2);
-}
-unittest
-{// bugzilla 8637 purity of enforce
-    auto m = match("hello world", regex("world"));
-    enforce(m);
-}
-
-// bugzilla 8725
-unittest
-{
-  static italic = regex( r"\*
-                (?!\s+)
-                (.*?)
-                (?!\s+)
-                \*", "gx" );
-  string input = "this * is* interesting, *very* interesting";
-  assert(replace(input, italic, "<i>$1</i>") ==
-      "this * is* interesting, <i>very</i> interesting");
-}
-
-// bugzilla 8349
-unittest
-{
-    enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
-    enum peakRegex = ctRegex!(peakRegexStr);
-    //note that the regex pattern itself is probably bogus
-    assert(match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRegex));
-}
-
-// bugzilla 9211
-unittest
-{
-    import std.algorithm.comparison : equal;
-    auto rx_1 =  regex(r"^(\w)*(\d)");
-    auto m = match("1234", rx_1);
-    assert(equal(m.front, ["1234", "3", "4"]));
-    auto rx_2 = regex(r"^([0-9])*(\d)");
-    auto m2 = match("1234", rx_2);
-    assert(equal(m2.front, ["1234", "3", "4"]));
-}
-
-// bugzilla 9280
-unittest
-{
-    string tomatch = "a!b@c";
-    static r = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$");
-    auto nm = match(tomatch, r);
-    assert(nm);
-    auto c = nm.captures;
-    assert(c[1] == "a");
-    assert(c["nick"] == "a");
-}
-
-
-// bugzilla 9579
-unittest
-{
-    char[] input = ['a', 'b', 'c'];
-    string format = "($1)";
-    // used to give a compile error:
-    auto re = regex(`(a)`, "g");
-    auto r = replace(input, re, format);
-    assert(r == "(a)bc");
-}
-
-// bugzilla 9634
-unittest
-{
-    auto re = ctRegex!"(?:a+)";
-    assert(match("aaaa", re).hit == "aaaa");
-}
-
-//bugzilla 10798
-unittest
-{
-    auto cr = ctRegex!("[abcd--c]*");
-    auto m  = "abc".match(cr);
-    assert(m);
-    assert(m.hit == "ab");
-}
-
-// bugzilla 10913
-unittest
-{
-    @system static string foo(const(char)[] s)
-    {
-        return s.dup;
-    }
-    @safe static string bar(const(char)[] s)
-    {
-        return s.dup;
-    }
-    () @system {
-        replace!((a) => foo(a.hit))("blah", regex(`a`));
-    }();
-    () @safe {
-        replace!((a) => bar(a.hit))("blah", regex(`a`));
-    }();
-}
-
-// bugzilla 11262
-unittest
-{
-    enum reg = ctRegex!(r",", "g");
-    auto str = "This,List";
-    str = str.replace(reg, "-");
-    assert(str == "This-List");
-}
-
-// bugzilla 11775
-unittest
-{
-    assert(collectException(regex("a{1,0}")));
-}
-
-// bugzilla 11839
-unittest
-{
-    import std.algorithm.comparison : equal;
-    assert(regex(`(?P<var1>\w+)`).namedCaptures.equal(["var1"]));
-    assert(collectException(regex(`(?P<1>\w+)`)));
-    assert(regex(`(?P<v1>\w+)`).namedCaptures.equal(["v1"]));
-    assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"]));
-    assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"]));
-}
-
-// bugzilla 12076
-unittest
-{
-    auto RE = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)");
-    string s = "one two";
-    auto m = match(s, RE);
-}
-
-// bugzilla 12105
-unittest
-{
-    auto r = ctRegex!`.*?(?!a)`;
-    assert("aaab".matchFirst(r).hit == "aaa");
-    auto r2 = ctRegex!`.*(?!a)`;
-    assert("aaab".matchFirst(r2).hit == "aaab");
-}
-
-//bugzilla 11784
-unittest
-{
-    assert("abcdefghijklmnopqrstuvwxyz"
-        .matchFirst("[a-z&&[^aeiuo]]").hit == "b");
-}
-
-//bugzilla 12366
-unittest
-{
-     auto re = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`);
-     assert("xxxxxxxx".match(re).empty);
-     assert(!"xxxx".match(re).empty);
-}
-
-// bugzilla 12582
-unittest
-{
-    auto r = regex(`(?P<a>abc)`);
-    assert(collectException("abc".matchFirst(r)["b"]));
-}
-
-// bugzilla 12691
-unittest
-{
-    assert(bmatch("e@", "^([a-z]|)*$").empty);
-    assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty);
-}
-
-//bugzilla  12713
-unittest
-{
-    assertThrown(regex("[[a-z]([a-z]|(([[a-z])))"));
-}
-
-//bugzilla 12747
-unittest
-{
-    assertThrown(regex(`^x(\1)`));
-    assertThrown(regex(`^(x(\1))`));
-    assertThrown(regex(`^((x)(?=\1))`));
-}
-
-// bugzilla 14504
-unittest
-{
-    auto p = ctRegex!("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?" ~
-            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
-}
-
-// bugzilla 14529
-unittest
-{
-    auto ctPat2 = regex(r"^[CDF]$", "i");
-    foreach(v; ["C", "c", "D", "d", "F", "f"])
-        assert(matchAll(v, ctPat2).front.hit == v);
-}
-
-// bugzilla 14615
-unittest
-{
-    import std.stdio : writeln;
-    import std.regex : replaceFirst, replaceFirstInto, regex;
-    import std.array : appender;
-
-    auto example = "Hello, world!";
-    auto pattern = regex("^Hello, (bug)");  // won't find this one
-    auto result = replaceFirst(example, pattern, "$1 Sponge Bob");
-    assert(result == "Hello, world!");  // Ok.
-
-    auto sink = appender!string;
-    replaceFirstInto(sink, example, pattern, "$1 Sponge Bob");
-    assert(sink.data == "Hello, world!");
-    replaceAllInto(sink, example, pattern, "$1 Sponge Bob");
-    assert(sink.data == "Hello, world!Hello, world!");
-}
-
-// bugzilla 15573
-unittest
-{
-    auto rx = regex("[c d]", "x");
-    assert("a b".matchFirst(rx));
-}
-
-// bugzilla 15864
-unittest
-{
-    regex(`(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`);
-}
-
-unittest
-{
-    auto r = regex("(?# comment)abc(?# comment2)");
-    assert("abc".matchFirst(r));
-    assertThrown(regex("(?#..."));
-}
diff --git a/std/regex/internal/tests2.d b/std/regex/internal/tests2.d
new file mode 100644
index 00000000000..72d3c011763
--- /dev/null
+++ b/std/regex/internal/tests2.d
@@ -0,0 +1,268 @@
+/*
+    Regualar expressions package test suite part 2.
+*/
+module std.regex.internal.tests2;
+
+package(std.regex):
+
+import std.algorithm, std.conv, std.exception, std.meta, std.range,
+    std.typecons, std.regex;
+
+import std.regex.internal.parser : Escapables; // characters that need escaping
+
+
+unittest
+{
+    auto cr5 = ctRegex!("(?:a{2,4}b{1,3}){1,2}");
+    assert(bmatch("aaabaaaabbb", cr5).hit == "aaabaaaabbb");
+    auto cr6 = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w);
+    assert(bmatch("aaabaaaabbb"w,  cr6).hit == "aaab"w);
+}
+
+unittest
+{
+    auto cr7 = ctRegex!(`\r.*?$`,"sm");
+    assert(bmatch("abc\r\nxy",  cr7).hit == "\r\nxy");
+    auto greed =  ctRegex!("<packet.*?/packet>");
+    assert(bmatch("<packet>text</packet><packet>text</packet>", greed).hit
+            == "<packet>text</packet>");
+}
+
+unittest
+{
+    auto cr8 = ctRegex!("^(a)(b)?(c*)");
+    auto m8 = bmatch("abcc",cr8);
+    assert(m8);
+    assert(m8.captures[1] == "a");
+    assert(m8.captures[2] == "b");
+    assert(m8.captures[3] == "cc");
+    auto cr9 = ctRegex!("q(a|b)*q");
+    auto m9 = match("xxqababqyy",cr9);
+    assert(m9);
+    assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"]));
+}
+
+unittest
+{
+    auto rtr = regex("a|b|c");
+    enum ctr = regex("a|b|c");
+    assert(equal(rtr.ir,ctr.ir));
+    //CTFE parser BUG is triggered by group
+    //in the middle of alternation (at least not first and not last)
+    enum testCT = regex(`abc|(edf)|xyz`);
+    auto testRT = regex(`abc|(edf)|xyz`);
+    assert(equal(testCT.ir,testRT.ir));
+}
+
+unittest
+{
+    enum cx = ctRegex!"(A|B|C)";
+    auto mx = match("B",cx);
+    assert(mx);
+    assert(equal(mx.captures, [ "B", "B"]));
+    enum cx2 = ctRegex!"(A|B)*";
+    assert(match("BAAA",cx2));
+
+    enum cx3 = ctRegex!("a{3,4}","i");
+    auto mx3 = match("AaA",cx3);
+    assert(mx3);
+    assert(mx3.captures[0] == "AaA");
+    enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i");
+    auto mx4 = match("aaaabc", cx4);
+    assert(mx4);
+    assert(mx4.captures[0] == "aaaab");
+    auto cr8 = ctRegex!("(a)(b)?(c*)");
+    auto m8 = bmatch("abcc",cr8);
+    assert(m8);
+    assert(m8.captures[1] == "a");
+    assert(m8.captures[2] == "b");
+    assert(m8.captures[3] == "cc");
+    auto cr9 = ctRegex!(".*$", "gm");
+    auto m9 = match("First\rSecond", cr9);
+    assert(m9);
+    assert(equal(map!"a.hit"(m9), ["First", "", "Second"]));
+}
+
+unittest
+{
+//global matching
+    void test_body(alias matchFn)()
+    {
+        string s = "a quick brown fox jumps over a lazy dog";
+        auto r1 = regex("\\b[a-z]+\\b","g");
+        string[] test;
+        foreach(m; matchFn(s, r1))
+            test ~= m.hit;
+        assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));
+        auto free_reg = regex(`
+
+            abc
+            \s+
+            "
+            (
+                    [^"]+
+                |   \\ "
+            )+
+            "
+            z
+        `, "x");
+        auto m = match(`abc  "quoted string with \" inside"z`,free_reg);
+        assert(m);
+        string mails = " hey@you.com no@spam.net ";
+        auto rm = regex(`@(?<=\S+@)\S+`,"g");
+        assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"]));
+        auto m2 = matchFn("First line\nSecond line",regex(".*$","gm"));
+        assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"]));
+        auto m2a = matchFn("First line\nSecond line",regex(".+$","gm"));
+        assert(equal(map!"a[0]"(m2a), ["First line", "Second line"]));
+        auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm"));
+        assert(equal(map!"a[0]"(m2b), ["First line", "Second line"]));
+        debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!");
+    }
+    test_body!bmatch();
+    test_body!match();
+}
+
+//tests for accumulated std.regex issues and other regressions
+unittest
+{
+    void test_body(alias matchFn)()
+    {
+        //issue 5857
+        //matching goes out of control if ... in (...){x} has .*/.+
+        auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures;
+        assert(c[0] == "axxxzayyyyyzd");
+        assert(c[1] == "ayyyyyz");
+        auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures;
+        assert(c2[0] == "axxxayyyyyd");
+        assert(c2[1] == "ayyyyy");
+        //issue 2108
+        //greedy vs non-greedy
+        auto nogreed = regex("<packet.*?/packet>");
+        assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit
+               == "<packet>text</packet>");
+        auto greed =  regex("<packet.*/packet>");
+        assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit
+               == "<packet>text</packet><packet>text</packet>");
+        //issue 4574
+        //empty successful match still advances the input
+        string[] pres, posts, hits;
+        foreach(m; matchFn("abcabc", regex("","g"))) {
+            pres ~= m.pre;
+            posts ~= m.post;
+            assert(m.hit.empty);
+
+        }
+        auto heads = [
+            "abcabc",
+            "abcab",
+            "abca",
+            "abc",
+            "ab",
+            "a",
+            ""
+        ];
+        auto tails = [
+            "abcabc",
+             "bcabc",
+              "cabc",
+               "abc",
+                "bc",
+                 "c",
+                  ""
+        ];
+        assert(pres == array(retro(heads)));
+        assert(posts == tails);
+        //issue 6076
+        //regression on .*
+        auto re = regex("c.*|d");
+        auto m = matchFn("mm", re);
+        assert(!m);
+        debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!");
+        auto rprealloc = regex(`((.){5}.{1,10}){5}`);
+        auto arr = array(repeat('0',100));
+        auto m2 = matchFn(arr, rprealloc);
+        assert(m2);
+        assert(collectException(
+                regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$")
+                ) is null);
+        foreach(ch; [Escapables])
+        {
+            assert(match(to!string(ch),regex(`[\`~ch~`]`)));
+            assert(!match(to!string(ch),regex(`[^\`~ch~`]`)));
+            assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
+        }
+        //bugzilla 7718
+        string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
+        auto reStrCmd = regex (`(".*")|('.*')`, "g");
+        assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
+                     [`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
+    }
+    test_body!bmatch();
+    test_body!match();
+}
+
+// tests for replace
+unittest
+{
+    void test(alias matchFn)()
+    {
+        import std.uni : toUpper;
+
+        foreach(i, v; AliasSeq!(string, wstring, dstring))
+        {
+            auto baz(Cap)(Cap m)
+            if (is(Cap == Captures!(Cap.String)))
+            {
+                return toUpper(m.hit);
+            }
+            alias String = v;
+            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c"))
+                   == to!String("ack rapacity"));
+            assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c"))
+                   == to!String("ack capacity"));
+            assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]"))
+                   == to!String("[n]oon"));
+            assert(std.regex.replace!(matchFn)(to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'"))
+                   == to!String(": test2 test1 :"));
+            auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."),
+                    regex(to!String("[ar]"), "g"));
+            assert(s == "StRAp A Rocket engine on A chicken.", text(s));
+        }
+        debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~"  !!!");
+    }
+    test!(bmatch)();
+    test!(match)();
+}
+
+// tests for splitter
+unittest
+{
+    auto s1 = ", abc, de,     fg, hi, ";
+    auto sp1 = splitter(s1, regex(", *"));
+    auto w1 = ["", "abc", "de", "fg", "hi", ""];
+    assert(equal(sp1, w1));
+
+    auto s2 = ", abc, de,  fg, hi";
+    auto sp2 = splitter(s2, regex(", *"));
+    auto w2 = ["", "abc", "de", "fg", "hi"];
+
+    uint cnt;
+    foreach(e; sp2) {
+        assert(w2[cnt++] == e);
+    }
+    assert(equal(sp2, w2));
+}
+
+unittest
+{
+    char[] s1 = ", abc, de,  fg, hi, ".dup;
+    auto sp2 = splitter(s1, regex(", *"));
+}
+
+unittest
+{
+    auto s1 = ", abc, de,  fg, hi, ";
+    auto w1 = ["", "abc", "de", "fg", "hi", ""];
+    assert(equal(split(s1, regex(", *")), w1[]));
+}
diff --git a/std/regex/internal/tests3.d b/std/regex/internal/tests3.d
new file mode 100644
index 00000000000..237a221738b
--- /dev/null
+++ b/std/regex/internal/tests3.d
@@ -0,0 +1,305 @@
+/*
+    Regualar expressions package test suite part 3.
+*/
+module std.regex.internal.tests3;
+
+package(std.regex):
+
+import std.algorithm, std.conv, std.exception, std.meta, std.range,
+    std.typecons, std.regex;
+
+unittest
+{ // bugzilla 7141
+    string pattern = `[a\--b]`;
+    assert(match("-", pattern));
+    assert(match("b", pattern));
+    string pattern2 = `[&-z]`;
+    assert(match("b", pattern2));
+}
+unittest
+{//bugzilla 7111
+    assert(match("", regex("^")));
+}
+unittest
+{//bugzilla 7300
+    assert(!match("a"d, "aa"d));
+}
+
+// bugzilla 7551
+unittest
+{
+    auto r = regex("[]abc]*");
+    assert("]ab".matchFirst(r).hit == "]ab");
+    assertThrown(regex("[]"));
+    auto r2 = regex("[]abc--ab]*");
+    assert("]ac".matchFirst(r2).hit == "]");
+}
+
+unittest
+{//bugzilla 7674
+    assert("1234".replace(regex("^"), "$$") == "$1234");
+    assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?");
+    assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?");
+}
+unittest
+{// bugzilla 7679
+    foreach(S; AliasSeq!(string, wstring, dstring))
+    (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
+        enum re = ctRegex!(to!S(r"\."));
+        auto str = to!S("a.b");
+        assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")]));
+        assert(split(str, re) == [to!S("a"), to!S("b")]);
+    }();
+}
+unittest
+{//bugzilla 8203
+    string data = "
+    NAME   = XPAW01_STA:STATION
+    NAME   = XPAW01_STA
+    ";
+    auto uniFileOld = data;
+    auto r = regex(
+       r"^NAME   = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
+    auto uniCapturesNew = match(uniFileOld, r);
+    for(int i = 0; i < 20; i++)
+        foreach (matchNew; uniCapturesNew) {}
+    //a second issue with same symptoms
+    auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
+    match("аллея Театральная", r2);
+}
+unittest
+{// bugzilla 8637 purity of enforce
+    auto m = match("hello world", regex("world"));
+    enforce(m);
+}
+
+// bugzilla 8725
+unittest
+{
+  static italic = regex( r"\*
+                (?!\s+)
+                (.*?)
+                (?!\s+)
+                \*", "gx" );
+  string input = "this * is* interesting, *very* interesting";
+  assert(replace(input, italic, "<i>$1</i>") ==
+      "this * is* interesting, <i>very</i> interesting");
+}
+
+// bugzilla 8349
+unittest
+{
+    enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
+    enum peakRegex = ctRegex!(peakRegexStr);
+    //note that the regex pattern itself is probably bogus
+    assert(match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRegex));
+}
+
+// bugzilla 9211
+unittest
+{
+    auto rx_1 =  regex(r"^(\w)*(\d)");
+    auto m = match("1234", rx_1);
+    assert(equal(m.front, ["1234", "3", "4"]));
+    auto rx_2 = regex(r"^([0-9])*(\d)");
+    auto m2 = match("1234", rx_2);
+    assert(equal(m2.front, ["1234", "3", "4"]));
+}
+
+// bugzilla 9280
+unittest
+{
+    string tomatch = "a!b@c";
+    static r = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$");
+    auto nm = match(tomatch, r);
+    assert(nm);
+    auto c = nm.captures;
+    assert(c[1] == "a");
+    assert(c["nick"] == "a");
+}
+
+
+// bugzilla 9579
+unittest
+{
+    char[] input = ['a', 'b', 'c'];
+    string format = "($1)";
+    // used to give a compile error:
+    auto re = regex(`(a)`, "g");
+    auto r = replace(input, re, format);
+    assert(r == "(a)bc");
+}
+
+// bugzilla 9634
+unittest
+{
+    auto re = ctRegex!"(?:a+)";
+    assert(match("aaaa", re).hit == "aaaa");
+}
+
+//bugzilla 10798
+unittest
+{
+    auto cr = ctRegex!("[abcd--c]*");
+    auto m  = "abc".match(cr);
+    assert(m);
+    assert(m.hit == "ab");
+}
+
+// bugzilla 10913
+unittest
+{
+    @system static string foo(const(char)[] s)
+    {
+        return s.dup;
+    }
+    @safe static string bar(const(char)[] s)
+    {
+        return s.dup;
+    }
+    () @system {
+        replace!((a) => foo(a.hit))("blah", regex(`a`));
+    }();
+    () @safe {
+        replace!((a) => bar(a.hit))("blah", regex(`a`));
+    }();
+}
+
+// bugzilla 11262
+unittest
+{
+    enum reg = ctRegex!(r",", "g");
+    auto str = "This,List";
+    str = str.replace(reg, "-");
+    assert(str == "This-List");
+}
+
+// bugzilla 11775
+unittest
+{
+    assert(collectException(regex("a{1,0}")));
+}
+
+// bugzilla 11839
+unittest
+{
+    assert(regex(`(?P<var1>\w+)`).namedCaptures.equal(["var1"]));
+    assert(collectException(regex(`(?P<1>\w+)`)));
+    assert(regex(`(?P<v1>\w+)`).namedCaptures.equal(["v1"]));
+    assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"]));
+    assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"]));
+}
+
+// bugzilla 12076
+unittest
+{
+    auto RE = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)");
+    string s = "one two";
+    auto m = match(s, RE);
+}
+
+// bugzilla 12105
+unittest
+{
+    auto r = ctRegex!`.*?(?!a)`;
+    assert("aaab".matchFirst(r).hit == "aaa");
+    auto r2 = ctRegex!`.*(?!a)`;
+    assert("aaab".matchFirst(r2).hit == "aaab");
+}
+
+//bugzilla 11784
+unittest
+{
+    assert("abcdefghijklmnopqrstuvwxyz"
+        .matchFirst("[a-z&&[^aeiuo]]").hit == "b");
+}
+
+//bugzilla 12366
+unittest
+{
+     auto re = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`);
+     assert("xxxxxxxx".match(re).empty);
+     assert(!"xxxx".match(re).empty);
+}
+
+// bugzilla 12582
+unittest
+{
+    auto r = regex(`(?P<a>abc)`);
+    assert(collectException("abc".matchFirst(r)["b"]));
+}
+
+// bugzilla 12691
+unittest
+{
+    assert(bmatch("e@", "^([a-z]|)*$").empty);
+    assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty);
+}
+
+//bugzilla  12713
+unittest
+{
+    assertThrown(regex("[[a-z]([a-z]|(([[a-z])))"));
+}
+
+//bugzilla 12747
+unittest
+{
+    assertThrown(regex(`^x(\1)`));
+    assertThrown(regex(`^(x(\1))`));
+    assertThrown(regex(`^((x)(?=\1))`));
+}
+
+// bugzilla 14504
+unittest
+{
+    auto p = ctRegex!("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?" ~
+            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
+}
+
+// bugzilla 14529
+unittest
+{
+    auto ctPat2 = regex(r"^[CDF]$", "i");
+    foreach(v; ["C", "c", "D", "d", "F", "f"])
+        assert(matchAll(v, ctPat2).front.hit  == v);
+}
+
+// bugzilla 14615
+unittest
+{
+    import std.stdio : writeln;
+    import std.regex : replaceFirst, replaceFirstInto, regex;
+    import std.array : appender;
+
+    auto example = "Hello, world!";
+    auto pattern = regex("^Hello, (bug)");  // won't find this one
+    auto result = replaceFirst(example, pattern, "$1 Sponge Bob");
+    assert(result == "Hello, world!");  // Ok.
+
+    auto sink = appender!string;
+    replaceFirstInto(sink, example, pattern, "$1 Sponge Bob");
+    assert(sink.data == "Hello, world!");
+    replaceAllInto(sink, example, pattern, "$1 Sponge Bob");
+    assert(sink.data == "Hello, world!Hello, world!");
+}
+
+// bugzilla 15573
+unittest
+{
+    auto rx = regex("[c d]", "x");
+    assert("a b".matchFirst(rx));
+}
+
+// bugzilla 15864
+unittest
+{
+    regex(`(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`);
+}
+
+unittest
+{
+    auto r = regex("(?# comment)abc(?# comment2)");
+    assert("abc".matchFirst(r));
+    assertThrown(regex("(?#..."));
+}
diff --git a/win32.mak b/win32.mak
index 743f6f3260a..fd0cc4a8e94 100644
--- a/win32.mak
+++ b/win32.mak
@@ -222,6 +222,8 @@ SRC_STD_REGEX= \
 	std\regex\package.d \
 	std\regex\internal\parser.d \
 	std\regex\internal\tests.d \
+	std\regex\internal\tests2.d \
+	std\regex\internal\tests3.d \
 	std\regex\internal\backtracking.d \
 	std\regex\internal\thompson.d \
 	std\regex\internal\shiftor.d \
diff --git a/win64.mak b/win64.mak
index 7a7040a7d6b..d352d322b8a 100644
--- a/win64.mak
+++ b/win64.mak
@@ -241,6 +241,8 @@ SRC_STD_REGEX= \
 	std\regex\package.d \
 	std\regex\internal\parser.d \
 	std\regex\internal\tests.d \
+	std\regex\internal\tests2.d \
+	std\regex\internal\tests3.d \
 	std\regex\internal\backtracking.d \
 	std\regex\internal\thompson.d \
 	std\regex\internal\shiftor.d \

From b94a203b0bcf2be9d6c6f5c1367c5f4d13461612 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Sat, 7 May 2016 18:23:22 +0300
Subject: [PATCH 16/23] [std.regex] Re-style pass

---
 std/regex/internal/ir.d      | 24 ++++++++++++------------
 std/regex/internal/parser.d  |  4 ++--
 std/regex/internal/shiftor.d |  6 +++---
 std/regex/internal/tests.d   | 14 +++++++-------
 std/regex/internal/tests2.d  | 10 +++++-----
 std/regex/internal/tests3.d  |  6 +++---
 6 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/std/regex/internal/ir.d b/std/regex/internal/ir.d
index 1658831f5bc..6d47becc401 100644
--- a/std/regex/internal/ir.d
+++ b/std/regex/internal/ir.d
@@ -479,7 +479,7 @@ interface Kickstart(Char){
         assert(!empty);
         auto val = data[$ - 1];
         data = data[0 .. $ - 1];
-        if(!__ctfe)
+        if (!__ctfe)
             cast(void)data.assumeSafeAppend();
         return val;
     }
@@ -499,23 +499,23 @@ interface Kickstart(Char){
     Stack!(Tuple!(uint, uint, uint)) stack;
     uint start = 0;
     uint end = cast(uint)code.length;
-    for(;;)
+    for (;;)
     {
-        for(uint pc = start; pc < end; )
+        for (uint pc = start; pc < end; )
         {
             uint len = code[pc].length;
-            if(code[pc].code == IR.GotoEndOr)
+            if (code[pc].code == IR.GotoEndOr)
                 break; //pick next alternation branch
-            if(code[pc].isAtom)
+            if (code[pc].isAtom)
             {
                 rev[revPc - len .. revPc] = code[pc .. pc + len];
                 revPc -= len;
                 pc += len;
             }
-            else if(code[pc].isStart || code[pc].isEnd)
+            else if (code[pc].isStart || code[pc].isEnd)
             {
                 //skip over other embedded lookbehinds they are reversed
-                if(code[pc].code == IR.LookbehindStart
+                if (code[pc].code == IR.LookbehindStart
                     || code[pc].code == IR.NeglookbehindStart)
                 {
                     uint blockLen = len + code[pc].data
@@ -529,15 +529,15 @@ interface Kickstart(Char){
                 uint secLen = code[second].length;
                 rev[revPc - secLen .. revPc] = code[second .. second + secLen];
                 revPc -= secLen;
-                if(code[pc].code == IR.OrStart)
+                if (code[pc].code == IR.OrStart)
                 {
                     //we pass len bytes forward, but secLen in reverse
                     uint revStart = revPc - (second + len - secLen - pc);
                     uint r = revStart;
                     uint i = pc + IRL!(IR.OrStart);
-                    while(code[i].code == IR.Option)
+                    while (code[i].code == IR.Option)
                     {
-                        if(code[i - 1].code != IR.OrStart)
+                        if (code[i - 1].code != IR.OrStart)
                         {
                             assert(code[i - 1].code == IR.GotoEndOr);
                             rev[r - 1] = code[i - 1];
@@ -546,7 +546,7 @@ interface Kickstart(Char){
                         auto newStart = i + IRL!(IR.Option);
                         auto newEnd = newStart + code[i].data;
                         auto newRpc = r + code[i].data + IRL!(IR.Option);
-                        if(code[newEnd].code != IR.OrEnd)
+                        if (code[newEnd].code != IR.OrEnd)
                         {
                             newRpc--;
                         }
@@ -562,7 +562,7 @@ interface Kickstart(Char){
                     pc += len;
             }
         }
-        if(stack.empty)
+        if (stack.empty)
             break;
         start = stack.top[0];
         end = stack.top[1];
diff --git a/std/regex/internal/parser.d b/std/regex/internal/parser.d
index 3429dc5e6b6..8c7568a8e12 100644
--- a/std/regex/internal/parser.d
+++ b/std/regex/internal/parser.d
@@ -1607,10 +1607,10 @@ struct Parser(R, Generator)
         if (!(flags & RegexInfo.oneShot))
         {
             kickstart = new ShiftOr!Char(zis);
-            if(kickstart.empty)
+            if (kickstart.empty)
             {
                 kickstart = new BitMatcher!Char(zis);
-                if(kickstart.empty)
+                if (kickstart.empty)
                     kickstart = null;
             }
         }
diff --git a/std/regex/internal/shiftor.d b/std/regex/internal/shiftor.d
index 6066efd45ed..404c3c52679 100644
--- a/std/regex/internal/shiftor.d
+++ b/std/regex/internal/shiftor.d
@@ -533,7 +533,7 @@ unittest
     auto searches(C)(const (C)[] source, ShiftOr!C kick, uint[] results...)
     {
         auto inp = Input!C(source);
-        foreach(r; results)
+        foreach (r; results)
         {
             kick.search(inp);
             dchar ch;
@@ -543,7 +543,7 @@ unittest
         }
     }
 
-    foreach(i, Char; AliasSeq!(char, wchar, dchar))
+    foreach (i, Char; AliasSeq!(char, wchar, dchar))
     {
         alias String = immutable(Char)[];
         shiftOrLength(`abc`.to!String, 3);
@@ -562,7 +562,7 @@ unittest
         assert(inp._index == 8, text(Char.stringof," == ", kick.length));
     }
 
-    foreach(i, Char; AliasSeq!(char, wchar, dchar))
+    foreach (i, Char; AliasSeq!(char, wchar, dchar))
     {
         alias String = immutable(Char)[];
         auto kick = shiftOrLength(`abc[a-z]`.to!String, 4);
diff --git a/std/regex/internal/tests.d b/std/regex/internal/tests.d
index a339e3bed92..6a3db845991 100644
--- a/std/regex/internal/tests.d
+++ b/std/regex/internal/tests.d
@@ -352,7 +352,7 @@ unittest
     void run_tests(alias matchFn)()
     {
         int i;
-        foreach(Char; AliasSeq!( char, wchar, dchar))
+        foreach (Char; AliasSeq!( char, wchar, dchar))
         (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
             alias String = immutable(Char)[];
             String produceExpected(M,Range)(auto ref M m, Range fmt)
@@ -362,7 +362,7 @@ unittest
                 return app.data;
             }
             Regex!(Char) r;
-            foreach(a, tvd; tv)
+            foreach (a, tvd; tv)
             {
                 uint c = tvd.result[0];
                 debug(std_regex_test) writeln(" Test #", a, " pattern: ", tvd.pattern, " with Char = ", Char.stringof);
@@ -379,7 +379,7 @@ unittest
 
                 assert((c == 'c') ? !i : i, "failed to compile pattern "~tvd.pattern);
 
-                if(c != 'c')
+                if (c != 'c')
                 {
                     auto m = matchFn(to!(String)(tvd.input), r);
                     i = !m.empty;
@@ -427,10 +427,10 @@ unittest
         }
         else
             alias Tests = AliasSeq!(Sequence!(0, 25));
-        foreach(a, v; Tests)
+        foreach (a, v; Tests)
         (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
             enum tvd = tv[v];
-            static if(tvd.result == "c")
+            static if (tvd.result == "c")
             {
                 static assert(!__traits(compiles, (){
                     enum r = regex(tvd.pattern, tvd.flags);
@@ -448,11 +448,11 @@ unittest
                 bool ok = (c == 'y') ^ m.empty;
                 assert(ok, text("ctRegex: failed to match pattern #",
                     a ,": ", tvd.pattern));
-                if(c == 'y')
+                if (c == 'y')
                 {
                     import std.stdio;
                     auto result = produceExpected(m, tvd.format);
-                    if(result != tvd.replace)
+                    if (result != tvd.replace)
                         writeln("ctRegex mismatch pattern #", a, ": ", tvd.pattern," expected: ",
                                 tvd.replace, " vs ", result);
                 }
diff --git a/std/regex/internal/tests2.d b/std/regex/internal/tests2.d
index 72d3c011763..19286fa31f8 100644
--- a/std/regex/internal/tests2.d
+++ b/std/regex/internal/tests2.d
@@ -91,7 +91,7 @@ unittest
         string s = "a quick brown fox jumps over a lazy dog";
         auto r1 = regex("\\b[a-z]+\\b","g");
         string[] test;
-        foreach(m; matchFn(s, r1))
+        foreach (m; matchFn(s, r1))
             test ~= m.hit;
         assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));
         auto free_reg = regex(`
@@ -147,7 +147,7 @@ unittest
         //issue 4574
         //empty successful match still advances the input
         string[] pres, posts, hits;
-        foreach(m; matchFn("abcabc", regex("","g"))) {
+        foreach (m; matchFn("abcabc", regex("","g"))) {
             pres ~= m.pre;
             posts ~= m.post;
             assert(m.hit.empty);
@@ -186,7 +186,7 @@ unittest
         assert(collectException(
                 regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$")
                 ) is null);
-        foreach(ch; [Escapables])
+        foreach (ch; [Escapables])
         {
             assert(match(to!string(ch),regex(`[\`~ch~`]`)));
             assert(!match(to!string(ch),regex(`[^\`~ch~`]`)));
@@ -209,7 +209,7 @@ unittest
     {
         import std.uni : toUpper;
 
-        foreach(i, v; AliasSeq!(string, wstring, dstring))
+        foreach (i, v; AliasSeq!(string, wstring, dstring))
         {
             auto baz(Cap)(Cap m)
             if (is(Cap == Captures!(Cap.String)))
@@ -248,7 +248,7 @@ unittest
     auto w2 = ["", "abc", "de", "fg", "hi"];
 
     uint cnt;
-    foreach(e; sp2) {
+    foreach (e; sp2) {
         assert(w2[cnt++] == e);
     }
     assert(equal(sp2, w2));
diff --git a/std/regex/internal/tests3.d b/std/regex/internal/tests3.d
index 237a221738b..07541fbb62d 100644
--- a/std/regex/internal/tests3.d
+++ b/std/regex/internal/tests3.d
@@ -43,7 +43,7 @@ unittest
 }
 unittest
 {// bugzilla 7679
-    foreach(S; AliasSeq!(string, wstring, dstring))
+    foreach (S; AliasSeq!(string, wstring, dstring))
     (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
         enum re = ctRegex!(to!S(r"\."));
         auto str = to!S("a.b");
@@ -61,7 +61,7 @@ unittest
     auto r = regex(
        r"^NAME   = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
     auto uniCapturesNew = match(uniFileOld, r);
-    for(int i = 0; i < 20; i++)
+    for (int i = 0; i < 20; i++)
         foreach (matchNew; uniCapturesNew) {}
     //a second issue with same symptoms
     auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
@@ -261,7 +261,7 @@ unittest
 unittest
 {
     auto ctPat2 = regex(r"^[CDF]$", "i");
-    foreach(v; ["C", "c", "D", "d", "F", "f"])
+    foreach (v; ["C", "c", "D", "d", "F", "f"])
         assert(matchAll(v, ctPat2).front.hit  == v);
 }
 

From 3bed8491259b7dcc1e46e1bd3b5cc2ab0e72b924 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Sat, 7 May 2016 20:51:30 +0300
Subject: [PATCH 17/23] [std.regex] Save memory by building regex in 3 parts

---
 win32.mak | 28 +++++++++++++++++++---------
 win64.mak | 26 +++++++++++++++++---------
 2 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/win32.mak b/win32.mak
index fd0cc4a8e94..1d4a6409795 100644
--- a/win32.mak
+++ b/win32.mak
@@ -220,15 +220,19 @@ SRC_STD_RANGE= \
 SRC_STD_REGEX= \
 	std\regex\internal\ir.d \
 	std\regex\package.d \
-	std\regex\internal\parser.d \
 	std\regex\internal\tests.d \
-	std\regex\internal\tests2.d \
-	std\regex\internal\tests3.d \
+	std\regex\internal\generator.d
+
+SRC_STD_REGEX_2 = \
+	std\regex\internal\parser.d \
 	std\regex\internal\backtracking.d \
 	std\regex\internal\thompson.d \
+	std\regex\internal\tests2.d
+
+SRC_STD_REGEX_3 = \
 	std\regex\internal\shiftor.d \
 	std\regex\internal\bitnfa.d \
-	std\regex\internal\generator.d
+	std\regex\internal\tests3.d
 
 SRC_STD_C= \
 	std\c\process.d \
@@ -355,6 +359,8 @@ SRC_TO_COMPILE= \
 	$(SRC_STD_NET) \
 	$(SRC_STD_RANGE) \
 	$(SRC_STD_REGEX) \
+	$(SRC_STD_REGEX_2) \
+	$(SRC_STD_REGEX_3) \
 	$(SRC_STD_C) \
 	$(SRC_STD_WIN) \
 	$(SRC_STD_C_WIN) \
@@ -574,6 +580,8 @@ UNITTEST_OBJS= \
 		unittest8d.obj \
 		unittest8e.obj \
 		unittest8f.obj \
+		unittest8g.obj \
+		unittest8h.obj \
 		unittest9a.obj
 
 unittest : $(LIB)
@@ -588,11 +596,13 @@ unittest : $(LIB)
 	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest6.obj $(SRC_STD_6) $(SRC_STD_CONTAINER) $(SRC_STD_EXP_ALLOC) $(SRC_STD_EXP_LOGGER)
 	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest7.obj $(SRC_STD_7)
 	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8a.obj $(SRC_STD_REGEX)
-	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8b.obj $(SRC_STD_NET)
-	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8c.obj $(SRC_STD_C) $(SRC_STD_WIN) $(SRC_STD_C_WIN)
-	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8d.obj $(SRC_STD_INTERNAL) $(SRC_STD_INTERNAL_DIGEST) $(SRC_STD_INTERNAL_MATH) $(SRC_STD_INTERNAL_WINDOWS)
-	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8e.obj $(SRC_ETC) $(SRC_ETC_C)
-	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8f.obj $(SRC_STD_EXP)
+	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8b.obj $(SRC_STD_REGEX_2)
+	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8c.obj $(SRC_STD_REGEX_3)
+	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8d.obj $(SRC_STD_NET)
+	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8e.obj $(SRC_STD_C) $(SRC_STD_WIN) $(SRC_STD_C_WIN)
+	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8f.obj $(SRC_STD_INTERNAL) $(SRC_STD_INTERNAL_DIGEST) $(SRC_STD_INTERNAL_MATH) $(SRC_STD_INTERNAL_WINDOWS)
+	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8g.obj $(SRC_ETC) $(SRC_ETC_C)
+	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8h.obj $(SRC_STD_EXP)
 	$(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest9a.obj $(SRC_STD_EXP_NDSLICE)
 	$(DMD) $(UDFLAGS) -L/co -unittest unittest.d $(UNITTEST_OBJS) \
 		$(ZLIB) $(DRUNTIMELIB)
diff --git a/win64.mak b/win64.mak
index d352d322b8a..9c2a2a487aa 100644
--- a/win64.mak
+++ b/win64.mak
@@ -239,15 +239,19 @@ SRC_STD_RANGE= \
 SRC_STD_REGEX= \
 	std\regex\internal\ir.d \
 	std\regex\package.d \
-	std\regex\internal\parser.d \
 	std\regex\internal\tests.d \
-	std\regex\internal\tests2.d \
-	std\regex\internal\tests3.d \
+	std\regex\internal\generator.d
+
+SRC_STD_REGEX_2 = \
+	std\regex\internal\parser.d \
 	std\regex\internal\backtracking.d \
 	std\regex\internal\thompson.d \
+	std\regex\internal\tests2.d
+
+SRC_STD_REGEX_3 = \
 	std\regex\internal\shiftor.d \
 	std\regex\internal\bitnfa.d \
-	std\regex\internal\generator.d
+	std\regex\internal\tests3.d
 
 SRC_STD_C= \
 	std\c\process.d \
@@ -374,6 +378,8 @@ SRC_TO_COMPILE= \
 	$(SRC_STD_NET) \
 	$(SRC_STD_RANGE) \
 	$(SRC_STD_REGEX) \
+	$(SRC_STD_REGEX_2) \
+	$(SRC_STD_REGEX_3) \
 	$(SRC_STD_C) \
 	$(SRC_STD_WIN) \
 	$(SRC_STD_C_WIN) \
@@ -624,11 +630,13 @@ unittest : $(LIB)
 	$(DMD) $(UDFLAGS) -c -unittest -ofunittest6i.obj $(SRC_STD_6i)
 	$(DMD) $(UDFLAGS) -c -unittest -ofunittest7.obj $(SRC_STD_7) $(SRC_STD_EXP_LOGGER)
 	$(DMD) $(UDFLAGS) -c -unittest -ofunittest8a.obj $(SRC_STD_REGEX)
-	$(DMD) $(UDFLAGS) -c -unittest -ofunittest8b.obj $(SRC_STD_NET)
-	$(DMD) $(UDFLAGS) -c -unittest -ofunittest8c.obj $(SRC_STD_C) $(SRC_STD_WIN) $(SRC_STD_C_WIN)
-	$(DMD) $(UDFLAGS) -c -unittest -ofunittest8d.obj $(SRC_STD_INTERNAL) $(SRC_STD_INTERNAL_DIGEST) $(SRC_STD_INTERNAL_MATH) $(SRC_STD_INTERNAL_WINDOWS)
-	$(DMD) $(UDFLAGS) -c -unittest -ofunittest8e.obj $(SRC_ETC) $(SRC_ETC_C)
-	$(DMD) $(UDFLAGS) -c -unittest -ofunittest8f.obj $(SRC_STD_EXP)
+	$(DMD) $(UDFLAGS) -c -unittest -ofunittest8b.obj $(SRC_STD_REGEX_2)
+	$(DMD) $(UDFLAGS) -c -unittest -ofunittest8c.obj $(SRC_STD_REGEX_3)
+	$(DMD) $(UDFLAGS) -c -unittest -ofunittest8d.obj $(SRC_STD_NET)
+	$(DMD) $(UDFLAGS) -c -unittest -ofunittest8e.obj $(SRC_STD_C) $(SRC_STD_WIN) $(SRC_STD_C_WIN)
+	$(DMD) $(UDFLAGS) -c -unittest -ofunittest8f.obj $(SRC_STD_INTERNAL) $(SRC_STD_INTERNAL_DIGEST) $(SRC_STD_INTERNAL_MATH) $(SRC_STD_INTERNAL_WINDOWS)
+	$(DMD) $(UDFLAGS) -c -unittest -ofunittest8g.obj $(SRC_ETC) $(SRC_ETC_C)
+	$(DMD) $(UDFLAGS) -c -unittest -ofunittest8h.obj $(SRC_STD_EXP)
 	$(DMD) $(UDFLAGS) -c -unittest -ofunittest9.obj $(SRC_STD_EXP_ALLOC)
 	$(DMD) $(UDFLAGS) -c -unittest -ofunittest9a.obj $(SRC_STD_EXP_NDSLICE)
 	$(DMD) $(UDFLAGS) -L/OPT:NOICF -unittest unittest.d $(UNITTEST_OBJS) \

From 91f0671bc74d161b00cf3d6dc73475f3da914dfb Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Sat, 30 Jul 2016 17:01:27 +0200
Subject: [PATCH 18/23] [std.regex] rebase on top of latest master, fix shiftor

---
 std/regex/internal/bitnfa.d  |  2 ++
 std/regex/internal/shiftor.d | 11 ++++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
index e1ca33a5e32..691ad71a989 100644
--- a/std/regex/internal/bitnfa.d
+++ b/std/regex/internal/bitnfa.d
@@ -55,6 +55,7 @@ struct HashTab
 
     auto keys()
     {
+        import std.array : appender;
         auto app = appender!(uint[])();
         foreach (i, v; table)
         {
@@ -66,6 +67,7 @@ struct HashTab
 
     auto values()
     {
+        import std.array : appender;
         auto app = appender!(uint[])();
         foreach (i, v; table)
         {
diff --git a/std/regex/internal/shiftor.d b/std/regex/internal/shiftor.d
index 404c3c52679..39f1c3f6137 100644
--- a/std/regex/internal/shiftor.d
+++ b/std/regex/internal/shiftor.d
@@ -1,6 +1,6 @@
 /*
-    Kickstart is a coarse-grained "filter" engine that finds likely matches
-    to be verified by full-blown matcher.
+    ShiftOr is a kickstart engine, a coarse-grained "filter" engine that finds
+    potential matches to be verified by a full-blown matcher.
 */
 module std.regex.internal.shiftor;
 
@@ -351,7 +351,6 @@ public:
                 default:
                 L_StopThread:
                     assert(re.ir[t.pc].code >= 0x80, text(re.ir[t.pc].code));
-                    debug (fred_search) writeln("ShiftOr stumbled on ",re.ir[t.pc].mnemonic);
                     n_length = std.algorithm.comparison.min(t.idx, n_length);
                     break L_Eval_Thread;
                 }
@@ -543,6 +542,7 @@ unittest
         }
     }
 
+
     foreach (i, Char; AliasSeq!(char, wchar, dchar))
     {
         alias String = immutable(Char)[];
@@ -567,13 +567,14 @@ unittest
         alias String = immutable(Char)[];
         auto kick = shiftOrLength(`abc[a-z]`.to!String, 4);
         searches("abbabca".to!String, kick, 3);
-        kick = shiftOrLength(`(ax|bd|cdy)`.to!String, 2);
-        searches("abdcdyabax".to!String, kick, 1, 3, 8);
+        kick = shiftOrLength(`(axx|bdx|cdy)`.to!String, 3);
+        searches("abdcdxabax".to!String, kick, 3);
 
         shiftOrLength(`...`.to!String, 0);
         kick = shiftOrLength(`a(b{1,2}|c{1,2})x`.to!String, 3);
         searches("ababx".to!String, kick, 2);
         searches("abaacba".to!String, kick, 3); //expected inexact
     }
+
 }
 

From e98fa4ad5ad39487844c91357cfec4f698e88230 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Fri, 7 Oct 2016 01:20:32 +0300
Subject: [PATCH 19/23] [std.regex] Fix issue 9391 - const regex, ctRegex is
 immutable

---
 std/regex/internal/backtracking.d | 232 +++++++++++++++++-------------
 std/regex/internal/bitnfa.d       |  37 ++---
 std/regex/internal/generator.d    |   6 +-
 std/regex/internal/ir.d           |  99 +++++++------
 std/regex/internal/parser.d       | 129 ++---------------
 std/regex/internal/shiftor.d      |  20 +--
 std/regex/internal/tests.d        |   2 +-
 std/regex/internal/thompson.d     | 202 ++++++++++++++------------
 std/regex/package.d               | 139 +++++++++---------
 std/uni.d                         |  13 --
 10 files changed, 412 insertions(+), 467 deletions(-)

diff --git a/std/regex/internal/backtracking.d b/std/regex/internal/backtracking.d
index 2d14f604178..fb3b357ec67 100644
--- a/std/regex/internal/backtracking.d
+++ b/std/regex/internal/backtracking.d
@@ -30,7 +30,13 @@ template BacktrackingMatcher(bool CTregex)
         alias String = const(Char)[];
         alias RegEx = Regex!Char;
         alias MatchFn = bool function (ref BacktrackingMatcher!(Char, Stream));
-        RegEx re;      //regex program
+        const(Bytecode)[] ir;
+        uint ngroup;
+        uint flags;
+        const(Interval[])[] charsets;
+        const(CharMatcher)[] matchers;
+        const(BitTable)[] filters;
+        const Kickstart!Char kickstart;
         static if (CTregex)
             MatchFn nativeFn; //native code for that program
         //Stream state
@@ -79,12 +85,17 @@ template BacktrackingMatcher(bool CTregex)
 
         static size_t initialMemory(const ref RegEx re)
         {
-            return stackSize(re)*size_t.sizeof + re.hotspotTableSize*Trace.sizeof;
+            return stackSize(re.ngroup)*size_t.sizeof + re.hotspotTableSize*Trace.sizeof;
         }
 
-        static size_t stackSize(const ref RegEx re)
+        size_t initialMemory()
         {
-            return initialStack*(stateSize + re.ngroup*(Group!DataIndex).sizeof/size_t.sizeof)+1;
+            return stackSize(ngroup)*size_t.sizeof + merge.length*Trace.sizeof;
+        }
+
+        static size_t stackSize(uint ngroup)
+        {
+            return initialStack*(stateSize + ngroup*(Group!DataIndex).sizeof/size_t.sizeof)+1;
         }
 
         @property bool atStart(){ return index == 0; }
@@ -101,7 +112,7 @@ template BacktrackingMatcher(bool CTregex)
         {
             static if (kicked)
             {
-                if (!s.search(re.kickstart, front, index))
+                if (!s.search(kickstart, front, index))
                 {
                     index = s.lastIndex;
                 }
@@ -113,46 +124,69 @@ template BacktrackingMatcher(bool CTregex)
         //
         void newStack()
         {
-            auto chunk = mallocArray!(size_t)(stackSize(re));
+            auto chunk = mallocArray!(size_t)(stackSize(ngroup));
             chunk[0] = cast(size_t)(memory.ptr);
             memory = chunk[1..$];
         }
 
-        void initExternalMemory(void[] memBlock)
+        void initExternalMemory(void[] memBlock, size_t hotspotTableSize)
         {
-            merge = arrayInChunk!(Trace)(re.hotspotTableSize, memBlock);
+            merge = arrayInChunk!(Trace)(hotspotTableSize, memBlock);
             merge[] = Trace.init;
             memory = cast(size_t[])memBlock;
             memory[0] = 0; //hidden pointer
             memory = memory[1..$];
         }
 
-        void initialize(ref RegEx program, Stream stream, void[] memBlock)
+        void dupTo(void[] memory)
         {
-            re = program;
-            s = stream;
-            exhausted = false;
-            initExternalMemory(memBlock);
-            backrefed = null;
+            initExternalMemory(memory, merge.length);
         }
 
-        auto dupTo(void[] memory)
+        this(Matcher)(ref Matcher matcher, Stream stream, void[] memBlock, dchar ch, DataIndex idx)
         {
-            typeof(this) tmp = this;
-            tmp.initExternalMemory(memory);
-            return tmp;
+            ir = matcher.ir;
+            charsets = matcher.charsets;
+            filters = matcher.filters;
+            matchers = matcher.matchers;
+            ngroup = matcher.ngroup;
+            flags = matcher.flags;
+            s = stream;
+            exhausted = false;
+            initExternalMemory(memBlock, matcher.merge.length);
+            backrefed = null;
+            front = ch;
+            index = idx;
         }
 
-        this(ref RegEx program, Stream stream, void[] memBlock, dchar ch, DataIndex idx)
+        this(Matcher)(ref Matcher matcher, Stream stream, void[] memBlock)
         {
-            initialize(program, stream, memBlock);
-            front = ch;
-            index = idx;
+            ir = matcher.ir;
+            charsets = matcher.charsets;
+            filters = matcher.filters;
+            matchers = matcher.matchers;
+            ngroup = matcher.ngroup;
+            flags = matcher.flags;
+            s = stream;
+            exhausted = false;
+            initExternalMemory(memBlock, matcher.merge.length);
+            backrefed = null;
+            next();
         }
 
-        this(ref RegEx program, Stream stream, void[] memBlock)
+        this()(ref const RegEx program, Stream stream, void[] memBlock, uint regexFlags)
         {
-            initialize(program, stream, memBlock);
+            kickstart = program.kickstart;
+            ir = program.ir;
+            charsets = program.charsets;
+            filters = program.filters;
+            matchers = program.matchers;
+            ngroup = program.ngroup;
+            flags = regexFlags;
+            s = stream;
+            exhausted = false;
+            initExternalMemory(memBlock, program.hotspotTableSize);
+            backrefed = null;
             next();
         }
 
@@ -160,7 +194,7 @@ template BacktrackingMatcher(bool CTregex)
         {
             alias BackMatcherTempl = .BacktrackingMatcher!(CTregex);
             alias BackMatcher = BackMatcherTempl!(Char, Stream);
-            auto fwdMatcher = BackMatcher(matcher.re, s, memBlock, front, index);
+            auto fwdMatcher = BackMatcher(matcher, s, memBlock, front, index);
             return fwdMatcher;
         }
 
@@ -169,7 +203,7 @@ template BacktrackingMatcher(bool CTregex)
             alias BackMatcherTempl = .BacktrackingMatcher!(CTregex);
             alias BackMatcher = BackMatcherTempl!(Char, typeof(s.loopBack(index)));
             auto fwdMatcher =
-                BackMatcher(matcher.re, s.loopBack(index), memBlock);
+                BackMatcher(matcher, s.loopBack(index), memBlock);
             return fwdMatcher;
         }
 
@@ -182,7 +216,7 @@ template BacktrackingMatcher(bool CTregex)
             {//stream is updated here
                 matches[0].begin = start;
                 matches[0].end = index;
-                if (!(re.flags & RegexOption.global) || atEnd)
+                if (!(flags & RegexOption.global) || atEnd)
                     exhausted = true;
                 if (start == index)//empty match advances input
                     next();
@@ -202,7 +236,7 @@ template BacktrackingMatcher(bool CTregex)
             if (exhausted) //all matches collected
                 return false;
             this.matches = matches;
-            if (re.flags & RegexInfo.oneShot)
+            if (flags & RegexInfo.oneShot)
             {
                 exhausted = true;
                 const DataIndex start = index;
@@ -216,7 +250,7 @@ template BacktrackingMatcher(bool CTregex)
             }
             static if (kicked)
             {
-                if (re.kickstart)
+                if (kickstart)
                 {
                     for (;;)
                     {
@@ -285,19 +319,19 @@ template BacktrackingMatcher(bool CTregex)
                 {
                     debug(std_regex_matcher)
                         writefln("PC: %s\tCNT: %s\t%s \tfront: %s src: %s",
-                            pc, counter, disassemble(re.ir, pc, re.dict),
+                            pc, counter, disassemble(ir, pc),
                             front, s._index);
-                    switch (re.ir[pc].code)
+                    switch (ir[pc].code)
                     {
                     case IR.OrChar://assumes IRL!(OrChar) == 1
                         if (atEnd)
                             goto L_backtrack;
-                        uint len = re.ir[pc].sequence;
+                        uint len = ir[pc].sequence;
                         uint end = pc + len;
-                        if (re.ir[pc].data != front && re.ir[pc+1].data != front)
+                        if (ir[pc].data != front && ir[pc+1].data != front)
                         {
                             for (pc = pc+2; pc < end; pc++)
-                                if (re.ir[pc].data == front)
+                                if (ir[pc].data == front)
                                     break;
                             if (pc == end)
                                 goto L_backtrack;
@@ -306,7 +340,7 @@ template BacktrackingMatcher(bool CTregex)
                         next();
                         break;
                     case IR.Char:
-                        if (atEnd || front != re.ir[pc].data)
+                        if (atEnd || front != ir[pc].data)
                             goto L_backtrack;
                         pc += IRL!(IR.Char);
                         next();
@@ -318,13 +352,13 @@ template BacktrackingMatcher(bool CTregex)
                         next();
                         break;
                     case IR.CodepointSet:
-                        if (atEnd || !re.charsets[re.ir[pc].data].scanFor(front))
+                        if (atEnd || !charsets[ir[pc].data].scanFor(front))
                             goto L_backtrack;
                         next();
                         pc += IRL!(IR.CodepointSet);
                         break;
                     case IR.Trie:
-                        if (atEnd || !re.matchers[re.ir[pc].data][front])
+                        if (atEnd || !matchers[ir[pc].data][front])
                             goto L_backtrack;
                         next();
                         pc += IRL!(IR.Trie);
@@ -412,10 +446,10 @@ template BacktrackingMatcher(bool CTregex)
                             goto L_backtrack;
                         break;
                     case IR.InfiniteStart, IR.InfiniteQStart:
-                        pc += re.ir[pc].data + IRL!(IR.InfiniteStart);
+                        pc += ir[pc].data + IRL!(IR.InfiniteStart);
                         //now pc is at end IR.Infinite(Q)End
-                        uint len = re.ir[pc].data;
-                        if (re.ir[pc].code == IR.InfiniteEnd)
+                        uint len = ir[pc].data;
+                        if (ir[pc].code == IR.InfiniteEnd)
                         {
                             pushState(pc+IRL!(IR.InfiniteEnd), counter);
                             pc -= len;
@@ -427,29 +461,29 @@ template BacktrackingMatcher(bool CTregex)
                         }
                         break;
                     case IR.InfiniteBloomStart:
-                        pc += re.ir[pc].data + IRL!(IR.InfiniteBloomStart);
+                        pc += ir[pc].data + IRL!(IR.InfiniteBloomStart);
                         //now pc is at end IR.InfiniteBloomEnd
-                        immutable len = re.ir[pc].data;
-                        immutable filterIdx = re.ir[pc+2].raw;
-                        if (re.filters[filterIdx][front])
+                        immutable len = ir[pc].data;
+                        immutable filterIdx = ir[pc+2].raw;
+                        if (filters[filterIdx][front])
                             pushState(pc+IRL!(IR.InfiniteBloomEnd), counter);
                         pc -= len;
                         break;
                     case IR.RepeatStart, IR.RepeatQStart:
-                        pc += re.ir[pc].data + IRL!(IR.RepeatStart);
+                        pc += ir[pc].data + IRL!(IR.RepeatStart);
                         break;
                     case IR.RepeatEnd:
                     case IR.RepeatQEnd:
-                        if (merge[re.ir[pc + 1].raw+counter].mark(index))
+                        if (merge[ir[pc + 1].raw+counter].mark(index))
                         {
                             // merged!
                             goto L_backtrack;
                         }
                         //len, step, min, max
-                        immutable len = re.ir[pc].data;
-                        immutable step =  re.ir[pc+2].raw;
-                        immutable min = re.ir[pc+3].raw;
-                        immutable max = re.ir[pc+4].raw;
+                        immutable len = ir[pc].data;
+                        immutable step =  ir[pc+2].raw;
+                        immutable min = ir[pc+3].raw;
+                        immutable max = ir[pc+4].raw;
                         if (counter < min)
                         {
                             counter += step;
@@ -457,7 +491,7 @@ template BacktrackingMatcher(bool CTregex)
                         }
                         else if (counter < max)
                         {
-                            if (re.ir[pc].code == IR.RepeatEnd)
+                            if (ir[pc].code == IR.RepeatEnd)
                             {
                                 pushState(pc + IRL!(IR.RepeatEnd), counter%step);
                                 counter += step;
@@ -479,13 +513,13 @@ template BacktrackingMatcher(bool CTregex)
                     case IR.InfiniteEnd:
                     case IR.InfiniteQEnd:
                         debug(std_regex_matcher) writeln("Infinited nesting:", infiniteNesting);
-                        if (merge[re.ir[pc + 1].raw+counter].mark(index))
+                        if (merge[ir[pc + 1].raw+counter].mark(index))
                         {
                             // merged!
                             goto L_backtrack;
                         }
-                        immutable len = re.ir[pc].data;
-                        if (re.ir[pc].code == IR.InfiniteEnd)
+                        immutable len = ir[pc].data;
+                        if (ir[pc].code == IR.InfiniteEnd)
                         {
                             pushState(pc + IRL!(IR.InfiniteEnd), counter);
                             pc -= len;
@@ -498,14 +532,14 @@ template BacktrackingMatcher(bool CTregex)
                         break;
                     case IR.InfiniteBloomEnd:
                         debug(std_regex_matcher) writeln("Infinited nesting:", infiniteNesting);
-                        if (merge[re.ir[pc + 1].raw+counter].mark(index))
+                        if (merge[ir[pc + 1].raw+counter].mark(index))
                         {
                             // merged!
                             goto L_backtrack;
                         }
-                        immutable len = re.ir[pc].data;
-                        immutable filterIdx = re.ir[pc+2].raw;
-                        if (re.filters[filterIdx][front])
+                        immutable len = ir[pc].data;
+                        immutable filterIdx = ir[pc+2].raw;
+                        if (filters[filterIdx][front])
                         {
                             infiniteNesting--;
                             pushState(pc + IRL!(IR.InfiniteBloomEnd), counter);
@@ -514,7 +548,7 @@ template BacktrackingMatcher(bool CTregex)
                         pc -= len;
                         break;
                     case IR.OrEnd:
-                        if (merge[re.ir[pc + 1].raw+counter].mark(index))
+                        if (merge[ir[pc + 1].raw+counter].mark(index))
                         {
                             // merged!
                             goto L_backtrack;
@@ -525,34 +559,34 @@ template BacktrackingMatcher(bool CTregex)
                         pc += IRL!(IR.OrStart);
                         goto case;
                     case IR.Option:
-                        immutable len = re.ir[pc].data;
-                        if (re.ir[pc+len].code == IR.GotoEndOr)//not a last one
+                        immutable len = ir[pc].data;
+                        if (ir[pc+len].code == IR.GotoEndOr)//not a last one
                         {
                             pushState(pc + len + IRL!(IR.Option), counter); //remember 2nd branch
                         }
                         pc += IRL!(IR.Option);
                         break;
                     case IR.GotoEndOr:
-                        pc = pc + re.ir[pc].data + IRL!(IR.GotoEndOr);
+                        pc = pc + ir[pc].data + IRL!(IR.GotoEndOr);
                         break;
                     case IR.GroupStart:
-                        immutable n = re.ir[pc].data;
+                        immutable n = ir[pc].data;
                         matches[n].begin = index;
                         debug(std_regex_matcher)  writefln("IR group #%u starts at %u", n, index);
                         pc += IRL!(IR.GroupStart);
                         break;
                     case IR.GroupEnd:
-                        immutable n = re.ir[pc].data;
+                        immutable n = ir[pc].data;
                         matches[n].end = index;
                         debug(std_regex_matcher) writefln("IR group #%u ends at %u", n, index);
                         pc += IRL!(IR.GroupEnd);
                         break;
                     case IR.LookaheadStart:
                     case IR.NeglookaheadStart:
-                        immutable len = re.ir[pc].data;
+                        immutable len = ir[pc].data;
                         auto save = index;
-                        immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
-                        auto mem = malloc(initialMemory(re))[0..initialMemory(re)];
+                        immutable ms = ir[pc+1].raw, me = ir[pc+2].raw;
+                        auto mem = malloc(initialMemory())[0..initialMemory()];
                         scope(exit) free(mem.ptr);
                         static if (Stream.isLoopback)
                         {
@@ -564,10 +598,10 @@ template BacktrackingMatcher(bool CTregex)
                         }
                         matcher.matches = matches[ms .. me];
                         matcher.backrefed = backrefed.empty ? matches : backrefed;
-                        matcher.re.ir = re.ir[
+                        matcher.ir = ir[
                             pc+IRL!(IR.LookaheadStart) .. pc+IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd)
                         ];
-                        immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookaheadStart);
+                        immutable match = (matcher.matchImpl() != 0) ^ (ir[pc].code == IR.NeglookaheadStart);
                         s.reset(save);
                         next();
                         if (!match)
@@ -579,26 +613,26 @@ template BacktrackingMatcher(bool CTregex)
                         break;
                     case IR.LookbehindStart:
                     case IR.NeglookbehindStart:
-                        immutable len = re.ir[pc].data;
-                        immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw;
-                        auto mem = malloc(initialMemory(re))[0..initialMemory(re)];
+                        immutable len = ir[pc].data;
+                        immutable ms = ir[pc+1].raw, me = ir[pc+2].raw;
+                        auto mem = malloc(initialMemory())[0..initialMemory()];
                         scope(exit) free(mem.ptr);
                         static if (Stream.isLoopback)
                         {
                             alias Matcher = BacktrackingMatcher!(Char, Stream);
-                            auto matcher = Matcher(re, s, mem, front, index);
+                            auto matcher = Matcher(this, s, mem, front, index);
                         }
                         else
                         {
                             alias Matcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index)));
-                            auto matcher = Matcher(re, s.loopBack(index), mem);
+                            auto matcher = Matcher(this, s.loopBack(index), mem);
                         }
                         matcher.matches = matches[ms .. me];
-                        matcher.re.ir = re.ir[
+                        matcher.ir = ir[
                           pc + IRL!(IR.LookbehindStart) .. pc + IRL!(IR.LookbehindStart) + len + IRL!(IR.LookbehindEnd)
                         ];
                         matcher.backrefed  = backrefed.empty ? matches : backrefed;
-                        immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookbehindStart);
+                        immutable match = (matcher.matchImpl() != 0) ^ (ir[pc].code == IR.NeglookbehindStart);
                         if (!match)
                             goto L_backtrack;
                         else
@@ -607,8 +641,8 @@ template BacktrackingMatcher(bool CTregex)
                         }
                         break;
                     case IR.Backref:
-                        immutable n = re.ir[pc].data;
-                        auto referenced = re.ir[pc].localRef
+                        immutable n = ir[pc].data;
+                        auto referenced = ir[pc].localRef
                                 ? s[matches[n].begin .. matches[n].end]
                                 : s[backrefed[n].begin .. backrefed[n].end];
                         while (!atEnd && !referenced.empty && front == referenced.front)
@@ -629,9 +663,9 @@ template BacktrackingMatcher(bool CTregex)
                     case IR.LookbehindEnd:
                     case IR.NeglookbehindEnd:
                     case IR.End:
-                        return re.ir[pc].data;
+                        return ir[pc].data;
                     default:
-                        debug printBytecode(re.ir[0..$]);
+                        debug printBytecode(ir[0..$]);
                         assert(0);
                     L_backtrack:
                         if (!popState())
@@ -660,7 +694,7 @@ template BacktrackingMatcher(bool CTregex)
             {
                 import core.stdc.stdlib : free;
                 free(memory.ptr);//last segment is freed in RegexMatch
-                immutable size = initialStack*(stateSize + 2*re.ngroup);
+                immutable size = initialStack*(stateSize + 2*ngroup);
                 memory = prev[0..size];
                 lastState = size;
                 return true;
@@ -795,7 +829,7 @@ struct CtContext
     //to mark the portion of matches to save
     int match, total_matches;
     int reserved;
-    CodepointSet[] charsets;
+    const Interval[][] charsets;
 
 
     //state of codegenerator
@@ -805,7 +839,7 @@ struct CtContext
         int addr;
     }
 
-    this(Char)(Regex!Char re)
+    this(Char)(const Regex!Char re)
     {
         match = 1;
         reserved = 1; //first match is skipped
@@ -867,7 +901,7 @@ struct CtContext
     }
 
     //
-    CtState ctGenBlock(Bytecode[] ir, int addr)
+    CtState ctGenBlock(const(Bytecode)[] ir, int addr)
     {
         CtState result;
         result.addr = addr;
@@ -881,7 +915,7 @@ struct CtContext
     }
 
     //
-    CtState ctGenGroup(ref Bytecode[] ir, int addr)
+    CtState ctGenGroup(ref const(Bytecode)[] ir, int addr)
     {
         import std.algorithm.comparison : max;
         auto bailOut = "goto L_backtrack;";
@@ -944,7 +978,7 @@ struct CtContext
                         //(neg)lookaround piece ends
                     }
                     auto save = index;
-                    auto mem = malloc(initialMemory(re))[0..initialMemory(re)];
+                    auto mem = malloc(initialMemory())[0..initialMemory()];
                     scope(exit) free(mem.ptr);
                     static if (typeof(matcher.s).isLoopback)
                         auto lookaround = $$;
@@ -983,7 +1017,7 @@ struct CtContext
     }
 
     //generate source for bytecode contained  in OrStart ... OrEnd
-    CtState ctGenAlternation(Bytecode[] ir, int addr)
+    CtState ctGenAlternation(const(Bytecode)[] ir, int addr)
     {
         CtState[] pieces;
         CtState r;
@@ -1023,11 +1057,11 @@ struct CtContext
 
     // generate fixup code for instruction in ir,
     // fixup means it has an alternative way for control flow
-    string ctGenFixupCode(Bytecode[] ir, int addr, int fixup)
+    string ctGenFixupCode(const(Bytecode)[] ir, int addr, int fixup)
     {
         return ctGenFixupCode(ir, addr, fixup); // call ref Bytecode[] version
     }
-    string ctGenFixupCode(ref Bytecode[] ir, int addr, int fixup)
+    string ctGenFixupCode(ref const(Bytecode)[] ir, int addr, int fixup)
     {
         string r;
         string testCode;
@@ -1181,7 +1215,7 @@ struct CtContext
     }
 
 
-    string ctQuickTest(Bytecode[] ir, int id)
+    string ctQuickTest(const(Bytecode)[] ir, int id)
     {
         uint pc = 0;
         while (pc < ir.length && ir[pc].isAtom)
@@ -1208,7 +1242,7 @@ struct CtContext
     }
 
     //process & generate source for simple bytecodes at front of ir using address addr
-    CtState ctGenAtom(ref Bytecode[] ir, int addr)
+    CtState ctGenAtom(ref const(Bytecode)[] ir, int addr)
     {
         CtState result;
         result.code = ctAtomCode(ir, addr);
@@ -1218,7 +1252,7 @@ struct CtContext
     }
 
     //D code for atom at ir using address addr, addr < 0 means quickTest
-    string ctAtomCode(Bytecode[] ir, int addr)
+    string ctAtomCode(const(Bytecode)[] ir, int addr)
     {
         string code;
         string bailOut, nextInstr;
@@ -1263,7 +1297,7 @@ struct CtContext
             break;
         case IR.Any:
             code ~= ctSub( `
-                    if (atEnd || (!(re.flags & RegexOption.singleline)
+                    if (atEnd || (!(flags & RegexOption.singleline)
                                 && (front == '\r' || front == '\n')))
                         $$
                     $$
@@ -1273,7 +1307,7 @@ struct CtContext
             if (charsets.length)
             {
                 string name = `func_`~to!string(addr+1);
-                string funcCode = charsets[ir[0].data].toSourceCode(name);
+                string funcCode = CodepointSet(charsets[ir[0].data]).toSourceCode(name);
                 code ~= ctSub( `
                     static $$
                     if (atEnd || !$$(front))
@@ -1283,16 +1317,16 @@ struct CtContext
             }
             else
                 code ~= ctSub( `
-                    if (atEnd || !re.charsets[$$].scanFor(front))
+                    if (atEnd || !charsets[$$].scanFor(front))
                         $$
                     $$
                 $$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr);
             break;
         case IR.Trie:
-            if (charsets.length && charsets[ir[0].data].byInterval.length  <= 8)
+            if (charsets.length && charsets[ir[0].data].length  <= 8)
                 goto case IR.CodepointSet;
             code ~= ctSub( `
-                    if (atEnd || !re.matchers[$$][front])
+                    if (atEnd || !matchers[$$][front])
                         $$
                     $$
                 $$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr);
@@ -1430,7 +1464,7 @@ struct CtContext
     }
 
     //generate D code for the whole regex
-    public string ctGenRegEx(Bytecode[] ir)
+    public string ctGenRegEx(const(Bytecode)[] ir)
     {
         auto bdy = ctGenBlock(ir, 0);
         auto r = `
@@ -1476,7 +1510,7 @@ struct CtContext
 
 }
 
-string ctGenRegExCode(Char)(Regex!Char re)
+string ctGenRegExCode(Char)(const Regex!Char re)
 {
     auto context = CtContext(re);
     return context.ctGenRegEx(re.ir);
diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
index 691ad71a989..3e7fbd61bfd 100644
--- a/std/regex/internal/bitnfa.d
+++ b/std/regex/internal/bitnfa.d
@@ -20,16 +20,17 @@ import std.algorithm;
 
 struct HashTab
 {
+pure:
     @disable this(this);
 
-    uint opIndex()(uint key)
+    uint opIndex()(uint key) const
     {
         auto p = locate(key, table);
         assert(p.occupied);
         return p.value;
     }
 
-    bool opBinaryRight(string op:"in")(uint key)
+    bool opBinaryRight(string op:"in")(uint key) const
     {
         auto p = locate(key, table);
         return p.occupied;
@@ -53,7 +54,7 @@ struct HashTab
         p.value = value;
     }
 
-    auto keys()
+    auto keys() const
     {
         import std.array : appender;
         auto app = appender!(uint[])();
@@ -65,7 +66,7 @@ struct HashTab
         return app.data;
     }
 
-    auto values()
+    auto values() const
     {
         import std.array : appender;
         auto app = appender!(uint[])();
@@ -85,17 +86,17 @@ private:
 
     struct Node
     {
+    pure:
         uint key_;
         uint value;
-        @property uint key()(){ return key_ & 0x7fff_ffff; }
-        @property bool occupied()(){ return (key_ & 0x8000_0000) != 0; }
+        @property uint key()() const { return key_ & 0x7fff_ffff; }
+        @property bool occupied()() const { return (key_ & 0x8000_0000) != 0; }
         void setOccupied(){ key_ |= 0x8000_0000; }
-
     }
     Node[] table;
     size_t items;
 
-    static Node* locate()(uint key, Node[] table)
+    static N* locate(N)(uint key, N[] table)
     {
         size_t slot = hashOf(key) & (table.length-1);
         while (table[slot].occupied)
@@ -131,6 +132,7 @@ private:
 // and ref count is decreased
 struct UIntTrie2
 {
+pure:
     ushort[] index;                       // pages --> blocks
     ushort[] refCounts;                   // ref counts for each block
     uint[]   hashes;                      // hashes of blocks
@@ -164,7 +166,7 @@ struct UIntTrie2
         return ut;
     }
 
-    uint opIndex(dchar ch)
+    uint opIndex(dchar ch) const
     {
         immutable blk = index[ch>>blockBits];
         return blocks.ptr[blk*blockSize + (ch & (blockSize-1))];
@@ -258,6 +260,7 @@ unittest
 // to run backwards to find the start.
 struct BitNfa
 {
+pure:
     uint[128]   asciiTab;         // state mask for ascii characters
     UIntTrie2   uniTab;           // state mask for unicode characters
     HashTab     controlFlow;      // maps each bit pattern to resulting jumps pattern
@@ -468,7 +471,7 @@ outer:  for (uint i=0; i<ir.length; i += ir[i].length) with(IR)
             case CodepointSet, Trie:
                 auto cset = charsets[ir[i].data];
                 uint mask = 1u<<bitMapping[i];
-                foreach (ival; cset.byInterval)
+                foreach (ival; cset)
                 {
                     if (ival.b < 0x80)
                         asciiTab[ival.a..ival.b] &= ~mask;
@@ -497,7 +500,7 @@ outer:  for (uint i=0; i<ir.length; i += ir[i].length) with(IR)
         }
     }
 
-    bool search(Input)(ref Input r)
+    bool search(Input)(ref Input r) const
     {
         dchar ch;
         size_t idx;
@@ -525,7 +528,7 @@ outer:  for (uint i=0; i<ir.length; i += ir[i].length) with(IR)
         return false;
     }
 
-    bool match(Input)(ref Input r)
+    bool match(Input)(ref Input r) const
     {
         dchar ch;
         size_t idx;
@@ -567,7 +570,7 @@ outer:  for (uint i=0; i<ir.length; i += ir[i].length) with(IR)
     }
 }
 
-auto reverseBitNfa(Char)(auto ref Regex!Char re, uint length)
+auto reverseBitNfa(Char)(auto ref Regex!Char re, uint length) pure
 {
     auto re2 = re;
     re2.ir = re2.ir.dup;
@@ -610,7 +613,7 @@ final class BitMatcher(Char) : Kickstart!(Char)
 @trusted:
     BitNfa forward, backward;
 
-    this()(auto ref Regex!Char re)
+    pure this()(auto ref Regex!Char re)
     {
         forward = BitNfa(re);
         // keep the end where it belongs
@@ -618,7 +621,7 @@ final class BitMatcher(Char) : Kickstart!(Char)
             backward = reverseBitNfa(re, forward.length);
     }
 
-    final bool search(ref Input!Char r)
+    final bool search(ref Input!Char r) const
     {
         auto save = r._index;
         bool res = forward.search(r);
@@ -635,7 +638,7 @@ final class BitMatcher(Char) : Kickstart!(Char)
         return res;
     }
 
-    final bool match(ref Input!Char r)
+    final bool match(ref Input!Char r) const
     {
         auto save = r._index;
         bool res = forward.match(r);
@@ -643,7 +646,7 @@ final class BitMatcher(Char) : Kickstart!(Char)
         return res;
     }
 
-    final @property bool empty() const{ return forward.empty; }
+    final @property bool empty() pure const{ return forward.empty; }
 }
 
 version(unittest)
diff --git a/std/regex/internal/generator.d b/std/regex/internal/generator.d
index a0f2d7213bc..e9f86e83130 100644
--- a/std/regex/internal/generator.d
+++ b/std/regex/internal/generator.d
@@ -58,9 +58,9 @@ module std.regex.internal.generator;
                 case IR.CodepointSet:
                 case IR.Trie:
                     auto set = re.charsets[re.ir[pc].data];
-                    auto x = rand(cast(uint)set.byInterval.length);
-                    auto y = rand(set.byInterval[x].b - set.byInterval[x].a);
-                    formattedWrite(app, "%s", cast(dchar)(set.byInterval[x].a+y));
+                    auto x = rand(cast(uint)set.length);
+                    auto y = rand(set[x].b - set[x].a);
+                    formattedWrite(app, "%s", cast(dchar)(set[x].a+y));
                     pc += IRL!(IR.CodepointSet);
                     break;
                 case IR.Any:
diff --git a/std/regex/internal/ir.d b/std/regex/internal/ir.d
index 6d47becc401..6795e737332 100644
--- a/std/regex/internal/ir.d
+++ b/std/regex/internal/ir.d
@@ -9,7 +9,7 @@ module std.regex.internal.ir;
 
 package(std.regex):
 
-import std.exception, std.uni, std.meta, std.traits, std.range.primitives;
+import std.exception, std.uni, std.meta, std.traits, std.typecons, std.range.primitives;
 
 debug(std_regex_parser) import std.stdio;
 // just a common trait, may be moved elsewhere
@@ -28,25 +28,6 @@ alias makeTrie = codepointSetTrie!(13, 8);
 
 CharMatcher[CodepointSet] matcherCache;
 
-//accessor with caching
-@trusted CharMatcher getMatcher(CodepointSet set)
-{// @@@BUG@@@ 6357 almost all properties of AA are not @safe
-    if (__ctfe || maxCachedMatchers == 0)
-        return CharMatcher(set);
-    else
-    {
-        auto p = set in matcherCache;
-        if (p)
-            return *p;
-        if (matcherCache.length == maxCachedMatchers)
-        {
-            // flush enmatchers in trieCache
-            matcherCache = null;
-        }
-        return (matcherCache[set] = CharMatcher(set));
-    }
-}
-
 @trusted auto memoizeExpr(string expr)()
 {
     if (__ctfe)
@@ -63,10 +44,10 @@ CharMatcher[CodepointSet] matcherCache;
 }
 
 //property for \w character class
-@property CodepointSet wordCharacter()
+@property CodepointSet wordCharacter() pure
 {
-    return memoizeExpr!("unicode.Alphabetic | unicode.Mn | unicode.Mc
-        | unicode.Me | unicode.Nd | unicode.Pc")();
+    return unicode.Alphabetic | unicode.Mn | unicode.Mc
+        | unicode.Me | unicode.Nd | unicode.Pc;
 }
 
 @property CharMatcher wordMatcher()
@@ -74,6 +55,19 @@ CharMatcher[CodepointSet] matcherCache;
     return memoizeExpr!("CharMatcher(wordCharacter)")();
 }
 
+package bool scanFor()(const(Interval)[] ivals, dchar ch)
+{
+    immutable len = ivals.length;
+    for (size_t i = 0; i < len; i++)
+    {
+        if (ch < ivals[i].a)
+            return false;
+        if (ch < ivals[i].b)
+            return true;
+    }
+    return false;
+}
+
 // some special Unicode white space characters
 private enum NEL = '\u0085', LS = '\u2028', PS = '\u2029';
 
@@ -97,7 +91,7 @@ enum RegexOption: uint {
 //do not reorder this list
 alias RegexOptionNames = AliasSeq!('g', 'i', 'x', 'U', 'm', 's');
 static assert( RegexOption.max < 0x80);
-// flags that allow guide execution of engine
+// flags that guide execution of engine
 enum RegexInfo : uint { oneShot = 0x80 }
 
 // IR bit pattern: 0b1_xxxxx_yy
@@ -173,7 +167,8 @@ template IRL(IR code)
 static assert (IRL!(IR.LookaheadStart) == 3);
 
 //how many parameters follow the IR, should be optimized fixing some IR bits
-int immediateParamsIR(IR i){
+int immediateParamsIR(IR i) pure
+{
     switch (i)
     {
     case IR.OrEnd,IR.InfiniteEnd,IR.InfiniteQEnd:
@@ -190,43 +185,43 @@ int immediateParamsIR(IR i){
 }
 
 //full length of IR instruction inlcuding all parameters that might follow it
-int lengthOfIR(IR i)
+int lengthOfIR(IR i) pure
 {
     return 1 + immediateParamsIR(i);
 }
 
 //full length of the paired IR instruction inlcuding all parameters that might follow it
-int lengthOfPairedIR(IR i)
+int lengthOfPairedIR(IR i) pure
 {
     return 1 + immediateParamsIR(pairedIR(i));
 }
 
 //if the operation has a merge point (this relies on the order of the ops)
-bool hasMerge(IR i)
+bool hasMerge(IR i) pure
 {
     return (i&0b11)==0b10 && i <= IR.RepeatQEnd;
 }
 
 //is an IR that opens a "group"
-bool isStartIR(IR i)
+bool isStartIR(IR i) pure
 {
     return (i&0b11)==0b01;
 }
 
 //is an IR that ends a "group"
-bool isEndIR(IR i)
+bool isEndIR(IR i) pure
 {
     return (i&0b11)==0b10;
 }
 
 //is a standalone IR
-bool isAtomIR(IR i)
+bool isAtomIR(IR i) pure
 {
     return (i&0b11)==0b00;
 }
 
 //makes respective pair out of IR i, swapping start/end bits of instruction
-IR pairedIR(IR i)
+IR pairedIR(IR i) pure
 {
     assert(isStartIR(i) || isEndIR(i));
     return cast(IR)(i ^ 0b11);
@@ -235,6 +230,7 @@ IR pairedIR(IR i)
 //encoded IR instruction
 struct Bytecode
 {
+pure:
     uint raw;
     //natural constraints
     enum maxSequence = 2+4;
@@ -459,14 +455,15 @@ struct Group(DataIndex)
 +/
 interface Kickstart(Char){
 @trusted:
-    bool search(ref Input!Char input);
-    bool match(ref Input!Char input);
-    @property bool empty() const;
+    bool search(ref Input!Char input) const;
+    bool match(ref Input!Char input) const;
+    @property bool empty() const pure;
 }
 
 //basic stack, just in case it gets used anywhere else then Parser
 @trusted struct Stack(T)
 {
+pure:
     T[] data;
     @property bool empty(){ return data.empty; }
 
@@ -479,8 +476,8 @@ interface Kickstart(Char){
         assert(!empty);
         auto val = data[$ - 1];
         data = data[0 .. $ - 1];
-        if (!__ctfe)
-            cast(void)data.assumeSafeAppend();
+        //if (!__ctfe)
+        //    cast(void)data.assumeSafeAppend();
         return val;
     }
 
@@ -491,7 +488,7 @@ interface Kickstart(Char){
     }
 }
 
-@trusted void reverseBytecode()(Bytecode[] code)
+@trusted void reverseBytecode()(Bytecode[] code) pure
 {
     import std.typecons;
     Bytecode[] rev = new Bytecode[code.length];
@@ -572,6 +569,8 @@ interface Kickstart(Char){
     code[] = rev[];
 }
 
+package alias Interval = Tuple!(uint,"a",uint, "b");
+
 /++
     $(D Regex) object holds regular expression pattern in compiled form.
     Instances of this object are constructed via calls to $(D regex).
@@ -580,11 +579,7 @@ interface Kickstart(Char){
 +/
 struct Regex(Char)
 {
-    //temporary workaround for identifier lookup
-    CodepointSet[] charsets; //
-    Bytecode[] ir;      //compiled bytecode of pattern
-
-
+pure:
     @safe @property bool empty() const nothrow {  return ir is null; }
 
     @safe @property auto namedCaptures()
@@ -633,14 +628,16 @@ struct Regex(Char)
     }
 
 package(std.regex):
+    Bytecode[] ir;                         // compiled bytecode of pattern
     NamedGroup[] dict;                     // maps name -> user group number
     uint ngroup;                           // number of internal groups
     uint maxCounterDepth;                  // max depth of nested {n,m} repetitions
     uint hotspotTableSize;                 // number of entries in merge table
     uint threadCount;                      // upper bound on number of Thompson VM threads
     uint flags;                            // global regex flags
-    public const(CharMatcher)[]  matchers; // tables that represent character sets
-    public const(BitTable)[] filters;      // bloom filters for conditional loops
+    Interval[][] charsets;                 // intervals of characters
+    const(CharMatcher)[]  matchers;        // tables that represent character sets
+    const(BitTable)[] filters;             // bloom filters for conditional loops
     uint[] backrefed;                      // bit array of backreferenced submatches
     Kickstart!Char kickstart;
 
@@ -696,11 +693,10 @@ package(std.regex):
 public:
     Regex!Char _regex;
     alias _regex this;
-    this(Regex!Char re, MatchFn fn)
+    this(immutable Regex!Char re, MatchFn fn) immutable
     {
         _regex = re;
         nativeFn = fn;
-
     }
 
 }
@@ -742,7 +738,7 @@ struct Input(Char)
         return _index == _origin.length;
     }
 
-    bool search(Kickstart!Char kick, ref dchar res, ref size_t pos)
+    bool search(const Kickstart!Char kick, ref dchar res, ref size_t pos)
     {
         kick.search(this);
         return nextChar(res, pos);
@@ -824,8 +820,8 @@ template BackLooper(E)
 }
 
 //
-@trusted uint lookupNamedGroup(String)(NamedGroup[] dict, String name)
-{//equal is @system?
+@safe uint lookupNamedGroup(String)(const(NamedGroup)[] dict, String name)
+{
     import std.range : assumeSorted;
     import std.conv : text;
     import std.algorithm.iteration : map;
@@ -861,6 +857,7 @@ public class RegexException : Exception
 
 // simple 128-entry bit-table used with a hash function
 struct BitTable {
+pure:
     uint[4] filter;
 
     this(CodepointSet set){
@@ -889,7 +886,7 @@ struct BitTable {
 struct CharMatcher {
     BitTable ascii; // fast path for ASCII
     Trie trie;      // slow path for Unicode
-
+pure:
     this(CodepointSet set)
     {
         auto asciiSet = set & unicode.ASCII;
diff --git a/std/regex/internal/parser.d b/std/regex/internal/parser.d
index 8c7568a8e12..9e9be2fdc57 100644
--- a/std/regex/internal/parser.d
+++ b/std/regex/internal/parser.d
@@ -7,11 +7,11 @@ module std.regex.internal.parser;
 import std.regex.internal.ir, std.regex.internal.shiftor,
     std.regex.internal.bitnfa;
 import std.range.primitives, std.uni, std.meta,
-    std.traits, std.typecons, std.exception;
+    std.traits, std.typecons, std.exception, std.range;
 static import std.ascii;
 
 // package relevant info from parser into a regex object
-auto makeRegex(S, CG)(Parser!(S, CG) p)
+auto makeRegex(S, CG)(Parser!(S, CG) p) pure
 {
     Regex!(BasicElementOf!S) re;
     auto g = p.g;
@@ -22,7 +22,10 @@ auto makeRegex(S, CG)(Parser!(S, CG) p)
         ngroup = g.ngroup;
         maxCounterDepth = g.counterDepth;
         flags = p.re_flags;
-        charsets = g.charsets;
+        charsets = g.charsets
+            .map!(x => 
+                x.byInterval.map!(x=>Interval(x.a,x.b)).array
+            ).array;
         matchers = g.matchers;
         backrefed = g.backrefed;
         re.postprocess();
@@ -77,87 +80,6 @@ unittest
     assert(nc.equal(cp[1 .. $ - 1]));
 }
 
-
-@trusted void reverseBytecode()(Bytecode[] code)
-{
-    Bytecode[] rev = new Bytecode[code.length];
-    uint revPc = cast(uint)rev.length;
-    Stack!(Tuple!(uint, uint, uint)) stack;
-    uint start = 0;
-    uint end = cast(uint)code.length;
-    for (;;)
-    {
-        for (uint pc = start; pc < end; )
-        {
-            immutable len = code[pc].length;
-            if (code[pc].code == IR.GotoEndOr)
-                break; //pick next alternation branch
-            if (code[pc].isAtom)
-            {
-                rev[revPc - len .. revPc] = code[pc .. pc + len];
-                revPc -= len;
-                pc += len;
-            }
-            else if (code[pc].isStart || code[pc].isEnd)
-            {
-                //skip over other embedded lookbehinds they are reversed
-                if (code[pc].code == IR.LookbehindStart
-                    || code[pc].code == IR.NeglookbehindStart)
-                {
-                    immutable blockLen = len + code[pc].data
-                         + code[pc].pairedLength;
-                    rev[revPc - blockLen .. revPc] = code[pc .. pc + blockLen];
-                    pc += blockLen;
-                    revPc -= blockLen;
-                    continue;
-                }
-                immutable second = code[pc].indexOfPair(pc);
-                immutable secLen = code[second].length;
-                rev[revPc - secLen .. revPc] = code[second .. second + secLen];
-                revPc -= secLen;
-                if (code[pc].code == IR.OrStart)
-                {
-                    //we pass len bytes forward, but secLen in reverse
-                    immutable revStart = revPc - (second + len - secLen - pc);
-                    uint r = revStart;
-                    uint i = pc + IRL!(IR.OrStart);
-                    while (code[i].code == IR.Option)
-                    {
-                        if (code[i - 1].code != IR.OrStart)
-                        {
-                            assert(code[i - 1].code == IR.GotoEndOr);
-                            rev[r - 1] = code[i - 1];
-                        }
-                        rev[r] = code[i];
-                        auto newStart = i + IRL!(IR.Option);
-                        auto newEnd = newStart + code[i].data;
-                        auto newRpc = r + code[i].data + IRL!(IR.Option);
-                        if (code[newEnd].code != IR.OrEnd)
-                        {
-                            newRpc--;
-                        }
-                        stack.push(tuple(newStart, newEnd, newRpc));
-                        r += code[i].data + IRL!(IR.Option);
-                        i += code[i].data + IRL!(IR.Option);
-                    }
-                    pc = i;
-                    revPc = revStart;
-                    assert(code[pc].code == IR.OrEnd);
-                }
-                else
-                    pc += len;
-            }
-        }
-        if (stack.empty)
-            break;
-        start = stack.top[0];
-        end = stack.top[1];
-        revPc = stack.top[2];
-        stack.pop();
-    }
-    code[] = rev[];
-}
-
 //test if a given string starts with hex number of maxDigit that's a valid codepoint
 //returns it's value and skips these maxDigit chars on success, throws on failure
 dchar parseUniHex(Char)(ref Char[] str, size_t maxDigit)
@@ -212,7 +134,7 @@ auto caseEnclose(CodepointSet set)
 /+
     fetch codepoint set corresponding to a name (InBlock or binary property)
 +/
-@trusted CodepointSet getUnicodeSet(in char[] name, bool negated,  bool casefold)
+@trusted CodepointSet getUnicodeSet(in char[] name, bool negated,  bool casefold) pure
 {
     CodepointSet s = unicode(name);
     //FIXME: caseEnclose for new uni as Set | CaseEnclose(SET && LC)
@@ -223,35 +145,9 @@ auto caseEnclose(CodepointSet set)
     return s;
 }
 
-//basic stack, just in case it gets used anywhere else then Parser
-@trusted struct Stack(T)
-{
-    T[] data;
-    @property bool empty(){ return data.empty; }
-
-    @property size_t length(){ return data.length; }
-
-    void push(T val){ data ~= val;  }
-
-    T pop()
-    {
-        assert(!empty);
-        auto val = data[$ - 1];
-        data = data[0 .. $ - 1];
-        if (!__ctfe)
-            cast(void)data.assumeSafeAppend();
-        return val;
-    }
-
-    @property ref T top()
-    {
-        assert(!empty);
-        return data[$ - 1];
-    }
-}
-
 struct CodeGen
 {
+pure:
     Bytecode[] ir;                 // resulting bytecode
     Stack!(uint) fixupStack;       // stack of opened start instructions
     NamedGroup[] dict;             // maps name -> user group number
@@ -336,7 +232,7 @@ struct CodeGen
             }
             if (ivals.length*2 > maxCharsetUsed)
             {
-                auto t  = getMatcher(set);
+                auto t  = CharMatcher(set);
                 put(Bytecode(IR.Trie, cast(uint)matchers.length));
                 matchers ~= t;
                 debug(std_regex_allocation) writeln("Trie generated");
@@ -617,6 +513,7 @@ enum infinite = ~0u;
 struct Parser(R, Generator)
     if (isForwardRange!R && is(ElementType!R : dchar))
 {
+pure:
     dchar _current;
     bool empty;
     R pat, origin;       //keep full pattern for pretty printing error messages
@@ -1543,7 +1440,7 @@ struct Parser(R, Generator)
 /+
     Postproces the IR, then optimize.
 +/
-@trusted void postprocess(Char)(ref Regex!Char zis)
+@trusted void postprocess(Char)(ref Regex!Char zis) pure
 {//@@@BUG@@@ write is @system
     with(zis)
     {
@@ -1663,7 +1560,7 @@ void fixupBytecode()(Bytecode[] ir)
     assert(fixups.empty);
 }
 
-void optimize(Char)(ref Regex!Char zis)
+void optimize(Char)(ref Regex!Char zis) pure
 {
     import std.array : insertInPlace;
     CodepointSet nextSet(uint idx)
@@ -1680,7 +1577,7 @@ void optimize(Char)(ref Regex!Char zis)
                     goto default;
                 //TODO: OrChar
                 case Trie, CodepointSet:
-                    set = zis.charsets[ir[i].data];
+                    set = .CodepointSet(zis.charsets[ir[i].data]);
                     goto default;
                 case GroupStart,GroupEnd:
                     break;
diff --git a/std/regex/internal/shiftor.d b/std/regex/internal/shiftor.d
index 39f1c3f6137..48bfebfebe8 100644
--- a/std/regex/internal/shiftor.d
+++ b/std/regex/internal/shiftor.d
@@ -29,6 +29,7 @@ uint effectiveSize(Char)()
 class ShiftOr(Char) : Kickstart!Char
 {
 private:
+pure:
     uint[] table;
     uint fChar;
     uint n_length;
@@ -115,8 +116,8 @@ private:
     {
         auto t = worklist[$-1];
         worklist.length -= 1;
-        if (!__ctfe)
-            cast(void)worklist.assumeSafeAppend();
+        //if (!__ctfe)
+        //    cast(void)worklist.assumeSafeAppend();
         return t;
     }
 
@@ -241,9 +242,9 @@ public:
                             static immutable codeBounds = [0x0, 0x7F, 0x80, 0x7FF, 0x800, 0xFFFF, 0x10000, 0x10FFFF];
                         else //== 2
                             static immutable codeBounds = [0x0, 0xFFFF, 0x10000, 0x10FFFF];
-                        uint[] arr = new uint[set.byInterval.length * 2];
+                        uint[] arr = new uint[set.length * 2];
                         size_t ofs = 0;
-                        foreach (ival; set.byInterval)
+                        foreach (ival; set)
                         {
                             arr[ofs++] = ival.a;
                             arr[ofs++] = ival.b;
@@ -262,7 +263,8 @@ public:
                     auto  chars = set.length;
                     if (chars > charsetThreshold)
                         goto L_StopThread;
-                    foreach (ch; set.byCodepoint)
+                    foreach (ival; set)
+                    foreach (ch; ival.a..ival.b)
                     {
                         //avoid surrogate pairs
                         if (0xD800 <= ch && ch <= 0xDFFF)
@@ -373,7 +375,7 @@ public:
     // has a useful trait: if supplied with valid UTF indexes,
     // returns only valid UTF indexes
     // (that given the haystack in question is valid UTF string)
-    final @trusted bool search(ref Input!Char s)
+    final @trusted bool search(ref Input!Char s) const
     {//@BUG: apparently assumes little endian machines
         import std.conv : text;
         import core.stdc.string : memchr;
@@ -503,7 +505,7 @@ public:
         return false;
     }
 
-    final @trusted bool match(ref Input!Char s)
+    final @trusted bool match(ref Input!Char s) const
     {
         //TODO: stub
         return false;
@@ -514,7 +516,7 @@ public:
         import std.stdio : writefln;
         for (size_t i = 0; i < table.length; i += 4)
         {
-            writefln("%32b %32b %32b %32b",table[i], table[i+1], table[i+2], table[i+3]);
+            debug writefln("%32b %32b %32b %32b",table[i], table[i+1], table[i+2], table[i+3]);
         }
     }
 }
@@ -524,7 +526,7 @@ unittest
     import std.conv, std.regex;
     auto shiftOrLength(C)(const(C)[] pat, uint length)
     {
-        auto r = regex(pat);
+        auto r = regex(pat, "s");
         auto kick = new ShiftOr!C(r);
         assert(kick.length == length, text(C.stringof, " == ", kick.length));
         return kick;
diff --git a/std/regex/internal/tests.d b/std/regex/internal/tests.d
index 6a3db845991..347c268da28 100644
--- a/std/regex/internal/tests.d
+++ b/std/regex/internal/tests.d
@@ -433,7 +433,7 @@ unittest
             static if (tvd.result == "c")
             {
                 static assert(!__traits(compiles, (){
-                    enum r = regex(tvd.pattern, tvd.flags);
+                    static r = regex(tvd.pattern, tvd.flags);
                 }), "errornously compiles regex pattern: " ~ tvd.pattern);
             }
             else
diff --git a/std/regex/internal/thompson.d b/std/regex/internal/thompson.d
index 530e5c503e0..9cced4f3c8a 100644
--- a/std/regex/internal/thompson.d
+++ b/std/regex/internal/thompson.d
@@ -93,7 +93,7 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            finish(t, matches, re.ir[t.pc].data);
+            finish(t, matches, ir[t.pc].data);
             //fix endpoint of the whole match
             matches[0].end = index;
             recycle(t);
@@ -243,35 +243,35 @@ template ThompsonOps(E, S, bool withInput:true)
     static bool op(IR code:IR.InfiniteStart)(E* e, S* state)
     {
         with(e) with(state)
-            t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteStart);
+            t.pc += ir[t.pc].data + IRL!(IR.InfiniteStart);
         return op!(IR.InfiniteEnd)(e,state);
     }
 
     static bool op(IR code:IR.InfiniteBloomStart)(E* e, S* state)
     {
         with(e) with(state)
-            t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteBloomStart);
+            t.pc += ir[t.pc].data + IRL!(IR.InfiniteBloomStart);
         return op!(IR.InfiniteBloomEnd)(e,state);
     }
 
     static bool op(IR code:IR.InfiniteQStart)(E* e, S* state)
     {
         with(e) with(state)
-            t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteQStart);
+            t.pc += ir[t.pc].data + IRL!(IR.InfiniteQStart);
         return op!(IR.InfiniteQEnd)(e,state);
     }
 
     static bool op(IR code:IR.RepeatStart)(E* e, S* state)
     {
         with(e) with(state)
-            t.pc += re.ir[t.pc].data + IRL!(IR.RepeatStart);
+            t.pc += ir[t.pc].data + IRL!(IR.RepeatStart);
         return op!(IR.RepeatEnd)(e,state);
     }
 
     static bool op(IR code:IR.RepeatQStart)(E* e, S* state)
     {
         with(e) with(state)
-            t.pc += re.ir[t.pc].data + IRL!(IR.RepeatQStart);
+            t.pc += ir[t.pc].data + IRL!(IR.RepeatQStart);
         return op!(IR.RepeatQEnd)(e,state);
     }
 
@@ -281,32 +281,32 @@ template ThompsonOps(E, S, bool withInput:true)
         with(e) with(state)
         {
             //len, step, min, max
-                uint len = re.ir[t.pc].data;
-                uint step =  re.ir[t.pc+2].raw;
-                uint min = re.ir[t.pc+3].raw;
+                uint len = ir[t.pc].data;
+                uint step =  ir[t.pc+2].raw;
+                uint min = ir[t.pc+3].raw;
                 if (t.counter < min)
                 {
                     t.counter += step;
                     t.pc -= len;
                     return true;
                 }
-                if (merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
+                if (merge[ir[t.pc + 1].raw+t.counter] < genCounter)
                 {
                     debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
-                                    t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
-                    merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
+                                    t.pc, index, genCounter, merge[ir[t.pc + 1].raw+t.counter] );
+                    merge[ir[t.pc + 1].raw+t.counter] = genCounter;
                 }
                 else
                 {
                     debug(std_regex_matcher)
                         writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
-                            t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
+                            t.pc, index, genCounter, merge[ir[t.pc + 1].raw+t.counter] );
                     return popState(e);
                 }
-                uint max = re.ir[t.pc+4].raw;
+                uint max = ir[t.pc+4].raw;
                 if (t.counter < max)
                 {
-                    if (re.ir[t.pc].code == IR.RepeatEnd)
+                    if (ir[t.pc].code == IR.RepeatEnd)
                     {
                         //queue out-of-loop thread
                         worklist.insertFront(fork(t, t.pc + IRL!(IR.RepeatEnd),  t.counter % step));
@@ -335,21 +335,21 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            if (merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
+            if (merge[ir[t.pc + 1].raw+t.counter] < genCounter)
             {
                 debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
-                                t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
-                merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
+                                t.pc, index, genCounter, merge[ir[t.pc + 1].raw+t.counter] );
+                merge[ir[t.pc + 1].raw+t.counter] = genCounter;
             }
             else
             {
                 debug(std_regex_matcher) writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
-                                t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
+                                t.pc, index, genCounter, merge[ir[t.pc + 1].raw+t.counter] );
                 return popState(e);
             }
-            uint len = re.ir[t.pc].data;
+            uint len = ir[t.pc].data;
             uint pc1, pc2; //branches to take in priority order
-            if (re.ir[t.pc].code == IR.InfiniteEnd)
+            if (ir[t.pc].code == IR.InfiniteEnd)
             {
                 pc1 = t.pc - len;
                 pc2 = t.pc + IRL!(IR.InfiniteEnd);
@@ -370,24 +370,24 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            if (merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
+            if (merge[ir[t.pc + 1].raw+t.counter] < genCounter)
             {
                 debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
-                                t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
-                merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
+                                t.pc, index, genCounter, merge[ir[t.pc + 1].raw+t.counter] );
+                merge[ir[t.pc + 1].raw+t.counter] = genCounter;
             }
             else
             {
                 debug(std_regex_matcher) writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
-                                t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
+                                t.pc, index, genCounter, merge[ir[t.pc + 1].raw+t.counter] );
                 return popState(e);
             }
-            uint len = re.ir[t.pc].data;
+            uint len = ir[t.pc].data;
             uint pc1, pc2; //branches to take in priority order
             pc1 = t.pc - len;
             pc2 = t.pc + IRL!(IR.InfiniteBloomEnd);
-            uint filterIndex = re.ir[t.pc + 2].raw;
-            if (re.filters[filterIndex][front])
+            uint filterIndex = ir[t.pc + 2].raw;
+            if (filters[filterIndex][front])
                 worklist.insertFront(fork(t, pc2, t.counter));
             t.pc = pc1;
             return true;
@@ -398,17 +398,17 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            if (merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
+            if (merge[ir[t.pc + 1].raw+t.counter] < genCounter)
             {
                 debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
-                                t.pc, s[index .. s.lastIndex], genCounter, merge[re.ir[t.pc + 1].raw + t.counter] );
-                merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
+                                t.pc, s[index .. s.lastIndex], genCounter, merge[ir[t.pc + 1].raw + t.counter] );
+                merge[ir[t.pc + 1].raw+t.counter] = genCounter;
                 t.pc += IRL!(IR.OrEnd);
             }
             else
             {
                 debug(std_regex_matcher) writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
-                                t.pc, s[index .. s.lastIndex], genCounter, merge[re.ir[t.pc + 1].raw + t.counter] );
+                                t.pc, s[index .. s.lastIndex], genCounter, merge[ir[t.pc + 1].raw + t.counter] );
                 return popState(e);
             }
             return true;
@@ -428,9 +428,9 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            uint next = t.pc + re.ir[t.pc].data + IRL!(IR.Option);
+            uint next = t.pc + ir[t.pc].data + IRL!(IR.Option);
             //queue next Option
-            if (re.ir[next].code == IR.Option)
+            if (ir[next].code == IR.Option)
             {
                 worklist.insertFront(fork(t, next, t.counter));
             }
@@ -443,7 +443,7 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            t.pc = t.pc + re.ir[t.pc].data + IRL!(IR.GotoEndOr);
+            t.pc = t.pc + ir[t.pc].data + IRL!(IR.GotoEndOr);
             return op!(IR.OrEnd)(e, state);
         }
     }
@@ -452,7 +452,7 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            uint n = re.ir[t.pc].data;
+            uint n = ir[t.pc].data;
             t.matches.ptr[n].begin = index;
             t.pc += IRL!(IR.GroupStart);
             return true;
@@ -462,7 +462,7 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            uint n = re.ir[t.pc].data;
+            uint n = ir[t.pc].data;
             t.matches.ptr[n].end = index;
             t.pc += IRL!(IR.GroupEnd);
             return true;
@@ -473,8 +473,8 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            uint n = re.ir[t.pc].data;
-            Group!DataIndex* source = re.ir[t.pc].localRef ? t.matches.ptr : backrefed.ptr;
+            uint n = ir[t.pc].data;
+            Group!DataIndex* source = ir[t.pc].localRef ? t.matches.ptr : backrefed.ptr;
             assert(source);
             if (source[n].begin == source[n].end)//zero-width Backref!
             {
@@ -511,15 +511,15 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            uint len = re.ir[t.pc].data;
-            uint ms = re.ir[t.pc + 1].raw, me = re.ir[t.pc + 2].raw;
+            uint len = ir[t.pc].data;
+            uint ms = ir[t.pc + 1].raw, me = ir[t.pc + 2].raw;
             uint end = t.pc + len + IRL!(IR.LookbehindEnd) + IRL!(IR.LookbehindStart);
-            bool positive = re.ir[t.pc].code == IR.LookbehindStart;
+            bool positive = ir[t.pc].code == IR.LookbehindStart;
             static if (Stream.isLoopback)
                 auto matcher = fwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
             else
                 auto matcher = bwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
-            matcher.re.ngroup = me - ms;
+            matcher.ngroup = me - ms;
             matcher.backrefed = backrefed.empty ? t.matches : backrefed;
             //backMatch
             auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookbehindStart));
@@ -540,15 +540,15 @@ template ThompsonOps(E, S, bool withInput:true)
         with(e) with(state)
         {
             auto save = index;
-            uint len = re.ir[t.pc].data;
-            uint ms = re.ir[t.pc+1].raw, me = re.ir[t.pc+2].raw;
+            uint len = ir[t.pc].data;
+            uint ms = ir[t.pc+1].raw, me = ir[t.pc+2].raw;
             uint end = t.pc+len+IRL!(IR.LookaheadEnd)+IRL!(IR.LookaheadStart);
-            bool positive = re.ir[t.pc].code == IR.LookaheadStart;
+            bool positive = ir[t.pc].code == IR.LookaheadStart;
             static if (Stream.isLoopback)
                 auto matcher = bwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
             else
                 auto matcher = fwdMatcher(t.pc, end, subCounters.get(t.pc, 0));
-            matcher.re.ngroup = me - ms;
+            matcher.ngroup = me - ms;
             matcher.backrefed = backrefed.empty ? t.matches : backrefed;
             auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookaheadStart));
             freelist = matcher.freelist;
@@ -570,7 +570,7 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-                finish(t, matches.ptr[0 .. re.ngroup], re.ir[t.pc].data);
+                finish(t, matches.ptr[0 .. ngroup], ir[t.pc].data);
                 recycle(t);
                 //cut off low priority threads
                 recycle(clist);
@@ -589,11 +589,11 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            uint len = re.ir[t.pc].sequence;
+            uint len = ir[t.pc].sequence;
             uint end = t.pc + len;
             static assert(IRL!(IR.OrChar) == 1);
             for (; t.pc < end; t.pc++)
-                if (re.ir[t.pc].data == front)
+                if (ir[t.pc].data == front)
                     break;
             if (t.pc != end)
             {
@@ -611,7 +611,7 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            if (front == re.ir[t.pc].data)
+            if (front == ir[t.pc].data)
             {
                 t.pc += IRL!(IR.Char);
                 nlist.insertBack(t);
@@ -638,7 +638,7 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            if (re.charsets[re.ir[t.pc].data].scanFor(front))
+            if (charsets[ir[t.pc].data].scanFor(front))
             {
                 t.pc += IRL!(IR.CodepointSet);
                 nlist.insertBack(t);
@@ -656,7 +656,7 @@ template ThompsonOps(E, S, bool withInput:true)
     {
         with(e) with(state)
         {
-            if (re.matchers[re.ir[t.pc].data][front])
+            if (matchers[ir[t.pc].data][front])
             {
                 t.pc += IRL!(IR.Trie);
                 nlist.insertBack(t);
@@ -688,8 +688,8 @@ template ThompsonOps(E,S, bool withInput:false)
     {
         with(e) with(state)
         {
-            uint n = re.ir[t.pc].data;
-            Group!DataIndex* source = re.ir[t.pc].localRef ? t.matches.ptr : backrefed.ptr;
+            uint n = ir[t.pc].data;
+            Group!DataIndex* source = ir[t.pc].localRef ? t.matches.ptr : backrefed.ptr;
             assert(source);
             if (source[n].begin == source[n].end)//zero-width Backref!
             {
@@ -725,20 +725,27 @@ template ThompsonOps(E,S, bool withInput:false)
     Thread!DataIndex* freelist;
     ThreadList!DataIndex clist, nlist;
     DataIndex[] merge;
-    Group!DataIndex[] backrefed;
-    Regex!Char re;           //regex program
+    const(Bytecode)[] ir;
+    int ngroup;             // number of capturing groups
+    uint flags;
+    const(Interval[])[] charsets;
+    const(CharMatcher)[] matchers;
+    const(BitTable)[] filters;
     Stream s;
     dchar front;
     DataIndex index;
-    DataIndex genCounter;    //merge trace counter, goes up on every dchar
-    size_t[size_t] subCounters; //a table of gen counter per sub-engine: PC -> counter
+    DataIndex genCounter;    // merge trace counter, goes up on every dchar
     OpFunc[] opCacheTrue;   // pointers to Op!(IR.xyz) for each bytecode
     OpFunc[] opCacheFalse;  // ditto
     OpBackFunc[] opCacheBackTrue;   // ditto
     OpBackFunc[] opCacheBackFalse;  // ditto
     size_t threadSize;
+    size_t threadCount;
     int matched;
     bool exhausted;
+    const Kickstart!Char kickstart;
+    Group!DataIndex[] backrefed;
+    size_t[size_t] subCounters; // a table of gen counter per sub-engine: PC -> counter
 
     static struct State
     {
@@ -799,7 +806,7 @@ template ThompsonOps(E,S, bool withInput:false)
         bool search()
         {
 
-            if (!s.search(re.kickstart, front, index))
+            if (!s.search(kickstart, front, index))
             {
                 index = s.lastIndex;
                 return false;
@@ -808,24 +815,23 @@ template ThompsonOps(E,S, bool withInput:false)
         }
     }
 
-    void initExternalMemory(void[] memory)
+    void initExternalMemory(void[] memory, size_t hotspotTableSize)
     {
-        threadSize = getThreadSize(re);
-        prepareFreeList(re.threadCount, memory);
-        if (re.hotspotTableSize)
+        prepareFreeList(threadCount, memory);
+        if (hotspotTableSize)
         {
-            merge = arrayInChunk!(DataIndex)(re.hotspotTableSize, memory);
+            merge = arrayInChunk!(DataIndex)(hotspotTableSize, memory);
             merge[] = 0;
         }
-        opCacheTrue = arrayInChunk!(OpFunc)(re.ir.length, memory);
-        opCacheFalse = arrayInChunk!(OpFunc)(re.ir.length, memory);
-        opCacheBackTrue = arrayInChunk!(OpBackFunc)(re.ir.length, memory);
-        opCacheBackFalse = arrayInChunk!(OpBackFunc)(re.ir.length, memory);
+        opCacheTrue = arrayInChunk!(OpFunc)(ir.length, memory);
+        opCacheFalse = arrayInChunk!(OpFunc)(ir.length, memory);
+        opCacheBackTrue = arrayInChunk!(OpBackFunc)(ir.length, memory);
+        opCacheBackFalse = arrayInChunk!(OpBackFunc)(ir.length, memory);
 
-        for (uint pc = 0; pc<re.ir.length; pc += re.ir[pc].length)
+        for (uint pc = 0; pc<ir.length; pc += ir[pc].length)
         {
         L_dispatch:
-            switch (re.ir[pc].code)
+            switch (ir[pc].code)
             {
                 foreach (e; __traits(allMembers, IR))
                 {
@@ -838,25 +844,38 @@ template ThompsonOps(E,S, bool withInput:false)
                 `);
                 }
             default:
-                assert(0, "Unrecognized instruction "~re.ir[pc].mnemonic);
+                assert(0, "Unrecognized instruction "~ir[pc].mnemonic);
             }
         }
     }
 
-    this()(Regex!Char program, Stream stream, void[] memory)
+    this()(const Regex!Char program, Stream stream, void[] memory, uint regexFlags)
     {
-        re = program;
+        kickstart = program.kickstart;
+        flags = regexFlags;
+        ir = program.ir;
+        ngroup = program.ngroup;
+        charsets = program.charsets;
+        matchers = program.matchers;
+        filters = program.filters;
         s = stream;
-        initExternalMemory(memory);
+        threadSize = getThreadSize(program);
+        threadCount = program.threadCount;
+        initExternalMemory(memory, program.hotspotTableSize);
         genCounter = 0;
     }
 
     this(ref ThompsonMatcher matcher, size_t lo, size_t hi, Stream stream)
     {
         s = stream;
-        re = matcher.re;
-        re.ir = re.ir[lo..hi];
+        ir = matcher.ir[lo..hi];
+        ngroup = matcher.ngroup;
+        flags = matcher.flags;
+        charsets = matcher.charsets;
+        matchers = matcher.matchers;
+        filters = matcher.filters;
         threadSize = matcher.threadSize;
+        threadCount = matcher.threadCount;
         merge = matcher.merge;
         freelist = matcher.freelist;
         opCacheTrue = matcher.opCacheTrue[lo..hi];
@@ -870,9 +889,13 @@ template ThompsonOps(E,S, bool withInput:false)
     this(ref BackMatcher matcher, size_t lo, size_t hi, Stream stream)
     {
         s = stream;
-        re = matcher.re;
-        re.ir = re.ir[lo..hi];
+        ir = matcher.ir[lo..hi];
+        ngroup = matcher.ngroup;
+        charsets = matcher.charsets;
+        matchers = matcher.matchers;
+        filters = matcher.filters;
         threadSize = matcher.threadSize;
+        threadCount = matcher.threadCount;
         merge = matcher.merge;
         freelist = matcher.freelist;
         opCacheTrue = matcher.opCacheBackTrue[lo..hi];
@@ -899,12 +922,9 @@ template ThompsonOps(E,S, bool withInput:false)
         return m;
     }
 
-    auto dupTo(void[] memory)
+    void dupTo(void[] memory)
     {
-        typeof(this) tmp = this;//bitblit
-        tmp.initExternalMemory(memory);
-        tmp.genCounter = 0;
-        return tmp;
+        initExternalMemory(memory, merge.length);
     }
 
     int match(Group!DataIndex[] matches)
@@ -915,14 +935,14 @@ template ThompsonOps(E,S, bool withInput:false)
         {
             return false;
         }
-        if (re.flags & RegexInfo.oneShot)
+        if (flags & RegexInfo.oneShot)
         {
             next();
             exhausted = true;
             return matchOneShot(matches);
         }
         static if (kicked)
-            if (re.kickstart)
+            if (kickstart)
                 return matchImpl!(true)(matches);
         return matchImpl!(false)(matches);
     }
@@ -1015,7 +1035,7 @@ template ThompsonOps(E,S, bool withInput:false)
             s.reset(matches[0].end);//reset to last successful match
             next();//and reload front character
             //--- here the exact state of stream was restored ---
-            exhausted = atEnd || !(re.flags & RegexOption.global);
+            exhausted = atEnd || !(flags & RegexOption.global);
             //+ empty match advances the input
             if (!exhausted && matches[0].begin == matches[0].end)
                 next();
@@ -1028,11 +1048,11 @@ template ThompsonOps(E,S, bool withInput:false)
     +/
     void finish(const(Thread!DataIndex)* t, Group!DataIndex[] matches, int code)
     {
-        matches.ptr[0..re.ngroup] = t.matches.ptr[0..re.ngroup];
+        matches.ptr[0..ngroup] = t.matches.ptr[0..ngroup];
         debug(std_regex_matcher)
         {
             writef("FOUND pc=%s prog_len=%s",
-                    t.pc, re.ir.length);
+                    t.pc, ir.length);
             if (!matches.empty)
                 writefln(": %s..%s", matches[0].begin, matches[0].end);
             foreach (v; matches)
@@ -1167,7 +1187,7 @@ template ThompsonOps(E,S, bool withInput:false)
     Thread!DataIndex* fork(Thread!DataIndex* master, uint pc, uint counter)
     {
         auto t = allocate();
-        t.matches.ptr[0..re.ngroup] = master.matches.ptr[0..re.ngroup];
+        t.matches.ptr[0..ngroup] = master.matches.ptr[0..ngroup];
         t.pc = pc;
         t.counter = counter;
         t.uopCounter = 0;
@@ -1178,7 +1198,7 @@ template ThompsonOps(E,S, bool withInput:false)
     Thread!DataIndex* createStart(DataIndex index, uint pc = 0)
     {
         auto t = allocate();
-        t.matches.ptr[0..re.ngroup] = (Group!DataIndex).init;
+        t.matches.ptr[0..ngroup] = (Group!DataIndex).init;
         t.matches[0].begin = index;
         t.pc = pc;
         t.counter = 0;
diff --git a/std/regex/package.d b/std/regex/package.d
index 45d2d8361e0..074ff33c094 100644
--- a/std/regex/package.d
+++ b/std/regex/package.d
@@ -320,12 +320,10 @@ public alias StaticRegex(Char) = std.regex.internal.ir.StaticRegex!(Char);
 
     Throws: $(D RegexException) if there were any errors during compilation.
 +/
-@trusted public auto regex(S)(S[] patterns, const(char)[] flags="")
+@trusted public auto regexPure(S)(S[] patterns, const(char)[] flags="") pure
     if (isSomeString!(S))
 {
     import std.array : appender;
-    import std.functional : memoize;
-    enum cacheSize = 8; //TODO: invent nice interface to control regex caching
     S pat;
     if (patterns.length > 1)
     {
@@ -344,10 +342,17 @@ public alias StaticRegex(Char) = std.regex.internal.ir.StaticRegex!(Char);
     }
     else
         pat = patterns[0];
+    return regexImpl!S(pat, flags);
+}
 
+@trusted public auto regex(S)(S[] patterns, const(char)[] flags="")
+    if (isSomeString!(S))
+{
+    import std.functional : memoize;
+    enum cacheSize = 8;
     if (__ctfe)
-        return regexImpl(pat, flags);
-    return memoize!(regexImpl!S, cacheSize)(pat, flags);
+        return regexPure(patterns, flags);
+    return memoize!(regexPure!S, cacheSize)(patterns, flags);
 }
 
 ///ditto
@@ -371,7 +376,7 @@ unittest
     assert(m.front[1] == "12");
 }
 
-public auto regexImpl(S)(S pattern, const(char)[] flags="")
+public auto regexImpl(S)(S pattern, const(char)[] flags="") pure
     if (isSomeString!(S))
 {
     import std.regex.internal.parser : Parser, CodeGen;
@@ -384,7 +389,7 @@ public auto regexImpl(S)(S pattern, const(char)[] flags="")
 template ctRegexImpl(alias pattern, string flags=[])
 {
     import std.regex.internal.parser, std.regex.internal.backtracking;
-    enum r = regex(pattern, flags);
+    static immutable r = cast(immutable)regexPure([pattern], flags);
     alias Char = BasicElementOf!(typeof(pattern));
     enum source = ctGenRegExCode(r);
     alias Matcher = BacktrackingMatcher!(true);
@@ -393,7 +398,7 @@ template ctRegexImpl(alias pattern, string flags=[])
         debug(std_regex_ctr) pragma(msg, source);
         mixin(source);
     }
-    enum nr = StaticRegex!Char(r, &func);
+    static immutable nr = immutable StaticRegex!Char(r, &func);
 }
 
 /++
@@ -406,7 +411,7 @@ template ctRegexImpl(alias pattern, string flags=[])
     pattern = Regular expression
     flags = The _attributes (g, i, m and x accepted)
 +/
-public enum ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).nr;
+public static immutable ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).nr;
 
 enum isRegexFor(RegEx, R) = is(RegEx == Regex!(BasicElementOf!R))
      || is(RegEx == StaticRegex!(BasicElementOf!R));
@@ -436,9 +441,9 @@ private:
     }
     uint _f, _b;
     uint _refcount; // ref count or SMALL MASK + num groups
-    NamedGroup[] _names;
+    const NamedGroup[] _names;
 
-    this()(R input, uint n, NamedGroup[] named)
+    this()(R input, uint n, const(NamedGroup)[] named)
     {
         _input = input;
         _names = named;
@@ -447,16 +452,6 @@ private:
         _f = 0;
     }
 
-    this(alias Engine)(ref RegexMatch!(R,Engine) rmatch)
-    {
-        _input = rmatch._input;
-        _names = rmatch._engine.re.dict;
-        immutable n = rmatch._engine.re.ngroup;
-        newMatches(n);
-        _b = n;
-        _f = 0;
-    }
-
     @property inout(Group!DataIndex[]) matches() inout
     {
        return (_refcount & SMALL_MASK)  ? small_matches[0 .. _refcount & 0xFF] : big_matches;
@@ -660,21 +655,24 @@ private:
     alias EngineType = Engine!Char;
     EngineType _engine;
     R _input;
+    uint _ngroup;
     Captures!(R,EngineType.DataIndex) _captures;
     void[] _memory;//is ref-counted
 
-    this(RegEx)(R input, RegEx prog)
+    this(RegEx)(R input, RegEx prog, uint reFlags)
     {
         import std.exception : enforce;
         _input = input;
+        _ngroup = prog.ngroup;
         immutable size = EngineType.initialMemory(prog)+size_t.sizeof;
         _memory = (enforce(malloc(size), "malloc failed")[0..size]);
         scope(failure) free(_memory.ptr);
         *cast(size_t*)_memory.ptr = 1;
-        _engine = EngineType(prog, Input!Char(input), _memory[size_t.sizeof..$]);
-        static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
+        _engine = EngineType(prog, Input!Char(input), 
+          _memory[size_t.sizeof..$], reFlags);
+        static if (is(typeof(prog.nativeFn)))
             _engine.nativeFn = prog.nativeFn;
-        _captures = Captures!(R,EngineType.DataIndex)(this);
+        _captures = Captures!(R,EngineType.DataIndex)(input, prog.ngroup, prog.dict);
         _captures._nMatch = _engine.match(_captures.matches);
         debug(std_regex_allocation) writefln("RefCount (ctor): %x %d", _memory.ptr, counter);
     }
@@ -743,16 +741,16 @@ public:
         if (counter != 1)
         {//do cow magic first
             counter--;//we abandon this reference
-            immutable size = EngineType.initialMemory(_engine.re)+size_t.sizeof;
+            immutable size = _memory.length;
             _memory = (enforce(malloc(size), "malloc failed")[0..size]);
-            _engine = _engine.dupTo(_memory[size_t.sizeof..size]);
+            _engine.dupTo(_memory[size_t.sizeof..size]);
             counter = 1;//points to new chunk
         }
 
         if (!_captures.unique)
         {
             // has external references - allocate new space
-            _captures.newMatches(_engine.re.ngroup);
+            _captures.newMatches(_ngroup);
         }
         _captures._nMatch = _engine.match(_captures.matches);
     }
@@ -771,7 +769,7 @@ public:
 
 }
 
-private @trusted auto matchOnce(alias Engine, RegEx, R)(R input, RegEx re)
+private @trusted auto matchOnce(alias Engine, RegEx, R)(R input, const RegEx re)
 {
     import core.stdc.stdlib : malloc, free;
     import std.exception : enforce;
@@ -782,17 +780,16 @@ private @trusted auto matchOnce(alias Engine, RegEx, R)(R input, RegEx re)
     void[] memory = enforce(malloc(size), "malloc failed")[0..size];
     scope(exit) free(memory.ptr);
     auto captures = Captures!(R, EngineType.DataIndex)(input, re.ngroup, re.dict);
-    auto engine = EngineType(re, Input!Char(input), memory);
-    static if (is(RegEx == StaticRegex!(BasicElementOf!R)))
+    auto engine = EngineType(re, Input!Char(input), memory, re.flags);
+    static if (is(typeof(re.nativeFn)))
         engine.nativeFn = re.nativeFn;
     captures._nMatch = engine.match(captures.matches);
     return captures;
 }
 
-private auto matchMany(alias Engine, RegEx, R)(R input, RegEx re)
+private auto matchMany(alias Engine, RegEx, R)(R input, const RegEx re)
 {
-    re.flags |= RegexOption.global;
-    return RegexMatch!(R, Engine)(input, re);
+    return RegexMatch!(R, Engine)(input, re, re.flags | RegexOption.global);
 }
 
 unittest
@@ -847,7 +844,7 @@ private void replaceMatchesInto(alias output, Sink, R, T)
 }
 
 //  a general skeleton of replaceFirst
-private R replaceFirstWith(alias output, R, RegEx)(R input, RegEx re)
+private R replaceFirstWith(alias output, R, RegEx)(R input, const RegEx re)
     if (isSomeString!R && isRegexFor!(RegEx, R))
 {
     import std.array : appender;
@@ -862,7 +859,7 @@ private R replaceFirstWith(alias output, R, RegEx)(R input, RegEx re)
 // ditto for replaceAll
 // the method parameter allows old API to ride on the back of the new one
 private R replaceAllWith(alias output,
-        alias method=matchAll, R, RegEx)(R input, RegEx re)
+        alias method=matchAll, R, RegEx)(R input, const RegEx re)
     if (isSomeString!R && isRegexFor!(RegEx, R))
 {
     import std.array : appender;
@@ -891,11 +888,12 @@ private R replaceAllWith(alias output,
     Returns: a $(D RegexMatch) object holding engine state after first match.
 +/
 
-public auto match(R, RegEx)(R input, RegEx re)
+public auto match(R, RegEx)(R input, const RegEx re)
     if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
 {
     import std.regex.internal.thompson : ThompsonMatcher;
-    return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)(input, re);
+    return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)
+      (input, re, re.flags);
 }
 
 ///ditto
@@ -903,14 +901,17 @@ public auto match(R, String)(R input, String re)
     if (isSomeString!R && isSomeString!String)
 {
     import std.regex.internal.thompson : ThompsonMatcher;
-    return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)(input, regex(re));
+    auto r = regex(re);
+    return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)
+      (input, r, r.flags);
 }
 
-public auto match(R, RegEx)(R input, RegEx re)
+public auto match(R, RegEx)(R input, const RegEx re)
     if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
 {
     import std.regex.internal.backtracking : BacktrackingMatcher;
-    return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)(input, re);
+    return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)
+      (input, re, re.flags);
 }
 
 /++
@@ -931,7 +932,7 @@ public auto match(R, RegEx)(R input, RegEx re)
     $(LREF Captures) containing the extent of a match together with all submatches
     if there was a match, otherwise an empty $(LREF Captures) object.
 +/
-public auto matchFirst(R, RegEx)(R input, RegEx re)
+public auto matchFirst(R, RegEx)(R input, const RegEx re)
     if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
 {
     import std.regex.internal.thompson : ThompsonMatcher;
@@ -954,7 +955,7 @@ public auto matchFirst(R, String)(R input, String[] re...)
     return matchOnce!ThompsonMatcher(input, regex(re));
 }
 
-public auto matchFirst(R, RegEx)(R input, RegEx re)
+public auto matchFirst(R, RegEx)(R input, const RegEx re)
     if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
 {
     import std.regex.internal.backtracking : BacktrackingMatcher;
@@ -982,7 +983,7 @@ public auto matchFirst(R, RegEx)(R input, RegEx re)
     $(LREF RegexMatch) object that represents matcher state
     after the first match was found or an empty one if not present.
 +/
-public auto matchAll(R, RegEx)(R input, RegEx re)
+public auto matchAll(R, RegEx)(R input, const RegEx re)
     if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
 {
     import std.regex.internal.thompson : ThompsonMatcher;
@@ -1005,7 +1006,7 @@ public auto matchAll(R, String)(R input, String[] re...)
     return matchMany!ThompsonMatcher(input, regex(re));
 }
 
-public auto matchAll(R, RegEx)(R input, RegEx re)
+public auto matchAll(R, RegEx)(R input, const RegEx re)
     if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
 {
     import std.regex.internal.backtracking : BacktrackingMatcher;
@@ -1071,11 +1072,12 @@ public auto matchAll(R, RegEx)(R input, RegEx re)
     state after first match.
 
 +/
-public auto bmatch(R, RegEx)(R input, RegEx re)
+public auto bmatch(R, RegEx)(R input, const RegEx re)
     if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R)))
 {
     import std.regex.internal.backtracking : BacktrackingMatcher;
-    return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)(input, re);
+    return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)
+      (input, re, re.flags);
 }
 
 ///ditto
@@ -1083,14 +1085,17 @@ public auto bmatch(R, String)(R input, String re)
     if (isSomeString!R && isSomeString!String)
 {
     import std.regex.internal.backtracking : BacktrackingMatcher;
-    return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)(input, regex(re));
+    auto r = regex(re); 
+    return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)
+      (input, r, r.flags);
 }
 
-public auto bmatch(R, RegEx)(R input, RegEx re)
+public auto bmatch(R, RegEx)(R input, const RegEx re)
     if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R)))
 {
     import std.regex.internal.backtracking : BacktrackingMatcher;
-    return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)(input, re);
+    return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)
+      (input, re, re.flags);
 }
 
 // produces replacement string from format using captures for substitution
@@ -1183,7 +1188,7 @@ L_Replace_Loop:
     A string of the same type with the first match (if any) replaced.
     If no match is found returns the input string itself.
 +/
-public R replaceFirst(R, C, RegEx)(R input, RegEx re, const(C)[] format)
+public R replaceFirst(R, C, RegEx)(R input, const RegEx re, const(C)[] format)
     if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
 {
     return replaceFirstWith!((m, sink) => replaceFmt(format, m, sink))(input, re);
@@ -1210,7 +1215,7 @@ unittest
     replaced by return values of $(D fun). If no matches found
     returns the $(D input) itself.
 +/
-public R replaceFirst(alias fun, R, RegEx)(R input, RegEx re)
+public R replaceFirst(alias fun, R, RegEx)(R input, const RegEx re)
   if (isSomeString!R && isRegexFor!(RegEx, R))
 {
     return replaceFirstWith!((m, sink) => sink.put(fun(m)))(input, re);
@@ -1236,7 +1241,7 @@ unittest
     and the one with the user defined callback.
 +/
 public @trusted void replaceFirstInto(Sink, R, C, RegEx)
-        (ref Sink sink, R input, RegEx re, const(C)[] format)
+        (ref Sink sink, R input, const RegEx re, const(C)[] format)
     if (isOutputRange!(Sink, dchar) && isSomeString!R
         && is(C : dchar) && isRegexFor!(RegEx, R))
     {
@@ -1246,7 +1251,7 @@ public @trusted void replaceFirstInto(Sink, R, C, RegEx)
 
 ///ditto
 public @trusted void replaceFirstInto(alias fun, Sink, R, RegEx)
-    (Sink sink, R input, RegEx re)
+    (Sink sink, R input, const RegEx re)
     if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
 {
     replaceCapturesInto!fun(sink, input, matchFirst(input, re));
@@ -1301,7 +1306,7 @@ unittest
     of the matches (if any) replaced.
     If no match is found returns the input string itself.
 +/
-public @trusted R replaceAll(R, C, RegEx)(R input, RegEx re, const(C)[] format)
+public @trusted R replaceAll(R, C, RegEx)(R input, const RegEx re, const(C)[] format)
     if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R))
 {
     return replaceAllWith!((m, sink) => replaceFmt(format, m, sink))(input, re);
@@ -1335,7 +1340,7 @@ unittest
     re = compiled regular expression
     fun = delegate to use
 +/
-public @trusted R replaceAll(alias fun, R, RegEx)(R input, RegEx re)
+public @trusted R replaceAll(alias fun, R, RegEx)(R input, const RegEx re)
     if (isSomeString!R && isRegexFor!(RegEx, R))
 {
     return replaceAllWith!((m, sink) => sink.put(fun(m)))(input, re);
@@ -1364,7 +1369,7 @@ unittest
     the other one with a user defined functor.
 +/
 public @trusted void replaceAllInto(Sink, R, C, RegEx)
-        (Sink sink, R input, RegEx re, const(C)[] format)
+        (Sink sink, R input, const RegEx re, const(C)[] format)
     if (isOutputRange!(Sink, dchar) && isSomeString!R
         && is(C : dchar) && isRegexFor!(RegEx, R))
     {
@@ -1374,7 +1379,7 @@ public @trusted void replaceAllInto(Sink, R, C, RegEx)
 
 ///ditto
 public @trusted void replaceAllInto(alias fun, Sink, R, RegEx)
-        (Sink sink, R input, RegEx re)
+        (Sink sink, R input, const RegEx re)
     if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R))
 {
     replaceMatchesInto!fun(sink, input, matchAll(input, re));
@@ -1446,14 +1451,14 @@ public @trusted void replaceAllInto(alias fun, Sink, R, RegEx)
     The use of this function is $(RED discouraged), please use $(LREF replaceAll)
     or $(LREF replaceFirst) explicitly.
 +/
-public R replace(alias scheme = match, R, C, RegEx)(R input, RegEx re, const(C)[] format)
+public R replace(alias scheme = match, R, C, RegEx)(R input, const RegEx re, const(C)[] format)
     if (isSomeString!R && isRegexFor!(RegEx, R))
 {
     return replaceAllWith!((m, sink) => replaceFmt(format, m, sink), match)(input, re);
 }
 
 ///ditto
-public R replace(alias fun, R, RegEx)(R input, RegEx re)
+public R replace(alias fun, R, RegEx)(R input, const RegEx re)
     if (isSomeString!R && isRegexFor!(RegEx, R))
 {
     return replaceAllWith!(fun, match)(input, re);
@@ -1475,15 +1480,14 @@ public struct Splitter(Flag!"keepSeparators" keepSeparators = No.keepSeparators,
 private:
     Range _input;
     size_t _offset;
-    alias Rx = typeof(match(Range.init,RegEx.init));
+    alias Rx = typeof(matchAll(Range.init,RegEx.init));
     Rx _match;
 
     static if (keepSeparators) bool onMatch = false;
 
-    @trusted this(Range input, RegEx separator)
+    @trusted this(Range input, const RegEx separator)
     {//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted
         _input = input;
-        separator.flags |= RegexOption.global;
         if (_input.empty)
         {
             //there is nothing to match at all, make _offset > 0
@@ -1491,7 +1495,7 @@ private:
         }
         else
         {
-            _match = Rx(_input, separator);
+            _match = matchAll(_input, separator);
 
             static if (keepSeparators)
                 if (_match.pre.empty)
@@ -1579,8 +1583,9 @@ public:
 
 /// ditto
 public Splitter!(keepSeparators, Range, RegEx) splitter(
-    Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx)(Range r, RegEx pat) if (
-        is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range))
+    Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx)
+    (Range r, const RegEx pat) 
+    if (is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range))
 {
     return Splitter!(keepSeparators, Range, RegEx)(r, pat);
 }
@@ -1611,7 +1616,7 @@ unittest
 }
 
 ///An eager version of $(D splitter) that creates an array with splitted slices of $(D input).
-public @trusted String[] split(String, RegEx)(String input, RegEx rx)
+public @trusted String[] split(String, RegEx)(String input, const RegEx rx)
     if (isSomeString!String  && isRegexFor!(RegEx, String))
 {
     import std.array : appender;
diff --git a/std/uni.d b/std/uni.d
index 8b7fa32fdcb..84c7ee31399 100644
--- a/std/uni.d
+++ b/std/uni.d
@@ -2120,19 +2120,6 @@ public:
         assert(!gothic['$']);
     }
 
-    // Linear scan for $(D ch). Useful only for small sets.
-    // TODO:
-    // used internally in std.regex
-    // should be properly exposed in a public API ?
-    package auto scanFor()(dchar ch) const
-    {
-        immutable len = data.length;
-        for (size_t i = 0; i < len; i++)
-            if (ch < data[i])
-                return i & 1;
-        return 0;
-    }
-
     /// Number of $(CODEPOINTS) in this set
     @property size_t length()
     {

From 87847ed4804ccbbff84eeb60d5c6061ad3c138a5 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Fri, 7 Oct 2016 01:41:09 +0300
Subject: [PATCH 20/23] [std.regex] More tests for issue 9391

---
 std/regex/internal/tests.d |  9 +++++----
 std/regex/package.d        | 32 +++++++-------------------------
 2 files changed, 12 insertions(+), 29 deletions(-)

diff --git a/std/regex/internal/tests.d b/std/regex/internal/tests.d
index 347c268da28..4f52f819c5d 100644
--- a/std/regex/internal/tests.d
+++ b/std/regex/internal/tests.d
@@ -468,15 +468,16 @@ unittest
 
 unittest
 {
-    auto cr = ctRegex!("abc");
+    immutable cr = ctRegex!("abc");
     assert(bmatch("abc",cr).hit == "abc");
-    auto cr2 = ctRegex!("ab*c");
+    immutable cr2 = ctRegex!("ab*c");
     assert(bmatch("abbbbc",cr2).hit == "abbbbc");
 }
+
 unittest
 {
-    auto cr3 = ctRegex!("^abc$");
+    immutable cr3 = ctRegex!("^abc$");
     assert(bmatch("abc",cr3).hit == "abc");
-    auto cr4 = ctRegex!(`\b(a\B[a-z]b)\b`);
+    immutable cr4 = ctRegex!(`\b(a\B[a-z]b)\b`);
     assert(array(match("azb",cr4).captures) == ["azb", "azb"]);
 }
diff --git a/std/regex/package.d b/std/regex/package.d
index 074ff33c094..0d9e99272b1 100644
--- a/std/regex/package.d
+++ b/std/regex/package.d
@@ -1023,7 +1023,7 @@ public auto matchAll(R, RegEx)(R input, const RegEx re)
     foreach (String; AliasSeq!(string, wstring, const(dchar)[]))
     {
         auto str1 = "blah-bleh".to!String();
-        auto pat1 = "bl[ae]h".to!String();
+        const pat1 = "bl[ae]h".to!String();
         auto mf = matchFirst(str1, pat1);
         assert(mf.equal(["blah".to!String()]));
         auto mAll = matchAll(str1, pat1);
@@ -1031,7 +1031,7 @@ public auto matchAll(R, RegEx)(R input, const RegEx re)
             ([["blah".to!String()], ["bleh".to!String()]]));
 
         auto str2 = "1/03/12 - 3/03/12".to!String();
-        auto pat2 = regex([r"(\d+)/(\d+)/(\d+)".to!String(), "abc".to!String]);
+        const pat2 = regex([r"(\d+)/(\d+)/(\d+)".to!String(), "abc".to!String]);
         auto mf2 = matchFirst(str2, pat2);
         assert(mf2.equal(["1/03/12", "1", "03", "12"].map!(to!String)()));
         auto mAll2 = matchAll(str2, pat2);
@@ -1041,7 +1041,7 @@ public auto matchAll(R, RegEx)(R input, const RegEx re)
         mf2.popFrontN(3);
         assert(mf2.equal(["12".to!String()]));
 
-        auto ctPat = ctRegex!(`(?P<Quot>\d+)/(?P<Denom>\d+)`.to!String());
+        const ctPat = ctRegex!(`(?P<Quot>\d+)/(?P<Denom>\d+)`.to!String());
         auto str = "2 + 34/56 - 6/1".to!String();
         auto cmf = matchFirst(str, ctPat);
         assert(cmf.equal(["34/56", "34", "56"].map!(to!String)()));
@@ -1270,24 +1270,6 @@ unittest
     assert(result.data == "first\nsecond\n");
 }
 
-//examples for replaceFirst
-@system unittest
-{
-    import std.conv;
-    string list = "#21 out of 46";
-    string newList = replaceFirst!(cap => to!string(to!int(cap.hit)+1))
-        (list, regex(`[0-9]+`));
-    assert(newList == "#22 out of 46");
-    import std.array;
-    string m1 = "first message\n";
-    string m2 = "second message\n";
-    auto result = appender!string();
-    replaceFirstInto(result, m1, regex(`([a-z]+) message`), "$1");
-    //equivalent of the above with user-defined callback
-    replaceFirstInto!(cap=>cap[1])(result, m2, regex(`([a-z]+) message`));
-    assert(result.data == "first\nsecond\n");
-}
-
 /++
     Construct a new string from $(D input) by replacing all of the
     fragments that match a pattern $(D re) with a string generated
@@ -1316,7 +1298,7 @@ public @trusted R replaceAll(R, C, RegEx)(R input, const RegEx re, const(C)[] fo
 unittest
 {
     // insert comma as thousands delimiter
-    auto re = regex(r"(?<=\d)(?=(\d\d\d)+\b)","g");
+    const re = regex(r"(?<=\d)(?=(\d\d\d)+\b)","g");
     assert(replaceAll("12000 + 42100 = 54100", re, ",") == "12,000 + 42,100 = 54,100");
 }
 
@@ -1416,8 +1398,8 @@ public @trusted void replaceAllInto(alias fun, Sink, R, RegEx)
         S t2F = "hound dome".to!S();
         S t1A = "court trial".to!S();
         S t2A = "hound home".to!S();
-        auto re1 = regex("curt".to!S());
-        auto re2 = regex("[dr]o".to!S());
+        const re1 = regex("curt".to!S());
+        const re2 = regex("[dr]o".to!S());
 
         assert(replaceFirst(s1, re1, "court") == t1F);
         assert(replaceFirst(s2, re2, "ho") == t2F);
@@ -1604,7 +1586,7 @@ unittest
 {
     import std.algorithm.comparison : equal;
 
-    auto pattern = regex(`([\.,])`);
+    const pattern = regex(`([\.,])`);
 
     assert("2003.04.05"
         .splitter!(Yes.keepSeparators)(pattern)

From cd2c28f40220366b3591da40cd209da39400bf09 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Fri, 7 Oct 2016 01:43:05 +0300
Subject: [PATCH 21/23] [std.regex] Trailing whites

---
 std/regex/internal/parser.d | 2 +-
 std/regex/package.d         | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/std/regex/internal/parser.d b/std/regex/internal/parser.d
index 9e9be2fdc57..3f0fb806730 100644
--- a/std/regex/internal/parser.d
+++ b/std/regex/internal/parser.d
@@ -23,7 +23,7 @@ auto makeRegex(S, CG)(Parser!(S, CG) p) pure
         maxCounterDepth = g.counterDepth;
         flags = p.re_flags;
         charsets = g.charsets
-            .map!(x => 
+            .map!(x =>
                 x.byInterval.map!(x=>Interval(x.a,x.b)).array
             ).array;
         matchers = g.matchers;
diff --git a/std/regex/package.d b/std/regex/package.d
index 0d9e99272b1..11902dfba7b 100644
--- a/std/regex/package.d
+++ b/std/regex/package.d
@@ -668,7 +668,7 @@ private:
         _memory = (enforce(malloc(size), "malloc failed")[0..size]);
         scope(failure) free(_memory.ptr);
         *cast(size_t*)_memory.ptr = 1;
-        _engine = EngineType(prog, Input!Char(input), 
+        _engine = EngineType(prog, Input!Char(input),
           _memory[size_t.sizeof..$], reFlags);
         static if (is(typeof(prog.nativeFn)))
             _engine.nativeFn = prog.nativeFn;
@@ -1085,7 +1085,7 @@ public auto bmatch(R, String)(R input, String re)
     if (isSomeString!R && isSomeString!String)
 {
     import std.regex.internal.backtracking : BacktrackingMatcher;
-    auto r = regex(re); 
+    auto r = regex(re);
     return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)
       (input, r, r.flags);
 }
@@ -1566,7 +1566,7 @@ public:
 /// ditto
 public Splitter!(keepSeparators, Range, RegEx) splitter(
     Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx)
-    (Range r, const RegEx pat) 
+    (Range r, const RegEx pat)
     if (is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range))
 {
     return Splitter!(keepSeparators, Range, RegEx)(r, pat);

From d1d53c556ca180909ed803109c1c5be5253f5d2b Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Fri, 7 Oct 2016 02:17:40 +0300
Subject: [PATCH 22/23] [std.regex] Fixes for recent compiler version

---
 std/regex/internal/tests2.d | 12 ++++++------
 std/regex/internal/tests3.d |  8 ++++----
 std/regex/package.d         | 20 +++++++++++++-------
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/std/regex/internal/tests2.d b/std/regex/internal/tests2.d
index 19286fa31f8..5aaa360e185 100644
--- a/std/regex/internal/tests2.d
+++ b/std/regex/internal/tests2.d
@@ -45,29 +45,29 @@ unittest
 unittest
 {
     auto rtr = regex("a|b|c");
-    enum ctr = regex("a|b|c");
+    const ctr = regex("a|b|c");
     assert(equal(rtr.ir,ctr.ir));
     //CTFE parser BUG is triggered by group
     //in the middle of alternation (at least not first and not last)
-    enum testCT = regex(`abc|(edf)|xyz`);
+    const testCT = regex(`abc|(edf)|xyz`);
     auto testRT = regex(`abc|(edf)|xyz`);
     assert(equal(testCT.ir,testRT.ir));
 }
 
 unittest
 {
-    enum cx = ctRegex!"(A|B|C)";
+    immutable cx = ctRegex!"(A|B|C)";
     auto mx = match("B",cx);
     assert(mx);
     assert(equal(mx.captures, [ "B", "B"]));
-    enum cx2 = ctRegex!"(A|B)*";
+    immutable cx2 = ctRegex!"(A|B)*";
     assert(match("BAAA",cx2));
 
-    enum cx3 = ctRegex!("a{3,4}","i");
+    immutable cx3 = ctRegex!("a{3,4}","i");
     auto mx3 = match("AaA",cx3);
     assert(mx3);
     assert(mx3.captures[0] == "AaA");
-    enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i");
+    immutable cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i");
     auto mx4 = match("aaaabc", cx4);
     assert(mx4);
     assert(mx4.captures[0] == "aaaab");
diff --git a/std/regex/internal/tests3.d b/std/regex/internal/tests3.d
index 07541fbb62d..3bd8cb8f336 100644
--- a/std/regex/internal/tests3.d
+++ b/std/regex/internal/tests3.d
@@ -45,7 +45,7 @@ unittest
 {// bugzilla 7679
     foreach (S; AliasSeq!(string, wstring, dstring))
     (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
-        enum re = ctRegex!(to!S(r"\."));
+        const re = ctRegex!(to!S(r"\."));
         auto str = to!S("a.b");
         assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")]));
         assert(split(str, re) == [to!S("a"), to!S("b")]);
@@ -89,8 +89,8 @@ unittest
 // bugzilla 8349
 unittest
 {
-    enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
-    enum peakRegex = ctRegex!(peakRegexStr);
+    const peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
+    const peakRegex = ctRegex!(peakRegexStr);
     //note that the regex pattern itself is probably bogus
     assert(match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRegex));
 }
@@ -168,7 +168,7 @@ unittest
 // bugzilla 11262
 unittest
 {
-    enum reg = ctRegex!(r",", "g");
+    const reg = ctRegex!(r",", "g");
     auto str = "This,List";
     str = str.replace(reg, "-");
     assert(str == "This-List");
diff --git a/std/regex/package.d b/std/regex/package.d
index 11902dfba7b..e4566d47c6f 100644
--- a/std/regex/package.d
+++ b/std/regex/package.d
@@ -324,7 +324,7 @@ public alias StaticRegex(Char) = std.regex.internal.ir.StaticRegex!(Char);
     if (isSomeString!(S))
 {
     import std.array : appender;
-    S pat;
+    Unqual!S pat;
     if (patterns.length > 1)
     {
         auto app = appender!S();
@@ -386,18 +386,24 @@ public auto regexImpl(S)(S pattern, const(char)[] flags="") pure
 }
 
 
-template ctRegexImpl(alias pattern, string flags=[])
+private template IsolatedFunc(Char, alias source)
 {
-    import std.regex.internal.parser, std.regex.internal.backtracking;
-    static immutable r = cast(immutable)regexPure([pattern], flags);
-    alias Char = BasicElementOf!(typeof(pattern));
-    enum source = ctGenRegExCode(r);
+    import std.regex.internal.backtracking;
     alias Matcher = BacktrackingMatcher!(true);
-    @trusted bool func(ref Matcher!Char matcher)
+    @trusted bool IsolatedFunc(ref Matcher!Char matcher)
     {
         debug(std_regex_ctr) pragma(msg, source);
         mixin(source);
     }
+}
+
+template ctRegexImpl(alias pattern, string flags=[])
+{
+    import std.regex.internal.parser, std.regex.internal.backtracking;
+    static immutable r = cast(immutable)regexPure([pattern], flags);
+    alias Char = BasicElementOf!(typeof(pattern));
+    enum source = ctGenRegExCode(r);
+    alias func = IsolatedFunc!(Char, source);
     static immutable nr = immutable StaticRegex!Char(r, &func);
 }
 

From f8b3eea065ac3264d2e6784836b4a69a5d4a2be4 Mon Sep 17 00:00:00 2001
From: Dmitry Olshansky <dmitry.olsh@gmail.com>
Date: Sun, 9 Oct 2016 15:35:32 +0300
Subject: [PATCH 23/23] [std.regex] Addressing review comments

---
 std/regex/internal/bitnfa.d | 33 +++++++++++++++++++++------------
 std/regex/internal/ir.d     |  8 ++++----
 std/regex/package.d         | 23 ++++++++++++-----------
 3 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d
index 3e7fbd61bfd..2dd0b8f0d41 100644
--- a/std/regex/internal/bitnfa.d
+++ b/std/regex/internal/bitnfa.d
@@ -1,4 +1,3 @@
-
 //Written in the D programming language
 /*
     Implementation of a concept "NFA in a word" which is
@@ -43,7 +42,7 @@ pure:
         if (!p.occupied)
         {
             items++;
-            if (4*items >= table.length*3)
+            if (4 * items >= table.length * 3)
             {
                 grow();
                 p = locate(key, table);
@@ -107,7 +106,7 @@ private:
             if (slot == table.length)
                 slot = 0;
         }
-        return table.ptr+slot;
+        return table.ptr + slot;
     }
 
     void grow()
@@ -125,6 +124,17 @@ private:
     }
 }
 
+unittest
+{
+    HashTab tab;
+    tab[3] = 1;
+    tab[7] = 2;
+    tab[11] = 3;
+    assert(tab[3] == 1);
+    assert(tab[7] == 2);
+    assert(tab[11] == 3);
+}
+
 
 // Specialized 2-level trie of uint masks for BitNfa.
 // Uses the concept of CoW: a page gets modified in place
@@ -586,11 +596,6 @@ auto reverseBitNfa(Char)(auto ref Regex!Char re, uint length) pure
             while (ir[pc].code == Option)
             {
                 size_t size = ir[pc].data;
-                if (ir[pc+size-IRL!GotoEndOr].code == GotoEndOr)
-                {
-                    ir[pc+size-IRL!(GotoEndOr)].data = ir[pc+size-IRL!(GotoEndOr)].data+1;
-                    size -= IRL!GotoEndOr;
-                }
                 size_t j = pc + IRL!Option;
                 if (ir[j].code == End)
                 {
@@ -657,7 +662,7 @@ version(unittest)
         {
             import std.regex, std.conv;
             import std.stdio;
-            auto rex = regex(re);
+            auto rex = regex(re, "s");
             auto m = make(rex);
             auto s = Input!char(input);
             assert(m.search(s), text("Failed @", line, " ", input, " with ", re));
@@ -672,7 +677,7 @@ version(unittest)
         {
             import std.regex, std.conv;
             import std.stdio;
-            auto rex = regex(re);
+            auto rex = regex(re, "s");
             auto m = make(rex);
             auto s = Input!char(input);
             assert(!m.search(s), text("Should have failed @", line, " " , input, " with ", re));
@@ -709,6 +714,8 @@ unittest
         .checkBit("0123456789_0123456789_0123456789_0123456789", 31);
     "0123456789_0123456789_0123456789_012"
         .checkBit("0123456789(0123456789_0123456789_0123456789_0123456789|01234)",10);
+    "0123456789_0123456789_0123456789_012"
+        .checkBit("0123456789_0123456789_012345678[890]", 31);
     // assertions ignored
     "0abc1".checkBit("(?<![0-9])[a-c]+$", 2);
     // stop on repetition
@@ -724,9 +731,11 @@ unittest
 unittest
 {
     "xxabcy".checkM("abc", 2);
-    "_10bcy".checkM([`\d+`, `[a-z]+`], 1);
+    "пень".checkM("пен.", 0);
+    "_10bcy".checkM([`\d+`, `[a-z]+`, `\*`], 1);
     "1/03/12 - 3/03/12".checkM([r"\d+/\d+/\d+"],0);
-    "abc@email.com".checkM(`\S+@\S?1`, 0);
+    "abcя@email.com".checkM(`\S+@\S?1`, 0);
     "Strap a rocket engine on a chicken.".checkM("[ra]", 2);
     "abcd".checkM("ab|cd", 0);
+    "abcd".checkM("(a|b|c)*(?=x)d", 0);
 }
diff --git a/std/regex/internal/ir.d b/std/regex/internal/ir.d
index 6795e737332..7de14079189 100644
--- a/std/regex/internal/ir.d
+++ b/std/regex/internal/ir.d
@@ -377,7 +377,7 @@ struct Group(DataIndex)
 }
 
 //debugging tool, prints out instruction along with opcodes
-@trusted string disassemble(in Bytecode[] irb, uint pc, in NamedGroup[] dict=[])
+@trusted pure string disassemble(in Bytecode[] irb, uint pc, in NamedGroup[] dict=[])
 {
     import std.array : appender;
     import std.format : formattedWrite;
@@ -674,10 +674,10 @@ package(std.regex):
     {//@@@BUG@@@ write is system
         for (uint i = 0; i < ir.length; i += ir[i].length)
         {
-            writefln("%d\t%s ", i, disassemble(ir, i, dict));
+            debug(std_regex_parser) writefln("%d\t%s ", i, disassemble(ir, i, dict));
         }
-        writeln("Total merge table size: ", hotspotTableSize);
-        writeln("Max counter nesting depth: ", maxCounterDepth);
+        debug(std_regex_parser) writeln("Total merge table size: ", hotspotTableSize);
+        debug(std_regex_parser) writeln("Max counter nesting depth: ", maxCounterDepth);
     }
 
 }
diff --git a/std/regex/package.d b/std/regex/package.d
index e4566d47c6f..b7258dce899 100644
--- a/std/regex/package.d
+++ b/std/regex/package.d
@@ -309,17 +309,7 @@ public alias Regex(Char) = std.regex.internal.ir.Regex!(Char);
 +/
 public alias StaticRegex(Char) = std.regex.internal.ir.StaticRegex!(Char);
 
-/++
-    Compile regular expression pattern for the later execution.
-    Returns: $(D Regex) object that works on inputs having
-    the same character width as $(D pattern).
 
-    Params:
-    pattern(s) = Regular expression(s) to match
-    flags = The _attributes (g, i, m and x accepted)
-
-    Throws: $(D RegexException) if there were any errors during compilation.
-+/
 @trusted public auto regexPure(S)(S[] patterns, const(char)[] flags="") pure
     if (isSomeString!(S))
 {
@@ -345,6 +335,17 @@ public alias StaticRegex(Char) = std.regex.internal.ir.StaticRegex!(Char);
     return regexImpl!S(pat, flags);
 }
 
+/++
+    Compile regular expression pattern for the later execution.
+    Returns: $(D Regex) object that works on inputs having
+    the same character width as $(D pattern).
+
+    Params:
+    pattern(s) = Regular expression(s) to match
+    flags = The _attributes (g, i, m and x accepted)
+
+    Throws: $(D RegexException) if there were any errors during compilation.
++/
 @trusted public auto regex(S)(S[] patterns, const(char)[] flags="")
     if (isSomeString!(S))
 {
@@ -376,7 +377,7 @@ unittest
     assert(m.front[1] == "12");
 }
 
-public auto regexImpl(S)(S pattern, const(char)[] flags="") pure
+private auto regexImpl(S)(S pattern, const(char)[] flags="") pure
     if (isSomeString!(S))
 {
     import std.regex.internal.parser : Parser, CodeGen;